limbo-ai 1.27.0 → 1.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +243 -12
- package/evals/cases/create-recurring-reminder.json +40 -0
- package/evals/results/history/run-1774966775381.json +135 -0
- package/evals/results/history/run-1774966839076.json +33 -0
- package/evals/results/history/run-1774966890459.json +33 -0
- package/evals/results/history/run-1774967730887.json +189 -0
- package/evals/results/history/run-1774967764419.json +113 -0
- package/package.json +1 -1
- package/setup-server/public/index.html +750 -675
package/cli.js
CHANGED
|
@@ -1137,24 +1137,191 @@ function ensureVolumePermissions() {
|
|
|
1137
1137
|
], { stdio: 'pipe' });
|
|
1138
1138
|
}
|
|
1139
1139
|
|
|
1140
|
-
// ─── Server detection &
|
|
1140
|
+
// ─── Server detection & tunnel for remote wizard access ─────────────────────
|
|
1141
1141
|
|
|
1142
1142
|
/**
 * Heuristically decides whether the CLI is running on a remote/headless host.
 *
 * @returns {boolean} true when an SSH session variable (SSH_CONNECTION or
 *   SSH_CLIENT) is set, or when the platform is Linux and DISPLAY is unset/empty.
 */
function isServerEnvironment() {
  const { SSH_CONNECTION, SSH_CLIENT, DISPLAY } = process.env;

  // Any SSH session marker means we are being driven remotely.
  if (SSH_CONNECTION || SSH_CLIENT) {
    return true;
  }

  // Linux without a DISPLAY has no local browser to open the wizard in.
  return os.platform() === 'linux' && !DISPLAY;
}
|
|
1146
1146
|
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1147
|
+
// Certificate file left behind by `cloudflared login`; its existence is used as the "logged in" signal.
const CF_CERT_PATH = path.join(os.homedir(), '.cloudflared', 'cert.pem');
|
|
1148
|
+
// JSON metadata (tunnel name, id, hostname, type) for the named setup tunnel; read back by cleanupCfTunnel().
const CF_TUNNEL_CONFIG = path.join(LIMBO_DIR, 'tunnel-config.json');
|
|
1149
|
+
|
|
1150
|
+
/**
 * Checks whether the `cloudflared` binary can be executed.
 *
 * @returns {boolean} true when `cloudflared --version` runs without throwing.
 */
function hasCloudflared() {
  try {
    // The command string is a fixed literal, so there is no injection surface.
    execSync('cloudflared --version', { stdio: 'pipe' });
  } catch {
    return false;
  }
  return true;
}
|
|
1154
|
+
|
|
1155
|
+
/**
 * Reports whether a Cloudflare login certificate is present on disk.
 *
 * @returns {boolean} true when the file at CF_CERT_PATH exists.
 */
function isCloudflareLoggedIn() {
  const certPresent = fs.existsSync(CF_CERT_PATH);
  return certPresent;
}
|
|
1158
|
+
|
|
1159
|
+
// Interactive prompt: choose tunnel type
|
|
1160
|
+
/**
 * Asks the operator on stdin which kind of tunnel to create.
 *
 * Only the exact answer "2" (after trimming) selects the quick tunnel; every
 * other answer, including an empty one, selects the Cloudflare tunnel.
 *
 * @returns {Promise<'quick'|'cloudflare'>} the chosen tunnel type.
 */
async function promptTunnelChoice() {
  const readline = require('readline');
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

  console.log(`
${c.bold}Setup wizard needs a public URL for your client.${c.reset}

${c.green}1)${c.reset} Cloudflare tunnel ${c.dim}(stable URL under your domain, recommended)${c.reset}
${c.green}2)${c.reset} Quick tunnel ${c.dim}(instant, temporary URL via localhost.run)${c.reset}
`);

  const answer = await new Promise((resolve) => {
    rl.question(' Choose [1/2]: ', resolve);
  });
  rl.close();

  if (answer.trim() === '2') {
    return 'quick';
  }
  return 'cloudflare';
}
|
|
1174
|
+
|
|
1175
|
+
// cloudflared login (interactive, opens browser or prints URL)
|
|
1176
|
+
/**
 * Makes sure a Cloudflare login certificate exists, running the interactive
 * `cloudflared login` flow (with inherited stdio) when it does not.
 *
 * @returns {Promise<boolean>} true when already logged in or the login
 *   succeeded; false when the command exited non-zero or no certificate
 *   appeared afterwards.
 */
async function ensureCloudflareLogin() {
  if (isCloudflareLoggedIn()) {
    return true;
  }

  log('Logging in to Cloudflare...');
  log('A browser window will open (or a URL will be printed). Select your domain.\n');

  const { status } = spawnSync('cloudflared', ['login'], { stdio: 'inherit' });

  // A zero exit code alone is not trusted: the certificate must exist too.
  const loginSucceeded = status === 0 && isCloudflareLoggedIn();
  if (!loginSucceeded) {
    warn('Cloudflare login failed or was cancelled.');
    return false;
  }

  ok('Cloudflare login successful.');
  return true;
}
|
|
1190
|
+
|
|
1191
|
+
// Tunnel hostnames are always setup-<slug>.heylimbo.com
|
|
1192
|
+
// Base domain under which setup tunnel hostnames (setup-<slug>.<base domain>) are created.
const CF_TUNNEL_BASE_DOMAIN = 'heylimbo.com';
|
|
1193
|
+
|
|
1194
|
+
// Create a named CF tunnel using cloudflared CLI (requires cert.pem from login)
|
|
1195
|
+
/**
 * Creates a named Cloudflare tunnel that exposes http://localhost:<port> at
 * https://setup-<slug>.<CF_TUNNEL_BASE_DOMAIN>, using the `cloudflared` CLI.
 *
 * Steps: create the tunnel, route DNS to it, write a minimal cloudflared
 * config, start `cloudflared tunnel run` detached (output goes to
 * tunnel-setup.log), wait up to ~15s for the connection to register, then
 * probe the public hostname up to 15 times. The DNS probe is best-effort:
 * the tunnel is still returned if no probe succeeds.
 *
 * @param {number|string} port Local port the tunnel should forward to.
 * @returns {Promise<{type: string, url: string, pid: number, logFile: string, tunnelName: string}|null>}
 *   Tunnel descriptor on success, or null when any step fails (a warning is
 *   printed and the partially created tunnel is removed).
 */
async function createNamedCfTunnel(port) {
  const slug = crypto.randomBytes(4).toString('hex').slice(0, 7);
  const tunnelName = 'limbo-setup-' + slug;
  const hostname = 'setup-' + slug + '.' + CF_TUNNEL_BASE_DOMAIN;

  // Tracked so failure paths can undo partial work instead of leaking it.
  let tunnelCreated = false;
  let tunnelProc = null;

  // Stops the child (if started) and force-deletes the tunnel registration.
  const discardTunnel = () => {
    if (tunnelProc) {
      try { tunnelProc.kill(); } catch {}
    }
    spawnSync('cloudflared', ['tunnel', 'delete', '-f', tunnelName], { stdio: 'pipe' });
  };

  try {
    // 1. Create tunnel
    spinnerWrite('Creating tunnel...');
    const createResult = spawnSync('cloudflared', ['tunnel', 'create', tunnelName], {
      stdio: 'pipe', encoding: 'utf8',
    });
    if (createResult.status !== 0) {
      spinnerClear();
      warn('Failed to create tunnel: ' + (createResult.stderr || '').trim());
      return null;
    }
    tunnelCreated = true;

    // Extract tunnel ID from output ("Created tunnel <name> with id <uuid>")
    const idMatch = (createResult.stdout + createResult.stderr).match(/with id ([0-9a-f-]+)/i);
    if (!idMatch) {
      spinnerClear();
      warn('Could not parse tunnel ID from cloudflared output.');
      // The tunnel exists on Cloudflare's side even though we cannot use it.
      discardTunnel();
      return null;
    }
    const tunnelId = idMatch[1];

    // 2. Route DNS
    spinnerWrite('Configuring DNS...');
    const dnsResult = spawnSync('cloudflared', ['tunnel', 'route', 'dns', tunnelName, hostname], {
      stdio: 'pipe', encoding: 'utf8',
    });
    if (dnsResult.status !== 0) {
      // Non-fatal: might already exist, or we can continue anyway
      const stderr = (dnsResult.stderr || '').trim();
      if (!stderr.includes('already exists')) {
        warn('DNS routing warning: ' + stderr);
      }
    }

    // 3. Write minimal config file for this tunnel
    const cfCredPath = path.join(os.homedir(), '.cloudflared', tunnelId + '.json');
    const tunnelConfig = path.join(LIMBO_DIR, 'tunnel-cloudflared.yml');
    const configContent = [
      'tunnel: ' + tunnelId,
      'credentials-file: ' + cfCredPath,
      'ingress:',
      ' - hostname: ' + hostname,
      ' service: http://localhost:' + port,
      ' - service: http_status:404',
      '',
    ].join('\n');
    fs.writeFileSync(tunnelConfig, configContent, { mode: 0o600 });

    // 4. Run tunnel
    const logFile = path.join(LIMBO_DIR, 'tunnel-setup.log');
    // One descriptor shared by stdout and stderr keeps their writes from
    // overwriting each other; the parent closes its copy once the child has it.
    const logFd = fs.openSync(logFile, 'w');
    try {
      tunnelProc = spawn('cloudflared', [
        'tunnel', '--config', tunnelConfig, 'run', tunnelName,
      ], {
        detached: true,
        stdio: ['ignore', logFd, logFd],
      });
    } finally {
      fs.closeSync(logFd);
    }
    // Without a listener an async spawn failure would be an uncaught exception;
    // the connection wait below already reports the failure as a timeout.
    tunnelProc.on('error', () => {});
    tunnelProc.unref();

    // Wait for connection
    let connected = false;
    for (let i = 0; i < 15; i++) {
      spinnerWrite('Connecting tunnel...');
      sleep(1000);
      try {
        const logs = fs.readFileSync(logFile, 'utf8');
        if (logs.includes('Registered tunnel connection') || logs.includes('INF Registered')) {
          connected = true;
          break;
        }
      } catch {}
    }
    spinnerClear();

    if (!connected) {
      warn('Cloudflare tunnel did not connect in time.');
      discardTunnel();
      return null;
    }

    // Wait for DNS propagation (Chromium caches negative DNS lookups aggressively)
    const https = require('https');
    for (let i = 0; i < 15; i++) {
      spinnerWrite('Waiting for DNS (' + (i + 1) + 's)...');
      try {
        await new Promise((resolve, reject) => {
          const req = https.get('https://' + hostname + '/healthz', (res) => {
            // Drain the body so the socket is released; only the status matters.
            res.resume();
            resolve(res.statusCode);
          });
          req.on('error', reject);
          req.setTimeout(3000, () => { req.destroy(); reject(new Error('timeout')); });
        });
        break; // DNS resolved and tunnel responded
      } catch {
        sleep(1000);
      }
    }
    spinnerClear();

    // Save metadata for cleanup
    const meta = { tunnelName, tunnelId, hostname, type: 'cloudflare-named' };
    fs.writeFileSync(CF_TUNNEL_CONFIG, JSON.stringify(meta), { mode: 0o600 });

    return {
      type: 'cloudflare-named',
      url: 'https://' + hostname,
      pid: tunnelProc.pid,
      logFile,
      tunnelName,
    };
  } catch (err) {
    spinnerClear();
    warn('Cloudflare tunnel failed: ' + err.message);
    // The caller falls back to another tunnel, so do not leave this one behind.
    if (tunnelCreated) {
      try { discardTunnel(); } catch {}
    }
    return null;
  }
}
|
|
1316
|
+
|
|
1317
|
+
// Fallback: localhost.run SSH tunnel (ephemeral, no install needed)
|
|
1318
|
+
async function createQuickTunnel(port) {
|
|
1150
1319
|
try {
|
|
1151
1320
|
const logFile = path.join(LIMBO_DIR, 'tunnel-setup.log');
|
|
1152
|
-
// localhost.run provides instant HTTPS URLs via SSH reverse tunneling.
|
|
1153
|
-
// No binary to install, no DNS propagation delay, no Chrome caching issues.
|
|
1154
1321
|
const tunnelProc = spawn('ssh', [
|
|
1155
1322
|
'-o', 'StrictHostKeyChecking=accept-new',
|
|
1156
1323
|
'-o', 'ServerAliveInterval=30',
|
|
1157
|
-
'-R',
|
|
1324
|
+
'-R', '80:localhost:' + port,
|
|
1158
1325
|
'nokey@localhost.run',
|
|
1159
1326
|
], {
|
|
1160
1327
|
detached: true,
|
|
@@ -1162,7 +1329,6 @@ async function createSetupTunnel(port) {
|
|
|
1162
1329
|
});
|
|
1163
1330
|
tunnelProc.unref();
|
|
1164
1331
|
|
|
1165
|
-
// localhost.run prints the URL almost instantly (no DNS propagation needed)
|
|
1166
1332
|
let tunnelUrl = null;
|
|
1167
1333
|
for (let i = 0; i < 10; i++) {
|
|
1168
1334
|
spinnerWrite('Securing tunnel...');
|
|
@@ -1187,12 +1353,74 @@ async function createSetupTunnel(port) {
|
|
|
1187
1353
|
}
|
|
1188
1354
|
}
|
|
1189
1355
|
|
|
1356
|
+
// Interactive tunnel creation: prompts admin for choice
|
|
1357
|
+
/**
 * Creates the public tunnel for the setup wizard.
 *
 * When `cloudflared` is installed the operator is asked which tunnel type to
 * use. A Cloudflare named tunnel is attempted only if that option is chosen
 * and the Cloudflare login succeeds; in every other case, and when the named
 * tunnel cannot be created, the quick tunnel is used instead.
 *
 * @param {number|string} port Local port the tunnel should expose.
 * @returns {Promise<object|null>} Whatever createNamedCfTunnel() or
 *   createQuickTunnel() resolves to.
 */
async function createSetupTunnel(port) {
  if (hasCloudflared()) {
    const selection = await promptTunnelChoice();

    // Login is only attempted when the Cloudflare option was picked.
    if (selection === 'cloudflare' && (await ensureCloudflareLogin())) {
      const namedTunnel = await createNamedCfTunnel(port);
      if (namedTunnel) {
        return namedTunnel;
      }
      warn('Falling back to quick tunnel...');
    }
  }

  return createQuickTunnel(port);
}
|
|
1376
|
+
|
|
1377
|
+
// Stop the tunnel process and delete its log file (Cloudflare-side resources are removed by cleanupCfTunnel)
|
|
1190
1378
|
/**
 * Stops a setup tunnel's process and deletes its log file.
 *
 * Both steps are best-effort: errors from killing the process or unlinking
 * the file are ignored. Nothing is done when `tunnel` is falsy.
 *
 * @param {{pid?: number, logFile?: string}|null|undefined} tunnel Descriptor
 *   returned by the tunnel creation functions.
 * @returns {void}
 */
function teardownSetupTunnel(tunnel) {
  if (!tunnel) {
    return;
  }

  const { pid, logFile } = tunnel;

  try {
    process.kill(pid);
  } catch {
    // Process already gone or pid invalid: nothing left to stop.
  }

  if (!logFile) {
    return;
  }
  try {
    fs.unlinkSync(logFile);
  } catch {
    // Log file already removed.
  }
}
|
|
1195
1383
|
|
|
1384
|
+
// Clean up leftover CF tunnels from previous runs
|
|
1385
|
+
/**
 * Removes a Cloudflare tunnel left over from a previous setup run.
 *
 * Reads the metadata file at CF_TUNNEL_CONFIG. When it names a tunnel, runs
 * `cloudflared tunnel cleanup` and `cloudflared tunnel delete -f` for it.
 * The metadata file and the generated tunnel-cloudflared.yml are then
 * deleted, including when the metadata is corrupt, so a bad file cannot
 * linger across runs. Does nothing when the metadata file cannot be read.
 * All steps are best-effort and never throw.
 *
 * @returns {void}
 */
function cleanupCfTunnel() {
  let raw;
  try {
    raw = fs.readFileSync(CF_TUNNEL_CONFIG, 'utf8');
  } catch {
    return; // No metadata recorded: nothing to clean up.
  }

  let meta = null;
  try {
    meta = JSON.parse(raw);
  } catch {
    // Unparseable metadata: the tunnel name is lost, but the stale files still go.
  }

  // Only a non-empty string is a usable CLI argument.
  const tunnelName = meta && typeof meta.tunnelName === 'string' ? meta.tunnelName : '';
  if (tunnelName) {
    try {
      spawnSync('cloudflared', ['tunnel', 'cleanup', tunnelName], { stdio: 'pipe' });
      spawnSync('cloudflared', ['tunnel', 'delete', '-f', tunnelName], { stdio: 'pipe' });
    } catch {}
  }

  const staleFiles = [CF_TUNNEL_CONFIG, path.join(LIMBO_DIR, 'tunnel-cloudflared.yml')];
  for (const file of staleFiles) {
    try { fs.unlinkSync(file); } catch {}
  }
}
|
|
1397
|
+
|
|
1398
|
+
// Read a single env var from ~/.limbo/.env
|
|
1399
|
+
/**
 * Reads a single variable from the env file at ENV_FILE.
 *
 * The name is matched literally (regex metacharacters are escaped) against
 * lines of the form `NAME=value`.
 *
 * @param {string} name Variable name to look up.
 * @returns {string|null} The trimmed value of the first matching line, or
 *   null when the file cannot be read, the variable is absent, or its value
 *   is empty.
 */
function loadEnvVar(name) {
  try {
    const content = fs.readFileSync(ENV_FILE, 'utf8');
    // Escape the name so e.g. "A.B" cannot match a line starting with "AxB=".
    const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const match = content.match(new RegExp('^' + escapedName + '=(.+)$', 'm'));
    return match ? match[1].trim() : null;
  } catch {
    return null;
  }
}
|
|
1406
|
+
|
|
1407
|
+
// Append or update env vars in ~/.limbo/.env without overwriting existing ones
|
|
1408
|
+
/**
 * Writes the given variables into the env file at ENV_FILE.
 *
 * A variable that already has a `KEY=...` line gets that line replaced in
 * place; any other variable is appended. Lines for variables not mentioned
 * in `vars` are left untouched. The file is written with mode 0o600.
 * Failures are ignored (best-effort).
 *
 * @param {Object<string, string>} vars Map of variable name to value.
 * @returns {void}
 */
function persistEnvVars(vars) {
  try {
    let content = '';
    try {
      content = fs.readFileSync(ENV_FILE, 'utf8');
    } catch {
      // Unreadable or missing file: start from empty content.
    }

    for (const [key, value] of Object.entries(vars)) {
      const line = key + '=' + value;
      // Match the key literally, not as a regex pattern.
      const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const re = new RegExp('^' + escapedKey + '=.*$', 'm');

      if (re.test(content)) {
        // Function replacer: "$&", "$$", "$1" in a value must be written
        // verbatim, not interpreted as replacement patterns.
        content = content.replace(re, () => line);
      } else {
        const existing = content.trimEnd();
        // Avoid a leading blank line when the file was empty.
        content = (existing ? existing + '\n' : '') + line + '\n';
      }
    }

    fs.writeFileSync(ENV_FILE, content, { mode: 0o600 });
  } catch {}
}
|
|
1423
|
+
|
|
1196
1424
|
function installGlobalAlias() {
|
|
1197
1425
|
// Create a `limbo` shell wrapper so users don't have to type `npx limbo-ai` every time.
|
|
1198
1426
|
// Tries /usr/local/bin first (macOS, Linux with sudo), falls back to ~/.local/bin (no sudo).
|
|
@@ -1609,6 +1837,9 @@ function writeMinimalEnv() {
|
|
|
1609
1837
|
// ─── Commands ────────────────────────────────────────────────────────────────
|
|
1610
1838
|
|
|
1611
1839
|
async function cmdStart() {
|
|
1840
|
+
// Clean up any leftover CF tunnel from a previous setup run
|
|
1841
|
+
cleanupCfTunnel();
|
|
1842
|
+
|
|
1612
1843
|
// ── Auto-install Docker if missing ────────────────────────────────────────
|
|
1613
1844
|
if (!hasDocker()) {
|
|
1614
1845
|
installDocker();
|
|
@@ -1653,7 +1884,7 @@ async function cmdStart() {
|
|
|
1653
1884
|
const flagApiKey = parseFlag('--api-key');
|
|
1654
1885
|
const flagModel = parseFlag('--model');
|
|
1655
1886
|
const flagLang = parseFlag('--language') || 'en';
|
|
1656
|
-
|
|
1887
|
+
// No CF tunnel flags are parsed here: createSetupTunnel() picks the tunnel type through an interactive prompt
|
|
1657
1888
|
|
|
1658
1889
|
if (flagProvider) {
|
|
1659
1890
|
const validProviders = ['openai', 'anthropic', 'openrouter'];
|
|
@@ -1745,9 +1976,9 @@ async function cmdStart() {
|
|
|
1745
1976
|
// Extract wizard URL from container logs (polls briefly, no healthcheck needed)
|
|
1746
1977
|
const wizardUrl = extractWizardUrl();
|
|
1747
1978
|
|
|
1748
|
-
//
|
|
1979
|
+
// Create a public tunnel (auto on servers, or with --tunnel flag)
|
|
1749
1980
|
let tunnel = null;
|
|
1750
|
-
if (isServerEnvironment()) {
|
|
1981
|
+
if (isServerEnvironment() || process.argv.includes('--tunnel')) {
|
|
1751
1982
|
tunnel = await createSetupTunnel(PORT);
|
|
1752
1983
|
}
|
|
1753
1984
|
|
|
@@ -2010,7 +2241,7 @@ ${c.bold}Flags:${c.reset}
|
|
|
2010
2241
|
--api-key <key> API key for headless install
|
|
2011
2242
|
--model <name> Model name (optional, uses provider default)
|
|
2012
2243
|
--language <code> Language: en, es (default: en)
|
|
2013
|
-
--tunnel
|
|
2244
|
+
--tunnel Force tunnel creation prompt (even on local/non-server environments)
|
|
2014
2245
|
|
|
2015
2246
|
${c.bold}Config:${c.reset}
|
|
2016
2247
|
limbo config voice --enable --api-key gsk_xxx Enable voice transcription
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "create-recurring-reminder",
|
|
3
|
+
"description": "Recurring reminder should create a valid cron job that zeroclaw can parse (regression test for schedule schema)",
|
|
4
|
+
"steps": [
|
|
5
|
+
{
|
|
6
|
+
"input": "Recordame todos los días a las 10am que tengo que tomar agua",
|
|
7
|
+
"assertions": [
|
|
8
|
+
{
|
|
9
|
+
"type": "response_matches",
|
|
10
|
+
"pattern": "(?i)(timezone|huso horario|zona horaria)"
|
|
11
|
+
}
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"input": "America/Buenos_Aires",
|
|
16
|
+
"assertions": [
|
|
17
|
+
{
|
|
18
|
+
"type": "cron_created",
|
|
19
|
+
"pattern": "agua|water|tomar",
|
|
20
|
+
"timezone": "America/Buenos_Aires",
|
|
21
|
+
"local_hour": 10,
|
|
22
|
+
"local_minute": 0
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"type": "response_matches",
|
|
26
|
+
"pattern": "(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)"
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"runs": 1,
|
|
32
|
+
"pass_threshold": 1.0,
|
|
33
|
+
"tags": [
|
|
34
|
+
"cron",
|
|
35
|
+
"reminder",
|
|
36
|
+
"recurring",
|
|
37
|
+
"regression"
|
|
38
|
+
],
|
|
39
|
+
"difficulty": "easy"
|
|
40
|
+
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "run-1774966775381",
|
|
3
|
+
"timestamp": "2026-03-31T14:20:21.314Z",
|
|
4
|
+
"meta": {
|
|
5
|
+
"provider": "anthropic",
|
|
6
|
+
"model": "claude-sonnet-4-6",
|
|
7
|
+
"reasoningEffort": "medium",
|
|
8
|
+
"authMode": "subscription",
|
|
9
|
+
"zeroclawVersion": "0.6.5",
|
|
10
|
+
"profileKey": "anthropic__claude-sonnet-4-6__medium",
|
|
11
|
+
"profileLabel": "claude-sonnet-4-6 · medium · anthropic"
|
|
12
|
+
},
|
|
13
|
+
"kind": "subset",
|
|
14
|
+
"scope": {
|
|
15
|
+
"case": null,
|
|
16
|
+
"tag": "cron",
|
|
17
|
+
"difficulty": null,
|
|
18
|
+
"judge": false
|
|
19
|
+
},
|
|
20
|
+
"results": [
|
|
21
|
+
{
|
|
22
|
+
"case": "create-recurring-reminder",
|
|
23
|
+
"run": 1,
|
|
24
|
+
"passRate": 0,
|
|
25
|
+
"passed": 0,
|
|
26
|
+
"total": 2,
|
|
27
|
+
"scoreResults": [
|
|
28
|
+
{
|
|
29
|
+
"assertion": {
|
|
30
|
+
"type": "cron_created",
|
|
31
|
+
"pattern": "agua|water|tomar",
|
|
32
|
+
"timezone": "America/Buenos_Aires",
|
|
33
|
+
"local_hour": 10,
|
|
34
|
+
"local_minute": 0
|
|
35
|
+
},
|
|
36
|
+
"pass": false,
|
|
37
|
+
"reason": "No cron job matched /agua|water|tomar/i"
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"assertion": {
|
|
41
|
+
"type": "response_matches",
|
|
42
|
+
"pattern": "(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)"
|
|
43
|
+
},
|
|
44
|
+
"pass": false,
|
|
45
|
+
"reason": "Response did NOT match /(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)/"
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
"judgeResults": null,
|
|
49
|
+
"response": "Tu USER.md no tiene timezone configurada. ¿En qué zona horaria estás?",
|
|
50
|
+
"steps": [
|
|
51
|
+
{
|
|
52
|
+
"index": 1,
|
|
53
|
+
"input": "Recordame todos los días a las 10am que tengo que tomar agua",
|
|
54
|
+
"response": "Tu USER.md no tiene timezone configurada. ¿En qué zona horaria estás?",
|
|
55
|
+
"latencyMs": 8160,
|
|
56
|
+
"userProfile": "# About Your User\n\nThis file was generated at first run from environment variables. It personalizes how you interact with your user.\n\n## Identity\n\n- **Name:** Tomas\n- **Timezone:** \n- **Language:** Spanish\n\n## Communication Preferences\n\nRespond in **Spanish**. Keep responses concise by default unless the user asks for more detail.\n\nAddress the user as **Tomas** when it feels natural, but don't overdo it.\n\n## Additional Context\n\nNo additional context provided.\n",
|
|
57
|
+
"mcpLogCount": 0,
|
|
58
|
+
"mcpLogs": [],
|
|
59
|
+
"assertions": [
|
|
60
|
+
{
|
|
61
|
+
"type": "cron_created",
|
|
62
|
+
"pattern": "agua|water|tomar",
|
|
63
|
+
"timezone": "America/Buenos_Aires",
|
|
64
|
+
"local_hour": 10,
|
|
65
|
+
"local_minute": 0
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"type": "response_matches",
|
|
69
|
+
"pattern": "(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)"
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
"scoreResults": [
|
|
73
|
+
{
|
|
74
|
+
"assertion": {
|
|
75
|
+
"type": "cron_created",
|
|
76
|
+
"pattern": "agua|water|tomar",
|
|
77
|
+
"timezone": "America/Buenos_Aires",
|
|
78
|
+
"local_hour": 10,
|
|
79
|
+
"local_minute": 0
|
|
80
|
+
},
|
|
81
|
+
"pass": false,
|
|
82
|
+
"reason": "No cron job matched /agua|water|tomar/i"
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"assertion": {
|
|
86
|
+
"type": "response_matches",
|
|
87
|
+
"pattern": "(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)"
|
|
88
|
+
},
|
|
89
|
+
"pass": false,
|
|
90
|
+
"reason": "Response did NOT match /(?i)(reminder|recordatorio|avisarte|programado|todos los días|diario|daily)/"
|
|
91
|
+
}
|
|
92
|
+
],
|
|
93
|
+
"vaultDiff": {
|
|
94
|
+
"created": 0,
|
|
95
|
+
"modified": 0,
|
|
96
|
+
"deleted": 0
|
|
97
|
+
},
|
|
98
|
+
"cronJobs": []
|
|
99
|
+
}
|
|
100
|
+
],
|
|
101
|
+
"vaultDiff": {
|
|
102
|
+
"created": 0,
|
|
103
|
+
"modified": 0,
|
|
104
|
+
"deleted": 0
|
|
105
|
+
},
|
|
106
|
+
"mcpLogCount": 0,
|
|
107
|
+
"mcpLogs": [],
|
|
108
|
+
"searchTimeMs": null,
|
|
109
|
+
"latencyMs": 8160,
|
|
110
|
+
"timestamp": "2026-03-31T14:19:44.783Z"
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"case": "create-reminder",
|
|
114
|
+
"run": 1,
|
|
115
|
+
"passRate": 0,
|
|
116
|
+
"passed": 0,
|
|
117
|
+
"total": 4,
|
|
118
|
+
"scoreResults": [],
|
|
119
|
+
"judgeResults": null,
|
|
120
|
+
"error": "Command failed: [limbo-vault] Index built: 53 notes (FTS5 search active)\nError: All providers/models failed. Attempts:\nprovider=anthropic model=claude-sonnet-4-6 attempt 1/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJcJ4oEaZTpzMXBGbfd\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 2/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJcgrkzNU1bWuHcv5Ek\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 3/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJd6vMvXxPUWfJSrV8Q\"}",
|
|
121
|
+
"timestamp": "2026-03-31T14:20:02.092Z"
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
"case": "reminder-timezone",
|
|
125
|
+
"run": 1,
|
|
126
|
+
"passRate": 0,
|
|
127
|
+
"passed": 0,
|
|
128
|
+
"total": 2,
|
|
129
|
+
"scoreResults": [],
|
|
130
|
+
"judgeResults": null,
|
|
131
|
+
"error": "Command failed: [limbo-vault] Index built: 53 notes (FTS5 search active)\nError: All providers/models failed. Attempts:\nprovider=anthropic model=claude-sonnet-4-6 attempt 1/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJdaUogfeWS49FDuerF\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 2/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJdvT6KoUwzqoqykHdm\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 3/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJeY8hK3Aa4cfKB8jbS\"}",
|
|
132
|
+
"timestamp": "2026-03-31T14:20:21.314Z"
|
|
133
|
+
}
|
|
134
|
+
]
|
|
135
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "run-1774966839076",
|
|
3
|
+
"timestamp": "2026-03-31T14:20:55.888Z",
|
|
4
|
+
"meta": {
|
|
5
|
+
"provider": "anthropic",
|
|
6
|
+
"model": "claude-sonnet-4-6",
|
|
7
|
+
"reasoningEffort": "medium",
|
|
8
|
+
"authMode": "subscription",
|
|
9
|
+
"zeroclawVersion": "0.6.5",
|
|
10
|
+
"profileKey": "anthropic__claude-sonnet-4-6__medium",
|
|
11
|
+
"profileLabel": "claude-sonnet-4-6 · medium · anthropic"
|
|
12
|
+
},
|
|
13
|
+
"kind": "subset",
|
|
14
|
+
"scope": {
|
|
15
|
+
"case": "create-recurring-reminder",
|
|
16
|
+
"tag": null,
|
|
17
|
+
"difficulty": null,
|
|
18
|
+
"judge": false
|
|
19
|
+
},
|
|
20
|
+
"results": [
|
|
21
|
+
{
|
|
22
|
+
"case": "create-recurring-reminder",
|
|
23
|
+
"run": 1,
|
|
24
|
+
"passRate": 0,
|
|
25
|
+
"passed": 0,
|
|
26
|
+
"total": 3,
|
|
27
|
+
"scoreResults": [],
|
|
28
|
+
"judgeResults": null,
|
|
29
|
+
"error": "Command failed: [limbo-vault] Index built: 53 notes (FTS5 search active)\nError: All providers/models failed. Attempts:\nprovider=anthropic model=claude-sonnet-4-6 attempt 1/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJgK9tycat3ekbkHPgo\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 2/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJgctkLn8o24dRWTpnV\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 3/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJhEuwCpjc13MQzff76\"}",
|
|
30
|
+
"timestamp": "2026-03-31T14:20:55.886Z"
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "run-1774966890459",
|
|
3
|
+
"timestamp": "2026-03-31T14:21:52.853Z",
|
|
4
|
+
"meta": {
|
|
5
|
+
"provider": "anthropic",
|
|
6
|
+
"model": "claude-sonnet-4-6",
|
|
7
|
+
"reasoningEffort": "medium",
|
|
8
|
+
"authMode": "subscription",
|
|
9
|
+
"zeroclawVersion": "0.6.5",
|
|
10
|
+
"profileKey": "anthropic__claude-sonnet-4-6__medium",
|
|
11
|
+
"profileLabel": "claude-sonnet-4-6 · medium · anthropic"
|
|
12
|
+
},
|
|
13
|
+
"kind": "subset",
|
|
14
|
+
"scope": {
|
|
15
|
+
"case": "create-recurring-reminder",
|
|
16
|
+
"tag": null,
|
|
17
|
+
"difficulty": null,
|
|
18
|
+
"judge": false
|
|
19
|
+
},
|
|
20
|
+
"results": [
|
|
21
|
+
{
|
|
22
|
+
"case": "create-recurring-reminder",
|
|
23
|
+
"run": 1,
|
|
24
|
+
"passRate": 0,
|
|
25
|
+
"passed": 0,
|
|
26
|
+
"total": 3,
|
|
27
|
+
"scoreResults": [],
|
|
28
|
+
"judgeResults": null,
|
|
29
|
+
"error": "Command failed: [limbo-vault] Index built: 53 notes (FTS5 search active)\nError: All providers/models failed. Attempts:\nprovider=anthropic model=claude-sonnet-4-6 attempt 1/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJk6VG1SsXqXeQxGKaH\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 2/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJkRdSeKQ7mjEd1u9wd\"}\nprovider=anthropic model=claude-sonnet-4-6 attempt 3/3: retryable; error=Anthropic API error (529 <unknown status code>): {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"},\"request_id\":\"req_011CZbJkwnAGZG38XQ4qHTm2\"}",
|
|
30
|
+
"timestamp": "2026-03-31T14:21:52.850Z"
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|