@hasna/testers 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/LICENSE +170 -21
  2. package/dashboard/dist/assets/{index-BSYf1bIR.css → index-CQzkimyO.css} +1 -1
  3. package/dashboard/dist/index.html +2 -2
  4. package/dist/cli/index.js +2043 -818
  5. package/dist/db/database.d.ts.map +1 -1
  6. package/dist/db/personas.d.ts +8 -0
  7. package/dist/db/personas.d.ts.map +1 -1
  8. package/dist/db/results.d.ts +2 -1
  9. package/dist/db/results.d.ts.map +1 -1
  10. package/dist/db/scenarios.d.ts +1 -0
  11. package/dist/db/scenarios.d.ts.map +1 -1
  12. package/dist/db/seed-personas.d.ts +15 -0
  13. package/dist/db/seed-personas.d.ts.map +1 -0
  14. package/dist/index.d.ts +1 -1
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +2220 -1441
  17. package/dist/lib/ai-client.d.ts +7 -8
  18. package/dist/lib/ai-client.d.ts.map +1 -1
  19. package/dist/lib/browser-bun.d.ts +153 -0
  20. package/dist/lib/browser-bun.d.ts.map +1 -0
  21. package/dist/lib/browser.d.ts +1 -1
  22. package/dist/lib/browser.d.ts.map +1 -1
  23. package/dist/lib/config.d.ts.map +1 -1
  24. package/dist/lib/costs.d.ts +5 -0
  25. package/dist/lib/costs.d.ts.map +1 -1
  26. package/dist/lib/failure-analyzer.d.ts +7 -0
  27. package/dist/lib/failure-analyzer.d.ts.map +1 -0
  28. package/dist/lib/failure-explainer.d.ts +17 -0
  29. package/dist/lib/failure-explainer.d.ts.map +1 -0
  30. package/dist/lib/failure-pipeline.d.ts +11 -0
  31. package/dist/lib/failure-pipeline.d.ts.map +1 -1
  32. package/dist/lib/hybrid-runner.d.ts +100 -0
  33. package/dist/lib/hybrid-runner.d.ts.map +1 -0
  34. package/dist/lib/judge.d.ts +1 -1
  35. package/dist/lib/judge.d.ts.map +1 -1
  36. package/dist/lib/reporter.d.ts +2 -0
  37. package/dist/lib/reporter.d.ts.map +1 -1
  38. package/dist/lib/runner.d.ts +5 -1
  39. package/dist/lib/runner.d.ts.map +1 -1
  40. package/dist/lib/screenshotter.d.ts.map +1 -1
  41. package/dist/mcp/index.js +8580 -6403
  42. package/dist/server/index.js +1082 -154
  43. package/dist/types/index.d.ts +60 -2
  44. package/dist/types/index.d.ts.map +1 -1
  45. package/package.json +4 -4
  46. package/dist/cli/index.d.ts +0 -3
  47. package/dist/cli/index.d.ts.map +0 -1
  48. package/dist/mcp/index.d.ts +0 -3
  49. package/dist/mcp/index.d.ts.map +0 -1
  50. package/dashboard/dist/assets/{index-Bdn52878.js → index-D52SWwDa.js} +0 -0
package/dist/index.js CHANGED
@@ -1,12 +1,16 @@
1
1
  // @bun
2
2
  var __defProp = Object.defineProperty;
3
+ var __returnValue = (v) => v;
4
+ function __exportSetter(name, newValue) {
5
+ this[name] = __returnValue.bind(null, newValue);
6
+ }
3
7
  var __export = (target, all) => {
4
8
  for (var name in all)
5
9
  __defProp(target, name, {
6
10
  get: all[name],
7
11
  enumerable: true,
8
12
  configurable: true,
9
- set: (newValue) => all[name] = () => newValue
13
+ set: __exportSetter.bind(all, name)
10
14
  });
11
15
  };
12
16
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
@@ -56,9 +60,12 @@ function scenarioFromRow(row) {
56
60
  assertions: JSON.parse(row.assertions || "[]"),
57
61
  personaId: row.persona_id ?? null,
58
62
  scenarioType: row.scenario_type ?? "browser",
63
+ requiredRole: row.required_role ?? null,
59
64
  version: row.version,
60
65
  createdAt: row.created_at,
61
- updatedAt: row.updated_at
66
+ updatedAt: row.updated_at,
67
+ lastPassedAt: row.last_passed_at ?? null,
68
+ lastPassedUrl: row.last_passed_url ?? null
62
69
  };
63
70
  }
64
71
  function runFromRow(row) {
@@ -98,7 +105,8 @@ function resultFromRow(row) {
98
105
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
99
106
  createdAt: row.created_at,
100
107
  personaId: row.persona_id ?? null,
101
- personaName: row.persona_name ?? null
108
+ personaName: row.persona_name ?? null,
109
+ failureAnalysis: row.failure_analysis ? JSON.parse(row.failure_analysis) : null
102
110
  };
103
111
  }
104
112
  function screenshotFromRow(row) {
@@ -148,6 +156,7 @@ function flowFromRow(row) {
148
156
  };
149
157
  }
150
158
  function personaFromRow(row) {
159
+ const hasAuth = row.auth_email && row.auth_password;
151
160
  return {
152
161
  id: row.id,
153
162
  shortId: row.short_id,
@@ -156,21 +165,33 @@ function personaFromRow(row) {
156
165
  description: row.description,
157
166
  role: row.role,
158
167
  instructions: row.instructions,
159
- traits: JSON.parse(row.traits),
160
- goals: JSON.parse(row.goals),
168
+ traits: JSON.parse(row.traits || "[]"),
169
+ goals: JSON.parse(row.goals || "[]"),
170
+ behaviors: JSON.parse(row.behaviors || "[]"),
171
+ expertiseLevel: row.expertise_level || "intermediate",
172
+ demographics: JSON.parse(row.demographics || "{}"),
173
+ painPoints: JSON.parse(row.pain_points || "[]"),
161
174
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
162
175
  enabled: row.enabled === 1,
163
176
  version: row.version,
164
177
  createdAt: row.created_at,
165
- updatedAt: row.updated_at
178
+ updatedAt: row.updated_at,
179
+ auth: hasAuth ? {
180
+ email: row.auth_email,
181
+ password: row.auth_password,
182
+ loginPath: row.auth_login_path ?? "/login",
183
+ cookies: row.auth_cookies ? JSON.parse(row.auth_cookies) : null
184
+ } : null
166
185
  };
167
186
  }
168
- var MODEL_MAP, ScenarioNotFoundError, RunNotFoundError, ResultNotFoundError, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ProjectNotFoundError, AgentNotFoundError, ScheduleNotFoundError, FlowNotFoundError, DependencyCycleError;
187
+ var MODEL_MAP, ScenarioNotFoundError, RunNotFoundError, ResultNotFoundError, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ProjectNotFoundError, AgentNotFoundError, ScheduleNotFoundError, BudgetExceededError, FlowNotFoundError, DependencyCycleError;
169
188
  var init_types = __esm(() => {
170
189
  MODEL_MAP = {
171
190
  quick: "claude-haiku-4-5-20251001",
172
191
  thorough: "claude-sonnet-4-6-20260311",
173
- deep: "claude-opus-4-6-20260311"
192
+ deep: "claude-opus-4-6-20260311",
193
+ "cerebras-fast": "llama-3.1-8b",
194
+ "cerebras-smart": "llama-3.3-70b"
174
195
  };
175
196
  ScenarioNotFoundError = class ScenarioNotFoundError extends Error {
176
197
  constructor(id) {
@@ -232,6 +253,12 @@ var init_types = __esm(() => {
232
253
  this.name = "ScheduleNotFoundError";
233
254
  }
234
255
  };
/**
 * Raised when a run's estimated cost is above the configured budget cap.
 *
 * Both amounts are given in cents; the message renders them as dollars with
 * two decimals. The raw values are also kept on the instance so callers can
 * react programmatically instead of parsing the message.
 */
BudgetExceededError = class BudgetExceededError extends Error {
  /**
   * @param {number} estimatedCents - Estimated cost of the run, in cents.
   * @param {number} capCents - Budget cap that was exceeded, in cents.
   */
  constructor(estimatedCents, capCents) {
    super(`Estimated run cost ($${(estimatedCents / 100).toFixed(2)}) exceeds budget cap ($${(capCents / 100).toFixed(2)}). Pass skipBudgetCheck: true to override.`);
    this.name = "BudgetExceededError";
    this.estimatedCents = estimatedCents;
    this.capCents = capCents;
  }
};
235
262
  FlowNotFoundError = class FlowNotFoundError extends Error {
236
263
  constructor(id) {
237
264
  super(`Flow not found: ${id}`);
@@ -673,6 +700,26 @@ CREATE TABLE IF NOT EXISTS golden_check_results (
673
700
  CREATE INDEX IF NOT EXISTS idx_golden_project ON golden_answers(project_id);
674
701
  CREATE INDEX IF NOT EXISTS idx_golden_enabled ON golden_answers(enabled);
675
702
  CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(golden_id);
703
+ `,
704
+ `
705
+ ALTER TABLE results ADD COLUMN failure_analysis TEXT;
706
+ `,
707
+ `
708
+ ALTER TABLE personas ADD COLUMN behaviors TEXT DEFAULT '[]';
709
+ ALTER TABLE personas ADD COLUMN expertise_level TEXT DEFAULT 'intermediate';
710
+ ALTER TABLE personas ADD COLUMN demographics TEXT DEFAULT '{}';
711
+ ALTER TABLE personas ADD COLUMN pain_points TEXT DEFAULT '[]';
712
+ `,
713
+ `
714
+ ALTER TABLE scenarios ADD COLUMN last_passed_at TEXT;
715
+ ALTER TABLE scenarios ADD COLUMN last_passed_url TEXT;
716
+ `,
717
+ `
718
+ ALTER TABLE personas ADD COLUMN auth_email TEXT;
719
+ ALTER TABLE personas ADD COLUMN auth_password TEXT;
720
+ ALTER TABLE personas ADD COLUMN auth_login_path TEXT DEFAULT '/login';
721
+ ALTER TABLE personas ADD COLUMN auth_cookies TEXT;
722
+ ALTER TABLE scenarios ADD COLUMN required_role TEXT;
676
723
  `
677
724
  ];
678
725
  });
@@ -1026,7 +1073,8 @@ function loadConfig() {
1026
1073
  todosDbPath: fileConfig.todosDbPath,
1027
1074
  judgeModel: fileConfig.judgeModel,
1028
1075
  judgeProvider: fileConfig.judgeProvider,
1029
- selfHeal: fileConfig.selfHeal ?? false
1076
+ selfHeal: fileConfig.selfHeal ?? false,
1077
+ conversationsSpace: fileConfig.conversationsSpace
1030
1078
  };
1031
1079
  const envModel = process.env["TESTERS_MODEL"];
1032
1080
  if (envModel) {
@@ -1217,6 +1265,412 @@ var init_browser_lightpanda = __esm(() => {
1217
1265
  init_types();
1218
1266
  });
1219
1267
 
1268
+ // src/lib/browser-bun.ts
1269
+ var exports_browser_bun = {};
1270
+ __export(exports_browser_bun, {
1271
+ isBunWebViewAvailable: () => isBunWebViewAvailable,
1272
+ BunWebViewSession: () => BunWebViewSession
1273
+ });
1274
+ import { join as join3 } from "path";
1275
+ import { mkdirSync as mkdirSync2 } from "fs";
1276
+ import { homedir as homedir3 } from "os";
/**
 * Reports whether the current runtime exposes `Bun.WebView`.
 *
 * @returns {boolean} true only when both `globalThis.Bun` and
 *   `globalThis.Bun.WebView` are defined.
 */
function isBunWebViewAvailable() {
  const bunRuntime = globalThis.Bun;
  if (bunRuntime === undefined) {
    return false;
  }
  return bunRuntime.WebView !== undefined;
}
/**
 * Resolves the on-disk directory for a named browser profile and makes sure
 * it exists.
 *
 * The root is `TESTERS_BROWSER_DATA_DIR` when that environment variable is
 * set, otherwise `~/.testers/browser`; the profile lives under
 * `<root>/profiles/<profileName>`.
 *
 * @param {string} profileName - Name of the profile sub-directory.
 * @returns {string} Path of the (now existing) profile directory.
 */
function getProfileDir(profileName) {
  const { TESTERS_BROWSER_DATA_DIR: configuredRoot } = process.env;
  const dataRoot = configuredRoot ?? join3(homedir3(), ".testers", "browser");
  const profilePath = join3(dataRoot, "profiles", profileName);
  // recursive: true also makes this a no-op when the directory already exists.
  mkdirSync2(profilePath, { recursive: true });
  return profilePath;
}
/**
 * Playwright-page-shaped adapter around a single `Bun.WebView` instance.
 *
 * The class is assigned lazily by `init_browser_bun()`. It exposes the subset
 * of page / locator / context methods implemented below and throws explicit
 * errors for features this engine cannot provide (file upload, PDF,
 * multi-tab, network interception, CDP sessions).
 *
 * NOTE(review): the exact semantics of the native `Bun.WebView` methods used
 * here (`navigate`, `click`, `type`, `press`, `scroll`, `scrollTo`, `resize`,
 * `screenshot`, `evaluate`, ...) are not visible in this file - confirm
 * against the Bun documentation.
 */
var BunWebViewSession;
var init_browser_bun = __esm(() => {
  const POLL_INTERVAL_MS = 100;
  const DEFAULT_WAIT_TIMEOUT_MS = 1e4;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Escapes a value for use inside a double-quoted CSS attribute selector so
  // that quotes/backslashes in an accessible name cannot break the selector.
  const escapeAttrValue = (value) => String(value).replace(/\\/g, "\\\\").replace(/"/g, "\\\"");

  // Builds page-side JS that reports whether the element produced by
  // `elementExpr` exists, is not display:none / visibility:hidden and has a
  // non-zero offsetWidth.
  const visibilityExpr = (elementExpr) => `
    (() => {
      const el = ${elementExpr};
      if (!el) return false;
      const style = window.getComputedStyle(el);
      return style.display !== 'none' && style.visibility !== 'hidden' && el.offsetWidth > 0;
    })()
  `;

  BunWebViewSession = class BunWebViewSession {
    view;
    _sessionId;
    _eventListeners = new Map;
    _viewport;

    /**
     * @param {object} [opts]
     * @param {number} [opts.width=1280] - Initial viewport width.
     * @param {number} [opts.height=720] - Initial viewport height.
     * @param {string} [opts.profile] - Persistent profile name; when omitted an
     *   "ephemeral" data store is requested.
     * @param {Function} [opts.onConsole] - Forwarded as the view's `console` option.
     * @throws {Error} When `Bun.WebView` is not available in this runtime.
     */
    constructor(opts = {}) {
      if (!isBunWebViewAvailable()) {
        throw new Error("Bun.WebView is not available. Install Bun canary: bun upgrade --canary");
      }
      const BunWebView = globalThis.Bun.WebView;
      // Remembered so viewportSize() reports the real size instead of a constant.
      this._viewport = { width: opts.width ?? 1280, height: opts.height ?? 720 };
      const constructorOpts = {
        width: this._viewport.width,
        height: this._viewport.height
      };
      if (opts.profile) {
        constructorOpts.dataStore = { directory: getProfileDir(opts.profile) };
      } else {
        constructorOpts.dataStore = "ephemeral";
      }
      if (opts.onConsole) {
        constructorOpts.console = opts.onConsole;
      }
      this.view = new BunWebView(constructorOpts);
      this.view.onNavigated = (url) => {
        this._emit("navigated", url);
      };
      this.view.onNavigationFailed = (error) => {
        this._emit("navigationfailed", error);
      };
    }

    /**
     * Polls `probe` until it resolves truthy. The probe always runs at least
     * once; a rejection from the probe rejects the returned promise instead of
     * being lost inside a timer callback.
     *
     * @param {() => Promise<unknown>} probe
     * @param {number} timeout - Milliseconds before giving up.
     * @param {string} message - Error message used on timeout.
     */
    async _waitUntil(probe, timeout, message) {
      const start = Date.now();
      for (;;) {
        if (await probe()) {
          return;
        }
        if (Date.now() - start > timeout) {
          throw new Error(message);
        }
        await sleep(POLL_INTERVAL_MS);
      }
    }

    /** Locator-like handle bound to the first element matching `selector`. */
    _firstHandle(selector) {
      const quoted = JSON.stringify(selector);
      return {
        click: (clickOpts) => this.click(selector, clickOpts),
        fill: (value) => this.fill(selector, value),
        textContent: () => this.view.evaluate(`document.querySelector(${quoted})?.textContent ?? null`),
        isVisible: () => this.isVisible(selector),
        hover: () => this.hover(selector),
        boundingBox: async () => null,
        scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
        evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${quoted}))`),
        waitFor: (waitOpts) => this._waitUntil(() => this.isVisible(selector), waitOpts?.timeout ?? DEFAULT_WAIT_TIMEOUT_MS, `Timeout waiting for ${selector}`)
      };
    }

    /** Locator-like handle bound to the n-th (0-based) element matching `selector`. */
    _nthHandle(selector, n) {
      const index = Math.trunc(Number(n));
      const elementExpr = `document.querySelectorAll(${JSON.stringify(selector)})[${index}]`;
      return {
        click: async (clickOpts) => {
          // Index 0 is exactly what the native selector click targets.
          if (index === 0) {
            return this.click(selector, clickOpts);
          }
          // Other indexes use a synthetic DOM click, so clickOpts (e.g. the
          // mouse button) cannot be honoured here.
          const clicked = await this.view.evaluate(`
            (() => {
              const el = ${elementExpr};
              if (!el) return false;
              el.click();
              return true;
            })()
          `);
          if (!clicked) {
            throw new Error(`No element at index ${index} for ${selector}`);
          }
        },
        textContent: () => this.view.evaluate(`${elementExpr}?.textContent ?? null`),
        isVisible: () => this.view.evaluate(visibilityExpr(elementExpr))
      };
    }

    /** Navigates to `url`, then waits a fixed 200 ms. `opts` is currently unused. */
    async goto(url, opts) {
      await this.view.navigate(url);
      await sleep(200);
    }
    async goBack() {
      await this.view.goBack();
    }
    async goForward() {
      await this.view.goForward();
    }
    async reload() {
      await this.view.reload();
    }

    /**
     * Evaluates an expression string, or a function applied to JSON-serialised
     * arguments, in the page.
     *
     * Arguments that JSON cannot represent (`undefined`, functions) are passed
     * as `undefined` so the generated call stays syntactically valid.
     */
    async evaluate(fnOrExpr, ...args) {
      if (typeof fnOrExpr !== "function") {
        return this.view.evaluate(fnOrExpr);
      }
      const serializedArgs = args.map((arg) => JSON.stringify(arg) ?? "undefined").join(", ");
      return this.view.evaluate(`(${fnOrExpr.toString()})(${serializedArgs})`);
    }

    /** Takes a screenshot and returns it as a Buffer. `opts` is currently unused. */
    async screenshot(opts) {
      const uint8 = await this.view.screenshot();
      return Buffer.from(uint8);
    }
    async click(selector, opts) {
      await this.view.click(selector, opts ? { button: opts.button } : undefined);
    }

    /** Best-effort clicks `selector` to focus it, then types `text`. `opts` is unused. */
    async type(selector, text, opts) {
      try {
        await this.view.click(selector);
      } catch {}
      await this.view.type(text);
    }

    /** Clears the element's value (firing a bubbling `input` event), then types `value`. */
    async fill(selector, value) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el) { el.value = ''; el.dispatchEvent(new Event('input', { bubbles: true })); }
        })()
      `);
      await this.type(selector, value);
    }
    async press(key, opts) {
      await this.view.press(key, opts);
    }

    /** Scrolls by `amount` in one of "left" | "right" | "up" | "down". */
    async scroll(direction, amount) {
      const dx = direction === "left" ? -amount : direction === "right" ? amount : 0;
      const dy = direction === "up" ? -amount : direction === "down" ? amount : 0;
      await this.view.scroll(dx, dy);
    }
    async scrollIntoView(selector) {
      await this.view.scrollTo(selector);
    }

    /** Only scrolls the element into view (best effort); no pointer events are sent. */
    async hover(selector) {
      try {
        await this.view.scrollTo(selector);
      } catch {}
    }
    async resize(width, height) {
      await this.view.resize(width, height);
      this._viewport = { width, height };
    }

    /** @returns a minimal handle for the first match, or null when nothing matches. */
    async $(selector) {
      const quoted = JSON.stringify(selector);
      const exists = await this.view.evaluate(`!!document.querySelector(${quoted})`);
      if (!exists) {
        return null;
      }
      return {
        textContent: async () => this.view.evaluate(`document.querySelector(${quoted})?.textContent ?? null`)
      };
    }

    /** @returns one minimal handle per element matching `selector` at call time. */
    async $$(selector) {
      const quoted = JSON.stringify(selector);
      const count = await this.view.evaluate(`document.querySelectorAll(${quoted}).length`);
      return Array.from({ length: count }, (_, i) => ({
        textContent: async () => this.view.evaluate(`document.querySelectorAll(${quoted})[${i}]?.textContent ?? null`)
      }));
    }
    async inputValue(selector) {
      return this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.value ?? ''`);
    }
    async isChecked(selector) {
      return this.view.evaluate(`!!(document.querySelector(${JSON.stringify(selector)})?.checked)`);
    }
    async isVisible(selector) {
      return this.view.evaluate(visibilityExpr(`document.querySelector(${JSON.stringify(selector)})`));
    }
    async isEnabled(selector) {
      return this.view.evaluate(`!(document.querySelector(${JSON.stringify(selector)})?.disabled)`);
    }

    /** Sets the element's value and fires a bubbling `change` event. */
    async selectOption(selector, value) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el) {
            el.value = ${JSON.stringify(value)};
            el.dispatchEvent(new Event('change', { bubbles: true }));
          }
        })()
      `);
      return [value];
    }
    async check(selector) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el && !el.checked) { el.checked = true; el.dispatchEvent(new Event('change', { bubbles: true })); }
        })()
      `);
    }
    async uncheck(selector) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el && el.checked) { el.checked = false; el.dispatchEvent(new Event('change', { bubbles: true })); }
        })()
      `);
    }
    async setInputFiles(selector, files) {
      throw new Error("File upload not supported in Bun.WebView engine. Use engine: 'playwright' instead.");
    }

    /**
     * Approximates role lookup with a CSS selector: elements whose `role`
     * attribute equals `role` or whose tag name is `role`, optionally narrowed
     * to those whose `aria-label` contains `opts.name`.
     *
     * NOTE(review): a RegExp `name` is matched by its string form, not as a
     * pattern.
     */
    getByRole(role, opts) {
      const name = opts?.name?.toString() ?? "";
      const roleAttr = escapeAttrValue(role);
      const nameAttr = escapeAttrValue(name);
      const selector = name ? `[role="${roleAttr}"][aria-label*="${nameAttr}"], ${role}[aria-label*="${nameAttr}"]` : `[role="${roleAttr}"], ${role}`;
      const quoted = JSON.stringify(selector);
      return {
        click: (clickOpts) => this.click(selector, clickOpts),
        fill: (value) => this.fill(selector, value),
        check: () => this.check(selector),
        uncheck: () => this.uncheck(selector),
        isVisible: () => this.isVisible(selector),
        textContent: () => this.view.evaluate(`document.querySelector(${quoted})?.textContent ?? null`),
        inputValue: () => this.inputValue(selector),
        first: () => this._firstHandle(selector),
        count: async () => this.view.evaluate(`document.querySelectorAll(${quoted}).length`),
        nth: (n) => this._nthHandle(selector, n)
      };
    }

    /**
     * Text lookup. `click` prefers a leaf element whose trimmed text equals
     * `text` and otherwise falls back to the first element containing it;
     * `waitFor` waits until the body text contains `text`. `opts` is
     * currently unused.
     */
    getByText(text, opts) {
      const quotedText = JSON.stringify(text);
      return {
        first: () => ({
          click: async (clickOpts) => {
            await this.view.evaluate(`
              (() => {
                const text = ${quotedText};
                const all = document.querySelectorAll('*');
                for (const el of all) {
                  if (el.children.length === 0 && el.textContent?.trim() === text) {
                    el.click(); return;
                  }
                }
                for (const el of all) {
                  if (el.textContent?.includes(text)) { el.click(); return; }
                }
              })()
            `);
          },
          waitFor: (waitOpts) => this._waitUntil(() => this.view.evaluate(`document.body?.textContent?.includes(${quotedText})`), waitOpts?.timeout ?? DEFAULT_WAIT_TIMEOUT_MS, `Timeout: text "${text}" not found`)
        })
      };
    }

    /** CSS-selector locator. `waitFor` waits for the element to exist in the DOM. */
    locator(selector) {
      const quoted = JSON.stringify(selector);
      return {
        click: (opts) => this.click(selector, opts),
        fill: (value) => this.fill(selector, value),
        scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
        // Bound to this locator's own selector.
        first: () => this._firstHandle(selector),
        evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${quoted}))`),
        waitFor: (opts) => this._waitUntil(() => this.view.evaluate(`!!document.querySelector(${quoted})`), opts?.timeout ?? DEFAULT_WAIT_TIMEOUT_MS, `Timeout: ${selector}`)
      };
    }
    url() {
      return this.view.url;
    }
    async title() {
      return this.view.title || await this.evaluate("document.title");
    }

    /** @returns the size given at construction or by the latest resize(). */
    viewportSize() {
      return { ...this._viewport };
    }

    /** Fixed 200 ms delay; `state` and `opts` are not inspected. */
    async waitForLoadState(state, opts) {
      await sleep(200);
    }

    /**
     * Waits until the current URL matches `pattern` (RegExp test, or substring
     * for strings). Default timeout is 30000 ms.
     */
    async waitForURL(pattern, opts) {
      const matches = () => {
        const url = this.view.url ?? "";
        if (pattern instanceof RegExp) {
          // /g and /y regexes keep state between test() calls; reset it.
          pattern.lastIndex = 0;
          return pattern.test(url);
        }
        return url.includes(pattern);
      };
      await this._waitUntil(async () => matches(), opts?.timeout ?? 30000, `Timeout waiting for URL to match ${pattern}`);
    }
    async waitForSelector(selector, opts) {
      await this._waitUntil(() => this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`), opts?.timeout ?? DEFAULT_WAIT_TIMEOUT_MS, `Timeout waiting for ${selector}`);
    }

    /** Loads `html` through a `data:` URL, then waits a fixed 100 ms. */
    async setContent(html) {
      await this.view.navigate(`data:text/html,${encodeURIComponent(html)}`);
      await sleep(100);
    }
    async content() {
      return this.view.evaluate("document.documentElement.outerHTML");
    }

    /**
     * Evaluates `script` once in the current document. It is not re-run on
     * later navigations by this method.
     */
    async addInitScript(script) {
      const expr = typeof script === "function" ? `(${script.toString()})()` : script;
      await this.view.evaluate(expr);
    }
    keyboard = {
      press: (key) => this.view.press(key)
    };

    /** Browser-context-shaped facade; unsupported features throw or return empty values. */
    context() {
      return {
        close: async () => {
          await this.close();
        },
        newPage: async () => {
          throw new Error("Multi-tab not supported in Bun.WebView. Use engine: 'playwright'");
        },
        cookies: async () => [],
        addCookies: async (_) => {},
        clearCookies: async () => {},
        newCDPSession: async () => {
          throw new Error("CDP session via context not available in Bun.WebView. Use view.cdp() when shipped.");
        },
        route: async (_pattern, _handler) => {
          throw new Error("Network interception not supported in Bun.WebView. Use engine: 'cdp' or 'playwright'.");
        },
        unrouteAll: async () => {},
        pages: () => [],
        addInitScript: async (script) => {
          await this.addInitScript(script);
        }
      };
    }
    on(event, handler) {
      if (!this._eventListeners.has(event)) {
        this._eventListeners.set(event, []);
      }
      this._eventListeners.get(event).push(handler);
      return this;
    }
    off(event, handler) {
      const listeners = this._eventListeners.get(event) ?? [];
      this._eventListeners.set(event, listeners.filter((l) => l !== handler));
      return this;
    }

    /** Invokes every listener for `event`; a throwing listener does not stop the others. */
    _emit(event, ...args) {
      for (const handler of this._eventListeners.get(event) ?? []) {
        try {
          handler(...args);
        } catch {}
      }
    }
    async pdf(_opts) {
      throw new Error("PDF generation not supported in Bun.WebView. Use engine: 'playwright'.");
    }
    coverage = {
      startJSCoverage: async () => {},
      stopJSCoverage: async () => [],
      startCSSCoverage: async () => {},
      stopCSSCoverage: async () => []
    };
    setSessionId(id) {
      this._sessionId = id;
    }
    getSessionId() {
      return this._sessionId;
    }
    getNativeView() {
      return this.view;
    }

    /** Closes the native view; errors from close are deliberately ignored. */
    async close() {
      try {
        await this.view.close();
      } catch {}
    }
    [Symbol.asyncDispose]() {
      return this.close();
    }
  };
});
1673
+
1220
1674
  // src/lib/browser.ts
1221
1675
  var exports_browser = {};
1222
1676
  __export(exports_browser, {
@@ -1238,6 +1692,22 @@ async function launchBrowser(options) {
1238
1692
  }
1239
1693
  return launchLightpanda2({ viewport: options?.viewport });
1240
1694
  }
1695
+ if (engine === "bun") {
1696
+ const { isBunWebViewAvailable: isBunWebViewAvailable2, BunWebViewSession: BunWebViewSession2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
1697
+ if (!isBunWebViewAvailable2()) {
1698
+ throw new BrowserError("Bun.WebView not available. Upgrade to Bun canary: bun upgrade --canary");
1699
+ }
1700
+ const session = new BunWebViewSession2({
1701
+ width: options?.viewport?.width ?? 1280,
1702
+ height: options?.viewport?.height ?? 720
1703
+ });
1704
+ return {
1705
+ newContext: async () => ({ newPage: async () => session, close: async () => {} }),
1706
+ close: async () => session.close(),
1707
+ contexts: () => [],
1708
+ _bunSession: session
1709
+ };
1710
+ }
1241
1711
  const headless = options?.headless ?? true;
1242
1712
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
1243
1713
  try {
@@ -1259,6 +1729,12 @@ async function getPage(browser, options) {
1259
1729
  const { getLightpandaPage: getLightpandaPage2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
1260
1730
  return getLightpandaPage2(browser, options);
1261
1731
  }
1732
+ if (engine === "bun") {
1733
+ const bunSession = browser._bunSession;
1734
+ if (bunSession)
1735
+ return bunSession;
1736
+ throw new BrowserError("Bun.WebView session not found on browser instance");
1737
+ }
1262
1738
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
1263
1739
  try {
1264
1740
  const context = await browser.newContext({
@@ -1278,6 +1754,12 @@ async function closeBrowser(browser, engine) {
1278
1754
  const { closeLightpanda: closeLightpanda2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
1279
1755
  return closeLightpanda2(browser);
1280
1756
  }
1757
+ if (engine === "bun") {
1758
+ const bunSession = browser._bunSession;
1759
+ if (bunSession)
1760
+ await bunSession.close();
1761
+ return;
1762
+ }
1281
1763
  try {
1282
1764
  await browser.close();
1283
1765
  } catch (error) {
@@ -1347,6 +1829,9 @@ async function launchBrowserEngine(engine, config) {
1347
1829
  }
1348
1830
  return launchLightpanda2({ viewport: config.viewport });
1349
1831
  }
1832
+ if (engine === "bun") {
1833
+ return launchBrowser({ headless: config.headless, viewport: config.viewport, engine: "bun" });
1834
+ }
1350
1835
  return chromium2.launch({
1351
1836
  headless: config.headless,
1352
1837
  args: ["--no-sandbox", "--disable-setuid-sandbox"]
@@ -1908,6 +2393,8 @@ async function runAgentLoop(options) {
1908
2393
  Instructions: ${persona.instructions}` : "",
1909
2394
  persona.traits.length > 0 ? `Traits: ${persona.traits.join(", ")}` : "",
1910
2395
  persona.goals.length > 0 ? `Goals: ${persona.goals.join("; ")}` : "",
2396
+ persona.behaviors && persona.behaviors.length > 0 ? `Behaviors: ${persona.behaviors.join("; ")}` : "",
2397
+ persona.painPoints && persona.painPoints.length > 0 ? `Pain points: ${persona.painPoints.join("; ")}` : "",
1911
2398
  "",
1912
2399
  "Stay in character throughout the test. Your observations, choices, and priorities should reflect this persona."
1913
2400
  ].filter(Boolean).join(`
@@ -1958,6 +2445,15 @@ Instructions: ${persona.instructions}` : "",
1958
2445
  const isOpenAICompat = "provider" in client;
1959
2446
  try {
1960
2447
  for (let turn = 0;turn < maxTurns; turn++) {
2448
+ if (persona && turn > 0 && turn % 5 === 0) {
2449
+ messages = [
2450
+ ...messages,
2451
+ {
2452
+ role: "user",
2453
+ content: `[Reminder: You are ${persona.name} \u2014 ${persona.role}. Traits: ${persona.traits.join(", ")}. Stay in character.]`
2454
+ }
2455
+ ];
2456
+ }
1961
2457
  const response = isOpenAICompat ? await callOpenAICompatible({
1962
2458
  baseUrl: client.baseUrl,
1963
2459
  apiKey: client.apiKey,
@@ -2052,6 +2548,8 @@ function detectProvider(model) {
2052
2548
  return "openai";
2053
2549
  if (model.startsWith("gemini-"))
2054
2550
  return "google";
2551
+ if (model.startsWith("llama-") || model.startsWith("qwen-") || model.includes("cerebras"))
2552
+ return "cerebras";
2055
2553
  return "anthropic";
2056
2554
  }
2057
2555
  function createClient(apiKey) {
@@ -2145,6 +2643,12 @@ function createClientForModel(model, apiKey) {
2145
2643
  throw new AIClientError("No Google API key. Set GOOGLE_API_KEY or pass it explicitly.");
2146
2644
  return { provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", apiKey: key };
2147
2645
  }
2646
+ if (provider === "cerebras") {
2647
+ const key = apiKey ?? process.env["CEREBRAS_API_KEY"];
2648
+ if (!key)
2649
+ throw new AIClientError("No Cerebras API key. Set CEREBRAS_API_KEY or pass it explicitly.");
2650
+ return { provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKey: key };
2651
+ }
2148
2652
  return createClient(apiKey);
2149
2653
  }
2150
2654
  var BROWSER_TOOLS;
@@ -2547,7 +3051,32 @@ function listScenarios(filter) {
2547
3051
  params.push(filter.offset);
2548
3052
  }
2549
3053
  const rows = db2.query(sql).all(...params);
2550
- return rows.map(scenarioFromRow);
3054
+ const scenarios = rows.map(scenarioFromRow);
3055
+ if (scenarios.length === 0)
3056
+ return scenarios;
3057
+ const scenarioIds = scenarios.map((s) => s.id);
3058
+ const placeholders = scenarioIds.map(() => "?").join(",");
3059
+ const statsRows = db2.query(`
3060
+ SELECT scenario_id,
3061
+ COUNT(*) as total,
3062
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed
3063
+ FROM (
3064
+ SELECT scenario_id, status
3065
+ FROM results
3066
+ WHERE scenario_id IN (${placeholders})
3067
+ ORDER BY created_at DESC
3068
+ )
3069
+ GROUP BY scenario_id
3070
+ `).all(...scenarioIds);
3071
+ const statsMap = new Map(statsRows.map((r) => [r.scenario_id, r]));
3072
+ return scenarios.map((s) => {
3073
+ const stats = statsMap.get(s.id);
3074
+ return {
3075
+ ...s,
3076
+ flakinessScore: stats ? stats.passed / stats.total : null,
3077
+ recentRunCount: stats?.total ?? 0
3078
+ };
3079
+ });
2551
3080
  }
2552
3081
  function updateScenario(id, input, version) {
2553
3082
  const db2 = getDatabase();
@@ -2623,6 +3152,10 @@ function updateScenario(id, input, version) {
2623
3152
  }
2624
3153
  return getScenario(existing.id);
2625
3154
  }
/**
 * Stores the time and URL of a scenario's pass on its row.
 *
 * Writes `now()` into `last_passed_at` and `url` into `last_passed_url` for
 * the scenario with the given id; no other column is touched.
 *
 * @param {string} id - Scenario id used in the WHERE clause.
 * @param {string} url - URL stored in `last_passed_url`.
 */
function updateScenarioPassedCache(id, url) {
  const statement = getDatabase().query("UPDATE scenarios SET last_passed_at = ?, last_passed_url = ? WHERE id = ?");
  statement.run(now(), url, id);
}
2626
3159
  function deleteScenario(id) {
2627
3160
  const db2 = getDatabase();
2628
3161
  const scenario = getScenario(id);
@@ -2706,6 +3239,10 @@ function updateResult(id, updates) {
2706
3239
  sets.push("metadata = ?");
2707
3240
  params.push(JSON.stringify(updates.metadata));
2708
3241
  }
3242
+ if (updates.failureAnalysis !== undefined) {
3243
+ sets.push("failure_analysis = ?");
3244
+ params.push(updates.failureAnalysis !== null ? JSON.stringify(updates.failureAnalysis) : null);
3245
+ }
2709
3246
  if (sets.length === 0) {
2710
3247
  return existing;
2711
3248
  }
@@ -2949,9 +3486,9 @@ init_browser();
2949
3486
  init_browser_lightpanda();
2950
3487
 
2951
3488
  // src/lib/screenshotter.ts
2952
- import { mkdirSync as mkdirSync2, existsSync as existsSync3, writeFileSync } from "fs";
2953
- import { join as join3 } from "path";
2954
- import { homedir as homedir3 } from "os";
3489
+ import { mkdirSync as mkdirSync3, existsSync as existsSync3, writeFileSync } from "fs";
3490
+ import { join as join4 } from "path";
3491
+ import { homedir as homedir4 } from "os";
2955
3492
  function slugify(text) {
2956
3493
  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
2957
3494
  }
@@ -2971,11 +3508,11 @@ function getScreenshotDir(baseDir, runId, scenarioSlug, projectName, timestamp)
2971
3508
  const project = projectName ?? "default";
2972
3509
  const dateDir = formatDate(now2);
2973
3510
  const timeDir = `${formatTime(now2)}_${runId.slice(0, 8)}`;
2974
- return join3(baseDir, project, dateDir, timeDir, scenarioSlug);
3511
+ return join4(baseDir, project, dateDir, timeDir, scenarioSlug);
2975
3512
  }
2976
3513
  function ensureDir(dirPath) {
2977
3514
  if (!existsSync3(dirPath)) {
2978
- mkdirSync2(dirPath, { recursive: true });
3515
+ mkdirSync3(dirPath, { recursive: true });
2979
3516
  }
2980
3517
  }
2981
3518
  function writeMetaSidecar(screenshotPath, meta) {
@@ -2987,21 +3524,21 @@ function writeMetaSidecar(screenshotPath, meta) {
2987
3524
  function writeRunMeta(dir, meta) {
2988
3525
  ensureDir(dir);
2989
3526
  try {
2990
- writeFileSync(join3(dir, "_run-meta.json"), JSON.stringify(meta, null, 2), "utf-8");
3527
+ writeFileSync(join4(dir, "_run-meta.json"), JSON.stringify(meta, null, 2), "utf-8");
2991
3528
  } catch {}
2992
3529
  }
2993
3530
  function writeScenarioMeta(dir, meta) {
2994
3531
  ensureDir(dir);
2995
3532
  try {
2996
- writeFileSync(join3(dir, "_scenario-meta.json"), JSON.stringify(meta, null, 2), "utf-8");
3533
+ writeFileSync(join4(dir, "_scenario-meta.json"), JSON.stringify(meta, null, 2), "utf-8");
2997
3534
  } catch {}
2998
3535
  }
2999
3536
  async function generateThumbnail(page, screenshotDir, filename) {
3000
3537
  try {
3001
- const thumbDir = join3(screenshotDir, "_thumbnail");
3538
+ const thumbDir = join4(screenshotDir, "_thumbnail");
3002
3539
  ensureDir(thumbDir);
3003
3540
  const thumbFilename = filename.replace(/\.(png|jpeg)$/, ".thumb.$1");
3004
- const thumbPath = join3(thumbDir, thumbFilename);
3541
+ const thumbPath = join4(thumbDir, thumbFilename);
3005
3542
  const viewport = page.viewportSize();
3006
3543
  if (viewport) {
3007
3544
  await page.screenshot({
@@ -3015,7 +3552,7 @@ async function generateThumbnail(page, screenshotDir, filename) {
3015
3552
  return null;
3016
3553
  }
3017
3554
  }
3018
- var DEFAULT_BASE_DIR = join3(homedir3(), ".testers", "screenshots");
3555
+ var DEFAULT_BASE_DIR = join4(homedir4(), ".testers", "screenshots");
3019
3556
 
3020
3557
  class Screenshotter {
3021
3558
  baseDir;
@@ -3036,14 +3573,16 @@ class Screenshotter {
3036
3573
  const action = options.description ?? options.action;
3037
3574
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
3038
3575
  const filename = generateFilename(options.stepNumber, action);
3039
- const filePath = join3(dir, filename);
3576
+ const filePath = join4(dir, filename);
3040
3577
  ensureDir(dir);
3041
- await page.screenshot({
3578
+ const screenshotOpts = {
3042
3579
  path: filePath,
3043
3580
  fullPage: this.fullPage,
3044
- type: this.format,
3045
- quality: this.format === "jpeg" ? this.quality : undefined
3046
- });
3581
+ type: this.format
3582
+ };
3583
+ if (this.format === "jpeg")
3584
+ screenshotOpts.quality = this.quality;
3585
+ await page.screenshot(screenshotOpts);
3047
3586
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
3048
3587
  const pageUrl = page.url();
3049
3588
  const timestamp = new Date().toISOString();
@@ -3071,14 +3610,16 @@ class Screenshotter {
3071
3610
  const action = options.description ?? options.action;
3072
3611
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
3073
3612
  const filename = generateFilename(options.stepNumber, action);
3074
- const filePath = join3(dir, filename);
3613
+ const filePath = join4(dir, filename);
3075
3614
  ensureDir(dir);
3076
- await page.screenshot({
3615
+ const ssOpts2 = {
3077
3616
  path: filePath,
3078
3617
  fullPage: true,
3079
- type: this.format,
3080
- quality: this.format === "jpeg" ? this.quality : undefined
3081
- });
3618
+ type: this.format
3619
+ };
3620
+ if (this.format === "jpeg")
3621
+ ssOpts2.quality = this.quality;
3622
+ await page.screenshot(ssOpts2);
3082
3623
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
3083
3624
  const pageUrl = page.url();
3084
3625
  const timestamp = new Date().toISOString();
@@ -3106,13 +3647,15 @@ class Screenshotter {
3106
3647
  const action = options.description ?? options.action;
3107
3648
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
3108
3649
  const filename = generateFilename(options.stepNumber, action);
3109
- const filePath = join3(dir, filename);
3650
+ const filePath = join4(dir, filename);
3110
3651
  ensureDir(dir);
3111
- await page.locator(selector).screenshot({
3652
+ const ssOpts3 = {
3112
3653
  path: filePath,
3113
- type: this.format,
3114
- quality: this.format === "jpeg" ? this.quality : undefined
3115
- });
3654
+ type: this.format
3655
+ };
3656
+ if (this.format === "jpeg")
3657
+ ssOpts3.quality = this.quality;
3658
+ await page.locator(selector).screenshot(ssOpts3);
3116
3659
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
3117
3660
  const pageUrl = page.url();
3118
3661
  const timestamp = new Date().toISOString();
@@ -3140,6 +3683,9 @@ class Screenshotter {
3140
3683
  // src/index.ts
3141
3684
  init_ai_client();
3142
3685
 
3686
+ // src/lib/runner.ts
3687
+ init_types();
3688
+
3143
3689
  // src/lib/judge.ts
3144
3690
  init_ai_client();
3145
3691
  init_types();
@@ -3200,11 +3746,13 @@ function resolveJudgeModel(config) {
3200
3746
  apiKey = process.env["OPENAI_API_KEY"];
3201
3747
  else if (provider === "google")
3202
3748
  apiKey = process.env["GOOGLE_API_KEY"];
3749
+ else if (provider === "cerebras")
3750
+ apiKey = process.env["CEREBRAS_API_KEY"];
3203
3751
  }
3204
3752
  if (!apiKey) {
3205
- apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
3753
+ apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["CEREBRAS_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
3206
3754
  if (!apiKey)
3207
- throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
3755
+ throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, CEREBRAS_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
3208
3756
  }
3209
3757
  return { model, provider, apiKey };
3210
3758
  }
@@ -3219,8 +3767,8 @@ reason: 1-2 sentences max`;
3219
3767
  async function callJudge(prompt, config) {
3220
3768
  const { model, provider, apiKey } = resolveJudgeModel(config);
3221
3769
  const threshold = 0.7;
3222
- if (provider === "openai" || provider === "google") {
3223
- const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
3770
+ if (provider === "openai" || provider === "google" || provider === "cerebras") {
3771
+ const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : provider === "cerebras" ? "https://api.cerebras.ai/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
3224
3772
  const resp2 = await callOpenAICompatible({
3225
3773
  baseUrl,
3226
3774
  apiKey,
@@ -3625,1320 +4173,1723 @@ async function runPipelineScenario(scenario, options) {
3625
4173
  // src/lib/runner.ts
3626
4174
  init_runs();
3627
4175
 
3628
- // src/db/personas.ts
3629
- init_types();
3630
- init_database();
3631
- function getPersona(id) {
3632
- const db2 = getDatabase();
3633
- let row = db2.query("SELECT * FROM personas WHERE id = ?").get(id);
3634
- if (row)
3635
- return personaFromRow(row);
3636
- row = db2.query("SELECT * FROM personas WHERE short_id = ?").get(id);
3637
- if (row)
3638
- return personaFromRow(row);
3639
- return null;
3640
- }
3641
-
3642
- // src/lib/runner.ts
3643
- init_browser();
3644
- init_ai_client();
3645
- init_config();
3646
-
3647
- // src/lib/webhooks.ts
3648
- init_database();
3649
- function fromRow(row) {
3650
- return {
3651
- id: row.id,
3652
- url: row.url,
3653
- events: JSON.parse(row.events),
3654
- projectId: row.project_id,
3655
- secret: row.secret,
3656
- active: row.active === 1,
3657
- createdAt: row.created_at
3658
- };
3659
- }
3660
- function createWebhook(input) {
3661
- const db2 = getDatabase();
3662
- const id = uuid();
3663
- const events = input.events ?? ["failed"];
3664
- const secret = input.secret ?? crypto.randomUUID().replace(/-/g, "");
3665
- db2.query(`
3666
- INSERT INTO webhooks (id, url, events, project_id, secret, active, created_at)
3667
- VALUES (?, ?, ?, ?, ?, 1, ?)
3668
- `).run(id, input.url, JSON.stringify(events), input.projectId ?? null, secret, now());
3669
- return getWebhook(id);
3670
- }
3671
- function getWebhook(id) {
3672
- const db2 = getDatabase();
3673
- const row = db2.query("SELECT * FROM webhooks WHERE id = ?").get(id);
3674
- if (!row) {
3675
- const rows = db2.query("SELECT * FROM webhooks WHERE id LIKE ? || '%'").all(id);
3676
- if (rows.length === 1)
3677
- return fromRow(rows[0]);
4176
+ // src/lib/failure-analyzer.ts
4177
+ function analyzeFailure(error, reasoning) {
4178
+ const combinedText = [error, reasoning].filter(Boolean).join(" ");
4179
+ if (!combinedText.trim())
3678
4180
  return null;
4181
+ const errorText = error ?? "";
4182
+ const reasoningText = reasoning ?? "";
4183
+ if (/waiting for selector/i.test(errorText) || /not found/i.test(errorText) || /No element/i.test(errorText) || /waiting for selector/i.test(reasoningText) || /could not find element/i.test(reasoningText) || /element not found/i.test(reasoningText)) {
4184
+ const selectorMatch = errorText.match(/'([^']+)'/) ?? reasoningText.match(/'([^']+)'/);
4185
+ const affectedElement = selectorMatch ? selectorMatch[1] : undefined;
4186
+ const stepMatch = reasoningText.match(/step\s+(\d+)/i);
4187
+ const stepNumber = stepMatch ? parseInt(stepMatch[1], 10) : undefined;
4188
+ return {
4189
+ type: "selector_not_found",
4190
+ affectedElement,
4191
+ stepNumber,
4192
+ confidence: affectedElement ? "high" : "medium"
4193
+ };
3679
4194
  }
3680
- return fromRow(row);
3681
- }
3682
- function listWebhooks(projectId) {
3683
- const db2 = getDatabase();
3684
- let query = "SELECT * FROM webhooks WHERE active = 1";
3685
- const params = [];
3686
- if (projectId) {
3687
- query += " AND (project_id = ? OR project_id IS NULL)";
3688
- params.push(projectId);
4195
+ if (/assert/i.test(errorText) || /expected/i.test(errorText) || /to equal/i.test(errorText) || /to be/i.test(errorText) || /\bgot\b/.test(errorText) || /assertion.*failed/i.test(reasoningText) || /expected.*but.*got/i.test(reasoningText)) {
4196
+ const expectedActualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1[,\s]+(?:got|received|actual)[:\s]+(['"]?)([^'"]+)\3/i);
4197
+ const toEqualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1\s+to\s+equal\s+(['"]?)([^'"]+)\3/i);
4198
+ let expected;
4199
+ let actual;
4200
+ if (expectedActualMatch) {
4201
+ expected = expectedActualMatch[2];
4202
+ actual = expectedActualMatch[4];
4203
+ } else if (toEqualMatch) {
4204
+ expected = toEqualMatch[4];
4205
+ actual = toEqualMatch[2];
4206
+ }
4207
+ const stepMatch = reasoningText.match(/step\s+(\d+)/i);
4208
+ const stepNumber = stepMatch ? parseInt(stepMatch[1], 10) : undefined;
4209
+ return {
4210
+ type: "assertion_failed",
4211
+ expected,
4212
+ actual,
4213
+ stepNumber,
4214
+ confidence: expected && actual ? "high" : "medium"
4215
+ };
3689
4216
  }
3690
- query += " ORDER BY created_at DESC";
3691
- const rows = db2.query(query).all(...params);
3692
- return rows.map(fromRow);
3693
- }
3694
- function deleteWebhook(id) {
3695
- const db2 = getDatabase();
3696
- const webhook = getWebhook(id);
3697
- if (!webhook)
3698
- return false;
3699
- db2.query("DELETE FROM webhooks WHERE id = ?").run(webhook.id);
3700
- return true;
3701
- }
3702
- function signPayload(body, secret) {
3703
- const encoder = new TextEncoder;
3704
- const key = encoder.encode(secret);
3705
- const data = encoder.encode(body);
3706
- let hash = 0;
3707
- for (let i = 0;i < data.length; i++) {
3708
- hash = (hash << 5) - hash + data[i] + (key[i % key.length] ?? 0) | 0;
4217
+ if (/timeout/i.test(errorText) || /timed out/i.test(errorText) || /Timeout/i.test(reasoningText) || /timed out/i.test(reasoningText)) {
4218
+ const stepMatch = reasoningText.match(/step\s+(\d+)/i);
4219
+ const stepNumber = stepMatch ? parseInt(stepMatch[1], 10) : undefined;
4220
+ return {
4221
+ type: "timeout",
4222
+ stepNumber,
4223
+ confidence: "high"
4224
+ };
4225
+ }
4226
+ if (/\b401\b/.test(errorText) || /\b403\b/.test(errorText) || /login/i.test(errorText) || /unauthorized/i.test(errorText) || /\bauth\b/i.test(errorText) || /\b401\b/.test(reasoningText) || /\b403\b/.test(reasoningText) || /unauthorized/i.test(reasoningText) || /authentication/i.test(reasoningText)) {
4227
+ return {
4228
+ type: "auth_error",
4229
+ confidence: "high"
4230
+ };
4231
+ }
4232
+ if (/ECONNREFUSED/i.test(errorText) || /ENOTFOUND/i.test(errorText) || /fetch failed/i.test(errorText) || /network/i.test(errorText) || /ECONNREFUSED/i.test(reasoningText) || /fetch failed/i.test(reasoningText) || /connection refused/i.test(reasoningText)) {
4233
+ return {
4234
+ type: "network_error",
4235
+ confidence: "high"
4236
+ };
4237
+ }
4238
+ if (/\beval\b/i.test(errorText) || /evaluate/i.test(errorText) || /\bscript\b/i.test(errorText) || /\beval\b/i.test(reasoningText) || /evaluate/i.test(reasoningText)) {
4239
+ return {
4240
+ type: "eval_failed",
4241
+ confidence: "medium"
4242
+ };
3709
4243
  }
3710
- return `sha256=${Math.abs(hash).toString(16).padStart(16, "0")}`;
3711
- }
3712
- function formatSlackPayload(payload) {
3713
- const status = payload.run.status === "passed" ? ":white_check_mark:" : ":x:";
3714
- const color = payload.run.status === "passed" ? "#22c55e" : "#ef4444";
3715
4244
  return {
3716
- attachments: [
3717
- {
3718
- color,
3719
- blocks: [
3720
- {
3721
- type: "section",
3722
- text: {
3723
- type: "mrkdwn",
3724
- text: `${status} *Test Run ${payload.run.status.toUpperCase()}*
3725
- ` + `URL: ${payload.run.url}
3726
- ` + `Results: ${payload.run.passed}/${payload.run.total} passed` + (payload.run.failed > 0 ? ` (${payload.run.failed} failed)` : "") + (payload.schedule ? `
3727
- Schedule: ${payload.schedule.name}` : "")
3728
- }
3729
- }
3730
- ]
3731
- }
3732
- ]
4245
+ type: "unknown",
4246
+ confidence: "low"
3733
4247
  };
3734
4248
  }
3735
- async function dispatchWebhooks(event, run, schedule) {
3736
- const webhooks = listWebhooks(run.projectId ?? undefined);
3737
- const payload = {
3738
- event,
3739
- run: {
3740
- id: run.id,
3741
- url: run.url,
3742
- status: run.status,
3743
- passed: run.passed,
3744
- failed: run.failed,
3745
- total: run.total
3746
- },
3747
- schedule,
3748
- timestamp: new Date().toISOString()
3749
- };
3750
- for (const webhook of webhooks) {
3751
- if (!webhook.events.includes(event) && !webhook.events.includes("*"))
3752
- continue;
3753
- const isSlack = webhook.url.includes("hooks.slack.com");
3754
- const body = isSlack ? JSON.stringify(formatSlackPayload(payload)) : JSON.stringify(payload);
3755
- const headers = {
3756
- "Content-Type": "application/json"
3757
- };
3758
- if (webhook.secret) {
3759
- headers["X-Testers-Signature"] = signPayload(body, webhook.secret);
3760
- }
3761
- try {
3762
- const response = await fetch(webhook.url, {
3763
- method: "POST",
3764
- headers,
3765
- body
3766
- });
3767
- if (!response.ok) {
3768
- await new Promise((r) => setTimeout(r, 5000));
3769
- await fetch(webhook.url, { method: "POST", headers, body });
3770
- }
3771
- } catch {}
4249
+
4250
+ // node_modules/chalk/source/vendor/ansi-styles/index.js
4251
+ var ANSI_BACKGROUND_OFFSET = 10;
4252
+ var wrapAnsi16 = (offset = 0) => (code) => `\x1B[${code + offset}m`;
4253
+ var wrapAnsi256 = (offset = 0) => (code) => `\x1B[${38 + offset};5;${code}m`;
4254
+ var wrapAnsi16m = (offset = 0) => (red, green, blue) => `\x1B[${38 + offset};2;${red};${green};${blue}m`;
4255
+ var styles = {
4256
+ modifier: {
4257
+ reset: [0, 0],
4258
+ bold: [1, 22],
4259
+ dim: [2, 22],
4260
+ italic: [3, 23],
4261
+ underline: [4, 24],
4262
+ overline: [53, 55],
4263
+ inverse: [7, 27],
4264
+ hidden: [8, 28],
4265
+ strikethrough: [9, 29]
4266
+ },
4267
+ color: {
4268
+ black: [30, 39],
4269
+ red: [31, 39],
4270
+ green: [32, 39],
4271
+ yellow: [33, 39],
4272
+ blue: [34, 39],
4273
+ magenta: [35, 39],
4274
+ cyan: [36, 39],
4275
+ white: [37, 39],
4276
+ blackBright: [90, 39],
4277
+ gray: [90, 39],
4278
+ grey: [90, 39],
4279
+ redBright: [91, 39],
4280
+ greenBright: [92, 39],
4281
+ yellowBright: [93, 39],
4282
+ blueBright: [94, 39],
4283
+ magentaBright: [95, 39],
4284
+ cyanBright: [96, 39],
4285
+ whiteBright: [97, 39]
4286
+ },
4287
+ bgColor: {
4288
+ bgBlack: [40, 49],
4289
+ bgRed: [41, 49],
4290
+ bgGreen: [42, 49],
4291
+ bgYellow: [43, 49],
4292
+ bgBlue: [44, 49],
4293
+ bgMagenta: [45, 49],
4294
+ bgCyan: [46, 49],
4295
+ bgWhite: [47, 49],
4296
+ bgBlackBright: [100, 49],
4297
+ bgGray: [100, 49],
4298
+ bgGrey: [100, 49],
4299
+ bgRedBright: [101, 49],
4300
+ bgGreenBright: [102, 49],
4301
+ bgYellowBright: [103, 49],
4302
+ bgBlueBright: [104, 49],
4303
+ bgMagentaBright: [105, 49],
4304
+ bgCyanBright: [106, 49],
4305
+ bgWhiteBright: [107, 49]
3772
4306
  }
3773
- }
3774
- async function testWebhook(id) {
3775
- const webhook = getWebhook(id);
3776
- if (!webhook)
3777
- return false;
3778
- const testPayload = {
3779
- event: "test",
3780
- run: { id: "test-run", url: "http://localhost:3000", status: "passed", passed: 3, failed: 0, total: 3 },
3781
- timestamp: new Date().toISOString()
3782
- };
3783
- try {
3784
- const body = JSON.stringify(testPayload);
3785
- const response = await fetch(webhook.url, {
3786
- method: "POST",
3787
- headers: {
3788
- "Content-Type": "application/json",
3789
- ...webhook.secret ? { "X-Testers-Signature": signPayload(body, webhook.secret) } : {}
3790
- },
3791
- body
4307
+ };
4308
+ var modifierNames = Object.keys(styles.modifier);
4309
+ var foregroundColorNames = Object.keys(styles.color);
4310
+ var backgroundColorNames = Object.keys(styles.bgColor);
4311
+ var colorNames = [...foregroundColorNames, ...backgroundColorNames];
4312
+ function assembleStyles() {
4313
+ const codes = new Map;
4314
+ for (const [groupName, group] of Object.entries(styles)) {
4315
+ for (const [styleName, style] of Object.entries(group)) {
4316
+ styles[styleName] = {
4317
+ open: `\x1B[${style[0]}m`,
4318
+ close: `\x1B[${style[1]}m`
4319
+ };
4320
+ group[styleName] = styles[styleName];
4321
+ codes.set(style[0], style[1]);
4322
+ }
4323
+ Object.defineProperty(styles, groupName, {
4324
+ value: group,
4325
+ enumerable: false
3792
4326
  });
3793
- return response.ok;
3794
- } catch {
3795
- return false;
3796
4327
  }
4328
+ Object.defineProperty(styles, "codes", {
4329
+ value: codes,
4330
+ enumerable: false
4331
+ });
4332
+ styles.color.close = "\x1B[39m";
4333
+ styles.bgColor.close = "\x1B[49m";
4334
+ styles.color.ansi = wrapAnsi16();
4335
+ styles.color.ansi256 = wrapAnsi256();
4336
+ styles.color.ansi16m = wrapAnsi16m();
4337
+ styles.bgColor.ansi = wrapAnsi16(ANSI_BACKGROUND_OFFSET);
4338
+ styles.bgColor.ansi256 = wrapAnsi256(ANSI_BACKGROUND_OFFSET);
4339
+ styles.bgColor.ansi16m = wrapAnsi16m(ANSI_BACKGROUND_OFFSET);
4340
+ Object.defineProperties(styles, {
4341
+ rgbToAnsi256: {
4342
+ value(red, green, blue) {
4343
+ if (red === green && green === blue) {
4344
+ if (red < 8) {
4345
+ return 16;
4346
+ }
4347
+ if (red > 248) {
4348
+ return 231;
4349
+ }
4350
+ return Math.round((red - 8) / 247 * 24) + 232;
4351
+ }
4352
+ return 16 + 36 * Math.round(red / 255 * 5) + 6 * Math.round(green / 255 * 5) + Math.round(blue / 255 * 5);
4353
+ },
4354
+ enumerable: false
4355
+ },
4356
+ hexToRgb: {
4357
+ value(hex) {
4358
+ const matches = /[a-f\d]{6}|[a-f\d]{3}/i.exec(hex.toString(16));
4359
+ if (!matches) {
4360
+ return [0, 0, 0];
4361
+ }
4362
+ let [colorString] = matches;
4363
+ if (colorString.length === 3) {
4364
+ colorString = [...colorString].map((character) => character + character).join("");
4365
+ }
4366
+ const integer = Number.parseInt(colorString, 16);
4367
+ return [
4368
+ integer >> 16 & 255,
4369
+ integer >> 8 & 255,
4370
+ integer & 255
4371
+ ];
4372
+ },
4373
+ enumerable: false
4374
+ },
4375
+ hexToAnsi256: {
4376
+ value: (hex) => styles.rgbToAnsi256(...styles.hexToRgb(hex)),
4377
+ enumerable: false
4378
+ },
4379
+ ansi256ToAnsi: {
4380
+ value(code) {
4381
+ if (code < 8) {
4382
+ return 30 + code;
4383
+ }
4384
+ if (code < 16) {
4385
+ return 90 + (code - 8);
4386
+ }
4387
+ let red;
4388
+ let green;
4389
+ let blue;
4390
+ if (code >= 232) {
4391
+ red = ((code - 232) * 10 + 8) / 255;
4392
+ green = red;
4393
+ blue = red;
4394
+ } else {
4395
+ code -= 16;
4396
+ const remainder = code % 36;
4397
+ red = Math.floor(code / 36) / 5;
4398
+ green = Math.floor(remainder / 6) / 5;
4399
+ blue = remainder % 6 / 5;
4400
+ }
4401
+ const value = Math.max(red, green, blue) * 2;
4402
+ if (value === 0) {
4403
+ return 30;
4404
+ }
4405
+ let result = 30 + (Math.round(blue) << 2 | Math.round(green) << 1 | Math.round(red));
4406
+ if (value === 2) {
4407
+ result += 60;
4408
+ }
4409
+ return result;
4410
+ },
4411
+ enumerable: false
4412
+ },
4413
+ rgbToAnsi: {
4414
+ value: (red, green, blue) => styles.ansi256ToAnsi(styles.rgbToAnsi256(red, green, blue)),
4415
+ enumerable: false
4416
+ },
4417
+ hexToAnsi: {
4418
+ value: (hex) => styles.ansi256ToAnsi(styles.hexToAnsi256(hex)),
4419
+ enumerable: false
4420
+ }
4421
+ });
4422
+ return styles;
3797
4423
  }
4424
+ var ansiStyles = assembleStyles();
4425
+ var ansi_styles_default = ansiStyles;
3798
4426
 
3799
- // src/lib/logs-integration.ts
3800
- async function pushFailedRunToLogs(run, failedResults, scenarios) {
3801
- const logsUrl = process.env.LOGS_URL;
3802
- if (!logsUrl)
3803
- return;
3804
- const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
3805
- const entries = failedResults.map((result) => {
3806
- const scenario = scenarioMap.get(result.scenarioId);
3807
- return {
3808
- level: "error",
3809
- source: "sdk",
3810
- service: "testers",
3811
- message: `[testers] Scenario failed: ${scenario?.name ?? result.scenarioId}${result.error ? ` \u2014 ${result.error}` : ""}`,
3812
- metadata: {
3813
- run_id: run.id,
3814
- scenario_id: result.scenarioId,
3815
- scenario_name: scenario?.name,
3816
- url: run.url,
3817
- status: result.status,
3818
- duration_ms: result.durationMs
3819
- }
3820
- };
3821
- });
3822
- try {
3823
- await fetch(`${logsUrl.replace(/\/$/, "")}/api/logs`, {
3824
- method: "POST",
3825
- headers: { "Content-Type": "application/json" },
3826
- body: JSON.stringify(entries)
3827
- });
3828
- } catch {}
4427
+ // node_modules/chalk/source/vendor/supports-color/index.js
4428
+ import process2 from "process";
4429
+ import os from "os";
4430
+ import tty from "tty";
4431
+ function hasFlag(flag, argv = globalThis.Deno ? globalThis.Deno.args : process2.argv) {
4432
+ const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
4433
+ const position = argv.indexOf(prefix + flag);
4434
+ const terminatorPosition = argv.indexOf("--");
4435
+ return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
3829
4436
  }
3830
-
3831
- // src/lib/todos-connector.ts
3832
- import { Database as Database2 } from "bun:sqlite";
3833
- import { existsSync as existsSync4 } from "fs";
3834
- import { join as join4 } from "path";
3835
- import { homedir as homedir4 } from "os";
3836
- init_types();
3837
- function resolveTodosDbPath() {
3838
- const envPath = process.env["TODOS_DB_PATH"];
3839
- if (envPath)
3840
- return envPath;
3841
- return join4(homedir4(), ".todos", "todos.db");
4437
+ var { env } = process2;
4438
+ var flagForceColor;
4439
+ if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
4440
+ flagForceColor = 0;
4441
+ } else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
4442
+ flagForceColor = 1;
3842
4443
  }
3843
- function connectToTodos() {
3844
- const dbPath = resolveTodosDbPath();
3845
- if (!existsSync4(dbPath)) {
3846
- throw new TodosConnectionError(`Todos database not found at ${dbPath}. Install @hasna/todos or set TODOS_DB_PATH.`);
4444
+ function envForceColor() {
4445
+ if ("FORCE_COLOR" in env) {
4446
+ if (env.FORCE_COLOR === "true") {
4447
+ return 1;
4448
+ }
4449
+ if (env.FORCE_COLOR === "false") {
4450
+ return 0;
4451
+ }
4452
+ return env.FORCE_COLOR.length === 0 ? 1 : Math.min(Number.parseInt(env.FORCE_COLOR, 10), 3);
3847
4453
  }
3848
- const db2 = new Database2(dbPath, { readonly: true });
3849
- db2.exec("PRAGMA foreign_keys = ON");
3850
- return db2;
3851
4454
  }
3852
- function pullTasks(options = {}) {
3853
- const db2 = connectToTodos();
3854
- try {
3855
- let query = "SELECT id, short_id, title, description, status, priority, tags, project_id FROM tasks WHERE 1=1";
3856
- const params = [];
3857
- if (options.status) {
3858
- query += " AND status = ?";
3859
- params.push(options.status);
3860
- } else {
3861
- query += " AND status IN ('pending', 'in_progress')";
4455
+ function translateLevel(level) {
4456
+ if (level === 0) {
4457
+ return false;
4458
+ }
4459
+ return {
4460
+ level,
4461
+ hasBasic: true,
4462
+ has256: level >= 2,
4463
+ has16m: level >= 3
4464
+ };
4465
+ }
4466
+ function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
4467
+ const noFlagForceColor = envForceColor();
4468
+ if (noFlagForceColor !== undefined) {
4469
+ flagForceColor = noFlagForceColor;
4470
+ }
4471
+ const forceColor = sniffFlags ? flagForceColor : noFlagForceColor;
4472
+ if (forceColor === 0) {
4473
+ return 0;
4474
+ }
4475
+ if (sniffFlags) {
4476
+ if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
4477
+ return 3;
3862
4478
  }
3863
- if (options.priority) {
3864
- query += " AND priority = ?";
3865
- params.push(options.priority);
4479
+ if (hasFlag("color=256")) {
4480
+ return 2;
3866
4481
  }
3867
- if (options.projectName) {
3868
- const project = db2.query("SELECT id FROM projects WHERE name = ?").get(options.projectName);
3869
- if (project) {
3870
- query += " AND project_id = ?";
3871
- params.push(project.id);
3872
- }
4482
+ }
4483
+ if ("TF_BUILD" in env && "AGENT_NAME" in env) {
4484
+ return 1;
4485
+ }
4486
+ if (haveStream && !streamIsTTY && forceColor === undefined) {
4487
+ return 0;
4488
+ }
4489
+ const min = forceColor || 0;
4490
+ if (env.TERM === "dumb") {
4491
+ return min;
4492
+ }
4493
+ if (process2.platform === "win32") {
4494
+ const osRelease = os.release().split(".");
4495
+ if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
4496
+ return Number(osRelease[2]) >= 14931 ? 3 : 2;
3873
4497
  }
3874
- query += " ORDER BY CASE priority WHEN 'critical' THEN 0 WHEN 'high' THEN 1 WHEN 'medium' THEN 2 WHEN 'low' THEN 3 END";
3875
- const tasks = db2.query(query).all(...params);
3876
- if (options.tags && options.tags.length > 0) {
3877
- return tasks.filter((task) => {
3878
- const taskTags = JSON.parse(task.tags || "[]");
3879
- return options.tags.some((tag) => taskTags.includes(tag));
3880
- });
4498
+ return 1;
4499
+ }
4500
+ if ("CI" in env) {
4501
+ if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => (key in env))) {
4502
+ return 3;
3881
4503
  }
3882
- return tasks;
3883
- } finally {
3884
- db2.close();
4504
+ if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => (sign in env)) || env.CI_NAME === "codeship") {
4505
+ return 1;
4506
+ }
4507
+ return min;
3885
4508
  }
3886
- }
3887
- function taskToScenarioInput(task, projectId) {
3888
- const tags = JSON.parse(task.tags || "[]");
3889
- const priority = ["low", "medium", "high", "critical"].includes(task.priority) ? task.priority : "medium";
3890
- const steps = [];
3891
- if (task.description) {
3892
- const lines = task.description.split(`
3893
- `);
3894
- for (const line of lines) {
3895
- const match = line.match(/^\s*\d+[\.\)]\s*(.+)/);
3896
- if (match?.[1]) {
3897
- steps.push(match[1].trim());
4509
+ if ("TEAMCITY_VERSION" in env) {
4510
+ return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env.TEAMCITY_VERSION) ? 1 : 0;
4511
+ }
4512
+ if (env.COLORTERM === "truecolor") {
4513
+ return 3;
4514
+ }
4515
+ if (env.TERM === "xterm-kitty") {
4516
+ return 3;
4517
+ }
4518
+ if (env.TERM === "xterm-ghostty") {
4519
+ return 3;
4520
+ }
4521
+ if (env.TERM === "wezterm") {
4522
+ return 3;
4523
+ }
4524
+ if ("TERM_PROGRAM" in env) {
4525
+ const version = Number.parseInt((env.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
4526
+ switch (env.TERM_PROGRAM) {
4527
+ case "iTerm.app": {
4528
+ return version >= 3 ? 3 : 2;
4529
+ }
4530
+ case "Apple_Terminal": {
4531
+ return 2;
3898
4532
  }
3899
4533
  }
3900
4534
  }
3901
- return {
3902
- name: task.title.replace(/^(OPE\d+-\d+|[A-Z]+-\d+):\s*/, ""),
3903
- description: task.description || task.title,
3904
- steps,
3905
- tags,
3906
- priority,
3907
- projectId,
3908
- metadata: { todosTaskId: task.id, todosShortId: task.short_id }
3909
- };
3910
- }
3911
- function importFromTodos(options = {}) {
3912
- const tasks = pullTasks({
3913
- projectName: options.projectName,
3914
- tags: options.tags ?? ["qa", "test", "testing"],
3915
- priority: options.priority
3916
- });
3917
- const existing = listScenarios({ projectId: options.projectId });
3918
- const existingTodoIds = new Set(existing.filter((s) => s.metadata?.todosTaskId).map((s) => s.metadata.todosTaskId));
3919
- let imported = 0;
3920
- let skipped = 0;
3921
- for (const task of tasks) {
3922
- if (existingTodoIds.has(task.id)) {
3923
- skipped++;
3924
- continue;
3925
- }
3926
- const input = taskToScenarioInput(task, options.projectId);
3927
- createScenario(input);
3928
- imported++;
4535
+ if (/-256(color)?$/i.test(env.TERM)) {
4536
+ return 2;
3929
4537
  }
3930
- return { imported, skipped };
3931
- }
3932
- function markTodoDone(taskId) {
3933
- const dbPath = resolveTodosDbPath();
3934
- if (!existsSync4(dbPath))
3935
- return false;
3936
- const db2 = new Database2(dbPath);
3937
- try {
3938
- const task = db2.query("SELECT id, version FROM tasks WHERE id LIKE ? || '%'").get(taskId);
3939
- if (!task)
3940
- return false;
3941
- db2.query("UPDATE tasks SET status = 'completed', completed_at = datetime('now'), version = version + 1, updated_at = datetime('now') WHERE id = ? AND version = ?").run(task.id, task.version);
3942
- return true;
3943
- } finally {
3944
- db2.close();
4538
+ if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env.TERM)) {
4539
+ return 1;
3945
4540
  }
4541
+ if ("COLORTERM" in env) {
4542
+ return 1;
4543
+ }
4544
+ return min;
3946
4545
  }
4546
+ function createSupportsColor(stream, options = {}) {
4547
+ const level = _supportsColor(stream, {
4548
+ streamIsTTY: stream && stream.isTTY,
4549
+ ...options
4550
+ });
4551
+ return translateLevel(level);
4552
+ }
4553
+ var supportsColor = {
4554
+ stdout: createSupportsColor({ isTTY: tty.isatty(1) }),
4555
+ stderr: createSupportsColor({ isTTY: tty.isatty(2) })
4556
+ };
4557
+ var supports_color_default = supportsColor;
3947
4558
 
3948
- // src/lib/failure-pipeline.ts
3949
- async function createFailureTasks(run, failedResults, scenarios) {
3950
- if (failedResults.length === 0)
3951
- return { created: 0, skipped: 0 };
3952
- const projectId = process.env["TESTERS_TODOS_PROJECT_ID"];
3953
- if (!projectId)
3954
- return { created: 0, skipped: 0 };
3955
- let db2 = null;
3956
- try {
3957
- db2 = connectToTodos();
3958
- } catch {
3959
- return { created: 0, skipped: 0 };
4559
+ // node_modules/chalk/source/utilities.js
4560
+ function stringReplaceAll(string, substring, replacer) {
4561
+ let index = string.indexOf(substring);
4562
+ if (index === -1) {
4563
+ return string;
3960
4564
  }
3961
- const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
3962
- let created = 0;
3963
- let skipped = 0;
3964
- try {
3965
- for (const result of failedResults) {
3966
- const scenario = scenarioMap.get(result.scenarioId);
3967
- const title = `BUG: [testers] ${scenario?.name ?? result.scenarioId} failed`;
3968
- const existing = db2.query("SELECT id FROM tasks WHERE title = ? AND status NOT IN ('completed', 'cancelled') LIMIT 1").get(title);
3969
- if (existing) {
3970
- skipped++;
3971
- continue;
3972
- }
3973
- const id = crypto.randomUUID();
3974
- const now2 = new Date().toISOString();
3975
- const description = [
3976
- `Test failure detected by open-testers.`,
3977
- ``,
3978
- `**Run:** ${run.id}`,
3979
- `**URL:** ${run.url}`,
3980
- `**Scenario:** ${scenario?.name ?? result.scenarioId}`,
3981
- `**Status:** ${result.status}`,
3982
- result.error ? `**Error:** ${result.error}` : null,
3983
- result.reasoning ? `**Reasoning:** ${result.reasoning.slice(0, 500)}` : null,
3984
- `**Duration:** ${result.durationMs ? `${(result.durationMs / 1000).toFixed(1)}s` : "N/A"}`,
3985
- `**Tokens:** ${result.tokensUsed ?? 0}`
3986
- ].filter(Boolean).join(`
3987
- `);
3988
- try {
3989
- db2.query(`
3990
- INSERT INTO tasks (id, short_id, title, description, status, priority, tags, project_id, version, created_at, updated_at)
3991
- VALUES (?, ?, ?, ?, 'pending', 'high', ?, ?, 1, ?, ?)
3992
- `).run(id, `BUG-${id.slice(0, 6)}`, title, description, JSON.stringify(["bug", "testers", "auto-created"]), projectId, now2, now2);
3993
- created++;
3994
- } catch {
3995
- skipped++;
3996
- }
3997
- }
3998
- } finally {
3999
- db2.close();
4000
- }
4001
- return { created, skipped };
4565
+ const substringLength = substring.length;
4566
+ let endIndex = 0;
4567
+ let returnValue = "";
4568
+ do {
4569
+ returnValue += string.slice(endIndex, index) + substring + replacer;
4570
+ endIndex = index + substringLength;
4571
+ index = string.indexOf(substring, endIndex);
4572
+ } while (index !== -1);
4573
+ returnValue += string.slice(endIndex);
4574
+ return returnValue;
4002
4575
  }
4003
- async function notifyFailureToConversations(run, failedResults, scenarios) {
4004
- const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
4005
- const space = process.env["TESTERS_CONVERSATIONS_SPACE"];
4006
- if (!baseUrl || !space)
4007
- return;
4008
- const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
4009
- const total = run.total;
4010
- const failedCount = failedResults.length;
4011
- const passedCount = run.passed;
4012
- const failureLines = failedResults.slice(0, 5).map((r) => {
4013
- const name = scenarioMap.get(r.scenarioId)?.name ?? r.scenarioId;
4014
- const err = r.error ? ` \u2014 ${r.error.slice(0, 120)}` : "";
4015
- return ` \u274C ${name}${err}`;
4016
- });
4017
- const extra = failedResults.length > 5 ? ` \u2026 and ${failedResults.length - 5} more` : "";
4018
- const message = [
4019
- `\uD83D\uDEA8 **Testers run failed** \u2014 ${failedCount}/${total} scenarios failed`,
4020
- ``,
4021
- `**URL:** ${run.url}`,
4022
- `**Run ID:** \`${run.id}\``,
4023
- `**Pass rate:** ${passedCount}/${total}`,
4024
- ``,
4025
- `**Failures:**`,
4026
- ...failureLines,
4027
- extra
4028
- ].filter((l) => l !== "").join(`
4029
- `);
4030
- try {
4031
- await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
4032
- method: "POST",
4033
- headers: { "Content-Type": "application/json" },
4034
- body: JSON.stringify({ content: message, from: "testers" })
4035
- });
4036
- } catch {}
4576
+ function stringEncaseCRLFWithFirstIndex(string, prefix, postfix, index) {
4577
+ let endIndex = 0;
4578
+ let returnValue = "";
4579
+ do {
4580
+ const gotCR = string[index - 1] === "\r";
4581
+ returnValue += string.slice(endIndex, gotCR ? index - 1 : index) + prefix + (gotCR ? `\r
4582
+ ` : `
4583
+ `) + postfix;
4584
+ endIndex = index + 1;
4585
+ index = string.indexOf(`
4586
+ `, endIndex);
4587
+ } while (index !== -1);
4588
+ returnValue += string.slice(endIndex);
4589
+ return returnValue;
4037
4590
  }
4038
4591
 
4039
- // src/lib/runner.ts
4040
- var eventHandler = null;
4041
- function onRunEvent(handler) {
4042
- eventHandler = handler;
4592
+ // node_modules/chalk/source/index.js
4593
+ var { stdout: stdoutColor, stderr: stderrColor } = supports_color_default;
4594
+ var GENERATOR = Symbol("GENERATOR");
4595
+ var STYLER = Symbol("STYLER");
4596
+ var IS_EMPTY = Symbol("IS_EMPTY");
4597
+ var levelMapping = [
4598
+ "ansi",
4599
+ "ansi",
4600
+ "ansi256",
4601
+ "ansi16m"
4602
+ ];
4603
+ var styles2 = Object.create(null);
4604
+ var applyOptions = (object, options = {}) => {
4605
+ if (options.level && !(Number.isInteger(options.level) && options.level >= 0 && options.level <= 3)) {
4606
+ throw new Error("The `level` option should be an integer from 0 to 3");
4607
+ }
4608
+ const colorLevel = stdoutColor ? stdoutColor.level : 0;
4609
+ object.level = options.level === undefined ? colorLevel : options.level;
4610
+ };
4611
+ var chalkFactory = (options) => {
4612
+ const chalk = (...strings) => strings.join(" ");
4613
+ applyOptions(chalk, options);
4614
+ Object.setPrototypeOf(chalk, createChalk.prototype);
4615
+ return chalk;
4616
+ };
4617
+ function createChalk(options) {
4618
+ return chalkFactory(options);
4043
4619
  }
4044
- function emit(event) {
4045
- if (eventHandler)
4046
- eventHandler(event);
4620
+ Object.setPrototypeOf(createChalk.prototype, Function.prototype);
4621
+ for (const [styleName, style] of Object.entries(ansi_styles_default)) {
4622
+ styles2[styleName] = {
4623
+ get() {
4624
+ const builder = createBuilder(this, createStyler(style.open, style.close, this[STYLER]), this[IS_EMPTY]);
4625
+ Object.defineProperty(this, styleName, { value: builder });
4626
+ return builder;
4627
+ }
4628
+ };
4047
4629
  }
4048
- function withTimeout(promise, ms, label) {
4049
- return new Promise((resolve, reject) => {
4050
- const warningAt = Math.floor(ms * 0.8);
4051
- const warningTimer = setTimeout(() => {
4052
- emit({
4053
- type: "scenario:timeout_warning",
4054
- scenarioName: label,
4055
- timeoutMs: ms,
4056
- elapsedMs: warningAt
4057
- });
4058
- }, warningAt);
4059
- const timer = setTimeout(() => {
4060
- clearTimeout(warningTimer);
4061
- reject(new Error(`Scenario '${label}' timed out after ${ms}ms. Try: testers run --timeout ${ms * 2} or simplify the scenario steps.`));
4062
- }, ms);
4063
- promise.then((val) => {
4064
- clearTimeout(timer);
4065
- clearTimeout(warningTimer);
4066
- resolve(val);
4067
- }, (err) => {
4068
- clearTimeout(timer);
4069
- clearTimeout(warningTimer);
4070
- reject(err);
4071
- });
4072
- });
4630
+ styles2.visible = {
4631
+ get() {
4632
+ const builder = createBuilder(this, this[STYLER], true);
4633
+ Object.defineProperty(this, "visible", { value: builder });
4634
+ return builder;
4635
+ }
4636
+ };
4637
+ var getModelAnsi = (model, level, type, ...arguments_) => {
4638
+ if (model === "rgb") {
4639
+ if (level === "ansi16m") {
4640
+ return ansi_styles_default[type].ansi16m(...arguments_);
4641
+ }
4642
+ if (level === "ansi256") {
4643
+ return ansi_styles_default[type].ansi256(ansi_styles_default.rgbToAnsi256(...arguments_));
4644
+ }
4645
+ return ansi_styles_default[type].ansi(ansi_styles_default.rgbToAnsi(...arguments_));
4646
+ }
4647
+ if (model === "hex") {
4648
+ return getModelAnsi("rgb", level, type, ...ansi_styles_default.hexToRgb(...arguments_));
4649
+ }
4650
+ return ansi_styles_default[type][model](...arguments_);
4651
+ };
4652
+ var usedModels = ["rgb", "hex", "ansi256"];
4653
+ for (const model of usedModels) {
4654
+ styles2[model] = {
4655
+ get() {
4656
+ const { level } = this;
4657
+ return function(...arguments_) {
4658
+ const styler = createStyler(getModelAnsi(model, levelMapping[level], "color", ...arguments_), ansi_styles_default.color.close, this[STYLER]);
4659
+ return createBuilder(this, styler, this[IS_EMPTY]);
4660
+ };
4661
+ }
4662
+ };
4663
+ const bgModel = "bg" + model[0].toUpperCase() + model.slice(1);
4664
+ styles2[bgModel] = {
4665
+ get() {
4666
+ const { level } = this;
4667
+ return function(...arguments_) {
4668
+ const styler = createStyler(getModelAnsi(model, levelMapping[level], "bgColor", ...arguments_), ansi_styles_default.bgColor.close, this[STYLER]);
4669
+ return createBuilder(this, styler, this[IS_EMPTY]);
4670
+ };
4671
+ }
4672
+ };
4073
4673
  }
4074
- async function runSingleScenario(scenario, runId, options) {
4075
- const scenarioType = scenario.scenarioType ?? "browser";
4076
- if (scenarioType === "eval") {
4077
- return runEvalScenario(scenario, { runId, baseUrl: options.url });
4674
+ var proto = Object.defineProperties(() => {}, {
4675
+ ...styles2,
4676
+ level: {
4677
+ enumerable: true,
4678
+ get() {
4679
+ return this[GENERATOR].level;
4680
+ },
4681
+ set(level) {
4682
+ this[GENERATOR].level = level;
4683
+ }
4078
4684
  }
4079
- const config = loadConfig();
4080
- if (options.selfHeal !== undefined)
4081
- config.selfHeal = options.selfHeal;
4082
- const model = resolveModel2(options.model ?? scenario.model ?? config.defaultModel);
4083
- const client = createClientForModel(model, options.apiKey ?? config.anthropicApiKey);
4084
- const screenshotter = new Screenshotter({
4085
- baseDir: options.screenshotDir ?? config.screenshots.dir
4086
- });
4087
- const resolvedPersonaId = options.personaId ?? scenario.personaId;
4088
- const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
4089
- const result = createResult({
4090
- runId,
4091
- scenarioId: scenario.id,
4092
- model,
4093
- stepsTotal: scenario.steps.length || 10,
4094
- personaId: persona?.id ?? null,
4095
- personaName: persona?.name ?? null
4096
- });
4097
- emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
4098
- let browser = null;
4099
- let page = null;
4100
- try {
4101
- browser = await launchBrowser({ headless: !(options.headed ?? false), engine: options.engine });
4102
- page = await getPage(browser, {
4103
- viewport: config.browser.viewport
4104
- });
4105
- const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
4106
- const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
4107
- await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
4108
- const stepStartTimes = new Map;
4109
- const agentResult = await withTimeout(runAgentLoop({
4110
- client,
4111
- page,
4112
- scenario,
4113
- screenshotter,
4114
- model,
4115
- runId,
4116
- maxTurns: 30,
4117
- a11y: options.a11y,
4118
- persona: persona ? {
4119
- name: persona.name,
4120
- role: persona.role,
4121
- description: persona.description,
4122
- instructions: persona.instructions,
4123
- traits: persona.traits,
4124
- goals: persona.goals
4125
- } : null,
4126
- onStep: (stepEvent) => {
4127
- let stepDurationMs;
4128
- if (stepEvent.type === "tool_call") {
4129
- stepStartTimes.set(stepEvent.stepNumber, Date.now());
4130
- } else if (stepEvent.type === "tool_result") {
4131
- const startTime = stepStartTimes.get(stepEvent.stepNumber);
4132
- if (startTime !== undefined) {
4133
- stepDurationMs = Date.now() - startTime;
4134
- stepStartTimes.delete(stepEvent.stepNumber);
4135
- }
4136
- }
4137
- emit({
4138
- type: `step:${stepEvent.type}`,
4139
- scenarioId: scenario.id,
4140
- scenarioName: scenario.name,
4141
- runId,
4142
- toolName: stepEvent.toolName,
4143
- toolInput: stepEvent.toolInput,
4144
- toolResult: stepEvent.toolResult,
4145
- thinking: stepEvent.thinking,
4146
- stepNumber: stepEvent.stepNumber,
4147
- stepDurationMs
4148
- });
4149
- }
4150
- }), scenarioTimeout, scenario.name);
4151
- if (options.engine !== "lightpanda") {
4152
- for (const ss of agentResult.screenshots) {
4153
- try {
4154
- createScreenshot({
4155
- resultId: result.id,
4156
- stepNumber: ss.stepNumber,
4157
- action: ss.action,
4158
- filePath: ss.filePath,
4159
- width: ss.width,
4160
- height: ss.height,
4161
- description: ss.description,
4162
- pageUrl: ss.pageUrl,
4163
- thumbnailPath: ss.thumbnailPath
4164
- });
4165
- emit({ type: "screenshot:captured", screenshotPath: ss.filePath, scenarioId: scenario.id, runId });
4166
- } catch {}
4167
- }
4168
- }
4169
- const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : "";
4170
- const updatedResult = updateResult(result.id, {
4171
- status: agentResult.status,
4172
- reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
4173
- stepsCompleted: agentResult.stepsCompleted,
4174
- durationMs: Date.now() - new Date(result.createdAt).getTime(),
4175
- tokensUsed: agentResult.tokensUsed,
4176
- costCents: estimateCost(model, agentResult.tokensUsed)
4177
- });
4178
- const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
4179
- emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
4180
- return updatedResult;
4181
- } catch (error) {
4182
- const errorMsg = error instanceof Error ? error.message : String(error);
4183
- const updatedResult = updateResult(result.id, {
4184
- status: "error",
4185
- error: errorMsg,
4186
- durationMs: Date.now() - new Date(result.createdAt).getTime()
4187
- });
4188
- emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
4189
- return updatedResult;
4190
- } finally {
4191
- if (browser)
4192
- await closeBrowser(browser, options.engine);
4685
+ });
4686
+ var createStyler = (open, close, parent) => {
4687
+ let openAll;
4688
+ let closeAll;
4689
+ if (parent === undefined) {
4690
+ openAll = open;
4691
+ closeAll = close;
4692
+ } else {
4693
+ openAll = parent.openAll + open;
4694
+ closeAll = close + parent.closeAll;
4193
4695
  }
4194
- }
4195
- async function runBatch(scenarios, options) {
4196
- const config = loadConfig();
4197
- const model = resolveModel2(options.model ?? config.defaultModel);
4198
- const parallel = options.parallel ?? 1;
4199
- const samples = options.samples ?? 1;
4200
- const flakinessThreshold = options.flakinessThreshold ?? 0.95;
4201
- const run = createRun({
4202
- url: options.url,
4203
- model,
4204
- headed: options.headed,
4205
- parallel,
4206
- projectId: options.projectId,
4207
- samples,
4208
- flakinessThreshold
4209
- });
4210
- updateRun(run.id, { status: "running", total: scenarios.length });
4211
- let sortedScenarios = scenarios;
4212
- try {
4213
- const { topologicalSort: topologicalSort2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
4214
- const scenarioIds = scenarios.map((s) => s.id);
4215
- const sortedIds = topologicalSort2(scenarioIds);
4216
- const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
4217
- sortedScenarios = sortedIds.map((id) => scenarioMap.get(id)).filter((s) => s !== undefined);
4218
- for (const s of scenarios) {
4219
- if (!sortedIds.includes(s.id))
4220
- sortedScenarios.push(s);
4221
- }
4222
- } catch {}
4223
- const results = [];
4224
- const failedScenarioIds = new Set;
4225
- const canRun = async (scenario) => {
4226
- try {
4227
- const { getDependencies: getDependencies2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
4228
- const deps = getDependencies2(scenario.id);
4229
- for (const depId of deps) {
4230
- if (failedScenarioIds.has(depId))
4231
- return false;
4232
- }
4233
- } catch {}
4234
- return true;
4696
+ return {
4697
+ open,
4698
+ close,
4699
+ openAll,
4700
+ closeAll,
4701
+ parent
4235
4702
  };
4236
- const maxRetries = options.retry ?? 0;
4237
- if (parallel <= 1) {
4238
- for (const scenario of sortedScenarios) {
4239
- if (!await canRun(scenario)) {
4240
- const result2 = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
4241
- const skipped = updateResult(result2.id, { status: "skipped", error: "Skipped: dependency failed" });
4242
- results.push(skipped);
4243
- failedScenarioIds.add(scenario.id);
4244
- emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: "Dependency failed \u2014 skipped", runId: run.id });
4245
- continue;
4246
- }
4247
- let result = await runSingleScenario(scenario, run.id, options);
4248
- let attempt = 1;
4249
- while ((result.status === "failed" || result.status === "error") && attempt <= maxRetries) {
4250
- emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id, retryAttempt: attempt + 1, maxRetries: maxRetries + 1 });
4251
- result = await runSingleScenario(scenario, run.id, options);
4252
- attempt++;
4253
- }
4254
- if (samples > 1) {
4255
- const sampleResults = [result];
4256
- for (let s = 1;s < samples; s++) {
4257
- emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id });
4258
- const sampleResult = await runSingleScenario(scenario, run.id, options);
4259
- sampleResults.push(sampleResult);
4260
- }
4261
- const passCount = sampleResults.filter((r) => r.status === "passed").length;
4262
- const passRate = passCount / samples;
4263
- if (passCount > 0 && passCount < samples && passRate < flakinessThreshold) {
4264
- result = updateResult(result.id, {
4265
- status: "flaky",
4266
- reasoning: `Flaky: ${passCount}/${samples} samples passed (${Math.round(passRate * 100)}% pass rate, threshold ${Math.round(flakinessThreshold * 100)}%)`,
4267
- metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
4268
- });
4269
- } else if (passCount === 0) {
4270
- result = updateResult(result.id, {
4271
- metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
4272
- });
4273
- } else if (passCount === samples) {
4274
- result = updateResult(result.id, {
4275
- metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
4276
- });
4277
- }
4278
- }
4279
- results.push(result);
4280
- if (result.status === "failed" || result.status === "error" || result.status === "flaky") {
4281
- failedScenarioIds.add(scenario.id);
4282
- }
4283
- }
4284
- } else {
4285
- const queue = [...sortedScenarios];
4286
- const running = [];
4287
- const processNext = async () => {
4288
- const scenario = queue.shift();
4289
- if (!scenario)
4290
- return;
4291
- if (!await canRun(scenario)) {
4292
- const result2 = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
4293
- const skipped = updateResult(result2.id, { status: "skipped", error: "Skipped: dependency failed" });
4294
- results.push(skipped);
4295
- failedScenarioIds.add(scenario.id);
4296
- await processNext();
4297
- return;
4298
- }
4299
- const result = await runSingleScenario(scenario, run.id, options);
4300
- results.push(result);
4301
- if (result.status === "failed" || result.status === "error") {
4302
- failedScenarioIds.add(scenario.id);
4303
- }
4304
- await processNext();
4305
- };
4306
- const workers = Math.min(parallel, sortedScenarios.length);
4307
- for (let i = 0;i < workers; i++) {
4308
- running.push(processNext());
4309
- }
4310
- await Promise.all(running);
4703
+ };
4704
+ var createBuilder = (self, _styler, _isEmpty) => {
4705
+ const builder = (...arguments_) => applyStyle(builder, arguments_.length === 1 ? "" + arguments_[0] : arguments_.join(" "));
4706
+ Object.setPrototypeOf(builder, proto);
4707
+ builder[GENERATOR] = self;
4708
+ builder[STYLER] = _styler;
4709
+ builder[IS_EMPTY] = _isEmpty;
4710
+ return builder;
4711
+ };
4712
+ var applyStyle = (self, string) => {
4713
+ if (self.level <= 0 || !string) {
4714
+ return self[IS_EMPTY] ? "" : string;
4311
4715
  }
4312
- let divergenceResults = [];
4313
- if (options.personaIds && options.personaIds.length > 1) {
4314
- const additionalPersonaIds = options.personaIds.slice(1);
4315
- for (const personaId of additionalPersonaIds) {
4316
- for (const scenario of sortedScenarios) {
4317
- const personaResult = await runSingleScenario(scenario, run.id, { ...options, personaId });
4318
- divergenceResults.push(personaResult);
4319
- results.push(personaResult);
4320
- }
4716
+ let styler = self[STYLER];
4717
+ if (styler === undefined) {
4718
+ return string;
4719
+ }
4720
+ const { openAll, closeAll } = styler;
4721
+ if (string.includes("\x1B")) {
4722
+ while (styler !== undefined) {
4723
+ string = stringReplaceAll(string, styler.close, styler.open);
4724
+ styler = styler.parent;
4321
4725
  }
4322
4726
  }
4323
- const passed = results.filter((r) => r.status === "passed").length;
4324
- const failed = results.filter((r) => r.status === "failed" || r.status === "error").length;
4325
- const finalStatus = failed > 0 ? "failed" : "passed";
4326
- const finalRun = updateRun(run.id, {
4327
- status: finalStatus,
4328
- passed,
4329
- failed,
4330
- total: scenarios.length,
4331
- finished_at: new Date().toISOString()
4332
- });
4333
- emit({ type: "run:complete", runId: run.id });
4334
- const eventType = finalRun.status === "failed" ? "failed" : "completed";
4335
- dispatchWebhooks(eventType, finalRun).catch(() => {});
4336
- if (finalRun.status === "failed") {
4337
- const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
4338
- pushFailedRunToLogs(finalRun, failedResults, scenarios).catch(() => {});
4339
- createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
4340
- notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
4727
+ const lfIndex = string.indexOf(`
4728
+ `);
4729
+ if (lfIndex !== -1) {
4730
+ string = stringEncaseCRLFWithFirstIndex(string, closeAll, openAll, lfIndex);
4341
4731
  }
4342
- return { run: finalRun, results };
4343
- }
4344
- async function runByFilter(options) {
4345
- let scenarios;
4346
- if (options.scenarioIds && options.scenarioIds.length > 0) {
4347
- const all = listScenarios({ projectId: options.projectId });
4348
- scenarios = all.filter((s) => options.scenarioIds.includes(s.id) || options.scenarioIds.includes(s.shortId));
4349
- } else {
4350
- scenarios = listScenarios({
4351
- projectId: options.projectId,
4352
- tags: options.tags,
4353
- priority: options.priority
4354
- });
4732
+ return openAll + string + closeAll;
4733
+ };
4734
+ Object.defineProperties(createChalk.prototype, styles2);
4735
+ var chalk = createChalk();
4736
+ var chalkStderr = createChalk({ level: stderrColor ? stderrColor.level : 0 });
4737
+ var source_default = chalk;
4738
+
4739
+ // src/lib/costs.ts
4740
+ init_database();
4741
+ init_config();
4742
+ function getDateFilter(period) {
4743
+ switch (period) {
4744
+ case "day":
4745
+ return "AND r.created_at >= date('now', 'start of day')";
4746
+ case "week":
4747
+ return "AND r.created_at >= date('now', '-7 days')";
4748
+ case "month":
4749
+ return "AND r.created_at >= date('now', '-30 days')";
4750
+ case "all":
4751
+ return "";
4355
4752
  }
4356
- if (scenarios.length === 0) {
4357
- const config = loadConfig();
4358
- const model = resolveModel2(options.model ?? config.defaultModel);
4359
- const run = createRun({ url: options.url, model, projectId: options.projectId });
4360
- updateRun(run.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
4361
- return { run: getRun(run.id), results: [] };
4753
+ }
4754
+ function getPeriodDays(period) {
4755
+ switch (period) {
4756
+ case "day":
4757
+ return 1;
4758
+ case "week":
4759
+ return 7;
4760
+ case "month":
4761
+ return 30;
4762
+ case "all":
4763
+ return 30;
4362
4764
  }
4363
- return runBatch(scenarios, options);
4364
4765
  }
4365
- function startRunAsync(options) {
4766
+ function loadBudgetConfig() {
4366
4767
  const config = loadConfig();
4367
- const model = resolveModel2(options.model ?? config.defaultModel);
4368
- let scenarios;
4369
- if (options.scenarioIds && options.scenarioIds.length > 0) {
4370
- const all = listScenarios({ projectId: options.projectId });
4371
- scenarios = all.filter((s) => options.scenarioIds.includes(s.id) || options.scenarioIds.includes(s.shortId));
4372
- } else {
4373
- scenarios = listScenarios({
4374
- projectId: options.projectId,
4375
- tags: options.tags,
4376
- priority: options.priority
4377
- });
4768
+ const budget = config.budget;
4769
+ return {
4770
+ maxPerRunCents: budget?.maxPerRunCents ?? 50,
4771
+ maxPerDayCents: budget?.maxPerDayCents ?? 500,
4772
+ warnAtPercent: budget?.warnAtPercent ?? 0.8
4773
+ };
4774
+ }
4775
+ function getCostSummary(options) {
4776
+ const db2 = getDatabase();
4777
+ const period = options?.period ?? "month";
4778
+ const projectId = options?.projectId;
4779
+ const dateFilter = getDateFilter(period);
4780
+ const projectFilter = projectId ? "AND ru.project_id = ?" : "";
4781
+ const projectParams = projectId ? [projectId] : [];
4782
+ const totalsRow = db2.query(`SELECT
4783
+ COALESCE(SUM(r.cost_cents), 0) as total_cost,
4784
+ COALESCE(SUM(r.tokens_used), 0) as total_tokens,
4785
+ COUNT(DISTINCT r.run_id) as run_count
4786
+ FROM results r
4787
+ JOIN runs ru ON r.run_id = ru.id
4788
+ WHERE 1=1 ${dateFilter} ${projectFilter}`).get(...projectParams);
4789
+ const modelRows = db2.query(`SELECT
4790
+ r.model,
4791
+ COALESCE(SUM(r.cost_cents), 0) as cost_cents,
4792
+ COALESCE(SUM(r.tokens_used), 0) as tokens,
4793
+ COUNT(DISTINCT r.run_id) as runs
4794
+ FROM results r
4795
+ JOIN runs ru ON r.run_id = ru.id
4796
+ WHERE 1=1 ${dateFilter} ${projectFilter}
4797
+ GROUP BY r.model
4798
+ ORDER BY cost_cents DESC`).all(...projectParams);
4799
+ const byModel = {};
4800
+ for (const row of modelRows) {
4801
+ byModel[row.model] = {
4802
+ costCents: row.cost_cents,
4803
+ tokens: row.tokens,
4804
+ runs: row.runs
4805
+ };
4378
4806
  }
4379
- const parallel = options.parallel ?? 1;
4380
- const run = createRun({
4381
- url: options.url,
4382
- model,
4383
- headed: options.headed,
4384
- parallel,
4385
- projectId: options.projectId
4386
- });
4387
- if (scenarios.length === 0) {
4388
- updateRun(run.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
4389
- return { runId: run.id, scenarioCount: 0 };
4807
+ const scenarioRows = db2.query(`SELECT
4808
+ r.scenario_id,
4809
+ COALESCE(s.name, r.scenario_id) as name,
4810
+ COALESCE(SUM(r.cost_cents), 0) as cost_cents,
4811
+ COALESCE(SUM(r.tokens_used), 0) as tokens,
4812
+ COUNT(DISTINCT r.run_id) as runs
4813
+ FROM results r
4814
+ JOIN runs ru ON r.run_id = ru.id
4815
+ LEFT JOIN scenarios s ON r.scenario_id = s.id
4816
+ WHERE 1=1 ${dateFilter} ${projectFilter}
4817
+ GROUP BY r.scenario_id
4818
+ ORDER BY cost_cents DESC
4819
+ LIMIT 10`).all(...projectParams);
4820
+ const byScenario = scenarioRows.map((row) => ({
4821
+ scenarioId: row.scenario_id,
4822
+ name: row.name,
4823
+ costCents: row.cost_cents,
4824
+ tokens: row.tokens,
4825
+ runs: row.runs
4826
+ }));
4827
+ const runCount = totalsRow.run_count;
4828
+ const avgCostPerRun = runCount > 0 ? totalsRow.total_cost / runCount : 0;
4829
+ const periodDays = getPeriodDays(period);
4830
+ const estimatedMonthlyCents = periodDays > 0 ? totalsRow.total_cost / periodDays * 30 : 0;
4831
+ return {
4832
+ period,
4833
+ totalCostCents: totalsRow.total_cost,
4834
+ totalTokens: totalsRow.total_tokens,
4835
+ runCount,
4836
+ byModel,
4837
+ byScenario,
4838
+ avgCostPerRun,
4839
+ estimatedMonthlyCents
4840
+ };
4841
+ }
4842
+ var COST_PER_SCENARIO_CENTS = {
4843
+ haiku: 5,
4844
+ sonnet: 30,
4845
+ opus: 150,
4846
+ "claude-haiku": 5,
4847
+ "claude-sonnet": 30,
4848
+ "claude-opus": 150,
4849
+ "gpt-4o-mini": 3,
4850
+ "gpt-4o": 25,
4851
+ "gemini-2.0-flash": 2,
4852
+ "gemini-1.5-pro": 20,
4853
+ "llama-3.1-8b": 1,
4854
+ "llama-3.3-70b": 3
4855
+ };
4856
+ function modelToCostKey(model) {
4857
+ const exact = COST_PER_SCENARIO_CENTS[model];
4858
+ if (exact !== undefined)
4859
+ return exact;
4860
+ const lower = model.toLowerCase();
4861
+ if (lower.includes("opus"))
4862
+ return COST_PER_SCENARIO_CENTS["opus"];
4863
+ if (lower.includes("sonnet"))
4864
+ return COST_PER_SCENARIO_CENTS["sonnet"];
4865
+ if (lower.includes("haiku"))
4866
+ return COST_PER_SCENARIO_CENTS["haiku"];
4867
+ if (lower.includes("gpt-4o-mini"))
4868
+ return COST_PER_SCENARIO_CENTS["gpt-4o-mini"];
4869
+ if (lower.includes("gpt-4o"))
4870
+ return COST_PER_SCENARIO_CENTS["gpt-4o"];
4871
+ if (lower.includes("gemini-2.0-flash") || lower.includes("gemini-flash"))
4872
+ return COST_PER_SCENARIO_CENTS["gemini-2.0-flash"];
4873
+ if (lower.includes("gemini-1.5-pro") || lower.includes("gemini-pro"))
4874
+ return COST_PER_SCENARIO_CENTS["gemini-1.5-pro"];
4875
+ if (lower.includes("llama-3.3") || lower.includes("llama3.3"))
4876
+ return COST_PER_SCENARIO_CENTS["llama-3.3-70b"];
4877
+ if (lower.includes("llama"))
4878
+ return COST_PER_SCENARIO_CENTS["llama-3.1-8b"];
4879
+ return 10;
4880
+ }
4881
+ function estimateRunCostCents(scenarioCount, model, samples = 1) {
4882
+ const costPerScenario = modelToCostKey(model);
4883
+ return scenarioCount * costPerScenario * Math.max(1, samples);
4884
+ }
4885
+ function checkBudget(estimatedCostCents) {
4886
+ const budget = loadBudgetConfig();
4887
+ if (estimatedCostCents > budget.maxPerRunCents) {
4888
+ return {
4889
+ allowed: false,
4890
+ warning: `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(budget.maxPerRunCents)})`
4891
+ };
4390
4892
  }
4391
- updateRun(run.id, { status: "running", total: scenarios.length });
4392
- (async () => {
4393
- const results = [];
4394
- try {
4395
- if (parallel <= 1) {
4396
- for (const scenario of scenarios) {
4397
- const result = await runSingleScenario(scenario, run.id, options);
4398
- results.push(result);
4399
- }
4400
- } else {
4401
- const queue = [...scenarios];
4402
- const running = [];
4403
- const processNext = async () => {
4404
- const scenario = queue.shift();
4405
- if (!scenario)
4406
- return;
4407
- const result = await runSingleScenario(scenario, run.id, options);
4408
- results.push(result);
4409
- await processNext();
4410
- };
4411
- const workers = Math.min(parallel, scenarios.length);
4412
- for (let i = 0;i < workers; i++) {
4413
- running.push(processNext());
4414
- }
4415
- await Promise.all(running);
4416
- }
4417
- const passed = results.filter((r) => r.status === "passed").length;
4418
- const failed = results.filter((r) => r.status === "failed" || r.status === "error").length;
4419
- updateRun(run.id, {
4420
- status: failed > 0 ? "failed" : "passed",
4421
- passed,
4422
- failed,
4423
- total: scenarios.length,
4424
- finished_at: new Date().toISOString()
4425
- });
4426
- emit({ type: "run:complete", runId: run.id });
4427
- const asyncRun = getRun(run.id);
4428
- if (asyncRun)
4429
- dispatchWebhooks(asyncRun.status === "failed" ? "failed" : "completed", asyncRun).catch(() => {});
4430
- } catch (error) {
4431
- const errorMsg = error instanceof Error ? error.message : String(error);
4432
- updateRun(run.id, {
4433
- status: "failed",
4434
- finished_at: new Date().toISOString()
4435
- });
4436
- emit({ type: "run:complete", runId: run.id, error: errorMsg });
4437
- const failedRun = getRun(run.id);
4438
- if (failedRun)
4439
- dispatchWebhooks("failed", failedRun).catch(() => {});
4440
- }
4441
- })();
4442
- return { runId: run.id, scenarioCount: scenarios.length };
4893
+ const todaySummary = getCostSummary({ period: "day" });
4894
+ const projectedDaily = todaySummary.totalCostCents + estimatedCostCents;
4895
+ if (projectedDaily > budget.maxPerDayCents) {
4896
+ return {
4897
+ allowed: false,
4898
+ warning: `Daily spending (${formatDollars(todaySummary.totalCostCents)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(budget.maxPerDayCents)})`
4899
+ };
4900
+ }
4901
+ if (projectedDaily > budget.maxPerDayCents * budget.warnAtPercent) {
4902
+ return {
4903
+ allowed: true,
4904
+ warning: `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(budget.maxPerDayCents)} (${Math.round(projectedDaily / budget.maxPerDayCents * 100)}%)`
4905
+ };
4906
+ }
4907
+ return { allowed: true };
4443
4908
  }
4444
- function estimateCost(model, tokens) {
4445
- const costs = {
4446
- "claude-haiku-4-5-20251001": 0.1,
4447
- "claude-sonnet-4-6-20260311": 0.9,
4448
- "claude-opus-4-6-20260311": 3
4449
- };
4450
- const costPer1M = costs[model] ?? 0.5;
4451
- return tokens / 1e6 * costPer1M * 100;
4909
/**
 * Render an amount expressed in cents as a dollar string with two decimals.
 *
 * @param {number} cents - Amount in cents.
 * @returns {string} The amount divided by 100, fixed to two decimals, prefixed with "$".
 */
function formatDollars(cents) {
  const dollars = cents / 100;
  return "$" + dollars.toFixed(2);
}
4453
- // node_modules/chalk/source/vendor/ansi-styles/index.js
4454
- var ANSI_BACKGROUND_OFFSET = 10;
4455
- var wrapAnsi16 = (offset = 0) => (code) => `\x1B[${code + offset}m`;
4456
- var wrapAnsi256 = (offset = 0) => (code) => `\x1B[${38 + offset};5;${code}m`;
4457
- var wrapAnsi16m = (offset = 0) => (red, green, blue) => `\x1B[${38 + offset};2;${red};${green};${blue}m`;
4458
- var styles = {
4459
- modifier: {
4460
- reset: [0, 0],
4461
- bold: [1, 22],
4462
- dim: [2, 22],
4463
- italic: [3, 23],
4464
- underline: [4, 24],
4465
- overline: [53, 55],
4466
- inverse: [7, 27],
4467
- hidden: [8, 28],
4468
- strikethrough: [9, 29]
4469
- },
4470
- color: {
4471
- black: [30, 39],
4472
- red: [31, 39],
4473
- green: [32, 39],
4474
- yellow: [33, 39],
4475
- blue: [34, 39],
4476
- magenta: [35, 39],
4477
- cyan: [36, 39],
4478
- white: [37, 39],
4479
- blackBright: [90, 39],
4480
- gray: [90, 39],
4481
- grey: [90, 39],
4482
- redBright: [91, 39],
4483
- greenBright: [92, 39],
4484
- yellowBright: [93, 39],
4485
- blueBright: [94, 39],
4486
- magentaBright: [95, 39],
4487
- cyanBright: [96, 39],
4488
- whiteBright: [97, 39]
4489
- },
4490
- bgColor: {
4491
- bgBlack: [40, 49],
4492
- bgRed: [41, 49],
4493
- bgGreen: [42, 49],
4494
- bgYellow: [43, 49],
4495
- bgBlue: [44, 49],
4496
- bgMagenta: [45, 49],
4497
- bgCyan: [46, 49],
4498
- bgWhite: [47, 49],
4499
- bgBlackBright: [100, 49],
4500
- bgGray: [100, 49],
4501
- bgGrey: [100, 49],
4502
- bgRedBright: [101, 49],
4503
- bgGreenBright: [102, 49],
4504
- bgYellowBright: [103, 49],
4505
- bgBlueBright: [104, 49],
4506
- bgMagentaBright: [105, 49],
4507
- bgCyanBright: [106, 49],
4508
- bgWhiteBright: [107, 49]
4912
/**
 * Format a token count compactly: plain digits below 1000, one-decimal "K"
 * for thousands and one-decimal "M" for millions.
 *
 * Counts just under one million (e.g. 999999) round up to "1000.0" when
 * expressed in thousands; those are promoted to the "M" unit so the output
 * reads "1.0M" rather than "1000.0K".
 *
 * @param {number} tokens - Number of tokens.
 * @returns {string} Compact human-readable representation.
 */
function formatTokens(tokens) {
  if (tokens >= 1e6) {
    return `${(tokens / 1e6).toFixed(1)}M`;
  }
  if (tokens >= 1000) {
    const thousands = (tokens / 1000).toFixed(1);
    // Rounding can carry into the next unit; avoid printing "1000.0K".
    if (thousands === "1000.0") {
      return `${(tokens / 1e6).toFixed(1)}M`;
    }
    return `${thousands}K`;
  }
  return String(tokens);
}
4919
+ function formatCostsTerminal(summary) {
4920
+ const lines = [];
4921
+ lines.push("");
4922
+ lines.push(source_default.bold(` Cost Summary (${summary.period})`));
4923
+ lines.push("");
4924
+ lines.push(` Total: ${source_default.yellow(formatDollars(summary.totalCostCents))} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`);
4925
+ lines.push(` Avg/run: ${source_default.yellow(formatDollars(summary.avgCostPerRun))}`);
4926
+ lines.push(` Est/month: ${source_default.yellow(formatDollars(summary.estimatedMonthlyCents))}`);
4927
+ const modelEntries = Object.entries(summary.byModel);
4928
+ if (modelEntries.length > 0) {
4929
+ lines.push("");
4930
+ lines.push(source_default.bold(" By Model"));
4931
+ lines.push(` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`);
4932
+ lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)}`);
4933
+ for (const [model, data] of modelEntries) {
4934
+ lines.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
4935
+ }
4509
4936
  }
4510
- };
4511
- var modifierNames = Object.keys(styles.modifier);
4512
- var foregroundColorNames = Object.keys(styles.color);
4513
- var backgroundColorNames = Object.keys(styles.bgColor);
4514
- var colorNames = [...foregroundColorNames, ...backgroundColorNames];
4515
- function assembleStyles() {
4516
- const codes = new Map;
4517
- for (const [groupName, group] of Object.entries(styles)) {
4518
- for (const [styleName, style] of Object.entries(group)) {
4519
- styles[styleName] = {
4520
- open: `\x1B[${style[0]}m`,
4521
- close: `\x1B[${style[1]}m`
4522
- };
4523
- group[styleName] = styles[styleName];
4524
- codes.set(style[0], style[1]);
4937
+ if (summary.byScenario.length > 0) {
4938
+ lines.push("");
4939
+ lines.push(source_default.bold(" Scenarios by Cost (most expensive first)"));
4940
+ lines.push(` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`);
4941
+ lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)} ${"\u2500".repeat(10)}`);
4942
+ for (const s of summary.byScenario) {
4943
+ const label = s.name.length > 38 ? s.name.slice(0, 35) + "..." : s.name;
4944
+ const avgPerRun = s.runs > 0 ? s.costCents / s.runs : 0;
4945
+ lines.push(` ${label.padEnd(40)} ${formatDollars(s.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(s.runs).padEnd(6)} ${formatTokens(s.tokens)}`);
4525
4946
  }
4526
- Object.defineProperty(styles, groupName, {
4527
- value: group,
4528
- enumerable: false
4529
- });
4530
4947
  }
4531
- Object.defineProperty(styles, "codes", {
4532
- value: codes,
4533
- enumerable: false
4534
- });
4535
- styles.color.close = "\x1B[39m";
4536
- styles.bgColor.close = "\x1B[49m";
4537
- styles.color.ansi = wrapAnsi16();
4538
- styles.color.ansi256 = wrapAnsi256();
4539
- styles.color.ansi16m = wrapAnsi16m();
4540
- styles.bgColor.ansi = wrapAnsi16(ANSI_BACKGROUND_OFFSET);
4541
- styles.bgColor.ansi256 = wrapAnsi256(ANSI_BACKGROUND_OFFSET);
4542
- styles.bgColor.ansi16m = wrapAnsi16m(ANSI_BACKGROUND_OFFSET);
4543
- Object.defineProperties(styles, {
4544
- rgbToAnsi256: {
4545
- value(red, green, blue) {
4546
- if (red === green && green === blue) {
4547
- if (red < 8) {
4548
- return 16;
4549
- }
4550
- if (red > 248) {
4551
- return 231;
4948
+ lines.push("");
4949
+ return lines.join(`
4950
+ `);
4951
+ }
4952
/**
 * Serialize a cost summary as pretty-printed JSON (two-space indentation).
 *
 * @param {object} summary - Cost summary to serialize.
 * @returns {string} JSON text.
 */
function formatCostsJSON(summary) {
  const indentWidth = 2;
  return JSON.stringify(summary, null, indentWidth);
}
4955
+
4956
+ // src/db/personas.ts
4957
+ init_types();
4958
+ init_database();
4959
/**
 * Look up a persona by its `id`, falling back to its `short_id`.
 *
 * @param {string} id - Value compared first against `id`, then against `short_id`.
 * @returns {object|null} The row converted by `personaFromRow`, or null when neither lookup matches.
 */
function getPersona(id) {
  const db2 = getDatabase();
  // Tried in order; the first query that yields a row wins.
  const lookups = [
    "SELECT * FROM personas WHERE id = ?",
    "SELECT * FROM personas WHERE short_id = ?"
  ];
  for (const sql of lookups) {
    const match = db2.query(sql).get(id);
    if (match) {
      return personaFromRow(match);
    }
  }
  return null;
}
4969
+
4970
+ // src/lib/runner.ts
4971
+ init_browser();
4972
+ init_ai_client();
4973
+ init_config();
4974
+
4975
+ // src/lib/webhooks.ts
4976
+ init_database();
4977
/**
 * Convert a `webhooks` table row into the camelCase webhook object used by
 * the rest of this module.
 *
 * @param {object} row - Row with `id`, `url`, `events` (JSON text), `project_id`,
 *   `secret`, `active` and `created_at` columns.
 * @returns {object} Webhook with `events` parsed from JSON and `active` as a boolean.
 */
function fromRow(row) {
  const { id, url, secret } = row;
  const events = JSON.parse(row.events);
  // The column holds 1 for active rows; anything else maps to false.
  const active = row.active === 1;
  return {
    id,
    url,
    events,
    projectId: row.project_id,
    secret,
    active,
    createdAt: row.created_at
  };
}
4988
/**
 * Insert a new active webhook and return it as read back by `getWebhook`.
 *
 * @param {object} input - `url` plus optional `events`, `secret` and `projectId`.
 *   `events` defaults to ["failed"]; `secret` defaults to a random UUID with
 *   the dashes removed; `projectId` defaults to null.
 * @returns {object|null} Whatever `getWebhook` returns for the new id.
 */
function createWebhook(input) {
  const db2 = getDatabase();
  const id = uuid();
  const subscribedEvents = input.events ?? ["failed"];
  const signingSecret = input.secret ?? crypto.randomUUID().replace(/-/g, "");
  const insert = db2.query(`
    INSERT INTO webhooks (id, url, events, project_id, secret, active, created_at)
    VALUES (?, ?, ?, ?, ?, 1, ?)
  `);
  insert.run(
    id,
    input.url,
    JSON.stringify(subscribedEvents),
    input.projectId ?? null,
    signingSecret,
    now()
  );
  return getWebhook(id);
}
4999
/**
 * Fetch a webhook by exact id, or by id prefix when exactly one row matches.
 *
 * @param {string} id - Full webhook id or a prefix of one.
 * @returns {object|null} The webhook, or null when nothing (or more than one
 *   row) matches the prefix.
 */
function getWebhook(id) {
  const db2 = getDatabase();
  const exact = db2.query("SELECT * FROM webhooks WHERE id = ?").get(id);
  if (exact) {
    return fromRow(exact);
  }
  // Prefix fallback: accepted only when the prefix is unambiguous.
  const candidates = db2.query("SELECT * FROM webhooks WHERE id LIKE ? || '%'").all(id);
  return candidates.length === 1 ? fromRow(candidates[0]) : null;
}
5010
/**
 * List active webhooks, newest first.
 *
 * @param {string} [projectId] - When truthy, restricts the result to webhooks
 *   bound to that project plus webhooks with no project.
 * @returns {object[]} Webhooks converted with `fromRow`.
 */
function listWebhooks(projectId) {
  const db2 = getDatabase();
  const scoped = Boolean(projectId);
  const projectClause = scoped ? " AND (project_id = ? OR project_id IS NULL)" : "";
  const sql = "SELECT * FROM webhooks WHERE active = 1" + projectClause + " ORDER BY created_at DESC";
  const bindings = scoped ? [projectId] : [];
  return db2.query(sql).all(...bindings).map(fromRow);
}
5022
/**
 * Delete a webhook identified by id or unambiguous id prefix.
 *
 * @param {string} id - Identifier resolved through `getWebhook`.
 * @returns {boolean} true when a webhook was found and a DELETE was issued, false otherwise.
 */
function deleteWebhook(id) {
  const db2 = getDatabase();
  const target = getWebhook(id);
  if (target) {
    // Delete by the resolved full id, not by the (possibly partial) input.
    db2.query("DELETE FROM webhooks WHERE id = ?").run(target.id);
    return true;
  }
  return false;
}
5030
/**
 * Compute the `X-Testers-Signature` header value for a webhook body.
 *
 * The value is `sha256=` followed by the hex HMAC-SHA256 of the body keyed
 * with the webhook secret. The previous implementation emitted a 32-bit
 * rolling checksum under the same `sha256=` label, which is neither SHA-256
 * nor a MAC and could be forged without knowing the secret.
 *
 * @param {string} body - Exact request body that will be sent.
 * @param {string} secret - Shared webhook secret.
 * @returns {string} Signature in the form `sha256=<64 hex chars>`.
 */
function signPayload(body, secret) {
  const digest = createWebhookHmac("sha256", String(secret))
    .update(String(body))
    .digest("hex");
  return `sha256=${digest}`;
}
5040
+ function formatSlackPayload(payload) {
5041
+ const status = payload.run.status === "passed" ? ":white_check_mark:" : ":x:";
5042
+ const color = payload.run.status === "passed" ? "#22c55e" : "#ef4444";
5043
+ return {
5044
+ attachments: [
5045
+ {
5046
+ color,
5047
+ blocks: [
5048
+ {
5049
+ type: "section",
5050
+ text: {
5051
+ type: "mrkdwn",
5052
+ text: `${status} *Test Run ${payload.run.status.toUpperCase()}*
5053
+ ` + `URL: ${payload.run.url}
5054
+ ` + `Results: ${payload.run.passed}/${payload.run.total} passed` + (payload.run.failed > 0 ? ` (${payload.run.failed} failed)` : "") + (payload.schedule ? `
5055
+ Schedule: ${payload.schedule.name}` : "")
5056
+ }
4552
5057
  }
4553
- return Math.round((red - 8) / 247 * 24) + 232;
4554
- }
4555
- return 16 + 36 * Math.round(red / 255 * 5) + 6 * Math.round(green / 255 * 5) + Math.round(blue / 255 * 5);
4556
- },
4557
- enumerable: false
4558
- },
4559
- hexToRgb: {
4560
- value(hex) {
4561
- const matches = /[a-f\d]{6}|[a-f\d]{3}/i.exec(hex.toString(16));
4562
- if (!matches) {
4563
- return [0, 0, 0];
4564
- }
4565
- let [colorString] = matches;
4566
- if (colorString.length === 3) {
4567
- colorString = [...colorString].map((character) => character + character).join("");
4568
- }
4569
- const integer = Number.parseInt(colorString, 16);
4570
- return [
4571
- integer >> 16 & 255,
4572
- integer >> 8 & 255,
4573
- integer & 255
4574
- ];
4575
- },
4576
- enumerable: false
4577
- },
4578
- hexToAnsi256: {
4579
- value: (hex) => styles.rgbToAnsi256(...styles.hexToRgb(hex)),
4580
- enumerable: false
5058
+ ]
5059
+ }
5060
+ ]
5061
+ };
5062
+ }
5063
+ async function dispatchWebhooks(event, run, schedule) {
5064
+ const webhooks = listWebhooks(run.projectId ?? undefined);
5065
+ const payload = {
5066
+ event,
5067
+ run: {
5068
+ id: run.id,
5069
+ url: run.url,
5070
+ status: run.status,
5071
+ passed: run.passed,
5072
+ failed: run.failed,
5073
+ total: run.total
4581
5074
  },
4582
- ansi256ToAnsi: {
4583
- value(code) {
4584
- if (code < 8) {
4585
- return 30 + code;
4586
- }
4587
- if (code < 16) {
4588
- return 90 + (code - 8);
4589
- }
4590
- let red;
4591
- let green;
4592
- let blue;
4593
- if (code >= 232) {
4594
- red = ((code - 232) * 10 + 8) / 255;
4595
- green = red;
4596
- blue = red;
4597
- } else {
4598
- code -= 16;
4599
- const remainder = code % 36;
4600
- red = Math.floor(code / 36) / 5;
4601
- green = Math.floor(remainder / 6) / 5;
4602
- blue = remainder % 6 / 5;
4603
- }
4604
- const value = Math.max(red, green, blue) * 2;
4605
- if (value === 0) {
4606
- return 30;
4607
- }
4608
- let result = 30 + (Math.round(blue) << 2 | Math.round(green) << 1 | Math.round(red));
4609
- if (value === 2) {
4610
- result += 60;
4611
- }
4612
- return result;
5075
+ schedule,
5076
+ timestamp: new Date().toISOString()
5077
+ };
5078
+ for (const webhook of webhooks) {
5079
+ if (!webhook.events.includes(event) && !webhook.events.includes("*"))
5080
+ continue;
5081
+ const isSlack = webhook.url.includes("hooks.slack.com");
5082
+ const body = isSlack ? JSON.stringify(formatSlackPayload(payload)) : JSON.stringify(payload);
5083
+ const headers = {
5084
+ "Content-Type": "application/json"
5085
+ };
5086
+ if (webhook.secret) {
5087
+ headers["X-Testers-Signature"] = signPayload(body, webhook.secret);
5088
+ }
5089
+ try {
5090
+ const response = await fetch(webhook.url, {
5091
+ method: "POST",
5092
+ headers,
5093
+ body
5094
+ });
5095
+ if (!response.ok) {
5096
+ await new Promise((r) => setTimeout(r, 5000));
5097
+ await fetch(webhook.url, { method: "POST", headers, body });
5098
+ }
5099
+ } catch {}
5100
+ }
5101
+ }
5102
+ async function testWebhook(id) {
5103
+ const webhook = getWebhook(id);
5104
+ if (!webhook)
5105
+ return false;
5106
+ const testPayload = {
5107
+ event: "test",
5108
+ run: { id: "test-run", url: "http://localhost:3000", status: "passed", passed: 3, failed: 0, total: 3 },
5109
+ timestamp: new Date().toISOString()
5110
+ };
5111
+ try {
5112
+ const body = JSON.stringify(testPayload);
5113
+ const response = await fetch(webhook.url, {
5114
+ method: "POST",
5115
+ headers: {
5116
+ "Content-Type": "application/json",
5117
+ ...webhook.secret ? { "X-Testers-Signature": signPayload(body, webhook.secret) } : {}
4613
5118
  },
4614
- enumerable: false
4615
- },
4616
- rgbToAnsi: {
4617
- value: (red, green, blue) => styles.ansi256ToAnsi(styles.rgbToAnsi256(red, green, blue)),
4618
- enumerable: false
4619
- },
4620
- hexToAnsi: {
4621
- value: (hex) => styles.ansi256ToAnsi(styles.hexToAnsi256(hex)),
4622
- enumerable: false
5119
+ body
5120
+ });
5121
+ return response.ok;
5122
+ } catch {
5123
+ return false;
5124
+ }
5125
+ }
5126
+
5127
+ // src/lib/logs-integration.ts
5128
/**
 * Best-effort push of failed scenario results to the service at `LOGS_URL`.
 *
 * Does nothing when the `LOGS_URL` environment variable is unset. Otherwise
 * POSTs one "error" entry per failed result to `<LOGS_URL>/api/logs`;
 * network errors are deliberately ignored.
 *
 * @param {object} run - Run record; `id` and `url` are read.
 * @param {object[]} failedResults - Results; `scenarioId`, `error`, `status` and `durationMs` are read.
 * @param {object[]} scenarios - Scenarios used to resolve names by `id`.
 * @returns {Promise<void>}
 */
async function pushFailedRunToLogs(run, failedResults, scenarios) {
  const logsUrl = process.env.LOGS_URL;
  if (!logsUrl) {
    return;
  }
  const scenarioById = new Map(scenarios.map((s) => [s.id, s]));
  const toEntry = (result) => {
    const scenario = scenarioById.get(result.scenarioId);
    const label = scenario?.name ?? result.scenarioId;
    const detail = result.error ? ` \u2014 ${result.error}` : "";
    return {
      level: "error",
      source: "sdk",
      service: "testers",
      message: `[testers] Scenario failed: ${label}${detail}`,
      metadata: {
        run_id: run.id,
        scenario_id: result.scenarioId,
        scenario_name: scenario?.name,
        url: run.url,
        status: result.status,
        duration_ms: result.durationMs
      }
    };
  };
  const entries = failedResults.map((result) => toEntry(result));
  // Strip one trailing slash so the joined path has no "//".
  const endpoint = `${logsUrl.replace(/\/$/, "")}/api/logs`;
  try {
    await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(entries)
    });
  } catch {}
}
5158
+
5159
+ // src/lib/todos-connector.ts
5160
import { Database as Database2 } from "bun:sqlite";
import { createHmac as createWebhookHmac } from "crypto";
import { existsSync as existsSync4 } from "fs";
import { homedir as homedir5 } from "os";
import { join as join5 } from "path";
5164
+ init_types();
5165
/**
 * Resolve the location of the todos SQLite database.
 *
 * @returns {string} `TODOS_DB_PATH` when that environment variable is set to a
 *   non-empty value, otherwise `~/.todos/todos.db`.
 */
function resolveTodosDbPath() {
  const override = process.env["TODOS_DB_PATH"];
  return override ? override : join5(homedir5(), ".todos", "todos.db");
}
5171
/**
 * Open the todos database in read-only mode.
 *
 * @returns {object} An open `Database2` handle; the caller is responsible for closing it.
 * @throws {TodosConnectionError} When no file exists at the resolved path.
 */
function connectToTodos() {
  const dbPath = resolveTodosDbPath();
  if (existsSync4(dbPath)) {
    const handle = new Database2(dbPath, { readonly: true });
    handle.exec("PRAGMA foreign_keys = ON");
    return handle;
  }
  throw new TodosConnectionError(`Todos database not found at ${dbPath}. Install @hasna/todos or set TODOS_DB_PATH.`);
}
5180
/**
 * Read tasks from the todos database, ordered by priority (critical first).
 *
 * Fixes over the previous version:
 * - An unknown `projectName` now yields an empty list. Previously the project
 *   filter was silently dropped and tasks from every project were returned.
 * - A task whose `tags` column is not a valid JSON array is treated as having
 *   no tags instead of throwing out of the tag filter.
 *
 * @param {object} [options]
 * @param {string} [options.status] - Exact status; defaults to pending and in_progress.
 * @param {string} [options.priority] - Exact priority.
 * @param {string} [options.projectName] - Name resolved against the `projects` table.
 * @param {string[]} [options.tags] - Keep only tasks carrying at least one of these tags.
 * @returns {object[]} Matching task rows.
 * @throws Whatever `connectToTodos` throws when the database is unavailable.
 */
function pullTasks(options = {}) {
  const db2 = connectToTodos();
  try {
    let query = "SELECT id, short_id, title, description, status, priority, tags, project_id FROM tasks WHERE 1=1";
    const params = [];
    if (options.status) {
      query += " AND status = ?";
      params.push(options.status);
    } else {
      query += " AND status IN ('pending', 'in_progress')";
    }
    if (options.priority) {
      query += " AND priority = ?";
      params.push(options.priority);
    }
    if (options.projectName) {
      const project = db2.query("SELECT id FROM projects WHERE name = ?").get(options.projectName);
      if (!project) {
        // A named project that does not exist has no tasks.
        return [];
      }
      query += " AND project_id = ?";
      params.push(project.id);
    }
    query += " ORDER BY CASE priority WHEN 'critical' THEN 0 WHEN 'high' THEN 1 WHEN 'medium' THEN 2 WHEN 'low' THEN 3 END";
    const tasks = db2.query(query).all(...params);
    const wantedTags = options.tags;
    if (!wantedTags || wantedTags.length === 0) {
      return tasks;
    }
    const wanted = new Set(wantedTags);
    const tagsOf = (task) => {
      try {
        const parsed = JSON.parse(task.tags || "[]");
        return Array.isArray(parsed) ? parsed : [];
      } catch {
        return [];
      }
    };
    return tasks.filter((task) => tagsOf(task).some((tag) => wanted.has(tag)));
  } finally {
    db2.close();
  }
}
5215
/**
 * Convert a todos task row into the input object for creating a scenario.
 *
 * - `name`: the title with a leading ticket prefix such as "ABC-12: " removed.
 * - `steps`: every description line that starts with a number followed by
 *   "." or ")" (for example "1. Open page"), without the numbering.
 * - `priority`: the task priority when it is low/medium/high/critical,
 *   otherwise "medium".
 * - `tags`: parsed from the JSON `tags` column. Malformed JSON or a non-array
 *   value now yields an empty list instead of throwing or leaking a non-array.
 *
 * @param {object} task - Row with `id`, `short_id`, `title`, `description`, `tags`, `priority`.
 * @param {string} [projectId] - Project to attach the scenario to.
 * @returns {object} Scenario creation input.
 */
function taskToScenarioInput(task, projectId) {
  let tags = [];
  try {
    const parsed = JSON.parse(task.tags || "[]");
    if (Array.isArray(parsed)) {
      tags = parsed;
    }
  } catch {
    // Unparseable tags are treated as "no tags".
  }
  const priority = ["low", "medium", "high", "critical"].includes(task.priority) ? task.priority : "medium";
  const steps = [];
  if (task.description) {
    for (const line of task.description.split("\n")) {
      const match = line.match(/^\s*\d+[\.\)]\s*(.+)/);
      if (match?.[1]) {
        steps.push(match[1].trim());
      }
    }
  }
  return {
    name: task.title.replace(/^(OPE\d+-\d+|[A-Z]+-\d+):\s*/, ""),
    description: task.description || task.title,
    steps,
    tags,
    priority,
    projectId,
    metadata: { todosTaskId: task.id, todosShortId: task.short_id }
  };
}
5239
+ function importFromTodos(options = {}) {
5240
+ const tasks = pullTasks({
5241
+ projectName: options.projectName,
5242
+ tags: options.tags ?? ["qa", "test", "testing"],
5243
+ priority: options.priority
4624
5244
  });
4625
- return styles;
5245
+ const existing = listScenarios({ projectId: options.projectId });
5246
+ const existingTodoIds = new Set(existing.filter((s) => s.metadata?.todosTaskId).map((s) => s.metadata.todosTaskId));
5247
+ let imported = 0;
5248
+ let skipped = 0;
5249
+ for (const task of tasks) {
5250
+ if (existingTodoIds.has(task.id)) {
5251
+ skipped++;
5252
+ continue;
5253
+ }
5254
+ const input = taskToScenarioInput(task, options.projectId);
5255
+ createScenario(input);
5256
+ imported++;
5257
+ }
5258
+ return { imported, skipped };
5259
+ }
5260
/**
 * Mark a task in the todos database as completed.
 *
 * The task is resolved by exact id first, then by id prefix. Fixes over the
 * previous version:
 * - An empty or blank id is rejected. Previously it matched every task through
 *   `LIKE '' || '%'` and completed an arbitrary one.
 * - A prefix that matches more than one task is rejected instead of picking
 *   whichever row came back first.
 * - The result reflects whether the version-guarded UPDATE changed a row, when
 *   the driver reports a change count.
 *
 * @param {string} taskId - Full task id or an unambiguous prefix of one.
 * @returns {boolean} true when the task was marked completed, false otherwise.
 */
function markTodoDone(taskId) {
  if (taskId == null || String(taskId).trim() === "") {
    return false;
  }
  const dbPath = resolveTodosDbPath();
  if (!existsSync4(dbPath)) {
    return false;
  }
  const db2 = new Database2(dbPath);
  try {
    let task = db2.query("SELECT id, version FROM tasks WHERE id = ?").get(taskId);
    if (!task) {
      // LIMIT 2 is enough to tell "unique" from "ambiguous".
      const candidates = db2.query("SELECT id, version FROM tasks WHERE id LIKE ? || '%' LIMIT 2").all(taskId);
      if (candidates.length !== 1) {
        return false;
      }
      task = candidates[0];
    }
    const outcome = db2.query("UPDATE tasks SET status = 'completed', completed_at = datetime('now'), version = version + 1, updated_at = datetime('now') WHERE id = ? AND version = ?").run(task.id, task.version);
    // Drivers that do not report `changes` keep the previous "assume success" result.
    return outcome?.changes === undefined ? true : outcome.changes > 0;
  } finally {
    db2.close();
  }
}
4627
- var ansiStyles = assembleStyles();
4628
- var ansi_styles_default = ansiStyles;
4629
5275
 
4630
- // node_modules/chalk/source/vendor/supports-color/index.js
4631
- import process2 from "process";
4632
- import os from "os";
4633
- import tty from "tty";
4634
- function hasFlag(flag, argv = globalThis.Deno ? globalThis.Deno.args : process2.argv) {
4635
- const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
4636
- const position = argv.indexOf(prefix + flag);
4637
- const terminatorPosition = argv.indexOf("--");
4638
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
5276
+ // src/lib/failure-pipeline.ts
5277
+ async function createFailureTasks(run, failedResults, scenarios) {
5278
+ if (failedResults.length === 0)
5279
+ return { created: 0, skipped: 0 };
5280
+ const projectId = process.env["TESTERS_TODOS_PROJECT_ID"];
5281
+ if (!projectId)
5282
+ return { created: 0, skipped: 0 };
5283
+ let db2 = null;
5284
+ try {
5285
+ db2 = connectToTodos();
5286
+ } catch {
5287
+ return { created: 0, skipped: 0 };
5288
+ }
5289
+ const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
5290
+ let created = 0;
5291
+ let skipped = 0;
5292
+ try {
5293
+ for (const result of failedResults) {
5294
+ const scenario = scenarioMap.get(result.scenarioId);
5295
+ const title = `BUG: [testers] ${scenario?.name ?? result.scenarioId} failed`;
5296
+ const existing = db2.query("SELECT id FROM tasks WHERE title = ? AND status NOT IN ('completed', 'cancelled') LIMIT 1").get(title);
5297
+ if (existing) {
5298
+ skipped++;
5299
+ continue;
5300
+ }
5301
+ const id = crypto.randomUUID();
5302
+ const now2 = new Date().toISOString();
5303
+ const description = [
5304
+ `Test failure detected by open-testers.`,
5305
+ ``,
5306
+ `**Run:** ${run.id}`,
5307
+ `**URL:** ${run.url}`,
5308
+ `**Scenario:** ${scenario?.name ?? result.scenarioId}`,
5309
+ `**Status:** ${result.status}`,
5310
+ result.error ? `**Error:** ${result.error}` : null,
5311
+ result.reasoning ? `**Reasoning:** ${result.reasoning.slice(0, 500)}` : null,
5312
+ `**Duration:** ${result.durationMs ? `${(result.durationMs / 1000).toFixed(1)}s` : "N/A"}`,
5313
+ `**Tokens:** ${result.tokensUsed ?? 0}`
5314
+ ].filter(Boolean).join(`
5315
+ `);
5316
+ try {
5317
+ db2.query(`
5318
+ INSERT INTO tasks (id, short_id, title, description, status, priority, tags, project_id, version, created_at, updated_at)
5319
+ VALUES (?, ?, ?, ?, 'pending', 'high', ?, ?, 1, ?, ?)
5320
+ `).run(id, `BUG-${id.slice(0, 6)}`, title, description, JSON.stringify(["bug", "testers", "auto-created"]), projectId, now2, now2);
5321
+ created++;
5322
+ } catch {
5323
+ skipped++;
5324
+ }
5325
+ }
5326
+ } finally {
5327
+ db2.close();
5328
+ }
5329
+ return { created, skipped };
4639
5330
  }
4640
- var { env } = process2;
4641
- var flagForceColor;
4642
- if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
4643
- flagForceColor = 0;
4644
- } else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
4645
- flagForceColor = 1;
5331
/**
 * Best-effort post of a failure summary to a conversations space.
 *
 * Does nothing unless both `TESTERS_CONVERSATIONS_URL` and
 * `TESTERS_CONVERSATIONS_SPACE` are set. Lists at most five failures and
 * appends an "and N more" line when there are more. Network errors are
 * deliberately ignored.
 *
 * Fix: the blank separator lines of the message are now kept. The previous
 * `.filter((l) => l !== "")` was meant to drop the empty "and N more" line but
 * also removed every intentional blank line, collapsing the message.
 *
 * @param {object} run - Run record; `id`, `url`, `total` and `passed` are read.
 * @param {object[]} failedResults - Results; `scenarioId` and `error` are read.
 * @param {object[]} scenarios - Scenarios used to resolve names by `id`.
 * @returns {Promise<void>}
 */
async function notifyFailureToConversations(run, failedResults, scenarios) {
  const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
  const space = process.env["TESTERS_CONVERSATIONS_SPACE"];
  if (!baseUrl || !space)
    return;
  const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
  const total = run.total;
  const failedCount = failedResults.length;
  const passedCount = run.passed;
  const failureLines = failedResults.slice(0, 5).map((r) => {
    const name = scenarioMap.get(r.scenarioId)?.name ?? r.scenarioId;
    const err = r.error ? ` \u2014 ${r.error.slice(0, 120)}` : "";
    return ` \u274C ${name}${err}`;
  });
  const lines = [
    `\uD83D\uDEA8 **Testers run failed** \u2014 ${failedCount}/${total} scenarios failed`,
    ``,
    `**URL:** ${run.url}`,
    `**Run ID:** \`${run.id}\``,
    `**Pass rate:** ${passedCount}/${total}`,
    ``,
    `**Failures:**`,
    ...failureLines
  ];
  // Only the overflow line is optional; blank separators always stay.
  if (failedResults.length > 5) {
    lines.push(` \u2026 and ${failedResults.length - 5} more`);
  }
  const message = lines.join("\n");
  try {
    await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ content: message, from: "testers" })
    });
  } catch {}
}
4647
- function envForceColor() {
4648
- if ("FORCE_COLOR" in env) {
4649
- if (env.FORCE_COLOR === "true") {
4650
- return 1;
4651
- }
4652
- if (env.FORCE_COLOR === "false") {
4653
- return 0;
5366
+ async function notifyRunToConversations(run, results, options) {
5367
+ const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
5368
+ const space = options?.spaceId ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
5369
+ if (!baseUrl || !space)
5370
+ return;
5371
+ const passRate = run.total > 0 ? (run.passed / run.total * 100).toFixed(0) : "0";
5372
+ const statusIcon = run.status === "passed" ? "\u2705" : run.status === "failed" ? "\u274C" : "\u26A0\uFE0F";
5373
+ const durationSec = run.finishedAt && run.startedAt ? ((new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime()) / 1000).toFixed(1) : null;
5374
+ const lines = [
5375
+ `${statusIcon} **Testers run ${run.status.toUpperCase()}** \u2014 ${run.passed}/${run.total} scenarios (${passRate}% pass rate)`,
5376
+ ``,
5377
+ `**URL:** ${run.url}`,
5378
+ `**Run ID:** \`${run.id}\``,
5379
+ `**Model:** ${run.model}`,
5380
+ durationSec ? `**Duration:** ${durationSec}s` : null
5381
+ ].filter((l) => l !== null);
5382
+ if (run.status === "failed") {
5383
+ const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
5384
+ const failLines = failedResults.slice(0, 5).map((r) => {
5385
+ const err = r.error ? ` \u2014 ${r.error.slice(0, 100)}` : "";
5386
+ return ` \u274C ${r.scenarioId.slice(0, 8)}${err}`;
5387
+ });
5388
+ if (failLines.length > 0) {
5389
+ lines.push(``, `**Failures:**`);
5390
+ lines.push(...failLines);
5391
+ if (failedResults.length > 5)
5392
+ lines.push(` \u2026 and ${failedResults.length - 5} more`);
4654
5393
  }
4655
- return env.FORCE_COLOR.length === 0 ? 1 : Math.min(Number.parseInt(env.FORCE_COLOR, 10), 3);
4656
5394
  }
5395
+ const message = lines.join(`
5396
+ `);
5397
+ try {
5398
+ await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
5399
+ method: "POST",
5400
+ headers: { "Content-Type": "application/json" },
5401
+ body: JSON.stringify({ content: message, from: "testers" })
5402
+ });
5403
+ } catch {}
4657
5404
  }
4658
- function translateLevel(level) {
4659
- if (level === 0) {
4660
- return false;
4661
- }
4662
- return {
4663
- level,
4664
- hasBasic: true,
4665
- has256: level >= 2,
4666
- has16m: level >= 3
4667
- };
5405
+
5406
+ // src/lib/runner.ts
5407
// Single module-wide listener for runner events; null means "nobody listening".
var eventHandler = null;

/**
 * Register the listener that receives every runner event, replacing any
 * previously registered one.
 *
 * @param {Function|null} handler - Callback invoked with each event object.
 */
function onRunEvent(handler) {
  eventHandler = handler;
}

/**
 * Deliver an event to the registered listener, if there is one.
 *
 * @param {object} event - Event object passed through unchanged.
 */
function emit(event) {
  if (!eventHandler) {
    return;
  }
  eventHandler(event);
}
5415
/**
 * Race a promise against a deadline.
 *
 * Emits a `scenario:timeout_warning` event once 80% of the budget has elapsed
 * and rejects with a descriptive error when the full budget is used up. Both
 * timers are cleared as soon as the wrapped promise settles.
 *
 * @param {Promise} promise - Work to wait for.
 * @param {number} ms - Time budget in milliseconds.
 * @param {string} label - Scenario name used in the warning event and the error message.
 * @returns {Promise} Settles like `promise`, or rejects on timeout.
 */
function withTimeout(promise, ms, label) {
  return new Promise((resolve, reject) => {
    const warningAt = Math.floor(ms * 0.8);
    const warningTimer = setTimeout(() => {
      emit({
        type: "scenario:timeout_warning",
        scenarioName: label,
        timeoutMs: ms,
        elapsedMs: warningAt
      });
    }, warningAt);
    const deadlineTimer = setTimeout(() => {
      clearTimeout(warningTimer);
      reject(new Error(`Scenario '${label}' timed out after ${ms}ms. Try: testers run --timeout ${ms * 2} or simplify the scenario steps.`));
    }, ms);
    const stopTimers = () => {
      clearTimeout(deadlineTimer);
      clearTimeout(warningTimer);
    };
    promise.then(
      (value) => {
        stopTimers();
        resolve(value);
      },
      (reason) => {
        stopTimers();
        reject(reason);
      }
    );
  });
}
5441
+ async function runSingleScenario(scenario, runId, options) {
5442
+ const scenarioType = scenario.scenarioType ?? "browser";
5443
+ if (scenarioType === "eval") {
5444
+ return runEvalScenario(scenario, { runId, baseUrl: options.url });
5445
+ }
5446
+ const config = loadConfig();
5447
+ if (options.selfHeal !== undefined)
5448
+ config.selfHeal = options.selfHeal;
5449
+ let effectiveOptions = options;
5450
+ if (options.minimal) {
5451
+ effectiveOptions = {
5452
+ ...options,
5453
+ engine: options.engine ?? "playwright"
5454
+ };
5455
+ try {
5456
+ const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda)).catch(() => ({ isLightpandaAvailable: () => false }));
5457
+ if (isLightpandaAvailable2())
5458
+ effectiveOptions = { ...effectiveOptions, engine: "lightpanda" };
5459
+ } catch {}
4673
5460
  }
4674
- const forceColor = sniffFlags ? flagForceColor : noFlagForceColor;
4675
- if (forceColor === 0) {
4676
- return 0;
5461
+ const model = resolveModel2(effectiveOptions.minimal ? "quick" : effectiveOptions.model ?? scenario.model ?? config.defaultModel);
5462
+ if (options.cacheMaxAgeMs && options.cacheMaxAgeMs > 0 && scenario.lastPassedAt && scenario.lastPassedUrl === options.url) {
5463
+ const age = Date.now() - new Date(scenario.lastPassedAt).getTime();
5464
+ if (age < options.cacheMaxAgeMs) {
5465
+ const cached = createResult({ runId, scenarioId: scenario.id, model, stepsTotal: 0 });
5466
+ return updateResult(cached.id, {
5467
+ status: "passed",
5468
+ reasoning: `Cache hit: passed ${Math.round(age / 1000)}s ago at ${options.url}`,
5469
+ stepsCompleted: 0,
5470
+ durationMs: 0,
5471
+ tokensUsed: 0
5472
+ });
5473
+ }
4677
5474
  }
4678
- if (sniffFlags) {
4679
- if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
4680
- return 3;
5475
+ const client = createClientForModel(model, effectiveOptions.apiKey ?? config.anthropicApiKey);
5476
+ const screenshotter = new Screenshotter({
5477
+ baseDir: effectiveOptions.screenshotDir ?? config.screenshots.dir
5478
+ });
5479
+ const resolvedPersonaId = options.personaId ?? scenario.personaId;
5480
+ const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
5481
+ const result = createResult({
5482
+ runId,
5483
+ scenarioId: scenario.id,
5484
+ model,
5485
+ stepsTotal: scenario.steps.length || 10,
5486
+ personaId: persona?.id ?? null,
5487
+ personaName: persona?.name ?? null
5488
+ });
5489
+ emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
5490
+ let browser = null;
5491
+ let page = null;
5492
+ try {
5493
+ browser = await launchBrowser({ headless: !(effectiveOptions.headed ?? false), engine: effectiveOptions.engine });
5494
+ page = await getPage(browser, {
5495
+ viewport: config.browser.viewport
5496
+ });
5497
+ const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
5498
+ const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
5499
+ const consoleErrors = [];
5500
+ page.on("console", (msg) => {
5501
+ if (msg.type() === "error")
5502
+ consoleErrors.push(msg.text());
5503
+ });
5504
+ page.on("pageerror", (err) => {
5505
+ consoleErrors.push(err.message);
5506
+ });
5507
+ await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
5508
+ const stepStartTimes = new Map;
5509
+ const agentResult = await withTimeout(runAgentLoop({
5510
+ client,
5511
+ page,
5512
+ scenario,
5513
+ screenshotter,
5514
+ model,
5515
+ runId,
5516
+ maxTurns: effectiveOptions.minimal ? 10 : 30,
5517
+ a11y: effectiveOptions.a11y,
5518
+ persona: persona ? {
5519
+ name: persona.name,
5520
+ role: persona.role,
5521
+ description: persona.description,
5522
+ instructions: persona.instructions,
5523
+ traits: persona.traits,
5524
+ goals: persona.goals,
5525
+ behaviors: persona.behaviors,
5526
+ painPoints: persona.painPoints
5527
+ } : null,
5528
+ onStep: (stepEvent) => {
5529
+ let stepDurationMs;
5530
+ if (stepEvent.type === "tool_call") {
5531
+ stepStartTimes.set(stepEvent.stepNumber, Date.now());
5532
+ } else if (stepEvent.type === "tool_result") {
5533
+ const startTime = stepStartTimes.get(stepEvent.stepNumber);
5534
+ if (startTime !== undefined) {
5535
+ stepDurationMs = Date.now() - startTime;
5536
+ stepStartTimes.delete(stepEvent.stepNumber);
5537
+ }
5538
+ }
5539
+ emit({
5540
+ type: `step:${stepEvent.type}`,
5541
+ scenarioId: scenario.id,
5542
+ scenarioName: scenario.name,
5543
+ runId,
5544
+ toolName: stepEvent.toolName,
5545
+ toolInput: stepEvent.toolInput,
5546
+ toolResult: stepEvent.toolResult,
5547
+ thinking: stepEvent.thinking,
5548
+ stepNumber: stepEvent.stepNumber,
5549
+ stepDurationMs
5550
+ });
5551
+ }
5552
+ }), scenarioTimeout, scenario.name);
5553
+ if (options.engine !== "lightpanda" && options.engine !== "bun") {
5554
+ for (const ss of agentResult.screenshots) {
5555
+ try {
5556
+ createScreenshot({
5557
+ resultId: result.id,
5558
+ stepNumber: ss.stepNumber,
5559
+ action: ss.action,
5560
+ filePath: ss.filePath,
5561
+ width: ss.width,
5562
+ height: ss.height,
5563
+ description: ss.description,
5564
+ pageUrl: ss.pageUrl,
5565
+ thumbnailPath: ss.thumbnailPath
5566
+ });
5567
+ emit({ type: "screenshot:captured", screenshotPath: ss.filePath, scenarioId: scenario.id, runId });
5568
+ } catch {}
5569
+ }
4681
5570
  }
4682
- if (hasFlag("color=256")) {
4683
- return 2;
5571
+ const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : options.engine === "bun" ? " (Running with Bun.WebView \u2014 native, ~11x faster)" : "";
5572
+ let updatedResult = updateResult(result.id, {
5573
+ status: agentResult.status,
5574
+ reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
5575
+ stepsCompleted: agentResult.stepsCompleted,
5576
+ durationMs: Date.now() - new Date(result.createdAt).getTime(),
5577
+ tokensUsed: agentResult.tokensUsed,
5578
+ costCents: estimateCost(model, agentResult.tokensUsed)
5579
+ });
5580
+ if (agentResult.status === "failed" || agentResult.status === "error") {
5581
+ const failureAnalysis = analyzeFailure(null, agentResult.reasoning ?? null);
5582
+ if (failureAnalysis) {
5583
+ updatedResult = updateResult(result.id, { failureAnalysis });
5584
+ }
4684
5585
  }
5586
+ if (agentResult.status === "passed") {
5587
+ try {
5588
+ updateScenarioPassedCache(scenario.id, options.url);
5589
+ } catch {}
5590
+ }
5591
+ const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
5592
+ emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
5593
+ return updatedResult;
5594
+ } catch (error) {
5595
+ const errorMsg = error instanceof Error ? error.message : String(error);
5596
+ let updatedResult = updateResult(result.id, {
5597
+ status: "error",
5598
+ error: errorMsg,
5599
+ durationMs: Date.now() - new Date(result.createdAt).getTime()
5600
+ });
5601
+ const failureAnalysis = analyzeFailure(errorMsg, null);
5602
+ if (failureAnalysis) {
5603
+ updatedResult = updateResult(result.id, { failureAnalysis });
5604
+ }
5605
+ emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
5606
+ return updatedResult;
5607
+ } finally {
5608
+ if (browser)
5609
+ await closeBrowser(browser, effectiveOptions.engine);
4685
5610
  }
4686
- if ("TF_BUILD" in env && "AGENT_NAME" in env) {
4687
- return 1;
4688
- }
4689
- if (haveStream && !streamIsTTY && forceColor === undefined) {
4690
- return 0;
4691
- }
4692
- const min = forceColor || 0;
4693
- if (env.TERM === "dumb") {
4694
- return min;
4695
- }
4696
- if (process2.platform === "win32") {
4697
- const osRelease = os.release().split(".");
4698
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
4699
- return Number(osRelease[2]) >= 14931 ? 3 : 2;
5611
+ }
5612
+ async function runBatch(scenarios, options) {
5613
+ const config = loadConfig();
5614
+ const model = resolveModel2(options.minimal ? "quick" : options.model ?? config.defaultModel);
5615
+ const parallel = options.minimal ? Math.max(5, options.parallel ?? 1) : options.parallel ?? 1;
5616
+ const samples = options.samples ?? 1;
5617
+ const flakinessThreshold = options.flakinessThreshold ?? 0.95;
5618
+ if (!options.skipBudgetCheck) {
5619
+ const cap = options.maxCostCents ?? config.defaultMaxCostCents;
5620
+ if (cap !== undefined && cap > 0) {
5621
+ const estimated = estimateRunCostCents(scenarios.length, model, samples);
5622
+ if (estimated > cap) {
5623
+ throw new BudgetExceededError(estimated, cap);
5624
+ }
4700
5625
  }
4701
- return 1;
4702
5626
  }
4703
- if ("CI" in env) {
4704
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => (key in env))) {
4705
- return 3;
5627
+ const run = createRun({
5628
+ url: options.url,
5629
+ model,
5630
+ headed: options.headed,
5631
+ parallel,
5632
+ projectId: options.projectId,
5633
+ samples,
5634
+ flakinessThreshold
5635
+ });
5636
+ updateRun(run.id, { status: "running", total: scenarios.length });
5637
+ let sortedScenarios = scenarios;
5638
+ try {
5639
+ const { topologicalSort: topologicalSort2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
5640
+ const scenarioIds = scenarios.map((s) => s.id);
5641
+ const sortedIds = topologicalSort2(scenarioIds);
5642
+ const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
5643
+ sortedScenarios = sortedIds.map((id) => scenarioMap.get(id)).filter((s) => s !== undefined);
5644
+ for (const s of scenarios) {
5645
+ if (!sortedIds.includes(s.id))
5646
+ sortedScenarios.push(s);
4706
5647
  }
4707
- if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => (sign in env)) || env.CI_NAME === "codeship") {
4708
- return 1;
5648
+ } catch {}
5649
+ const results = [];
5650
+ const failedScenarioIds = new Set;
5651
+ const canRun = async (scenario) => {
5652
+ try {
5653
+ const { getDependencies: getDependencies2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
5654
+ const deps = getDependencies2(scenario.id);
5655
+ for (const depId of deps) {
5656
+ if (failedScenarioIds.has(depId))
5657
+ return false;
5658
+ }
5659
+ } catch {}
5660
+ return true;
5661
+ };
5662
+ const maxRetries = options.retry ?? 0;
5663
+ if (parallel <= 1) {
5664
+ for (const scenario of sortedScenarios) {
5665
+ if (!await canRun(scenario)) {
5666
+ const result2 = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
5667
+ const skipped = updateResult(result2.id, { status: "skipped", error: "Skipped: dependency failed" });
5668
+ results.push(skipped);
5669
+ failedScenarioIds.add(scenario.id);
5670
+ emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: "Dependency failed \u2014 skipped", runId: run.id });
5671
+ continue;
5672
+ }
5673
+ let result = await runSingleScenario(scenario, run.id, options);
5674
+ let attempt = 1;
5675
+ while ((result.status === "failed" || result.status === "error") && attempt <= maxRetries) {
5676
+ emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id, retryAttempt: attempt + 1, maxRetries: maxRetries + 1 });
5677
+ result = await runSingleScenario(scenario, run.id, options);
5678
+ attempt++;
5679
+ }
5680
+ if (samples > 1) {
5681
+ const sampleResults = [result];
5682
+ for (let s = 1;s < samples; s++) {
5683
+ emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id });
5684
+ const sampleResult = await runSingleScenario(scenario, run.id, options);
5685
+ sampleResults.push(sampleResult);
5686
+ }
5687
+ const passCount = sampleResults.filter((r) => r.status === "passed").length;
5688
+ const passRate = passCount / samples;
5689
+ if (passCount > 0 && passCount < samples && passRate < flakinessThreshold) {
5690
+ result = updateResult(result.id, {
5691
+ status: "flaky",
5692
+ reasoning: `Flaky: ${passCount}/${samples} samples passed (${Math.round(passRate * 100)}% pass rate, threshold ${Math.round(flakinessThreshold * 100)}%)`,
5693
+ metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
5694
+ });
5695
+ } else if (passCount === 0) {
5696
+ result = updateResult(result.id, {
5697
+ metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
5698
+ });
5699
+ } else if (passCount === samples) {
5700
+ result = updateResult(result.id, {
5701
+ metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
5702
+ });
5703
+ }
5704
+ }
5705
+ results.push(result);
5706
+ if (result.status === "failed" || result.status === "error" || result.status === "flaky") {
5707
+ failedScenarioIds.add(scenario.id);
5708
+ }
4709
5709
  }
4710
- return min;
4711
- }
4712
- if ("TEAMCITY_VERSION" in env) {
4713
- return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env.TEAMCITY_VERSION) ? 1 : 0;
4714
- }
4715
- if (env.COLORTERM === "truecolor") {
4716
- return 3;
4717
- }
4718
- if (env.TERM === "xterm-kitty") {
4719
- return 3;
4720
- }
4721
- if (env.TERM === "xterm-ghostty") {
4722
- return 3;
4723
- }
4724
- if (env.TERM === "wezterm") {
4725
- return 3;
4726
- }
4727
- if ("TERM_PROGRAM" in env) {
4728
- const version = Number.parseInt((env.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
4729
- switch (env.TERM_PROGRAM) {
4730
- case "iTerm.app": {
4731
- return version >= 3 ? 3 : 2;
5710
+ } else {
5711
+ const queue = [...sortedScenarios];
5712
+ const running = [];
5713
+ const processNext = async () => {
5714
+ const scenario = queue.shift();
5715
+ if (!scenario)
5716
+ return;
5717
+ if (!await canRun(scenario)) {
5718
+ const result2 = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
5719
+ const skipped = updateResult(result2.id, { status: "skipped", error: "Skipped: dependency failed" });
5720
+ results.push(skipped);
5721
+ failedScenarioIds.add(scenario.id);
5722
+ await processNext();
5723
+ return;
4732
5724
  }
4733
- case "Apple_Terminal": {
4734
- return 2;
5725
+ const result = await runSingleScenario(scenario, run.id, options);
5726
+ results.push(result);
5727
+ if (result.status === "failed" || result.status === "error") {
5728
+ failedScenarioIds.add(scenario.id);
4735
5729
  }
5730
+ await processNext();
5731
+ };
5732
+ const workers = Math.min(parallel, sortedScenarios.length);
5733
+ for (let i = 0;i < workers; i++) {
5734
+ running.push(processNext());
4736
5735
  }
5736
+ await Promise.all(running);
4737
5737
  }
4738
- if (/-256(color)?$/i.test(env.TERM)) {
4739
- return 2;
5738
+ let divergenceResults = [];
5739
+ if (options.personaIds && options.personaIds.length > 1) {
5740
+ const additionalPersonaIds = options.personaIds.slice(1);
5741
+ for (const personaId of additionalPersonaIds) {
5742
+ for (const scenario of sortedScenarios) {
5743
+ const personaResult = await runSingleScenario(scenario, run.id, { ...options, personaId });
5744
+ divergenceResults.push(personaResult);
5745
+ results.push(personaResult);
5746
+ }
5747
+ }
4740
5748
  }
4741
- if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env.TERM)) {
4742
- return 1;
5749
+ const passed = results.filter((r) => r.status === "passed").length;
5750
+ const failed = results.filter((r) => r.status === "failed" || r.status === "error").length;
5751
+ const finalStatus = failed > 0 ? "failed" : "passed";
5752
+ const finalRun = updateRun(run.id, {
5753
+ status: finalStatus,
5754
+ passed,
5755
+ failed,
5756
+ total: scenarios.length,
5757
+ finished_at: new Date().toISOString()
5758
+ });
5759
+ emit({ type: "run:complete", runId: run.id });
5760
+ const eventType = finalRun.status === "failed" ? "failed" : "completed";
5761
+ dispatchWebhooks(eventType, finalRun).catch(() => {});
5762
+ if (finalRun.status === "failed") {
5763
+ const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
5764
+ pushFailedRunToLogs(finalRun, failedResults, scenarios).catch(() => {});
5765
+ createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
5766
+ notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
4743
5767
  }
4744
- if ("COLORTERM" in env) {
4745
- return 1;
5768
+ const conversationsSpaceId = config.conversationsSpace ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
5769
+ if (conversationsSpaceId) {
5770
+ notifyRunToConversations(finalRun, results, { spaceId: conversationsSpaceId }).catch(() => {});
4746
5771
  }
4747
- return min;
4748
- }
4749
- function createSupportsColor(stream, options = {}) {
4750
- const level = _supportsColor(stream, {
4751
- streamIsTTY: stream && stream.isTTY,
4752
- ...options
4753
- });
4754
- return translateLevel(level);
5772
+ return { run: finalRun, results };
4755
5773
  }
4756
- var supportsColor = {
4757
- stdout: createSupportsColor({ isTTY: tty.isatty(1) }),
4758
- stderr: createSupportsColor({ isTTY: tty.isatty(2) })
4759
- };
4760
- var supports_color_default = supportsColor;
4761
-
4762
- // node_modules/chalk/source/utilities.js
4763
- function stringReplaceAll(string, substring, replacer) {
4764
- let index = string.indexOf(substring);
4765
- if (index === -1) {
4766
- return string;
5774
+ async function runByFilter(options) {
5775
+ let scenarios;
5776
+ if (options.scenarioIds && options.scenarioIds.length > 0) {
5777
+ const all = listScenarios({ projectId: options.projectId });
5778
+ scenarios = all.filter((s) => options.scenarioIds.includes(s.id) || options.scenarioIds.includes(s.shortId));
5779
+ } else {
5780
+ scenarios = listScenarios({
5781
+ projectId: options.projectId,
5782
+ tags: options.tags,
5783
+ priority: options.priority
5784
+ });
4767
5785
  }
4768
- const substringLength = substring.length;
4769
- let endIndex = 0;
4770
- let returnValue = "";
4771
- do {
4772
- returnValue += string.slice(endIndex, index) + substring + replacer;
4773
- endIndex = index + substringLength;
4774
- index = string.indexOf(substring, endIndex);
4775
- } while (index !== -1);
4776
- returnValue += string.slice(endIndex);
4777
- return returnValue;
4778
- }
4779
- function stringEncaseCRLFWithFirstIndex(string, prefix, postfix, index) {
4780
- let endIndex = 0;
4781
- let returnValue = "";
4782
- do {
4783
- const gotCR = string[index - 1] === "\r";
4784
- returnValue += string.slice(endIndex, gotCR ? index - 1 : index) + prefix + (gotCR ? `\r
4785
- ` : `
4786
- `) + postfix;
4787
- endIndex = index + 1;
4788
- index = string.indexOf(`
4789
- `, endIndex);
4790
- } while (index !== -1);
4791
- returnValue += string.slice(endIndex);
4792
- return returnValue;
4793
- }
4794
-
4795
- // node_modules/chalk/source/index.js
4796
- var { stdout: stdoutColor, stderr: stderrColor } = supports_color_default;
4797
- var GENERATOR = Symbol("GENERATOR");
4798
- var STYLER = Symbol("STYLER");
4799
- var IS_EMPTY = Symbol("IS_EMPTY");
4800
- var levelMapping = [
4801
- "ansi",
4802
- "ansi",
4803
- "ansi256",
4804
- "ansi16m"
4805
- ];
4806
- var styles2 = Object.create(null);
4807
- var applyOptions = (object, options = {}) => {
4808
- if (options.level && !(Number.isInteger(options.level) && options.level >= 0 && options.level <= 3)) {
4809
- throw new Error("The `level` option should be an integer from 0 to 3");
5786
+ if (scenarios.length === 0) {
5787
+ const config = loadConfig();
5788
+ const model = resolveModel2(options.model ?? config.defaultModel);
5789
+ const run = createRun({ url: options.url, model, projectId: options.projectId });
5790
+ updateRun(run.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
5791
+ return { run: getRun(run.id), results: [] };
4810
5792
  }
4811
- const colorLevel = stdoutColor ? stdoutColor.level : 0;
4812
- object.level = options.level === undefined ? colorLevel : options.level;
4813
- };
4814
- var chalkFactory = (options) => {
4815
- const chalk = (...strings) => strings.join(" ");
4816
- applyOptions(chalk, options);
4817
- Object.setPrototypeOf(chalk, createChalk.prototype);
4818
- return chalk;
4819
- };
4820
- function createChalk(options) {
4821
- return chalkFactory(options);
4822
- }
4823
- Object.setPrototypeOf(createChalk.prototype, Function.prototype);
4824
- for (const [styleName, style] of Object.entries(ansi_styles_default)) {
4825
- styles2[styleName] = {
4826
- get() {
4827
- const builder = createBuilder(this, createStyler(style.open, style.close, this[STYLER]), this[IS_EMPTY]);
4828
- Object.defineProperty(this, styleName, { value: builder });
4829
- return builder;
4830
- }
4831
- };
5793
+ return runBatch(scenarios, options);
4832
5794
  }
4833
- styles2.visible = {
4834
- get() {
4835
- const builder = createBuilder(this, this[STYLER], true);
4836
- Object.defineProperty(this, "visible", { value: builder });
4837
- return builder;
5795
+ function startRunAsync(options) {
5796
+ const config = loadConfig();
5797
+ const model = resolveModel2(options.model ?? config.defaultModel);
5798
+ let scenarios;
5799
+ if (options.scenarioIds && options.scenarioIds.length > 0) {
5800
+ const all = listScenarios({ projectId: options.projectId });
5801
+ scenarios = all.filter((s) => options.scenarioIds.includes(s.id) || options.scenarioIds.includes(s.shortId));
5802
+ } else {
5803
+ scenarios = listScenarios({
5804
+ projectId: options.projectId,
5805
+ tags: options.tags,
5806
+ priority: options.priority
5807
+ });
4838
5808
  }
4839
- };
4840
- var getModelAnsi = (model, level, type, ...arguments_) => {
4841
- if (model === "rgb") {
4842
- if (level === "ansi16m") {
4843
- return ansi_styles_default[type].ansi16m(...arguments_);
4844
- }
4845
- if (level === "ansi256") {
4846
- return ansi_styles_default[type].ansi256(ansi_styles_default.rgbToAnsi256(...arguments_));
5809
+ if (!options.skipBudgetCheck) {
5810
+ const cap = options.maxCostCents ?? config.defaultMaxCostCents;
5811
+ if (cap !== undefined && cap > 0 && scenarios.length > 0) {
5812
+ const samples = options.samples ?? 1;
5813
+ const estimated = estimateRunCostCents(scenarios.length, model, samples);
5814
+ if (estimated > cap) {
5815
+ throw new BudgetExceededError(estimated, cap);
5816
+ }
4847
5817
  }
4848
- return ansi_styles_default[type].ansi(ansi_styles_default.rgbToAnsi(...arguments_));
4849
5818
  }
4850
- if (model === "hex") {
4851
- return getModelAnsi("rgb", level, type, ...ansi_styles_default.hexToRgb(...arguments_));
5819
+ const parallel = options.parallel ?? 1;
5820
+ const run = createRun({
5821
+ url: options.url,
5822
+ model,
5823
+ headed: options.headed,
5824
+ parallel,
5825
+ projectId: options.projectId
5826
+ });
5827
+ if (scenarios.length === 0) {
5828
+ updateRun(run.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
5829
+ return { runId: run.id, scenarioCount: 0 };
4852
5830
  }
4853
- return ansi_styles_default[type][model](...arguments_);
4854
- };
4855
- var usedModels = ["rgb", "hex", "ansi256"];
4856
- for (const model of usedModels) {
4857
- styles2[model] = {
4858
- get() {
4859
- const { level } = this;
4860
- return function(...arguments_) {
4861
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "color", ...arguments_), ansi_styles_default.color.close, this[STYLER]);
4862
- return createBuilder(this, styler, this[IS_EMPTY]);
4863
- };
4864
- }
4865
- };
4866
- const bgModel = "bg" + model[0].toUpperCase() + model.slice(1);
4867
- styles2[bgModel] = {
4868
- get() {
4869
- const { level } = this;
4870
- return function(...arguments_) {
4871
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "bgColor", ...arguments_), ansi_styles_default.bgColor.close, this[STYLER]);
4872
- return createBuilder(this, styler, this[IS_EMPTY]);
4873
- };
5831
+ updateRun(run.id, { status: "running", total: scenarios.length });
5832
+ (async () => {
5833
+ const results = [];
5834
+ try {
5835
+ if (parallel <= 1) {
5836
+ for (const scenario of scenarios) {
5837
+ const result = await runSingleScenario(scenario, run.id, options);
5838
+ results.push(result);
5839
+ }
5840
+ } else {
5841
+ const queue = [...scenarios];
5842
+ const running = [];
5843
+ const processNext = async () => {
5844
+ const scenario = queue.shift();
5845
+ if (!scenario)
5846
+ return;
5847
+ const result = await runSingleScenario(scenario, run.id, options);
5848
+ results.push(result);
5849
+ await processNext();
5850
+ };
5851
+ const workers = Math.min(parallel, scenarios.length);
5852
+ for (let i = 0;i < workers; i++) {
5853
+ running.push(processNext());
5854
+ }
5855
+ await Promise.all(running);
5856
+ }
5857
+ const passed = results.filter((r) => r.status === "passed").length;
5858
+ const failed = results.filter((r) => r.status === "failed" || r.status === "error").length;
5859
+ updateRun(run.id, {
5860
+ status: failed > 0 ? "failed" : "passed",
5861
+ passed,
5862
+ failed,
5863
+ total: scenarios.length,
5864
+ finished_at: new Date().toISOString()
5865
+ });
5866
+ emit({ type: "run:complete", runId: run.id });
5867
+ const asyncRun = getRun(run.id);
5868
+ if (asyncRun)
5869
+ dispatchWebhooks(asyncRun.status === "failed" ? "failed" : "completed", asyncRun).catch(() => {});
5870
+ } catch (error) {
5871
+ const errorMsg = error instanceof Error ? error.message : String(error);
5872
+ updateRun(run.id, {
5873
+ status: "failed",
5874
+ finished_at: new Date().toISOString()
5875
+ });
5876
+ emit({ type: "run:complete", runId: run.id, error: errorMsg });
5877
+ const failedRun = getRun(run.id);
5878
+ if (failedRun)
5879
+ dispatchWebhooks("failed", failedRun).catch(() => {});
4874
5880
  }
4875
- };
5881
+ })();
5882
+ return { runId: run.id, scenarioCount: scenarios.length };
4876
5883
  }
4877
- var proto = Object.defineProperties(() => {}, {
4878
- ...styles2,
4879
- level: {
4880
- enumerable: true,
4881
- get() {
4882
- return this[GENERATOR].level;
4883
- },
4884
- set(level) {
4885
- this[GENERATOR].level = level;
4886
- }
4887
- }
4888
- });
4889
- var createStyler = (open, close, parent) => {
4890
- let openAll;
4891
- let closeAll;
4892
- if (parent === undefined) {
4893
- openAll = open;
4894
- closeAll = close;
4895
- } else {
4896
- openAll = parent.openAll + open;
4897
- closeAll = close + parent.closeAll;
4898
- }
4899
- return {
4900
- open,
4901
- close,
4902
- openAll,
4903
- closeAll,
4904
- parent
5884
+ function estimateCost(model, tokens) {
5885
+ const costs = {
5886
+ "claude-haiku-4-5-20251001": 0.1,
5887
+ "claude-sonnet-4-6-20260311": 0.9,
5888
+ "claude-opus-4-6-20260311": 3
4905
5889
  };
4906
- };
4907
- var createBuilder = (self, _styler, _isEmpty) => {
4908
- const builder = (...arguments_) => applyStyle(builder, arguments_.length === 1 ? "" + arguments_[0] : arguments_.join(" "));
4909
- Object.setPrototypeOf(builder, proto);
4910
- builder[GENERATOR] = self;
4911
- builder[STYLER] = _styler;
4912
- builder[IS_EMPTY] = _isEmpty;
4913
- return builder;
4914
- };
4915
- var applyStyle = (self, string) => {
4916
- if (self.level <= 0 || !string) {
4917
- return self[IS_EMPTY] ? "" : string;
4918
- }
4919
- let styler = self[STYLER];
4920
- if (styler === undefined) {
4921
- return string;
4922
- }
4923
- const { openAll, closeAll } = styler;
4924
- if (string.includes("\x1B")) {
4925
- while (styler !== undefined) {
4926
- string = stringReplaceAll(string, styler.close, styler.open);
4927
- styler = styler.parent;
4928
- }
4929
- }
4930
- const lfIndex = string.indexOf(`
4931
- `);
4932
- if (lfIndex !== -1) {
4933
- string = stringEncaseCRLFWithFirstIndex(string, closeAll, openAll, lfIndex);
4934
- }
4935
- return openAll + string + closeAll;
4936
- };
4937
- Object.defineProperties(createChalk.prototype, styles2);
4938
- var chalk = createChalk();
4939
- var chalkStderr = createChalk({ level: stderrColor ? stderrColor.level : 0 });
4940
- var source_default = chalk;
4941
-
5890
+ const costPer1M = costs[model] ?? 0.5;
5891
+ return tokens / 1e6 * costPer1M * 100;
5892
+ }
4942
5893
  // src/lib/reporter.ts
4943
5894
  init_database();
4944
5895
  function useEmoji() {
@@ -5148,7 +6099,8 @@ function formatScenarioList(scenarios) {
5148
6099
  lastStatusIcon = source_default.dim("~");
5149
6100
  passRateStr = stats.passRate === "\u2014" ? source_default.dim("\u2014") : source_default.dim(stats.passRate);
5150
6101
  }
5151
- lines.push(` ${source_default.cyan(s.shortId)} ${s.name} ${priorityColor(s.priority)}${tags} ${lastStatusIcon} ${passRateStr}`);
6102
+ const flakinessStr = s.flakinessScore !== null && s.flakinessScore !== undefined && s.flakinessScore < 0.8 ? source_default.yellow(` \u26A1 flaky (${Math.round(s.flakinessScore * 100)}%)`) : "";
6103
+ lines.push(` ${source_default.cyan(s.shortId)} ${s.name} ${priorityColor(s.priority)}${tags}${flakinessStr} ${lastStatusIcon} ${passRateStr}`);
5152
6104
  }
5153
6105
  lines.push("");
5154
6106
  return lines.join(`
@@ -5402,11 +6354,11 @@ class Scheduler {
5402
6354
  }
5403
6355
  }
5404
6356
  // src/lib/init.ts
5405
- import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "fs";
5406
- import { join as join5, basename } from "path";
5407
- import { homedir as homedir5 } from "os";
6357
+ import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync4 } from "fs";
6358
+ import { join as join6, basename } from "path";
6359
+ import { homedir as homedir6 } from "os";
5408
6360
  function detectFramework(dir) {
5409
- const pkgPath = join5(dir, "package.json");
6361
+ const pkgPath = join6(dir, "package.json");
5410
6362
  if (!existsSync5(pkgPath))
5411
6363
  return null;
5412
6364
  let pkg;
@@ -5626,10 +6578,10 @@ function initProject(options) {
5626
6578
  const project = ensureProject(name, projectPath);
5627
6579
  const starterInputs = getStarterScenarios(framework ?? { name: "Unknown", features: [] }, project.id);
5628
6580
  const scenarios = starterInputs.map((input) => createScenario(input));
5629
- const configDir = join5(homedir5(), ".testers");
5630
- const configPath = join5(configDir, "config.json");
6581
+ const configDir = join6(homedir6(), ".testers");
6582
+ const configPath = join6(configDir, "config.json");
5631
6583
  if (!existsSync5(configDir)) {
5632
- mkdirSync3(configDir, { recursive: true });
6584
+ mkdirSync4(configDir, { recursive: true });
5633
6585
  }
5634
6586
  let config = {};
5635
6587
  if (existsSync5(configPath)) {
@@ -6276,179 +7228,6 @@ function generateLatestReport() {
6276
7228
  throw new Error("No runs found");
6277
7229
  return generateHtmlReport(runs[0].id);
6278
7230
  }
6279
- // src/lib/costs.ts
6280
- init_database();
6281
- init_config();
6282
- function getDateFilter(period) {
6283
- switch (period) {
6284
- case "day":
6285
- return "AND r.created_at >= date('now', 'start of day')";
6286
- case "week":
6287
- return "AND r.created_at >= date('now', '-7 days')";
6288
- case "month":
6289
- return "AND r.created_at >= date('now', '-30 days')";
6290
- case "all":
6291
- return "";
6292
- }
6293
- }
6294
- function getPeriodDays(period) {
6295
- switch (period) {
6296
- case "day":
6297
- return 1;
6298
- case "week":
6299
- return 7;
6300
- case "month":
6301
- return 30;
6302
- case "all":
6303
- return 30;
6304
- }
6305
- }
6306
- function loadBudgetConfig() {
6307
- const config = loadConfig();
6308
- const budget = config.budget;
6309
- return {
6310
- maxPerRunCents: budget?.maxPerRunCents ?? 50,
6311
- maxPerDayCents: budget?.maxPerDayCents ?? 500,
6312
- warnAtPercent: budget?.warnAtPercent ?? 0.8
6313
- };
6314
- }
6315
- function getCostSummary(options) {
6316
- const db2 = getDatabase();
6317
- const period = options?.period ?? "month";
6318
- const projectId = options?.projectId;
6319
- const dateFilter = getDateFilter(period);
6320
- const projectFilter = projectId ? "AND ru.project_id = ?" : "";
6321
- const projectParams = projectId ? [projectId] : [];
6322
- const totalsRow = db2.query(`SELECT
6323
- COALESCE(SUM(r.cost_cents), 0) as total_cost,
6324
- COALESCE(SUM(r.tokens_used), 0) as total_tokens,
6325
- COUNT(DISTINCT r.run_id) as run_count
6326
- FROM results r
6327
- JOIN runs ru ON r.run_id = ru.id
6328
- WHERE 1=1 ${dateFilter} ${projectFilter}`).get(...projectParams);
6329
- const modelRows = db2.query(`SELECT
6330
- r.model,
6331
- COALESCE(SUM(r.cost_cents), 0) as cost_cents,
6332
- COALESCE(SUM(r.tokens_used), 0) as tokens,
6333
- COUNT(DISTINCT r.run_id) as runs
6334
- FROM results r
6335
- JOIN runs ru ON r.run_id = ru.id
6336
- WHERE 1=1 ${dateFilter} ${projectFilter}
6337
- GROUP BY r.model
6338
- ORDER BY cost_cents DESC`).all(...projectParams);
6339
- const byModel = {};
6340
- for (const row of modelRows) {
6341
- byModel[row.model] = {
6342
- costCents: row.cost_cents,
6343
- tokens: row.tokens,
6344
- runs: row.runs
6345
- };
6346
- }
6347
- const scenarioRows = db2.query(`SELECT
6348
- r.scenario_id,
6349
- COALESCE(s.name, r.scenario_id) as name,
6350
- COALESCE(SUM(r.cost_cents), 0) as cost_cents,
6351
- COALESCE(SUM(r.tokens_used), 0) as tokens,
6352
- COUNT(DISTINCT r.run_id) as runs
6353
- FROM results r
6354
- JOIN runs ru ON r.run_id = ru.id
6355
- LEFT JOIN scenarios s ON r.scenario_id = s.id
6356
- WHERE 1=1 ${dateFilter} ${projectFilter}
6357
- GROUP BY r.scenario_id
6358
- ORDER BY cost_cents DESC
6359
- LIMIT 10`).all(...projectParams);
6360
- const byScenario = scenarioRows.map((row) => ({
6361
- scenarioId: row.scenario_id,
6362
- name: row.name,
6363
- costCents: row.cost_cents,
6364
- tokens: row.tokens,
6365
- runs: row.runs
6366
- }));
6367
- const runCount = totalsRow.run_count;
6368
- const avgCostPerRun = runCount > 0 ? totalsRow.total_cost / runCount : 0;
6369
- const periodDays = getPeriodDays(period);
6370
- const estimatedMonthlyCents = periodDays > 0 ? totalsRow.total_cost / periodDays * 30 : 0;
6371
- return {
6372
- period,
6373
- totalCostCents: totalsRow.total_cost,
6374
- totalTokens: totalsRow.total_tokens,
6375
- runCount,
6376
- byModel,
6377
- byScenario,
6378
- avgCostPerRun,
6379
- estimatedMonthlyCents
6380
- };
6381
- }
6382
- function checkBudget(estimatedCostCents) {
6383
- const budget = loadBudgetConfig();
6384
- if (estimatedCostCents > budget.maxPerRunCents) {
6385
- return {
6386
- allowed: false,
6387
- warning: `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(budget.maxPerRunCents)})`
6388
- };
6389
- }
6390
- const todaySummary = getCostSummary({ period: "day" });
6391
- const projectedDaily = todaySummary.totalCostCents + estimatedCostCents;
6392
- if (projectedDaily > budget.maxPerDayCents) {
6393
- return {
6394
- allowed: false,
6395
- warning: `Daily spending (${formatDollars(todaySummary.totalCostCents)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(budget.maxPerDayCents)})`
6396
- };
6397
- }
6398
- if (projectedDaily > budget.maxPerDayCents * budget.warnAtPercent) {
6399
- return {
6400
- allowed: true,
6401
- warning: `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(budget.maxPerDayCents)} (${Math.round(projectedDaily / budget.maxPerDayCents * 100)}%)`
6402
- };
6403
- }
6404
- return { allowed: true };
6405
- }
6406
- function formatDollars(cents) {
6407
- return `$${(cents / 100).toFixed(2)}`;
6408
- }
6409
- function formatTokens(tokens) {
6410
- if (tokens >= 1e6)
6411
- return `${(tokens / 1e6).toFixed(1)}M`;
6412
- if (tokens >= 1000)
6413
- return `${(tokens / 1000).toFixed(1)}K`;
6414
- return String(tokens);
6415
- }
6416
- function formatCostsTerminal(summary) {
6417
- const lines = [];
6418
- lines.push("");
6419
- lines.push(source_default.bold(` Cost Summary (${summary.period})`));
6420
- lines.push("");
6421
- lines.push(` Total: ${source_default.yellow(formatDollars(summary.totalCostCents))} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`);
6422
- lines.push(` Avg/run: ${source_default.yellow(formatDollars(summary.avgCostPerRun))}`);
6423
- lines.push(` Est/month: ${source_default.yellow(formatDollars(summary.estimatedMonthlyCents))}`);
6424
- const modelEntries = Object.entries(summary.byModel);
6425
- if (modelEntries.length > 0) {
6426
- lines.push("");
6427
- lines.push(source_default.bold(" By Model"));
6428
- lines.push(` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`);
6429
- lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)}`);
6430
- for (const [model, data] of modelEntries) {
6431
- lines.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
6432
- }
6433
- }
6434
- if (summary.byScenario.length > 0) {
6435
- lines.push("");
6436
- lines.push(source_default.bold(" Scenarios by Cost (most expensive first)"));
6437
- lines.push(` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`);
6438
- lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)} ${"\u2500".repeat(10)}`);
6439
- for (const s of summary.byScenario) {
6440
- const label = s.name.length > 38 ? s.name.slice(0, 35) + "..." : s.name;
6441
- const avgPerRun = s.runs > 0 ? s.costCents / s.runs : 0;
6442
- lines.push(` ${label.padEnd(40)} ${formatDollars(s.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(s.runs).padEnd(6)} ${formatTokens(s.tokens)}`);
6443
- }
6444
- }
6445
- lines.push("");
6446
- return lines.join(`
6447
- `);
6448
- }
6449
- function formatCostsJSON(summary) {
6450
- return JSON.stringify(summary, null, 2);
6451
- }
6452
7231
  // src/lib/watch.ts
6453
7232
  import { watch } from "fs";
6454
7233
  import { resolve } from "path";