@hasna/testers 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +170 -21
  2. package/dashboard/dist/assets/{index-BSYf1bIR.css → index-CQzkimyO.css} +1 -1
  3. package/dashboard/dist/index.html +2 -2
  4. package/dist/cli/index.js +2043 -818
  5. package/dist/db/database.d.ts.map +1 -1
  6. package/dist/db/personas.d.ts +8 -0
  7. package/dist/db/personas.d.ts.map +1 -1
  8. package/dist/db/results.d.ts +2 -1
  9. package/dist/db/results.d.ts.map +1 -1
  10. package/dist/db/scenarios.d.ts +1 -0
  11. package/dist/db/scenarios.d.ts.map +1 -1
  12. package/dist/db/seed-personas.d.ts +15 -0
  13. package/dist/db/seed-personas.d.ts.map +1 -0
  14. package/dist/index.d.ts +1 -1
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +2220 -1441
  17. package/dist/lib/ai-client.d.ts +7 -8
  18. package/dist/lib/ai-client.d.ts.map +1 -1
  19. package/dist/lib/browser-bun.d.ts +153 -0
  20. package/dist/lib/browser-bun.d.ts.map +1 -0
  21. package/dist/lib/browser.d.ts +1 -1
  22. package/dist/lib/browser.d.ts.map +1 -1
  23. package/dist/lib/config.d.ts.map +1 -1
  24. package/dist/lib/costs.d.ts +5 -0
  25. package/dist/lib/costs.d.ts.map +1 -1
  26. package/dist/lib/failure-analyzer.d.ts +7 -0
  27. package/dist/lib/failure-analyzer.d.ts.map +1 -0
  28. package/dist/lib/failure-explainer.d.ts +17 -0
  29. package/dist/lib/failure-explainer.d.ts.map +1 -0
  30. package/dist/lib/failure-pipeline.d.ts +11 -0
  31. package/dist/lib/failure-pipeline.d.ts.map +1 -1
  32. package/dist/lib/hybrid-runner.d.ts +100 -0
  33. package/dist/lib/hybrid-runner.d.ts.map +1 -0
  34. package/dist/lib/judge.d.ts +1 -1
  35. package/dist/lib/judge.d.ts.map +1 -1
  36. package/dist/lib/reporter.d.ts +2 -0
  37. package/dist/lib/reporter.d.ts.map +1 -1
  38. package/dist/lib/runner.d.ts +5 -1
  39. package/dist/lib/runner.d.ts.map +1 -1
  40. package/dist/lib/screenshotter.d.ts.map +1 -1
  41. package/dist/mcp/index.js +8580 -6403
  42. package/dist/server/index.js +1082 -154
  43. package/dist/types/index.d.ts +60 -2
  44. package/dist/types/index.d.ts.map +1 -1
  45. package/package.json +4 -4
  46. package/dist/cli/index.d.ts +0 -3
  47. package/dist/cli/index.d.ts.map +0 -1
  48. package/dist/mcp/index.d.ts +0 -3
  49. package/dist/mcp/index.d.ts.map +0 -1
  50. /package/dashboard/dist/assets/{index-Bdn52878.js → index-D52SWwDa.js} +0 -0
package/dist/cli/index.js CHANGED
@@ -6,39 +6,60 @@ var __defProp = Object.defineProperty;
6
6
  var __getOwnPropNames = Object.getOwnPropertyNames;
7
7
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
8
8
  var __hasOwnProp = Object.prototype.hasOwnProperty;
9
+ function __accessProp(key) {
10
+ return this[key];
11
+ }
12
+ var __toESMCache_node;
13
+ var __toESMCache_esm;
9
14
  var __toESM = (mod, isNodeMode, target) => {
15
+ var canCache = mod != null && typeof mod === "object";
16
+ if (canCache) {
17
+ var cache = isNodeMode ? __toESMCache_node ??= new WeakMap : __toESMCache_esm ??= new WeakMap;
18
+ var cached = cache.get(mod);
19
+ if (cached)
20
+ return cached;
21
+ }
10
22
  target = mod != null ? __create(__getProtoOf(mod)) : {};
11
23
  const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
12
24
  for (let key of __getOwnPropNames(mod))
13
25
  if (!__hasOwnProp.call(to, key))
14
26
  __defProp(to, key, {
15
- get: () => mod[key],
27
+ get: __accessProp.bind(mod, key),
16
28
  enumerable: true
17
29
  });
30
+ if (canCache)
31
+ cache.set(mod, to);
18
32
  return to;
19
33
  };
20
- var __moduleCache = /* @__PURE__ */ new WeakMap;
21
34
  var __toCommonJS = (from) => {
22
- var entry = __moduleCache.get(from), desc;
35
+ var entry = (__moduleCache ??= new WeakMap).get(from), desc;
23
36
  if (entry)
24
37
  return entry;
25
38
  entry = __defProp({}, "__esModule", { value: true });
26
- if (from && typeof from === "object" || typeof from === "function")
27
- __getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
28
- get: () => from[key],
29
- enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
30
- }));
39
+ if (from && typeof from === "object" || typeof from === "function") {
40
+ for (var key of __getOwnPropNames(from))
41
+ if (!__hasOwnProp.call(entry, key))
42
+ __defProp(entry, key, {
43
+ get: __accessProp.bind(from, key),
44
+ enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
45
+ });
46
+ }
31
47
  __moduleCache.set(from, entry);
32
48
  return entry;
33
49
  };
50
+ var __moduleCache;
34
51
  var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
52
+ var __returnValue = (v) => v;
53
+ function __exportSetter(name, newValue) {
54
+ this[name] = __returnValue.bind(null, newValue);
55
+ }
35
56
  var __export = (target, all) => {
36
57
  for (var name in all)
37
58
  __defProp(target, name, {
38
59
  get: all[name],
39
60
  enumerable: true,
40
61
  configurable: true,
41
- set: (newValue) => all[name] = () => newValue
62
+ set: __exportSetter.bind(all, name)
42
63
  });
43
64
  };
44
65
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
@@ -2122,9 +2143,12 @@ function scenarioFromRow(row) {
2122
2143
  assertions: JSON.parse(row.assertions || "[]"),
2123
2144
  personaId: row.persona_id ?? null,
2124
2145
  scenarioType: row.scenario_type ?? "browser",
2146
+ requiredRole: row.required_role ?? null,
2125
2147
  version: row.version,
2126
2148
  createdAt: row.created_at,
2127
- updatedAt: row.updated_at
2149
+ updatedAt: row.updated_at,
2150
+ lastPassedAt: row.last_passed_at ?? null,
2151
+ lastPassedUrl: row.last_passed_url ?? null
2128
2152
  };
2129
2153
  }
2130
2154
  function runFromRow(row) {
@@ -2164,7 +2188,8 @@ function resultFromRow(row) {
2164
2188
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
2165
2189
  createdAt: row.created_at,
2166
2190
  personaId: row.persona_id ?? null,
2167
- personaName: row.persona_name ?? null
2191
+ personaName: row.persona_name ?? null,
2192
+ failureAnalysis: row.failure_analysis ? JSON.parse(row.failure_analysis) : null
2168
2193
  };
2169
2194
  }
2170
2195
  function screenshotFromRow(row) {
@@ -2232,6 +2257,7 @@ function flowFromRow(row) {
2232
2257
  };
2233
2258
  }
2234
2259
  function personaFromRow(row) {
2260
+ const hasAuth = row.auth_email && row.auth_password;
2235
2261
  return {
2236
2262
  id: row.id,
2237
2263
  shortId: row.short_id,
@@ -2240,13 +2266,23 @@ function personaFromRow(row) {
2240
2266
  description: row.description,
2241
2267
  role: row.role,
2242
2268
  instructions: row.instructions,
2243
- traits: JSON.parse(row.traits),
2244
- goals: JSON.parse(row.goals),
2269
+ traits: JSON.parse(row.traits || "[]"),
2270
+ goals: JSON.parse(row.goals || "[]"),
2271
+ behaviors: JSON.parse(row.behaviors || "[]"),
2272
+ expertiseLevel: row.expertise_level || "intermediate",
2273
+ demographics: JSON.parse(row.demographics || "{}"),
2274
+ painPoints: JSON.parse(row.pain_points || "[]"),
2245
2275
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
2246
2276
  enabled: row.enabled === 1,
2247
2277
  version: row.version,
2248
2278
  createdAt: row.created_at,
2249
- updatedAt: row.updated_at
2279
+ updatedAt: row.updated_at,
2280
+ auth: hasAuth ? {
2281
+ email: row.auth_email,
2282
+ password: row.auth_password,
2283
+ loginPath: row.auth_login_path ?? "/login",
2284
+ cookies: row.auth_cookies ? JSON.parse(row.auth_cookies) : null
2285
+ } : null
2250
2286
  };
2251
2287
  }
2252
2288
  function apiCheckFromRow(row) {
@@ -2288,12 +2324,14 @@ function apiCheckResultFromRow(row) {
2288
2324
  createdAt: row.created_at
2289
2325
  };
2290
2326
  }
2291
- var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, DependencyCycleError;
2327
+ var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, BudgetExceededError, DependencyCycleError;
2292
2328
  var init_types = __esm(() => {
2293
2329
  MODEL_MAP = {
2294
2330
  quick: "claude-haiku-4-5-20251001",
2295
2331
  thorough: "claude-sonnet-4-6-20260311",
2296
- deep: "claude-opus-4-6-20260311"
2332
+ deep: "claude-opus-4-6-20260311",
2333
+ "cerebras-fast": "llama-3.1-8b",
2334
+ "cerebras-smart": "llama-3.3-70b"
2297
2335
  };
2298
2336
  VersionConflictError = class VersionConflictError extends Error {
2299
2337
  constructor(entity, id) {
@@ -2325,6 +2363,12 @@ var init_types = __esm(() => {
2325
2363
  this.name = "ScheduleNotFoundError";
2326
2364
  }
2327
2365
  };
2366
+ BudgetExceededError = class BudgetExceededError extends Error {
2367
+ constructor(estimatedCents, capCents) {
2368
+ super(`Estimated run cost ($${(estimatedCents / 100).toFixed(2)}) exceeds budget cap ($${(capCents / 100).toFixed(2)}). Pass skipBudgetCheck: true to override.`);
2369
+ this.name = "BudgetExceededError";
2370
+ }
2371
+ };
2328
2372
  DependencyCycleError = class DependencyCycleError extends Error {
2329
2373
  constructor(scenarioId, dependsOn) {
2330
2374
  super(`Adding dependency ${dependsOn} to ${scenarioId} would create a cycle`);
@@ -2734,6 +2778,26 @@ CREATE TABLE IF NOT EXISTS golden_check_results (
2734
2778
  CREATE INDEX IF NOT EXISTS idx_golden_project ON golden_answers(project_id);
2735
2779
  CREATE INDEX IF NOT EXISTS idx_golden_enabled ON golden_answers(enabled);
2736
2780
  CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(golden_id);
2781
+ `,
2782
+ `
2783
+ ALTER TABLE results ADD COLUMN failure_analysis TEXT;
2784
+ `,
2785
+ `
2786
+ ALTER TABLE personas ADD COLUMN behaviors TEXT DEFAULT '[]';
2787
+ ALTER TABLE personas ADD COLUMN expertise_level TEXT DEFAULT 'intermediate';
2788
+ ALTER TABLE personas ADD COLUMN demographics TEXT DEFAULT '{}';
2789
+ ALTER TABLE personas ADD COLUMN pain_points TEXT DEFAULT '[]';
2790
+ `,
2791
+ `
2792
+ ALTER TABLE scenarios ADD COLUMN last_passed_at TEXT;
2793
+ ALTER TABLE scenarios ADD COLUMN last_passed_url TEXT;
2794
+ `,
2795
+ `
2796
+ ALTER TABLE personas ADD COLUMN auth_email TEXT;
2797
+ ALTER TABLE personas ADD COLUMN auth_password TEXT;
2798
+ ALTER TABLE personas ADD COLUMN auth_login_path TEXT DEFAULT '/login';
2799
+ ALTER TABLE personas ADD COLUMN auth_cookies TEXT;
2800
+ ALTER TABLE scenarios ADD COLUMN required_role TEXT;
2737
2801
  `
2738
2802
  ];
2739
2803
  });
@@ -2741,6 +2805,7 @@ CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(gol
2741
2805
  // src/db/scenarios.ts
2742
2806
  var exports_scenarios = {};
2743
2807
  __export(exports_scenarios, {
2808
+ updateScenarioPassedCache: () => updateScenarioPassedCache,
2744
2809
  updateScenario: () => updateScenario,
2745
2810
  listScenarios: () => listScenarios,
2746
2811
  getScenarioByShortId: () => getScenarioByShortId,
@@ -2834,7 +2899,32 @@ function listScenarios(filter) {
2834
2899
  params.push(filter.offset);
2835
2900
  }
2836
2901
  const rows = db2.query(sql).all(...params);
2837
- return rows.map(scenarioFromRow);
2902
+ const scenarios = rows.map(scenarioFromRow);
2903
+ if (scenarios.length === 0)
2904
+ return scenarios;
2905
+ const scenarioIds = scenarios.map((s) => s.id);
2906
+ const placeholders = scenarioIds.map(() => "?").join(",");
2907
+ const statsRows = db2.query(`
2908
+ SELECT scenario_id,
2909
+ COUNT(*) as total,
2910
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed
2911
+ FROM (
2912
+ SELECT scenario_id, status
2913
+ FROM results
2914
+ WHERE scenario_id IN (${placeholders})
2915
+ ORDER BY created_at DESC
2916
+ )
2917
+ GROUP BY scenario_id
2918
+ `).all(...scenarioIds);
2919
+ const statsMap = new Map(statsRows.map((r) => [r.scenario_id, r]));
2920
+ return scenarios.map((s) => {
2921
+ const stats = statsMap.get(s.id);
2922
+ return {
2923
+ ...s,
2924
+ flakinessScore: stats ? stats.passed / stats.total : null,
2925
+ recentRunCount: stats?.total ?? 0
2926
+ };
2927
+ });
2838
2928
  }
2839
2929
  function updateScenario(id, input, version) {
2840
2930
  const db2 = getDatabase();
@@ -2955,6 +3045,10 @@ function findStaleScenarios(days) {
2955
3045
  lastRunAt: row.last_run_at
2956
3046
  }));
2957
3047
  }
3048
+ function updateScenarioPassedCache(id, url) {
3049
+ const db2 = getDatabase();
3050
+ db2.query("UPDATE scenarios SET last_passed_at = ?, last_passed_url = ? WHERE id = ?").run(now(), url, id);
3051
+ }
2958
3052
  function deleteScenario(id) {
2959
3053
  const db2 = getDatabase();
2960
3054
  const scenario = getScenario(id);
@@ -3195,6 +3289,10 @@ function updateResult(id, updates) {
3195
3289
  sets.push("metadata = ?");
3196
3290
  params.push(JSON.stringify(updates.metadata));
3197
3291
  }
3292
+ if (updates.failureAnalysis !== undefined) {
3293
+ sets.push("failure_analysis = ?");
3294
+ params.push(updates.failureAnalysis !== null ? JSON.stringify(updates.failureAnalysis) : null);
3295
+ }
3198
3296
  if (sets.length === 0) {
3199
3297
  return existing;
3200
3298
  }
@@ -3398,6 +3496,412 @@ var init_browser_lightpanda = __esm(() => {
3398
3496
  init_types();
3399
3497
  });
3400
3498
 
3499
+ // src/lib/browser-bun.ts
3500
+ var exports_browser_bun = {};
3501
+ __export(exports_browser_bun, {
3502
+ isBunWebViewAvailable: () => isBunWebViewAvailable,
3503
+ BunWebViewSession: () => BunWebViewSession
3504
+ });
3505
+ import { join as join2 } from "path";
3506
+ import { mkdirSync as mkdirSync2 } from "fs";
3507
+ import { homedir as homedir2 } from "os";
3508
+ function isBunWebViewAvailable() {
3509
+ return typeof globalThis.Bun !== "undefined" && typeof globalThis.Bun.WebView !== "undefined";
3510
+ }
3511
+ function getProfileDir(profileName) {
3512
+ const base = process.env["TESTERS_BROWSER_DATA_DIR"] ?? join2(homedir2(), ".testers", "browser");
3513
+ const dir = join2(base, "profiles", profileName);
3514
+ mkdirSync2(dir, { recursive: true });
3515
+ return dir;
3516
+ }
3517
+ var BunWebViewSession;
3518
+ var init_browser_bun = __esm(() => {
3519
+ BunWebViewSession = class BunWebViewSession {
3520
+ view;
3521
+ _sessionId;
3522
+ _eventListeners = new Map;
3523
+ constructor(opts = {}) {
3524
+ if (!isBunWebViewAvailable()) {
3525
+ throw new Error("Bun.WebView is not available. Install Bun canary: bun upgrade --canary");
3526
+ }
3527
+ const BunWebView = globalThis.Bun.WebView;
3528
+ const constructorOpts = {
3529
+ width: opts.width ?? 1280,
3530
+ height: opts.height ?? 720
3531
+ };
3532
+ if (opts.profile) {
3533
+ constructorOpts.dataStore = { directory: getProfileDir(opts.profile) };
3534
+ } else {
3535
+ constructorOpts.dataStore = "ephemeral";
3536
+ }
3537
+ if (opts.onConsole) {
3538
+ constructorOpts.console = opts.onConsole;
3539
+ }
3540
+ this.view = new BunWebView(constructorOpts);
3541
+ this.view.onNavigated = (url) => {
3542
+ this._emit("navigated", url);
3543
+ };
3544
+ this.view.onNavigationFailed = (error) => {
3545
+ this._emit("navigationfailed", error);
3546
+ };
3547
+ }
3548
+ async goto(url, opts) {
3549
+ await this.view.navigate(url);
3550
+ await new Promise((r) => setTimeout(r, 200));
3551
+ }
3552
+ async goBack() {
3553
+ await this.view.goBack();
3554
+ }
3555
+ async goForward() {
3556
+ await this.view.goForward();
3557
+ }
3558
+ async reload() {
3559
+ await this.view.reload();
3560
+ }
3561
+ async evaluate(fnOrExpr, ...args) {
3562
+ let expr;
3563
+ if (typeof fnOrExpr === "function") {
3564
+ const serializedArgs = args.map((a) => JSON.stringify(a)).join(", ");
3565
+ expr = `(${fnOrExpr.toString()})(${serializedArgs})`;
3566
+ } else {
3567
+ expr = fnOrExpr;
3568
+ }
3569
+ return this.view.evaluate(expr);
3570
+ }
3571
+ async screenshot(opts) {
3572
+ const uint8 = await this.view.screenshot();
3573
+ return Buffer.from(uint8);
3574
+ }
3575
+ async click(selector, opts) {
3576
+ await this.view.click(selector, opts ? { button: opts.button } : undefined);
3577
+ }
3578
+ async type(selector, text, opts) {
3579
+ try {
3580
+ await this.view.click(selector);
3581
+ } catch {}
3582
+ await this.view.type(text);
3583
+ }
3584
+ async fill(selector, value) {
3585
+ await this.view.evaluate(`
3586
+ (() => {
3587
+ const el = document.querySelector(${JSON.stringify(selector)});
3588
+ if (el) { el.value = ''; el.dispatchEvent(new Event('input')); }
3589
+ })()
3590
+ `);
3591
+ await this.type(selector, value);
3592
+ }
3593
+ async press(key, opts) {
3594
+ await this.view.press(key, opts);
3595
+ }
3596
+ async scroll(direction, amount) {
3597
+ const dx = direction === "left" ? -amount : direction === "right" ? amount : 0;
3598
+ const dy = direction === "up" ? -amount : direction === "down" ? amount : 0;
3599
+ await this.view.scroll(dx, dy);
3600
+ }
3601
+ async scrollIntoView(selector) {
3602
+ await this.view.scrollTo(selector);
3603
+ }
3604
+ async hover(selector) {
3605
+ try {
3606
+ await this.view.scrollTo(selector);
3607
+ } catch {}
3608
+ }
3609
+ async resize(width, height) {
3610
+ await this.view.resize(width, height);
3611
+ }
3612
+ async $(selector) {
3613
+ const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
3614
+ if (!exists)
3615
+ return null;
3616
+ return {
3617
+ textContent: async () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`)
3618
+ };
3619
+ }
3620
+ async $$(selector) {
3621
+ const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
3622
+ return Array.from({ length: count }, (_, i) => ({
3623
+ textContent: async () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${i}]?.textContent ?? null`)
3624
+ }));
3625
+ }
3626
+ async inputValue(selector) {
3627
+ return this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.value ?? ''`);
3628
+ }
3629
+ async isChecked(selector) {
3630
+ return this.view.evaluate(`!!(document.querySelector(${JSON.stringify(selector)})?.checked)`);
3631
+ }
3632
+ async isVisible(selector) {
3633
+ return this.view.evaluate(`
3634
+ (() => {
3635
+ const el = document.querySelector(${JSON.stringify(selector)});
3636
+ if (!el) return false;
3637
+ const style = window.getComputedStyle(el);
3638
+ return style.display !== 'none' && style.visibility !== 'hidden' && el.offsetWidth > 0;
3639
+ })()
3640
+ `);
3641
+ }
3642
+ async isEnabled(selector) {
3643
+ return this.view.evaluate(`!(document.querySelector(${JSON.stringify(selector)})?.disabled)`);
3644
+ }
3645
+ async selectOption(selector, value) {
3646
+ await this.view.evaluate(`
3647
+ (() => {
3648
+ const el = document.querySelector(${JSON.stringify(selector)});
3649
+ if (el) {
3650
+ el.value = ${JSON.stringify(value)};
3651
+ el.dispatchEvent(new Event('change'));
3652
+ }
3653
+ })()
3654
+ `);
3655
+ return [value];
3656
+ }
3657
+ async check(selector) {
3658
+ await this.view.evaluate(`
3659
+ (() => {
3660
+ const el = document.querySelector(${JSON.stringify(selector)});
3661
+ if (el && !el.checked) { el.checked = true; el.dispatchEvent(new Event('change')); }
3662
+ })()
3663
+ `);
3664
+ }
3665
+ async uncheck(selector) {
3666
+ await this.view.evaluate(`
3667
+ (() => {
3668
+ const el = document.querySelector(${JSON.stringify(selector)});
3669
+ if (el && el.checked) { el.checked = false; el.dispatchEvent(new Event('change')); }
3670
+ })()
3671
+ `);
3672
+ }
3673
+ async setInputFiles(selector, files) {
3674
+ throw new Error("File upload not supported in Bun.WebView engine. Use engine: 'playwright' instead.");
3675
+ }
3676
+ getByRole(role, opts) {
3677
+ const name = opts?.name?.toString() ?? "";
3678
+ const selector = name ? `[role="${role}"][aria-label*="${name}"], ${role}[aria-label*="${name}"]` : `[role="${role}"], ${role}`;
3679
+ return {
3680
+ click: (clickOpts) => this.click(selector, clickOpts),
3681
+ fill: (value) => this.fill(selector, value),
3682
+ check: () => this.check(selector),
3683
+ uncheck: () => this.uncheck(selector),
3684
+ isVisible: () => this.isVisible(selector),
3685
+ textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
3686
+ inputValue: () => this.inputValue(selector),
3687
+ first: () => ({
3688
+ click: (clickOpts) => this.click(selector, clickOpts),
3689
+ fill: (value) => this.fill(selector, value),
3690
+ textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
3691
+ isVisible: () => this.isVisible(selector),
3692
+ hover: () => this.hover(selector),
3693
+ boundingBox: async () => null,
3694
+ scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
3695
+ evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
3696
+ waitFor: (opts2) => {
3697
+ return new Promise((resolve, reject) => {
3698
+ const timeout = opts2?.timeout ?? 1e4;
3699
+ const start = Date.now();
3700
+ const check = async () => {
3701
+ const visible = await this.isVisible(selector);
3702
+ if (visible)
3703
+ return resolve();
3704
+ if (Date.now() - start > timeout)
3705
+ return reject(new Error(`Timeout waiting for ${selector}`));
3706
+ setTimeout(check, 100);
3707
+ };
3708
+ check();
3709
+ });
3710
+ }
3711
+ }),
3712
+ count: async () => {
3713
+ const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
3714
+ return count;
3715
+ },
3716
+ nth: (n) => ({
3717
+ click: (clickOpts) => this.click(selector, clickOpts),
3718
+ textContent: () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${n}]?.textContent ?? null`),
3719
+ isVisible: () => this.isVisible(selector)
3720
+ })
3721
+ };
3722
+ }
3723
+ getByText(text, opts) {
3724
+ const selector = opts?.exact ? `*:is(button, a, span, div, p, h1, h2, h3, h4, label)` : "*";
3725
+ return {
3726
+ first: () => ({
3727
+ click: async (clickOpts) => {
3728
+ await this.view.evaluate(`
3729
+ (() => {
3730
+ const text = ${JSON.stringify(text)};
3731
+ const all = document.querySelectorAll('*');
3732
+ for (const el of all) {
3733
+ if (el.children.length === 0 && el.textContent?.trim() === text) {
3734
+ el.click(); return;
3735
+ }
3736
+ }
3737
+ for (const el of all) {
3738
+ if (el.textContent?.includes(text)) { el.click(); return; }
3739
+ }
3740
+ })()
3741
+ `);
3742
+ },
3743
+ waitFor: (waitOpts) => {
3744
+ const timeout = waitOpts?.timeout ?? 1e4;
3745
+ return new Promise((resolve, reject) => {
3746
+ const start = Date.now();
3747
+ const check = async () => {
3748
+ const found = await this.view.evaluate(`document.body?.textContent?.includes(${JSON.stringify(text)})`);
3749
+ if (found)
3750
+ return resolve();
3751
+ if (Date.now() - start > timeout)
3752
+ return reject(new Error(`Timeout: text "${text}" not found`));
3753
+ setTimeout(check, 100);
3754
+ };
3755
+ check();
3756
+ });
3757
+ }
3758
+ })
3759
+ };
3760
+ }
3761
+ locator(selector) {
3762
+ return {
3763
+ click: (opts) => this.click(selector, opts),
3764
+ fill: (value) => this.fill(selector, value),
3765
+ scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
3766
+ first: () => this.getByRole("*").first(),
3767
+ evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
3768
+ waitFor: (opts) => {
3769
+ const timeout = opts?.timeout ?? 1e4;
3770
+ return new Promise((resolve, reject) => {
3771
+ const start = Date.now();
3772
+ const check = async () => {
3773
+ const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
3774
+ if (exists)
3775
+ return resolve();
3776
+ if (Date.now() - start > timeout)
3777
+ return reject(new Error(`Timeout: ${selector}`));
3778
+ setTimeout(check, 100);
3779
+ };
3780
+ check();
3781
+ });
3782
+ }
3783
+ };
3784
+ }
3785
+ url() {
3786
+ return this.view.url;
3787
+ }
3788
+ async title() {
3789
+ return this.view.title || await this.evaluate("document.title");
3790
+ }
3791
+ viewportSize() {
3792
+ return { width: 1280, height: 720 };
3793
+ }
3794
+ async waitForLoadState(state, opts) {
3795
+ await new Promise((r) => setTimeout(r, 200));
3796
+ }
3797
+ async waitForURL(pattern, opts) {
3798
+ const timeout = opts?.timeout ?? 30000;
3799
+ const start = Date.now();
3800
+ while (Date.now() - start < timeout) {
3801
+ const url = this.view.url;
3802
+ const matches = pattern instanceof RegExp ? pattern.test(url) : url.includes(pattern);
3803
+ if (matches)
3804
+ return;
3805
+ await new Promise((r) => setTimeout(r, 100));
3806
+ }
3807
+ throw new Error(`Timeout waiting for URL to match ${pattern}`);
3808
+ }
3809
+ async waitForSelector(selector, opts) {
3810
+ const timeout = opts?.timeout ?? 1e4;
3811
+ const start = Date.now();
3812
+ while (Date.now() - start < timeout) {
3813
+ const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
3814
+ if (exists)
3815
+ return;
3816
+ await new Promise((r) => setTimeout(r, 100));
3817
+ }
3818
+ throw new Error(`Timeout waiting for ${selector}`);
3819
+ }
3820
+ async setContent(html) {
3821
+ await this.view.navigate(`data:text/html,${encodeURIComponent(html)}`);
3822
+ await new Promise((r) => setTimeout(r, 100));
3823
+ }
3824
+ async content() {
3825
+ return this.view.evaluate("document.documentElement.outerHTML");
3826
+ }
3827
+ async addInitScript(script) {
3828
+ const expr = typeof script === "function" ? `(${script.toString()})()` : script;
3829
+ await this.view.evaluate(expr);
3830
+ }
3831
+ keyboard = {
3832
+ press: (key) => this.view.press(key)
3833
+ };
3834
+ context() {
3835
+ return {
3836
+ close: async () => {
3837
+ await this.close();
3838
+ },
3839
+ newPage: async () => {
3840
+ throw new Error("Multi-tab not supported in Bun.WebView. Use engine: 'playwright'");
3841
+ },
3842
+ cookies: async () => [],
3843
+ addCookies: async (_) => {},
3844
+ clearCookies: async () => {},
3845
+ newCDPSession: async () => {
3846
+ throw new Error("CDP session via context not available in Bun.WebView. Use view.cdp() when shipped.");
3847
+ },
3848
+ route: async (_pattern, _handler) => {
3849
+ throw new Error("Network interception not supported in Bun.WebView. Use engine: 'cdp' or 'playwright'.");
3850
+ },
3851
+ unrouteAll: async () => {},
3852
+ pages: () => [],
3853
+ addInitScript: async (script) => {
3854
+ await this.addInitScript(script);
3855
+ }
3856
+ };
3857
+ }
3858
+ on(event, handler) {
3859
+ if (!this._eventListeners.has(event))
3860
+ this._eventListeners.set(event, []);
3861
+ this._eventListeners.get(event).push(handler);
3862
+ return this;
3863
+ }
3864
+ off(event, handler) {
3865
+ const listeners = this._eventListeners.get(event) ?? [];
3866
+ this._eventListeners.set(event, listeners.filter((l) => l !== handler));
3867
+ return this;
3868
+ }
3869
+ _emit(event, ...args) {
3870
+ for (const handler of this._eventListeners.get(event) ?? []) {
3871
+ try {
3872
+ handler(...args);
3873
+ } catch {}
3874
+ }
3875
+ }
3876
+ async pdf(_opts) {
3877
+ throw new Error("PDF generation not supported in Bun.WebView. Use engine: 'playwright'.");
3878
+ }
3879
+ coverage = {
3880
+ startJSCoverage: async () => {},
3881
+ stopJSCoverage: async () => [],
3882
+ startCSSCoverage: async () => {},
3883
+ stopCSSCoverage: async () => []
3884
+ };
3885
+ setSessionId(id) {
3886
+ this._sessionId = id;
3887
+ }
3888
+ getSessionId() {
3889
+ return this._sessionId;
3890
+ }
3891
+ getNativeView() {
3892
+ return this.view;
3893
+ }
3894
+ async close() {
3895
+ try {
3896
+ await this.view.close();
3897
+ } catch {}
3898
+ }
3899
+ [Symbol.asyncDispose]() {
3900
+ return this.close();
3901
+ }
3902
+ };
3903
+ });
3904
+
3401
3905
  // src/lib/browser.ts
3402
3906
  var exports_browser = {};
3403
3907
  __export(exports_browser, {
@@ -3419,6 +3923,22 @@ async function launchBrowser(options) {
3419
3923
  }
3420
3924
  return launchLightpanda2({ viewport: options?.viewport });
3421
3925
  }
3926
+ if (engine === "bun") {
3927
+ const { isBunWebViewAvailable: isBunWebViewAvailable2, BunWebViewSession: BunWebViewSession2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
3928
+ if (!isBunWebViewAvailable2()) {
3929
+ throw new BrowserError("Bun.WebView not available. Upgrade to Bun canary: bun upgrade --canary");
3930
+ }
3931
+ const session = new BunWebViewSession2({
3932
+ width: options?.viewport?.width ?? 1280,
3933
+ height: options?.viewport?.height ?? 720
3934
+ });
3935
+ return {
3936
+ newContext: async () => ({ newPage: async () => session, close: async () => {} }),
3937
+ close: async () => session.close(),
3938
+ contexts: () => [],
3939
+ _bunSession: session
3940
+ };
3941
+ }
3422
3942
  const headless = options?.headless ?? true;
3423
3943
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
3424
3944
  try {
@@ -3440,6 +3960,12 @@ async function getPage(browser, options) {
3440
3960
  const { getLightpandaPage: getLightpandaPage2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
3441
3961
  return getLightpandaPage2(browser, options);
3442
3962
  }
3963
+ if (engine === "bun") {
3964
+ const bunSession = browser._bunSession;
3965
+ if (bunSession)
3966
+ return bunSession;
3967
+ throw new BrowserError("Bun.WebView session not found on browser instance");
3968
+ }
3443
3969
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
3444
3970
  try {
3445
3971
  const context = await browser.newContext({
@@ -3459,6 +3985,12 @@ async function closeBrowser(browser, engine) {
3459
3985
  const { closeLightpanda: closeLightpanda2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
3460
3986
  return closeLightpanda2(browser);
3461
3987
  }
3988
+ if (engine === "bun") {
3989
+ const bunSession = browser._bunSession;
3990
+ if (bunSession)
3991
+ await bunSession.close();
3992
+ return;
3993
+ }
3462
3994
  try {
3463
3995
  await browser.close();
3464
3996
  } catch (error) {
@@ -3528,6 +4060,9 @@ async function launchBrowserEngine(engine, config) {
3528
4060
  }
3529
4061
  return launchLightpanda2({ viewport: config.viewport });
3530
4062
  }
4063
+ if (engine === "bun") {
4064
+ return launchBrowser({ headless: config.headless, viewport: config.viewport, engine: "bun" });
4065
+ }
3531
4066
  return chromium2.launch({
3532
4067
  headless: config.headless,
3533
4068
  args: ["--no-sandbox", "--disable-setuid-sandbox"]
@@ -3662,8 +4197,8 @@ async function scanA11y(options) {
3662
4197
  var AXE_CDN = "https://cdn.jsdelivr.net/npm/axe-core@4/axe.min.js";
3663
4198
 
3664
4199
  // src/lib/config.ts
3665
- import { homedir as homedir2 } from "os";
3666
- import { join as join2 } from "path";
4200
+ import { homedir as homedir3 } from "os";
4201
+ import { join as join3 } from "path";
3667
4202
  import { readFileSync, existsSync as existsSync2 } from "fs";
3668
4203
  function getDefaultConfig() {
3669
4204
  return {
@@ -3675,7 +4210,7 @@ function getDefaultConfig() {
3675
4210
  timeout: 60000
3676
4211
  },
3677
4212
  screenshots: {
3678
- dir: join2(homedir2(), ".testers", "screenshots"),
4213
+ dir: join3(homedir3(), ".testers", "screenshots"),
3679
4214
  format: "png",
3680
4215
  quality: 90,
3681
4216
  fullPage: false
@@ -3701,7 +4236,8 @@ function loadConfig() {
3701
4236
  todosDbPath: fileConfig.todosDbPath,
3702
4237
  judgeModel: fileConfig.judgeModel,
3703
4238
  judgeProvider: fileConfig.judgeProvider,
3704
- selfHeal: fileConfig.selfHeal ?? false
4239
+ selfHeal: fileConfig.selfHeal ?? false,
4240
+ conversationsSpace: fileConfig.conversationsSpace
3705
4241
  };
3706
4242
  const envModel = process.env["TESTERS_MODEL"];
3707
4243
  if (envModel) {
@@ -3720,8 +4256,8 @@ function loadConfig() {
3720
4256
  var CONFIG_DIR, CONFIG_PATH;
3721
4257
  var init_config = __esm(() => {
3722
4258
  init_types();
3723
- CONFIG_DIR = join2(homedir2(), ".testers");
3724
- CONFIG_PATH = join2(CONFIG_DIR, "config.json");
4259
+ CONFIG_DIR = join3(homedir3(), ".testers");
4260
+ CONFIG_PATH = join3(CONFIG_DIR, "config.json");
3725
4261
  });
3726
4262
 
3727
4263
  // src/lib/healer.ts
@@ -4163,6 +4699,8 @@ async function runAgentLoop(options) {
4163
4699
  Instructions: ${persona.instructions}` : "",
4164
4700
  persona.traits.length > 0 ? `Traits: ${persona.traits.join(", ")}` : "",
4165
4701
  persona.goals.length > 0 ? `Goals: ${persona.goals.join("; ")}` : "",
4702
+ persona.behaviors && persona.behaviors.length > 0 ? `Behaviors: ${persona.behaviors.join("; ")}` : "",
4703
+ persona.painPoints && persona.painPoints.length > 0 ? `Pain points: ${persona.painPoints.join("; ")}` : "",
4166
4704
  "",
4167
4705
  "Stay in character throughout the test. Your observations, choices, and priorities should reflect this persona."
4168
4706
  ].filter(Boolean).join(`
@@ -4213,6 +4751,15 @@ Instructions: ${persona.instructions}` : "",
4213
4751
  const isOpenAICompat = "provider" in client;
4214
4752
  try {
4215
4753
  for (let turn = 0;turn < maxTurns; turn++) {
4754
+ if (persona && turn > 0 && turn % 5 === 0) {
4755
+ messages = [
4756
+ ...messages,
4757
+ {
4758
+ role: "user",
4759
+ content: `[Reminder: You are ${persona.name} \u2014 ${persona.role}. Traits: ${persona.traits.join(", ")}. Stay in character.]`
4760
+ }
4761
+ ];
4762
+ }
4216
4763
  const response = isOpenAICompat ? await callOpenAICompatible({
4217
4764
  baseUrl: client.baseUrl,
4218
4765
  apiKey: client.apiKey,
@@ -4307,6 +4854,8 @@ function detectProvider(model) {
4307
4854
  return "openai";
4308
4855
  if (model.startsWith("gemini-"))
4309
4856
  return "google";
4857
+ if (model.startsWith("llama-") || model.startsWith("qwen-") || model.includes("cerebras"))
4858
+ return "cerebras";
4310
4859
  return "anthropic";
4311
4860
  }
4312
4861
  function createClient(apiKey) {
@@ -4400,6 +4949,12 @@ function createClientForModel(model, apiKey) {
4400
4949
  throw new AIClientError("No Google API key. Set GOOGLE_API_KEY or pass it explicitly.");
4401
4950
  return { provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", apiKey: key };
4402
4951
  }
4952
+ if (provider === "cerebras") {
4953
+ const key = apiKey ?? process.env["CEREBRAS_API_KEY"];
4954
+ if (!key)
4955
+ throw new AIClientError("No Cerebras API key. Set CEREBRAS_API_KEY or pass it explicitly.");
4956
+ return { provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKey: key };
4957
+ }
4403
4958
  return createClient(apiKey);
4404
4959
  }
4405
4960
  var BROWSER_TOOLS;
@@ -4760,19 +5315,21 @@ function resolveJudgeModel(config) {
4760
5315
  apiKey = process.env["OPENAI_API_KEY"];
4761
5316
  else if (provider === "google")
4762
5317
  apiKey = process.env["GOOGLE_API_KEY"];
5318
+ else if (provider === "cerebras")
5319
+ apiKey = process.env["CEREBRAS_API_KEY"];
4763
5320
  }
4764
5321
  if (!apiKey) {
4765
- apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
5322
+ apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["CEREBRAS_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
4766
5323
  if (!apiKey)
4767
- throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
5324
+ throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, CEREBRAS_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
4768
5325
  }
4769
5326
  return { model, provider, apiKey };
4770
5327
  }
4771
5328
  async function callJudge(prompt, config) {
4772
5329
  const { model, provider, apiKey } = resolveJudgeModel(config);
4773
5330
  const threshold = 0.7;
4774
- if (provider === "openai" || provider === "google") {
4775
- const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
5331
+ if (provider === "openai" || provider === "google" || provider === "cerebras") {
5332
+ const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : provider === "cerebras" ? "https://api.cerebras.ai/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
4776
5333
  const resp2 = await callOpenAICompatible({
4777
5334
  baseUrl,
4778
5335
  apiKey,
@@ -5322,84 +5879,437 @@ var init_eval_runner = __esm(() => {
5322
5879
  init_pipeline_runner();
5323
5880
  });
5324
5881
 
5325
- // src/db/personas.ts
5326
- function createPersona(input) {
5327
- const db2 = getDatabase();
5328
- const id = uuid();
5329
- const short_id = shortUuid();
5330
- const timestamp = now();
5331
- db2.query(`
5332
- INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, metadata, enabled, version, created_at, updated_at)
5333
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
5334
- `).run(id, short_id, input.projectId ?? null, input.name, input.description ?? "", input.role, input.instructions ?? "", JSON.stringify(input.traits ?? []), JSON.stringify(input.goals ?? []), input.metadata ? JSON.stringify(input.metadata) : "{}", input.enabled === false ? 0 : 1, timestamp, timestamp);
5335
- return getPersona(id);
5336
- }
5337
- function getPersona(id) {
5338
- const db2 = getDatabase();
5339
- let row = db2.query("SELECT * FROM personas WHERE id = ?").get(id);
5340
- if (row)
5341
- return personaFromRow(row);
5342
- row = db2.query("SELECT * FROM personas WHERE short_id = ?").get(id);
5343
- if (row)
5344
- return personaFromRow(row);
5345
- return null;
5346
- }
5347
- function listPersonas(filter) {
5348
- const db2 = getDatabase();
5349
- const conditions = [];
5350
- const params = [];
5351
- if (filter?.globalOnly) {
5352
- conditions.push("project_id IS NULL");
5353
- } else if (filter?.projectId) {
5354
- conditions.push("(project_id = ? OR project_id IS NULL)");
5355
- params.push(filter.projectId);
5882
+ // src/lib/failure-analyzer.ts
5883
/**
 * Classify a failed result by pattern-matching its error text and the
 * reasoning text recorded alongside it.
 *
 * Categories are tried in a fixed order (selector, assertion, timeout, auth,
 * network, eval) and the first one that matches wins; anything else is
 * reported as "unknown" with low confidence.
 *
 * @param {string|null|undefined} error - Error message of the failed result.
 * @param {string|null|undefined} reasoning - Reasoning text of the failed result.
 * @returns {object|null} `null` when both inputs are empty or blank, otherwise
 *   an object with `type` and `confidence`, plus `affectedElement`, `expected`,
 *   `actual` and/or `stepNumber` for the categories that extract them.
 */
function analyzeFailure(error, reasoning) {
  const hasAnyText = [error, reasoning].filter(Boolean).join(" ").trim() !== "";
  if (!hasAnyText) {
    return null;
  }

  const errorText = error ?? "";
  const reasoningText = reasoning ?? "";

  // True when any pattern of the list matches the given text.
  const hits = (text, patterns) => patterns.some((pattern) => pattern.test(text));
  // A category applies when either the error or the reasoning mentions it.
  const mentions = (errorPatterns, reasoningPatterns) =>
    hits(errorText, errorPatterns) || hits(reasoningText, reasoningPatterns);
  // The step number is only ever read from the reasoning text ("step 3").
  const readStepNumber = () => {
    const found = reasoningText.match(/step\s+(\d+)/i);
    return found ? parseInt(found[1], 10) : undefined;
  };

  const isSelectorFailure = mentions(
    [/waiting for selector/i, /not found/i, /No element/i],
    [/waiting for selector/i, /could not find element/i, /element not found/i]
  );
  if (isSelectorFailure) {
    // The first single-quoted token is taken to be the selector.
    const quoted = errorText.match(/'([^']+)'/) ?? reasoningText.match(/'([^']+)'/);
    const affectedElement = quoted ? quoted[1] : undefined;
    const stepNumber = readStepNumber();
    return {
      type: "selector_not_found",
      affectedElement,
      stepNumber,
      confidence: affectedElement ? "high" : "medium"
    };
  }

  const isAssertionFailure = mentions(
    [/assert/i, /expected/i, /to equal/i, /to be/i, /\bgot\b/],
    [/assertion.*failed/i, /expected.*but.*got/i]
  );
  if (isAssertionFailure) {
    const expectedThenActual = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1[,\s]+(?:got|received|actual)[:\s]+(['"]?)([^'"]+)\3/i);
    const actualToEqualExpected = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1\s+to\s+equal\s+(['"]?)([^'"]+)\3/i);
    let expected;
    let actual;
    if (expectedThenActual) {
      expected = expectedThenActual[2];
      actual = expectedThenActual[4];
    } else if (actualToEqualExpected) {
      // "expected X to equal Y": X is the observed value, Y the wanted one.
      expected = actualToEqualExpected[4];
      actual = actualToEqualExpected[2];
    }
    const stepNumber = readStepNumber();
    return {
      type: "assertion_failed",
      expected,
      actual,
      stepNumber,
      confidence: expected && actual ? "high" : "medium"
    };
  }

  const isTimeout = mentions(
    [/timeout/i, /timed out/i],
    [/Timeout/i, /timed out/i]
  );
  if (isTimeout) {
    const stepNumber = readStepNumber();
    return {
      type: "timeout",
      stepNumber,
      confidence: "high"
    };
  }

  const isAuthError = mentions(
    [/\b401\b/, /\b403\b/, /login/i, /unauthorized/i, /\bauth\b/i],
    [/\b401\b/, /\b403\b/, /unauthorized/i, /authentication/i]
  );
  if (isAuthError) {
    return {
      type: "auth_error",
      confidence: "high"
    };
  }

  const isNetworkError = mentions(
    [/ECONNREFUSED/i, /ENOTFOUND/i, /fetch failed/i, /network/i],
    [/ECONNREFUSED/i, /fetch failed/i, /connection refused/i]
  );
  if (isNetworkError) {
    return {
      type: "network_error",
      confidence: "high"
    };
  }

  const isEvalFailure = mentions(
    [/\beval\b/i, /evaluate/i, /\bscript\b/i],
    [/\beval\b/i, /evaluate/i]
  );
  if (isEvalFailure) {
    return {
      type: "eval_failed",
      confidence: "medium"
    };
  }

  return {
    type: "unknown",
    confidence: "low"
  };
}
5385
- var init_personas = __esm(() => {
5386
- init_types();
5387
- init_database();
5388
- });
5389
5955
 
5390
- // src/lib/screenshotter.ts
5391
- import { mkdirSync as mkdirSync2, existsSync as existsSync3, writeFileSync } from "fs";
5392
- import { join as join3 } from "path";
5393
- import { homedir as homedir3 } from "os";
5394
- function slugify(text) {
5395
- return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
5956
+ // src/lib/costs.ts
5957
+ import chalk from "chalk";
5958
/**
 * Map a reporting period to the SQL fragment that restricts `r.created_at`.
 *
 * The fragment is meant to be appended after an existing `WHERE` condition,
 * which is why it starts with `AND`; "all" yields an empty fragment.
 *
 * @param {string} period - One of "day", "week", "month" or "all".
 * @returns {string|undefined} The SQL fragment, or `undefined` for any other
 *   period value.
 */
function getDateFilter(period) {
  if (period === "day") {
    return "AND r.created_at >= date('now', 'start of day')";
  }
  if (period === "week") {
    return "AND r.created_at >= date('now', '-7 days')";
  }
  if (period === "month") {
    return "AND r.created_at >= date('now', '-30 days')";
  }
  if (period === "all") {
    return "";
  }
}
5397
- function generateFilename(stepNumber, action) {
5398
- const padded = String(stepNumber).padStart(3, "0");
5399
- const slug = slugify(action);
5400
- return `${padded}_${slug}.png`;
5970
/**
 * Number of days a reporting period is treated as covering.
 *
 * NOTE(review): "all" is mapped to 30 days, the same as "month" — confirm this
 * is the intended basis for monthly projections over all-time data.
 *
 * @param {string} period - One of "day", "week", "month" or "all".
 * @returns {number|undefined} Day count, or `undefined` for any other value.
 */
function getPeriodDays(period) {
  const daysByPeriod = new Map([
    ["day", 1],
    ["week", 7],
    ["month", 30],
    ["all", 30]
  ]);
  return daysByPeriod.get(period);
}
5402
- function formatDate(date) {
5982
/**
 * Read the `budget` section of the loaded configuration and fill in defaults.
 *
 * Only `null`/`undefined` settings fall back to a default, so an explicit `0`
 * is kept.
 *
 * @returns {{maxPerRunCents: number, maxPerDayCents: number, warnAtPercent: number}}
 *   Budget limits; defaults are 50 cents per run, 500 cents per day and a
 *   warning threshold of 0.8.
 */
function loadBudgetConfig() {
  const { budget } = loadConfig();
  const { maxPerRunCents, maxPerDayCents, warnAtPercent } = budget ?? {};
  return {
    maxPerRunCents: maxPerRunCents ?? 50,
    maxPerDayCents: maxPerDayCents ?? 500,
    warnAtPercent: warnAtPercent ?? 0.8
  };
}
5991
/**
 * Aggregate recorded cost and token usage from the `results` table.
 *
 * Runs three queries that share the same period/project filter: overall
 * totals, a per-model breakdown, and the ten most expensive scenarios.
 *
 * @param {{period?: string, projectId?: string}} [options] - `period` defaults
 *   to "month"; `projectId`, when set, restricts rows to runs of that project.
 * @returns {object} `{ period, totalCostCents, totalTokens, runCount, byModel,
 *   byScenario, avgCostPerRun, estimatedMonthlyCents }`.
 */
function getCostSummary(options) {
  const db2 = getDatabase();
  const period = options?.period ?? "month";
  const projectId = options?.projectId;

  // The project id is the only bound parameter; the date filter is a fixed
  // fragment chosen by getDateFilter.
  const whereClause = `WHERE 1=1 ${getDateFilter(period)} ${projectId ? "AND ru.project_id = ?" : ""}`;
  const bindings = projectId ? [projectId] : [];

  const totals = db2.query(`SELECT
    COALESCE(SUM(r.cost_cents), 0) as total_cost,
    COALESCE(SUM(r.tokens_used), 0) as total_tokens,
    COUNT(DISTINCT r.run_id) as run_count
    FROM results r
    JOIN runs ru ON r.run_id = ru.id
    ${whereClause}`).get(...bindings);

  const perModelRows = db2.query(`SELECT
    r.model,
    COALESCE(SUM(r.cost_cents), 0) as cost_cents,
    COALESCE(SUM(r.tokens_used), 0) as tokens,
    COUNT(DISTINCT r.run_id) as runs
    FROM results r
    JOIN runs ru ON r.run_id = ru.id
    ${whereClause}
    GROUP BY r.model
    ORDER BY cost_cents DESC`).all(...bindings);
  const byModel = perModelRows.reduce((acc, { model, cost_cents, tokens, runs }) => {
    acc[model] = { costCents: cost_cents, tokens, runs };
    return acc;
  }, {});

  const perScenarioRows = db2.query(`SELECT
    r.scenario_id,
    COALESCE(s.name, r.scenario_id) as name,
    COALESCE(SUM(r.cost_cents), 0) as cost_cents,
    COALESCE(SUM(r.tokens_used), 0) as tokens,
    COUNT(DISTINCT r.run_id) as runs
    FROM results r
    JOIN runs ru ON r.run_id = ru.id
    LEFT JOIN scenarios s ON r.scenario_id = s.id
    ${whereClause}
    GROUP BY r.scenario_id
    ORDER BY cost_cents DESC
    LIMIT 10`).all(...bindings);
  const byScenario = perScenarioRows.map(({ scenario_id, name, cost_cents, tokens, runs }) => ({
    scenarioId: scenario_id,
    name,
    costCents: cost_cents,
    tokens,
    runs
  }));

  const runCount = totals.run_count;
  const periodDays = getPeriodDays(period);
  let avgCostPerRun = 0;
  if (runCount > 0) {
    avgCostPerRun = totals.total_cost / runCount;
  }
  // Scale the period's spend to a 30-day month.
  let estimatedMonthlyCents = 0;
  if (periodDays > 0) {
    estimatedMonthlyCents = totals.total_cost / periodDays * 30;
  }

  return {
    period,
    totalCostCents: totals.total_cost,
    totalTokens: totals.total_tokens,
    runCount,
    byModel,
    byScenario,
    avgCostPerRun,
    estimatedMonthlyCents
  };
}
6058
/**
 * Resolve the estimated per-scenario cost (in cents) for a model name.
 *
 * An exact key of COST_PER_SCENARIO_CENTS wins. Otherwise the lower-cased
 * name is matched against known model families in a fixed order, so
 * "gpt-4o-mini" is tested before "gpt-4o" and "llama-3.3" before the generic
 * "llama". Unrecognized or non-string models fall back to 10 cents.
 *
 * @param {string} model - Model alias or full model id.
 * @returns {number} Estimated cost per scenario in cents.
 */
function modelToCostKey(model) {
  const FALLBACK_CENTS = 10;
  // A non-string model cannot be matched; previously this crashed on
  // `.toLowerCase()`.
  if (typeof model !== "string") {
    return FALLBACK_CENTS;
  }

  // Only own keys count: a plain lookup would resolve names such as
  // "constructor" or "toString" to inherited Object.prototype members and
  // return a function instead of a number.
  const isOwnKey = Object.prototype.hasOwnProperty.call(COST_PER_SCENARIO_CENTS, model);
  const exact = isOwnKey ? COST_PER_SCENARIO_CENTS[model] : undefined;
  if (exact !== undefined) {
    return exact;
  }

  const lower = model.toLowerCase();
  // [substrings that identify the family, cost-table key]; order matters.
  const families = [
    [["opus"], "opus"],
    [["sonnet"], "sonnet"],
    [["haiku"], "haiku"],
    [["gpt-4o-mini"], "gpt-4o-mini"],
    [["gpt-4o"], "gpt-4o"],
    [["gemini-2.0-flash", "gemini-flash"], "gemini-2.0-flash"],
    [["gemini-1.5-pro", "gemini-pro"], "gemini-1.5-pro"],
    [["llama-3.3", "llama3.3"], "llama-3.3-70b"],
    [["llama"], "llama-3.1-8b"]
  ];
  for (const [needles, costKey] of families) {
    if (needles.some((needle) => lower.includes(needle))) {
      return COST_PER_SCENARIO_CENTS[costKey];
    }
  }
  return FALLBACK_CENTS;
}
6083
/**
 * Estimate the cost of a run in cents before it starts.
 *
 * @param {number} scenarioCount - Number of scenarios in the run.
 * @param {string} model - Model name, resolved through modelToCostKey.
 * @param {number} [samples=1] - Samples per scenario; values below 1 count as 1.
 * @returns {number} scenarioCount × per-scenario cost × samples.
 */
function estimateRunCostCents(scenarioCount, model, samples = 1) {
  const perScenarioCents = modelToCostKey(model);
  const effectiveSamples = Math.max(1, samples);
  const singlePassCents = scenarioCount * perScenarioCents;
  return singlePassCents * effectiveSamples;
}
6087
/**
 * List recorded cost per scenario, most expensive first.
 *
 * @param {{period?: string, projectId?: string}} [options] - `period` defaults
 *   to "month"; `projectId`, when set, restricts rows to runs of that project.
 * @returns {Array<object>} One `{ scenarioId, name, runCount, totalCostCents,
 *   avgCostPerRunCents }` entry per scenario; the average is 0 when the
 *   scenario has no runs.
 */
function getCostsByScenario(options) {
  const db2 = getDatabase();
  const period = options?.period ?? "month";
  const projectId = options?.projectId;
  const dateFilter = getDateFilter(period);

  // Only the project id is bound as a parameter.
  let projectFilter = "";
  let bindings = [];
  if (projectId) {
    projectFilter = "AND ru.project_id = ?";
    bindings = [projectId];
  }

  const statement = db2.query(`SELECT
    r.scenario_id,
    COALESCE(s.name, r.scenario_id) as name,
    COUNT(DISTINCT r.run_id) as run_count,
    COALESCE(SUM(r.cost_cents), 0) as total_cost_cents
    FROM results r
    JOIN runs ru ON r.run_id = ru.id
    LEFT JOIN scenarios s ON r.scenario_id = s.id
    WHERE 1=1 ${dateFilter} ${projectFilter}
    GROUP BY r.scenario_id
    ORDER BY total_cost_cents DESC`);

  return statement.all(...bindings).map((record) => {
    const { scenario_id, name, run_count, total_cost_cents } = record;
    return {
      scenarioId: scenario_id,
      name,
      runCount: run_count,
      totalCostCents: total_cost_cents,
      avgCostPerRunCents: run_count > 0 ? total_cost_cents / run_count : 0
    };
  });
}
6113
/**
 * Render per-scenario costs as a plain-text table for the terminal.
 *
 * @param {Array<object>} rows - Entries with `name`, `runCount`,
 *   `totalCostCents` and `avgCostPerRunCents`.
 * @param {string} period - Period label shown in the heading.
 * @returns {string} Newline-joined output; a "No cost data found." notice
 *   replaces the table when `rows` is empty.
 */
function formatCostsByScenarioTerminal(rows, period) {
  const heading = ["", chalk.bold(` Cost by Scenario (${period})`), ""];
  if (rows.length === 0) {
    return [...heading, chalk.dim(" No cost data found."), ""].join("\n");
  }

  const columnTitles = ` ${"Scenario".padEnd(40)} ${"Runs".padEnd(8)} ${"Total Cost".padEnd(14)} Avg/Run`;
  const rule = ` ${"\u2500".repeat(40)} ${"\u2500".repeat(8)} ${"\u2500".repeat(14)} ${"\u2500".repeat(10)}`;
  const body = rows.map((row) => {
    // Names longer than 38 characters are cut to 35 plus an ellipsis.
    const label = row.name.length > 38 ? row.name.slice(0, 35) + "..." : row.name;
    const runs = String(row.runCount).padEnd(8);
    const total = formatDollars(row.totalCostCents).padEnd(14);
    const average = formatDollars(row.avgCostPerRunCents);
    return ` ${label.padEnd(40)} ${runs} ${total} ${average}`;
  });

  return [...heading, columnTitles, rule, ...body, ""].join("\n");
}
6134
/**
 * Decide whether a run with the given estimated cost fits the budget.
 *
 * Checks, in order: the per-run limit, the daily limit (today's recorded
 * spend plus this run), and the warning threshold (a fraction of the daily
 * limit).
 *
 * @param {number} estimatedCostCents - Estimated cost of the run in cents.
 * @returns {{allowed: boolean, warning?: string}} `allowed: false` with a
 *   reason when a limit would be exceeded; `allowed: true` with a warning
 *   when the threshold is crossed; plain `{ allowed: true }` otherwise.
 */
function checkBudget(estimatedCostCents) {
  const { maxPerRunCents, maxPerDayCents, warnAtPercent } = loadBudgetConfig();

  if (estimatedCostCents > maxPerRunCents) {
    const warning = `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(maxPerRunCents)})`;
    return { allowed: false, warning };
  }

  const spentTodayCents = getCostSummary({ period: "day" }).totalCostCents;
  const projectedDaily = spentTodayCents + estimatedCostCents;

  if (projectedDaily > maxPerDayCents) {
    const warning = `Daily spending (${formatDollars(spentTodayCents)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(maxPerDayCents)})`;
    return { allowed: false, warning };
  }

  const warnThresholdCents = maxPerDayCents * warnAtPercent;
  if (projectedDaily > warnThresholdCents) {
    const percentUsed = Math.round(projectedDaily / maxPerDayCents * 100);
    const warning = `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(maxPerDayCents)} (${percentUsed}%)`;
    return { allowed: true, warning };
  }

  return { allowed: true };
}
6158
/**
 * Format an amount in cents as a dollar string with two decimals.
 *
 * @param {number} cents - Amount in cents.
 * @returns {string} For example `"$1.50"` for 150.
 */
function formatDollars(cents) {
  const dollars = cents / 100;
  return "$" + dollars.toFixed(2);
}
6161
/**
 * Format a token count compactly: plain below 1000, "K" with one decimal
 * from 1000, "M" with one decimal from one million.
 *
 * Counts just under one million whose "K" value rounds up to 1000.0 are
 * shown in millions ("1.0M") rather than as "1000.0K".
 *
 * @param {number} tokens - Token count.
 * @returns {string} Formatted count, e.g. "999", "1.5K", "2.3M".
 */
function formatTokens(tokens) {
  const inMillions = () => `${(tokens / 1e6).toFixed(1)}M`;
  if (tokens >= 1e6) {
    return inMillions();
  }
  if (tokens >= 1000) {
    const thousands = (tokens / 1000).toFixed(1);
    // Rounding can push e.g. 999990 to "1000.0"; roll over to the next unit.
    return Number(thousands) >= 1000 ? inMillions() : `${thousands}K`;
  }
  return String(tokens);
}
6168
/**
 * Render a cost summary as plain text for the terminal.
 *
 * Always prints the totals; the "By Model" and "Scenarios by Cost" tables
 * are added only when the summary has entries for them.
 *
 * @param {object} summary - Summary with `period`, `totalCostCents`,
 *   `totalTokens`, `runCount`, `avgCostPerRun`, `estimatedMonthlyCents`,
 *   `byModel` and `byScenario`.
 * @returns {string} Newline-joined output.
 */
function formatCostsTerminal(summary) {
  const rule = (width) => "\u2500".repeat(width);

  const overview = [
    "",
    chalk.bold(` Cost Summary (${summary.period})`),
    "",
    ` Total: ${chalk.yellow(formatDollars(summary.totalCostCents))} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`,
    ` Avg/run: ${chalk.yellow(formatDollars(summary.avgCostPerRun))}`,
    ` Est/month: ${chalk.yellow(formatDollars(summary.estimatedMonthlyCents))}`
  ];

  const modelSection = [];
  const modelEntries = Object.entries(summary.byModel);
  if (modelEntries.length > 0) {
    modelSection.push(
      "",
      chalk.bold(" By Model"),
      ` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`,
      ` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)}`
    );
    modelEntries.forEach(([model, data]) => {
      modelSection.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
    });
  }

  const scenarioSection = [];
  if (summary.byScenario.length > 0) {
    scenarioSection.push(
      "",
      chalk.bold(" Scenarios by Cost (most expensive first)"),
      ` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`,
      ` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)} ${rule(10)}`
    );
    summary.byScenario.forEach((s) => {
      // Names longer than 38 characters are cut to 35 plus an ellipsis.
      const label = s.name.length > 38 ? s.name.slice(0, 35) + "..." : s.name;
      const avgPerRun = s.runs > 0 ? s.costCents / s.runs : 0;
      scenarioSection.push(` ${label.padEnd(40)} ${formatDollars(s.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(s.runs).padEnd(6)} ${formatTokens(s.tokens)}`);
    });
  }

  return [...overview, ...modelSection, ...scenarioSection, ""].join("\n");
}
6201
/**
 * Serialize a cost summary as pretty-printed JSON (two-space indent).
 *
 * @param {object} summary - Cost summary to serialize.
 * @returns {string} JSON text.
 */
function formatCostsJSON(summary) {
  const INDENT_SPACES = 2;
  return JSON.stringify(summary, null, INDENT_SPACES);
}
6204
/**
 * Render the per-scenario part of a cost summary as CSV.
 *
 * Columns: scenario, runs, total_cost_cents, avg_cost_cents, tokens. The
 * average is printed with two decimals and is 0.00 when a scenario has no
 * runs.
 *
 * Scenario names are quoted whenever they contain a comma, a double quote or
 * a line break, with embedded quotes doubled, so such names do not corrupt
 * the row structure.
 *
 * @param {object} summary - Summary whose `byScenario` entries carry `name`,
 *   `runs`, `costCents` and `tokens`.
 * @returns {string} CSV text, header line first, rows separated by "\n".
 */
function formatCostsCsv(summary) {
  // Quote a field only when it contains a character that needs escaping.
  const escapeField = (value) => {
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const header = "scenario,runs,total_cost_cents,avg_cost_cents,tokens";
  const rows = summary.byScenario.map((s) => {
    const avgCostCents = s.runs > 0 ? s.costCents / s.runs : 0;
    return `${escapeField(s.name)},${s.runs},${s.costCents},${avgCostCents.toFixed(2)},${s.tokens}`;
  });
  return [header, ...rows].join("\n");
}
6215
// Estimated cost per scenario in cents, keyed by model alias or model id.
// Declared up front and filled in when init_costs() runs.
var COST_PER_SCENARIO_CENTS;

// Module initializer for src/lib/costs.ts (bundler `__esm` wrapper): runs the
// database and config initializers, then builds the cost table.
var init_costs = __esm(() => {
  init_database();
  init_config();
  COST_PER_SCENARIO_CENTS = {
    // Short aliases
    "haiku": 5,
    "sonnet": 30,
    "opus": 150,
    // Claude-prefixed names
    "claude-haiku": 5,
    "claude-sonnet": 30,
    "claude-opus": 150,
    // GPT-4o family
    "gpt-4o-mini": 3,
    "gpt-4o": 25,
    // Gemini family
    "gemini-2.0-flash": 2,
    "gemini-1.5-pro": 20,
    // Llama family
    "llama-3.1-8b": 1,
    "llama-3.3-70b": 3
  };
});
6234
+
6235
+ // src/db/personas.ts
6236
/**
 * Insert a new persona row and return it as loaded back by getPersona.
 *
 * List and object fields are stored as JSON text. Missing optional fields
 * get defaults: empty strings for description/instructions, empty
 * arrays/objects for the JSON columns, "intermediate" for the expertise
 * level, `null` for project and auth fields. `enabled` is stored as 0 only
 * when it is explicitly `false`; `version` starts at 1.
 *
 * NOTE(review): `authPassword` is written to the row exactly as provided —
 * confirm whether it should be protected before storage.
 *
 * @param {object} input - Persona fields; `name` and `role` are passed
 *   through without a default.
 * @returns {object|null} The result of getPersona for the new id.
 */
function createPersona(input) {
  const db2 = getDatabase();
  const id = uuid();
  const short_id = shortUuid();
  const timestamp = now();
  const asJson = (value, fallback) => JSON.stringify(value ?? fallback);

  // Bound in the same order as the column list below.
  const values = [
    id,
    short_id,
    input.projectId ?? null,
    input.name,
    input.description ?? "",
    input.role,
    input.instructions ?? "",
    asJson(input.traits, []),
    asJson(input.goals, []),
    asJson(input.behaviors, []),
    input.expertiseLevel ?? "intermediate",
    asJson(input.demographics, {}),
    asJson(input.painPoints, []),
    input.metadata ? JSON.stringify(input.metadata) : "{}",
    input.enabled === false ? 0 : 1,
    input.authEmail ?? null,
    input.authPassword ?? null,
    input.authLoginPath ?? null,
    timestamp,
    timestamp
  ];

  db2.query(`
    INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, behaviors, expertise_level, demographics, pain_points, metadata, enabled, auth_email, auth_password, auth_login_path, version, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
  `).run(...values);
  return getPersona(id);
}
6247
/**
 * Look up a persona by full id, falling back to its short id.
 *
 * @param {string} id - Full persona id or short id.
 * @returns {object|null} The persona mapped through personaFromRow, or
 *   `null` when neither lookup finds a row.
 */
function getPersona(id) {
  const db2 = getDatabase();
  // Try the full id first, then the short id.
  for (const column of ["id", "short_id"]) {
    const row = db2.query(`SELECT * FROM personas WHERE ${column} = ?`).get(id);
    if (row) {
      return personaFromRow(row);
    }
  }
  return null;
}
6257
/**
 * List personas, newest first, with optional filtering and paging.
 *
 * @param {object} [filter]
 * @param {boolean} [filter.globalOnly] - Only personas without a project.
 * @param {string} [filter.projectId] - Personas of this project plus global
 *   ones; ignored when `globalOnly` is set.
 * @param {boolean} [filter.enabled] - Restrict to enabled/disabled personas.
 * @param {number} [filter.limit] - Maximum number of rows.
 * @param {number} [filter.offset] - Rows to skip; may be used without `limit`.
 * @returns {Array<object>} Rows mapped through personaFromRow.
 */
function listPersonas(filter) {
  const db2 = getDatabase();
  const conditions = [];
  const params = [];
  if (filter?.globalOnly) {
    conditions.push("project_id IS NULL");
  } else if (filter?.projectId) {
    conditions.push("(project_id = ? OR project_id IS NULL)");
    params.push(filter.projectId);
  }
  if (filter?.enabled !== undefined) {
    conditions.push("enabled = ?");
    params.push(filter.enabled ? 1 : 0);
  }
  let sql = "SELECT * FROM personas";
  if (conditions.length > 0) {
    sql += " WHERE " + conditions.join(" AND ");
  }
  sql += " ORDER BY created_at DESC";
  if (filter?.limit) {
    sql += " LIMIT ?";
    params.push(filter.limit);
  } else if (filter?.offset) {
    // SQLite only accepts OFFSET as part of a LIMIT clause; a negative
    // LIMIT means "no upper bound", so an offset alone stays valid SQL.
    sql += " LIMIT -1";
  }
  if (filter?.offset) {
    sql += " OFFSET ?";
    params.push(filter.offset);
  }
  const rows = db2.query(sql).all(...params);
  return rows.map(personaFromRow);
}
6287
/**
 * Delete a persona identified by full id or short id.
 *
 * The persona is resolved through getPersona first, so the DELETE always
 * uses the full id.
 *
 * @param {string} id - Full persona id or short id.
 * @returns {boolean} `true` when a row was removed, `false` when the persona
 *   does not exist or nothing changed.
 */
function deletePersona(id) {
  const db2 = getDatabase();
  const existing = getPersona(id);
  if (existing) {
    const { changes } = db2.query("DELETE FROM personas WHERE id = ?").run(existing.id);
    return changes > 0;
  }
  return false;
}
6295
// Module initializer for src/db/personas.ts (bundler `__esm` wrapper): runs
// the initializers of the modules this one depends on, types before database.
var init_personas = __esm(() => {
  init_types();
  init_database();
});
6299
+
6300
+ // src/lib/screenshotter.ts
6301
+ import { mkdirSync as mkdirSync3, existsSync as existsSync3, writeFileSync } from "fs";
6302
+ import { join as join4 } from "path";
6303
+ import { homedir as homedir4 } from "os";
6304
/**
 * Turn free text into a lower-case, dash-separated slug.
 *
 * Every run of characters other than a-z and 0-9 becomes a single dash, and
 * dashes at either end are removed.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The slug; empty when the text has no a-z/0-9 characters.
 */
function slugify(text) {
  const dashed = text.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
6307
/**
 * Build a screenshot file name from a step number and an action label.
 *
 * @param {number} stepNumber - Step index, left-padded with zeros to three digits.
 * @param {string} action - Action text, converted with slugify.
 * @returns {string} `<padded step>_<slug>.png`, e.g. "007_click-login.png".
 */
function generateFilename(stepNumber, action) {
  const stepPrefix = String(stepNumber).padStart(3, "0");
  return [stepPrefix, "_", slugify(action), ".png"].join("");
}
6312
/**
 * Format a date as its UTC calendar day.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The first ten characters of the ISO-8601 string
 *   (`YYYY-MM-DD`).
 */
function formatDate(date) {
  const isoTimestamp = date.toISOString();
  return isoTimestamp.substring(0, 10);
}
5405
6315
  function formatTime(date) {
@@ -5410,11 +6320,11 @@ function getScreenshotDir(baseDir, runId, scenarioSlug, projectName, timestamp)
5410
6320
  const project = projectName ?? "default";
5411
6321
  const dateDir = formatDate(now2);
5412
6322
  const timeDir = `${formatTime(now2)}_${runId.slice(0, 8)}`;
5413
- return join3(baseDir, project, dateDir, timeDir, scenarioSlug);
6323
+ return join4(baseDir, project, dateDir, timeDir, scenarioSlug);
5414
6324
  }
5415
6325
  function ensureDir(dirPath) {
5416
6326
  if (!existsSync3(dirPath)) {
5417
- mkdirSync2(dirPath, { recursive: true });
6327
+ mkdirSync3(dirPath, { recursive: true });
5418
6328
  }
5419
6329
  }
5420
6330
  function writeMetaSidecar(screenshotPath, meta) {
@@ -5425,10 +6335,10 @@ function writeMetaSidecar(screenshotPath, meta) {
5425
6335
  }
5426
6336
  async function generateThumbnail(page, screenshotDir, filename) {
5427
6337
  try {
5428
- const thumbDir = join3(screenshotDir, "_thumbnail");
6338
+ const thumbDir = join4(screenshotDir, "_thumbnail");
5429
6339
  ensureDir(thumbDir);
5430
6340
  const thumbFilename = filename.replace(/\.(png|jpeg)$/, ".thumb.$1");
5431
- const thumbPath = join3(thumbDir, thumbFilename);
6341
+ const thumbPath = join4(thumbDir, thumbFilename);
5432
6342
  const viewport = page.viewportSize();
5433
6343
  if (viewport) {
5434
6344
  await page.screenshot({
@@ -5462,14 +6372,16 @@ class Screenshotter {
5462
6372
  const action = options.description ?? options.action;
5463
6373
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
5464
6374
  const filename = generateFilename(options.stepNumber, action);
5465
- const filePath = join3(dir, filename);
6375
+ const filePath = join4(dir, filename);
5466
6376
  ensureDir(dir);
5467
- await page.screenshot({
6377
+ const screenshotOpts = {
5468
6378
  path: filePath,
5469
6379
  fullPage: this.fullPage,
5470
- type: this.format,
5471
- quality: this.format === "jpeg" ? this.quality : undefined
5472
- });
6380
+ type: this.format
6381
+ };
6382
+ if (this.format === "jpeg")
6383
+ screenshotOpts.quality = this.quality;
6384
+ await page.screenshot(screenshotOpts);
5473
6385
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
5474
6386
  const pageUrl = page.url();
5475
6387
  const timestamp = new Date().toISOString();
@@ -5497,14 +6409,16 @@ class Screenshotter {
5497
6409
  const action = options.description ?? options.action;
5498
6410
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
5499
6411
  const filename = generateFilename(options.stepNumber, action);
5500
- const filePath = join3(dir, filename);
6412
+ const filePath = join4(dir, filename);
5501
6413
  ensureDir(dir);
5502
- await page.screenshot({
6414
+ const ssOpts2 = {
5503
6415
  path: filePath,
5504
6416
  fullPage: true,
5505
- type: this.format,
5506
- quality: this.format === "jpeg" ? this.quality : undefined
5507
- });
6417
+ type: this.format
6418
+ };
6419
+ if (this.format === "jpeg")
6420
+ ssOpts2.quality = this.quality;
6421
+ await page.screenshot(ssOpts2);
5508
6422
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
5509
6423
  const pageUrl = page.url();
5510
6424
  const timestamp = new Date().toISOString();
@@ -5532,13 +6446,15 @@ class Screenshotter {
5532
6446
  const action = options.description ?? options.action;
5533
6447
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
5534
6448
  const filename = generateFilename(options.stepNumber, action);
5535
- const filePath = join3(dir, filename);
6449
+ const filePath = join4(dir, filename);
5536
6450
  ensureDir(dir);
5537
- await page.locator(selector).screenshot({
6451
+ const ssOpts3 = {
5538
6452
  path: filePath,
5539
- type: this.format,
5540
- quality: this.format === "jpeg" ? this.quality : undefined
5541
- });
6453
+ type: this.format
6454
+ };
6455
+ if (this.format === "jpeg")
6456
+ ssOpts3.quality = this.quality;
6457
+ await page.locator(selector).screenshot(ssOpts3);
5542
6458
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
5543
6459
  const pageUrl = page.url();
5544
6460
  const timestamp = new Date().toISOString();
@@ -5564,7 +6480,7 @@ class Screenshotter {
5564
6480
  }
5565
6481
  var DEFAULT_BASE_DIR;
5566
6482
  var init_screenshotter = __esm(() => {
5567
- DEFAULT_BASE_DIR = join3(homedir3(), ".testers", "screenshots");
6483
+ DEFAULT_BASE_DIR = join4(homedir4(), ".testers", "screenshots");
5568
6484
  });
5569
6485
 
5570
6486
  // src/lib/webhooks.ts
@@ -5735,13 +6651,13 @@ async function pushFailedRunToLogs(run, failedResults, scenarios) {
5735
6651
  // src/lib/todos-connector.ts
5736
6652
  import { Database as Database2 } from "bun:sqlite";
5737
6653
  import { existsSync as existsSync4 } from "fs";
5738
- import { join as join4 } from "path";
5739
- import { homedir as homedir4 } from "os";
6654
+ import { join as join5 } from "path";
6655
+ import { homedir as homedir5 } from "os";
5740
6656
  function resolveTodosDbPath() {
5741
6657
  const envPath = process.env["TODOS_DB_PATH"];
5742
6658
  if (envPath)
5743
6659
  return envPath;
5744
- return join4(homedir4(), ".todos", "todos.db");
6660
+ return join5(homedir5(), ".todos", "todos.db");
5745
6661
  }
5746
6662
  function connectToTodos() {
5747
6663
  const dbPath = resolveTodosDbPath();
@@ -5927,6 +6843,45 @@ async function notifyFailureToConversations(run, failedResults, scenarios) {
5927
6843
  });
5928
6844
  } catch {}
5929
6845
  }
6846
+ async function notifyRunToConversations(run, results, options) {
6847
+ const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
6848
+ const space = options?.spaceId ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
6849
+ if (!baseUrl || !space)
6850
+ return;
6851
+ const passRate = run.total > 0 ? (run.passed / run.total * 100).toFixed(0) : "0";
6852
+ const statusIcon = run.status === "passed" ? "\u2705" : run.status === "failed" ? "\u274C" : "\u26A0\uFE0F";
6853
+ const durationSec = run.finishedAt && run.startedAt ? ((new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime()) / 1000).toFixed(1) : null;
6854
+ const lines = [
6855
+ `${statusIcon} **Testers run ${run.status.toUpperCase()}** \u2014 ${run.passed}/${run.total} scenarios (${passRate}% pass rate)`,
6856
+ ``,
6857
+ `**URL:** ${run.url}`,
6858
+ `**Run ID:** \`${run.id}\``,
6859
+ `**Model:** ${run.model}`,
6860
+ durationSec ? `**Duration:** ${durationSec}s` : null
6861
+ ].filter((l) => l !== null);
6862
+ if (run.status === "failed") {
6863
+ const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
6864
+ const failLines = failedResults.slice(0, 5).map((r) => {
6865
+ const err = r.error ? ` \u2014 ${r.error.slice(0, 100)}` : "";
6866
+ return ` \u274C ${r.scenarioId.slice(0, 8)}${err}`;
6867
+ });
6868
+ if (failLines.length > 0) {
6869
+ lines.push(``, `**Failures:**`);
6870
+ lines.push(...failLines);
6871
+ if (failedResults.length > 5)
6872
+ lines.push(` \u2026 and ${failedResults.length - 5} more`);
6873
+ }
6874
+ }
6875
+ const message = lines.join(`
6876
+ `);
6877
+ try {
6878
+ await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
6879
+ method: "POST",
6880
+ headers: { "Content-Type": "application/json" },
6881
+ body: JSON.stringify({ content: message, from: "testers" })
6882
+ });
6883
+ } catch {}
6884
+ }
5930
6885
  var init_failure_pipeline = __esm(() => {
5931
6886
  init_todos_connector();
5932
6887
  });
@@ -6132,10 +7087,35 @@ async function runSingleScenario(scenario, runId, options) {
6132
7087
  const config = loadConfig();
6133
7088
  if (options.selfHeal !== undefined)
6134
7089
  config.selfHeal = options.selfHeal;
6135
- const model = resolveModel(options.model ?? scenario.model ?? config.defaultModel);
6136
- const client = createClientForModel(model, options.apiKey ?? config.anthropicApiKey);
7090
+ let effectiveOptions = options;
7091
+ if (options.minimal) {
7092
+ effectiveOptions = {
7093
+ ...options,
7094
+ engine: options.engine ?? "playwright"
7095
+ };
7096
+ try {
7097
+ const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda)).catch(() => ({ isLightpandaAvailable: () => false }));
7098
+ if (isLightpandaAvailable2())
7099
+ effectiveOptions = { ...effectiveOptions, engine: "lightpanda" };
7100
+ } catch {}
7101
+ }
7102
+ const model = resolveModel(effectiveOptions.minimal ? "quick" : effectiveOptions.model ?? scenario.model ?? config.defaultModel);
7103
+ if (options.cacheMaxAgeMs && options.cacheMaxAgeMs > 0 && scenario.lastPassedAt && scenario.lastPassedUrl === options.url) {
7104
+ const age = Date.now() - new Date(scenario.lastPassedAt).getTime();
7105
+ if (age < options.cacheMaxAgeMs) {
7106
+ const cached = createResult({ runId, scenarioId: scenario.id, model, stepsTotal: 0 });
7107
+ return updateResult(cached.id, {
7108
+ status: "passed",
7109
+ reasoning: `Cache hit: passed ${Math.round(age / 1000)}s ago at ${options.url}`,
7110
+ stepsCompleted: 0,
7111
+ durationMs: 0,
7112
+ tokensUsed: 0
7113
+ });
7114
+ }
7115
+ }
7116
+ const client = createClientForModel(model, effectiveOptions.apiKey ?? config.anthropicApiKey);
6137
7117
  const screenshotter = new Screenshotter({
6138
- baseDir: options.screenshotDir ?? config.screenshots.dir
7118
+ baseDir: effectiveOptions.screenshotDir ?? config.screenshots.dir
6139
7119
  });
6140
7120
  const resolvedPersonaId = options.personaId ?? scenario.personaId;
6141
7121
  const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
@@ -6151,12 +7131,20 @@ async function runSingleScenario(scenario, runId, options) {
6151
7131
  let browser = null;
6152
7132
  let page = null;
6153
7133
  try {
6154
- browser = await launchBrowser({ headless: !(options.headed ?? false), engine: options.engine });
7134
+ browser = await launchBrowser({ headless: !(effectiveOptions.headed ?? false), engine: effectiveOptions.engine });
6155
7135
  page = await getPage(browser, {
6156
7136
  viewport: config.browser.viewport
6157
7137
  });
6158
7138
  const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
6159
7139
  const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
7140
+ const consoleErrors = [];
7141
+ page.on("console", (msg) => {
7142
+ if (msg.type() === "error")
7143
+ consoleErrors.push(msg.text());
7144
+ });
7145
+ page.on("pageerror", (err) => {
7146
+ consoleErrors.push(err.message);
7147
+ });
6160
7148
  await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
6161
7149
  const stepStartTimes = new Map;
6162
7150
  const agentResult = await withTimeout(runAgentLoop({
@@ -6166,15 +7154,17 @@ async function runSingleScenario(scenario, runId, options) {
6166
7154
  screenshotter,
6167
7155
  model,
6168
7156
  runId,
6169
- maxTurns: 30,
6170
- a11y: options.a11y,
7157
+ maxTurns: effectiveOptions.minimal ? 10 : 30,
7158
+ a11y: effectiveOptions.a11y,
6171
7159
  persona: persona ? {
6172
7160
  name: persona.name,
6173
7161
  role: persona.role,
6174
7162
  description: persona.description,
6175
7163
  instructions: persona.instructions,
6176
7164
  traits: persona.traits,
6177
- goals: persona.goals
7165
+ goals: persona.goals,
7166
+ behaviors: persona.behaviors,
7167
+ painPoints: persona.painPoints
6178
7168
  } : null,
6179
7169
  onStep: (stepEvent) => {
6180
7170
  let stepDurationMs;
@@ -6201,7 +7191,7 @@ async function runSingleScenario(scenario, runId, options) {
6201
7191
  });
6202
7192
  }
6203
7193
  }), scenarioTimeout, scenario.name);
6204
- if (options.engine !== "lightpanda") {
7194
+ if (options.engine !== "lightpanda" && options.engine !== "bun") {
6205
7195
  for (const ss of agentResult.screenshots) {
6206
7196
  try {
6207
7197
  createScreenshot({
@@ -6219,8 +7209,8 @@ async function runSingleScenario(scenario, runId, options) {
6219
7209
  } catch {}
6220
7210
  }
6221
7211
  }
6222
- const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : "";
6223
- const updatedResult = updateResult(result.id, {
7212
+ const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : options.engine === "bun" ? " (Running with Bun.WebView \u2014 native, ~11x faster)" : "";
7213
+ let updatedResult = updateResult(result.id, {
6224
7214
  status: agentResult.status,
6225
7215
  reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
6226
7216
  stepsCompleted: agentResult.stepsCompleted,
@@ -6228,29 +7218,53 @@ async function runSingleScenario(scenario, runId, options) {
6228
7218
  tokensUsed: agentResult.tokensUsed,
6229
7219
  costCents: estimateCost(model, agentResult.tokensUsed)
6230
7220
  });
7221
+ if (agentResult.status === "failed" || agentResult.status === "error") {
7222
+ const failureAnalysis = analyzeFailure(null, agentResult.reasoning ?? null);
7223
+ if (failureAnalysis) {
7224
+ updatedResult = updateResult(result.id, { failureAnalysis });
7225
+ }
7226
+ }
7227
+ if (agentResult.status === "passed") {
7228
+ try {
7229
+ updateScenarioPassedCache(scenario.id, options.url);
7230
+ } catch {}
7231
+ }
6231
7232
  const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
6232
7233
  emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
6233
7234
  return updatedResult;
6234
7235
  } catch (error) {
6235
7236
  const errorMsg = error instanceof Error ? error.message : String(error);
6236
- const updatedResult = updateResult(result.id, {
7237
+ let updatedResult = updateResult(result.id, {
6237
7238
  status: "error",
6238
7239
  error: errorMsg,
6239
7240
  durationMs: Date.now() - new Date(result.createdAt).getTime()
6240
7241
  });
7242
+ const failureAnalysis = analyzeFailure(errorMsg, null);
7243
+ if (failureAnalysis) {
7244
+ updatedResult = updateResult(result.id, { failureAnalysis });
7245
+ }
6241
7246
  emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
6242
7247
  return updatedResult;
6243
7248
  } finally {
6244
7249
  if (browser)
6245
- await closeBrowser(browser, options.engine);
7250
+ await closeBrowser(browser, effectiveOptions.engine);
6246
7251
  }
6247
7252
  }
6248
7253
  async function runBatch(scenarios, options) {
6249
7254
  const config = loadConfig();
6250
- const model = resolveModel(options.model ?? config.defaultModel);
6251
- const parallel = options.parallel ?? 1;
7255
+ const model = resolveModel(options.minimal ? "quick" : options.model ?? config.defaultModel);
7256
+ const parallel = options.minimal ? Math.max(5, options.parallel ?? 1) : options.parallel ?? 1;
6252
7257
  const samples = options.samples ?? 1;
6253
7258
  const flakinessThreshold = options.flakinessThreshold ?? 0.95;
7259
+ if (!options.skipBudgetCheck) {
7260
+ const cap = options.maxCostCents ?? config.defaultMaxCostCents;
7261
+ if (cap !== undefined && cap > 0) {
7262
+ const estimated = estimateRunCostCents(scenarios.length, model, samples);
7263
+ if (estimated > cap) {
7264
+ throw new BudgetExceededError(estimated, cap);
7265
+ }
7266
+ }
7267
+ }
6254
7268
  const run = createRun({
6255
7269
  url: options.url,
6256
7270
  model,
@@ -6392,6 +7406,10 @@ async function runBatch(scenarios, options) {
6392
7406
  createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
6393
7407
  notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
6394
7408
  }
7409
+ const conversationsSpaceId = config.conversationsSpace ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
7410
+ if (conversationsSpaceId) {
7411
+ notifyRunToConversations(finalRun, results, { spaceId: conversationsSpaceId }).catch(() => {});
7412
+ }
6395
7413
  return { run: finalRun, results };
6396
7414
  }
6397
7415
  async function runByFilter(options) {
@@ -6429,6 +7447,16 @@ function startRunAsync(options) {
6429
7447
  priority: options.priority
6430
7448
  });
6431
7449
  }
7450
+ if (!options.skipBudgetCheck) {
7451
+ const cap = options.maxCostCents ?? config.defaultMaxCostCents;
7452
+ if (cap !== undefined && cap > 0 && scenarios.length > 0) {
7453
+ const samples = options.samples ?? 1;
7454
+ const estimated = estimateRunCostCents(scenarios.length, model, samples);
7455
+ if (estimated > cap) {
7456
+ throw new BudgetExceededError(estimated, cap);
7457
+ }
7458
+ }
7459
+ }
6432
7460
  const parallel = options.parallel ?? 1;
6433
7461
  const run = createRun({
6434
7462
  url: options.url,
@@ -6505,9 +7533,11 @@ function estimateCost(model, tokens) {
6505
7533
  }
6506
7534
  var eventHandler = null;
6507
7535
  var init_runner = __esm(() => {
7536
+ init_types();
6508
7537
  init_eval_runner();
6509
7538
  init_runs();
6510
7539
  init_results();
7540
+ init_costs();
6511
7541
  init_screenshots();
6512
7542
  init_scenarios();
6513
7543
  init_personas();
@@ -6532,7 +7562,7 @@ __export(exports_reporter, {
6532
7562
  formatJSON: () => formatJSON,
6533
7563
  formatActionableSummary: () => formatActionableSummary
6534
7564
  });
6535
- import chalk from "chalk";
7565
+ import chalk2 from "chalk";
6536
7566
  function useEmoji() {
6537
7567
  return !process.env["NO_COLOR"] && process.argv.indexOf("--no-color") === -1;
6538
7568
  }
@@ -6540,13 +7570,13 @@ function formatTerminal(run, results, options) {
6540
7570
  const lines = [];
6541
7571
  const failedOnly = options?.failedOnly ?? false;
6542
7572
  lines.push("");
6543
- lines.push(chalk.bold(` Run ${run.id.slice(0, 8)} \u2014 ${run.url}`));
6544
- lines.push(chalk.dim(` Model: ${run.model} | Parallel: ${run.parallel} | Headed: ${run.headed ? "yes" : "no"}`));
7573
+ lines.push(chalk2.bold(` Run ${run.id.slice(0, 8)} \u2014 ${run.url}`));
7574
+ lines.push(chalk2.dim(` Model: ${run.model} | Parallel: ${run.parallel} | Headed: ${run.headed ? "yes" : "no"}`));
6545
7575
  lines.push("");
6546
7576
  if (failedOnly) {
6547
7577
  const passedCount = results.filter((r) => r.status === "passed").length;
6548
7578
  if (passedCount > 0) {
6549
- lines.push(chalk.dim(` (${passedCount} passed scenario${passedCount !== 1 ? "s" : ""} hidden \u2014 use without --failed-only to see all)`));
7579
+ lines.push(chalk2.dim(` (${passedCount} passed scenario${passedCount !== 1 ? "s" : ""} hidden \u2014 use without --failed-only to see all)`));
6550
7580
  lines.push("");
6551
7581
  }
6552
7582
  }
@@ -6564,28 +7594,28 @@ function formatTerminal(run, results, options) {
6564
7594
  const emoji = useEmoji();
6565
7595
  switch (result.status) {
6566
7596
  case "passed":
6567
- statusIcon = emoji ? "\u2705" : chalk.green("PASS");
6568
- statusColor = chalk.green;
7597
+ statusIcon = emoji ? "\u2705" : chalk2.green("PASS");
7598
+ statusColor = chalk2.green;
6569
7599
  break;
6570
7600
  case "failed":
6571
- statusIcon = emoji ? "\u274C" : chalk.red("FAIL");
6572
- statusColor = chalk.red;
7601
+ statusIcon = emoji ? "\u274C" : chalk2.red("FAIL");
7602
+ statusColor = chalk2.red;
6573
7603
  break;
6574
7604
  case "error":
6575
- statusIcon = emoji ? "\u26A0\uFE0F " : chalk.yellow("ERR ");
6576
- statusColor = chalk.yellow;
7605
+ statusIcon = emoji ? "\u26A0\uFE0F " : chalk2.yellow("ERR ");
7606
+ statusColor = chalk2.yellow;
6577
7607
  break;
6578
7608
  default:
6579
- statusIcon = emoji ? "\u23ED\uFE0F " : chalk.dim("SKIP");
6580
- statusColor = chalk.dim;
7609
+ statusIcon = emoji ? "\u23ED\uFE0F " : chalk2.dim("SKIP");
7610
+ statusColor = chalk2.dim;
6581
7611
  break;
6582
7612
  }
6583
- lines.push(` ${statusIcon} ${statusColor(name)} ${chalk.dim(duration)} ${chalk.dim(`${screenshotCount} screenshots`)}`);
7613
+ lines.push(` ${statusIcon} ${statusColor(name)} ${chalk2.dim(duration)} ${chalk2.dim(`${screenshotCount} screenshots`)}`);
6584
7614
  if (result.reasoning && (result.status === "failed" || result.status === "error")) {
6585
- lines.push(chalk.dim(` ${result.reasoning}`));
7615
+ lines.push(chalk2.dim(` ${result.reasoning}`));
6586
7616
  }
6587
7617
  if (result.error) {
6588
- lines.push(chalk.red(` ${result.error}`));
7618
+ lines.push(chalk2.red(` ${result.error}`));
6589
7619
  }
6590
7620
  }
6591
7621
  lines.push("");
@@ -6596,9 +7626,9 @@ function formatTerminal(run, results, options) {
6596
7626
  }
6597
7627
  function formatSummary(run) {
6598
7628
  const duration = run.finishedAt ? `${((new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime()) / 1000).toFixed(1)}s` : "running";
6599
- const passedStr = chalk.green(`${run.passed} passed`);
6600
- const failedStr = run.failed > 0 ? chalk.red(` ${run.failed} failed`) : "";
6601
- const totalStr = chalk.dim(` (${run.total} total)`);
7629
+ const passedStr = chalk2.green(`${run.passed} passed`);
7630
+ const failedStr = run.failed > 0 ? chalk2.red(` ${run.failed} failed`) : "";
7631
+ const totalStr = chalk2.dim(` (${run.total} total)`);
6602
7632
  return ` ${passedStr}${failedStr}${totalStr} in ${duration}`;
6603
7633
  }
6604
7634
  function formatActionableSummary(run, results) {
@@ -6609,18 +7639,18 @@ function formatActionableSummary(run, results) {
6609
7639
  const passStr = `${emoji ? "\u2705" : "PASS"} ${passedCount} passed`;
6610
7640
  const failStr = failedCount > 0 ? ` ${emoji ? "\u274C" : "FAIL"} ${failedCount} failed` : "";
6611
7641
  const lines = [];
6612
- lines.push(` ${chalk.bold(passStr)}${failedCount > 0 ? chalk.bold(failStr) : ""}`);
7642
+ lines.push(` ${chalk2.bold(passStr)}${failedCount > 0 ? chalk2.bold(failStr) : ""}`);
6613
7643
  if (failedCount > 0) {
6614
- lines.push(chalk.dim(` retry failed: testers retry ${shortId} | view: testers results ${shortId}`));
7644
+ lines.push(chalk2.dim(` retry failed: testers retry ${shortId} | view: testers results ${shortId}`));
6615
7645
  } else {
6616
- lines.push(chalk.dim(` view: testers results ${shortId}`));
7646
+ lines.push(chalk2.dim(` view: testers results ${shortId}`));
6617
7647
  }
6618
7648
  const totalCostCents = results.reduce((sum, r) => sum + (r.costCents ?? 0), 0);
6619
7649
  const totalTokens = results.reduce((sum, r) => sum + (r.tokensUsed ?? 0), 0);
6620
7650
  if (totalTokens > 0) {
6621
7651
  const costStr = `$${(totalCostCents / 100).toFixed(4)}`;
6622
7652
  const tokensStr = totalTokens.toLocaleString();
6623
- lines.push(chalk.dim(` ${emoji ? "\uD83D\uDCB0" : "cost:"} Cost: ${costStr} (${tokensStr} tokens)`));
7653
+ lines.push(chalk2.dim(` ${emoji ? "\uD83D\uDCB0" : "cost:"} Cost: ${costStr} (${tokensStr} tokens)`));
6624
7654
  }
6625
7655
  return lines.join(`
6626
7656
  `);
@@ -6685,19 +7715,19 @@ function getExitCode(run) {
6685
7715
  function formatRunList(runs) {
6686
7716
  const lines = [];
6687
7717
  lines.push("");
6688
- lines.push(chalk.bold(" Recent Runs"));
7718
+ lines.push(chalk2.bold(" Recent Runs"));
6689
7719
  lines.push("");
6690
7720
  if (runs.length === 0) {
6691
- lines.push(chalk.dim(" No runs found."));
7721
+ lines.push(chalk2.dim(" No runs found."));
6692
7722
  lines.push("");
6693
7723
  return lines.join(`
6694
7724
  `);
6695
7725
  }
6696
7726
  for (const run of runs) {
6697
- const statusIcon = run.status === "passed" ? chalk.green("PASS") : run.status === "failed" ? chalk.red("FAIL") : run.status === "running" ? chalk.blue("RUN ") : chalk.dim(run.status.toUpperCase().padEnd(4));
7727
+ const statusIcon = run.status === "passed" ? chalk2.green("PASS") : run.status === "failed" ? chalk2.red("FAIL") : run.status === "running" ? chalk2.blue("RUN ") : chalk2.dim(run.status.toUpperCase().padEnd(4));
6698
7728
  const date = new Date(run.startedAt).toLocaleString();
6699
7729
  const id = run.id.slice(0, 8);
6700
- lines.push(` ${statusIcon} ${chalk.dim(id)} ${run.url} ${chalk.dim(`${run.passed}/${run.total}`)} ${chalk.dim(date)}`);
7730
+ lines.push(` ${statusIcon} ${chalk2.dim(id)} ${run.url} ${chalk2.dim(`${run.passed}/${run.total}`)} ${chalk2.dim(date)}`);
6701
7731
  }
6702
7732
  lines.push("");
6703
7733
  return lines.join(`
@@ -6715,32 +7745,33 @@ function getScenarioRunStats(scenarioId) {
6715
7745
  function formatScenarioList(scenarios) {
6716
7746
  const lines = [];
6717
7747
  lines.push("");
6718
- lines.push(chalk.bold(" Scenarios"));
7748
+ lines.push(chalk2.bold(" Scenarios"));
6719
7749
  lines.push("");
6720
7750
  if (scenarios.length === 0) {
6721
- lines.push(chalk.dim(" No scenarios found. Use 'testers add' to create one."));
7751
+ lines.push(chalk2.dim(" No scenarios found. Use 'testers add' to create one."));
6722
7752
  lines.push("");
6723
7753
  return lines.join(`
6724
7754
  `);
6725
7755
  }
6726
7756
  for (const s of scenarios) {
6727
- const priorityColor = s.priority === "critical" ? chalk.red : s.priority === "high" ? chalk.yellow : s.priority === "medium" ? chalk.blue : chalk.dim;
6728
- const tags = s.tags.length > 0 ? chalk.dim(` [${s.tags.join(", ")}]`) : "";
6729
- let lastStatusIcon = chalk.dim("\u2014");
6730
- let passRateStr = chalk.dim("\u2014");
7757
+ const priorityColor = s.priority === "critical" ? chalk2.red : s.priority === "high" ? chalk2.yellow : s.priority === "medium" ? chalk2.blue : chalk2.dim;
7758
+ const tags = s.tags.length > 0 ? chalk2.dim(` [${s.tags.join(", ")}]`) : "";
7759
+ let lastStatusIcon = chalk2.dim("\u2014");
7760
+ let passRateStr = chalk2.dim("\u2014");
6731
7761
  if (s.id) {
6732
7762
  const stats = getScenarioRunStats(s.id);
6733
7763
  if (stats.lastStatus === "passed")
6734
- lastStatusIcon = chalk.green("\u2713");
7764
+ lastStatusIcon = chalk2.green("\u2713");
6735
7765
  else if (stats.lastStatus === "failed")
6736
- lastStatusIcon = chalk.red("\u2717");
7766
+ lastStatusIcon = chalk2.red("\u2717");
6737
7767
  else if (stats.lastStatus === "error")
6738
- lastStatusIcon = chalk.yellow("!");
7768
+ lastStatusIcon = chalk2.yellow("!");
6739
7769
  else if (stats.lastStatus === "skipped")
6740
- lastStatusIcon = chalk.dim("~");
6741
- passRateStr = stats.passRate === "\u2014" ? chalk.dim("\u2014") : chalk.dim(stats.passRate);
7770
+ lastStatusIcon = chalk2.dim("~");
7771
+ passRateStr = stats.passRate === "\u2014" ? chalk2.dim("\u2014") : chalk2.dim(stats.passRate);
6742
7772
  }
6743
- lines.push(` ${chalk.cyan(s.shortId)} ${s.name} ${priorityColor(s.priority)}${tags} ${lastStatusIcon} ${passRateStr}`);
7773
+ const flakinessStr = s.flakinessScore !== null && s.flakinessScore !== undefined && s.flakinessScore < 0.8 ? chalk2.yellow(` \u26A1 flaky (${Math.round(s.flakinessScore * 100)}%)`) : "";
7774
+ lines.push(` ${chalk2.cyan(s.shortId)} ${s.name} ${priorityColor(s.priority)}${tags}${flakinessStr} ${lastStatusIcon} ${passRateStr}`);
6744
7775
  }
6745
7776
  lines.push("");
6746
7777
  return lines.join(`
@@ -6750,30 +7781,30 @@ function formatResultDetail(result, screenshots) {
6750
7781
  const lines = [];
6751
7782
  const scenario = getScenario(result.scenarioId);
6752
7783
  lines.push("");
6753
- lines.push(chalk.bold(` Result ${result.id.slice(0, 8)}`));
7784
+ lines.push(chalk2.bold(` Result ${result.id.slice(0, 8)}`));
6754
7785
  if (scenario) {
6755
7786
  lines.push(` Scenario: ${scenario.shortId} \u2014 ${scenario.name}`);
6756
7787
  }
6757
- lines.push(` Status: ${result.status === "passed" ? chalk.green("PASSED") : chalk.red(result.status.toUpperCase())}`);
7788
+ lines.push(` Status: ${result.status === "passed" ? chalk2.green("PASSED") : chalk2.red(result.status.toUpperCase())}`);
6758
7789
  lines.push(` Model: ${result.model}`);
6759
7790
  lines.push(` Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
6760
7791
  lines.push(` Steps: ${result.stepsCompleted}/${result.stepsTotal}`);
6761
7792
  lines.push(` Tokens: ${result.tokensUsed} (~$${(result.costCents / 100).toFixed(4)})`);
6762
7793
  if (result.reasoning) {
6763
7794
  lines.push("");
6764
- lines.push(chalk.bold(" Reasoning:"));
7795
+ lines.push(chalk2.bold(" Reasoning:"));
6765
7796
  lines.push(` ${result.reasoning}`);
6766
7797
  }
6767
7798
  if (result.error) {
6768
7799
  lines.push("");
6769
- lines.push(chalk.red.bold(" Error:"));
6770
- lines.push(chalk.red(` ${result.error}`));
7800
+ lines.push(chalk2.red.bold(" Error:"));
7801
+ lines.push(chalk2.red(` ${result.error}`));
6771
7802
  }
6772
7803
  if (screenshots.length > 0) {
6773
7804
  lines.push("");
6774
- lines.push(chalk.bold(` Screenshots (${screenshots.length}):`));
7805
+ lines.push(chalk2.bold(` Screenshots (${screenshots.length}):`));
6775
7806
  for (const ss of screenshots) {
6776
- lines.push(` ${chalk.dim(`${String(ss.stepNumber).padStart(3, "0")}`)} ${ss.action} \u2014 ${chalk.dim(ss.filePath)}`);
7807
+ lines.push(` ${chalk2.dim(`${String(ss.stepNumber).padStart(3, "0")}`)} ${ss.action} \u2014 ${chalk2.dim(ss.filePath)}`);
6777
7808
  }
6778
7809
  }
6779
7810
  lines.push("");
@@ -7298,6 +8329,60 @@ async function postGitHubComment(run, results, options) {
7298
8329
  }
7299
8330
  }
7300
8331
 
8332
+ // src/lib/affected.ts
8333
+ var exports_affected = {};
8334
+ __export(exports_affected, {
8335
+ matchFilesToScenarios: () => matchFilesToScenarios
8336
+ });
8337
+ function globToRegex(glob) {
8338
+ const escaped = glob.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*\*/g, "\x00DS\x00").replace(/\*/g, "[^/]*").replace(/\x00DS\x00/g, ".*");
8339
+ return new RegExp(`^${escaped}$`, "i");
8340
+ }
8341
+ function matchFilesToScenarios(filePaths, scenarios, mappings = []) {
8342
+ if (filePaths.length === 0)
8343
+ return scenarios;
8344
+ const compiledMappings = mappings.map((m) => ({
8345
+ regex: globToRegex(m.glob),
8346
+ tags: m.tags
8347
+ }));
8348
+ const normPaths = filePaths.map((p) => p.replace(/\\/g, "/").toLowerCase());
8349
+ const matchedIds = new Set;
8350
+ for (const scenario of scenarios) {
8351
+ let matched = false;
8352
+ if (!matched) {
8353
+ for (const { regex, tags } of compiledMappings) {
8354
+ if (normPaths.some((fp) => regex.test(fp)) && tags.some((tag) => scenario.tags.includes(tag))) {
8355
+ matched = true;
8356
+ break;
8357
+ }
8358
+ }
8359
+ }
8360
+ if (!matched && scenario.targetPath) {
8361
+ const segments = scenario.targetPath.replace(/^\//, "").split("/").filter((s) => s.length > 2);
8362
+ if (segments.some((seg) => normPaths.some((fp) => fp.includes(seg.toLowerCase())))) {
8363
+ matched = true;
8364
+ }
8365
+ }
8366
+ if (!matched) {
8367
+ for (const tag of scenario.tags) {
8368
+ if (tag.length > 2 && normPaths.some((fp) => fp.includes(tag.toLowerCase()))) {
8369
+ matched = true;
8370
+ break;
8371
+ }
8372
+ }
8373
+ }
8374
+ if (!matched) {
8375
+ const nameWords = scenario.name.toLowerCase().split(/[\s\-_/]+/).filter((w) => w.length > 3);
8376
+ if (nameWords.some((word) => normPaths.some((fp) => fp.includes(word)))) {
8377
+ matched = true;
8378
+ }
8379
+ }
8380
+ if (matched)
8381
+ matchedIds.add(scenario.id);
8382
+ }
8383
+ return scenarios.filter((s) => matchedIds.has(s.id));
8384
+ }
8385
+
7301
8386
  // src/lib/compliance-report.ts
7302
8387
  var exports_compliance_report = {};
7303
8388
  __export(exports_compliance_report, {
@@ -7963,60 +9048,6 @@ var init_recorder = __esm(() => {
7963
9048
  init_scenarios();
7964
9049
  });
7965
9050
 
7966
- // src/lib/affected.ts
7967
- var exports_affected = {};
7968
- __export(exports_affected, {
7969
- matchFilesToScenarios: () => matchFilesToScenarios
7970
- });
7971
- function globToRegex(glob) {
7972
- const escaped = glob.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*\*/g, "\x00DS\x00").replace(/\*/g, "[^/]*").replace(/\x00DS\x00/g, ".*");
7973
- return new RegExp(`^${escaped}$`, "i");
7974
- }
7975
- function matchFilesToScenarios(filePaths, scenarios, mappings = []) {
7976
- if (filePaths.length === 0)
7977
- return scenarios;
7978
- const compiledMappings = mappings.map((m) => ({
7979
- regex: globToRegex(m.glob),
7980
- tags: m.tags
7981
- }));
7982
- const normPaths = filePaths.map((p) => p.replace(/\\/g, "/").toLowerCase());
7983
- const matchedIds = new Set;
7984
- for (const scenario of scenarios) {
7985
- let matched = false;
7986
- if (!matched) {
7987
- for (const { regex, tags } of compiledMappings) {
7988
- if (normPaths.some((fp) => regex.test(fp)) && tags.some((tag) => scenario.tags.includes(tag))) {
7989
- matched = true;
7990
- break;
7991
- }
7992
- }
7993
- }
7994
- if (!matched && scenario.targetPath) {
7995
- const segments = scenario.targetPath.replace(/^\//, "").split("/").filter((s) => s.length > 2);
7996
- if (segments.some((seg) => normPaths.some((fp) => fp.includes(seg.toLowerCase())))) {
7997
- matched = true;
7998
- }
7999
- }
8000
- if (!matched) {
8001
- for (const tag of scenario.tags) {
8002
- if (tag.length > 2 && normPaths.some((fp) => fp.includes(tag.toLowerCase()))) {
8003
- matched = true;
8004
- break;
8005
- }
8006
- }
8007
- }
8008
- if (!matched) {
8009
- const nameWords = scenario.name.toLowerCase().split(/[\s\-_/]+/).filter((w) => w.length > 3);
8010
- if (nameWords.some((word) => normPaths.some((fp) => fp.includes(word)))) {
8011
- matched = true;
8012
- }
8013
- }
8014
- if (matched)
8015
- matchedIds.add(scenario.id);
8016
- }
8017
- return scenarios.filter((s) => matchedIds.has(s.id));
8018
- }
8019
-
8020
9051
  // src/lib/git-watch.ts
8021
9052
  var exports_git_watch = {};
8022
9053
  __export(exports_git_watch, {
@@ -9081,6 +10112,102 @@ var init_health_scan = __esm(() => {
9081
10112
  init_todos_connector();
9082
10113
  });
9083
10114
 
10115
+ // src/db/seed-personas.ts
10116
+ var exports_seed_personas = {};
10117
+ __export(exports_seed_personas, {
10118
+ seedDefaultPersonas: () => seedDefaultPersonas,
10119
+ DEFAULT_PERSONAS: () => DEFAULT_PERSONAS
10120
+ });
10121
+ function seedDefaultPersonas() {
10122
+ const existing = listPersonas({ globalOnly: true });
10123
+ if (existing.length > 0)
10124
+ return { seeded: 0, skipped: DEFAULT_PERSONAS.length };
10125
+ let seeded = 0;
10126
+ for (const p of DEFAULT_PERSONAS) {
10127
+ try {
10128
+ createPersona(p);
10129
+ seeded++;
10130
+ } catch {}
10131
+ }
10132
+ return { seeded, skipped: DEFAULT_PERSONAS.length - seeded };
10133
+ }
10134
+ var DEFAULT_PERSONAS;
10135
+ var init_seed_personas = __esm(() => {
10136
+ init_personas();
10137
+ DEFAULT_PERSONAS = [
10138
+ {
10139
+ name: "First-Time User",
10140
+ role: "first-time user who has never used this app",
10141
+ description: "A new user encountering the product for the first time. No prior knowledge of the interface or flows.",
10142
+ instructions: "Explore cautiously. Read labels carefully before clicking. If something is unclear, hesitate and try the most obvious option. Notice and comment on anything confusing or unexpected.",
10143
+ traits: ["cautious", "reads-instructions", "easily-confused", "asks-why"],
10144
+ goals: ["complete the main task", "understand what the app does", "not make mistakes"],
10145
+ behaviors: ["reads every label before clicking", "hovers over elements before interacting", "notices missing help text"],
10146
+ painPoints: ["unclear error messages", "no onboarding guidance", "confusing navigation labels"]
10147
+ },
10148
+ {
10149
+ name: "Power User",
10150
+ role: "experienced power user who uses this app daily",
10151
+ description: "A seasoned user who knows all shortcuts and wants to accomplish tasks as fast as possible.",
10152
+ instructions: "Move fast. Use keyboard shortcuts where possible. Skip tutorials. Go directly to the feature. Get frustrated by extra clicks.",
10153
+ traits: ["fast", "impatient", "keyboard-first", "efficiency-focused"],
10154
+ goals: ["accomplish tasks as fast as possible", "avoid unnecessary steps", "use advanced features"],
10155
+ behaviors: ["skips onboarding modals immediately", "uses keyboard shortcuts", "ignores decorative UI elements"],
10156
+ painPoints: ["forced multi-step wizards", "no keyboard shortcuts", "slow page loads"]
10157
+ },
10158
+ {
10159
+ name: "Mobile User",
10160
+ role: "user on a mobile device with a small screen",
10161
+ description: "A user on a phone with limited screen space and touch-based interaction.",
10162
+ instructions: "Simulate touch interactions (tap = click). Notice if buttons are too small to tap. Check if text is readable. Look for horizontal scroll issues.",
10163
+ traits: ["touch-based", "limited-screen", "on-the-go", "interrupted"],
10164
+ goals: ["complete tasks on a small screen", "find mobile-optimized flows", "identify layout issues"],
10165
+ behaviors: ["taps instead of clicking", "scrolls vertically to find content", "notices overflow and truncated text"],
10166
+ painPoints: ["tiny tap targets", "horizontal scrolling", "desktop-only modals"]
10167
+ },
10168
+ {
10169
+ name: "Accessibility User",
10170
+ role: "user with accessibility needs relying on keyboard navigation",
10171
+ description: "A user who navigates primarily via keyboard and relies on semantic HTML and ARIA labels.",
10172
+ instructions: "Navigate using Tab, Enter, Escape, and arrow keys only. Note if focus indicators are visible. Check if interactive elements have accessible labels.",
10173
+ traits: ["keyboard-only", "screen-reader-compatible", "focus-dependent"],
10174
+ goals: ["complete all tasks via keyboard", "verify accessibility compliance", "identify ARIA issues"],
10175
+ behaviors: ["tabs through all interactive elements", "checks for visible focus rings", "reads aria-labels aloud"],
10176
+ painPoints: ["missing focus indicators", "unlabeled icon buttons", "focus traps in modals"]
10177
+ },
10178
+ {
10179
+ name: "Security Auditor",
10180
+ role: "security-focused tester looking for vulnerabilities",
10181
+ description: "A security professional trying to find injection vulnerabilities, unauthorized access, and data leaks.",
10182
+ instructions: "Try edge cases in every input field. Attempt to access other users' data. Test form validation boundaries. Check for sensitive data exposure.",
10183
+ traits: ["suspicious", "boundary-testing", "adversarial", "detail-oriented"],
10184
+ goals: ["find security vulnerabilities", "test input validation", "verify authorization controls"],
10185
+ behaviors: ["enters SQL/XSS payloads in input fields", "manipulates URL parameters", "checks network responses for sensitive data"],
10186
+ painPoints: ["no input sanitization", "verbose error messages exposing internals", "missing CSRF protection"]
10187
+ },
10188
+ {
10189
+ name: "Non-Technical User",
10190
+ role: "non-technical user unfamiliar with software conventions",
10191
+ description: "A user without technical background who is confused by jargon and relies on visual cues.",
10192
+ instructions: "Avoid technical terminology. Be confused by 'API key', 'JSON', 'endpoint'. Look for visual cues and icons. Read all text literally.",
10193
+ traits: ["non-technical", "jargon-confused", "visual-learner", "literal-reader"],
10194
+ goals: ["understand what to do from visual cues only", "complete basic tasks", "identify confusing terminology"],
10195
+ behaviors: ["reads every word on the screen", "looks for visual icons to understand actions", "asks 'what does this mean?'"],
10196
+ painPoints: ["technical jargon without explanation", "developer-facing error codes", "settings with no plain-language descriptions"]
10197
+ },
10198
+ {
10199
+ name: "Skeptical Buyer",
10200
+ role: "potential customer evaluating the product before purchasing",
10201
+ description: "A prospect who is not yet committed, looking for value and trust signals before converting.",
10202
+ instructions: "Look for pricing, terms, and trust signals. Check for social proof. Try to find limitations. Look for hidden costs. Notice anything that creates doubt.",
10203
+ traits: ["evaluating", "price-conscious", "trust-seeking", "skeptical"],
10204
+ goals: ["evaluate whether the product is worth it", "find the pricing", "understand limitations"],
10205
+ behaviors: ["looks for pricing before signing up", "reads reviews and testimonials", "searches for 'free trial' or 'no credit card'"],
10206
+ painPoints: ["hidden pricing", "no free tier or trial", "unclear cancellation policy"]
10207
+ }
10208
+ ];
10209
+ });
10210
+
9084
10211
  // src/lib/persona-diff.ts
9085
10212
  var exports_persona_diff = {};
9086
10213
  __export(exports_persona_diff, {
@@ -9639,6 +10766,170 @@ var init_golden_monitor = __esm(() => {
9639
10766
  init_golden_answers();
9640
10767
  });
9641
10768
 
10769
// src/lib/hybrid-runner.ts
// Namespace object for the bundled hybrid-runner module. It is declared with
// `var` so the binding is hoisted and can be referenced from anywhere in the
// bundle; `__export` registers a lazy accessor for each public name.
var exports_hybrid_runner = {};
__export(exports_hybrid_runner, {
  // Resolved on access, so the hoisted function declaration below is returned.
  runHybridScenario: () => {
    return runHybridScenario;
  }
});
10774
/**
 * Resolve the target of a `navigate` step.
 *
 * Absolute http(s) URLs are returned untouched. Anything else is appended to
 * `baseUrl` (minus one trailing slash), inserting the separating "/" when the
 * step URL is a bare relative path such as "login".
 */
function resolveHybridStepUrl(url, baseUrl) {
  if (/^https?:\/\//i.test(url))
    return url;
  const base = baseUrl.replace(/\/$/, "");
  // Paths, query strings and fragments can be appended as-is; only a bare
  // relative segment needs a separator so it does not fuse with the host.
  const needsSlash = url.length > 0 && !/^[/?#]/.test(url);
  return needsSlash ? `${base}/${url}` : `${base}${url}`;
}

/**
 * Report whether at least one element matched by `locator` is actually visible.
 *
 * A match count above zero only proves the element exists in the DOM, so each
 * match is additionally asked for its visibility. If the locator object does
 * not provide `isVisible` (e.g. a reduced page shim), presence is used as the
 * answer, which is the behaviour this check had before.
 */
async function isAnyHybridMatchVisible(locator) {
  const count = await locator.count();
  for (let i = 0; i < count; i++) {
    const candidate = typeof locator.nth === "function" ? locator.nth(i) : locator;
    if (typeof candidate.isVisible !== "function")
      return true;
    if (await candidate.isVisible())
      return true;
  }
  return false;
}

/**
 * Execute one non-AI step of a hybrid scenario against a browser page.
 *
 * Handled step types: navigate, click, fill, wait, wait_for, screenshot,
 * assert_text and assert_visible. Any other type is treated as a no-op that
 * succeeds.
 *
 * @param {object} page - Page object providing goto/click/fill/waitForSelector/screenshot/locator.
 * @param {object} step - Step descriptor; `step.type` selects the action, the
 *   remaining fields (`url`, `selector`, `value`, `ms`, `timeoutMs`,
 *   `expected`, `contains`, `visible`) are read per type.
 * @param {string} baseUrl - Prefix applied to non-absolute `navigate` URLs.
 * @returns {Promise<{ok: boolean, error?: string}>} `{ ok: true }` on success,
 *   otherwise `{ ok: false, error }`. Exceptions raised while running the step
 *   are converted into a failed result instead of propagating.
 */
async function runDeterministicStep(page, step, baseUrl) {
  try {
    switch (step.type) {
      case "navigate": {
        await page.goto(resolveHybridStepUrl(step.url, baseUrl), { timeout: 30000 });
        return { ok: true };
      }
      case "click":
        await page.click(step.selector, { timeout: 1e4 });
        return { ok: true };
      case "fill":
        await page.fill(step.selector, step.value, { timeout: 1e4 });
        return { ok: true };
      case "wait":
        await new Promise((r) => setTimeout(r, step.ms));
        return { ok: true };
      case "wait_for":
        await page.waitForSelector(step.selector, { timeout: step.timeoutMs ?? 1e4 });
        return { ok: true };
      case "screenshot":
        // NOTE(review): the captured image is not stored anywhere by this
        // function; the call only verifies that a screenshot can be taken.
        await page.screenshot({ fullPage: false });
        return { ok: true };
      case "assert_text": {
        const text = await page.locator(step.selector).textContent({ timeout: 5000 });
        const actual = text ?? "";
        // `contains` requests a substring match; otherwise compare trimmed text exactly.
        const ok = step.contains ? actual.includes(step.expected) : actual.trim() === step.expected.trim();
        if (!ok)
          return { ok: false, error: `Expected "${step.expected}", got "${actual}"` };
        return { ok: true };
      }
      case "assert_visible": {
        // `visible: false` turns the step into an "is hidden" assertion.
        const shouldBeVisible = step.visible !== false;
        const isVisible = await isAnyHybridMatchVisible(page.locator(step.selector));
        if (shouldBeVisible && !isVisible)
          return { ok: false, error: `Expected ${step.selector} to be visible` };
        if (!shouldBeVisible && isVisible)
          return { ok: false, error: `Expected ${step.selector} to be hidden` };
        return { ok: true };
      }
      default:
        // NOTE(review): unrecognised step types pass silently; confirm this is
        // intended rather than a typo-hiding fallback.
        return { ok: true };
    }
  } catch (err) {
    return { ok: false, error: err instanceof Error ? err.message : String(err) };
  }
}
10822
/**
 * Build the one-instruction scenario record that is handed to the agent loop
 * for a single AI step of a hybrid scenario.
 *
 * @param {object} scenario - The hybrid scenario (its `name` and `timeoutMs` are read).
 * @param {number} stepIndex - Zero-based index of the AI step.
 * @param {string} instruction - Instruction text the agent should carry out.
 * @param {*} model - Resolved model value stored on the record.
 */
function buildHybridStepScenario(scenario, stepIndex, instruction, model) {
  // One timestamp for both fields so createdAt and updatedAt cannot differ.
  const now = new Date().toISOString();
  return {
    id: `hybrid-step-${stepIndex}`,
    shortId: `hs-${stepIndex}`,
    projectId: null,
    name: `${scenario.name} \u2014 step ${stepIndex + 1}`,
    description: instruction,
    steps: [instruction],
    tags: [],
    priority: "medium",
    model,
    timeoutMs: scenario.timeoutMs ?? 60000,
    targetPath: null,
    requiresAuth: false,
    authConfig: null,
    metadata: null,
    assertions: [],
    personaId: null,
    scenarioType: "browser",
    requiredRole: null,
    version: 1,
    createdAt: now,
    updatedAt: now,
    lastPassedAt: null,
    lastPassedUrl: null
  };
}

/**
 * Run a hybrid scenario: a list of steps where "ai" / "ai_verify" steps are
 * delegated to the agent loop and every other step is executed directly via
 * `runDeterministicStep`. Execution stops at the first failing step.
 *
 * @param {object} scenario - Scenario with `name`, `steps`, and optional
 *   `baseUrl`, `model`, `timeoutMs`.
 * @param {object} [options] - Optional overrides: `baseUrl`, `screenshotDir`, `apiKey`.
 * @returns {Promise<object>} `{ scenarioName, status, stepResults, durationMs, error? }`
 *   where `status` is "passed", "failed" (a step failed) or "error" (setup or
 *   an unexpected exception outside a step's own error handling).
 */
async function runHybridScenario(scenario, options) {
  const config = loadConfig();
  // Scenario-level base URL wins over the caller's option, then the default.
  const baseUrl = scenario.baseUrl ?? options?.baseUrl ?? "http://localhost:3000";
  const startTime = Date.now();
  const stepResults = [];
  // Assemble the scenario-level result; `error` is only attached when given.
  const finish = (status, error) => {
    const summary = { scenarioName: scenario.name, status, stepResults, durationMs: Date.now() - startTime };
    if (error !== undefined)
      summary.error = error;
    return summary;
  };
  // Kept outside the try block so the finally clause can close it.
  let browser = null;
  try {
    browser = await launchBrowser({ headless: true });
    const page = await getPage(browser, { viewport: config.browser.viewport });
    const screenshotter = new Screenshotter({ baseDir: options?.screenshotDir ?? config.screenshots.dir });
    for (let i = 0; i < scenario.steps.length; i++) {
      const step = scenario.steps[i];
      const stepStart = Date.now();
      if (step.type === "ai" || step.type === "ai_verify") {
        // Model precedence: step, then scenario, then configured default.
        const model = resolveModel(step.model ?? scenario.model ?? config.defaultModel);
        const client = createClientForModel(model, options?.apiKey ?? config.anthropicApiKey);
        const instruction = step.type === "ai_verify" ? `Verify the following assertion about the current page state: "${step.assertion}". Do NOT navigate. Just inspect the page and call report_result with pass or fail.` : step.instruction;
        const syntheticScenario = buildHybridStepScenario(scenario, i, instruction, model);
        let agentResult;
        try {
          agentResult = await runAgentLoop({
            client,
            page,
            scenario: syntheticScenario,
            screenshotter,
            model,
            runId: `hybrid-${Date.now()}`,
            maxTurns: step.maxTurns ?? 15
          });
        } catch (err) {
          // An exception from the agent loop fails the step (and the scenario).
          const msg = err instanceof Error ? err.message : String(err);
          stepResults.push({ stepIndex: i, type: step.type, status: "failed", durationMs: Date.now() - stepStart, error: msg });
          return finish("failed", msg);
        }
        const stepPassed = agentResult.status === "passed";
        stepResults.push({
          stepIndex: i,
          type: step.type,
          status: stepPassed ? "passed" : "failed",
          durationMs: Date.now() - stepStart,
          reasoning: agentResult.reasoning,
          error: stepPassed ? undefined : agentResult.reasoning
        });
        if (!stepPassed) {
          // Label with the actual step type so "ai_verify" failures are not
          // reported as plain "ai" steps (matches the deterministic branch).
          return finish("failed", `Step ${i + 1} (${step.type}): ${agentResult.reasoning}`);
        }
        continue;
      }
      const result = await runDeterministicStep(page, step, baseUrl);
      stepResults.push({
        stepIndex: i,
        type: step.type,
        status: result.ok ? "passed" : "failed",
        durationMs: Date.now() - stepStart,
        error: result.error
      });
      if (!result.ok) {
        return finish("failed", `Step ${i + 1} (${step.type}): ${result.error}`);
      }
    }
    return finish("passed");
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    return finish("error", msg);
  } finally {
    if (browser)
      await closeBrowser(browser);
  }
}
10926
// Lazy initializer for the hybrid-runner module, wrapped by the bundler's
// `__esm` helper. Declared with `var` so the binding is hoisted within the
// bundle. The dependency initializers are invoked in their original order.
var init_hybrid_runner = __esm(function initHybridRunnerDeps() {
  init_browser();
  init_screenshotter();
  init_ai_client();
  init_config();
});
10932
+
9642
10933
  // node_modules/commander/esm.mjs
9643
10934
  var import__ = __toESM(require_commander(), 1);
9644
10935
  var {
@@ -9660,7 +10951,7 @@ import chalk6 from "chalk";
9660
10951
  // package.json
9661
10952
  var package_default = {
9662
10953
  name: "@hasna/testers",
9663
- version: "0.0.15",
10954
+ version: "0.0.16",
9664
10955
  description: "AI-powered QA testing CLI \u2014 spawns cheap AI agents to test web apps with headless browsers",
9665
10956
  type: "module",
9666
10957
  main: "dist/index.js",
@@ -9716,14 +11007,14 @@ var package_default = {
9716
11007
  bun: ">=1.0.0"
9717
11008
  },
9718
11009
  publishConfig: {
9719
- access: "public",
9720
- registry: "https://registry.npmjs.org/"
11010
+ registry: "https://registry.npmjs.org",
11011
+ access: "public"
9721
11012
  },
9722
11013
  repository: {
9723
11014
  type: "git",
9724
11015
  url: "https://github.com/hasna/open-testers.git"
9725
11016
  },
9726
- license: "MIT",
11017
+ license: "Apache-2.0",
9727
11018
  keywords: [
9728
11019
  "testing",
9729
11020
  "qa",
@@ -9751,13 +11042,13 @@ import { render, Box, Text, useInput, useApp } from "ink";
9751
11042
  import React, { useState } from "react";
9752
11043
  import { readFileSync as readFileSync7, readdirSync, writeFileSync as writeFileSync3 } from "fs";
9753
11044
  import { createInterface } from "readline";
9754
- import { join as join6, resolve } from "path";
11045
+ import { join as join7, resolve } from "path";
9755
11046
 
9756
11047
  // src/lib/init.ts
9757
11048
  init_scenarios();
9758
- import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync3 } from "fs";
9759
- import { join as join5, basename } from "path";
9760
- import { homedir as homedir5 } from "os";
11049
+ import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync4 } from "fs";
11050
+ import { join as join6, basename } from "path";
11051
+ import { homedir as homedir6 } from "os";
9761
11052
 
9762
11053
  // src/db/projects.ts
9763
11054
  init_types();
@@ -9795,7 +11086,7 @@ function ensureProject(name, path) {
9795
11086
 
9796
11087
  // src/lib/init.ts
9797
11088
  function detectFramework(dir) {
9798
- const pkgPath = join5(dir, "package.json");
11089
+ const pkgPath = join6(dir, "package.json");
9799
11090
  if (!existsSync5(pkgPath))
9800
11091
  return null;
9801
11092
  let pkg;
@@ -10015,10 +11306,10 @@ function initProject(options) {
10015
11306
  const project = ensureProject(name, projectPath);
10016
11307
  const starterInputs = getStarterScenarios(framework ?? { name: "Unknown", features: [] }, project.id);
10017
11308
  const scenarios = starterInputs.map((input) => createScenario(input));
10018
- const configDir = join5(homedir5(), ".testers");
10019
- const configPath = join5(configDir, "config.json");
11309
+ const configDir = join6(homedir6(), ".testers");
11310
+ const configPath = join6(configDir, "config.json");
10020
11311
  if (!existsSync5(configDir)) {
10021
- mkdirSync3(configDir, { recursive: true });
11312
+ mkdirSync4(configDir, { recursive: true });
10022
11313
  }
10023
11314
  let config = {};
10024
11315
  if (existsSync5(configPath)) {
@@ -10258,7 +11549,7 @@ function formatSmokeReport(result) {
10258
11549
  init_runs();
10259
11550
  init_results();
10260
11551
  init_scenarios();
10261
- import chalk2 from "chalk";
11552
+ import chalk3 from "chalk";
10262
11553
  function diffRuns(runId1, runId2) {
10263
11554
  const run1 = getRun(runId1);
10264
11555
  if (!run1) {
@@ -10348,630 +11639,398 @@ function formatDurationComparison(d1, d2) {
10348
11639
  function formatDiffTerminal(diff) {
10349
11640
  const lines = [];
10350
11641
  lines.push("");
10351
- lines.push(chalk2.bold(" Run Comparison"));
10352
- lines.push(` Run 1: ${chalk2.dim(diff.run1.id.slice(0, 8))} (${diff.run1.status}) \u2014 ${diff.run1.startedAt}`);
10353
- lines.push(` Run 2: ${chalk2.dim(diff.run2.id.slice(0, 8))} (${diff.run2.status}) \u2014 ${diff.run2.startedAt}`);
11642
+ lines.push(chalk3.bold(" Run Comparison"));
11643
+ lines.push(` Run 1: ${chalk3.dim(diff.run1.id.slice(0, 8))} (${diff.run1.status}) \u2014 ${diff.run1.startedAt}`);
11644
+ lines.push(` Run 2: ${chalk3.dim(diff.run2.id.slice(0, 8))} (${diff.run2.status}) \u2014 ${diff.run2.startedAt}`);
10354
11645
  lines.push("");
10355
11646
  if (diff.regressions.length > 0) {
10356
- lines.push(chalk2.red.bold(` Regressions (${diff.regressions.length}):`));
11647
+ lines.push(chalk3.red.bold(` Regressions (${diff.regressions.length}):`));
10357
11648
  for (const d of diff.regressions) {
10358
11649
  const label = formatScenarioLabel(d);
10359
11650
  const dur = formatDurationComparison(d.duration1, d.duration2);
10360
- lines.push(chalk2.red(` \u2B07 ${label} ${d.status1} -> ${d.status2} ${chalk2.dim(dur)}`));
11651
+ lines.push(chalk3.red(` \u2B07 ${label} ${d.status1} -> ${d.status2} ${chalk3.dim(dur)}`));
10361
11652
  }
10362
11653
  lines.push("");
10363
11654
  }
10364
11655
  if (diff.fixes.length > 0) {
10365
- lines.push(chalk2.green.bold(` Fixes (${diff.fixes.length}):`));
11656
+ lines.push(chalk3.green.bold(` Fixes (${diff.fixes.length}):`));
10366
11657
  for (const d of diff.fixes) {
10367
11658
  const label = formatScenarioLabel(d);
10368
11659
  const dur = formatDurationComparison(d.duration1, d.duration2);
10369
- lines.push(chalk2.green(` \u2B06 ${label} ${d.status1} -> ${d.status2} ${chalk2.dim(dur)}`));
11660
+ lines.push(chalk3.green(` \u2B06 ${label} ${d.status1} -> ${d.status2} ${chalk3.dim(dur)}`));
10370
11661
  }
10371
11662
  lines.push("");
10372
11663
  }
10373
11664
  if (diff.unchanged.length > 0) {
10374
- lines.push(chalk2.dim(` Unchanged (${diff.unchanged.length}):`));
11665
+ lines.push(chalk3.dim(` Unchanged (${diff.unchanged.length}):`));
10375
11666
  for (const d of diff.unchanged) {
10376
11667
  const label = formatScenarioLabel(d);
10377
11668
  const dur = formatDurationComparison(d.duration1, d.duration2);
10378
- lines.push(chalk2.dim(` = ${label} ${d.status2} ${dur}`));
11669
+ lines.push(chalk3.dim(` = ${label} ${d.status2} ${dur}`));
10379
11670
  }
10380
11671
  lines.push("");
10381
11672
  }
10382
11673
  if (diff.newScenarios.length > 0) {
10383
- lines.push(chalk2.cyan(` New in run 2 (${diff.newScenarios.length}):`));
11674
+ lines.push(chalk3.cyan(` New in run 2 (${diff.newScenarios.length}):`));
10384
11675
  for (const d of diff.newScenarios) {
10385
11676
  const label = formatScenarioLabel(d);
10386
- lines.push(chalk2.cyan(` + ${label} ${d.status2}`));
10387
- }
10388
- lines.push("");
10389
- }
10390
- if (diff.removedScenarios.length > 0) {
10391
- lines.push(chalk2.yellow(` Removed from run 2 (${diff.removedScenarios.length}):`));
10392
- for (const d of diff.removedScenarios) {
10393
- const label = formatScenarioLabel(d);
10394
- lines.push(chalk2.yellow(` - ${label} was ${d.status1}`));
10395
- }
10396
- lines.push("");
10397
- }
10398
- lines.push(chalk2.bold(` Summary: ${diff.regressions.length} regressions, ${diff.fixes.length} fixes, ${diff.unchanged.length} unchanged`));
10399
- lines.push("");
10400
- return lines.join(`
10401
- `);
10402
- }
10403
- function formatDiffJSON(diff) {
10404
- return JSON.stringify(diff, null, 2);
10405
- }
10406
-
10407
- // src/lib/visual-diff.ts
10408
- init_screenshots();
10409
- init_results();
10410
- init_runs();
10411
- init_scenarios();
10412
- init_database();
10413
- import { readFileSync as readFileSync3, existsSync as existsSync6 } from "fs";
10414
- import chalk3 from "chalk";
10415
- var DEFAULT_THRESHOLD = 0.1;
10416
- function setBaseline(runId) {
10417
- const run = getRun(runId);
10418
- if (!run) {
10419
- throw new Error(`Run not found: ${runId}`);
10420
- }
10421
- const db2 = getDatabase();
10422
- if (run.projectId) {
10423
- db2.query("UPDATE runs SET is_baseline = 0 WHERE project_id = ? AND is_baseline = 1").run(run.projectId);
10424
- } else {
10425
- db2.query("UPDATE runs SET is_baseline = 0 WHERE project_id IS NULL AND is_baseline = 1").run();
10426
- }
10427
- updateRun(run.id, { is_baseline: 1 });
10428
- }
10429
- function compareImages(image1Path, image2Path) {
10430
- if (!existsSync6(image1Path)) {
10431
- throw new Error(`Baseline image not found: ${image1Path}`);
10432
- }
10433
- if (!existsSync6(image2Path)) {
10434
- throw new Error(`Current image not found: ${image2Path}`);
10435
- }
10436
- const buf1 = readFileSync3(image1Path);
10437
- const buf2 = readFileSync3(image2Path);
10438
- if (buf1.equals(buf2)) {
10439
- const estimatedPixels = Math.max(1, Math.floor(buf1.length / 4));
10440
- return { diffPercent: 0, diffPixels: 0, totalPixels: estimatedPixels };
10441
- }
10442
- if (buf1.length !== buf2.length) {
10443
- const maxLen = Math.max(buf1.length, buf2.length);
10444
- const estimatedPixels = Math.max(1, Math.floor(maxLen / 4));
10445
- return { diffPercent: 100, diffPixels: estimatedPixels, totalPixels: estimatedPixels };
10446
- }
10447
- let diffBytes = 0;
10448
- for (let i = 0;i < buf1.length; i++) {
10449
- if (buf1[i] !== buf2[i]) {
10450
- diffBytes++;
10451
- }
10452
- }
10453
- const totalPixels = Math.max(1, Math.floor(buf1.length / 4));
10454
- const diffPixels = Math.max(1, Math.floor(diffBytes / 4));
10455
- const diffPercent = parseFloat((diffBytes / buf1.length * 100).toFixed(4));
10456
- return { diffPercent, diffPixels, totalPixels };
10457
- }
10458
- function compareRunScreenshots(runId, baselineRunId, threshold = DEFAULT_THRESHOLD) {
10459
- const run = getRun(runId);
10460
- if (!run)
10461
- throw new Error(`Run not found: ${runId}`);
10462
- const baselineRun = getRun(baselineRunId);
10463
- if (!baselineRun)
10464
- throw new Error(`Baseline run not found: ${baselineRunId}`);
10465
- const currentResults = getResultsByRun(run.id);
10466
- const baselineResults = getResultsByRun(baselineRun.id);
10467
- const baselineMap = new Map;
10468
- for (const result of baselineResults) {
10469
- const screenshots = listScreenshots(result.id);
10470
- for (const ss of screenshots) {
10471
- const key = `${result.scenarioId}:${ss.stepNumber}`;
10472
- baselineMap.set(key, { path: ss.filePath, action: ss.action });
10473
- }
10474
- }
10475
- const results = [];
10476
- for (const result of currentResults) {
10477
- const screenshots = listScreenshots(result.id);
10478
- for (const ss of screenshots) {
10479
- const key = `${result.scenarioId}:${ss.stepNumber}`;
10480
- const baseline = baselineMap.get(key);
10481
- if (!baseline)
10482
- continue;
10483
- if (!existsSync6(baseline.path) || !existsSync6(ss.filePath))
10484
- continue;
10485
- try {
10486
- const comparison = compareImages(baseline.path, ss.filePath);
10487
- results.push({
10488
- scenarioId: result.scenarioId,
10489
- stepNumber: ss.stepNumber,
10490
- action: ss.action,
10491
- baselinePath: baseline.path,
10492
- currentPath: ss.filePath,
10493
- diffPercent: comparison.diffPercent,
10494
- isRegression: comparison.diffPercent > threshold
10495
- });
10496
- } catch {}
10497
- }
10498
- }
10499
- return results;
10500
- }
10501
- function formatVisualDiffTerminal(results, threshold = DEFAULT_THRESHOLD) {
10502
- if (results.length === 0) {
10503
- return chalk3.dim(`
10504
- No screenshot comparisons found.
10505
- `);
10506
- }
10507
- const lines = [];
10508
- lines.push("");
10509
- lines.push(chalk3.bold(" Visual Regression Summary"));
10510
- lines.push("");
10511
- const regressions = results.filter((r) => r.diffPercent >= threshold);
10512
- const passed = results.filter((r) => r.diffPercent < threshold);
10513
- if (regressions.length > 0) {
10514
- lines.push(chalk3.red.bold(` Regressions (${regressions.length}):`));
10515
- for (const r of regressions) {
10516
- const scenario = getScenario(r.scenarioId);
10517
- const label = scenario ? `${scenario.shortId}: ${scenario.name}` : r.scenarioId.slice(0, 8);
10518
- const pct = chalk3.red(`${r.diffPercent.toFixed(2)}%`);
10519
- lines.push(` ${chalk3.red("!")} ${label} step ${r.stepNumber} (${r.action}) \u2014 ${pct} diff`);
11677
+ lines.push(chalk3.cyan(` + ${label} ${d.status2}`));
10520
11678
  }
10521
11679
  lines.push("");
10522
11680
  }
10523
- if (passed.length > 0) {
10524
- lines.push(chalk3.green.bold(` Passed (${passed.length}):`));
10525
- for (const r of passed) {
10526
- const scenario = getScenario(r.scenarioId);
10527
- const label = scenario ? `${scenario.shortId}: ${scenario.name}` : r.scenarioId.slice(0, 8);
10528
- const pct = chalk3.green(`${r.diffPercent.toFixed(2)}%`);
10529
- lines.push(` ${chalk3.green("\u2713")} ${label} step ${r.stepNumber} (${r.action}) \u2014 ${pct} diff`);
11681
+ if (diff.removedScenarios.length > 0) {
11682
+ lines.push(chalk3.yellow(` Removed from run 2 (${diff.removedScenarios.length}):`));
11683
+ for (const d of diff.removedScenarios) {
11684
+ const label = formatScenarioLabel(d);
11685
+ lines.push(chalk3.yellow(` - ${label} was ${d.status1}`));
10530
11686
  }
10531
11687
  lines.push("");
10532
11688
  }
10533
- lines.push(chalk3.bold(` Visual Summary: ${regressions.length} regressions, ${passed.length} passed (threshold: ${threshold}%)`));
11689
+ lines.push(chalk3.bold(` Summary: ${diff.regressions.length} regressions, ${diff.fixes.length} fixes, ${diff.unchanged.length} unchanged`));
10534
11690
  lines.push("");
10535
11691
  return lines.join(`
10536
11692
  `);
10537
11693
  }
11694
/**
 * Serialize a run diff as pretty-printed JSON.
 *
 * @param {*} diff - Value to serialize.
 * @returns {string} JSON text indented with two spaces.
 */
function formatDiffJSON(diff) {
  const indentWidth = 2;
  const serialized = JSON.stringify(diff, null, indentWidth);
  return serialized;
}
10538
11697
 
10539
- // src/lib/report.ts
10540
- init_runs();
10541
- init_results();
11698
+ // src/lib/visual-diff.ts
10542
11699
  init_screenshots();
11700
+ init_results();
11701
+ init_runs();
10543
11702
  init_scenarios();
10544
- import { readFileSync as readFileSync4, existsSync as existsSync7 } from "fs";
10545
- function imageToBase64(filePath) {
10546
- if (!filePath || !existsSync7(filePath))
10547
- return "";
10548
- try {
10549
- const buffer = readFileSync4(filePath);
10550
- const base64 = buffer.toString("base64");
10551
- return `data:image/png;base64,${base64}`;
10552
- } catch {
10553
- return "";
10554
- }
10555
- }
10556
- function escapeHtml(text) {
10557
- return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#039;");
10558
- }
10559
- function formatDuration2(ms) {
10560
- if (ms < 1000)
10561
- return `${ms}ms`;
10562
- if (ms < 60000)
10563
- return `${(ms / 1000).toFixed(1)}s`;
10564
- const mins = Math.floor(ms / 60000);
10565
- const secs = (ms % 60000 / 1000).toFixed(0);
10566
- return `${mins}m ${secs}s`;
10567
- }
10568
- function formatCost(cents) {
10569
- if (cents < 1)
10570
- return `$${(cents / 100).toFixed(4)}`;
10571
- return `$${(cents / 100).toFixed(2)}`;
10572
- }
10573
- function statusBadge(status) {
10574
- const colors = {
10575
- passed: { bg: "#22c55e", text: "#000" },
10576
- failed: { bg: "#ef4444", text: "#fff" },
10577
- error: { bg: "#eab308", text: "#000" },
10578
- skipped: { bg: "#6b7280", text: "#fff" }
10579
- };
10580
- const c = colors[status] ?? { bg: "#6b7280", text: "#fff" };
10581
- const label = status.toUpperCase();
10582
- return `<span style="display:inline-block;padding:2px 10px;border-radius:4px;font-size:12px;font-weight:700;background:${c.bg};color:${c.text};letter-spacing:0.5px;">${label}</span>`;
10583
- }
10584
- function renderScreenshots(screenshots) {
10585
- if (screenshots.length === 0)
10586
- return "";
10587
- let html = `<div style="display:flex;flex-wrap:wrap;gap:12px;margin-top:12px;">`;
10588
- for (let i = 0;i < screenshots.length; i++) {
10589
- const ss = screenshots[i];
10590
- const dataUri = imageToBase64(ss.filePath);
10591
- const checkId = `ss-${ss.id}`;
10592
- if (dataUri) {
10593
- html += `
10594
- <div style="flex:0 0 auto;">
10595
- <input type="checkbox" id="${checkId}" style="display:none;" />
10596
- <label for="${checkId}" style="cursor:pointer;">
10597
- <img src="${dataUri}" alt="Step ${ss.stepNumber}: ${escapeHtml(ss.action)}"
10598
- style="max-width:200px;max-height:150px;border-radius:6px;border:1px solid #262626;display:block;" />
10599
- </label>
10600
- <div style="position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.9);z-index:1000;display:none;align-items:center;justify-content:center;">
10601
- <label for="${checkId}" style="position:absolute;top:0;left:0;width:100%;height:100%;cursor:pointer;"></label>
10602
- <img src="${dataUri}" alt="Step ${ss.stepNumber}: ${escapeHtml(ss.action)}"
10603
- style="max-width:600px;max-height:90vh;border-radius:8px;position:relative;z-index:1001;" />
10604
- </div>
10605
- <div style="font-size:11px;color:#888;margin-top:4px;max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;">
10606
- ${ss.stepNumber}. ${escapeHtml(ss.action)}
10607
- </div>
10608
- </div>`;
10609
- } else {
10610
- html += `
10611
- <div style="flex:0 0 auto;width:200px;height:150px;background:#1a1a1a;border:1px dashed #333;border-radius:6px;display:flex;align-items:center;justify-content:center;color:#555;font-size:12px;">
10612
- Screenshot not found
10613
- <div style="font-size:11px;color:#888;margin-top:4px;">${ss.stepNumber}. ${escapeHtml(ss.action)}</div>
10614
- </div>`;
10615
- }
10616
- }
10617
- html += `</div>`;
10618
- return html;
10619
- }
10620
- function generateHtmlReport(runId) {
11703
+ init_database();
11704
+ import { readFileSync as readFileSync3, existsSync as existsSync6 } from "fs";
11705
+ import chalk4 from "chalk";
11706
+ var DEFAULT_THRESHOLD = 0.1;
11707
+ function setBaseline(runId) {
10621
11708
  const run = getRun(runId);
10622
- if (!run)
11709
+ if (!run) {
10623
11710
  throw new Error(`Run not found: ${runId}`);
10624
- const results = getResultsByRun(run.id);
10625
- const resultData = [];
10626
- for (const result of results) {
10627
- const screenshots = listScreenshots(result.id);
10628
- const scenario = getScenario(result.scenarioId);
10629
- resultData.push({
10630
- result,
10631
- scenarioName: scenario?.name ?? "Unknown Scenario",
10632
- scenarioShortId: scenario?.shortId ?? result.scenarioId.slice(0, 8),
10633
- screenshots
10634
- });
10635
11711
  }
10636
- const passedCount = results.filter((r) => r.status === "passed").length;
10637
- const failedCount = results.filter((r) => r.status === "failed").length;
10638
- const errorCount = results.filter((r) => r.status === "error").length;
10639
- const totalCount = results.length;
10640
- const totalTokens = results.reduce((sum, r) => sum + r.tokensUsed, 0);
10641
- const totalCostCents = results.reduce((sum, r) => sum + r.costCents, 0);
10642
- const totalDurationMs = run.finishedAt && run.startedAt ? new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime() : results.reduce((sum, r) => sum + r.durationMs, 0);
10643
- const generatedAt = new Date().toISOString();
10644
- let resultCards = "";
10645
- for (const { result, scenarioName, scenarioShortId, screenshots } of resultData) {
10646
- resultCards += `
10647
- <div style="background:#141414;border:1px solid #262626;border-radius:8px;padding:20px;margin-bottom:16px;">
10648
- <div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
10649
- ${statusBadge(result.status)}
10650
- <span style="font-size:16px;font-weight:600;color:#e5e5e5;">${escapeHtml(scenarioName)}</span>
10651
- <span style="font-size:12px;color:#666;font-family:monospace;">${escapeHtml(scenarioShortId)}</span>
10652
- </div>
10653
-
10654
- ${result.reasoning ? `<div style="color:#a3a3a3;font-size:14px;line-height:1.6;margin-bottom:12px;padding:12px;background:#0d0d0d;border-radius:6px;border-left:3px solid #333;">${escapeHtml(result.reasoning)}</div>` : ""}
10655
-
10656
- ${result.error ? `<div style="color:#ef4444;font-size:13px;margin-bottom:12px;padding:12px;background:#1a0a0a;border-radius:6px;border-left:3px solid #ef4444;font-family:monospace;">${escapeHtml(result.error)}</div>` : ""}
10657
-
10658
- <div style="display:flex;gap:24px;font-size:13px;color:#888;">
10659
- <span>Duration: <span style="color:#d4d4d4;">${formatDuration2(result.durationMs)}</span></span>
10660
- <span>Steps: <span style="color:#d4d4d4;">${result.stepsCompleted}/${result.stepsTotal}</span></span>
10661
- <span>Tokens: <span style="color:#d4d4d4;">${result.tokensUsed.toLocaleString()}</span></span>
10662
- <span>Cost: <span style="color:#d4d4d4;">${formatCost(result.costCents)}</span></span>
10663
- <span>Model: <span style="color:#d4d4d4;">${escapeHtml(result.model)}</span></span>
10664
- </div>
10665
-
10666
- ${renderScreenshots(screenshots)}
10667
- </div>`;
11712
+ const db2 = getDatabase();
11713
+ if (run.projectId) {
11714
+ db2.query("UPDATE runs SET is_baseline = 0 WHERE project_id = ? AND is_baseline = 1").run(run.projectId);
11715
+ } else {
11716
+ db2.query("UPDATE runs SET is_baseline = 0 WHERE project_id IS NULL AND is_baseline = 1").run();
10668
11717
  }
10669
- return `<!DOCTYPE html>
10670
- <html lang="en">
10671
- <head>
10672
- <meta charset="UTF-8" />
10673
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10674
- <title>Test Report - ${escapeHtml(run.id.slice(0, 8))}</title>
10675
- <style>
10676
- * { margin: 0; padding: 0; box-sizing: border-box; }
10677
- body { background: #0a0a0a; color: #e5e5e5; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; padding: 40px 20px; }
10678
- .container { max-width: 960px; margin: 0 auto; }
10679
- input[type="checkbox"]:checked ~ div:last-of-type { display: flex !important; }
10680
- </style>
10681
- </head>
10682
- <body>
10683
- <div class="container">
10684
- <!-- Header -->
10685
- <div style="margin-bottom:32px;">
10686
- <h1 style="font-size:28px;font-weight:700;margin-bottom:8px;color:#fff;">Test Report</h1>
10687
- <div style="display:flex;flex-wrap:wrap;gap:24px;font-size:14px;color:#888;">
10688
- <span>Run: <span style="color:#d4d4d4;font-family:monospace;">${escapeHtml(run.id.slice(0, 8))}</span></span>
10689
- <span>URL: <a href="${escapeHtml(run.url)}" style="color:#60a5fa;text-decoration:none;">${escapeHtml(run.url)}</a></span>
10690
- <span>Model: <span style="color:#d4d4d4;">${escapeHtml(run.model)}</span></span>
10691
- <span>Date: <span style="color:#d4d4d4;">${escapeHtml(run.startedAt)}</span></span>
10692
- <span>Duration: <span style="color:#d4d4d4;">${formatDuration2(totalDurationMs)}</span></span>
10693
- <span>Status: ${statusBadge(run.status)}</span>
10694
- </div>
10695
- </div>
10696
-
10697
- <!-- Summary Bar -->
10698
- <div style="display:flex;gap:16px;margin-bottom:32px;">
10699
- <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
10700
- <div style="font-size:28px;font-weight:700;color:#e5e5e5;">${totalCount}</div>
10701
- <div style="font-size:12px;color:#888;margin-top:4px;">TOTAL</div>
10702
- </div>
10703
- <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
10704
- <div style="font-size:28px;font-weight:700;color:#22c55e;">${passedCount}</div>
10705
- <div style="font-size:12px;color:#888;margin-top:4px;">PASSED</div>
10706
- </div>
10707
- <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
10708
- <div style="font-size:28px;font-weight:700;color:#ef4444;">${failedCount}</div>
10709
- <div style="font-size:12px;color:#888;margin-top:4px;">FAILED</div>
10710
- </div>
10711
- ${errorCount > 0 ? `
10712
- <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
10713
- <div style="font-size:28px;font-weight:700;color:#eab308;">${errorCount}</div>
10714
- <div style="font-size:12px;color:#888;margin-top:4px;">ERRORS</div>
10715
- </div>` : ""}
10716
- </div>
10717
-
10718
- <!-- Results -->
10719
- ${resultCards}
10720
-
10721
- <!-- Footer -->
10722
- <div style="margin-top:32px;padding-top:20px;border-top:1px solid #262626;display:flex;justify-content:space-between;font-size:13px;color:#666;">
10723
- <div>
10724
- Total tokens: ${totalTokens.toLocaleString()} | Total cost: ${formatCost(totalCostCents)}
10725
- </div>
10726
- <div>
10727
- Generated: ${escapeHtml(generatedAt)}
10728
- </div>
10729
- </div>
10730
- </div>
10731
- </body>
10732
- </html>`;
10733
- }
10734
- function generateLatestReport() {
10735
- const runs = listRuns({ limit: 1 });
10736
- if (runs.length === 0)
10737
- throw new Error("No runs found");
10738
- return generateHtmlReport(runs[0].id);
11718
+ updateRun(run.id, { is_baseline: 1 });
10739
11719
  }
10740
-
10741
- // src/lib/costs.ts
10742
- init_database();
10743
- init_config();
10744
- import chalk4 from "chalk";
10745
- function getDateFilter(period) {
10746
- switch (period) {
10747
- case "day":
10748
- return "AND r.created_at >= date('now', 'start of day')";
10749
- case "week":
10750
- return "AND r.created_at >= date('now', '-7 days')";
10751
- case "month":
10752
- return "AND r.created_at >= date('now', '-30 days')";
10753
- case "all":
10754
- return "";
11720
+ function compareImages(image1Path, image2Path) {
11721
+ if (!existsSync6(image1Path)) {
11722
+ throw new Error(`Baseline image not found: ${image1Path}`);
10755
11723
  }
10756
- }
10757
- function getPeriodDays(period) {
10758
- switch (period) {
10759
- case "day":
10760
- return 1;
10761
- case "week":
10762
- return 7;
10763
- case "month":
10764
- return 30;
10765
- case "all":
10766
- return 30;
11724
+ if (!existsSync6(image2Path)) {
11725
+ throw new Error(`Current image not found: ${image2Path}`);
11726
+ }
11727
+ const buf1 = readFileSync3(image1Path);
11728
+ const buf2 = readFileSync3(image2Path);
11729
+ if (buf1.equals(buf2)) {
11730
+ const estimatedPixels = Math.max(1, Math.floor(buf1.length / 4));
11731
+ return { diffPercent: 0, diffPixels: 0, totalPixels: estimatedPixels };
11732
+ }
11733
+ if (buf1.length !== buf2.length) {
11734
+ const maxLen = Math.max(buf1.length, buf2.length);
11735
+ const estimatedPixels = Math.max(1, Math.floor(maxLen / 4));
11736
+ return { diffPercent: 100, diffPixels: estimatedPixels, totalPixels: estimatedPixels };
10767
11737
  }
10768
- }
10769
- function loadBudgetConfig() {
10770
- const config = loadConfig();
10771
- const budget = config.budget;
10772
- return {
10773
- maxPerRunCents: budget?.maxPerRunCents ?? 50,
10774
- maxPerDayCents: budget?.maxPerDayCents ?? 500,
10775
- warnAtPercent: budget?.warnAtPercent ?? 0.8
10776
- };
10777
- }
10778
- function getCostSummary(options) {
10779
- const db2 = getDatabase();
10780
- const period = options?.period ?? "month";
10781
- const projectId = options?.projectId;
10782
- const dateFilter = getDateFilter(period);
10783
- const projectFilter = projectId ? "AND ru.project_id = ?" : "";
10784
- const projectParams = projectId ? [projectId] : [];
10785
- const totalsRow = db2.query(`SELECT
10786
- COALESCE(SUM(r.cost_cents), 0) as total_cost,
10787
- COALESCE(SUM(r.tokens_used), 0) as total_tokens,
10788
- COUNT(DISTINCT r.run_id) as run_count
10789
- FROM results r
10790
- JOIN runs ru ON r.run_id = ru.id
10791
- WHERE 1=1 ${dateFilter} ${projectFilter}`).get(...projectParams);
10792
- const modelRows = db2.query(`SELECT
10793
- r.model,
10794
- COALESCE(SUM(r.cost_cents), 0) as cost_cents,
10795
- COALESCE(SUM(r.tokens_used), 0) as tokens,
10796
- COUNT(DISTINCT r.run_id) as runs
10797
- FROM results r
10798
- JOIN runs ru ON r.run_id = ru.id
10799
- WHERE 1=1 ${dateFilter} ${projectFilter}
10800
- GROUP BY r.model
10801
- ORDER BY cost_cents DESC`).all(...projectParams);
10802
- const byModel = {};
10803
- for (const row of modelRows) {
10804
- byModel[row.model] = {
10805
- costCents: row.cost_cents,
10806
- tokens: row.tokens,
10807
- runs: row.runs
10808
- };
11738
+ let diffBytes = 0;
11739
+ for (let i = 0;i < buf1.length; i++) {
11740
+ if (buf1[i] !== buf2[i]) {
11741
+ diffBytes++;
11742
+ }
10809
11743
  }
10810
- const scenarioRows = db2.query(`SELECT
10811
- r.scenario_id,
10812
- COALESCE(s.name, r.scenario_id) as name,
10813
- COALESCE(SUM(r.cost_cents), 0) as cost_cents,
10814
- COALESCE(SUM(r.tokens_used), 0) as tokens,
10815
- COUNT(DISTINCT r.run_id) as runs
10816
- FROM results r
10817
- JOIN runs ru ON r.run_id = ru.id
10818
- LEFT JOIN scenarios s ON r.scenario_id = s.id
10819
- WHERE 1=1 ${dateFilter} ${projectFilter}
10820
- GROUP BY r.scenario_id
10821
- ORDER BY cost_cents DESC
10822
- LIMIT 10`).all(...projectParams);
10823
- const byScenario = scenarioRows.map((row) => ({
10824
- scenarioId: row.scenario_id,
10825
- name: row.name,
10826
- costCents: row.cost_cents,
10827
- tokens: row.tokens,
10828
- runs: row.runs
10829
- }));
10830
- const runCount = totalsRow.run_count;
10831
- const avgCostPerRun = runCount > 0 ? totalsRow.total_cost / runCount : 0;
10832
- const periodDays = getPeriodDays(period);
10833
- const estimatedMonthlyCents = periodDays > 0 ? totalsRow.total_cost / periodDays * 30 : 0;
10834
- return {
10835
- period,
10836
- totalCostCents: totalsRow.total_cost,
10837
- totalTokens: totalsRow.total_tokens,
10838
- runCount,
10839
- byModel,
10840
- byScenario,
10841
- avgCostPerRun,
10842
- estimatedMonthlyCents
10843
- };
11744
+ const totalPixels = Math.max(1, Math.floor(buf1.length / 4));
11745
+ const diffPixels = Math.max(1, Math.floor(diffBytes / 4));
11746
+ const diffPercent = parseFloat((diffBytes / buf1.length * 100).toFixed(4));
11747
+ return { diffPercent, diffPixels, totalPixels };
10844
11748
  }
10845
- function getCostsByScenario(options) {
10846
- const db2 = getDatabase();
10847
- const period = options?.period ?? "month";
10848
- const projectId = options?.projectId;
10849
- const dateFilter = getDateFilter(period);
10850
- const projectFilter = projectId ? "AND ru.project_id = ?" : "";
10851
- const projectParams = projectId ? [projectId] : [];
10852
- const rows = db2.query(`SELECT
10853
- r.scenario_id,
10854
- COALESCE(s.name, r.scenario_id) as name,
10855
- COUNT(DISTINCT r.run_id) as run_count,
10856
- COALESCE(SUM(r.cost_cents), 0) as total_cost_cents
10857
- FROM results r
10858
- JOIN runs ru ON r.run_id = ru.id
10859
- LEFT JOIN scenarios s ON r.scenario_id = s.id
10860
- WHERE 1=1 ${dateFilter} ${projectFilter}
10861
- GROUP BY r.scenario_id
10862
- ORDER BY total_cost_cents DESC`).all(...projectParams);
10863
- return rows.map((row) => ({
10864
- scenarioId: row.scenario_id,
10865
- name: row.name,
10866
- runCount: row.run_count,
10867
- totalCostCents: row.total_cost_cents,
10868
- avgCostPerRunCents: row.run_count > 0 ? row.total_cost_cents / row.run_count : 0
10869
- }));
11749
+ function compareRunScreenshots(runId, baselineRunId, threshold = DEFAULT_THRESHOLD) {
11750
+ const run = getRun(runId);
11751
+ if (!run)
11752
+ throw new Error(`Run not found: ${runId}`);
11753
+ const baselineRun = getRun(baselineRunId);
11754
+ if (!baselineRun)
11755
+ throw new Error(`Baseline run not found: ${baselineRunId}`);
11756
+ const currentResults = getResultsByRun(run.id);
11757
+ const baselineResults = getResultsByRun(baselineRun.id);
11758
+ const baselineMap = new Map;
11759
+ for (const result of baselineResults) {
11760
+ const screenshots = listScreenshots(result.id);
11761
+ for (const ss of screenshots) {
11762
+ const key = `${result.scenarioId}:${ss.stepNumber}`;
11763
+ baselineMap.set(key, { path: ss.filePath, action: ss.action });
11764
+ }
11765
+ }
11766
+ const results = [];
11767
+ for (const result of currentResults) {
11768
+ const screenshots = listScreenshots(result.id);
11769
+ for (const ss of screenshots) {
11770
+ const key = `${result.scenarioId}:${ss.stepNumber}`;
11771
+ const baseline = baselineMap.get(key);
11772
+ if (!baseline)
11773
+ continue;
11774
+ if (!existsSync6(baseline.path) || !existsSync6(ss.filePath))
11775
+ continue;
11776
+ try {
11777
+ const comparison = compareImages(baseline.path, ss.filePath);
11778
+ results.push({
11779
+ scenarioId: result.scenarioId,
11780
+ stepNumber: ss.stepNumber,
11781
+ action: ss.action,
11782
+ baselinePath: baseline.path,
11783
+ currentPath: ss.filePath,
11784
+ diffPercent: comparison.diffPercent,
11785
+ isRegression: comparison.diffPercent > threshold
11786
+ });
11787
+ } catch {}
11788
+ }
11789
+ }
11790
+ return results;
10870
11791
  }
10871
- function formatCostsByScenarioTerminal(rows, period) {
11792
+ function formatVisualDiffTerminal(results, threshold = DEFAULT_THRESHOLD) {
11793
+ if (results.length === 0) {
11794
+ return chalk4.dim(`
11795
+ No screenshot comparisons found.
11796
+ `);
11797
+ }
10872
11798
  const lines = [];
10873
11799
  lines.push("");
10874
- lines.push(chalk4.bold(` Cost by Scenario (${period})`));
11800
+ lines.push(chalk4.bold(" Visual Regression Summary"));
10875
11801
  lines.push("");
10876
- if (rows.length === 0) {
10877
- lines.push(chalk4.dim(" No cost data found."));
11802
+ const regressions = results.filter((r) => r.diffPercent >= threshold);
11803
+ const passed = results.filter((r) => r.diffPercent < threshold);
11804
+ if (regressions.length > 0) {
11805
+ lines.push(chalk4.red.bold(` Regressions (${regressions.length}):`));
11806
+ for (const r of regressions) {
11807
+ const scenario = getScenario(r.scenarioId);
11808
+ const label = scenario ? `${scenario.shortId}: ${scenario.name}` : r.scenarioId.slice(0, 8);
11809
+ const pct = chalk4.red(`${r.diffPercent.toFixed(2)}%`);
11810
+ lines.push(` ${chalk4.red("!")} ${label} step ${r.stepNumber} (${r.action}) \u2014 ${pct} diff`);
11811
+ }
10878
11812
  lines.push("");
10879
- return lines.join(`
10880
- `);
10881
11813
  }
10882
- lines.push(` ${"Scenario".padEnd(40)} ${"Runs".padEnd(8)} ${"Total Cost".padEnd(14)} Avg/Run`);
10883
- lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(8)} ${"\u2500".repeat(14)} ${"\u2500".repeat(10)}`);
10884
- for (const row of rows) {
10885
- const label = row.name.length > 38 ? row.name.slice(0, 35) + "..." : row.name;
10886
- lines.push(` ${label.padEnd(40)} ${String(row.runCount).padEnd(8)} ${formatDollars(row.totalCostCents).padEnd(14)} ${formatDollars(row.avgCostPerRunCents)}`);
11814
+ if (passed.length > 0) {
11815
+ lines.push(chalk4.green.bold(` Passed (${passed.length}):`));
11816
+ for (const r of passed) {
11817
+ const scenario = getScenario(r.scenarioId);
11818
+ const label = scenario ? `${scenario.shortId}: ${scenario.name}` : r.scenarioId.slice(0, 8);
11819
+ const pct = chalk4.green(`${r.diffPercent.toFixed(2)}%`);
11820
+ lines.push(` ${chalk4.green("\u2713")} ${label} step ${r.stepNumber} (${r.action}) \u2014 ${pct} diff`);
11821
+ }
11822
+ lines.push("");
10887
11823
  }
11824
+ lines.push(chalk4.bold(` Visual Summary: ${regressions.length} regressions, ${passed.length} passed (threshold: ${threshold}%)`));
10888
11825
  lines.push("");
10889
11826
  return lines.join(`
10890
11827
  `);
10891
11828
  }
10892
- function checkBudget(estimatedCostCents) {
10893
- const budget = loadBudgetConfig();
10894
- if (estimatedCostCents > budget.maxPerRunCents) {
10895
- return {
10896
- allowed: false,
10897
- warning: `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(budget.maxPerRunCents)})`
10898
- };
10899
- }
10900
- const todaySummary = getCostSummary({ period: "day" });
10901
- const projectedDaily = todaySummary.totalCostCents + estimatedCostCents;
10902
- if (projectedDaily > budget.maxPerDayCents) {
10903
- return {
10904
- allowed: false,
10905
- warning: `Daily spending (${formatDollars(todaySummary.totalCostCents)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(budget.maxPerDayCents)})`
10906
- };
11829
+
11830
+ // src/lib/report.ts
11831
+ init_runs();
11832
+ init_results();
11833
+ init_screenshots();
11834
+ init_scenarios();
11835
+ import { readFileSync as readFileSync4, existsSync as existsSync7 } from "fs";
11836
+ function imageToBase64(filePath) {
11837
+ if (!filePath || !existsSync7(filePath))
11838
+ return "";
11839
+ try {
11840
+ const buffer = readFileSync4(filePath);
11841
+ const base64 = buffer.toString("base64");
11842
+ return `data:image/png;base64,${base64}`;
11843
+ } catch {
11844
+ return "";
10907
11845
  }
10908
- if (projectedDaily > budget.maxPerDayCents * budget.warnAtPercent) {
10909
- return {
10910
- allowed: true,
10911
- warning: `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(budget.maxPerDayCents)} (${Math.round(projectedDaily / budget.maxPerDayCents * 100)}%)`
10912
- };
11846
+ }
11847
+ function escapeHtml(text) {
11848
+ return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#039;");
11849
+ }
11850
+ function formatDuration2(ms) {
11851
+ if (ms < 1000)
11852
+ return `${ms}ms`;
11853
+ if (ms < 60000)
11854
+ return `${(ms / 1000).toFixed(1)}s`;
11855
+ const mins = Math.floor(ms / 60000);
11856
+ const secs = (ms % 60000 / 1000).toFixed(0);
11857
+ return `${mins}m ${secs}s`;
11858
+ }
11859
+ function formatCost(cents) {
11860
+ if (cents < 1)
11861
+ return `$${(cents / 100).toFixed(4)}`;
11862
+ return `$${(cents / 100).toFixed(2)}`;
11863
+ }
11864
+ function statusBadge(status) {
11865
+ const colors = {
11866
+ passed: { bg: "#22c55e", text: "#000" },
11867
+ failed: { bg: "#ef4444", text: "#fff" },
11868
+ error: { bg: "#eab308", text: "#000" },
11869
+ skipped: { bg: "#6b7280", text: "#fff" }
11870
+ };
11871
+ const c = colors[status] ?? { bg: "#6b7280", text: "#fff" };
11872
+ const label = status.toUpperCase();
11873
+ return `<span style="display:inline-block;padding:2px 10px;border-radius:4px;font-size:12px;font-weight:700;background:${c.bg};color:${c.text};letter-spacing:0.5px;">${label}</span>`;
11874
+ }
11875
+ function renderScreenshots(screenshots) {
11876
+ if (screenshots.length === 0)
11877
+ return "";
11878
+ let html = `<div style="display:flex;flex-wrap:wrap;gap:12px;margin-top:12px;">`;
11879
+ for (let i = 0;i < screenshots.length; i++) {
11880
+ const ss = screenshots[i];
11881
+ const dataUri = imageToBase64(ss.filePath);
11882
+ const checkId = `ss-${ss.id}`;
11883
+ if (dataUri) {
11884
+ html += `
11885
+ <div style="flex:0 0 auto;">
11886
+ <input type="checkbox" id="${checkId}" style="display:none;" />
11887
+ <label for="${checkId}" style="cursor:pointer;">
11888
+ <img src="${dataUri}" alt="Step ${ss.stepNumber}: ${escapeHtml(ss.action)}"
11889
+ style="max-width:200px;max-height:150px;border-radius:6px;border:1px solid #262626;display:block;" />
11890
+ </label>
11891
+ <div style="position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.9);z-index:1000;display:none;align-items:center;justify-content:center;">
11892
+ <label for="${checkId}" style="position:absolute;top:0;left:0;width:100%;height:100%;cursor:pointer;"></label>
11893
+ <img src="${dataUri}" alt="Step ${ss.stepNumber}: ${escapeHtml(ss.action)}"
11894
+ style="max-width:600px;max-height:90vh;border-radius:8px;position:relative;z-index:1001;" />
11895
+ </div>
11896
+ <div style="font-size:11px;color:#888;margin-top:4px;max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;">
11897
+ ${ss.stepNumber}. ${escapeHtml(ss.action)}
11898
+ </div>
11899
+ </div>`;
11900
+ } else {
11901
+ html += `
11902
+ <div style="flex:0 0 auto;width:200px;height:150px;background:#1a1a1a;border:1px dashed #333;border-radius:6px;display:flex;align-items:center;justify-content:center;color:#555;font-size:12px;">
11903
+ Screenshot not found
11904
+ <div style="font-size:11px;color:#888;margin-top:4px;">${ss.stepNumber}. ${escapeHtml(ss.action)}</div>
11905
+ </div>`;
11906
+ }
10913
11907
  }
10914
- return { allowed: true };
10915
- }
10916
- function formatDollars(cents) {
10917
- return `$${(cents / 100).toFixed(2)}`;
10918
- }
10919
- function formatTokens(tokens) {
10920
- if (tokens >= 1e6)
10921
- return `${(tokens / 1e6).toFixed(1)}M`;
10922
- if (tokens >= 1000)
10923
- return `${(tokens / 1000).toFixed(1)}K`;
10924
- return String(tokens);
11908
+ html += `</div>`;
11909
+ return html;
10925
11910
  }
10926
- function formatCostsTerminal(summary) {
10927
- const lines = [];
10928
- lines.push("");
10929
- lines.push(chalk4.bold(` Cost Summary (${summary.period})`));
10930
- lines.push("");
10931
- lines.push(` Total: ${chalk4.yellow(formatDollars(summary.totalCostCents))} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`);
10932
- lines.push(` Avg/run: ${chalk4.yellow(formatDollars(summary.avgCostPerRun))}`);
10933
- lines.push(` Est/month: ${chalk4.yellow(formatDollars(summary.estimatedMonthlyCents))}`);
10934
- const modelEntries = Object.entries(summary.byModel);
10935
- if (modelEntries.length > 0) {
10936
- lines.push("");
10937
- lines.push(chalk4.bold(" By Model"));
10938
- lines.push(` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`);
10939
- lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)}`);
10940
- for (const [model, data] of modelEntries) {
10941
- lines.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
10942
- }
11911
+ function generateHtmlReport(runId) {
11912
+ const run = getRun(runId);
11913
+ if (!run)
11914
+ throw new Error(`Run not found: ${runId}`);
11915
+ const results = getResultsByRun(run.id);
11916
+ const resultData = [];
11917
+ for (const result of results) {
11918
+ const screenshots = listScreenshots(result.id);
11919
+ const scenario = getScenario(result.scenarioId);
11920
+ resultData.push({
11921
+ result,
11922
+ scenarioName: scenario?.name ?? "Unknown Scenario",
11923
+ scenarioShortId: scenario?.shortId ?? result.scenarioId.slice(0, 8),
11924
+ screenshots
11925
+ });
10943
11926
  }
10944
- if (summary.byScenario.length > 0) {
10945
- lines.push("");
10946
- lines.push(chalk4.bold(" Scenarios by Cost (most expensive first)"));
10947
- lines.push(` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`);
10948
- lines.push(` ${"\u2500".repeat(40)} ${"\u2500".repeat(12)} ${"\u2500".repeat(12)} ${"\u2500".repeat(6)} ${"\u2500".repeat(10)}`);
10949
- for (const s of summary.byScenario) {
10950
- const label = s.name.length > 38 ? s.name.slice(0, 35) + "..." : s.name;
10951
- const avgPerRun = s.runs > 0 ? s.costCents / s.runs : 0;
10952
- lines.push(` ${label.padEnd(40)} ${formatDollars(s.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(s.runs).padEnd(6)} ${formatTokens(s.tokens)}`);
10953
- }
11927
+ const passedCount = results.filter((r) => r.status === "passed").length;
11928
+ const failedCount = results.filter((r) => r.status === "failed").length;
11929
+ const errorCount = results.filter((r) => r.status === "error").length;
11930
+ const totalCount = results.length;
11931
+ const totalTokens = results.reduce((sum, r) => sum + r.tokensUsed, 0);
11932
+ const totalCostCents = results.reduce((sum, r) => sum + r.costCents, 0);
11933
+ const totalDurationMs = run.finishedAt && run.startedAt ? new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime() : results.reduce((sum, r) => sum + r.durationMs, 0);
11934
+ const generatedAt = new Date().toISOString();
11935
+ let resultCards = "";
11936
+ for (const { result, scenarioName, scenarioShortId, screenshots } of resultData) {
11937
+ resultCards += `
11938
+ <div style="background:#141414;border:1px solid #262626;border-radius:8px;padding:20px;margin-bottom:16px;">
11939
+ <div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
11940
+ ${statusBadge(result.status)}
11941
+ <span style="font-size:16px;font-weight:600;color:#e5e5e5;">${escapeHtml(scenarioName)}</span>
11942
+ <span style="font-size:12px;color:#666;font-family:monospace;">${escapeHtml(scenarioShortId)}</span>
11943
+ </div>
11944
+
11945
+ ${result.reasoning ? `<div style="color:#a3a3a3;font-size:14px;line-height:1.6;margin-bottom:12px;padding:12px;background:#0d0d0d;border-radius:6px;border-left:3px solid #333;">${escapeHtml(result.reasoning)}</div>` : ""}
11946
+
11947
+ ${result.error ? `<div style="color:#ef4444;font-size:13px;margin-bottom:12px;padding:12px;background:#1a0a0a;border-radius:6px;border-left:3px solid #ef4444;font-family:monospace;">${escapeHtml(result.error)}</div>` : ""}
11948
+
11949
+ <div style="display:flex;gap:24px;font-size:13px;color:#888;">
11950
+ <span>Duration: <span style="color:#d4d4d4;">${formatDuration2(result.durationMs)}</span></span>
11951
+ <span>Steps: <span style="color:#d4d4d4;">${result.stepsCompleted}/${result.stepsTotal}</span></span>
11952
+ <span>Tokens: <span style="color:#d4d4d4;">${result.tokensUsed.toLocaleString()}</span></span>
11953
+ <span>Cost: <span style="color:#d4d4d4;">${formatCost(result.costCents)}</span></span>
11954
+ <span>Model: <span style="color:#d4d4d4;">${escapeHtml(result.model)}</span></span>
11955
+ </div>
11956
+
11957
+ ${renderScreenshots(screenshots)}
11958
+ </div>`;
10954
11959
  }
10955
- lines.push("");
10956
- return lines.join(`
10957
- `);
10958
- }
10959
- function formatCostsJSON(summary) {
10960
- return JSON.stringify(summary, null, 2);
11960
+ return `<!DOCTYPE html>
11961
+ <html lang="en">
11962
+ <head>
11963
+ <meta charset="UTF-8" />
11964
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
11965
+ <title>Test Report - ${escapeHtml(run.id.slice(0, 8))}</title>
11966
+ <style>
11967
+ * { margin: 0; padding: 0; box-sizing: border-box; }
11968
+ body { background: #0a0a0a; color: #e5e5e5; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; padding: 40px 20px; }
11969
+ .container { max-width: 960px; margin: 0 auto; }
11970
+ input[type="checkbox"]:checked ~ div:last-of-type { display: flex !important; }
11971
+ </style>
11972
+ </head>
11973
+ <body>
11974
+ <div class="container">
11975
+ <!-- Header -->
11976
+ <div style="margin-bottom:32px;">
11977
+ <h1 style="font-size:28px;font-weight:700;margin-bottom:8px;color:#fff;">Test Report</h1>
11978
+ <div style="display:flex;flex-wrap:wrap;gap:24px;font-size:14px;color:#888;">
11979
+ <span>Run: <span style="color:#d4d4d4;font-family:monospace;">${escapeHtml(run.id.slice(0, 8))}</span></span>
11980
+ <span>URL: <a href="${escapeHtml(run.url)}" style="color:#60a5fa;text-decoration:none;">${escapeHtml(run.url)}</a></span>
11981
+ <span>Model: <span style="color:#d4d4d4;">${escapeHtml(run.model)}</span></span>
11982
+ <span>Date: <span style="color:#d4d4d4;">${escapeHtml(run.startedAt)}</span></span>
11983
+ <span>Duration: <span style="color:#d4d4d4;">${formatDuration2(totalDurationMs)}</span></span>
11984
+ <span>Status: ${statusBadge(run.status)}</span>
11985
+ </div>
11986
+ </div>
11987
+
11988
+ <!-- Summary Bar -->
11989
+ <div style="display:flex;gap:16px;margin-bottom:32px;">
11990
+ <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
11991
+ <div style="font-size:28px;font-weight:700;color:#e5e5e5;">${totalCount}</div>
11992
+ <div style="font-size:12px;color:#888;margin-top:4px;">TOTAL</div>
11993
+ </div>
11994
+ <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
11995
+ <div style="font-size:28px;font-weight:700;color:#22c55e;">${passedCount}</div>
11996
+ <div style="font-size:12px;color:#888;margin-top:4px;">PASSED</div>
11997
+ </div>
11998
+ <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
11999
+ <div style="font-size:28px;font-weight:700;color:#ef4444;">${failedCount}</div>
12000
+ <div style="font-size:12px;color:#888;margin-top:4px;">FAILED</div>
12001
+ </div>
12002
+ ${errorCount > 0 ? `
12003
+ <div style="flex:1;background:#141414;border:1px solid #262626;border-radius:8px;padding:16px;text-align:center;">
12004
+ <div style="font-size:28px;font-weight:700;color:#eab308;">${errorCount}</div>
12005
+ <div style="font-size:12px;color:#888;margin-top:4px;">ERRORS</div>
12006
+ </div>` : ""}
12007
+ </div>
12008
+
12009
+ <!-- Results -->
12010
+ ${resultCards}
12011
+
12012
+ <!-- Footer -->
12013
+ <div style="margin-top:32px;padding-top:20px;border-top:1px solid #262626;display:flex;justify-content:space-between;font-size:13px;color:#666;">
12014
+ <div>
12015
+ Total tokens: ${totalTokens.toLocaleString()} | Total cost: ${formatCost(totalCostCents)}
12016
+ </div>
12017
+ <div>
12018
+ Generated: ${escapeHtml(generatedAt)}
12019
+ </div>
12020
+ </div>
12021
+ </div>
12022
+ </body>
12023
+ </html>`;
10961
12024
  }
10962
- function formatCostsCsv(summary) {
10963
- const lines = [];
10964
- lines.push("scenario,runs,total_cost_cents,avg_cost_cents,tokens");
10965
- for (const s of summary.byScenario) {
10966
- const avgCostCents = s.runs > 0 ? s.costCents / s.runs : 0;
10967
- const name = s.name.includes(",") ? `"${s.name.replace(/"/g, '""')}"` : s.name;
10968
- lines.push(`${name},${s.runs},${s.costCents},${avgCostCents.toFixed(2)},${s.tokens}`);
10969
- }
10970
- return lines.join(`
10971
- `);
12025
+ function generateLatestReport() {
12026
+ const runs = listRuns({ limit: 1 });
12027
+ if (runs.length === 0)
12028
+ throw new Error("No runs found");
12029
+ return generateHtmlReport(runs[0].id);
10972
12030
  }
10973
12031
 
10974
12032
  // src/cli/index.tsx
12033
+ init_costs();
10975
12034
  init_personas();
10976
12035
  init_api_checks();
10977
12036
 
@@ -11436,7 +12495,7 @@ function parseAssertionString(str) {
11436
12495
  }
11437
12496
 
11438
12497
  // src/cli/index.tsx
11439
- import { existsSync as existsSync8, mkdirSync as mkdirSync4 } from "fs";
12498
+ import { existsSync as existsSync8, mkdirSync as mkdirSync5 } from "fs";
11440
12499
  import { jsxDEV } from "react/jsx-dev-runtime";
11441
12500
  var PRIORITIES = ["low", "medium", "high", "critical"];
11442
12501
  function AddForm({ onComplete }) {
@@ -11695,8 +12754,8 @@ function logError(...args) {
11695
12754
  console.error(...args);
11696
12755
  }
11697
12756
  program2.name("testers").version(package_default.version).description("AI-powered browser testing CLI").option("-q, --quiet", "Suppress all output", false).option("--no-color", "Disable color output");
11698
- var CONFIG_DIR2 = join6(process.env["HOME"] ?? "~", ".testers");
11699
- var CONFIG_PATH2 = join6(CONFIG_DIR2, "config.json");
12757
+ var CONFIG_DIR2 = join7(process.env["HOME"] ?? "~", ".testers");
12758
+ var CONFIG_PATH2 = join7(CONFIG_DIR2, "config.json");
11700
12759
  function getActiveProject() {
11701
12760
  try {
11702
12761
  if (existsSync8(CONFIG_PATH2)) {
@@ -11964,7 +13023,7 @@ program2.command("remove <id>").alias("uninstall").description("Remove a scenari
11964
13023
  program2.command("run [url] [description]").alias("test").description("Run test scenarios against a URL").option("-t, --tag <tag>", "Filter by tag (repeatable)", (val, acc) => {
11965
13024
  acc.push(val);
11966
13025
  return acc;
11967
- }, []).option("-s, --scenario <id>", "Run specific scenario ID").option("-p, --priority <level>", "Filter by priority").option("--headed", "Run browser in headed mode", false).option("-m, --model <model>", "AI model to use").option("--parallel <n>", "Number of parallel browsers", "1").option("--json", "Output results as JSON", false).option("-o, --output <filepath>", "Write JSON results to file").option("--timeout <ms>", "Timeout in milliseconds").option("--from-todos", "Import scenarios from todos before running", false).option("--project <id>", "Project ID").option("-b, --background", "Start run in background and return immediately", false).option("--browser <engine>", "Browser engine: playwright or lightpanda", "playwright").option("--env <name>", "Use a named environment for the URL").option("--dry-run", "Print what would run without launching browser", false).option("--retry <n>", "Retry failed scenarios up to n times", "0").option("--samples <n>", "Run each scenario N times and report flakiness (pass rate)", "1").option("--flakiness-threshold <n>", "Pass rate threshold below which a scenario is marked flaky (0-1)", "0.95").option("--a11y [level]", "Run axe-core WCAG accessibility scan after each navigation (level: A, AA, AAA \u2014 default AA)").option("--self-heal", "Enable AI-powered selector repair when elements can't be found (requires judgeModel or ANTHROPIC_API_KEY)", false).option("--verbose", "Show per-step timing and full tool results", false).option("--watch-results", "When used with --background, poll and display live results table until run completes", false).option("--failed-only", "Only show failed/error scenarios in output (passed count shown as summary)", false).option("--smoke", "Run only smoke-tagged scenarios (fast validation suite, <2 min)", false).option("--github-comment", "Post pass/fail summary as a GitHub PR comment (requires GITHUB_TOKEN env var)", false).option("--pr <number>", "GitHub PR number (auto-detected from GITHUB_REF if 
not provided)").option("--persona <id>", "Override persona for this run (comma-separated IDs for divergence testing)").action(async (urlArg, description, opts) => {
13026
+ }, []).option("-s, --scenario <id>", "Run specific scenario ID").option("-p, --priority <level>", "Filter by priority").option("--headed", "Run browser in headed mode", false).option("-m, --model <model>", "AI model to use").option("--parallel <n>", "Number of parallel browsers", "1").option("--json", "Output results as JSON", false).option("-o, --output <filepath>", "Write JSON results to file").option("--timeout <ms>", "Timeout in milliseconds").option("--from-todos", "Import scenarios from todos before running", false).option("--project <id>", "Project ID").option("-b, --background", "Start run in background and return immediately", false).option("--browser <engine>", "Browser engine: playwright (default), lightpanda (9x faster, no screenshots), or bun (native WKWebView, 11x faster, Bun canary required)", "playwright").option("--env <name>", "Use a named environment for the URL").option("--dry-run", "Print what would run without launching browser", false).option("--retry <n>", "Retry failed scenarios up to n times", "0").option("--samples <n>", "Run each scenario N times and report flakiness (pass rate)", "1").option("--flakiness-threshold <n>", "Pass rate threshold below which a scenario is marked flaky (0-1)", "0.95").option("--a11y [level]", "Run axe-core WCAG accessibility scan after each navigation (level: A, AA, AAA \u2014 default AA)").option("--self-heal", "Enable AI-powered selector repair when elements can't be found (requires judgeModel or ANTHROPIC_API_KEY)", false).option("--verbose", "Show per-step timing and full tool results", false).option("--watch-results", "When used with --background, poll and display live results table until run completes", false).option("--failed-only", "Only show failed/error scenarios in output (passed count shown as summary)", false).option("--smoke", "Run only smoke-tagged scenarios (fast validation suite, <2 min)", false).option("--minimal", "Fastest possible run: cheapest model, max parallelism, min turns (ideal for 
CI)", false).option("--github-comment", "Post pass/fail summary as a GitHub PR comment (requires GITHUB_TOKEN env var)", false).option("--pr <number>", "GitHub PR number (auto-detected from GITHUB_REF if not provided)").option("--persona <id>", "Override persona for this run (comma-separated IDs for divergence testing)").option("--max-cost <dollars>", "Hard budget cap in dollars \u2014 abort if estimated cost exceeds this (e.g. 0.50 for 50 cents)").option("--cache-max-age <seconds>", "Skip scenarios that passed at the same URL within this many seconds (0 = disabled)", "0").option("--diff", "Auto-detect changed files from git diff and run only relevant scenarios", false).action(async (urlArg, description, opts) => {
11968
13027
  try {
11969
13028
  const projectId = resolveProject(opts.project);
11970
13029
  let url = urlArg;
@@ -12224,11 +13283,38 @@ program2.command("run [url] [description]").alias("test").description("Run test
12224
13283
  log(chalk6.bold(` Running all ${allScenarios.length} scenarios...`));
12225
13284
  log("");
12226
13285
  }
13286
// --diff: narrow the run to scenarios that match files changed in the git working tree.
// diffScenarioIds stays undefined (no narrowing) when nothing changed or when detection fails,
// so the caller falls back to its normal scenario selection.
let diffScenarioIds;
if (opts.diff) {
  const describeError = (error) => (error instanceof Error ? error.message : String(error));
  let filePaths;
  try {
    const { execSync: execSync3 } = await import("child_process");
    const gitOptions = { cwd: process.cwd(), encoding: "utf-8" };
    const staged = execSync3("git diff --cached --name-only", gitOptions).trim();
    const unstaged = execSync3("git diff --name-only HEAD", gitOptions).trim();
    // The two listings may name the same file, so de-duplicate the combined paths.
    filePaths = [...new Set(`${staged}\n${unstaged}`.split("\n").filter(Boolean))];
  } catch (error) {
    // Only git failures land here; the reason is printed so the fallback is not a mystery.
    log(chalk6.yellow(" --diff: git diff failed. Running all scenarios."));
    log(chalk6.dim(` --diff: ${describeError(error)}`));
  }
  if (filePaths && filePaths.length === 0) {
    log(chalk6.yellow(" --diff: No changed files detected. Running all scenarios."));
  } else if (filePaths) {
    try {
      const { matchFilesToScenarios: matchFilesToScenarios2 } = await Promise.resolve().then(() => exports_affected);
      const candidateScenarios = listScenarios({ projectId });
      const matched = matchFilesToScenarios2(filePaths, candidateScenarios, []);
      if (matched.length === 0) {
        log(chalk6.yellow(` --diff: No scenarios match changed files (${filePaths.length} files changed). Exiting.`));
        process.exit(0);
      }
      diffScenarioIds = matched.map((s) => s.id);
      log(chalk6.dim(` --diff: ${filePaths.length} files changed \u2192 ${matched.length} matching scenario(s)`));
    } catch (error) {
      // Matching problems are reported as such instead of being mislabelled as a git failure.
      log(chalk6.yellow(` --diff: scenario matching failed (${describeError(error)}). Running all scenarios.`));
    }
  }
}
12227
13313
  const personaIdList = opts.persona ? opts.persona.split(",").map((s) => s.trim()).filter(Boolean) : undefined;
12228
13314
  const { run, results } = await runByFilter({
12229
13315
  url,
12230
13316
  tags: opts.tag.length > 0 ? opts.tag : undefined,
12231
- scenarioIds: opts.scenario ? [opts.scenario] : undefined,
13317
+ scenarioIds: diffScenarioIds ?? (opts.scenario ? [opts.scenario] : undefined),
12232
13318
  priority: opts.priority,
12233
13319
  model: opts.model,
12234
13320
  headed: opts.headed,
@@ -12242,7 +13328,10 @@ program2.command("run [url] [description]").alias("test").description("Run test
12242
13328
  a11y: opts.a11y ? typeof opts.a11y === "string" ? { level: opts.a11y } : true : undefined,
12243
13329
  selfHeal: opts.selfHeal || undefined,
12244
13330
  personaId: personaIdList?.[0],
12245
- personaIds: personaIdList && personaIdList.length > 1 ? personaIdList : undefined
13331
+ personaIds: personaIdList && personaIdList.length > 1 ? personaIdList : undefined,
13332
+ maxCostCents: opts.maxCost ? Math.round(parseFloat(opts.maxCost) * 100) : undefined,
13333
+ cacheMaxAgeMs: opts.cacheMaxAge ? parseInt(opts.cacheMaxAge, 10) * 1000 : undefined,
13334
+ minimal: opts.minimal || undefined
12246
13335
  });
12247
13336
  if (opts.json || opts.output) {
12248
13337
  const jsonOutput = formatJSON(run, results);
@@ -12355,7 +13444,7 @@ program2.command("import <dir>").description("Import markdown test files as scen
12355
13444
  }
12356
13445
  let imported = 0;
12357
13446
  for (const file of files) {
12358
- const content = readFileSync7(join6(absDir, file), "utf-8");
13447
+ const content = readFileSync7(join7(absDir, file), "utf-8");
12359
13448
  const lines = content.split(`
12360
13449
  `);
12361
13450
  let name = file.replace(/\.md$/, "");
@@ -12416,7 +13505,7 @@ program2.command("export [format]").description("Export scenarios as JSON (defau
12416
13505
  }
12417
13506
  const outputDir = opts.output ?? ".";
12418
13507
  if (!existsSync8(outputDir)) {
12419
- mkdirSync4(outputDir, { recursive: true });
13508
+ mkdirSync5(outputDir, { recursive: true });
12420
13509
  }
12421
13510
  for (const s of scenarios) {
12422
13511
  const lines = [];
@@ -12443,7 +13532,7 @@ program2.command("export [format]").description("Export scenarios as JSON (defau
12443
13532
  lines.push("");
12444
13533
  }
12445
13534
  const safeFilename = s.name.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 80);
12446
- const filePath = join6(outputDir, `${s.shortId}-${safeFilename}.md`);
13535
+ const filePath = join7(outputDir, `${s.shortId}-${safeFilename}.md`);
12447
13536
  writeFileSync3(filePath, lines.join(`
12448
13537
  `), "utf-8");
12449
13538
  log(chalk6.dim(` ${s.shortId}: ${s.name} \u2192 ${filePath}`));
@@ -12468,7 +13557,7 @@ program2.command("status").description("Show database and auth status").action((
12468
13557
  try {
12469
13558
  const config = loadConfig();
12470
13559
  const hasApiKey = !!config.anthropicApiKey || !!process.env["ANTHROPIC_API_KEY"];
12471
- const dbPath = join6(process.env["HOME"] ?? "~", ".testers", "testers.db");
13560
+ const dbPath = join7(process.env["HOME"] ?? "~", ".testers", "testers.db");
12472
13561
  log("");
12473
13562
  log(chalk6.bold(" Open Testers Status"));
12474
13563
  log("");
@@ -12558,7 +13647,7 @@ projectCmd.command("use <name>").description("Set active project (find or create
12558
13647
  try {
12559
13648
  const project = ensureProject(name, process.cwd());
12560
13649
  if (!existsSync8(CONFIG_DIR2)) {
12561
- mkdirSync4(CONFIG_DIR2, { recursive: true });
13650
+ mkdirSync5(CONFIG_DIR2, { recursive: true });
12562
13651
  }
12563
13652
  let config = {};
12564
13653
  if (existsSync8(CONFIG_PATH2)) {
@@ -12812,11 +13901,11 @@ program2.command("init").description("Initialize a new testing project").option(
12812
13901
  log(` ${chalk6.dim(s.shortId)} ${s.name} ${chalk6.dim(`[${s.tags.join(", ")}]`)}`);
12813
13902
  }
12814
13903
  if (opts.ci === "github") {
12815
- const workflowDir = join6(process.cwd(), ".github", "workflows");
13904
+ const workflowDir = join7(process.cwd(), ".github", "workflows");
12816
13905
  if (!existsSync8(workflowDir)) {
12817
- mkdirSync4(workflowDir, { recursive: true });
13906
+ mkdirSync5(workflowDir, { recursive: true });
12818
13907
  }
12819
- const workflowPath = join6(workflowDir, "testers.yml");
13908
+ const workflowPath = join7(workflowDir, "testers.yml");
12820
13909
  writeFileSync3(workflowPath, generateGitHubActionsWorkflow(), "utf-8");
12821
13910
  log(` CI: ${chalk6.green("GitHub Actions workflow written to .github/workflows/testers.yml")}`);
12822
13911
  } else if (opts.ci) {
@@ -13816,7 +14905,7 @@ program2.command("doctor").description("Check system setup and configuration").a
13816
14905
  log(chalk6.red("\u2717") + " ANTHROPIC_API_KEY is not set (required for AI-powered tests)");
13817
14906
  allPassed = false;
13818
14907
  }
13819
- const dbPath = join6(process.env["HOME"] ?? "~", ".testers", "testers.db");
14908
+ const dbPath = join7(process.env["HOME"] ?? "~", ".testers", "testers.db");
13820
14909
  try {
13821
14910
  const { Database: Database3 } = await import("bun:sqlite");
13822
14911
  const db2 = new Database3(dbPath, { create: true });
@@ -13843,6 +14932,23 @@ program2.command("doctor").description("Check system setup and configuration").a
13843
14932
  const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
13844
14933
  const lightpandaAvailable = isLightpandaAvailable2();
13845
14934
  log((lightpandaAvailable ? chalk6.green("\u2713") : chalk6.dim("\u25CB")) + ` Lightpanda: ${lightpandaAvailable ? "installed" : "not installed (optional)"}`);
14935
+ const { isBunWebViewAvailable: isBunWebViewAvailable2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
14936
+ const bunAvailable = isBunWebViewAvailable2();
14937
+ log((bunAvailable ? chalk6.green("\u2713") : chalk6.dim("\u25CB")) + ` Bun.WebView: ${bunAvailable ? "available (native, ~11x faster)" : "not available \u2014 upgrade to Bun canary: bun upgrade --canary (optional)"}`);
14938
+ log("");
14939
+ log(chalk6.dim(" AI Providers:"));
14940
+ const anthropicKey = !!process.env["ANTHROPIC_API_KEY"];
14941
+ const openaiKey = !!process.env["OPENAI_API_KEY"];
14942
+ const googleKey = !!process.env["GOOGLE_API_KEY"];
14943
+ const cerebrasKey = !!process.env["CEREBRAS_API_KEY"];
14944
+ log((anthropicKey ? chalk6.green(" \u2713") : chalk6.red(" \u2717")) + ` Anthropic (ANTHROPIC_API_KEY)${!anthropicKey ? " \u2014 required for default model" : ""}`);
14945
+ log((openaiKey ? chalk6.green(" \u2713") : chalk6.dim(" \u25CB")) + ` OpenAI (OPENAI_API_KEY) \u2014 optional, enables gpt-* models`);
14946
+ log((googleKey ? chalk6.green(" \u2713") : chalk6.dim(" \u25CB")) + ` Google Gemini (GOOGLE_API_KEY) \u2014 optional, enables gemini-* models`);
14947
+ log((cerebrasKey ? chalk6.green(" \u2713") : chalk6.dim(" \u25CB")) + ` Cerebras (CEREBRAS_API_KEY) \u2014 optional, enables llama-*/qwen-* at ~20x faster inference`);
14948
+ if (!anthropicKey && !openaiKey && !googleKey && !cerebrasKey) {
14949
+ log(chalk6.red(" \u2717") + " No AI provider API keys found \u2014 at least one is required");
14950
+ allPassed = false;
14951
+ }
13846
14952
  if (!allPassed) {
13847
14953
  process.exit(1);
13848
14954
  }
@@ -13851,7 +14957,7 @@ program2.command("serve").description("Start the Open Testers web dashboard").op
13851
14957
  try {
13852
14958
  const port = parseInt(opts.port, 10);
13853
14959
  const url = `http://localhost:${port}`;
13854
- const serverBin = join6(resolve(process.execPath, ".."), "..", "dist", "server", "index.js");
14960
+ const serverBin = join7(resolve(process.execPath, ".."), "..", "dist", "server", "index.js");
13855
14961
  const { join: pathJoin, resolve: pathResolve, dirname: dirname2 } = await import("path");
13856
14962
  const { fileURLToPath } = await import("url");
13857
14963
  const serverPath = pathJoin(dirname2(fileURLToPath(import.meta.url)), "..", "server", "index.js");
@@ -14403,6 +15509,24 @@ personaCmd.command("attach <persona-id> <scenario-id>").description("Attach a pe
14403
15509
  process.exit(1);
14404
15510
  }
14405
15511
  });
15512
// `persona seed`: calls seedDefaultPersonas() and reports the seeded/skipped counts it returns,
// either as raw JSON (--json) or as a one-line human-readable message.
personaCmd
  .command("seed")
  .description("Seed the 7 default global personas (idempotent)")
  .option("--json", "Output as JSON", false)
  .action((seedOpts) => {
    try {
      // Initialise the lazily-loaded seed module before reading its exports.
      init_seed_personas();
      const { seedDefaultPersonas: seedPersonas } = __toCommonJS(exports_seed_personas);
      const outcome = seedPersonas();
      if (seedOpts.json) {
        log(JSON.stringify(outcome, null, 2));
        return;
      }
      const message = outcome.seeded > 0
        ? chalk6.green(`Seeded ${outcome.seeded} default personas.`)
        : chalk6.dim(`Default personas already present (${outcome.skipped} skipped).`);
      log(message);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logError(chalk6.red(`Error: ${reason}`));
      process.exit(1);
    }
  });
14406
15530
  personaCmd.command("detach <scenario-id>").description("Detach persona from a scenario").action(async (scenarioId) => {
14407
15531
  try {
14408
15532
  const scenario = getScenario(scenarioId) ?? getScenarioByShortId(scenarioId);
@@ -14677,6 +15801,107 @@ goldenCmd.command("run <base-url>").description("Run all golden answer checks an
14677
15801
  process.exit(1);
14678
15802
  }
14679
15803
  });
15804
// `run-many <url>`: run the selected scenarios once per persona (scenario x persona matrix).
//   url              base URL handed to runByFilter for every persona run
//   --personas       comma-separated persona IDs, or "all" for enabled global personas
//   --scenarios      comma-separated scenario IDs / short IDs, or "all"
//   --parallel       positive integer passed through as the per-run worker count
//   --json           print only the collected results as JSON
// Exits 0 when every persona run passed, 1 on any failed run, invalid option, or thrown error.
program2
  .command("run-many <url>")
  .description("Run scenarios \xD7 personas matrix \u2014 test each scenario under multiple personas")
  .option("--personas <ids>", "Comma-separated persona IDs, or 'all' for all global personas", "all")
  .option("--scenarios <ids>", "Comma-separated scenario IDs, or 'all'", "all")
  .option("--parallel <n>", "Parallel workers per run", "2")
  .option("--model <model>", "AI model to use")
  .option("--project <id>", "Filter by project ID")
  .option("--json", "Output as JSON", false)
  .action(async (url, opts) => {
    try {
      const projectId = resolveProject(opts.project);
      const splitIds = (csv) => csv.split(",").map((part) => part.trim()).filter(Boolean);
      // Decorative lines are skipped under --json so the JSON payload is the only thing
      // written through log() (presumably stdout — confirm against log's definition).
      const info = (line) => {
        if (!opts.json) {
          log(line);
        }
      };
      // One definition of "this run failed", shared by the status line, summary and exit code.
      const runFailed = (run) => run.status !== "passed" || run.failed > 0;

      const parallel = Number.parseInt(opts.parallel, 10);
      if (!Number.isInteger(parallel) || parallel < 1) {
        logError(chalk6.red(`Invalid --parallel value "${opts.parallel}": expected a positive integer.`));
        process.exit(1);
      }

      let personas;
      if (opts.personas === "all") {
        personas = listPersonas({ globalOnly: true, enabled: true });
      } else {
        personas = [];
        for (const id of splitIds(opts.personas)) {
          const persona = getPersona(id);
          if (persona) {
            personas.push(persona);
          } else {
            // Surface typos instead of silently shrinking the matrix.
            logError(chalk6.yellow(`Warning: persona not found, skipping: ${id}`));
          }
        }
      }
      if (personas.length === 0) {
        logError(chalk6.red("No personas found. Run: testers persona seed"));
        process.exit(1);
      }

      let scenarios;
      if (opts.scenarios === "all") {
        // NOTE(review): "all" presumably stops at 20 scenarios because of this limit — confirm intent.
        scenarios = listScenarios({ projectId, limit: 20 });
      } else {
        const wanted = new Set(splitIds(opts.scenarios));
        scenarios = listScenarios({ projectId }).filter((s) => wanted.has(s.id) || wanted.has(s.shortId));
        const resolved = new Set(scenarios.flatMap((s) => [s.id, s.shortId]));
        for (const id of wanted) {
          if (!resolved.has(id)) {
            logError(chalk6.yellow(`Warning: scenario not found, skipping: ${id}`));
          }
        }
      }
      if (scenarios.length === 0) {
        logError(chalk6.red("No scenarios found."));
        process.exit(1);
      }

      info("");
      info(chalk6.bold(` Running ${scenarios.length} scenarios \xD7 ${personas.length} personas (${scenarios.length * personas.length} total runs)`));
      info("");

      const scenarioIds = scenarios.map((s) => s.id);
      const matrixResults = [];
      // Personas run one after another; parallelism applies inside each run.
      for (const persona of personas) {
        if (!persona) {
          continue;
        }
        info(chalk6.dim(` Starting run for persona: ${persona.name} ...`));
        const { run } = await runByFilter({
          url,
          scenarioIds,
          model: opts.model,
          parallel,
          projectId,
          personaId: persona.id
        });
        matrixResults.push({ personaName: persona.name, runId: run.id, run });
        const status = runFailed(run) ? chalk6.red("FAIL") : chalk6.green("PASS");
        info(` ${status} ${persona.name.padEnd(24)} ${run.passed}/${run.total} passed`);
      }

      const anyFailed = matrixResults.some((entry) => runFailed(entry.run));
      if (opts.json) {
        log(JSON.stringify(matrixResults, null, 2));
      } else {
        log("");
        log(chalk6.bold(" Summary"));
        log(anyFailed ? chalk6.yellow(" Some personas had failures \u2014 review per-persona results above.") : chalk6.green(" All personas passed!"));
        log("");
      }
      process.exit(anyFailed ? 1 : 0);
    } catch (error) {
      logError(chalk6.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
      process.exit(1);
    }
  });
15870
// `run-script <file>`: load a script module and run each exported hybrid scenario in order.
//   file     path to the script, resolved against the current working directory
//   --url    base URL forwarded to runHybridScenario as `baseUrl`
//   --json   print only the collected results as JSON
// Scenarios are read from the module's `scenarios` export, falling back to its default export.
// Exits 0 when every result has status "passed", otherwise 1 (also 1 on any thrown error).
program2
  .command("run-script <file>")
  .description("Run a hybrid test script (.ts) that exports an array of HybridScenario objects")
  .option("--url <url>", "Base URL to run against")
  .option("--json", "Output as JSON", false)
  .action(async (file, opts) => {
    try {
      const { resolve: resolve2 } = await import("path");
      const { pathToFileURL } = await import("url");
      const { runHybridScenario: runHybridScenario2 } = await Promise.resolve().then(() => (init_hybrid_runner(), exports_hybrid_runner));
      // Decorative lines are skipped under --json so the JSON payload is the only thing
      // written through log() (presumably stdout — confirm against log's definition).
      const info = (line) => {
        if (!opts.json) {
          log(line);
        }
      };

      const scriptPath = resolve2(process.cwd(), file);
      // Import through a file:// URL: a bare absolute path is not a valid ESM specifier on
      // Windows (drive letters are parsed as a URL scheme).
      const mod = await import(pathToFileURL(scriptPath).href);
      const scenarios = mod.scenarios ?? mod.default ?? [];
      if (!Array.isArray(scenarios) || scenarios.length === 0) {
        logError(chalk6.red(`No scenarios exported from ${file}. Export an array as 'export const scenarios = [...]'`));
        process.exit(1);
      }

      const results = [];
      for (const scenario of scenarios) {
        info(chalk6.dim(`Running: ${scenario.name} ...`));
        const result = await runHybridScenario2(scenario, { baseUrl: opts.url });
        results.push(result);
        const icon = result.status === "passed" ? chalk6.green("PASS") : chalk6.red("FAIL");
        info(`${icon} ${result.name ?? scenario.name} (${result.durationMs}ms)`);
        if (result.status !== "passed" && result.error) {
          info(chalk6.dim(` ${result.error}`));
        }
      }

      const passed = results.filter((r) => r.status === "passed").length;
      const failed = results.length - passed;
      if (opts.json) {
        log(JSON.stringify(results, null, 2));
      } else {
        log("");
        log(chalk6.bold(`Results: ${passed}/${results.length} passed${failed > 0 ? `, ${failed} failed` : ""}`));
      }
      process.exit(failed > 0 ? 1 : 0);
    } catch (error) {
      logError(chalk6.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
      process.exit(1);
    }
  });
14680
15905
  program2.hook("preAction", () => {
14681
15906
  const opts = program2.opts();
14682
15907
  QUIET = opts.quiet === true;