@hasna/testers 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +170 -21
  2. package/dashboard/dist/assets/{index-BSYf1bIR.css → index-CQzkimyO.css} +1 -1
  3. package/dashboard/dist/index.html +2 -2
  4. package/dist/cli/index.js +2043 -818
  5. package/dist/db/database.d.ts.map +1 -1
  6. package/dist/db/personas.d.ts +8 -0
  7. package/dist/db/personas.d.ts.map +1 -1
  8. package/dist/db/results.d.ts +2 -1
  9. package/dist/db/results.d.ts.map +1 -1
  10. package/dist/db/scenarios.d.ts +1 -0
  11. package/dist/db/scenarios.d.ts.map +1 -1
  12. package/dist/db/seed-personas.d.ts +15 -0
  13. package/dist/db/seed-personas.d.ts.map +1 -0
  14. package/dist/index.d.ts +1 -1
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +2220 -1441
  17. package/dist/lib/ai-client.d.ts +7 -8
  18. package/dist/lib/ai-client.d.ts.map +1 -1
  19. package/dist/lib/browser-bun.d.ts +153 -0
  20. package/dist/lib/browser-bun.d.ts.map +1 -0
  21. package/dist/lib/browser.d.ts +1 -1
  22. package/dist/lib/browser.d.ts.map +1 -1
  23. package/dist/lib/config.d.ts.map +1 -1
  24. package/dist/lib/costs.d.ts +5 -0
  25. package/dist/lib/costs.d.ts.map +1 -1
  26. package/dist/lib/failure-analyzer.d.ts +7 -0
  27. package/dist/lib/failure-analyzer.d.ts.map +1 -0
  28. package/dist/lib/failure-explainer.d.ts +17 -0
  29. package/dist/lib/failure-explainer.d.ts.map +1 -0
  30. package/dist/lib/failure-pipeline.d.ts +11 -0
  31. package/dist/lib/failure-pipeline.d.ts.map +1 -1
  32. package/dist/lib/hybrid-runner.d.ts +100 -0
  33. package/dist/lib/hybrid-runner.d.ts.map +1 -0
  34. package/dist/lib/judge.d.ts +1 -1
  35. package/dist/lib/judge.d.ts.map +1 -1
  36. package/dist/lib/reporter.d.ts +2 -0
  37. package/dist/lib/reporter.d.ts.map +1 -1
  38. package/dist/lib/runner.d.ts +5 -1
  39. package/dist/lib/runner.d.ts.map +1 -1
  40. package/dist/lib/screenshotter.d.ts.map +1 -1
  41. package/dist/mcp/index.js +8580 -6403
  42. package/dist/server/index.js +1082 -154
  43. package/dist/types/index.d.ts +60 -2
  44. package/dist/types/index.d.ts.map +1 -1
  45. package/package.json +4 -4
  46. package/dist/cli/index.d.ts +0 -3
  47. package/dist/cli/index.d.ts.map +0 -1
  48. package/dist/mcp/index.d.ts +0 -3
  49. package/dist/mcp/index.d.ts.map +0 -1
  50. /package/dashboard/dist/assets/{index-Bdn52878.js → index-D52SWwDa.js} +0 -0
@@ -1,13 +1,17 @@
1
1
  #!/usr/bin/env bun
2
2
  // @bun
3
3
  var __defProp = Object.defineProperty;
4
+ var __returnValue = (v) => v;
5
+ function __exportSetter(name, newValue) {
6
+ this[name] = __returnValue.bind(null, newValue);
7
+ }
4
8
  var __export = (target, all) => {
5
9
  for (var name in all)
6
10
  __defProp(target, name, {
7
11
  get: all[name],
8
12
  enumerable: true,
9
13
  configurable: true,
10
- set: (newValue) => all[name] = () => newValue
14
+ set: __exportSetter.bind(all, name)
11
15
  });
12
16
  };
13
17
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
@@ -46,9 +50,12 @@ function scenarioFromRow(row) {
46
50
  assertions: JSON.parse(row.assertions || "[]"),
47
51
  personaId: row.persona_id ?? null,
48
52
  scenarioType: row.scenario_type ?? "browser",
53
+ requiredRole: row.required_role ?? null,
49
54
  version: row.version,
50
55
  createdAt: row.created_at,
51
- updatedAt: row.updated_at
56
+ updatedAt: row.updated_at,
57
+ lastPassedAt: row.last_passed_at ?? null,
58
+ lastPassedUrl: row.last_passed_url ?? null
52
59
  };
53
60
  }
54
61
  function runFromRow(row) {
@@ -88,7 +95,8 @@ function resultFromRow(row) {
88
95
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
89
96
  createdAt: row.created_at,
90
97
  personaId: row.persona_id ?? null,
91
- personaName: row.persona_name ?? null
98
+ personaName: row.persona_name ?? null,
99
+ failureAnalysis: row.failure_analysis ? JSON.parse(row.failure_analysis) : null
92
100
  };
93
101
  }
94
102
  function screenshotFromRow(row) {
@@ -156,6 +164,7 @@ function flowFromRow(row) {
156
164
  };
157
165
  }
158
166
  function personaFromRow(row) {
167
+ const hasAuth = row.auth_email && row.auth_password;
159
168
  return {
160
169
  id: row.id,
161
170
  shortId: row.short_id,
@@ -164,13 +173,23 @@ function personaFromRow(row) {
164
173
  description: row.description,
165
174
  role: row.role,
166
175
  instructions: row.instructions,
167
- traits: JSON.parse(row.traits),
168
- goals: JSON.parse(row.goals),
176
+ traits: JSON.parse(row.traits || "[]"),
177
+ goals: JSON.parse(row.goals || "[]"),
178
+ behaviors: JSON.parse(row.behaviors || "[]"),
179
+ expertiseLevel: row.expertise_level || "intermediate",
180
+ demographics: JSON.parse(row.demographics || "{}"),
181
+ painPoints: JSON.parse(row.pain_points || "[]"),
169
182
  metadata: row.metadata ? JSON.parse(row.metadata) : null,
170
183
  enabled: row.enabled === 1,
171
184
  version: row.version,
172
185
  createdAt: row.created_at,
173
- updatedAt: row.updated_at
186
+ updatedAt: row.updated_at,
187
+ auth: hasAuth ? {
188
+ email: row.auth_email,
189
+ password: row.auth_password,
190
+ loginPath: row.auth_login_path ?? "/login",
191
+ cookies: row.auth_cookies ? JSON.parse(row.auth_cookies) : null
192
+ } : null
174
193
  };
175
194
  }
176
195
  function apiCheckFromRow(row) {
@@ -212,12 +231,14 @@ function apiCheckResultFromRow(row) {
212
231
  createdAt: row.created_at
213
232
  };
214
233
  }
215
- var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, ApiCheckNotFoundError, DependencyCycleError, PersonaNotFoundError;
234
+ var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, BudgetExceededError, ApiCheckNotFoundError, DependencyCycleError, PersonaNotFoundError;
216
235
  var init_types = __esm(() => {
217
236
  MODEL_MAP = {
218
237
  quick: "claude-haiku-4-5-20251001",
219
238
  thorough: "claude-sonnet-4-6-20260311",
220
- deep: "claude-opus-4-6-20260311"
239
+ deep: "claude-opus-4-6-20260311",
240
+ "cerebras-fast": "llama-3.1-8b",
241
+ "cerebras-smart": "llama-3.3-70b"
221
242
  };
222
243
  VersionConflictError = class VersionConflictError extends Error {
223
244
  constructor(entity, id) {
@@ -249,6 +270,12 @@ var init_types = __esm(() => {
249
270
  this.name = "ScheduleNotFoundError";
250
271
  }
251
272
  };
273
+ BudgetExceededError = class BudgetExceededError extends Error {
274
+ constructor(estimatedCents, capCents) {
275
+ super(`Estimated run cost ($${(estimatedCents / 100).toFixed(2)}) exceeds budget cap ($${(capCents / 100).toFixed(2)}). Pass skipBudgetCheck: true to override.`);
276
+ this.name = "BudgetExceededError";
277
+ }
278
+ };
252
279
  ApiCheckNotFoundError = class ApiCheckNotFoundError extends Error {
253
280
  constructor(id) {
254
281
  super(`API check not found: ${id}`);
@@ -670,10 +697,123 @@ CREATE TABLE IF NOT EXISTS golden_check_results (
670
697
  CREATE INDEX IF NOT EXISTS idx_golden_project ON golden_answers(project_id);
671
698
  CREATE INDEX IF NOT EXISTS idx_golden_enabled ON golden_answers(enabled);
672
699
  CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(golden_id);
700
+ `,
701
+ `
702
+ ALTER TABLE results ADD COLUMN failure_analysis TEXT;
703
+ `,
704
+ `
705
+ ALTER TABLE personas ADD COLUMN behaviors TEXT DEFAULT '[]';
706
+ ALTER TABLE personas ADD COLUMN expertise_level TEXT DEFAULT 'intermediate';
707
+ ALTER TABLE personas ADD COLUMN demographics TEXT DEFAULT '{}';
708
+ ALTER TABLE personas ADD COLUMN pain_points TEXT DEFAULT '[]';
709
+ `,
710
+ `
711
+ ALTER TABLE scenarios ADD COLUMN last_passed_at TEXT;
712
+ ALTER TABLE scenarios ADD COLUMN last_passed_url TEXT;
713
+ `,
714
+ `
715
+ ALTER TABLE personas ADD COLUMN auth_email TEXT;
716
+ ALTER TABLE personas ADD COLUMN auth_password TEXT;
717
+ ALTER TABLE personas ADD COLUMN auth_login_path TEXT DEFAULT '/login';
718
+ ALTER TABLE personas ADD COLUMN auth_cookies TEXT;
719
+ ALTER TABLE scenarios ADD COLUMN required_role TEXT;
673
720
  `
674
721
  ];
675
722
  });
676
723
 
724
+ // src/db/results.ts
725
/**
 * Insert a placeholder result row for one scenario of a run and return it.
 *
 * The row is written with status 'skipped', NULL reasoning/error, zeroed
 * step/duration/token/cost counters and '{}' metadata; later updates are
 * expected to fill in the real outcome.
 *
 * @param {object} input - Reads `runId`, `scenarioId`, `stepsTotal`, `model`
 *   and the optional `personaId` / `personaName` (stored as NULL when absent).
 * @returns The freshly inserted row as mapped by `getResult`.
 */
function createResult(input) {
  const db2 = getDatabase();
  const resultId = uuid();
  const createdAt = now();
  const insertStatement = db2.query(`
INSERT INTO results (id, run_id, scenario_id, status, reasoning, error, steps_completed, steps_total, duration_ms, model, tokens_used, cost_cents, metadata, created_at, persona_id, persona_name)
VALUES (?, ?, ?, 'skipped', NULL, NULL, 0, ?, 0, ?, 0, 0, '{}', ?, ?, ?)
`);
  const bindings = [
    resultId,
    input.runId,
    input.scenarioId,
    input.stepsTotal,
    input.model,
    createdAt,
    input.personaId ?? null,
    input.personaName ?? null
  ];
  insertStatement.run(...bindings);
  return getResult(resultId);
}
735
/**
 * Look up a single result by id.
 *
 * The id is first tried verbatim; if no row matches, it is handed to
 * `resolvePartialId("results", id)` and the resolved id (if any) is looked up.
 *
 * @param {string} id - Full id, or a value `resolvePartialId` can resolve.
 * @returns The mapped result, or `null` when nothing matches.
 */
function getResult(id) {
  const db2 = getDatabase();
  const selectById = "SELECT * FROM results WHERE id = ?";

  const exactRow = db2.query(selectById).get(id);
  if (exactRow) {
    return resultFromRow(exactRow);
  }

  const resolvedId = resolvePartialId("results", id);
  if (!resolvedId) {
    return null;
  }

  const resolvedRow = db2.query(selectById).get(resolvedId);
  return resolvedRow ? resultFromRow(resolvedRow) : null;
}
748
/**
 * Fetch every result belonging to a run, oldest first (ordered by created_at).
 *
 * @param {string} runId - Value matched against `results.run_id`.
 * @returns {Array} Rows mapped through `resultFromRow`.
 */
function listResults(runId) {
  const statement = getDatabase().query("SELECT * FROM results WHERE run_id = ? ORDER BY created_at ASC");
  return statement.all(runId).map(resultFromRow);
}
753
/**
 * Apply a partial update to an existing result row.
 *
 * Only keys of `updates` that are not `undefined` are written. `metadata` is
 * JSON-encoded; `failureAnalysis` is JSON-encoded unless it is `null`, in
 * which case NULL is stored. When no updatable key is present the existing
 * result is returned without touching the database.
 *
 * @param {string} id - Result id (anything `getResult` accepts).
 * @param {object} updates - Any of: status, reasoning, error, stepsCompleted,
 *   durationMs, tokensUsed, costCents, metadata, failureAnalysis.
 * @returns The result as re-read after the update.
 * @throws {Error} "Result not found: <id>" when the result does not exist.
 */
function updateResult(id, updates) {
  const db2 = getDatabase();
  const current = getResult(id);
  if (!current) {
    throw new Error(`Result not found: ${id}`);
  }

  const asIs = (value) => value;
  // [updates key, column name, encoder] — order fixes the SET clause order.
  const columnSpecs = [
    ["status", "status", asIs],
    ["reasoning", "reasoning", asIs],
    ["error", "error", asIs],
    ["stepsCompleted", "steps_completed", asIs],
    ["durationMs", "duration_ms", asIs],
    ["tokensUsed", "tokens_used", asIs],
    ["costCents", "cost_cents", asIs],
    ["metadata", "metadata", (value) => JSON.stringify(value)],
    ["failureAnalysis", "failure_analysis", (value) => value !== null ? JSON.stringify(value) : null]
  ];

  const assignments = [];
  const bindings = [];
  for (const [key, column, encode] of columnSpecs) {
    const value = updates[key];
    if (value === undefined) {
      continue;
    }
    assignments.push(`${column} = ?`);
    bindings.push(encode(value));
  }

  if (assignments.length === 0) {
    return current;
  }

  // Use the canonical id from the loaded row: `id` may have been partial.
  bindings.push(current.id);
  db2.query(`UPDATE results SET ${assignments.join(", ")} WHERE id = ?`).run(...bindings);
  return getResult(current.id);
}
804
/**
 * Alias of `listResults`: returns all results recorded for the given run.
 *
 * @param {string} runId - Run whose results are requested.
 * @returns {Array} Whatever `listResults(runId)` returns.
 */
function getResultsByRun(runId) {
  const resultsForRun = listResults(runId);
  return resultsForRun;
}
807
/**
 * Count the result rows stored for a run.
 *
 * @param {string} runId - Value matched against `results.run_id`.
 * @returns The `count` column of the COUNT(*) query.
 */
function countResultsByRun(runId) {
  const statement = getDatabase().query("SELECT COUNT(*) as count FROM results WHERE run_id = ?");
  const { count } = statement.get(runId);
  return count;
}
812
// Lazy initializer for src/db/results.ts: the first call runs the
// initializers of the modules this one depends on (types, then database).
var init_results = __esm(function initResultsModule() {
  init_types();
  init_database();
});
816
+
677
817
  // src/lib/browser-lightpanda.ts
678
818
  var exports_browser_lightpanda = {};
679
819
  __export(exports_browser_lightpanda, {
@@ -836,6 +976,412 @@ var init_browser_lightpanda = __esm(() => {
836
976
  init_types();
837
977
  });
838
978
 
979
+ // src/lib/browser-bun.ts
980
+ var exports_browser_bun = {};
981
+ __export(exports_browser_bun, {
982
+ isBunWebViewAvailable: () => isBunWebViewAvailable,
983
+ BunWebViewSession: () => BunWebViewSession
984
+ });
985
+ import { join as join2 } from "path";
986
+ import { mkdirSync as mkdirSync2 } from "fs";
987
+ import { homedir as homedir2 } from "os";
988
/**
 * Report whether the current runtime exposes `Bun.WebView`.
 *
 * Uses optional chaining so that a missing *or null* `globalThis.Bun`
 * yields `false` instead of throwing while probing `.WebView`.
 *
 * @returns {boolean} `true` when `globalThis.Bun.WebView` is defined.
 */
function isBunWebViewAvailable() {
  return typeof globalThis.Bun?.WebView !== "undefined";
}
991
/**
 * Resolve (and create if needed) the on-disk directory for a named browser
 * profile: `<base>/profiles/<profileName>`.
 *
 * `<base>` is `TESTERS_BROWSER_DATA_DIR` when set to a non-empty value,
 * otherwise `~/.testers/browser`.
 *
 * @param {string} profileName - Profile directory name.
 * @returns {string} Path of the (now existing) profile directory.
 */
function getProfileDir(profileName) {
  // `||` rather than `??` on purpose: an empty env var would make the base ""
  // and silently create profiles relative to the current working directory.
  const base = process.env["TESTERS_BROWSER_DATA_DIR"] || join2(homedir2(), ".testers", "browser");
  const dir = join2(base, "profiles", profileName);
  mkdirSync2(dir, { recursive: true });
  return dir;
}
997
var BunWebViewSession;
var init_browser_bun = __esm(() => {
  /**
   * Page-like adapter around a single `Bun.WebView` instance.
   *
   * It exposes the subset of a Playwright-style page API that the rest of
   * this bundle calls (navigation, evaluate, locators, waits, events) and
   * implements most DOM queries by evaluating small scripts in the view.
   * Features the view cannot provide (file upload, multi-tab, routing, PDF)
   * throw explicit errors; cookies and coverage are inert stubs.
   */
  BunWebViewSession = class BunWebViewSession {
    view;
    _sessionId;
    _eventListeners = new Map();
    // Last size requested through the constructor or resize().
    _viewport = { width: 1280, height: 720 };

    /**
     * @param {object} [opts]
     * @param {number} [opts.width=1280]
     * @param {number} [opts.height=720]
     * @param {string} [opts.profile] - Persistent profile name; when omitted
     *   the view is created with an "ephemeral" data store.
     * @param {Function} [opts.onConsole] - Passed to the view as `console`.
     * @throws {Error} When `Bun.WebView` is not available in this runtime.
     */
    constructor(opts = {}) {
      if (!isBunWebViewAvailable()) {
        throw new Error("Bun.WebView is not available. Install Bun canary: bun upgrade --canary");
      }
      const BunWebView = globalThis.Bun.WebView;
      this._viewport = { width: opts.width ?? 1280, height: opts.height ?? 720 };
      const constructorOpts = {
        width: this._viewport.width,
        height: this._viewport.height,
        dataStore: opts.profile ? { directory: getProfileDir(opts.profile) } : "ephemeral"
      };
      if (opts.onConsole) {
        constructorOpts.console = opts.onConsole;
      }
      this.view = new BunWebView(constructorOpts);
      this.view.onNavigated = (url) => {
        this._emit("navigated", url);
      };
      this.view.onNavigationFailed = (error) => {
        this._emit("navigationfailed", error);
      };
    }

    /** Navigate to `url`, then pause 200 ms to let the page settle. `opts` is accepted but unused. */
    async goto(url, opts) {
      await this.view.navigate(url);
      await new Promise((r) => setTimeout(r, 200));
    }
    async goBack() {
      await this.view.goBack();
    }
    async goForward() {
      await this.view.goForward();
    }
    async reload() {
      await this.view.reload();
    }

    /**
     * Evaluate a script in the page.
     *
     * @param {Function|string} fnOrExpr - A function (serialized with
     *   `toString()` and invoked with JSON-serialized `args`) or a raw
     *   expression string.
     * @returns Whatever the view's `evaluate` resolves to.
     */
    async evaluate(fnOrExpr, ...args) {
      if (typeof fnOrExpr !== "function") {
        return this.view.evaluate(fnOrExpr);
      }
      // JSON.stringify(undefined) is undefined, which join() would render as an
      // empty slot and produce invalid syntax such as `fn(1, , 2)`.
      const serializedArgs = args.map((a) => JSON.stringify(a) ?? "undefined").join(", ");
      return this.view.evaluate(`(${fnOrExpr.toString()})(${serializedArgs})`);
    }

    /** Capture a screenshot and return it as a Buffer. `opts` is accepted but unused. */
    async screenshot(opts) {
      const uint8 = await this.view.screenshot();
      return Buffer.from(uint8);
    }
    async click(selector, opts) {
      await this.view.click(selector, opts ? { button: opts.button } : undefined);
    }

    /**
     * Type `text` after trying to focus `selector` with a click.
     * The click is best-effort: a failure is ignored and typing proceeds.
     */
    async type(selector, text, opts) {
      try {
        await this.view.click(selector);
      } catch {}
      await this.view.type(text);
    }

    /** Clear the element's value (firing `input`), then type `value` into it. */
    async fill(selector, value) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el) { el.value = ''; el.dispatchEvent(new Event('input')); }
        })()
      `);
      await this.type(selector, value);
    }
    async press(key, opts) {
      await this.view.press(key, opts);
    }

    /** Scroll by `amount` in one of "up" | "down" | "left" | "right". */
    async scroll(direction, amount) {
      const dx = direction === "left" ? -amount : direction === "right" ? amount : 0;
      const dy = direction === "up" ? -amount : direction === "down" ? amount : 0;
      await this.view.scroll(dx, dy);
    }
    async scrollIntoView(selector) {
      await this.view.scrollTo(selector);
    }

    /**
     * Approximation of hover: only scrolls the element into view, ignoring
     * failures. No pointer events are dispatched.
     */
    async hover(selector) {
      try {
        await this.view.scrollTo(selector);
      } catch {}
    }

    /** Resize the view and remember the new size for viewportSize(). */
    async resize(width, height) {
      await this.view.resize(width, height);
      this._viewport = { width, height };
    }

    /** @returns A minimal element handle (`textContent()` only), or `null` if nothing matches. */
    async $(selector) {
      const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
      if (!exists)
        return null;
      return {
        textContent: async () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`)
      };
    }

    /** @returns One minimal handle per match at call time; each re-queries by index. */
    async $$(selector) {
      const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
      return Array.from({ length: count }, (_, i) => ({
        textContent: async () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${i}]?.textContent ?? null`)
      }));
    }
    async inputValue(selector) {
      return this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.value ?? ''`);
    }
    async isChecked(selector) {
      return this.view.evaluate(`!!(document.querySelector(${JSON.stringify(selector)})?.checked)`);
    }

    /** Visible = exists, display != none, visibility != hidden, offsetWidth > 0. */
    async isVisible(selector) {
      return this.view.evaluate(this._visibilityScript(`document.querySelector(${JSON.stringify(selector)})`));
    }

    /** NOTE(review): resolves to true when no element matches — confirm callers expect that. */
    async isEnabled(selector) {
      return this.view.evaluate(`!(document.querySelector(${JSON.stringify(selector)})?.disabled)`);
    }

    /** Set the element's value and fire `change`. Always returns `[value]`. */
    async selectOption(selector, value) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el) {
            el.value = ${JSON.stringify(value)};
            el.dispatchEvent(new Event('change'));
          }
        })()
      `);
      return [value];
    }
    async check(selector) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el && !el.checked) { el.checked = true; el.dispatchEvent(new Event('change')); }
        })()
      `);
    }
    async uncheck(selector) {
      await this.view.evaluate(`
        (() => {
          const el = document.querySelector(${JSON.stringify(selector)});
          if (el && el.checked) { el.checked = false; el.dispatchEvent(new Event('change')); }
        })()
      `);
    }
    async setInputFiles(selector, files) {
      throw new Error("File upload not supported in Bun.WebView engine. Use engine: 'playwright' instead.");
    }

    /**
     * Heuristic role locator: matches `[role=<role>]` or a `<role>` tag,
     * optionally narrowed to elements whose `aria-label` contains `opts.name`.
     */
    getByRole(role, opts) {
      const name = opts?.name?.toString() ?? "";
      // Quote attribute values so a role/name containing `"` or `\` cannot
      // break out of the selector.
      const roleAttr = this._cssString(role);
      const selector = name
        ? `[role=${roleAttr}][aria-label*=${this._cssString(name)}], ${role}[aria-label*=${this._cssString(name)}]`
        : `[role=${roleAttr}], ${role}`;
      return {
        click: (clickOpts) => this.click(selector, clickOpts),
        fill: (value) => this.fill(selector, value),
        check: () => this.check(selector),
        uncheck: () => this.uncheck(selector),
        isVisible: () => this.isVisible(selector),
        textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
        inputValue: () => this.inputValue(selector),
        first: () => this._firstHandle(selector),
        count: async () => {
          const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
          return count;
        },
        nth: (n) => this._nthHandle(selector, n)
      };
    }

    /**
     * Text locator. `first().click()` clicks the first leaf element whose
     * trimmed text equals `text`, otherwise the most specific element that
     * contains it. `opts.exact` is accepted but does not currently restrict
     * matching.
     */
    getByText(text, opts) {
      return {
        first: () => ({
          click: async (clickOpts) => {
            await this.view.evaluate(`
              (() => {
                const text = ${JSON.stringify(text)};
                const all = document.querySelectorAll('*');
                for (const el of all) {
                  if (el.children.length === 0 && el.textContent?.trim() === text) {
                    el.click(); return;
                  }
                }
                // Fallback: innermost container of the text. Taking the first
                // element that merely contains it would always pick <html>.
                const contains = (el) => !!el.textContent?.includes(text);
                for (const el of all) {
                  if (contains(el) && !Array.from(el.children).some(contains)) { el.click(); return; }
                }
              })()
            `);
          },
          waitFor: (waitOpts) => this._pollUntil(
            () => this.view.evaluate(`document.body?.textContent?.includes(${JSON.stringify(text)})`),
            waitOpts?.timeout ?? 1e4,
            `Timeout: text "${text}" not found`
          )
        })
      };
    }

    /** CSS-selector locator; `waitFor` resolves once the selector matches an element. */
    locator(selector) {
      return {
        click: (opts) => this.click(selector, opts),
        fill: (value) => this.fill(selector, value),
        scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
        // Must target this locator's selector, not an unrelated wildcard.
        first: () => this._firstHandle(selector),
        evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
        waitFor: (opts) => this._pollUntil(
          () => this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`),
          opts?.timeout ?? 1e4,
          `Timeout: ${selector}`
        )
      };
    }
    url() {
      return this.view.url;
    }
    async title() {
      return this.view.title || await this.evaluate("document.title");
    }

    /** @returns {{width: number, height: number}} Size last requested via constructor or resize(). */
    viewportSize() {
      return { ...this._viewport };
    }

    /** Load states are not observable here; this just waits 200 ms. */
    async waitForLoadState(state, opts) {
      await new Promise((r) => setTimeout(r, 200));
    }

    /**
     * Poll every 100 ms until the current URL matches.
     * @param {RegExp|string} pattern - RegExp is tested; a string matches by substring.
     * @throws {Error} After `opts.timeout` ms (default 30000).
     */
    async waitForURL(pattern, opts) {
      const timeout = opts?.timeout ?? 30000;
      const start = Date.now();
      while (Date.now() - start < timeout) {
        // The view may not report a URL before the first navigation.
        const url = String(this.view.url ?? "");
        const matches = pattern instanceof RegExp ? pattern.test(url) : url.includes(pattern);
        if (matches)
          return;
        await new Promise((r) => setTimeout(r, 100));
      }
      throw new Error(`Timeout waiting for URL to match ${pattern}`);
    }

    /** Poll every 100 ms until `selector` matches; throws after `opts.timeout` ms (default 10000). */
    async waitForSelector(selector, opts) {
      const timeout = opts?.timeout ?? 1e4;
      const start = Date.now();
      while (Date.now() - start < timeout) {
        const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
        if (exists)
          return;
        await new Promise((r) => setTimeout(r, 100));
      }
      throw new Error(`Timeout waiting for ${selector}`);
    }

    /** Load `html` through a `data:` URL, then pause 100 ms. */
    async setContent(html) {
      await this.view.navigate(`data:text/html,${encodeURIComponent(html)}`);
      await new Promise((r) => setTimeout(r, 100));
    }
    async content() {
      return this.view.evaluate("document.documentElement.outerHTML");
    }

    /** Runs the script once, immediately, in the current document (not re-run on navigation). */
    async addInitScript(script) {
      const expr = typeof script === "function" ? `(${script.toString()})()` : script;
      await this.view.evaluate(expr);
    }
    keyboard = {
      press: (key) => this.view.press(key)
    };

    /** Browser-context facade: closing works; cookies are no-ops; tabs, CDP and routing throw. */
    context() {
      return {
        close: async () => {
          await this.close();
        },
        newPage: async () => {
          throw new Error("Multi-tab not supported in Bun.WebView. Use engine: 'playwright'");
        },
        cookies: async () => [],
        addCookies: async (_) => {},
        clearCookies: async () => {},
        newCDPSession: async () => {
          throw new Error("CDP session via context not available in Bun.WebView. Use view.cdp() when shipped.");
        },
        route: async (_pattern, _handler) => {
          throw new Error("Network interception not supported in Bun.WebView. Use engine: 'cdp' or 'playwright'.");
        },
        unrouteAll: async () => {},
        pages: () => [],
        addInitScript: async (script) => {
          await this.addInitScript(script);
        }
      };
    }

    /** Register a listener; this class itself emits "navigated" and "navigationfailed". */
    on(event, handler) {
      if (!this._eventListeners.has(event))
        this._eventListeners.set(event, []);
      this._eventListeners.get(event).push(handler);
      return this;
    }
    off(event, handler) {
      const listeners = this._eventListeners.get(event) ?? [];
      this._eventListeners.set(event, listeners.filter((l) => l !== handler));
      return this;
    }

    /** Invoke listeners for `event`; a throwing listener is ignored so the rest still run. */
    _emit(event, ...args) {
      for (const handler of this._eventListeners.get(event) ?? []) {
        try {
          handler(...args);
        } catch {}
      }
    }

    /** Quote `value` as a CSS string for use inside an attribute selector. */
    _cssString(value) {
      const escaped = String(value).replace(/[\\"]/g, "\\$&").replace(/\n/g, "\\a ");
      return `"${escaped}"`;
    }

    /** Build the in-page visibility check for the element produced by `elementExpr`. */
    _visibilityScript(elementExpr) {
      return `
        (() => {
          const el = ${elementExpr};
          if (!el) return false;
          const style = window.getComputedStyle(el);
          return style.display !== 'none' && style.visibility !== 'hidden' && el.offsetWidth > 0;
        })()
      `;
    }

    /**
     * Call `probe` every 100 ms until it resolves truthy.
     * Rejects with `new Error(message)` once more than `timeout` ms have
     * elapsed; a rejection from `probe` itself propagates to the caller.
     */
    async _pollUntil(probe, timeout, message) {
      const start = Date.now();
      for (;;) {
        if (await probe())
          return;
        if (Date.now() - start > timeout)
          throw new Error(message);
        await new Promise((r) => setTimeout(r, 100));
      }
    }

    /** Handle for the first element matching `selector`; `waitFor` waits for visibility. */
    _firstHandle(selector) {
      const quoted = JSON.stringify(selector);
      return {
        click: (clickOpts) => this.click(selector, clickOpts),
        fill: (value) => this.fill(selector, value),
        textContent: () => this.view.evaluate(`document.querySelector(${quoted})?.textContent ?? null`),
        isVisible: () => this.isVisible(selector),
        hover: () => this.hover(selector),
        boundingBox: async () => null,
        scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
        evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${quoted}))`),
        waitFor: (opts) => this._pollUntil(
          () => this.isVisible(selector),
          opts?.timeout ?? 1e4,
          `Timeout waiting for ${selector}`
        )
      };
    }

    /**
     * Handle for the `n`-th (0-based) element matching `selector`.
     * Index 0 uses the view's own click; other indices use a DOM `click()`
     * because the view is addressed by selector only.
     */
    _nthHandle(selector, n) {
      const index = Number(n);
      const elementExpr = `document.querySelectorAll(${JSON.stringify(selector)})[${index}]`;
      return {
        click: async (clickOpts) => {
          if (index === 0) {
            await this.click(selector, clickOpts);
            return;
          }
          const clicked = await this.view.evaluate(`
            (() => {
              const el = ${elementExpr};
              if (!el) return false;
              el.click();
              return true;
            })()
          `);
          if (!clicked)
            throw new Error(`No element at index ${index} for ${selector}`);
        },
        textContent: () => this.view.evaluate(`${elementExpr}?.textContent ?? null`),
        isVisible: () => this.view.evaluate(this._visibilityScript(elementExpr))
      };
    }
    async pdf(_opts) {
      throw new Error("PDF generation not supported in Bun.WebView. Use engine: 'playwright'.");
    }
    // Coverage is not collected; these stubs keep callers working.
    coverage = {
      startJSCoverage: async () => {},
      stopJSCoverage: async () => [],
      startCSSCoverage: async () => {},
      stopCSSCoverage: async () => []
    };
    setSessionId(id) {
      this._sessionId = id;
    }
    getSessionId() {
      return this._sessionId;
    }
    getNativeView() {
      return this.view;
    }

    /** Close the underlying view; errors while closing are ignored. */
    async close() {
      try {
        await this.view.close();
      } catch {}
    }
    [Symbol.asyncDispose]() {
      return this.close();
    }
  };
});
1384
+
839
1385
  // src/lib/browser.ts
840
1386
  var exports_browser = {};
841
1387
  __export(exports_browser, {
@@ -857,6 +1403,22 @@ async function launchBrowser(options) {
857
1403
  }
858
1404
  return launchLightpanda2({ viewport: options?.viewport });
859
1405
  }
1406
+ if (engine === "bun") {
1407
+ const { isBunWebViewAvailable: isBunWebViewAvailable2, BunWebViewSession: BunWebViewSession2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
1408
+ if (!isBunWebViewAvailable2()) {
1409
+ throw new BrowserError("Bun.WebView not available. Upgrade to Bun canary: bun upgrade --canary");
1410
+ }
1411
+ const session = new BunWebViewSession2({
1412
+ width: options?.viewport?.width ?? 1280,
1413
+ height: options?.viewport?.height ?? 720
1414
+ });
1415
+ return {
1416
+ newContext: async () => ({ newPage: async () => session, close: async () => {} }),
1417
+ close: async () => session.close(),
1418
+ contexts: () => [],
1419
+ _bunSession: session
1420
+ };
1421
+ }
860
1422
  const headless = options?.headless ?? true;
861
1423
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
862
1424
  try {
@@ -878,6 +1440,12 @@ async function getPage(browser, options) {
878
1440
  const { getLightpandaPage: getLightpandaPage2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
879
1441
  return getLightpandaPage2(browser, options);
880
1442
  }
1443
+ if (engine === "bun") {
1444
+ const bunSession = browser._bunSession;
1445
+ if (bunSession)
1446
+ return bunSession;
1447
+ throw new BrowserError("Bun.WebView session not found on browser instance");
1448
+ }
881
1449
  const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
882
1450
  try {
883
1451
  const context = await browser.newContext({
@@ -897,6 +1465,12 @@ async function closeBrowser(browser, engine) {
897
1465
  const { closeLightpanda: closeLightpanda2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
898
1466
  return closeLightpanda2(browser);
899
1467
  }
1468
+ if (engine === "bun") {
1469
+ const bunSession = browser._bunSession;
1470
+ if (bunSession)
1471
+ await bunSession.close();
1472
+ return;
1473
+ }
900
1474
  try {
901
1475
  await browser.close();
902
1476
  } catch (error) {
@@ -966,6 +1540,9 @@ async function launchBrowserEngine(engine, config) {
966
1540
  }
967
1541
  return launchLightpanda2({ viewport: config.viewport });
968
1542
  }
1543
+ if (engine === "bun") {
1544
+ return launchBrowser({ headless: config.headless, viewport: config.viewport, engine: "bun" });
1545
+ }
969
1546
  return chromium2.launch({
970
1547
  headless: config.headless,
971
1548
  args: ["--no-sandbox", "--disable-setuid-sandbox"]
@@ -1100,8 +1677,8 @@ async function scanA11y(options) {
1100
1677
  var AXE_CDN = "https://cdn.jsdelivr.net/npm/axe-core@4/axe.min.js";
1101
1678
 
1102
1679
  // src/lib/config.ts
1103
- import { homedir as homedir2 } from "os";
1104
- import { join as join2 } from "path";
1680
+ import { homedir as homedir3 } from "os";
1681
+ import { join as join3 } from "path";
1105
1682
  import { readFileSync, existsSync as existsSync2 } from "fs";
1106
1683
  function getDefaultConfig() {
1107
1684
  return {
@@ -1113,7 +1690,7 @@ function getDefaultConfig() {
1113
1690
  timeout: 60000
1114
1691
  },
1115
1692
  screenshots: {
1116
- dir: join2(homedir2(), ".testers", "screenshots"),
1693
+ dir: join3(homedir3(), ".testers", "screenshots"),
1117
1694
  format: "png",
1118
1695
  quality: 90,
1119
1696
  fullPage: false
@@ -1139,7 +1716,8 @@ function loadConfig() {
1139
1716
  todosDbPath: fileConfig.todosDbPath,
1140
1717
  judgeModel: fileConfig.judgeModel,
1141
1718
  judgeProvider: fileConfig.judgeProvider,
1142
- selfHeal: fileConfig.selfHeal ?? false
1719
+ selfHeal: fileConfig.selfHeal ?? false,
1720
+ conversationsSpace: fileConfig.conversationsSpace
1143
1721
  };
1144
1722
  const envModel = process.env["TESTERS_MODEL"];
1145
1723
  if (envModel) {
@@ -1158,8 +1736,8 @@ function loadConfig() {
1158
1736
  var CONFIG_DIR, CONFIG_PATH;
1159
1737
  var init_config = __esm(() => {
1160
1738
  init_types();
1161
- CONFIG_DIR = join2(homedir2(), ".testers");
1162
- CONFIG_PATH = join2(CONFIG_DIR, "config.json");
1739
+ CONFIG_DIR = join3(homedir3(), ".testers");
1740
+ CONFIG_PATH = join3(CONFIG_DIR, "config.json");
1163
1741
  });
1164
1742
 
1165
1743
  // src/lib/healer.ts
@@ -1590,6 +2168,8 @@ async function runAgentLoop(options) {
1590
2168
  Instructions: ${persona.instructions}` : "",
1591
2169
  persona.traits.length > 0 ? `Traits: ${persona.traits.join(", ")}` : "",
1592
2170
  persona.goals.length > 0 ? `Goals: ${persona.goals.join("; ")}` : "",
2171
+ persona.behaviors && persona.behaviors.length > 0 ? `Behaviors: ${persona.behaviors.join("; ")}` : "",
2172
+ persona.painPoints && persona.painPoints.length > 0 ? `Pain points: ${persona.painPoints.join("; ")}` : "",
1593
2173
  "",
1594
2174
  "Stay in character throughout the test. Your observations, choices, and priorities should reflect this persona."
1595
2175
  ].filter(Boolean).join(`
@@ -1640,6 +2220,15 @@ Instructions: ${persona.instructions}` : "",
1640
2220
  const isOpenAICompat = "provider" in client;
1641
2221
  try {
1642
2222
  for (let turn = 0;turn < maxTurns; turn++) {
2223
+ if (persona && turn > 0 && turn % 5 === 0) {
2224
+ messages = [
2225
+ ...messages,
2226
+ {
2227
+ role: "user",
2228
+ content: `[Reminder: You are ${persona.name} \u2014 ${persona.role}. Traits: ${persona.traits.join(", ")}. Stay in character.]`
2229
+ }
2230
+ ];
2231
+ }
1643
2232
  const response = isOpenAICompat ? await callOpenAICompatible({
1644
2233
  baseUrl: client.baseUrl,
1645
2234
  apiKey: client.apiKey,
@@ -1734,6 +2323,8 @@ function detectProvider(model) {
1734
2323
  return "openai";
1735
2324
  if (model.startsWith("gemini-"))
1736
2325
  return "google";
2326
+ if (model.startsWith("llama-") || model.startsWith("qwen-") || model.includes("cerebras"))
2327
+ return "cerebras";
1737
2328
  return "anthropic";
1738
2329
  }
1739
2330
  function createClient(apiKey) {
@@ -1827,6 +2418,12 @@ function createClientForModel(model, apiKey) {
1827
2418
  throw new AIClientError("No Google API key. Set GOOGLE_API_KEY or pass it explicitly.");
1828
2419
  return { provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", apiKey: key };
1829
2420
  }
2421
+ if (provider === "cerebras") {
2422
+ const key = apiKey ?? process.env["CEREBRAS_API_KEY"];
2423
+ if (!key)
2424
+ throw new AIClientError("No Cerebras API key. Set CEREBRAS_API_KEY or pass it explicitly.");
2425
+ return { provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKey: key };
2426
+ }
1830
2427
  return createClient(apiKey);
1831
2428
  }
1832
2429
  var BROWSER_TOOLS;
@@ -2289,6 +2886,106 @@ var init_flows = __esm(() => {
2289
2886
  init_types();
2290
2887
  });
2291
2888
 
2889
+ // src/lib/failure-explainer.ts
2890
+ var exports_failure_explainer = {};
2891
+ __export(exports_failure_explainer, {
2892
+ explainFailure: () => explainFailure
2893
+ });
2894
/**
 * Classify a failure into one of the known failure-type labels.
 *
 * A previously stored analysis wins when it carries a concrete
 * (non-"unknown") type. Otherwise the error and reasoning text are joined
 * and scanned against TYPE_PATTERNS in declaration order; the first entry
 * whose pattern matches decides the type.
 *
 * @param {string|null|undefined} error - Raw error message of the result.
 * @param {string|null|undefined} reasoning - Reasoning text of the result.
 * @param {{type?: string}|null|undefined} existingAnalysis - Stored analysis, if any.
 * @returns {string} The detected type, or "unknown" when nothing matches.
 */
function detectType(error, reasoning, existingAnalysis) {
  const storedType = existingAnalysis?.type;
  if (storedType && storedType !== "unknown") {
    return storedType;
  }
  // Falsy parts (null, undefined, "") are dropped before joining.
  const haystack = [error, reasoning].filter(Boolean).join(" ");
  const firstHit = TYPE_PATTERNS.find((entry) => entry.pattern.test(haystack));
  return firstHit ? firstHit.type : "unknown";
}
2905
/**
 * Best-effort extraction of the selector/element a failure refers to.
 *
 * Resolution order:
 *   1. `existingAnalysis.affectedElement` when it is truthy;
 *   2. the token following the word "selector" in the error text;
 *   3. the token following the word "element" in the error text.
 *
 * For steps 2 and 3 a token wrapped in matching quotes (', " or `) is
 * captured in full, so selectors containing spaces or commas
 * (e.g. "div.card > button") are no longer cut at the first whitespace.
 * Unquoted tokens keep the previous behaviour: they end at the first
 * quote, whitespace or comma.
 *
 * @param {string|null|undefined} error - Raw error message.
 * @param {{affectedElement?: string}|null|undefined} existingAnalysis - Stored analysis, if any.
 * @returns {string|undefined} The extracted element, or undefined when none is found.
 */
function extractAffectedElement(error, existingAnalysis) {
  if (existingAnalysis?.affectedElement)
    return existingAnalysis.affectedElement;
  if (!error)
    return undefined;
  // Each pattern tries the matched-quote form first (groups 1-2) and falls
  // back to the original bare-token form (group 3) at the same position.
  const keywordPatterns = [
    /selector[:\s]+(?:(['"`])(.+?)\1|['"`]?([^'"`\s,]+)['"`]?)/i,
    /element[:\s]+(?:(['"`])(.+?)\1|['"`]?([^'"`\s,]+)['"`]?)/i
  ];
  for (const pattern of keywordPatterns) {
    const hit = error.match(pattern);
    if (hit)
      return hit[2] ?? hit[3];
  }
  return undefined;
}
2913
/**
 * Build a human-readable explanation for a stored test result.
 *
 * The result is loaded with getResult(). A passed result yields a fixed
 * "nothing to explain" record. For any other status the failure type and
 * affected element are derived via detectType() / extractAffectedElement(),
 * preferring values from the result's stored failureAnalysis.
 *
 * Confidence comes from the stored analysis label when it is one of
 * "high" / "medium" / "low", otherwise from a per-type default table.
 *
 * The cause and fix texts fall back to the "unknown" entries when the type
 * has no entry in LIKELY_CAUSES / SUGGESTED_FIXES (detectType returns a
 * stored type verbatim, so it is not guaranteed to be a table key).
 *
 * @param {string} resultId - Identifier passed to getResult().
 * @returns {object} Explanation with resultId, type, summary, likelyCause,
 *   suggestedFix, confidence, raw and (for failures) affectedElement.
 * @throws {Error} When getResult() returns nothing for resultId.
 */
function explainFailure(resultId) {
  const result = getResult(resultId);
  if (!result)
    throw new Error(`Result not found: ${resultId}`);
  if (result.status === "passed") {
    return {
      resultId,
      type: "unknown",
      summary: "This result passed \u2014 no failure to explain.",
      likelyCause: "N/A",
      suggestedFix: "N/A",
      confidence: 1,
      raw: { error: null, reasoning: result.reasoning, failureAnalysis: null }
    };
  }
  const existingAnalysis = result.failureAnalysis;
  const type = detectType(result.error, result.reasoning, existingAnalysis);
  const affectedElement = extractAffectedElement(result.error, existingAnalysis);
  // Default confidence per type, used only when the stored analysis has no
  // recognised confidence label.
  const confidenceMap = {
    selector_not_found: 0.9,
    timeout: 0.85,
    auth_error: 0.8,
    network_error: 0.85,
    assertion_failed: 0.75,
    eval_failed: 0.7,
    unknown: 0.4
  };
  const confidence = existingAnalysis?.confidence === "high" ? 0.9 : existingAnalysis?.confidence === "medium" ? 0.7 : existingAnalysis?.confidence === "low" ? 0.5 : confidenceMap[type] ?? 0.5;
  // Only the first 200 characters of the error are included in the summary.
  const errorSnippet = result.error ? result.error.slice(0, 200) : "(no error message)";
  const summary = `${type.replace(/_/g, " ")} in result ${resultId.slice(0, 8)}${affectedElement ? ` \u2014 element: ${affectedElement}` : ""}. Error: ${errorSnippet}`;
  const likelyCause = LIKELY_CAUSES[type] ?? LIKELY_CAUSES.unknown;
  const genericFix = SUGGESTED_FIXES[type] ?? SUGGESTED_FIXES.unknown;
  return {
    resultId,
    type,
    summary,
    likelyCause,
    suggestedFix: affectedElement && type === "selector_not_found" ? `Update selector "${affectedElement}". Use get_elements or get_page_html to find the new selector.` : genericFix,
    affectedElement,
    confidence,
    raw: {
      error: result.error,
      reasoning: result.reasoning,
      failureAnalysis: result.failureAnalysis
    }
  };
}
2958
// Lookup tables for the failure explainer. They are assigned inside the
// lazy initializer below, so they stay undefined until it has run.
var TYPE_PATTERNS, SUGGESTED_FIXES, LIKELY_CAUSES;
var init_failure_explainer = __esm(() => {
  init_results();
  // Ordered: detectType() returns the type of the FIRST matching entry, so
  // earlier entries take precedence over later ones.
  TYPE_PATTERNS = [
    // "selector.*not found" was dropped: it is already covered by "not found".
    { pattern: /not found|no element|waiting for selector/i, type: "selector_not_found" },
    { pattern: /timeout|timed out|exceeded.*ms/i, type: "timeout" },
    // 401 added so this table agrees with analyzeFailure(), which treats
    // both 401 and 403 as authentication failures.
    { pattern: /auth|login|unauthorized|\b401\b|403|forbidden|credentials/i, type: "auth_error" },
    { pattern: /network|fetch|ERR_|ECONNREFUSED|ENOTFOUND|request failed/i, type: "network_error" },
    { pattern: /assert|expected.*got|does not contain|mismatch/i, type: "assertion_failed" },
    // "eval.*failed" was dropped: it is already covered by "eval".
    { pattern: /eval|score/i, type: "eval_failed" }
  ];
  // Generic remediation text per failure type.
  SUGGESTED_FIXES = {
    selector_not_found: "Update the CSS selector. The element may have been renamed, moved, or removed. Use get_elements to discover the current selector.",
    assertion_failed: "Verify the expected value matches the current app behavior. The UI text or state may have changed.",
    timeout: "Increase the scenario timeout or check if the app is responding slowly. Try reducing the number of steps.",
    auth_error: "Verify the auth credentials and login flow. Check if the session is being properly established.",
    network_error: "Check if the app is running and accessible at the target URL. Verify network connectivity.",
    eval_failed: "Review the evaluation criteria. The AI may need clearer pass/fail conditions.",
    unknown: "Review the full error message and reasoning. Check the app logs for more context."
  };
  // Plain-language cause text per failure type.
  LIKELY_CAUSES = {
    selector_not_found: "A CSS selector could not locate the target element \u2014 the element may not exist, be hidden, or the selector may be stale.",
    assertion_failed: "An assertion check did not match the expected value \u2014 the app output differs from what the scenario expects.",
    timeout: "The scenario exceeded the time limit \u2014 either the app is slow or the test is waiting for an element that never appears.",
    auth_error: "Authentication failed or the session was not established \u2014 the test could not access protected content.",
    network_error: "A network request failed \u2014 the app may be unreachable or returning unexpected errors.",
    eval_failed: "The AI evaluator could not determine a pass/fail verdict with sufficient confidence.",
    unknown: "The failure cause is unclear from the available error data."
  };
});
2988
+
2292
2989
  // src/db/scan-issues.ts
2293
2990
  var exports_scan_issues = {};
2294
2991
  __export(exports_scan_issues, {
@@ -2387,8 +3084,8 @@ var init_scan_issues = __esm(() => {
2387
3084
 
2388
3085
  // src/server/index.ts
2389
3086
  import { existsSync as existsSync5 } from "fs";
2390
- import { join as join5 } from "path";
2391
- import { homedir as homedir5 } from "os";
3087
+ import { join as join6 } from "path";
3088
+ import { homedir as homedir6 } from "os";
2392
3089
 
2393
3090
  // node_modules/zod/v3/external.js
2394
3091
  var exports_external = {};
@@ -6450,7 +7147,32 @@ function listScenarios(filter) {
6450
7147
  params.push(filter.offset);
6451
7148
  }
6452
7149
  const rows = db2.query(sql).all(...params);
6453
- return rows.map(scenarioFromRow);
7150
+ const scenarios = rows.map(scenarioFromRow);
7151
+ if (scenarios.length === 0)
7152
+ return scenarios;
7153
+ const scenarioIds = scenarios.map((s) => s.id);
7154
+ const placeholders = scenarioIds.map(() => "?").join(",");
7155
+ const statsRows = db2.query(`
7156
+ SELECT scenario_id,
7157
+ COUNT(*) as total,
7158
+ SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed
7159
+ FROM (
7160
+ SELECT scenario_id, status
7161
+ FROM results
7162
+ WHERE scenario_id IN (${placeholders})
7163
+ ORDER BY created_at DESC
7164
+ )
7165
+ GROUP BY scenario_id
7166
+ `).all(...scenarioIds);
7167
+ const statsMap = new Map(statsRows.map((r) => [r.scenario_id, r]));
7168
+ return scenarios.map((s) => {
7169
+ const stats = statsMap.get(s.id);
7170
+ return {
7171
+ ...s,
7172
+ flakinessScore: stats ? stats.passed / stats.total : null,
7173
+ recentRunCount: stats?.total ?? 0
7174
+ };
7175
+ });
6454
7176
  }
6455
7177
  function updateScenario(id, input, version) {
6456
7178
  const db2 = getDatabase();
@@ -6555,6 +7277,10 @@ function countScenarios(filter) {
6555
7277
  const row = db2.query(sql).get(...params);
6556
7278
  return row.count;
6557
7279
  }
7280
/**
 * Record on a scenario row when, and against which URL, it last passed.
 *
 * Writes `last_passed_at` (current value of now()) and `last_passed_url`
 * for the scenario with the given id. Returns nothing.
 *
 * @param {string} id - Scenario id used in the WHERE clause.
 * @param {string} url - URL stored in last_passed_url.
 */
function updateScenarioPassedCache(id, url) {
  const statement = getDatabase().query(
    "UPDATE scenarios SET last_passed_at = ?, last_passed_url = ? WHERE id = ?"
  );
  statement.run(now(), url, id);
}
6558
7284
  function deleteScenario(id) {
6559
7285
  const db2 = getDatabase();
6560
7286
  const scenario = getScenario(id);
@@ -6703,92 +7429,8 @@ function updateRun(id, updates) {
6703
7429
  return getRun(existing.id);
6704
7430
  }
6705
7431
 
6706
- // src/db/results.ts
6707
- init_types();
6708
- init_database();
6709
- function createResult(input) {
6710
- const db2 = getDatabase();
6711
- const id = uuid();
6712
- const timestamp = now();
6713
- db2.query(`
6714
- INSERT INTO results (id, run_id, scenario_id, status, reasoning, error, steps_completed, steps_total, duration_ms, model, tokens_used, cost_cents, metadata, created_at, persona_id, persona_name)
6715
- VALUES (?, ?, ?, 'skipped', NULL, NULL, 0, ?, 0, ?, 0, 0, '{}', ?, ?, ?)
6716
- `).run(id, input.runId, input.scenarioId, input.stepsTotal, input.model, timestamp, input.personaId ?? null, input.personaName ?? null);
6717
- return getResult(id);
6718
- }
6719
- function getResult(id) {
6720
- const db2 = getDatabase();
6721
- let row = db2.query("SELECT * FROM results WHERE id = ?").get(id);
6722
- if (row)
6723
- return resultFromRow(row);
6724
- const fullId = resolvePartialId("results", id);
6725
- if (fullId) {
6726
- row = db2.query("SELECT * FROM results WHERE id = ?").get(fullId);
6727
- if (row)
6728
- return resultFromRow(row);
6729
- }
6730
- return null;
6731
- }
6732
- function listResults(runId) {
6733
- const db2 = getDatabase();
6734
- const rows = db2.query("SELECT * FROM results WHERE run_id = ? ORDER BY created_at ASC").all(runId);
6735
- return rows.map(resultFromRow);
6736
- }
6737
- function updateResult(id, updates) {
6738
- const db2 = getDatabase();
6739
- const existing = getResult(id);
6740
- if (!existing) {
6741
- throw new Error(`Result not found: ${id}`);
6742
- }
6743
- const sets = [];
6744
- const params = [];
6745
- if (updates.status !== undefined) {
6746
- sets.push("status = ?");
6747
- params.push(updates.status);
6748
- }
6749
- if (updates.reasoning !== undefined) {
6750
- sets.push("reasoning = ?");
6751
- params.push(updates.reasoning);
6752
- }
6753
- if (updates.error !== undefined) {
6754
- sets.push("error = ?");
6755
- params.push(updates.error);
6756
- }
6757
- if (updates.stepsCompleted !== undefined) {
6758
- sets.push("steps_completed = ?");
6759
- params.push(updates.stepsCompleted);
6760
- }
6761
- if (updates.durationMs !== undefined) {
6762
- sets.push("duration_ms = ?");
6763
- params.push(updates.durationMs);
6764
- }
6765
- if (updates.tokensUsed !== undefined) {
6766
- sets.push("tokens_used = ?");
6767
- params.push(updates.tokensUsed);
6768
- }
6769
- if (updates.costCents !== undefined) {
6770
- sets.push("cost_cents = ?");
6771
- params.push(updates.costCents);
6772
- }
6773
- if (updates.metadata !== undefined) {
6774
- sets.push("metadata = ?");
6775
- params.push(JSON.stringify(updates.metadata));
6776
- }
6777
- if (sets.length === 0) {
6778
- return existing;
6779
- }
6780
- params.push(existing.id);
6781
- db2.query(`UPDATE results SET ${sets.join(", ")} WHERE id = ?`).run(...params);
6782
- return getResult(existing.id);
6783
- }
6784
- function getResultsByRun(runId) {
6785
- return listResults(runId);
6786
- }
6787
- function countResultsByRun(runId) {
6788
- const db2 = getDatabase();
6789
- const row = db2.query("SELECT COUNT(*) as count FROM results WHERE run_id = ?").get(runId);
6790
- return row.count;
6791
- }
7432
+ // src/server/index.ts
7433
+ init_results();
6792
7434
 
6793
7435
  // src/db/screenshots.ts
6794
7436
  init_types();
@@ -6819,6 +7461,12 @@ function countScreenshots(resultId) {
6819
7461
  return row.count;
6820
7462
  }
6821
7463
 
7464
+ // src/lib/runner.ts
7465
+ init_types();
7466
+
7467
+ // src/lib/eval-runner.ts
7468
+ init_results();
7469
+
6822
7470
  // src/lib/judge.ts
6823
7471
  init_ai_client();
6824
7472
  init_types();
@@ -6879,11 +7527,13 @@ function resolveJudgeModel(config) {
6879
7527
  apiKey = process.env["OPENAI_API_KEY"];
6880
7528
  else if (provider === "google")
6881
7529
  apiKey = process.env["GOOGLE_API_KEY"];
7530
+ else if (provider === "cerebras")
7531
+ apiKey = process.env["CEREBRAS_API_KEY"];
6882
7532
  }
6883
7533
  if (!apiKey) {
6884
- apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
7534
+ apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["CEREBRAS_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
6885
7535
  if (!apiKey)
6886
- throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
7536
+ throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, CEREBRAS_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
6887
7537
  }
6888
7538
  return { model, provider, apiKey };
6889
7539
  }
@@ -6898,8 +7548,8 @@ reason: 1-2 sentences max`;
6898
7548
  async function callJudge(prompt, config) {
6899
7549
  const { model, provider, apiKey } = resolveJudgeModel(config);
6900
7550
  const threshold = 0.7;
6901
- if (provider === "openai" || provider === "google") {
6902
- const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
7551
+ if (provider === "openai" || provider === "google" || provider === "cerebras") {
7552
+ const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : provider === "cerebras" ? "https://api.cerebras.ai/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
6903
7553
  const resp2 = await callOpenAICompatible({
6904
7554
  baseUrl,
6905
7555
  apiKey,
@@ -7301,6 +7951,130 @@ async function runPipelineScenario(scenario, options) {
7301
7951
  });
7302
7952
  }
7303
7953
 
7954
+ // src/lib/runner.ts
7955
+ init_results();
7956
+
7957
+ // src/lib/failure-analyzer.ts
7958
/**
 * Heuristically classify a failed result from its error and reasoning text.
 *
 * Categories are tested in a fixed order and the first one that matches
 * wins: selector_not_found, assertion_failed, timeout, auth_error,
 * network_error, eval_failed, and finally unknown.
 *
 * The first three categories also report `stepNumber`, parsed from the
 * first "step <n>" occurrence in the reasoning text (undefined if absent).
 *
 * For selector failures the affected element is the first quoted string
 * found in the error text, then in the reasoning text. Single quotes are
 * tried first (the original behaviour); double quotes and backticks are
 * tried afterwards so that messages such as `waiting for selector "#x"`
 * also yield an element.
 *
 * NOTE(review): the assertion check's /to be/ pattern also matches text
 * like "waiting for element to be visible", so such messages are
 * classified as assertion_failed before the timeout check is reached —
 * confirm whether that precedence is intended.
 *
 * @param {string|null|undefined} error - Raw error message.
 * @param {string|null|undefined} reasoning - Reasoning text.
 * @returns {object|null} Analysis object with at least `type` and
 *   `confidence` ("high" | "medium" | "low"), or null when both inputs are
 *   empty or whitespace-only.
 */
function analyzeFailure(error, reasoning) {
  const combinedText = [error, reasoning].filter(Boolean).join(" ");
  if (!combinedText.trim())
    return null;
  const errorText = error ?? "";
  const reasoningText = reasoning ?? "";
  const matchesAny = (text, patterns) => patterns.some((pattern) => pattern.test(text));

  // Shared by the selector, assertion and timeout branches.
  const stepMatch = reasoningText.match(/step\s+(\d+)/i);
  const stepNumber = stepMatch ? Number.parseInt(stepMatch[1], 10) : undefined;

  const isSelectorFailure =
    matchesAny(errorText, [/waiting for selector/i, /not found/i, /No element/i]) ||
    matchesAny(reasoningText, [/waiting for selector/i, /could not find element/i, /element not found/i]);
  if (isSelectorFailure) {
    let affectedElement;
    // Quote styles in priority order; within a style the error text wins
    // over the reasoning text.
    for (const quoted of [/'([^']+)'/, /"([^"]+)"/, /`([^`]+)`/]) {
      const hit = errorText.match(quoted) ?? reasoningText.match(quoted);
      if (hit) {
        affectedElement = hit[1];
        break;
      }
    }
    return {
      type: "selector_not_found",
      affectedElement,
      stepNumber,
      confidence: affectedElement ? "high" : "medium"
    };
  }

  const isAssertionFailure =
    matchesAny(errorText, [/assert/i, /expected/i, /to equal/i, /to be/i, /\bgot\b/]) ||
    matchesAny(reasoningText, [/assertion.*failed/i, /expected.*but.*got/i]);
  if (isAssertionFailure) {
    // "expected X, got Y" style: group 2 is expected, group 4 is actual.
    const expectedActualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1[,\s]+(?:got|received|actual)[:\s]+(['"]?)([^'"]+)\3/i);
    // "expected X to equal Y" style: X is the actual value, Y the expected.
    const toEqualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1\s+to\s+equal\s+(['"]?)([^'"]+)\3/i);
    let expected;
    let actual;
    if (expectedActualMatch) {
      expected = expectedActualMatch[2];
      actual = expectedActualMatch[4];
    } else if (toEqualMatch) {
      expected = toEqualMatch[4];
      actual = toEqualMatch[2];
    }
    return {
      type: "assertion_failed",
      expected,
      actual,
      stepNumber,
      confidence: expected && actual ? "high" : "medium"
    };
  }

  const isTimeout =
    matchesAny(errorText, [/timeout/i, /timed out/i]) ||
    matchesAny(reasoningText, [/Timeout/i, /timed out/i]);
  if (isTimeout) {
    return {
      type: "timeout",
      stepNumber,
      confidence: "high"
    };
  }

  const isAuthError =
    matchesAny(errorText, [/\b401\b/, /\b403\b/, /login/i, /unauthorized/i, /\bauth\b/i]) ||
    matchesAny(reasoningText, [/\b401\b/, /\b403\b/, /unauthorized/i, /authentication/i]);
  if (isAuthError) {
    return {
      type: "auth_error",
      confidence: "high"
    };
  }

  const isNetworkError =
    matchesAny(errorText, [/ECONNREFUSED/i, /ENOTFOUND/i, /fetch failed/i, /network/i]) ||
    matchesAny(reasoningText, [/ECONNREFUSED/i, /fetch failed/i, /connection refused/i]);
  if (isNetworkError) {
    return {
      type: "network_error",
      confidence: "high"
    };
  }

  const isEvalFailure =
    matchesAny(errorText, [/\beval\b/i, /evaluate/i, /\bscript\b/i]) ||
    matchesAny(reasoningText, [/\beval\b/i, /evaluate/i]);
  if (isEvalFailure) {
    return {
      type: "eval_failed",
      confidence: "medium"
    };
  }

  return {
    type: "unknown",
    confidence: "low"
  };
}
8030
+
8031
+ // src/lib/costs.ts
8032
+ init_database();
8033
+ init_config();
8034
// Rough per-scenario cost estimate, in cents, keyed by model name or family.
var COST_PER_SCENARIO_CENTS = {
  haiku: 5,
  sonnet: 30,
  opus: 150,
  "claude-haiku": 5,
  "claude-sonnet": 30,
  "claude-opus": 150,
  "gpt-4o-mini": 3,
  "gpt-4o": 25,
  "gemini-2.0-flash": 2,
  "gemini-1.5-pro": 20,
  "llama-3.1-8b": 1,
  "llama-3.3-70b": 3
};
// Substring rules, checked in order against the lower-cased model name.
// Order matters: more specific fragments ("gpt-4o-mini", "llama-3.3") must
// come before the broader ones they contain ("gpt-4o", "llama").
var COST_KEY_RULES = [
  { fragments: ["opus"], key: "opus" },
  { fragments: ["sonnet"], key: "sonnet" },
  { fragments: ["haiku"], key: "haiku" },
  { fragments: ["gpt-4o-mini"], key: "gpt-4o-mini" },
  { fragments: ["gpt-4o"], key: "gpt-4o" },
  { fragments: ["gemini-2.0-flash", "gemini-flash"], key: "gemini-2.0-flash" },
  { fragments: ["gemini-1.5-pro", "gemini-pro"], key: "gemini-1.5-pro" },
  { fragments: ["llama-3.3", "llama3.3"], key: "llama-3.3-70b" },
  { fragments: ["llama"], key: "llama-3.1-8b" }
];
// Cost used when no table entry or substring rule applies.
var DEFAULT_COST_PER_SCENARIO_CENTS = 10;
/**
 * Resolve the estimated per-scenario cost (in cents) for a model name.
 *
 * Despite the name, this returns the cost value, not a table key.
 * An exact own-key match in COST_PER_SCENARIO_CENTS wins; otherwise the
 * lower-cased name is matched against COST_KEY_RULES in order; otherwise
 * the default of 10 cents is returned.
 *
 * The exact match uses an own-property check so that names colliding with
 * Object.prototype members (e.g. "constructor") are not mistaken for table
 * entries.
 *
 * @param {string} model - Model name.
 * @returns {number} Estimated cost per scenario in cents.
 */
function modelToCostKey(model) {
  if (Object.hasOwn(COST_PER_SCENARIO_CENTS, model))
    return COST_PER_SCENARIO_CENTS[model];
  const lower = model.toLowerCase();
  const rule = COST_KEY_RULES.find(({ fragments }) => fragments.some((fragment) => lower.includes(fragment)));
  return rule ? COST_PER_SCENARIO_CENTS[rule.key] : DEFAULT_COST_PER_SCENARIO_CENTS;
}
/**
 * Estimate the total cost of a run in cents.
 *
 * @param {number} scenarioCount - Number of scenarios in the run.
 * @param {string} model - Model name, resolved via modelToCostKey().
 * @param {number} [samples=1] - Samples per scenario; values below 1 count as 1.
 * @returns {number} scenarioCount x per-scenario cost x samples.
 */
function estimateRunCostCents(scenarioCount, model, samples = 1) {
  const costPerScenario = modelToCostKey(model);
  return scenarioCount * costPerScenario * Math.max(1, samples);
}
8077
+
7304
8078
  // src/db/personas.ts
7305
8079
  init_types();
7306
8080
  init_database();
@@ -7310,9 +8084,9 @@ function createPersona(input) {
7310
8084
  const short_id = shortUuid();
7311
8085
  const timestamp = now();
7312
8086
  db2.query(`
7313
- INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, metadata, enabled, version, created_at, updated_at)
7314
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
7315
- `).run(id, short_id, input.projectId ?? null, input.name, input.description ?? "", input.role, input.instructions ?? "", JSON.stringify(input.traits ?? []), JSON.stringify(input.goals ?? []), input.metadata ? JSON.stringify(input.metadata) : "{}", input.enabled === false ? 0 : 1, timestamp, timestamp);
8087
+ INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, behaviors, expertise_level, demographics, pain_points, metadata, enabled, auth_email, auth_password, auth_login_path, version, created_at, updated_at)
8088
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
8089
+ `).run(id, short_id, input.projectId ?? null, input.name, input.description ?? "", input.role, input.instructions ?? "", JSON.stringify(input.traits ?? []), JSON.stringify(input.goals ?? []), JSON.stringify(input.behaviors ?? []), input.expertiseLevel ?? "intermediate", JSON.stringify(input.demographics ?? {}), JSON.stringify(input.painPoints ?? []), input.metadata ? JSON.stringify(input.metadata) : "{}", input.enabled === false ? 0 : 1, input.authEmail ?? null, input.authPassword ?? null, input.authLoginPath ?? null, timestamp, timestamp);
7316
8090
  return getPersona(id);
7317
8091
  }
7318
8092
  function getPersona(id) {
@@ -7390,6 +8164,22 @@ function updatePersona(id, updates, version) {
7390
8164
  sets.push("goals = ?");
7391
8165
  params.push(JSON.stringify(updates.goals));
7392
8166
  }
8167
+ if (updates.behaviors !== undefined) {
8168
+ sets.push("behaviors = ?");
8169
+ params.push(JSON.stringify(updates.behaviors));
8170
+ }
8171
+ if (updates.expertiseLevel !== undefined) {
8172
+ sets.push("expertise_level = ?");
8173
+ params.push(updates.expertiseLevel);
8174
+ }
8175
+ if (updates.demographics !== undefined) {
8176
+ sets.push("demographics = ?");
8177
+ params.push(JSON.stringify(updates.demographics));
8178
+ }
8179
+ if (updates.painPoints !== undefined) {
8180
+ sets.push("pain_points = ?");
8181
+ params.push(JSON.stringify(updates.painPoints));
8182
+ }
7393
8183
  if (updates.enabled !== undefined) {
7394
8184
  sets.push("enabled = ?");
7395
8185
  params.push(updates.enabled ? 1 : 0);
@@ -7398,6 +8188,22 @@ function updatePersona(id, updates, version) {
7398
8188
  sets.push("metadata = ?");
7399
8189
  params.push(JSON.stringify(updates.metadata));
7400
8190
  }
8191
+ if (updates.authEmail !== undefined) {
8192
+ sets.push("auth_email = ?");
8193
+ params.push(updates.authEmail);
8194
+ }
8195
+ if (updates.authPassword !== undefined) {
8196
+ sets.push("auth_password = ?");
8197
+ params.push(updates.authPassword);
8198
+ }
8199
+ if (updates.authLoginPath !== undefined) {
8200
+ sets.push("auth_login_path = ?");
8201
+ params.push(updates.authLoginPath);
8202
+ }
8203
+ if (updates.authCookies !== undefined) {
8204
+ sets.push("auth_cookies = ?");
8205
+ params.push(updates.authCookies ? JSON.stringify(updates.authCookies) : null);
8206
+ }
7401
8207
  if (sets.length === 0) {
7402
8208
  return existing;
7403
8209
  }
@@ -7447,9 +8253,9 @@ function countPersonas(filter) {
7447
8253
  init_browser();
7448
8254
 
7449
8255
  // src/lib/screenshotter.ts
7450
- import { mkdirSync as mkdirSync2, existsSync as existsSync3, writeFileSync } from "fs";
7451
- import { join as join3 } from "path";
7452
- import { homedir as homedir3 } from "os";
8256
+ import { mkdirSync as mkdirSync3, existsSync as existsSync3, writeFileSync } from "fs";
8257
+ import { join as join4 } from "path";
8258
+ import { homedir as homedir4 } from "os";
7453
8259
  function slugify(text) {
7454
8260
  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
7455
8261
  }
@@ -7469,11 +8275,11 @@ function getScreenshotDir(baseDir, runId, scenarioSlug, projectName, timestamp)
7469
8275
  const project = projectName ?? "default";
7470
8276
  const dateDir = formatDate(now2);
7471
8277
  const timeDir = `${formatTime(now2)}_${runId.slice(0, 8)}`;
7472
- return join3(baseDir, project, dateDir, timeDir, scenarioSlug);
8278
+ return join4(baseDir, project, dateDir, timeDir, scenarioSlug);
7473
8279
  }
7474
8280
  function ensureDir(dirPath) {
7475
8281
  if (!existsSync3(dirPath)) {
7476
- mkdirSync2(dirPath, { recursive: true });
8282
+ mkdirSync3(dirPath, { recursive: true });
7477
8283
  }
7478
8284
  }
7479
8285
  function writeMetaSidecar(screenshotPath, meta) {
@@ -7484,10 +8290,10 @@ function writeMetaSidecar(screenshotPath, meta) {
7484
8290
  }
7485
8291
  async function generateThumbnail(page, screenshotDir, filename) {
7486
8292
  try {
7487
- const thumbDir = join3(screenshotDir, "_thumbnail");
8293
+ const thumbDir = join4(screenshotDir, "_thumbnail");
7488
8294
  ensureDir(thumbDir);
7489
8295
  const thumbFilename = filename.replace(/\.(png|jpeg)$/, ".thumb.$1");
7490
- const thumbPath = join3(thumbDir, thumbFilename);
8296
+ const thumbPath = join4(thumbDir, thumbFilename);
7491
8297
  const viewport = page.viewportSize();
7492
8298
  if (viewport) {
7493
8299
  await page.screenshot({
@@ -7501,7 +8307,7 @@ async function generateThumbnail(page, screenshotDir, filename) {
7501
8307
  return null;
7502
8308
  }
7503
8309
  }
7504
- var DEFAULT_BASE_DIR = join3(homedir3(), ".testers", "screenshots");
8310
+ var DEFAULT_BASE_DIR = join4(homedir4(), ".testers", "screenshots");
7505
8311
 
7506
8312
  class Screenshotter {
7507
8313
  baseDir;
@@ -7522,14 +8328,16 @@ class Screenshotter {
7522
8328
  const action = options.description ?? options.action;
7523
8329
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
7524
8330
  const filename = generateFilename(options.stepNumber, action);
7525
- const filePath = join3(dir, filename);
8331
+ const filePath = join4(dir, filename);
7526
8332
  ensureDir(dir);
7527
- await page.screenshot({
8333
+ const screenshotOpts = {
7528
8334
  path: filePath,
7529
8335
  fullPage: this.fullPage,
7530
- type: this.format,
7531
- quality: this.format === "jpeg" ? this.quality : undefined
7532
- });
8336
+ type: this.format
8337
+ };
8338
+ if (this.format === "jpeg")
8339
+ screenshotOpts.quality = this.quality;
8340
+ await page.screenshot(screenshotOpts);
7533
8341
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
7534
8342
  const pageUrl = page.url();
7535
8343
  const timestamp = new Date().toISOString();
@@ -7557,14 +8365,16 @@ class Screenshotter {
7557
8365
  const action = options.description ?? options.action;
7558
8366
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
7559
8367
  const filename = generateFilename(options.stepNumber, action);
7560
- const filePath = join3(dir, filename);
8368
+ const filePath = join4(dir, filename);
7561
8369
  ensureDir(dir);
7562
- await page.screenshot({
8370
+ const ssOpts2 = {
7563
8371
  path: filePath,
7564
8372
  fullPage: true,
7565
- type: this.format,
7566
- quality: this.format === "jpeg" ? this.quality : undefined
7567
- });
8373
+ type: this.format
8374
+ };
8375
+ if (this.format === "jpeg")
8376
+ ssOpts2.quality = this.quality;
8377
+ await page.screenshot(ssOpts2);
7568
8378
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
7569
8379
  const pageUrl = page.url();
7570
8380
  const timestamp = new Date().toISOString();
@@ -7592,13 +8402,15 @@ class Screenshotter {
7592
8402
  const action = options.description ?? options.action;
7593
8403
  const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
7594
8404
  const filename = generateFilename(options.stepNumber, action);
7595
- const filePath = join3(dir, filename);
8405
+ const filePath = join4(dir, filename);
7596
8406
  ensureDir(dir);
7597
- await page.locator(selector).screenshot({
8407
+ const ssOpts3 = {
7598
8408
  path: filePath,
7599
- type: this.format,
7600
- quality: this.format === "jpeg" ? this.quality : undefined
7601
- });
8409
+ type: this.format
8410
+ };
8411
+ if (this.format === "jpeg")
8412
+ ssOpts3.quality = this.quality;
8413
+ await page.locator(selector).screenshot(ssOpts3);
7602
8414
  const viewport = page.viewportSize() ?? { width: 0, height: 0 };
7603
8415
  const pageUrl = page.url();
7604
8416
  const timestamp = new Date().toISOString();
@@ -7793,14 +8605,14 @@ async function pushFailedRunToLogs(run, failedResults, scenarios) {
7793
8605
  // src/lib/todos-connector.ts
7794
8606
  import { Database as Database2 } from "bun:sqlite";
7795
8607
  import { existsSync as existsSync4 } from "fs";
7796
- import { join as join4 } from "path";
7797
- import { homedir as homedir4 } from "os";
8608
+ import { join as join5 } from "path";
8609
+ import { homedir as homedir5 } from "os";
7798
8610
  init_types();
7799
8611
  function resolveTodosDbPath() {
7800
8612
  const envPath = process.env["TODOS_DB_PATH"];
7801
8613
  if (envPath)
7802
8614
  return envPath;
7803
- return join4(homedir4(), ".todos", "todos.db");
8615
+ return join5(homedir5(), ".todos", "todos.db");
7804
8616
  }
7805
8617
  function connectToTodos() {
7806
8618
  const dbPath = resolveTodosDbPath();
@@ -7902,6 +8714,45 @@ async function notifyFailureToConversations(run, failedResults, scenarios) {
7902
8714
  });
7903
8715
  } catch {}
7904
8716
  }
8717
/**
 * Post a run summary message to a conversations space (best effort).
 *
 * The target is taken from the TESTERS_CONVERSATIONS_URL environment
 * variable and from `options.spaceId`, falling back to
 * TESTERS_CONVERSATIONS_SPACE. When either is missing nothing is sent.
 *
 * The message contains status, pass counts, URL, run id, model and (when
 * both timestamps are present) the duration. For runs whose status is
 * "failed" it also lists up to five results with status "failed" or
 * "error", followed by a count of the remainder.
 *
 * Any error raised by the POST request is swallowed.
 *
 * @param {object} run - Run record (status, total, passed, url, id, model, startedAt, finishedAt).
 * @param {Array<object>} results - Results of the run (status, scenarioId, error).
 * @param {{spaceId?: string}} [options] - Optional explicit space id.
 * @returns {Promise<void>}
 */
async function notifyRunToConversations(run, results, options) {
  const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
  const space = options?.spaceId ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
  if (!(baseUrl && space))
    return;

  let statusIcon;
  switch (run.status) {
    case "passed":
      statusIcon = "\u2705";
      break;
    case "failed":
      statusIcon = "\u274C";
      break;
    default:
      statusIcon = "\u26A0\uFE0F";
  }
  const passRate = run.total > 0 ? (run.passed / run.total * 100).toFixed(0) : "0";

  const lines = [
    `${statusIcon} **Testers run ${run.status.toUpperCase()}** \u2014 ${run.passed}/${run.total} scenarios (${passRate}% pass rate)`,
    ``,
    `**URL:** ${run.url}`,
    `**Run ID:** \`${run.id}\``,
    `**Model:** ${run.model}`
  ];
  if (run.finishedAt && run.startedAt) {
    const elapsedMs = new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime();
    lines.push(`**Duration:** ${(elapsedMs / 1000).toFixed(1)}s`);
  }

  if (run.status === "failed") {
    const failures = results.filter((entry) => entry.status === "failed" || entry.status === "error");
    if (failures.length > 0) {
      lines.push(``, `**Failures:**`);
      for (const failure of failures.slice(0, 5)) {
        // Error text is capped at 100 characters per listed failure.
        const detail = failure.error ? ` \u2014 ${failure.error.slice(0, 100)}` : "";
        lines.push(` \u274C ${failure.scenarioId.slice(0, 8)}${detail}`);
      }
      if (failures.length > 5)
        lines.push(` \u2026 and ${failures.length - 5} more`);
    }
  }

  const endpoint = `${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`;
  const payload = JSON.stringify({ content: lines.join("\n"), from: "testers" });
  try {
    await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload
    });
  } catch {}
}
7905
8756
 
7906
8757
  // src/lib/runner.ts
7907
8758
  var eventHandler = null;
@@ -7943,10 +8794,35 @@ async function runSingleScenario(scenario, runId, options) {
7943
8794
  const config = loadConfig();
7944
8795
  if (options.selfHeal !== undefined)
7945
8796
  config.selfHeal = options.selfHeal;
7946
- const model = resolveModel(options.model ?? scenario.model ?? config.defaultModel);
7947
- const client = createClientForModel(model, options.apiKey ?? config.anthropicApiKey);
8797
+ let effectiveOptions = options;
8798
+ if (options.minimal) {
8799
+ effectiveOptions = {
8800
+ ...options,
8801
+ engine: options.engine ?? "playwright"
8802
+ };
8803
+ try {
8804
+ const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda)).catch(() => ({ isLightpandaAvailable: () => false }));
8805
+ if (isLightpandaAvailable2())
8806
+ effectiveOptions = { ...effectiveOptions, engine: "lightpanda" };
8807
+ } catch {}
8808
+ }
8809
+ const model = resolveModel(effectiveOptions.minimal ? "quick" : effectiveOptions.model ?? scenario.model ?? config.defaultModel);
8810
+ if (options.cacheMaxAgeMs && options.cacheMaxAgeMs > 0 && scenario.lastPassedAt && scenario.lastPassedUrl === options.url) {
8811
+ const age = Date.now() - new Date(scenario.lastPassedAt).getTime();
8812
+ if (age < options.cacheMaxAgeMs) {
8813
+ const cached = createResult({ runId, scenarioId: scenario.id, model, stepsTotal: 0 });
8814
+ return updateResult(cached.id, {
8815
+ status: "passed",
8816
+ reasoning: `Cache hit: passed ${Math.round(age / 1000)}s ago at ${options.url}`,
8817
+ stepsCompleted: 0,
8818
+ durationMs: 0,
8819
+ tokensUsed: 0
8820
+ });
8821
+ }
8822
+ }
8823
+ const client = createClientForModel(model, effectiveOptions.apiKey ?? config.anthropicApiKey);
7948
8824
  const screenshotter = new Screenshotter({
7949
- baseDir: options.screenshotDir ?? config.screenshots.dir
8825
+ baseDir: effectiveOptions.screenshotDir ?? config.screenshots.dir
7950
8826
  });
7951
8827
  const resolvedPersonaId = options.personaId ?? scenario.personaId;
7952
8828
  const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
@@ -7962,12 +8838,20 @@ async function runSingleScenario(scenario, runId, options) {
7962
8838
  let browser = null;
7963
8839
  let page = null;
7964
8840
  try {
7965
- browser = await launchBrowser({ headless: !(options.headed ?? false), engine: options.engine });
8841
+ browser = await launchBrowser({ headless: !(effectiveOptions.headed ?? false), engine: effectiveOptions.engine });
7966
8842
  page = await getPage(browser, {
7967
8843
  viewport: config.browser.viewport
7968
8844
  });
7969
8845
  const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
7970
8846
  const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
8847
+ const consoleErrors = [];
8848
+ page.on("console", (msg) => {
8849
+ if (msg.type() === "error")
8850
+ consoleErrors.push(msg.text());
8851
+ });
8852
+ page.on("pageerror", (err) => {
8853
+ consoleErrors.push(err.message);
8854
+ });
7971
8855
  await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
7972
8856
  const stepStartTimes = new Map;
7973
8857
  const agentResult = await withTimeout(runAgentLoop({
@@ -7977,15 +8861,17 @@ async function runSingleScenario(scenario, runId, options) {
7977
8861
  screenshotter,
7978
8862
  model,
7979
8863
  runId,
7980
- maxTurns: 30,
7981
- a11y: options.a11y,
8864
+ maxTurns: effectiveOptions.minimal ? 10 : 30,
8865
+ a11y: effectiveOptions.a11y,
7982
8866
  persona: persona ? {
7983
8867
  name: persona.name,
7984
8868
  role: persona.role,
7985
8869
  description: persona.description,
7986
8870
  instructions: persona.instructions,
7987
8871
  traits: persona.traits,
7988
- goals: persona.goals
8872
+ goals: persona.goals,
8873
+ behaviors: persona.behaviors,
8874
+ painPoints: persona.painPoints
7989
8875
  } : null,
7990
8876
  onStep: (stepEvent) => {
7991
8877
  let stepDurationMs;
@@ -8012,7 +8898,7 @@ async function runSingleScenario(scenario, runId, options) {
8012
8898
  });
8013
8899
  }
8014
8900
  }), scenarioTimeout, scenario.name);
8015
- if (options.engine !== "lightpanda") {
8901
+ if (options.engine !== "lightpanda" && options.engine !== "bun") {
8016
8902
  for (const ss of agentResult.screenshots) {
8017
8903
  try {
8018
8904
  createScreenshot({
@@ -8030,8 +8916,8 @@ async function runSingleScenario(scenario, runId, options) {
8030
8916
  } catch {}
8031
8917
  }
8032
8918
  }
8033
- const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : "";
8034
- const updatedResult = updateResult(result.id, {
8919
+ const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : options.engine === "bun" ? " (Running with Bun.WebView \u2014 native, ~11x faster)" : "";
8920
+ let updatedResult = updateResult(result.id, {
8035
8921
  status: agentResult.status,
8036
8922
  reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
8037
8923
  stepsCompleted: agentResult.stepsCompleted,
@@ -8039,29 +8925,53 @@ async function runSingleScenario(scenario, runId, options) {
8039
8925
  tokensUsed: agentResult.tokensUsed,
8040
8926
  costCents: estimateCost(model, agentResult.tokensUsed)
8041
8927
  });
8928
+ if (agentResult.status === "failed" || agentResult.status === "error") {
8929
+ const failureAnalysis = analyzeFailure(null, agentResult.reasoning ?? null);
8930
+ if (failureAnalysis) {
8931
+ updatedResult = updateResult(result.id, { failureAnalysis });
8932
+ }
8933
+ }
8934
+ if (agentResult.status === "passed") {
8935
+ try {
8936
+ updateScenarioPassedCache(scenario.id, options.url);
8937
+ } catch {}
8938
+ }
8042
8939
  const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
8043
8940
  emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
8044
8941
  return updatedResult;
8045
8942
  } catch (error) {
8046
8943
  const errorMsg = error instanceof Error ? error.message : String(error);
8047
- const updatedResult = updateResult(result.id, {
8944
+ let updatedResult = updateResult(result.id, {
8048
8945
  status: "error",
8049
8946
  error: errorMsg,
8050
8947
  durationMs: Date.now() - new Date(result.createdAt).getTime()
8051
8948
  });
8949
+ const failureAnalysis = analyzeFailure(errorMsg, null);
8950
+ if (failureAnalysis) {
8951
+ updatedResult = updateResult(result.id, { failureAnalysis });
8952
+ }
8052
8953
  emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
8053
8954
  return updatedResult;
8054
8955
  } finally {
8055
8956
  if (browser)
8056
- await closeBrowser(browser, options.engine);
8957
+ await closeBrowser(browser, effectiveOptions.engine);
8057
8958
  }
8058
8959
  }
8059
8960
  async function runBatch(scenarios, options) {
8060
8961
  const config = loadConfig();
8061
- const model = resolveModel(options.model ?? config.defaultModel);
8062
- const parallel = options.parallel ?? 1;
8962
+ const model = resolveModel(options.minimal ? "quick" : options.model ?? config.defaultModel);
8963
+ const parallel = options.minimal ? Math.max(5, options.parallel ?? 1) : options.parallel ?? 1;
8063
8964
  const samples = options.samples ?? 1;
8064
8965
  const flakinessThreshold = options.flakinessThreshold ?? 0.95;
8966
+ if (!options.skipBudgetCheck) {
8967
+ const cap = options.maxCostCents ?? config.defaultMaxCostCents;
8968
+ if (cap !== undefined && cap > 0) {
8969
+ const estimated = estimateRunCostCents(scenarios.length, model, samples);
8970
+ if (estimated > cap) {
8971
+ throw new BudgetExceededError(estimated, cap);
8972
+ }
8973
+ }
8974
+ }
8065
8975
  const run = createRun({
8066
8976
  url: options.url,
8067
8977
  model,
@@ -8203,6 +9113,10 @@ async function runBatch(scenarios, options) {
8203
9113
  createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
8204
9114
  notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
8205
9115
  }
9116
+ const conversationsSpaceId = config.conversationsSpace ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
9117
+ if (conversationsSpaceId) {
9118
+ notifyRunToConversations(finalRun, results, { spaceId: conversationsSpaceId }).catch(() => {});
9119
+ }
8206
9120
  return { run: finalRun, results };
8207
9121
  }
8208
9122
  async function runByFilter(options) {
@@ -9545,7 +10459,7 @@ async function handleRequest(req) {
9545
10459
  if (pathname === "/api/status" && method === "GET") {
9546
10460
  const config = loadConfig();
9547
10461
  getDatabase();
9548
- const dbPath = process.env["TESTERS_DB_PATH"] ?? join5(homedir5(), ".testers", "testers.db");
10462
+ const dbPath = process.env["TESTERS_DB_PATH"] ?? join6(homedir6(), ".testers", "testers.db");
9549
10463
  const scenarios = listScenarios();
9550
10464
  const runs = listRuns();
9551
10465
  return jsonResponse({
@@ -9736,6 +10650,20 @@ async function handleRequest(req) {
9736
10650
  const total = countScreenshots(id);
9737
10651
  return jsonResponse({ ...result, screenshots }, 200, { "X-Total-Count": String(total) });
9738
10652
  }
10653
+ const resultExplainMatch = pathname.match(/^\/api\/results\/([^/]+)\/explain$/);
10654
+ if (resultExplainMatch && method === "GET") {
10655
+ const id = resultExplainMatch[1];
10656
+ try {
10657
+ const { explainFailure: explainFailure2 } = await Promise.resolve().then(() => (init_failure_explainer(), exports_failure_explainer));
10658
+ const explanation = explainFailure2(id);
10659
+ return jsonResponse(explanation);
10660
+ } catch (err) {
10661
+ const msg = err instanceof Error ? err.message : String(err);
10662
+ if (msg.includes("not found"))
10663
+ return errorResponse(msg, 404);
10664
+ return errorResponse(msg, 500);
10665
+ }
10666
+ }
9739
10667
  const screenshotFileMatch = pathname.match(/^\/api\/screenshots\/([^/]+)\/file$/);
9740
10668
  if (screenshotFileMatch && method === "GET") {
9741
10669
  const id = screenshotFileMatch[1];
@@ -10190,7 +11118,7 @@ async function handleRequest(req) {
10190
11118
  return jsonResponse({ routes, apiRoutes, totalCovered: coverageMap.size });
10191
11119
  }
10192
11120
  if (!pathname.startsWith("/api")) {
10193
- const dashboardDir = join5(import.meta.dir, "..", "..", "dashboard", "dist");
11121
+ const dashboardDir = join6(import.meta.dir, "..", "..", "dashboard", "dist");
10194
11122
  if (!existsSync5(dashboardDir)) {
10195
11123
  return new Response(`<!DOCTYPE html>
10196
11124
  <html>
@@ -10209,7 +11137,7 @@ async function handleRequest(req) {
10209
11137
  }
10210
11138
  });
10211
11139
  }
10212
- const filePath = join5(dashboardDir, pathname === "/" ? "index.html" : pathname);
11140
+ const filePath = join6(dashboardDir, pathname === "/" ? "index.html" : pathname);
10213
11141
  if (existsSync5(filePath)) {
10214
11142
  const file = Bun.file(filePath);
10215
11143
  return new Response(file, {
@@ -10219,7 +11147,7 @@ async function handleRequest(req) {
10219
11147
  }
10220
11148
  });
10221
11149
  }
10222
- const indexPath = join5(dashboardDir, "index.html");
11150
+ const indexPath = join6(dashboardDir, "index.html");
10223
11151
  if (existsSync5(indexPath)) {
10224
11152
  const file = Bun.file(indexPath);
10225
11153
  return new Response(file, {