@hasna/testers 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +170 -21
- package/dashboard/dist/assets/{index-BSYf1bIR.css → index-CQzkimyO.css} +1 -1
- package/dashboard/dist/index.html +2 -2
- package/dist/cli/index.js +2043 -818
- package/dist/db/database.d.ts.map +1 -1
- package/dist/db/personas.d.ts +8 -0
- package/dist/db/personas.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -1
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/scenarios.d.ts +1 -0
- package/dist/db/scenarios.d.ts.map +1 -1
- package/dist/db/seed-personas.d.ts +15 -0
- package/dist/db/seed-personas.d.ts.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2220 -1441
- package/dist/lib/ai-client.d.ts +7 -8
- package/dist/lib/ai-client.d.ts.map +1 -1
- package/dist/lib/browser-bun.d.ts +153 -0
- package/dist/lib/browser-bun.d.ts.map +1 -0
- package/dist/lib/browser.d.ts +1 -1
- package/dist/lib/browser.d.ts.map +1 -1
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/costs.d.ts +5 -0
- package/dist/lib/costs.d.ts.map +1 -1
- package/dist/lib/failure-analyzer.d.ts +7 -0
- package/dist/lib/failure-analyzer.d.ts.map +1 -0
- package/dist/lib/failure-explainer.d.ts +17 -0
- package/dist/lib/failure-explainer.d.ts.map +1 -0
- package/dist/lib/failure-pipeline.d.ts +11 -0
- package/dist/lib/failure-pipeline.d.ts.map +1 -1
- package/dist/lib/hybrid-runner.d.ts +100 -0
- package/dist/lib/hybrid-runner.d.ts.map +1 -0
- package/dist/lib/judge.d.ts +1 -1
- package/dist/lib/judge.d.ts.map +1 -1
- package/dist/lib/reporter.d.ts +2 -0
- package/dist/lib/reporter.d.ts.map +1 -1
- package/dist/lib/runner.d.ts +5 -1
- package/dist/lib/runner.d.ts.map +1 -1
- package/dist/lib/screenshotter.d.ts.map +1 -1
- package/dist/mcp/index.js +8580 -6403
- package/dist/server/index.js +1082 -154
- package/dist/types/index.d.ts +60 -2
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +4 -4
- package/dist/cli/index.d.ts +0 -3
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/mcp/index.d.ts +0 -3
- package/dist/mcp/index.d.ts.map +0 -1
- /package/dashboard/dist/assets/{index-Bdn52878.js → index-D52SWwDa.js} +0 -0
package/dist/server/index.js
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
3
|
var __defProp = Object.defineProperty;
|
|
4
|
+
var __returnValue = (v) => v;
|
|
5
|
+
function __exportSetter(name, newValue) {
|
|
6
|
+
this[name] = __returnValue.bind(null, newValue);
|
|
7
|
+
}
|
|
4
8
|
var __export = (target, all) => {
|
|
5
9
|
for (var name in all)
|
|
6
10
|
__defProp(target, name, {
|
|
7
11
|
get: all[name],
|
|
8
12
|
enumerable: true,
|
|
9
13
|
configurable: true,
|
|
10
|
-
set: (
|
|
14
|
+
set: __exportSetter.bind(all, name)
|
|
11
15
|
});
|
|
12
16
|
};
|
|
13
17
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
@@ -46,9 +50,12 @@ function scenarioFromRow(row) {
|
|
|
46
50
|
assertions: JSON.parse(row.assertions || "[]"),
|
|
47
51
|
personaId: row.persona_id ?? null,
|
|
48
52
|
scenarioType: row.scenario_type ?? "browser",
|
|
53
|
+
requiredRole: row.required_role ?? null,
|
|
49
54
|
version: row.version,
|
|
50
55
|
createdAt: row.created_at,
|
|
51
|
-
updatedAt: row.updated_at
|
|
56
|
+
updatedAt: row.updated_at,
|
|
57
|
+
lastPassedAt: row.last_passed_at ?? null,
|
|
58
|
+
lastPassedUrl: row.last_passed_url ?? null
|
|
52
59
|
};
|
|
53
60
|
}
|
|
54
61
|
function runFromRow(row) {
|
|
@@ -88,7 +95,8 @@ function resultFromRow(row) {
|
|
|
88
95
|
metadata: row.metadata ? JSON.parse(row.metadata) : null,
|
|
89
96
|
createdAt: row.created_at,
|
|
90
97
|
personaId: row.persona_id ?? null,
|
|
91
|
-
personaName: row.persona_name ?? null
|
|
98
|
+
personaName: row.persona_name ?? null,
|
|
99
|
+
failureAnalysis: row.failure_analysis ? JSON.parse(row.failure_analysis) : null
|
|
92
100
|
};
|
|
93
101
|
}
|
|
94
102
|
function screenshotFromRow(row) {
|
|
@@ -156,6 +164,7 @@ function flowFromRow(row) {
|
|
|
156
164
|
};
|
|
157
165
|
}
|
|
158
166
|
function personaFromRow(row) {
|
|
167
|
+
const hasAuth = row.auth_email && row.auth_password;
|
|
159
168
|
return {
|
|
160
169
|
id: row.id,
|
|
161
170
|
shortId: row.short_id,
|
|
@@ -164,13 +173,23 @@ function personaFromRow(row) {
|
|
|
164
173
|
description: row.description,
|
|
165
174
|
role: row.role,
|
|
166
175
|
instructions: row.instructions,
|
|
167
|
-
traits: JSON.parse(row.traits),
|
|
168
|
-
goals: JSON.parse(row.goals),
|
|
176
|
+
traits: JSON.parse(row.traits || "[]"),
|
|
177
|
+
goals: JSON.parse(row.goals || "[]"),
|
|
178
|
+
behaviors: JSON.parse(row.behaviors || "[]"),
|
|
179
|
+
expertiseLevel: row.expertise_level || "intermediate",
|
|
180
|
+
demographics: JSON.parse(row.demographics || "{}"),
|
|
181
|
+
painPoints: JSON.parse(row.pain_points || "[]"),
|
|
169
182
|
metadata: row.metadata ? JSON.parse(row.metadata) : null,
|
|
170
183
|
enabled: row.enabled === 1,
|
|
171
184
|
version: row.version,
|
|
172
185
|
createdAt: row.created_at,
|
|
173
|
-
updatedAt: row.updated_at
|
|
186
|
+
updatedAt: row.updated_at,
|
|
187
|
+
auth: hasAuth ? {
|
|
188
|
+
email: row.auth_email,
|
|
189
|
+
password: row.auth_password,
|
|
190
|
+
loginPath: row.auth_login_path ?? "/login",
|
|
191
|
+
cookies: row.auth_cookies ? JSON.parse(row.auth_cookies) : null
|
|
192
|
+
} : null
|
|
174
193
|
};
|
|
175
194
|
}
|
|
176
195
|
function apiCheckFromRow(row) {
|
|
@@ -212,12 +231,14 @@ function apiCheckResultFromRow(row) {
|
|
|
212
231
|
createdAt: row.created_at
|
|
213
232
|
};
|
|
214
233
|
}
|
|
215
|
-
var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, ApiCheckNotFoundError, DependencyCycleError, PersonaNotFoundError;
|
|
234
|
+
var MODEL_MAP, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ScheduleNotFoundError, BudgetExceededError, ApiCheckNotFoundError, DependencyCycleError, PersonaNotFoundError;
|
|
216
235
|
var init_types = __esm(() => {
|
|
217
236
|
MODEL_MAP = {
|
|
218
237
|
quick: "claude-haiku-4-5-20251001",
|
|
219
238
|
thorough: "claude-sonnet-4-6-20260311",
|
|
220
|
-
deep: "claude-opus-4-6-20260311"
|
|
239
|
+
deep: "claude-opus-4-6-20260311",
|
|
240
|
+
"cerebras-fast": "llama-3.1-8b",
|
|
241
|
+
"cerebras-smart": "llama-3.3-70b"
|
|
221
242
|
};
|
|
222
243
|
VersionConflictError = class VersionConflictError extends Error {
|
|
223
244
|
constructor(entity, id) {
|
|
@@ -249,6 +270,12 @@ var init_types = __esm(() => {
|
|
|
249
270
|
this.name = "ScheduleNotFoundError";
|
|
250
271
|
}
|
|
251
272
|
};
|
|
273
|
+
BudgetExceededError = class BudgetExceededError extends Error {
|
|
274
|
+
constructor(estimatedCents, capCents) {
|
|
275
|
+
super(`Estimated run cost ($${(estimatedCents / 100).toFixed(2)}) exceeds budget cap ($${(capCents / 100).toFixed(2)}). Pass skipBudgetCheck: true to override.`);
|
|
276
|
+
this.name = "BudgetExceededError";
|
|
277
|
+
}
|
|
278
|
+
};
|
|
252
279
|
ApiCheckNotFoundError = class ApiCheckNotFoundError extends Error {
|
|
253
280
|
constructor(id) {
|
|
254
281
|
super(`API check not found: ${id}`);
|
|
@@ -670,10 +697,123 @@ CREATE TABLE IF NOT EXISTS golden_check_results (
|
|
|
670
697
|
CREATE INDEX IF NOT EXISTS idx_golden_project ON golden_answers(project_id);
|
|
671
698
|
CREATE INDEX IF NOT EXISTS idx_golden_enabled ON golden_answers(enabled);
|
|
672
699
|
CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(golden_id);
|
|
700
|
+
`,
|
|
701
|
+
`
|
|
702
|
+
ALTER TABLE results ADD COLUMN failure_analysis TEXT;
|
|
703
|
+
`,
|
|
704
|
+
`
|
|
705
|
+
ALTER TABLE personas ADD COLUMN behaviors TEXT DEFAULT '[]';
|
|
706
|
+
ALTER TABLE personas ADD COLUMN expertise_level TEXT DEFAULT 'intermediate';
|
|
707
|
+
ALTER TABLE personas ADD COLUMN demographics TEXT DEFAULT '{}';
|
|
708
|
+
ALTER TABLE personas ADD COLUMN pain_points TEXT DEFAULT '[]';
|
|
709
|
+
`,
|
|
710
|
+
`
|
|
711
|
+
ALTER TABLE scenarios ADD COLUMN last_passed_at TEXT;
|
|
712
|
+
ALTER TABLE scenarios ADD COLUMN last_passed_url TEXT;
|
|
713
|
+
`,
|
|
714
|
+
`
|
|
715
|
+
ALTER TABLE personas ADD COLUMN auth_email TEXT;
|
|
716
|
+
ALTER TABLE personas ADD COLUMN auth_password TEXT;
|
|
717
|
+
ALTER TABLE personas ADD COLUMN auth_login_path TEXT DEFAULT '/login';
|
|
718
|
+
ALTER TABLE personas ADD COLUMN auth_cookies TEXT;
|
|
719
|
+
ALTER TABLE scenarios ADD COLUMN required_role TEXT;
|
|
673
720
|
`
|
|
674
721
|
];
|
|
675
722
|
});
|
|
676
723
|
|
|
724
|
+
// src/db/results.ts
|
|
725
|
+
function createResult(input) {
|
|
726
|
+
const db2 = getDatabase();
|
|
727
|
+
const id = uuid();
|
|
728
|
+
const timestamp = now();
|
|
729
|
+
db2.query(`
|
|
730
|
+
INSERT INTO results (id, run_id, scenario_id, status, reasoning, error, steps_completed, steps_total, duration_ms, model, tokens_used, cost_cents, metadata, created_at, persona_id, persona_name)
|
|
731
|
+
VALUES (?, ?, ?, 'skipped', NULL, NULL, 0, ?, 0, ?, 0, 0, '{}', ?, ?, ?)
|
|
732
|
+
`).run(id, input.runId, input.scenarioId, input.stepsTotal, input.model, timestamp, input.personaId ?? null, input.personaName ?? null);
|
|
733
|
+
return getResult(id);
|
|
734
|
+
}
|
|
735
|
+
function getResult(id) {
|
|
736
|
+
const db2 = getDatabase();
|
|
737
|
+
let row = db2.query("SELECT * FROM results WHERE id = ?").get(id);
|
|
738
|
+
if (row)
|
|
739
|
+
return resultFromRow(row);
|
|
740
|
+
const fullId = resolvePartialId("results", id);
|
|
741
|
+
if (fullId) {
|
|
742
|
+
row = db2.query("SELECT * FROM results WHERE id = ?").get(fullId);
|
|
743
|
+
if (row)
|
|
744
|
+
return resultFromRow(row);
|
|
745
|
+
}
|
|
746
|
+
return null;
|
|
747
|
+
}
|
|
748
|
+
function listResults(runId) {
|
|
749
|
+
const db2 = getDatabase();
|
|
750
|
+
const rows = db2.query("SELECT * FROM results WHERE run_id = ? ORDER BY created_at ASC").all(runId);
|
|
751
|
+
return rows.map(resultFromRow);
|
|
752
|
+
}
|
|
753
|
+
function updateResult(id, updates) {
|
|
754
|
+
const db2 = getDatabase();
|
|
755
|
+
const existing = getResult(id);
|
|
756
|
+
if (!existing) {
|
|
757
|
+
throw new Error(`Result not found: ${id}`);
|
|
758
|
+
}
|
|
759
|
+
const sets = [];
|
|
760
|
+
const params = [];
|
|
761
|
+
if (updates.status !== undefined) {
|
|
762
|
+
sets.push("status = ?");
|
|
763
|
+
params.push(updates.status);
|
|
764
|
+
}
|
|
765
|
+
if (updates.reasoning !== undefined) {
|
|
766
|
+
sets.push("reasoning = ?");
|
|
767
|
+
params.push(updates.reasoning);
|
|
768
|
+
}
|
|
769
|
+
if (updates.error !== undefined) {
|
|
770
|
+
sets.push("error = ?");
|
|
771
|
+
params.push(updates.error);
|
|
772
|
+
}
|
|
773
|
+
if (updates.stepsCompleted !== undefined) {
|
|
774
|
+
sets.push("steps_completed = ?");
|
|
775
|
+
params.push(updates.stepsCompleted);
|
|
776
|
+
}
|
|
777
|
+
if (updates.durationMs !== undefined) {
|
|
778
|
+
sets.push("duration_ms = ?");
|
|
779
|
+
params.push(updates.durationMs);
|
|
780
|
+
}
|
|
781
|
+
if (updates.tokensUsed !== undefined) {
|
|
782
|
+
sets.push("tokens_used = ?");
|
|
783
|
+
params.push(updates.tokensUsed);
|
|
784
|
+
}
|
|
785
|
+
if (updates.costCents !== undefined) {
|
|
786
|
+
sets.push("cost_cents = ?");
|
|
787
|
+
params.push(updates.costCents);
|
|
788
|
+
}
|
|
789
|
+
if (updates.metadata !== undefined) {
|
|
790
|
+
sets.push("metadata = ?");
|
|
791
|
+
params.push(JSON.stringify(updates.metadata));
|
|
792
|
+
}
|
|
793
|
+
if (updates.failureAnalysis !== undefined) {
|
|
794
|
+
sets.push("failure_analysis = ?");
|
|
795
|
+
params.push(updates.failureAnalysis !== null ? JSON.stringify(updates.failureAnalysis) : null);
|
|
796
|
+
}
|
|
797
|
+
if (sets.length === 0) {
|
|
798
|
+
return existing;
|
|
799
|
+
}
|
|
800
|
+
params.push(existing.id);
|
|
801
|
+
db2.query(`UPDATE results SET ${sets.join(", ")} WHERE id = ?`).run(...params);
|
|
802
|
+
return getResult(existing.id);
|
|
803
|
+
}
|
|
804
|
+
function getResultsByRun(runId) {
|
|
805
|
+
return listResults(runId);
|
|
806
|
+
}
|
|
807
|
+
function countResultsByRun(runId) {
|
|
808
|
+
const db2 = getDatabase();
|
|
809
|
+
const row = db2.query("SELECT COUNT(*) as count FROM results WHERE run_id = ?").get(runId);
|
|
810
|
+
return row.count;
|
|
811
|
+
}
|
|
812
|
+
var init_results = __esm(() => {
|
|
813
|
+
init_types();
|
|
814
|
+
init_database();
|
|
815
|
+
});
|
|
816
|
+
|
|
677
817
|
// src/lib/browser-lightpanda.ts
|
|
678
818
|
var exports_browser_lightpanda = {};
|
|
679
819
|
__export(exports_browser_lightpanda, {
|
|
@@ -836,6 +976,412 @@ var init_browser_lightpanda = __esm(() => {
|
|
|
836
976
|
init_types();
|
|
837
977
|
});
|
|
838
978
|
|
|
979
|
+
// src/lib/browser-bun.ts
|
|
980
|
+
var exports_browser_bun = {};
|
|
981
|
+
__export(exports_browser_bun, {
|
|
982
|
+
isBunWebViewAvailable: () => isBunWebViewAvailable,
|
|
983
|
+
BunWebViewSession: () => BunWebViewSession
|
|
984
|
+
});
|
|
985
|
+
import { join as join2 } from "path";
|
|
986
|
+
import { mkdirSync as mkdirSync2 } from "fs";
|
|
987
|
+
import { homedir as homedir2 } from "os";
|
|
988
|
+
function isBunWebViewAvailable() {
|
|
989
|
+
return typeof globalThis.Bun !== "undefined" && typeof globalThis.Bun.WebView !== "undefined";
|
|
990
|
+
}
|
|
991
|
+
function getProfileDir(profileName) {
|
|
992
|
+
const base = process.env["TESTERS_BROWSER_DATA_DIR"] ?? join2(homedir2(), ".testers", "browser");
|
|
993
|
+
const dir = join2(base, "profiles", profileName);
|
|
994
|
+
mkdirSync2(dir, { recursive: true });
|
|
995
|
+
return dir;
|
|
996
|
+
}
|
|
997
|
+
var BunWebViewSession;
|
|
998
|
+
var init_browser_bun = __esm(() => {
|
|
999
|
+
BunWebViewSession = class BunWebViewSession {
|
|
1000
|
+
view;
|
|
1001
|
+
_sessionId;
|
|
1002
|
+
_eventListeners = new Map;
|
|
1003
|
+
constructor(opts = {}) {
|
|
1004
|
+
if (!isBunWebViewAvailable()) {
|
|
1005
|
+
throw new Error("Bun.WebView is not available. Install Bun canary: bun upgrade --canary");
|
|
1006
|
+
}
|
|
1007
|
+
const BunWebView = globalThis.Bun.WebView;
|
|
1008
|
+
const constructorOpts = {
|
|
1009
|
+
width: opts.width ?? 1280,
|
|
1010
|
+
height: opts.height ?? 720
|
|
1011
|
+
};
|
|
1012
|
+
if (opts.profile) {
|
|
1013
|
+
constructorOpts.dataStore = { directory: getProfileDir(opts.profile) };
|
|
1014
|
+
} else {
|
|
1015
|
+
constructorOpts.dataStore = "ephemeral";
|
|
1016
|
+
}
|
|
1017
|
+
if (opts.onConsole) {
|
|
1018
|
+
constructorOpts.console = opts.onConsole;
|
|
1019
|
+
}
|
|
1020
|
+
this.view = new BunWebView(constructorOpts);
|
|
1021
|
+
this.view.onNavigated = (url) => {
|
|
1022
|
+
this._emit("navigated", url);
|
|
1023
|
+
};
|
|
1024
|
+
this.view.onNavigationFailed = (error) => {
|
|
1025
|
+
this._emit("navigationfailed", error);
|
|
1026
|
+
};
|
|
1027
|
+
}
|
|
1028
|
+
async goto(url, opts) {
|
|
1029
|
+
await this.view.navigate(url);
|
|
1030
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1031
|
+
}
|
|
1032
|
+
async goBack() {
|
|
1033
|
+
await this.view.goBack();
|
|
1034
|
+
}
|
|
1035
|
+
async goForward() {
|
|
1036
|
+
await this.view.goForward();
|
|
1037
|
+
}
|
|
1038
|
+
async reload() {
|
|
1039
|
+
await this.view.reload();
|
|
1040
|
+
}
|
|
1041
|
+
async evaluate(fnOrExpr, ...args) {
|
|
1042
|
+
let expr;
|
|
1043
|
+
if (typeof fnOrExpr === "function") {
|
|
1044
|
+
const serializedArgs = args.map((a) => JSON.stringify(a)).join(", ");
|
|
1045
|
+
expr = `(${fnOrExpr.toString()})(${serializedArgs})`;
|
|
1046
|
+
} else {
|
|
1047
|
+
expr = fnOrExpr;
|
|
1048
|
+
}
|
|
1049
|
+
return this.view.evaluate(expr);
|
|
1050
|
+
}
|
|
1051
|
+
async screenshot(opts) {
|
|
1052
|
+
const uint8 = await this.view.screenshot();
|
|
1053
|
+
return Buffer.from(uint8);
|
|
1054
|
+
}
|
|
1055
|
+
async click(selector, opts) {
|
|
1056
|
+
await this.view.click(selector, opts ? { button: opts.button } : undefined);
|
|
1057
|
+
}
|
|
1058
|
+
async type(selector, text, opts) {
|
|
1059
|
+
try {
|
|
1060
|
+
await this.view.click(selector);
|
|
1061
|
+
} catch {}
|
|
1062
|
+
await this.view.type(text);
|
|
1063
|
+
}
|
|
1064
|
+
async fill(selector, value) {
|
|
1065
|
+
await this.view.evaluate(`
|
|
1066
|
+
(() => {
|
|
1067
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1068
|
+
if (el) { el.value = ''; el.dispatchEvent(new Event('input')); }
|
|
1069
|
+
})()
|
|
1070
|
+
`);
|
|
1071
|
+
await this.type(selector, value);
|
|
1072
|
+
}
|
|
1073
|
+
async press(key, opts) {
|
|
1074
|
+
await this.view.press(key, opts);
|
|
1075
|
+
}
|
|
1076
|
+
async scroll(direction, amount) {
|
|
1077
|
+
const dx = direction === "left" ? -amount : direction === "right" ? amount : 0;
|
|
1078
|
+
const dy = direction === "up" ? -amount : direction === "down" ? amount : 0;
|
|
1079
|
+
await this.view.scroll(dx, dy);
|
|
1080
|
+
}
|
|
1081
|
+
async scrollIntoView(selector) {
|
|
1082
|
+
await this.view.scrollTo(selector);
|
|
1083
|
+
}
|
|
1084
|
+
async hover(selector) {
|
|
1085
|
+
try {
|
|
1086
|
+
await this.view.scrollTo(selector);
|
|
1087
|
+
} catch {}
|
|
1088
|
+
}
|
|
1089
|
+
async resize(width, height) {
|
|
1090
|
+
await this.view.resize(width, height);
|
|
1091
|
+
}
|
|
1092
|
+
async $(selector) {
|
|
1093
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1094
|
+
if (!exists)
|
|
1095
|
+
return null;
|
|
1096
|
+
return {
|
|
1097
|
+
textContent: async () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`)
|
|
1098
|
+
};
|
|
1099
|
+
}
|
|
1100
|
+
async $$(selector) {
|
|
1101
|
+
const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
|
|
1102
|
+
return Array.from({ length: count }, (_, i) => ({
|
|
1103
|
+
textContent: async () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${i}]?.textContent ?? null`)
|
|
1104
|
+
}));
|
|
1105
|
+
}
|
|
1106
|
+
async inputValue(selector) {
|
|
1107
|
+
return this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.value ?? ''`);
|
|
1108
|
+
}
|
|
1109
|
+
async isChecked(selector) {
|
|
1110
|
+
return this.view.evaluate(`!!(document.querySelector(${JSON.stringify(selector)})?.checked)`);
|
|
1111
|
+
}
|
|
1112
|
+
async isVisible(selector) {
|
|
1113
|
+
return this.view.evaluate(`
|
|
1114
|
+
(() => {
|
|
1115
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1116
|
+
if (!el) return false;
|
|
1117
|
+
const style = window.getComputedStyle(el);
|
|
1118
|
+
return style.display !== 'none' && style.visibility !== 'hidden' && el.offsetWidth > 0;
|
|
1119
|
+
})()
|
|
1120
|
+
`);
|
|
1121
|
+
}
|
|
1122
|
+
async isEnabled(selector) {
|
|
1123
|
+
return this.view.evaluate(`!(document.querySelector(${JSON.stringify(selector)})?.disabled)`);
|
|
1124
|
+
}
|
|
1125
|
+
async selectOption(selector, value) {
|
|
1126
|
+
await this.view.evaluate(`
|
|
1127
|
+
(() => {
|
|
1128
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1129
|
+
if (el) {
|
|
1130
|
+
el.value = ${JSON.stringify(value)};
|
|
1131
|
+
el.dispatchEvent(new Event('change'));
|
|
1132
|
+
}
|
|
1133
|
+
})()
|
|
1134
|
+
`);
|
|
1135
|
+
return [value];
|
|
1136
|
+
}
|
|
1137
|
+
async check(selector) {
|
|
1138
|
+
await this.view.evaluate(`
|
|
1139
|
+
(() => {
|
|
1140
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1141
|
+
if (el && !el.checked) { el.checked = true; el.dispatchEvent(new Event('change')); }
|
|
1142
|
+
})()
|
|
1143
|
+
`);
|
|
1144
|
+
}
|
|
1145
|
+
async uncheck(selector) {
|
|
1146
|
+
await this.view.evaluate(`
|
|
1147
|
+
(() => {
|
|
1148
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1149
|
+
if (el && el.checked) { el.checked = false; el.dispatchEvent(new Event('change')); }
|
|
1150
|
+
})()
|
|
1151
|
+
`);
|
|
1152
|
+
}
|
|
1153
|
+
async setInputFiles(selector, files) {
|
|
1154
|
+
throw new Error("File upload not supported in Bun.WebView engine. Use engine: 'playwright' instead.");
|
|
1155
|
+
}
|
|
1156
|
+
getByRole(role, opts) {
|
|
1157
|
+
const name = opts?.name?.toString() ?? "";
|
|
1158
|
+
const selector = name ? `[role="${role}"][aria-label*="${name}"], ${role}[aria-label*="${name}"]` : `[role="${role}"], ${role}`;
|
|
1159
|
+
return {
|
|
1160
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1161
|
+
fill: (value) => this.fill(selector, value),
|
|
1162
|
+
check: () => this.check(selector),
|
|
1163
|
+
uncheck: () => this.uncheck(selector),
|
|
1164
|
+
isVisible: () => this.isVisible(selector),
|
|
1165
|
+
textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
|
|
1166
|
+
inputValue: () => this.inputValue(selector),
|
|
1167
|
+
first: () => ({
|
|
1168
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1169
|
+
fill: (value) => this.fill(selector, value),
|
|
1170
|
+
textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
|
|
1171
|
+
isVisible: () => this.isVisible(selector),
|
|
1172
|
+
hover: () => this.hover(selector),
|
|
1173
|
+
boundingBox: async () => null,
|
|
1174
|
+
scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
|
|
1175
|
+
evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
|
|
1176
|
+
waitFor: (opts2) => {
|
|
1177
|
+
return new Promise((resolve, reject) => {
|
|
1178
|
+
const timeout = opts2?.timeout ?? 1e4;
|
|
1179
|
+
const start = Date.now();
|
|
1180
|
+
const check = async () => {
|
|
1181
|
+
const visible = await this.isVisible(selector);
|
|
1182
|
+
if (visible)
|
|
1183
|
+
return resolve();
|
|
1184
|
+
if (Date.now() - start > timeout)
|
|
1185
|
+
return reject(new Error(`Timeout waiting for ${selector}`));
|
|
1186
|
+
setTimeout(check, 100);
|
|
1187
|
+
};
|
|
1188
|
+
check();
|
|
1189
|
+
});
|
|
1190
|
+
}
|
|
1191
|
+
}),
|
|
1192
|
+
count: async () => {
|
|
1193
|
+
const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
|
|
1194
|
+
return count;
|
|
1195
|
+
},
|
|
1196
|
+
nth: (n) => ({
|
|
1197
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1198
|
+
textContent: () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${n}]?.textContent ?? null`),
|
|
1199
|
+
isVisible: () => this.isVisible(selector)
|
|
1200
|
+
})
|
|
1201
|
+
};
|
|
1202
|
+
}
|
|
1203
|
+
getByText(text, opts) {
|
|
1204
|
+
const selector = opts?.exact ? `*:is(button, a, span, div, p, h1, h2, h3, h4, label)` : "*";
|
|
1205
|
+
return {
|
|
1206
|
+
first: () => ({
|
|
1207
|
+
click: async (clickOpts) => {
|
|
1208
|
+
await this.view.evaluate(`
|
|
1209
|
+
(() => {
|
|
1210
|
+
const text = ${JSON.stringify(text)};
|
|
1211
|
+
const all = document.querySelectorAll('*');
|
|
1212
|
+
for (const el of all) {
|
|
1213
|
+
if (el.children.length === 0 && el.textContent?.trim() === text) {
|
|
1214
|
+
el.click(); return;
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
for (const el of all) {
|
|
1218
|
+
if (el.textContent?.includes(text)) { el.click(); return; }
|
|
1219
|
+
}
|
|
1220
|
+
})()
|
|
1221
|
+
`);
|
|
1222
|
+
},
|
|
1223
|
+
waitFor: (waitOpts) => {
|
|
1224
|
+
const timeout = waitOpts?.timeout ?? 1e4;
|
|
1225
|
+
return new Promise((resolve, reject) => {
|
|
1226
|
+
const start = Date.now();
|
|
1227
|
+
const check = async () => {
|
|
1228
|
+
const found = await this.view.evaluate(`document.body?.textContent?.includes(${JSON.stringify(text)})`);
|
|
1229
|
+
if (found)
|
|
1230
|
+
return resolve();
|
|
1231
|
+
if (Date.now() - start > timeout)
|
|
1232
|
+
return reject(new Error(`Timeout: text "${text}" not found`));
|
|
1233
|
+
setTimeout(check, 100);
|
|
1234
|
+
};
|
|
1235
|
+
check();
|
|
1236
|
+
});
|
|
1237
|
+
}
|
|
1238
|
+
})
|
|
1239
|
+
};
|
|
1240
|
+
}
|
|
1241
|
+
locator(selector) {
|
|
1242
|
+
return {
|
|
1243
|
+
click: (opts) => this.click(selector, opts),
|
|
1244
|
+
fill: (value) => this.fill(selector, value),
|
|
1245
|
+
scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
|
|
1246
|
+
first: () => this.getByRole("*").first(),
|
|
1247
|
+
evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
|
|
1248
|
+
waitFor: (opts) => {
|
|
1249
|
+
const timeout = opts?.timeout ?? 1e4;
|
|
1250
|
+
return new Promise((resolve, reject) => {
|
|
1251
|
+
const start = Date.now();
|
|
1252
|
+
const check = async () => {
|
|
1253
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1254
|
+
if (exists)
|
|
1255
|
+
return resolve();
|
|
1256
|
+
if (Date.now() - start > timeout)
|
|
1257
|
+
return reject(new Error(`Timeout: ${selector}`));
|
|
1258
|
+
setTimeout(check, 100);
|
|
1259
|
+
};
|
|
1260
|
+
check();
|
|
1261
|
+
});
|
|
1262
|
+
}
|
|
1263
|
+
};
|
|
1264
|
+
}
|
|
1265
|
+
url() {
|
|
1266
|
+
return this.view.url;
|
|
1267
|
+
}
|
|
1268
|
+
async title() {
|
|
1269
|
+
return this.view.title || await this.evaluate("document.title");
|
|
1270
|
+
}
|
|
1271
|
+
viewportSize() {
|
|
1272
|
+
return { width: 1280, height: 720 };
|
|
1273
|
+
}
|
|
1274
|
+
async waitForLoadState(state, opts) {
|
|
1275
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1276
|
+
}
|
|
1277
|
+
async waitForURL(pattern, opts) {
|
|
1278
|
+
const timeout = opts?.timeout ?? 30000;
|
|
1279
|
+
const start = Date.now();
|
|
1280
|
+
while (Date.now() - start < timeout) {
|
|
1281
|
+
const url = this.view.url;
|
|
1282
|
+
const matches = pattern instanceof RegExp ? pattern.test(url) : url.includes(pattern);
|
|
1283
|
+
if (matches)
|
|
1284
|
+
return;
|
|
1285
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1286
|
+
}
|
|
1287
|
+
throw new Error(`Timeout waiting for URL to match ${pattern}`);
|
|
1288
|
+
}
|
|
1289
|
+
async waitForSelector(selector, opts) {
|
|
1290
|
+
const timeout = opts?.timeout ?? 1e4;
|
|
1291
|
+
const start = Date.now();
|
|
1292
|
+
while (Date.now() - start < timeout) {
|
|
1293
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1294
|
+
if (exists)
|
|
1295
|
+
return;
|
|
1296
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1297
|
+
}
|
|
1298
|
+
throw new Error(`Timeout waiting for ${selector}`);
|
|
1299
|
+
}
|
|
1300
|
+
async setContent(html) {
|
|
1301
|
+
await this.view.navigate(`data:text/html,${encodeURIComponent(html)}`);
|
|
1302
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1303
|
+
}
|
|
1304
|
+
async content() {
|
|
1305
|
+
return this.view.evaluate("document.documentElement.outerHTML");
|
|
1306
|
+
}
|
|
1307
|
+
async addInitScript(script) {
|
|
1308
|
+
const expr = typeof script === "function" ? `(${script.toString()})()` : script;
|
|
1309
|
+
await this.view.evaluate(expr);
|
|
1310
|
+
}
|
|
1311
|
+
keyboard = {
|
|
1312
|
+
press: (key) => this.view.press(key)
|
|
1313
|
+
};
|
|
1314
|
+
context() {
|
|
1315
|
+
return {
|
|
1316
|
+
close: async () => {
|
|
1317
|
+
await this.close();
|
|
1318
|
+
},
|
|
1319
|
+
newPage: async () => {
|
|
1320
|
+
throw new Error("Multi-tab not supported in Bun.WebView. Use engine: 'playwright'");
|
|
1321
|
+
},
|
|
1322
|
+
cookies: async () => [],
|
|
1323
|
+
addCookies: async (_) => {},
|
|
1324
|
+
clearCookies: async () => {},
|
|
1325
|
+
newCDPSession: async () => {
|
|
1326
|
+
throw new Error("CDP session via context not available in Bun.WebView. Use view.cdp() when shipped.");
|
|
1327
|
+
},
|
|
1328
|
+
route: async (_pattern, _handler) => {
|
|
1329
|
+
throw new Error("Network interception not supported in Bun.WebView. Use engine: 'cdp' or 'playwright'.");
|
|
1330
|
+
},
|
|
1331
|
+
unrouteAll: async () => {},
|
|
1332
|
+
pages: () => [],
|
|
1333
|
+
addInitScript: async (script) => {
|
|
1334
|
+
await this.addInitScript(script);
|
|
1335
|
+
}
|
|
1336
|
+
};
|
|
1337
|
+
}
|
|
1338
|
+
on(event, handler) {
|
|
1339
|
+
if (!this._eventListeners.has(event))
|
|
1340
|
+
this._eventListeners.set(event, []);
|
|
1341
|
+
this._eventListeners.get(event).push(handler);
|
|
1342
|
+
return this;
|
|
1343
|
+
}
|
|
1344
|
+
off(event, handler) {
|
|
1345
|
+
const listeners = this._eventListeners.get(event) ?? [];
|
|
1346
|
+
this._eventListeners.set(event, listeners.filter((l) => l !== handler));
|
|
1347
|
+
return this;
|
|
1348
|
+
}
|
|
1349
|
+
_emit(event, ...args) {
|
|
1350
|
+
for (const handler of this._eventListeners.get(event) ?? []) {
|
|
1351
|
+
try {
|
|
1352
|
+
handler(...args);
|
|
1353
|
+
} catch {}
|
|
1354
|
+
}
|
|
1355
|
+
}
|
|
1356
|
+
async pdf(_opts) {
|
|
1357
|
+
throw new Error("PDF generation not supported in Bun.WebView. Use engine: 'playwright'.");
|
|
1358
|
+
}
|
|
1359
|
+
coverage = {
|
|
1360
|
+
startJSCoverage: async () => {},
|
|
1361
|
+
stopJSCoverage: async () => [],
|
|
1362
|
+
startCSSCoverage: async () => {},
|
|
1363
|
+
stopCSSCoverage: async () => []
|
|
1364
|
+
};
|
|
1365
|
+
setSessionId(id) {
|
|
1366
|
+
this._sessionId = id;
|
|
1367
|
+
}
|
|
1368
|
+
getSessionId() {
|
|
1369
|
+
return this._sessionId;
|
|
1370
|
+
}
|
|
1371
|
+
getNativeView() {
|
|
1372
|
+
return this.view;
|
|
1373
|
+
}
|
|
1374
|
+
async close() {
|
|
1375
|
+
try {
|
|
1376
|
+
await this.view.close();
|
|
1377
|
+
} catch {}
|
|
1378
|
+
}
|
|
1379
|
+
[Symbol.asyncDispose]() {
|
|
1380
|
+
return this.close();
|
|
1381
|
+
}
|
|
1382
|
+
};
|
|
1383
|
+
});
|
|
1384
|
+
|
|
839
1385
|
// src/lib/browser.ts
|
|
840
1386
|
var exports_browser = {};
|
|
841
1387
|
__export(exports_browser, {
|
|
@@ -857,6 +1403,22 @@ async function launchBrowser(options) {
|
|
|
857
1403
|
}
|
|
858
1404
|
return launchLightpanda2({ viewport: options?.viewport });
|
|
859
1405
|
}
|
|
1406
|
+
if (engine === "bun") {
|
|
1407
|
+
const { isBunWebViewAvailable: isBunWebViewAvailable2, BunWebViewSession: BunWebViewSession2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
|
|
1408
|
+
if (!isBunWebViewAvailable2()) {
|
|
1409
|
+
throw new BrowserError("Bun.WebView not available. Upgrade to Bun canary: bun upgrade --canary");
|
|
1410
|
+
}
|
|
1411
|
+
const session = new BunWebViewSession2({
|
|
1412
|
+
width: options?.viewport?.width ?? 1280,
|
|
1413
|
+
height: options?.viewport?.height ?? 720
|
|
1414
|
+
});
|
|
1415
|
+
return {
|
|
1416
|
+
newContext: async () => ({ newPage: async () => session, close: async () => {} }),
|
|
1417
|
+
close: async () => session.close(),
|
|
1418
|
+
contexts: () => [],
|
|
1419
|
+
_bunSession: session
|
|
1420
|
+
};
|
|
1421
|
+
}
|
|
860
1422
|
const headless = options?.headless ?? true;
|
|
861
1423
|
const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
|
|
862
1424
|
try {
|
|
@@ -878,6 +1440,12 @@ async function getPage(browser, options) {
|
|
|
878
1440
|
const { getLightpandaPage: getLightpandaPage2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
|
|
879
1441
|
return getLightpandaPage2(browser, options);
|
|
880
1442
|
}
|
|
1443
|
+
if (engine === "bun") {
|
|
1444
|
+
const bunSession = browser._bunSession;
|
|
1445
|
+
if (bunSession)
|
|
1446
|
+
return bunSession;
|
|
1447
|
+
throw new BrowserError("Bun.WebView session not found on browser instance");
|
|
1448
|
+
}
|
|
881
1449
|
const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
|
|
882
1450
|
try {
|
|
883
1451
|
const context = await browser.newContext({
|
|
@@ -897,6 +1465,12 @@ async function closeBrowser(browser, engine) {
|
|
|
897
1465
|
const { closeLightpanda: closeLightpanda2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
|
|
898
1466
|
return closeLightpanda2(browser);
|
|
899
1467
|
}
|
|
1468
|
+
if (engine === "bun") {
|
|
1469
|
+
const bunSession = browser._bunSession;
|
|
1470
|
+
if (bunSession)
|
|
1471
|
+
await bunSession.close();
|
|
1472
|
+
return;
|
|
1473
|
+
}
|
|
900
1474
|
try {
|
|
901
1475
|
await browser.close();
|
|
902
1476
|
} catch (error) {
|
|
@@ -966,6 +1540,9 @@ async function launchBrowserEngine(engine, config) {
|
|
|
966
1540
|
}
|
|
967
1541
|
return launchLightpanda2({ viewport: config.viewport });
|
|
968
1542
|
}
|
|
1543
|
+
if (engine === "bun") {
|
|
1544
|
+
return launchBrowser({ headless: config.headless, viewport: config.viewport, engine: "bun" });
|
|
1545
|
+
}
|
|
969
1546
|
return chromium2.launch({
|
|
970
1547
|
headless: config.headless,
|
|
971
1548
|
args: ["--no-sandbox", "--disable-setuid-sandbox"]
|
|
@@ -1100,8 +1677,8 @@ async function scanA11y(options) {
|
|
|
1100
1677
|
var AXE_CDN = "https://cdn.jsdelivr.net/npm/axe-core@4/axe.min.js";
|
|
1101
1678
|
|
|
1102
1679
|
// src/lib/config.ts
|
|
1103
|
-
import { homedir as
|
|
1104
|
-
import { join as
|
|
1680
|
+
import { homedir as homedir3 } from "os";
|
|
1681
|
+
import { join as join3 } from "path";
|
|
1105
1682
|
import { readFileSync, existsSync as existsSync2 } from "fs";
|
|
1106
1683
|
function getDefaultConfig() {
|
|
1107
1684
|
return {
|
|
@@ -1113,7 +1690,7 @@ function getDefaultConfig() {
|
|
|
1113
1690
|
timeout: 60000
|
|
1114
1691
|
},
|
|
1115
1692
|
screenshots: {
|
|
1116
|
-
dir:
|
|
1693
|
+
dir: join3(homedir3(), ".testers", "screenshots"),
|
|
1117
1694
|
format: "png",
|
|
1118
1695
|
quality: 90,
|
|
1119
1696
|
fullPage: false
|
|
@@ -1139,7 +1716,8 @@ function loadConfig() {
|
|
|
1139
1716
|
todosDbPath: fileConfig.todosDbPath,
|
|
1140
1717
|
judgeModel: fileConfig.judgeModel,
|
|
1141
1718
|
judgeProvider: fileConfig.judgeProvider,
|
|
1142
|
-
selfHeal: fileConfig.selfHeal ?? false
|
|
1719
|
+
selfHeal: fileConfig.selfHeal ?? false,
|
|
1720
|
+
conversationsSpace: fileConfig.conversationsSpace
|
|
1143
1721
|
};
|
|
1144
1722
|
const envModel = process.env["TESTERS_MODEL"];
|
|
1145
1723
|
if (envModel) {
|
|
@@ -1158,8 +1736,8 @@ function loadConfig() {
|
|
|
1158
1736
|
var CONFIG_DIR, CONFIG_PATH;
|
|
1159
1737
|
var init_config = __esm(() => {
|
|
1160
1738
|
init_types();
|
|
1161
|
-
CONFIG_DIR =
|
|
1162
|
-
CONFIG_PATH =
|
|
1739
|
+
CONFIG_DIR = join3(homedir3(), ".testers");
|
|
1740
|
+
CONFIG_PATH = join3(CONFIG_DIR, "config.json");
|
|
1163
1741
|
});
|
|
1164
1742
|
|
|
1165
1743
|
// src/lib/healer.ts
|
|
@@ -1590,6 +2168,8 @@ async function runAgentLoop(options) {
|
|
|
1590
2168
|
Instructions: ${persona.instructions}` : "",
|
|
1591
2169
|
persona.traits.length > 0 ? `Traits: ${persona.traits.join(", ")}` : "",
|
|
1592
2170
|
persona.goals.length > 0 ? `Goals: ${persona.goals.join("; ")}` : "",
|
|
2171
|
+
persona.behaviors && persona.behaviors.length > 0 ? `Behaviors: ${persona.behaviors.join("; ")}` : "",
|
|
2172
|
+
persona.painPoints && persona.painPoints.length > 0 ? `Pain points: ${persona.painPoints.join("; ")}` : "",
|
|
1593
2173
|
"",
|
|
1594
2174
|
"Stay in character throughout the test. Your observations, choices, and priorities should reflect this persona."
|
|
1595
2175
|
].filter(Boolean).join(`
|
|
@@ -1640,6 +2220,15 @@ Instructions: ${persona.instructions}` : "",
|
|
|
1640
2220
|
const isOpenAICompat = "provider" in client;
|
|
1641
2221
|
try {
|
|
1642
2222
|
for (let turn = 0;turn < maxTurns; turn++) {
|
|
2223
|
+
if (persona && turn > 0 && turn % 5 === 0) {
|
|
2224
|
+
messages = [
|
|
2225
|
+
...messages,
|
|
2226
|
+
{
|
|
2227
|
+
role: "user",
|
|
2228
|
+
content: `[Reminder: You are ${persona.name} \u2014 ${persona.role}. Traits: ${persona.traits.join(", ")}. Stay in character.]`
|
|
2229
|
+
}
|
|
2230
|
+
];
|
|
2231
|
+
}
|
|
1643
2232
|
const response = isOpenAICompat ? await callOpenAICompatible({
|
|
1644
2233
|
baseUrl: client.baseUrl,
|
|
1645
2234
|
apiKey: client.apiKey,
|
|
@@ -1734,6 +2323,8 @@ function detectProvider(model) {
|
|
|
1734
2323
|
return "openai";
|
|
1735
2324
|
if (model.startsWith("gemini-"))
|
|
1736
2325
|
return "google";
|
|
2326
|
+
if (model.startsWith("llama-") || model.startsWith("qwen-") || model.includes("cerebras"))
|
|
2327
|
+
return "cerebras";
|
|
1737
2328
|
return "anthropic";
|
|
1738
2329
|
}
|
|
1739
2330
|
function createClient(apiKey) {
|
|
@@ -1827,6 +2418,12 @@ function createClientForModel(model, apiKey) {
|
|
|
1827
2418
|
throw new AIClientError("No Google API key. Set GOOGLE_API_KEY or pass it explicitly.");
|
|
1828
2419
|
return { provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", apiKey: key };
|
|
1829
2420
|
}
|
|
2421
|
+
if (provider === "cerebras") {
|
|
2422
|
+
const key = apiKey ?? process.env["CEREBRAS_API_KEY"];
|
|
2423
|
+
if (!key)
|
|
2424
|
+
throw new AIClientError("No Cerebras API key. Set CEREBRAS_API_KEY or pass it explicitly.");
|
|
2425
|
+
return { provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKey: key };
|
|
2426
|
+
}
|
|
1830
2427
|
return createClient(apiKey);
|
|
1831
2428
|
}
|
|
1832
2429
|
var BROWSER_TOOLS;
|
|
@@ -2289,6 +2886,106 @@ var init_flows = __esm(() => {
|
|
|
2289
2886
|
init_types();
|
|
2290
2887
|
});
|
|
2291
2888
|
|
|
2889
|
+
// src/lib/failure-explainer.ts
|
|
2890
|
+
var exports_failure_explainer = {};
|
|
2891
|
+
__export(exports_failure_explainer, {
|
|
2892
|
+
explainFailure: () => explainFailure
|
|
2893
|
+
});
|
|
2894
|
+
/**
 * Pick the failure category for a result.
 *
 * A category already recorded on `existingAnalysis` is trusted unless it is
 * missing or "unknown"; otherwise the error and reasoning text are joined and
 * the first entry of TYPE_PATTERNS whose pattern matches decides the category.
 *
 * @param {string|null|undefined} error - Error message of the result.
 * @param {string|null|undefined} reasoning - Reasoning text of the result.
 * @param {{type?: string}|null|undefined} existingAnalysis - Previously stored analysis, if any.
 * @returns {string} The resolved category, or "unknown" when nothing matches.
 */
function detectType(error, reasoning, existingAnalysis) {
  const recordedType = existingAnalysis?.type;
  if (recordedType && recordedType !== "unknown") {
    return recordedType;
  }
  // Falsy parts are dropped so a missing error or reasoning adds no stray text.
  const haystack = [error, reasoning].filter(Boolean).join(" ");
  const firstHit = TYPE_PATTERNS.find((entry) => entry.pattern.test(haystack));
  return firstHit ? firstHit.type : "unknown";
}
|
|
2905
|
+
/**
 * Work out which element/selector a failure refers to.
 *
 * An element already recorded on `existingAnalysis` wins. Otherwise the error
 * text is searched for "selector <target>" and then "element <target>".
 * A quoted target is captured in full (it may contain spaces or commas, e.g.
 * "ul > li.item"); an unquoted target ends at the first whitespace, comma or quote.
 *
 * @param {string|null|undefined} error - Error message of the result.
 * @param {{affectedElement?: string}|null|undefined} existingAnalysis - Previously stored analysis, if any.
 * @returns {string|undefined} The affected element, or undefined when none can be found.
 */
function extractAffectedElement(error, existingAnalysis) {
  if (existingAnalysis?.affectedElement)
    return existingAnalysis.affectedElement;
  if (!error)
    return;
  // Group 2: content of a properly closed quoted target (same quote on both sides).
  // Group 3: bare / unterminated target, matching the previous behaviour.
  const selectorPattern = /selector[:\s]+(?:(['"`])(.+?)\1|['"`]?([^'"`\s,]+)['"`]?)/i;
  const elementPattern = /element[:\s]+(?:(['"`])(.+?)\1|['"`]?([^'"`\s,]+)['"`]?)/i;
  const match = error.match(selectorPattern) ?? error.match(elementPattern);
  if (!match)
    return;
  return match[2] ?? match[3];
}
|
|
2913
|
+
/**
 * Build a human-readable explanation for a stored test result.
 *
 * The result is looked up with getResult. A passed result yields a placeholder
 * explanation; any other status is categorised with detectType, annotated with
 * the affected element (if one can be extracted) and paired with the canned
 * cause / fix text for that category.
 *
 * @param {string} resultId - Identifier handed to getResult.
 * @returns {object} Explanation with resultId, type, summary, likelyCause,
 *   suggestedFix, confidence, raw and (for failures) affectedElement.
 * @throws {Error} When getResult returns nothing for `resultId`.
 */
function explainFailure(resultId) {
  const result = getResult(resultId);
  if (!result) {
    throw new Error(`Result not found: ${resultId}`);
  }
  if (result.status === "passed") {
    const raw = { error: null, reasoning: result.reasoning, failureAnalysis: null };
    return {
      resultId,
      type: "unknown",
      summary: "This result passed \u2014 no failure to explain.",
      likelyCause: "N/A",
      suggestedFix: "N/A",
      confidence: 1,
      raw
    };
  }
  const storedAnalysis = result.failureAnalysis;
  const type = detectType(result.error, result.reasoning, storedAnalysis);
  const affectedElement = extractAffectedElement(result.error, storedAnalysis);
  // Per-category fallback confidence, used only when the stored analysis
  // carries no recognised confidence label.
  const confidenceMap = {
    selector_not_found: 0.9,
    timeout: 0.85,
    auth_error: 0.8,
    network_error: 0.85,
    assertion_failed: 0.75,
    eval_failed: 0.7,
    unknown: 0.4
  };
  let confidence;
  switch (storedAnalysis?.confidence) {
    case "high":
      confidence = 0.9;
      break;
    case "medium":
      confidence = 0.7;
      break;
    case "low":
      confidence = 0.5;
      break;
    default:
      confidence = confidenceMap[type] ?? 0.5;
  }
  // Only the first 200 characters of the error go into the summary.
  const errorSnippet = result.error ? result.error.slice(0, 200) : "(no error message)";
  const readableType = type.replace(/_/g, " ");
  const shortId = resultId.slice(0, 8);
  const elementNote = affectedElement ? ` \u2014 element: ${affectedElement}` : "";
  const summary = `${readableType} in result ${shortId}${elementNote}. Error: ${errorSnippet}`;
  let suggestedFix;
  if (affectedElement && type === "selector_not_found") {
    suggestedFix = `Update selector "${affectedElement}". Use get_elements or get_page_html to find the new selector.`;
  } else {
    suggestedFix = SUGGESTED_FIXES[type];
  }
  return {
    resultId,
    type,
    summary,
    likelyCause: LIKELY_CAUSES[type],
    suggestedFix,
    affectedElement,
    confidence,
    raw: {
      error: result.error,
      reasoning: result.reasoning,
      failureAnalysis: result.failureAnalysis
    }
  };
}
|
|
2958
|
+
// Lookup tables used by the failure explainer. They are declared here and
// assigned when the __esm-wrapped initializer below runs.
var TYPE_PATTERNS, SUGGESTED_FIXES, LIKELY_CAUSES;
var init_failure_explainer = __esm(() => {
  init_results();
  // Ordered list: detectType returns the type of the FIRST pattern that matches,
  // so more specific categories must stay ahead of broader ones.
  TYPE_PATTERNS = [
    { pattern: /not found|no element|waiting for selector|selector.*not found/i, type: "selector_not_found" },
    { pattern: /timeout|timed out|exceeded.*ms/i, type: "timeout" },
    // 401 is included alongside 403 so both HTTP auth statuses are recognised,
    // matching what analyzeFailure treats as an auth error. It is word-bounded
    // so it does not fire on longer numbers such as ports or durations.
    { pattern: /auth|login|unauthorized|\b401\b|403|forbidden|credentials/i, type: "auth_error" },
    { pattern: /network|fetch|ERR_|ECONNREFUSED|ENOTFOUND|request failed/i, type: "network_error" },
    { pattern: /assert|expected.*got|does not contain|mismatch/i, type: "assertion_failed" },
    // "eval" already matches every string that "eval.*failed" would, so the
    // redundant alternative is omitted.
    { pattern: /eval|score/i, type: "eval_failed" }
  ];
  // Canned remediation advice, keyed by failure type.
  SUGGESTED_FIXES = {
    selector_not_found: "Update the CSS selector. The element may have been renamed, moved, or removed. Use get_elements to discover the current selector.",
    assertion_failed: "Verify the expected value matches the current app behavior. The UI text or state may have changed.",
    timeout: "Increase the scenario timeout or check if the app is responding slowly. Try reducing the number of steps.",
    auth_error: "Verify the auth credentials and login flow. Check if the session is being properly established.",
    network_error: "Check if the app is running and accessible at the target URL. Verify network connectivity.",
    eval_failed: "Review the evaluation criteria. The AI may need clearer pass/fail conditions.",
    unknown: "Review the full error message and reasoning. Check the app logs for more context."
  };
  // Canned root-cause descriptions, keyed by failure type.
  LIKELY_CAUSES = {
    selector_not_found: "A CSS selector could not locate the target element \u2014 the element may not exist, be hidden, or the selector may be stale.",
    assertion_failed: "An assertion check did not match the expected value \u2014 the app output differs from what the scenario expects.",
    timeout: "The scenario exceeded the time limit \u2014 either the app is slow or the test is waiting for an element that never appears.",
    auth_error: "Authentication failed or the session was not established \u2014 the test could not access protected content.",
    network_error: "A network request failed \u2014 the app may be unreachable or returning unexpected errors.",
    eval_failed: "The AI evaluator could not determine a pass/fail verdict with sufficient confidence.",
    unknown: "The failure cause is unclear from the available error data."
  };
});
|
|
2988
|
+
|
|
2292
2989
|
// src/db/scan-issues.ts
|
|
2293
2990
|
var exports_scan_issues = {};
|
|
2294
2991
|
__export(exports_scan_issues, {
|
|
@@ -2387,8 +3084,8 @@ var init_scan_issues = __esm(() => {
|
|
|
2387
3084
|
|
|
2388
3085
|
// src/server/index.ts
|
|
2389
3086
|
import { existsSync as existsSync5 } from "fs";
|
|
2390
|
-
import { join as
|
|
2391
|
-
import { homedir as
|
|
3087
|
+
import { join as join6 } from "path";
|
|
3088
|
+
import { homedir as homedir6 } from "os";
|
|
2392
3089
|
|
|
2393
3090
|
// node_modules/zod/v3/external.js
|
|
2394
3091
|
var exports_external = {};
|
|
@@ -6450,7 +7147,32 @@ function listScenarios(filter) {
|
|
|
6450
7147
|
params.push(filter.offset);
|
|
6451
7148
|
}
|
|
6452
7149
|
const rows = db2.query(sql).all(...params);
|
|
6453
|
-
|
|
7150
|
+
const scenarios = rows.map(scenarioFromRow);
|
|
7151
|
+
if (scenarios.length === 0)
|
|
7152
|
+
return scenarios;
|
|
7153
|
+
const scenarioIds = scenarios.map((s) => s.id);
|
|
7154
|
+
const placeholders = scenarioIds.map(() => "?").join(",");
|
|
7155
|
+
const statsRows = db2.query(`
|
|
7156
|
+
SELECT scenario_id,
|
|
7157
|
+
COUNT(*) as total,
|
|
7158
|
+
SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed
|
|
7159
|
+
FROM (
|
|
7160
|
+
SELECT scenario_id, status
|
|
7161
|
+
FROM results
|
|
7162
|
+
WHERE scenario_id IN (${placeholders})
|
|
7163
|
+
ORDER BY created_at DESC
|
|
7164
|
+
)
|
|
7165
|
+
GROUP BY scenario_id
|
|
7166
|
+
`).all(...scenarioIds);
|
|
7167
|
+
const statsMap = new Map(statsRows.map((r) => [r.scenario_id, r]));
|
|
7168
|
+
return scenarios.map((s) => {
|
|
7169
|
+
const stats = statsMap.get(s.id);
|
|
7170
|
+
return {
|
|
7171
|
+
...s,
|
|
7172
|
+
flakinessScore: stats ? stats.passed / stats.total : null,
|
|
7173
|
+
recentRunCount: stats?.total ?? 0
|
|
7174
|
+
};
|
|
7175
|
+
});
|
|
6454
7176
|
}
|
|
6455
7177
|
function updateScenario(id, input, version) {
|
|
6456
7178
|
const db2 = getDatabase();
|
|
@@ -6555,6 +7277,10 @@ function countScenarios(filter) {
|
|
|
6555
7277
|
const row = db2.query(sql).get(...params);
|
|
6556
7278
|
return row.count;
|
|
6557
7279
|
}
|
|
7280
|
+
/**
 * Record on a scenario row when it last passed and against which URL.
 *
 * @param {string} id - Scenario id used in the WHERE clause.
 * @param {string} url - Value stored in last_passed_url.
 */
function updateScenarioPassedCache(id, url) {
  const statement = getDatabase().query("UPDATE scenarios SET last_passed_at = ?, last_passed_url = ? WHERE id = ?");
  // last_passed_at receives the value of now() taken at call time.
  statement.run(now(), url, id);
}
|
|
6558
7284
|
function deleteScenario(id) {
|
|
6559
7285
|
const db2 = getDatabase();
|
|
6560
7286
|
const scenario = getScenario(id);
|
|
@@ -6703,92 +7429,8 @@ function updateRun(id, updates) {
|
|
|
6703
7429
|
return getRun(existing.id);
|
|
6704
7430
|
}
|
|
6705
7431
|
|
|
6706
|
-
// src/
|
|
6707
|
-
|
|
6708
|
-
init_database();
|
|
6709
|
-
function createResult(input) {
|
|
6710
|
-
const db2 = getDatabase();
|
|
6711
|
-
const id = uuid();
|
|
6712
|
-
const timestamp = now();
|
|
6713
|
-
db2.query(`
|
|
6714
|
-
INSERT INTO results (id, run_id, scenario_id, status, reasoning, error, steps_completed, steps_total, duration_ms, model, tokens_used, cost_cents, metadata, created_at, persona_id, persona_name)
|
|
6715
|
-
VALUES (?, ?, ?, 'skipped', NULL, NULL, 0, ?, 0, ?, 0, 0, '{}', ?, ?, ?)
|
|
6716
|
-
`).run(id, input.runId, input.scenarioId, input.stepsTotal, input.model, timestamp, input.personaId ?? null, input.personaName ?? null);
|
|
6717
|
-
return getResult(id);
|
|
6718
|
-
}
|
|
6719
|
-
function getResult(id) {
|
|
6720
|
-
const db2 = getDatabase();
|
|
6721
|
-
let row = db2.query("SELECT * FROM results WHERE id = ?").get(id);
|
|
6722
|
-
if (row)
|
|
6723
|
-
return resultFromRow(row);
|
|
6724
|
-
const fullId = resolvePartialId("results", id);
|
|
6725
|
-
if (fullId) {
|
|
6726
|
-
row = db2.query("SELECT * FROM results WHERE id = ?").get(fullId);
|
|
6727
|
-
if (row)
|
|
6728
|
-
return resultFromRow(row);
|
|
6729
|
-
}
|
|
6730
|
-
return null;
|
|
6731
|
-
}
|
|
6732
|
-
function listResults(runId) {
|
|
6733
|
-
const db2 = getDatabase();
|
|
6734
|
-
const rows = db2.query("SELECT * FROM results WHERE run_id = ? ORDER BY created_at ASC").all(runId);
|
|
6735
|
-
return rows.map(resultFromRow);
|
|
6736
|
-
}
|
|
6737
|
-
function updateResult(id, updates) {
|
|
6738
|
-
const db2 = getDatabase();
|
|
6739
|
-
const existing = getResult(id);
|
|
6740
|
-
if (!existing) {
|
|
6741
|
-
throw new Error(`Result not found: ${id}`);
|
|
6742
|
-
}
|
|
6743
|
-
const sets = [];
|
|
6744
|
-
const params = [];
|
|
6745
|
-
if (updates.status !== undefined) {
|
|
6746
|
-
sets.push("status = ?");
|
|
6747
|
-
params.push(updates.status);
|
|
6748
|
-
}
|
|
6749
|
-
if (updates.reasoning !== undefined) {
|
|
6750
|
-
sets.push("reasoning = ?");
|
|
6751
|
-
params.push(updates.reasoning);
|
|
6752
|
-
}
|
|
6753
|
-
if (updates.error !== undefined) {
|
|
6754
|
-
sets.push("error = ?");
|
|
6755
|
-
params.push(updates.error);
|
|
6756
|
-
}
|
|
6757
|
-
if (updates.stepsCompleted !== undefined) {
|
|
6758
|
-
sets.push("steps_completed = ?");
|
|
6759
|
-
params.push(updates.stepsCompleted);
|
|
6760
|
-
}
|
|
6761
|
-
if (updates.durationMs !== undefined) {
|
|
6762
|
-
sets.push("duration_ms = ?");
|
|
6763
|
-
params.push(updates.durationMs);
|
|
6764
|
-
}
|
|
6765
|
-
if (updates.tokensUsed !== undefined) {
|
|
6766
|
-
sets.push("tokens_used = ?");
|
|
6767
|
-
params.push(updates.tokensUsed);
|
|
6768
|
-
}
|
|
6769
|
-
if (updates.costCents !== undefined) {
|
|
6770
|
-
sets.push("cost_cents = ?");
|
|
6771
|
-
params.push(updates.costCents);
|
|
6772
|
-
}
|
|
6773
|
-
if (updates.metadata !== undefined) {
|
|
6774
|
-
sets.push("metadata = ?");
|
|
6775
|
-
params.push(JSON.stringify(updates.metadata));
|
|
6776
|
-
}
|
|
6777
|
-
if (sets.length === 0) {
|
|
6778
|
-
return existing;
|
|
6779
|
-
}
|
|
6780
|
-
params.push(existing.id);
|
|
6781
|
-
db2.query(`UPDATE results SET ${sets.join(", ")} WHERE id = ?`).run(...params);
|
|
6782
|
-
return getResult(existing.id);
|
|
6783
|
-
}
|
|
6784
|
-
function getResultsByRun(runId) {
|
|
6785
|
-
return listResults(runId);
|
|
6786
|
-
}
|
|
6787
|
-
function countResultsByRun(runId) {
|
|
6788
|
-
const db2 = getDatabase();
|
|
6789
|
-
const row = db2.query("SELECT COUNT(*) as count FROM results WHERE run_id = ?").get(runId);
|
|
6790
|
-
return row.count;
|
|
6791
|
-
}
|
|
7432
|
+
// src/server/index.ts
|
|
7433
|
+
init_results();
|
|
6792
7434
|
|
|
6793
7435
|
// src/db/screenshots.ts
|
|
6794
7436
|
init_types();
|
|
@@ -6819,6 +7461,12 @@ function countScreenshots(resultId) {
|
|
|
6819
7461
|
return row.count;
|
|
6820
7462
|
}
|
|
6821
7463
|
|
|
7464
|
+
// src/lib/runner.ts
|
|
7465
|
+
init_types();
|
|
7466
|
+
|
|
7467
|
+
// src/lib/eval-runner.ts
|
|
7468
|
+
init_results();
|
|
7469
|
+
|
|
6822
7470
|
// src/lib/judge.ts
|
|
6823
7471
|
init_ai_client();
|
|
6824
7472
|
init_types();
|
|
@@ -6879,11 +7527,13 @@ function resolveJudgeModel(config) {
|
|
|
6879
7527
|
apiKey = process.env["OPENAI_API_KEY"];
|
|
6880
7528
|
else if (provider === "google")
|
|
6881
7529
|
apiKey = process.env["GOOGLE_API_KEY"];
|
|
7530
|
+
else if (provider === "cerebras")
|
|
7531
|
+
apiKey = process.env["CEREBRAS_API_KEY"];
|
|
6882
7532
|
}
|
|
6883
7533
|
if (!apiKey) {
|
|
6884
|
-
apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
|
|
7534
|
+
apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["CEREBRAS_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
|
|
6885
7535
|
if (!apiKey)
|
|
6886
|
-
throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
|
|
7536
|
+
throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, CEREBRAS_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
|
|
6887
7537
|
}
|
|
6888
7538
|
return { model, provider, apiKey };
|
|
6889
7539
|
}
|
|
@@ -6898,8 +7548,8 @@ reason: 1-2 sentences max`;
|
|
|
6898
7548
|
async function callJudge(prompt, config) {
|
|
6899
7549
|
const { model, provider, apiKey } = resolveJudgeModel(config);
|
|
6900
7550
|
const threshold = 0.7;
|
|
6901
|
-
if (provider === "openai" || provider === "google") {
|
|
6902
|
-
const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
|
|
7551
|
+
if (provider === "openai" || provider === "google" || provider === "cerebras") {
|
|
7552
|
+
const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : provider === "cerebras" ? "https://api.cerebras.ai/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
|
|
6903
7553
|
const resp2 = await callOpenAICompatible({
|
|
6904
7554
|
baseUrl,
|
|
6905
7555
|
apiKey,
|
|
@@ -7301,6 +7951,130 @@ async function runPipelineScenario(scenario, options) {
|
|
|
7301
7951
|
});
|
|
7302
7952
|
}
|
|
7303
7953
|
|
|
7954
|
+
// src/lib/runner.ts
|
|
7955
|
+
init_results();
|
|
7956
|
+
|
|
7957
|
+
// src/lib/failure-analyzer.ts
|
|
7958
|
+
/**
 * Classify a failed result from its error message and the reasoning text.
 *
 * Categories are checked in a fixed order (selector, assertion, timeout, auth,
 * network, eval) and the first category whose patterns match wins; anything
 * else is reported as "unknown" with low confidence.
 *
 * @param {string|null|undefined} error - Error message of the result.
 * @param {string|null|undefined} reasoning - Reasoning text of the result.
 * @returns {object|null} null when both inputs are empty or whitespace;
 *   otherwise an object with `type` and `confidence`, plus `affectedElement`,
 *   `expected`, `actual` and `stepNumber` keys for the categories that carry them.
 */
function analyzeFailure(error, reasoning) {
  const combinedText = [error, reasoning].filter(Boolean).join(" ");
  if (!combinedText.trim())
    return null;
  const errorText = error ?? "";
  const reasoningText = reasoning ?? "";
  const matchesAny = (text, patterns) => patterns.some((pattern) => pattern.test(text));
  // The step number is always read from the reasoning text, so extract it once.
  const stepMatch = reasoningText.match(/step\s+(\d+)/i);
  const stepNumber = stepMatch ? parseInt(stepMatch[1], 10) : undefined;

  if (matchesAny(errorText, [/waiting for selector/i, /not found/i, /No element/i]) || matchesAny(reasoningText, [/waiting for selector/i, /could not find element/i, /element not found/i])) {
    // The first single-quoted token is taken to be the selector.
    const selectorMatch = errorText.match(/'([^']+)'/) ?? reasoningText.match(/'([^']+)'/);
    const affectedElement = selectorMatch ? selectorMatch[1] : undefined;
    return {
      type: "selector_not_found",
      affectedElement,
      stepNumber,
      confidence: affectedElement ? "high" : "medium"
    };
  }

  // Word boundaries keep "Unexpected token ..." and "unable to begin ..." from
  // being mistaken for assertion failures.
  if (matchesAny(errorText, [/assert/i, /\bexpected\b/i, /to equal/i, /\bto be\b/i, /\bgot\b/]) || matchesAny(reasoningText, [/assertion.*failed/i, /expected.*but.*got/i])) {
    const expectedActualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1[,\s]+(?:got|received|actual)[:\s]+(['"]?)([^'"]+)\3/i);
    const toEqualMatch = errorText.match(/expected[:\s]+(['"]?)([^'"]+)\1\s+to\s+equal\s+(['"]?)([^'"]+)\3/i);
    let expected;
    let actual;
    if (expectedActualMatch) {
      expected = expectedActualMatch[2];
      actual = expectedActualMatch[4];
    } else if (toEqualMatch) {
      // "expected <actual> to equal <expected>" lists the actual value first.
      expected = toEqualMatch[4];
      actual = toEqualMatch[2];
    }
    return {
      type: "assertion_failed",
      expected,
      actual,
      stepNumber,
      confidence: expected && actual ? "high" : "medium"
    };
  }

  if (matchesAny(errorText, [/timeout/i, /timed out/i]) || matchesAny(reasoningText, [/timeout/i, /timed out/i])) {
    return {
      type: "timeout",
      stepNumber,
      confidence: "high"
    };
  }

  if (matchesAny(errorText, [/\b401\b/, /\b403\b/, /login/i, /unauthorized/i, /\bauth\b/i]) || matchesAny(reasoningText, [/\b401\b/, /\b403\b/, /unauthorized/i, /authentication/i])) {
    return {
      type: "auth_error",
      confidence: "high"
    };
  }

  if (matchesAny(errorText, [/ECONNREFUSED/i, /ENOTFOUND/i, /fetch failed/i, /network/i]) || matchesAny(reasoningText, [/ECONNREFUSED/i, /fetch failed/i, /connection refused/i])) {
    return {
      type: "network_error",
      confidence: "high"
    };
  }

  if (matchesAny(errorText, [/\beval\b/i, /evaluate/i, /\bscript\b/i]) || matchesAny(reasoningText, [/\beval\b/i, /evaluate/i])) {
    return {
      type: "eval_failed",
      confidence: "medium"
    };
  }

  return {
    type: "unknown",
    confidence: "low"
  };
}
|
|
8030
|
+
|
|
8031
|
+
// src/lib/costs.ts
|
|
8032
|
+
init_database();
|
|
8033
|
+
init_config();
|
|
8034
|
+
// Estimated cost of running one scenario, in cents, keyed by model name or family.
var COST_PER_SCENARIO_CENTS = {
  haiku: 5,
  sonnet: 30,
  opus: 150,
  "claude-haiku": 5,
  "claude-sonnet": 30,
  "claude-opus": 150,
  "gpt-4o-mini": 3,
  "gpt-4o": 25,
  "gemini-2.0-flash": 2,
  "gemini-1.5-pro": 20,
  "llama-3.1-8b": 1,
  "llama-3.3-70b": 3
};
/**
 * Resolve the per-scenario cost (in cents) for a model name.
 *
 * An exact table entry wins; otherwise the lower-cased name is matched against
 * known family substrings in priority order. Unrecognised, empty or non-string
 * names fall back to 10 cents.
 *
 * @param {string} model - Model identifier, e.g. "claude-sonnet-4".
 * @returns {number} Cost per scenario in cents.
 */
function modelToCostKey(model) {
  const fallbackCents = 10;
  // A missing model should yield an estimate, not a TypeError.
  if (typeof model !== "string")
    return fallbackCents;
  // Own-property check so names like "constructor" never resolve to
  // inherited Object.prototype members.
  if (Object.hasOwn(COST_PER_SCENARIO_CENTS, model))
    return COST_PER_SCENARIO_CENTS[model];
  const lower = model.toLowerCase();
  // Order matters: more specific substrings ("gpt-4o-mini", "llama-3.3")
  // must be tested before the broader ones that also match them.
  const familyRules = [
    [["opus"], "opus"],
    [["sonnet"], "sonnet"],
    [["haiku"], "haiku"],
    [["gpt-4o-mini"], "gpt-4o-mini"],
    [["gpt-4o"], "gpt-4o"],
    [["gemini-2.0-flash", "gemini-flash"], "gemini-2.0-flash"],
    [["gemini-1.5-pro", "gemini-pro"], "gemini-1.5-pro"],
    [["llama-3.3", "llama3.3"], "llama-3.3-70b"],
    [["llama"], "llama-3.1-8b"]
  ];
  for (const [needles, key] of familyRules) {
    if (needles.some((needle) => lower.includes(needle)))
      return COST_PER_SCENARIO_CENTS[key];
  }
  return fallbackCents;
}
/**
 * Estimate the total cost of a run in cents.
 *
 * @param {number} scenarioCount - Number of scenarios in the run.
 * @param {string} model - Model identifier passed to modelToCostKey.
 * @param {number} [samples=1] - Samples per scenario; values below 1 count as 1.
 * @returns {number} scenarioCount x per-scenario cost x samples.
 */
function estimateRunCostCents(scenarioCount, model, samples = 1) {
  const costPerScenario = modelToCostKey(model);
  return scenarioCount * costPerScenario * Math.max(1, samples);
}
|
|
8077
|
+
|
|
7304
8078
|
// src/db/personas.ts
|
|
7305
8079
|
init_types();
|
|
7306
8080
|
init_database();
|
|
@@ -7310,9 +8084,9 @@ function createPersona(input) {
|
|
|
7310
8084
|
const short_id = shortUuid();
|
|
7311
8085
|
const timestamp = now();
|
|
7312
8086
|
db2.query(`
|
|
7313
|
-
INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, metadata, enabled, version, created_at, updated_at)
|
|
7314
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
|
|
7315
|
-
`).run(id, short_id, input.projectId ?? null, input.name, input.description ?? "", input.role, input.instructions ?? "", JSON.stringify(input.traits ?? []), JSON.stringify(input.goals ?? []), input.metadata ? JSON.stringify(input.metadata) : "{}", input.enabled === false ? 0 : 1, timestamp, timestamp);
|
|
8087
|
+
INSERT INTO personas (id, short_id, project_id, name, description, role, instructions, traits, goals, behaviors, expertise_level, demographics, pain_points, metadata, enabled, auth_email, auth_password, auth_login_path, version, created_at, updated_at)
|
|
8088
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
|
|
8089
|
+
`).run(id, short_id, input.projectId ?? null, input.name, input.description ?? "", input.role, input.instructions ?? "", JSON.stringify(input.traits ?? []), JSON.stringify(input.goals ?? []), JSON.stringify(input.behaviors ?? []), input.expertiseLevel ?? "intermediate", JSON.stringify(input.demographics ?? {}), JSON.stringify(input.painPoints ?? []), input.metadata ? JSON.stringify(input.metadata) : "{}", input.enabled === false ? 0 : 1, input.authEmail ?? null, input.authPassword ?? null, input.authLoginPath ?? null, timestamp, timestamp);
|
|
7316
8090
|
return getPersona(id);
|
|
7317
8091
|
}
|
|
7318
8092
|
function getPersona(id) {
|
|
@@ -7390,6 +8164,22 @@ function updatePersona(id, updates, version) {
|
|
|
7390
8164
|
sets.push("goals = ?");
|
|
7391
8165
|
params.push(JSON.stringify(updates.goals));
|
|
7392
8166
|
}
|
|
8167
|
+
if (updates.behaviors !== undefined) {
|
|
8168
|
+
sets.push("behaviors = ?");
|
|
8169
|
+
params.push(JSON.stringify(updates.behaviors));
|
|
8170
|
+
}
|
|
8171
|
+
if (updates.expertiseLevel !== undefined) {
|
|
8172
|
+
sets.push("expertise_level = ?");
|
|
8173
|
+
params.push(updates.expertiseLevel);
|
|
8174
|
+
}
|
|
8175
|
+
if (updates.demographics !== undefined) {
|
|
8176
|
+
sets.push("demographics = ?");
|
|
8177
|
+
params.push(JSON.stringify(updates.demographics));
|
|
8178
|
+
}
|
|
8179
|
+
if (updates.painPoints !== undefined) {
|
|
8180
|
+
sets.push("pain_points = ?");
|
|
8181
|
+
params.push(JSON.stringify(updates.painPoints));
|
|
8182
|
+
}
|
|
7393
8183
|
if (updates.enabled !== undefined) {
|
|
7394
8184
|
sets.push("enabled = ?");
|
|
7395
8185
|
params.push(updates.enabled ? 1 : 0);
|
|
@@ -7398,6 +8188,22 @@ function updatePersona(id, updates, version) {
|
|
|
7398
8188
|
sets.push("metadata = ?");
|
|
7399
8189
|
params.push(JSON.stringify(updates.metadata));
|
|
7400
8190
|
}
|
|
8191
|
+
if (updates.authEmail !== undefined) {
|
|
8192
|
+
sets.push("auth_email = ?");
|
|
8193
|
+
params.push(updates.authEmail);
|
|
8194
|
+
}
|
|
8195
|
+
if (updates.authPassword !== undefined) {
|
|
8196
|
+
sets.push("auth_password = ?");
|
|
8197
|
+
params.push(updates.authPassword);
|
|
8198
|
+
}
|
|
8199
|
+
if (updates.authLoginPath !== undefined) {
|
|
8200
|
+
sets.push("auth_login_path = ?");
|
|
8201
|
+
params.push(updates.authLoginPath);
|
|
8202
|
+
}
|
|
8203
|
+
if (updates.authCookies !== undefined) {
|
|
8204
|
+
sets.push("auth_cookies = ?");
|
|
8205
|
+
params.push(updates.authCookies ? JSON.stringify(updates.authCookies) : null);
|
|
8206
|
+
}
|
|
7401
8207
|
if (sets.length === 0) {
|
|
7402
8208
|
return existing;
|
|
7403
8209
|
}
|
|
@@ -7447,9 +8253,9 @@ function countPersonas(filter) {
|
|
|
7447
8253
|
init_browser();
|
|
7448
8254
|
|
|
7449
8255
|
// src/lib/screenshotter.ts
|
|
7450
|
-
import { mkdirSync as
|
|
7451
|
-
import { join as
|
|
7452
|
-
import { homedir as
|
|
8256
|
+
import { mkdirSync as mkdirSync3, existsSync as existsSync3, writeFileSync } from "fs";
|
|
8257
|
+
import { join as join4 } from "path";
|
|
8258
|
+
import { homedir as homedir4 } from "os";
|
|
7453
8259
|
function slugify(text) {
|
|
7454
8260
|
return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
|
|
7455
8261
|
}
|
|
@@ -7469,11 +8275,11 @@ function getScreenshotDir(baseDir, runId, scenarioSlug, projectName, timestamp)
|
|
|
7469
8275
|
const project = projectName ?? "default";
|
|
7470
8276
|
const dateDir = formatDate(now2);
|
|
7471
8277
|
const timeDir = `${formatTime(now2)}_${runId.slice(0, 8)}`;
|
|
7472
|
-
return
|
|
8278
|
+
return join4(baseDir, project, dateDir, timeDir, scenarioSlug);
|
|
7473
8279
|
}
|
|
7474
8280
|
function ensureDir(dirPath) {
|
|
7475
8281
|
if (!existsSync3(dirPath)) {
|
|
7476
|
-
|
|
8282
|
+
mkdirSync3(dirPath, { recursive: true });
|
|
7477
8283
|
}
|
|
7478
8284
|
}
|
|
7479
8285
|
function writeMetaSidecar(screenshotPath, meta) {
|
|
@@ -7484,10 +8290,10 @@ function writeMetaSidecar(screenshotPath, meta) {
|
|
|
7484
8290
|
}
|
|
7485
8291
|
async function generateThumbnail(page, screenshotDir, filename) {
|
|
7486
8292
|
try {
|
|
7487
|
-
const thumbDir =
|
|
8293
|
+
const thumbDir = join4(screenshotDir, "_thumbnail");
|
|
7488
8294
|
ensureDir(thumbDir);
|
|
7489
8295
|
const thumbFilename = filename.replace(/\.(png|jpeg)$/, ".thumb.$1");
|
|
7490
|
-
const thumbPath =
|
|
8296
|
+
const thumbPath = join4(thumbDir, thumbFilename);
|
|
7491
8297
|
const viewport = page.viewportSize();
|
|
7492
8298
|
if (viewport) {
|
|
7493
8299
|
await page.screenshot({
|
|
@@ -7501,7 +8307,7 @@ async function generateThumbnail(page, screenshotDir, filename) {
|
|
|
7501
8307
|
return null;
|
|
7502
8308
|
}
|
|
7503
8309
|
}
|
|
7504
|
-
var DEFAULT_BASE_DIR =
|
|
8310
|
+
var DEFAULT_BASE_DIR = join4(homedir4(), ".testers", "screenshots");
|
|
7505
8311
|
|
|
7506
8312
|
class Screenshotter {
|
|
7507
8313
|
baseDir;
|
|
@@ -7522,14 +8328,16 @@ class Screenshotter {
|
|
|
7522
8328
|
const action = options.description ?? options.action;
|
|
7523
8329
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
7524
8330
|
const filename = generateFilename(options.stepNumber, action);
|
|
7525
|
-
const filePath =
|
|
8331
|
+
const filePath = join4(dir, filename);
|
|
7526
8332
|
ensureDir(dir);
|
|
7527
|
-
|
|
8333
|
+
const screenshotOpts = {
|
|
7528
8334
|
path: filePath,
|
|
7529
8335
|
fullPage: this.fullPage,
|
|
7530
|
-
type: this.format
|
|
7531
|
-
|
|
7532
|
-
|
|
8336
|
+
type: this.format
|
|
8337
|
+
};
|
|
8338
|
+
if (this.format === "jpeg")
|
|
8339
|
+
screenshotOpts.quality = this.quality;
|
|
8340
|
+
await page.screenshot(screenshotOpts);
|
|
7533
8341
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
7534
8342
|
const pageUrl = page.url();
|
|
7535
8343
|
const timestamp = new Date().toISOString();
|
|
@@ -7557,14 +8365,16 @@ class Screenshotter {
|
|
|
7557
8365
|
const action = options.description ?? options.action;
|
|
7558
8366
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
7559
8367
|
const filename = generateFilename(options.stepNumber, action);
|
|
7560
|
-
const filePath =
|
|
8368
|
+
const filePath = join4(dir, filename);
|
|
7561
8369
|
ensureDir(dir);
|
|
7562
|
-
|
|
8370
|
+
const ssOpts2 = {
|
|
7563
8371
|
path: filePath,
|
|
7564
8372
|
fullPage: true,
|
|
7565
|
-
type: this.format
|
|
7566
|
-
|
|
7567
|
-
|
|
8373
|
+
type: this.format
|
|
8374
|
+
};
|
|
8375
|
+
if (this.format === "jpeg")
|
|
8376
|
+
ssOpts2.quality = this.quality;
|
|
8377
|
+
await page.screenshot(ssOpts2);
|
|
7568
8378
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
7569
8379
|
const pageUrl = page.url();
|
|
7570
8380
|
const timestamp = new Date().toISOString();
|
|
@@ -7592,13 +8402,15 @@ class Screenshotter {
|
|
|
7592
8402
|
const action = options.description ?? options.action;
|
|
7593
8403
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
7594
8404
|
const filename = generateFilename(options.stepNumber, action);
|
|
7595
|
-
const filePath =
|
|
8405
|
+
const filePath = join4(dir, filename);
|
|
7596
8406
|
ensureDir(dir);
|
|
7597
|
-
|
|
8407
|
+
const ssOpts3 = {
|
|
7598
8408
|
path: filePath,
|
|
7599
|
-
type: this.format
|
|
7600
|
-
|
|
7601
|
-
|
|
8409
|
+
type: this.format
|
|
8410
|
+
};
|
|
8411
|
+
if (this.format === "jpeg")
|
|
8412
|
+
ssOpts3.quality = this.quality;
|
|
8413
|
+
await page.locator(selector).screenshot(ssOpts3);
|
|
7602
8414
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
7603
8415
|
const pageUrl = page.url();
|
|
7604
8416
|
const timestamp = new Date().toISOString();
|
|
@@ -7793,14 +8605,14 @@ async function pushFailedRunToLogs(run, failedResults, scenarios) {
|
|
|
7793
8605
|
// src/lib/todos-connector.ts
|
|
7794
8606
|
import { Database as Database2 } from "bun:sqlite";
|
|
7795
8607
|
import { existsSync as existsSync4 } from "fs";
|
|
7796
|
-
import { join as
|
|
7797
|
-
import { homedir as
|
|
8608
|
+
import { join as join5 } from "path";
|
|
8609
|
+
import { homedir as homedir5 } from "os";
|
|
7798
8610
|
init_types();
|
|
7799
8611
|
function resolveTodosDbPath() {
|
|
7800
8612
|
const envPath = process.env["TODOS_DB_PATH"];
|
|
7801
8613
|
if (envPath)
|
|
7802
8614
|
return envPath;
|
|
7803
|
-
return
|
|
8615
|
+
return join5(homedir5(), ".todos", "todos.db");
|
|
7804
8616
|
}
|
|
7805
8617
|
function connectToTodos() {
|
|
7806
8618
|
const dbPath = resolveTodosDbPath();
|
|
@@ -7902,6 +8714,45 @@ async function notifyFailureToConversations(run, failedResults, scenarios) {
|
|
|
7902
8714
|
});
|
|
7903
8715
|
} catch {}
|
|
7904
8716
|
}
|
|
8717
|
+
async function notifyRunToConversations(run, results, options) {
|
|
8718
|
+
const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
|
|
8719
|
+
const space = options?.spaceId ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
|
|
8720
|
+
if (!baseUrl || !space)
|
|
8721
|
+
return;
|
|
8722
|
+
const passRate = run.total > 0 ? (run.passed / run.total * 100).toFixed(0) : "0";
|
|
8723
|
+
const statusIcon = run.status === "passed" ? "\u2705" : run.status === "failed" ? "\u274C" : "\u26A0\uFE0F";
|
|
8724
|
+
const durationSec = run.finishedAt && run.startedAt ? ((new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime()) / 1000).toFixed(1) : null;
|
|
8725
|
+
const lines = [
|
|
8726
|
+
`${statusIcon} **Testers run ${run.status.toUpperCase()}** \u2014 ${run.passed}/${run.total} scenarios (${passRate}% pass rate)`,
|
|
8727
|
+
``,
|
|
8728
|
+
`**URL:** ${run.url}`,
|
|
8729
|
+
`**Run ID:** \`${run.id}\``,
|
|
8730
|
+
`**Model:** ${run.model}`,
|
|
8731
|
+
durationSec ? `**Duration:** ${durationSec}s` : null
|
|
8732
|
+
].filter((l) => l !== null);
|
|
8733
|
+
if (run.status === "failed") {
|
|
8734
|
+
const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
|
|
8735
|
+
const failLines = failedResults.slice(0, 5).map((r) => {
|
|
8736
|
+
const err = r.error ? ` \u2014 ${r.error.slice(0, 100)}` : "";
|
|
8737
|
+
return ` \u274C ${r.scenarioId.slice(0, 8)}${err}`;
|
|
8738
|
+
});
|
|
8739
|
+
if (failLines.length > 0) {
|
|
8740
|
+
lines.push(``, `**Failures:**`);
|
|
8741
|
+
lines.push(...failLines);
|
|
8742
|
+
if (failedResults.length > 5)
|
|
8743
|
+
lines.push(` \u2026 and ${failedResults.length - 5} more`);
|
|
8744
|
+
}
|
|
8745
|
+
}
|
|
8746
|
+
const message = lines.join(`
|
|
8747
|
+
`);
|
|
8748
|
+
try {
|
|
8749
|
+
await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
|
|
8750
|
+
method: "POST",
|
|
8751
|
+
headers: { "Content-Type": "application/json" },
|
|
8752
|
+
body: JSON.stringify({ content: message, from: "testers" })
|
|
8753
|
+
});
|
|
8754
|
+
} catch {}
|
|
8755
|
+
}
|
|
7905
8756
|
|
|
7906
8757
|
// src/lib/runner.ts
|
|
7907
8758
|
var eventHandler = null;
|
|
@@ -7943,10 +8794,35 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
7943
8794
|
const config = loadConfig();
|
|
7944
8795
|
if (options.selfHeal !== undefined)
|
|
7945
8796
|
config.selfHeal = options.selfHeal;
|
|
7946
|
-
|
|
7947
|
-
|
|
8797
|
+
let effectiveOptions = options;
|
|
8798
|
+
if (options.minimal) {
|
|
8799
|
+
effectiveOptions = {
|
|
8800
|
+
...options,
|
|
8801
|
+
engine: options.engine ?? "playwright"
|
|
8802
|
+
};
|
|
8803
|
+
try {
|
|
8804
|
+
const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda)).catch(() => ({ isLightpandaAvailable: () => false }));
|
|
8805
|
+
if (isLightpandaAvailable2())
|
|
8806
|
+
effectiveOptions = { ...effectiveOptions, engine: "lightpanda" };
|
|
8807
|
+
} catch {}
|
|
8808
|
+
}
|
|
8809
|
+
const model = resolveModel(effectiveOptions.minimal ? "quick" : effectiveOptions.model ?? scenario.model ?? config.defaultModel);
|
|
8810
|
+
if (options.cacheMaxAgeMs && options.cacheMaxAgeMs > 0 && scenario.lastPassedAt && scenario.lastPassedUrl === options.url) {
|
|
8811
|
+
const age = Date.now() - new Date(scenario.lastPassedAt).getTime();
|
|
8812
|
+
if (age < options.cacheMaxAgeMs) {
|
|
8813
|
+
const cached = createResult({ runId, scenarioId: scenario.id, model, stepsTotal: 0 });
|
|
8814
|
+
return updateResult(cached.id, {
|
|
8815
|
+
status: "passed",
|
|
8816
|
+
reasoning: `Cache hit: passed ${Math.round(age / 1000)}s ago at ${options.url}`,
|
|
8817
|
+
stepsCompleted: 0,
|
|
8818
|
+
durationMs: 0,
|
|
8819
|
+
tokensUsed: 0
|
|
8820
|
+
});
|
|
8821
|
+
}
|
|
8822
|
+
}
|
|
8823
|
+
const client = createClientForModel(model, effectiveOptions.apiKey ?? config.anthropicApiKey);
|
|
7948
8824
|
const screenshotter = new Screenshotter({
|
|
7949
|
-
baseDir:
|
|
8825
|
+
baseDir: effectiveOptions.screenshotDir ?? config.screenshots.dir
|
|
7950
8826
|
});
|
|
7951
8827
|
const resolvedPersonaId = options.personaId ?? scenario.personaId;
|
|
7952
8828
|
const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
|
|
@@ -7962,12 +8838,20 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
7962
8838
|
let browser = null;
|
|
7963
8839
|
let page = null;
|
|
7964
8840
|
try {
|
|
7965
|
-
browser = await launchBrowser({ headless: !(
|
|
8841
|
+
browser = await launchBrowser({ headless: !(effectiveOptions.headed ?? false), engine: effectiveOptions.engine });
|
|
7966
8842
|
page = await getPage(browser, {
|
|
7967
8843
|
viewport: config.browser.viewport
|
|
7968
8844
|
});
|
|
7969
8845
|
const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
|
|
7970
8846
|
const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
|
|
8847
|
+
const consoleErrors = [];
|
|
8848
|
+
page.on("console", (msg) => {
|
|
8849
|
+
if (msg.type() === "error")
|
|
8850
|
+
consoleErrors.push(msg.text());
|
|
8851
|
+
});
|
|
8852
|
+
page.on("pageerror", (err) => {
|
|
8853
|
+
consoleErrors.push(err.message);
|
|
8854
|
+
});
|
|
7971
8855
|
await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
|
|
7972
8856
|
const stepStartTimes = new Map;
|
|
7973
8857
|
const agentResult = await withTimeout(runAgentLoop({
|
|
@@ -7977,15 +8861,17 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
7977
8861
|
screenshotter,
|
|
7978
8862
|
model,
|
|
7979
8863
|
runId,
|
|
7980
|
-
maxTurns: 30,
|
|
7981
|
-
a11y:
|
|
8864
|
+
maxTurns: effectiveOptions.minimal ? 10 : 30,
|
|
8865
|
+
a11y: effectiveOptions.a11y,
|
|
7982
8866
|
persona: persona ? {
|
|
7983
8867
|
name: persona.name,
|
|
7984
8868
|
role: persona.role,
|
|
7985
8869
|
description: persona.description,
|
|
7986
8870
|
instructions: persona.instructions,
|
|
7987
8871
|
traits: persona.traits,
|
|
7988
|
-
goals: persona.goals
|
|
8872
|
+
goals: persona.goals,
|
|
8873
|
+
behaviors: persona.behaviors,
|
|
8874
|
+
painPoints: persona.painPoints
|
|
7989
8875
|
} : null,
|
|
7990
8876
|
onStep: (stepEvent) => {
|
|
7991
8877
|
let stepDurationMs;
|
|
@@ -8012,7 +8898,7 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
8012
8898
|
});
|
|
8013
8899
|
}
|
|
8014
8900
|
}), scenarioTimeout, scenario.name);
|
|
8015
|
-
if (options.engine !== "lightpanda") {
|
|
8901
|
+
if (options.engine !== "lightpanda" && options.engine !== "bun") {
|
|
8016
8902
|
for (const ss of agentResult.screenshots) {
|
|
8017
8903
|
try {
|
|
8018
8904
|
createScreenshot({
|
|
@@ -8030,8 +8916,8 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
8030
8916
|
} catch {}
|
|
8031
8917
|
}
|
|
8032
8918
|
}
|
|
8033
|
-
const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : "";
|
|
8034
|
-
|
|
8919
|
+
const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : options.engine === "bun" ? " (Running with Bun.WebView \u2014 native, ~11x faster)" : "";
|
|
8920
|
+
let updatedResult = updateResult(result.id, {
|
|
8035
8921
|
status: agentResult.status,
|
|
8036
8922
|
reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
|
|
8037
8923
|
stepsCompleted: agentResult.stepsCompleted,
|
|
@@ -8039,29 +8925,53 @@ async function runSingleScenario(scenario, runId, options) {
|
|
|
8039
8925
|
tokensUsed: agentResult.tokensUsed,
|
|
8040
8926
|
costCents: estimateCost(model, agentResult.tokensUsed)
|
|
8041
8927
|
});
|
|
8928
|
+
if (agentResult.status === "failed" || agentResult.status === "error") {
|
|
8929
|
+
const failureAnalysis = analyzeFailure(null, agentResult.reasoning ?? null);
|
|
8930
|
+
if (failureAnalysis) {
|
|
8931
|
+
updatedResult = updateResult(result.id, { failureAnalysis });
|
|
8932
|
+
}
|
|
8933
|
+
}
|
|
8934
|
+
if (agentResult.status === "passed") {
|
|
8935
|
+
try {
|
|
8936
|
+
updateScenarioPassedCache(scenario.id, options.url);
|
|
8937
|
+
} catch {}
|
|
8938
|
+
}
|
|
8042
8939
|
const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
|
|
8043
8940
|
emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
|
|
8044
8941
|
return updatedResult;
|
|
8045
8942
|
} catch (error) {
|
|
8046
8943
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
8047
|
-
|
|
8944
|
+
let updatedResult = updateResult(result.id, {
|
|
8048
8945
|
status: "error",
|
|
8049
8946
|
error: errorMsg,
|
|
8050
8947
|
durationMs: Date.now() - new Date(result.createdAt).getTime()
|
|
8051
8948
|
});
|
|
8949
|
+
const failureAnalysis = analyzeFailure(errorMsg, null);
|
|
8950
|
+
if (failureAnalysis) {
|
|
8951
|
+
updatedResult = updateResult(result.id, { failureAnalysis });
|
|
8952
|
+
}
|
|
8052
8953
|
emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
|
|
8053
8954
|
return updatedResult;
|
|
8054
8955
|
} finally {
|
|
8055
8956
|
if (browser)
|
|
8056
|
-
await closeBrowser(browser,
|
|
8957
|
+
await closeBrowser(browser, effectiveOptions.engine);
|
|
8057
8958
|
}
|
|
8058
8959
|
}
|
|
8059
8960
|
async function runBatch(scenarios, options) {
|
|
8060
8961
|
const config = loadConfig();
|
|
8061
|
-
const model = resolveModel(options.model ?? config.defaultModel);
|
|
8062
|
-
const parallel = options.parallel ?? 1;
|
|
8962
|
+
const model = resolveModel(options.minimal ? "quick" : options.model ?? config.defaultModel);
|
|
8963
|
+
const parallel = options.minimal ? Math.max(5, options.parallel ?? 1) : options.parallel ?? 1;
|
|
8063
8964
|
const samples = options.samples ?? 1;
|
|
8064
8965
|
const flakinessThreshold = options.flakinessThreshold ?? 0.95;
|
|
8966
|
+
if (!options.skipBudgetCheck) {
|
|
8967
|
+
const cap = options.maxCostCents ?? config.defaultMaxCostCents;
|
|
8968
|
+
if (cap !== undefined && cap > 0) {
|
|
8969
|
+
const estimated = estimateRunCostCents(scenarios.length, model, samples);
|
|
8970
|
+
if (estimated > cap) {
|
|
8971
|
+
throw new BudgetExceededError(estimated, cap);
|
|
8972
|
+
}
|
|
8973
|
+
}
|
|
8974
|
+
}
|
|
8065
8975
|
const run = createRun({
|
|
8066
8976
|
url: options.url,
|
|
8067
8977
|
model,
|
|
@@ -8203,6 +9113,10 @@ async function runBatch(scenarios, options) {
|
|
|
8203
9113
|
createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
|
|
8204
9114
|
notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
|
|
8205
9115
|
}
|
|
9116
|
+
const conversationsSpaceId = config.conversationsSpace ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
|
|
9117
|
+
if (conversationsSpaceId) {
|
|
9118
|
+
notifyRunToConversations(finalRun, results, { spaceId: conversationsSpaceId }).catch(() => {});
|
|
9119
|
+
}
|
|
8206
9120
|
return { run: finalRun, results };
|
|
8207
9121
|
}
|
|
8208
9122
|
async function runByFilter(options) {
|
|
@@ -9545,7 +10459,7 @@ async function handleRequest(req) {
|
|
|
9545
10459
|
if (pathname === "/api/status" && method === "GET") {
|
|
9546
10460
|
const config = loadConfig();
|
|
9547
10461
|
getDatabase();
|
|
9548
|
-
const dbPath = process.env["TESTERS_DB_PATH"] ??
|
|
10462
|
+
const dbPath = process.env["TESTERS_DB_PATH"] ?? join6(homedir6(), ".testers", "testers.db");
|
|
9549
10463
|
const scenarios = listScenarios();
|
|
9550
10464
|
const runs = listRuns();
|
|
9551
10465
|
return jsonResponse({
|
|
@@ -9736,6 +10650,20 @@ async function handleRequest(req) {
|
|
|
9736
10650
|
const total = countScreenshots(id);
|
|
9737
10651
|
return jsonResponse({ ...result, screenshots }, 200, { "X-Total-Count": String(total) });
|
|
9738
10652
|
}
|
|
10653
|
+
const resultExplainMatch = pathname.match(/^\/api\/results\/([^/]+)\/explain$/);
|
|
10654
|
+
if (resultExplainMatch && method === "GET") {
|
|
10655
|
+
const id = resultExplainMatch[1];
|
|
10656
|
+
try {
|
|
10657
|
+
const { explainFailure: explainFailure2 } = await Promise.resolve().then(() => (init_failure_explainer(), exports_failure_explainer));
|
|
10658
|
+
const explanation = explainFailure2(id);
|
|
10659
|
+
return jsonResponse(explanation);
|
|
10660
|
+
} catch (err) {
|
|
10661
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
10662
|
+
if (msg.includes("not found"))
|
|
10663
|
+
return errorResponse(msg, 404);
|
|
10664
|
+
return errorResponse(msg, 500);
|
|
10665
|
+
}
|
|
10666
|
+
}
|
|
9739
10667
|
const screenshotFileMatch = pathname.match(/^\/api\/screenshots\/([^/]+)\/file$/);
|
|
9740
10668
|
if (screenshotFileMatch && method === "GET") {
|
|
9741
10669
|
const id = screenshotFileMatch[1];
|
|
@@ -10190,7 +11118,7 @@ async function handleRequest(req) {
|
|
|
10190
11118
|
return jsonResponse({ routes, apiRoutes, totalCovered: coverageMap.size });
|
|
10191
11119
|
}
|
|
10192
11120
|
if (!pathname.startsWith("/api")) {
|
|
10193
|
-
const dashboardDir =
|
|
11121
|
+
const dashboardDir = join6(import.meta.dir, "..", "..", "dashboard", "dist");
|
|
10194
11122
|
if (!existsSync5(dashboardDir)) {
|
|
10195
11123
|
return new Response(`<!DOCTYPE html>
|
|
10196
11124
|
<html>
|
|
@@ -10209,7 +11137,7 @@ async function handleRequest(req) {
|
|
|
10209
11137
|
}
|
|
10210
11138
|
});
|
|
10211
11139
|
}
|
|
10212
|
-
const filePath =
|
|
11140
|
+
const filePath = join6(dashboardDir, pathname === "/" ? "index.html" : pathname);
|
|
10213
11141
|
if (existsSync5(filePath)) {
|
|
10214
11142
|
const file = Bun.file(filePath);
|
|
10215
11143
|
return new Response(file, {
|
|
@@ -10219,7 +11147,7 @@ async function handleRequest(req) {
|
|
|
10219
11147
|
}
|
|
10220
11148
|
});
|
|
10221
11149
|
}
|
|
10222
|
-
const indexPath =
|
|
11150
|
+
const indexPath = join6(dashboardDir, "index.html");
|
|
10223
11151
|
if (existsSync5(indexPath)) {
|
|
10224
11152
|
const file = Bun.file(indexPath);
|
|
10225
11153
|
return new Response(file, {
|