@hasna/testers 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +170 -21
- package/dashboard/dist/assets/{index-BSYf1bIR.css → index-CQzkimyO.css} +1 -1
- package/dashboard/dist/index.html +2 -2
- package/dist/cli/index.js +2043 -818
- package/dist/db/database.d.ts.map +1 -1
- package/dist/db/personas.d.ts +8 -0
- package/dist/db/personas.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -1
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/scenarios.d.ts +1 -0
- package/dist/db/scenarios.d.ts.map +1 -1
- package/dist/db/seed-personas.d.ts +15 -0
- package/dist/db/seed-personas.d.ts.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2220 -1441
- package/dist/lib/ai-client.d.ts +7 -8
- package/dist/lib/ai-client.d.ts.map +1 -1
- package/dist/lib/browser-bun.d.ts +153 -0
- package/dist/lib/browser-bun.d.ts.map +1 -0
- package/dist/lib/browser.d.ts +1 -1
- package/dist/lib/browser.d.ts.map +1 -1
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/costs.d.ts +5 -0
- package/dist/lib/costs.d.ts.map +1 -1
- package/dist/lib/failure-analyzer.d.ts +7 -0
- package/dist/lib/failure-analyzer.d.ts.map +1 -0
- package/dist/lib/failure-explainer.d.ts +17 -0
- package/dist/lib/failure-explainer.d.ts.map +1 -0
- package/dist/lib/failure-pipeline.d.ts +11 -0
- package/dist/lib/failure-pipeline.d.ts.map +1 -1
- package/dist/lib/hybrid-runner.d.ts +100 -0
- package/dist/lib/hybrid-runner.d.ts.map +1 -0
- package/dist/lib/judge.d.ts +1 -1
- package/dist/lib/judge.d.ts.map +1 -1
- package/dist/lib/reporter.d.ts +2 -0
- package/dist/lib/reporter.d.ts.map +1 -1
- package/dist/lib/runner.d.ts +5 -1
- package/dist/lib/runner.d.ts.map +1 -1
- package/dist/lib/screenshotter.d.ts.map +1 -1
- package/dist/mcp/index.js +8580 -6403
- package/dist/server/index.js +1082 -154
- package/dist/types/index.d.ts +60 -2
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +4 -4
- package/dist/cli/index.d.ts +0 -3
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/mcp/index.d.ts +0 -3
- package/dist/mcp/index.d.ts.map +0 -1
- /package/dashboard/dist/assets/{index-Bdn52878.js → index-D52SWwDa.js} +0 -0
package/dist/index.js
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
// @bun
|
|
2
2
|
var __defProp = Object.defineProperty;
|
|
3
|
+
var __returnValue = (v) => v;
|
|
4
|
+
function __exportSetter(name, newValue) {
|
|
5
|
+
this[name] = __returnValue.bind(null, newValue);
|
|
6
|
+
}
|
|
3
7
|
var __export = (target, all) => {
|
|
4
8
|
for (var name in all)
|
|
5
9
|
__defProp(target, name, {
|
|
6
10
|
get: all[name],
|
|
7
11
|
enumerable: true,
|
|
8
12
|
configurable: true,
|
|
9
|
-
set: (
|
|
13
|
+
set: __exportSetter.bind(all, name)
|
|
10
14
|
});
|
|
11
15
|
};
|
|
12
16
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
@@ -56,9 +60,12 @@ function scenarioFromRow(row) {
|
|
|
56
60
|
assertions: JSON.parse(row.assertions || "[]"),
|
|
57
61
|
personaId: row.persona_id ?? null,
|
|
58
62
|
scenarioType: row.scenario_type ?? "browser",
|
|
63
|
+
requiredRole: row.required_role ?? null,
|
|
59
64
|
version: row.version,
|
|
60
65
|
createdAt: row.created_at,
|
|
61
|
-
updatedAt: row.updated_at
|
|
66
|
+
updatedAt: row.updated_at,
|
|
67
|
+
lastPassedAt: row.last_passed_at ?? null,
|
|
68
|
+
lastPassedUrl: row.last_passed_url ?? null
|
|
62
69
|
};
|
|
63
70
|
}
|
|
64
71
|
function runFromRow(row) {
|
|
@@ -98,7 +105,8 @@ function resultFromRow(row) {
|
|
|
98
105
|
metadata: row.metadata ? JSON.parse(row.metadata) : null,
|
|
99
106
|
createdAt: row.created_at,
|
|
100
107
|
personaId: row.persona_id ?? null,
|
|
101
|
-
personaName: row.persona_name ?? null
|
|
108
|
+
personaName: row.persona_name ?? null,
|
|
109
|
+
failureAnalysis: row.failure_analysis ? JSON.parse(row.failure_analysis) : null
|
|
102
110
|
};
|
|
103
111
|
}
|
|
104
112
|
function screenshotFromRow(row) {
|
|
@@ -148,6 +156,7 @@ function flowFromRow(row) {
|
|
|
148
156
|
};
|
|
149
157
|
}
|
|
150
158
|
function personaFromRow(row) {
|
|
159
|
+
const hasAuth = row.auth_email && row.auth_password;
|
|
151
160
|
return {
|
|
152
161
|
id: row.id,
|
|
153
162
|
shortId: row.short_id,
|
|
@@ -156,21 +165,33 @@ function personaFromRow(row) {
|
|
|
156
165
|
description: row.description,
|
|
157
166
|
role: row.role,
|
|
158
167
|
instructions: row.instructions,
|
|
159
|
-
traits: JSON.parse(row.traits),
|
|
160
|
-
goals: JSON.parse(row.goals),
|
|
168
|
+
traits: JSON.parse(row.traits || "[]"),
|
|
169
|
+
goals: JSON.parse(row.goals || "[]"),
|
|
170
|
+
behaviors: JSON.parse(row.behaviors || "[]"),
|
|
171
|
+
expertiseLevel: row.expertise_level || "intermediate",
|
|
172
|
+
demographics: JSON.parse(row.demographics || "{}"),
|
|
173
|
+
painPoints: JSON.parse(row.pain_points || "[]"),
|
|
161
174
|
metadata: row.metadata ? JSON.parse(row.metadata) : null,
|
|
162
175
|
enabled: row.enabled === 1,
|
|
163
176
|
version: row.version,
|
|
164
177
|
createdAt: row.created_at,
|
|
165
|
-
updatedAt: row.updated_at
|
|
178
|
+
updatedAt: row.updated_at,
|
|
179
|
+
auth: hasAuth ? {
|
|
180
|
+
email: row.auth_email,
|
|
181
|
+
password: row.auth_password,
|
|
182
|
+
loginPath: row.auth_login_path ?? "/login",
|
|
183
|
+
cookies: row.auth_cookies ? JSON.parse(row.auth_cookies) : null
|
|
184
|
+
} : null
|
|
166
185
|
};
|
|
167
186
|
}
|
|
168
|
-
var MODEL_MAP, ScenarioNotFoundError, RunNotFoundError, ResultNotFoundError, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ProjectNotFoundError, AgentNotFoundError, ScheduleNotFoundError, FlowNotFoundError, DependencyCycleError;
|
|
187
|
+
var MODEL_MAP, ScenarioNotFoundError, RunNotFoundError, ResultNotFoundError, VersionConflictError, BrowserError, AIClientError, TodosConnectionError, ProjectNotFoundError, AgentNotFoundError, ScheduleNotFoundError, BudgetExceededError, FlowNotFoundError, DependencyCycleError;
|
|
169
188
|
var init_types = __esm(() => {
|
|
170
189
|
MODEL_MAP = {
|
|
171
190
|
quick: "claude-haiku-4-5-20251001",
|
|
172
191
|
thorough: "claude-sonnet-4-6-20260311",
|
|
173
|
-
deep: "claude-opus-4-6-20260311"
|
|
192
|
+
deep: "claude-opus-4-6-20260311",
|
|
193
|
+
"cerebras-fast": "llama-3.1-8b",
|
|
194
|
+
"cerebras-smart": "llama-3.3-70b"
|
|
174
195
|
};
|
|
175
196
|
ScenarioNotFoundError = class ScenarioNotFoundError extends Error {
|
|
176
197
|
constructor(id) {
|
|
@@ -232,6 +253,12 @@ var init_types = __esm(() => {
|
|
|
232
253
|
this.name = "ScheduleNotFoundError";
|
|
233
254
|
}
|
|
234
255
|
};
|
|
256
|
+
BudgetExceededError = class BudgetExceededError extends Error {
|
|
257
|
+
constructor(estimatedCents, capCents) {
|
|
258
|
+
super(`Estimated run cost ($${(estimatedCents / 100).toFixed(2)}) exceeds budget cap ($${(capCents / 100).toFixed(2)}). Pass skipBudgetCheck: true to override.`);
|
|
259
|
+
this.name = "BudgetExceededError";
|
|
260
|
+
}
|
|
261
|
+
};
|
|
235
262
|
FlowNotFoundError = class FlowNotFoundError extends Error {
|
|
236
263
|
constructor(id) {
|
|
237
264
|
super(`Flow not found: ${id}`);
|
|
@@ -673,6 +700,26 @@ CREATE TABLE IF NOT EXISTS golden_check_results (
|
|
|
673
700
|
CREATE INDEX IF NOT EXISTS idx_golden_project ON golden_answers(project_id);
|
|
674
701
|
CREATE INDEX IF NOT EXISTS idx_golden_enabled ON golden_answers(enabled);
|
|
675
702
|
CREATE INDEX IF NOT EXISTS idx_golden_results_golden ON golden_check_results(golden_id);
|
|
703
|
+
`,
|
|
704
|
+
`
|
|
705
|
+
ALTER TABLE results ADD COLUMN failure_analysis TEXT;
|
|
706
|
+
`,
|
|
707
|
+
`
|
|
708
|
+
ALTER TABLE personas ADD COLUMN behaviors TEXT DEFAULT '[]';
|
|
709
|
+
ALTER TABLE personas ADD COLUMN expertise_level TEXT DEFAULT 'intermediate';
|
|
710
|
+
ALTER TABLE personas ADD COLUMN demographics TEXT DEFAULT '{}';
|
|
711
|
+
ALTER TABLE personas ADD COLUMN pain_points TEXT DEFAULT '[]';
|
|
712
|
+
`,
|
|
713
|
+
`
|
|
714
|
+
ALTER TABLE scenarios ADD COLUMN last_passed_at TEXT;
|
|
715
|
+
ALTER TABLE scenarios ADD COLUMN last_passed_url TEXT;
|
|
716
|
+
`,
|
|
717
|
+
`
|
|
718
|
+
ALTER TABLE personas ADD COLUMN auth_email TEXT;
|
|
719
|
+
ALTER TABLE personas ADD COLUMN auth_password TEXT;
|
|
720
|
+
ALTER TABLE personas ADD COLUMN auth_login_path TEXT DEFAULT '/login';
|
|
721
|
+
ALTER TABLE personas ADD COLUMN auth_cookies TEXT;
|
|
722
|
+
ALTER TABLE scenarios ADD COLUMN required_role TEXT;
|
|
676
723
|
`
|
|
677
724
|
];
|
|
678
725
|
});
|
|
@@ -1026,7 +1073,8 @@ function loadConfig() {
|
|
|
1026
1073
|
todosDbPath: fileConfig.todosDbPath,
|
|
1027
1074
|
judgeModel: fileConfig.judgeModel,
|
|
1028
1075
|
judgeProvider: fileConfig.judgeProvider,
|
|
1029
|
-
selfHeal: fileConfig.selfHeal ?? false
|
|
1076
|
+
selfHeal: fileConfig.selfHeal ?? false,
|
|
1077
|
+
conversationsSpace: fileConfig.conversationsSpace
|
|
1030
1078
|
};
|
|
1031
1079
|
const envModel = process.env["TESTERS_MODEL"];
|
|
1032
1080
|
if (envModel) {
|
|
@@ -1217,6 +1265,412 @@ var init_browser_lightpanda = __esm(() => {
|
|
|
1217
1265
|
init_types();
|
|
1218
1266
|
});
|
|
1219
1267
|
|
|
1268
|
+
// src/lib/browser-bun.ts
|
|
1269
|
+
var exports_browser_bun = {};
|
|
1270
|
+
__export(exports_browser_bun, {
|
|
1271
|
+
isBunWebViewAvailable: () => isBunWebViewAvailable,
|
|
1272
|
+
BunWebViewSession: () => BunWebViewSession
|
|
1273
|
+
});
|
|
1274
|
+
import { join as join3 } from "path";
|
|
1275
|
+
import { mkdirSync as mkdirSync2 } from "fs";
|
|
1276
|
+
import { homedir as homedir3 } from "os";
|
|
1277
|
+
function isBunWebViewAvailable() {
|
|
1278
|
+
return typeof globalThis.Bun !== "undefined" && typeof globalThis.Bun.WebView !== "undefined";
|
|
1279
|
+
}
|
|
1280
|
+
function getProfileDir(profileName) {
|
|
1281
|
+
const base = process.env["TESTERS_BROWSER_DATA_DIR"] ?? join3(homedir3(), ".testers", "browser");
|
|
1282
|
+
const dir = join3(base, "profiles", profileName);
|
|
1283
|
+
mkdirSync2(dir, { recursive: true });
|
|
1284
|
+
return dir;
|
|
1285
|
+
}
|
|
1286
|
+
var BunWebViewSession;
|
|
1287
|
+
var init_browser_bun = __esm(() => {
|
|
1288
|
+
BunWebViewSession = class BunWebViewSession {
|
|
1289
|
+
view;
|
|
1290
|
+
_sessionId;
|
|
1291
|
+
_eventListeners = new Map;
|
|
1292
|
+
constructor(opts = {}) {
|
|
1293
|
+
if (!isBunWebViewAvailable()) {
|
|
1294
|
+
throw new Error("Bun.WebView is not available. Install Bun canary: bun upgrade --canary");
|
|
1295
|
+
}
|
|
1296
|
+
const BunWebView = globalThis.Bun.WebView;
|
|
1297
|
+
const constructorOpts = {
|
|
1298
|
+
width: opts.width ?? 1280,
|
|
1299
|
+
height: opts.height ?? 720
|
|
1300
|
+
};
|
|
1301
|
+
if (opts.profile) {
|
|
1302
|
+
constructorOpts.dataStore = { directory: getProfileDir(opts.profile) };
|
|
1303
|
+
} else {
|
|
1304
|
+
constructorOpts.dataStore = "ephemeral";
|
|
1305
|
+
}
|
|
1306
|
+
if (opts.onConsole) {
|
|
1307
|
+
constructorOpts.console = opts.onConsole;
|
|
1308
|
+
}
|
|
1309
|
+
this.view = new BunWebView(constructorOpts);
|
|
1310
|
+
this.view.onNavigated = (url) => {
|
|
1311
|
+
this._emit("navigated", url);
|
|
1312
|
+
};
|
|
1313
|
+
this.view.onNavigationFailed = (error) => {
|
|
1314
|
+
this._emit("navigationfailed", error);
|
|
1315
|
+
};
|
|
1316
|
+
}
|
|
1317
|
+
async goto(url, opts) {
|
|
1318
|
+
await this.view.navigate(url);
|
|
1319
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1320
|
+
}
|
|
1321
|
+
async goBack() {
|
|
1322
|
+
await this.view.goBack();
|
|
1323
|
+
}
|
|
1324
|
+
async goForward() {
|
|
1325
|
+
await this.view.goForward();
|
|
1326
|
+
}
|
|
1327
|
+
async reload() {
|
|
1328
|
+
await this.view.reload();
|
|
1329
|
+
}
|
|
1330
|
+
async evaluate(fnOrExpr, ...args) {
|
|
1331
|
+
let expr;
|
|
1332
|
+
if (typeof fnOrExpr === "function") {
|
|
1333
|
+
const serializedArgs = args.map((a) => JSON.stringify(a)).join(", ");
|
|
1334
|
+
expr = `(${fnOrExpr.toString()})(${serializedArgs})`;
|
|
1335
|
+
} else {
|
|
1336
|
+
expr = fnOrExpr;
|
|
1337
|
+
}
|
|
1338
|
+
return this.view.evaluate(expr);
|
|
1339
|
+
}
|
|
1340
|
+
async screenshot(opts) {
|
|
1341
|
+
const uint8 = await this.view.screenshot();
|
|
1342
|
+
return Buffer.from(uint8);
|
|
1343
|
+
}
|
|
1344
|
+
async click(selector, opts) {
|
|
1345
|
+
await this.view.click(selector, opts ? { button: opts.button } : undefined);
|
|
1346
|
+
}
|
|
1347
|
+
async type(selector, text, opts) {
|
|
1348
|
+
try {
|
|
1349
|
+
await this.view.click(selector);
|
|
1350
|
+
} catch {}
|
|
1351
|
+
await this.view.type(text);
|
|
1352
|
+
}
|
|
1353
|
+
async fill(selector, value) {
|
|
1354
|
+
await this.view.evaluate(`
|
|
1355
|
+
(() => {
|
|
1356
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1357
|
+
if (el) { el.value = ''; el.dispatchEvent(new Event('input')); }
|
|
1358
|
+
})()
|
|
1359
|
+
`);
|
|
1360
|
+
await this.type(selector, value);
|
|
1361
|
+
}
|
|
1362
|
+
async press(key, opts) {
|
|
1363
|
+
await this.view.press(key, opts);
|
|
1364
|
+
}
|
|
1365
|
+
async scroll(direction, amount) {
|
|
1366
|
+
const dx = direction === "left" ? -amount : direction === "right" ? amount : 0;
|
|
1367
|
+
const dy = direction === "up" ? -amount : direction === "down" ? amount : 0;
|
|
1368
|
+
await this.view.scroll(dx, dy);
|
|
1369
|
+
}
|
|
1370
|
+
async scrollIntoView(selector) {
|
|
1371
|
+
await this.view.scrollTo(selector);
|
|
1372
|
+
}
|
|
1373
|
+
async hover(selector) {
|
|
1374
|
+
try {
|
|
1375
|
+
await this.view.scrollTo(selector);
|
|
1376
|
+
} catch {}
|
|
1377
|
+
}
|
|
1378
|
+
async resize(width, height) {
|
|
1379
|
+
await this.view.resize(width, height);
|
|
1380
|
+
}
|
|
1381
|
+
async $(selector) {
|
|
1382
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1383
|
+
if (!exists)
|
|
1384
|
+
return null;
|
|
1385
|
+
return {
|
|
1386
|
+
textContent: async () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`)
|
|
1387
|
+
};
|
|
1388
|
+
}
|
|
1389
|
+
async $$(selector) {
|
|
1390
|
+
const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
|
|
1391
|
+
return Array.from({ length: count }, (_, i) => ({
|
|
1392
|
+
textContent: async () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${i}]?.textContent ?? null`)
|
|
1393
|
+
}));
|
|
1394
|
+
}
|
|
1395
|
+
async inputValue(selector) {
|
|
1396
|
+
return this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.value ?? ''`);
|
|
1397
|
+
}
|
|
1398
|
+
async isChecked(selector) {
|
|
1399
|
+
return this.view.evaluate(`!!(document.querySelector(${JSON.stringify(selector)})?.checked)`);
|
|
1400
|
+
}
|
|
1401
|
+
async isVisible(selector) {
|
|
1402
|
+
return this.view.evaluate(`
|
|
1403
|
+
(() => {
|
|
1404
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1405
|
+
if (!el) return false;
|
|
1406
|
+
const style = window.getComputedStyle(el);
|
|
1407
|
+
return style.display !== 'none' && style.visibility !== 'hidden' && el.offsetWidth > 0;
|
|
1408
|
+
})()
|
|
1409
|
+
`);
|
|
1410
|
+
}
|
|
1411
|
+
async isEnabled(selector) {
|
|
1412
|
+
return this.view.evaluate(`!(document.querySelector(${JSON.stringify(selector)})?.disabled)`);
|
|
1413
|
+
}
|
|
1414
|
+
async selectOption(selector, value) {
|
|
1415
|
+
await this.view.evaluate(`
|
|
1416
|
+
(() => {
|
|
1417
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1418
|
+
if (el) {
|
|
1419
|
+
el.value = ${JSON.stringify(value)};
|
|
1420
|
+
el.dispatchEvent(new Event('change'));
|
|
1421
|
+
}
|
|
1422
|
+
})()
|
|
1423
|
+
`);
|
|
1424
|
+
return [value];
|
|
1425
|
+
}
|
|
1426
|
+
async check(selector) {
|
|
1427
|
+
await this.view.evaluate(`
|
|
1428
|
+
(() => {
|
|
1429
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1430
|
+
if (el && !el.checked) { el.checked = true; el.dispatchEvent(new Event('change')); }
|
|
1431
|
+
})()
|
|
1432
|
+
`);
|
|
1433
|
+
}
|
|
1434
|
+
async uncheck(selector) {
|
|
1435
|
+
await this.view.evaluate(`
|
|
1436
|
+
(() => {
|
|
1437
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
1438
|
+
if (el && el.checked) { el.checked = false; el.dispatchEvent(new Event('change')); }
|
|
1439
|
+
})()
|
|
1440
|
+
`);
|
|
1441
|
+
}
|
|
1442
|
+
async setInputFiles(selector, files) {
|
|
1443
|
+
throw new Error("File upload not supported in Bun.WebView engine. Use engine: 'playwright' instead.");
|
|
1444
|
+
}
|
|
1445
|
+
getByRole(role, opts) {
|
|
1446
|
+
const name = opts?.name?.toString() ?? "";
|
|
1447
|
+
const selector = name ? `[role="${role}"][aria-label*="${name}"], ${role}[aria-label*="${name}"]` : `[role="${role}"], ${role}`;
|
|
1448
|
+
return {
|
|
1449
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1450
|
+
fill: (value) => this.fill(selector, value),
|
|
1451
|
+
check: () => this.check(selector),
|
|
1452
|
+
uncheck: () => this.uncheck(selector),
|
|
1453
|
+
isVisible: () => this.isVisible(selector),
|
|
1454
|
+
textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
|
|
1455
|
+
inputValue: () => this.inputValue(selector),
|
|
1456
|
+
first: () => ({
|
|
1457
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1458
|
+
fill: (value) => this.fill(selector, value),
|
|
1459
|
+
textContent: () => this.view.evaluate(`document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`),
|
|
1460
|
+
isVisible: () => this.isVisible(selector),
|
|
1461
|
+
hover: () => this.hover(selector),
|
|
1462
|
+
boundingBox: async () => null,
|
|
1463
|
+
scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
|
|
1464
|
+
evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
|
|
1465
|
+
waitFor: (opts2) => {
|
|
1466
|
+
return new Promise((resolve, reject) => {
|
|
1467
|
+
const timeout = opts2?.timeout ?? 1e4;
|
|
1468
|
+
const start = Date.now();
|
|
1469
|
+
const check = async () => {
|
|
1470
|
+
const visible = await this.isVisible(selector);
|
|
1471
|
+
if (visible)
|
|
1472
|
+
return resolve();
|
|
1473
|
+
if (Date.now() - start > timeout)
|
|
1474
|
+
return reject(new Error(`Timeout waiting for ${selector}`));
|
|
1475
|
+
setTimeout(check, 100);
|
|
1476
|
+
};
|
|
1477
|
+
check();
|
|
1478
|
+
});
|
|
1479
|
+
}
|
|
1480
|
+
}),
|
|
1481
|
+
count: async () => {
|
|
1482
|
+
const count = await this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)}).length`);
|
|
1483
|
+
return count;
|
|
1484
|
+
},
|
|
1485
|
+
nth: (n) => ({
|
|
1486
|
+
click: (clickOpts) => this.click(selector, clickOpts),
|
|
1487
|
+
textContent: () => this.view.evaluate(`document.querySelectorAll(${JSON.stringify(selector)})[${n}]?.textContent ?? null`),
|
|
1488
|
+
isVisible: () => this.isVisible(selector)
|
|
1489
|
+
})
|
|
1490
|
+
};
|
|
1491
|
+
}
|
|
1492
|
+
getByText(text, opts) {
|
|
1493
|
+
const selector = opts?.exact ? `*:is(button, a, span, div, p, h1, h2, h3, h4, label)` : "*";
|
|
1494
|
+
return {
|
|
1495
|
+
first: () => ({
|
|
1496
|
+
click: async (clickOpts) => {
|
|
1497
|
+
await this.view.evaluate(`
|
|
1498
|
+
(() => {
|
|
1499
|
+
const text = ${JSON.stringify(text)};
|
|
1500
|
+
const all = document.querySelectorAll('*');
|
|
1501
|
+
for (const el of all) {
|
|
1502
|
+
if (el.children.length === 0 && el.textContent?.trim() === text) {
|
|
1503
|
+
el.click(); return;
|
|
1504
|
+
}
|
|
1505
|
+
}
|
|
1506
|
+
for (const el of all) {
|
|
1507
|
+
if (el.textContent?.includes(text)) { el.click(); return; }
|
|
1508
|
+
}
|
|
1509
|
+
})()
|
|
1510
|
+
`);
|
|
1511
|
+
},
|
|
1512
|
+
waitFor: (waitOpts) => {
|
|
1513
|
+
const timeout = waitOpts?.timeout ?? 1e4;
|
|
1514
|
+
return new Promise((resolve, reject) => {
|
|
1515
|
+
const start = Date.now();
|
|
1516
|
+
const check = async () => {
|
|
1517
|
+
const found = await this.view.evaluate(`document.body?.textContent?.includes(${JSON.stringify(text)})`);
|
|
1518
|
+
if (found)
|
|
1519
|
+
return resolve();
|
|
1520
|
+
if (Date.now() - start > timeout)
|
|
1521
|
+
return reject(new Error(`Timeout: text "${text}" not found`));
|
|
1522
|
+
setTimeout(check, 100);
|
|
1523
|
+
};
|
|
1524
|
+
check();
|
|
1525
|
+
});
|
|
1526
|
+
}
|
|
1527
|
+
})
|
|
1528
|
+
};
|
|
1529
|
+
}
|
|
1530
|
+
locator(selector) {
|
|
1531
|
+
return {
|
|
1532
|
+
click: (opts) => this.click(selector, opts),
|
|
1533
|
+
fill: (value) => this.fill(selector, value),
|
|
1534
|
+
scrollIntoViewIfNeeded: () => this.scrollIntoView(selector),
|
|
1535
|
+
first: () => this.getByRole("*").first(),
|
|
1536
|
+
evaluate: (fn) => this.view.evaluate(`(${fn.toString()})(document.querySelector(${JSON.stringify(selector)}))`),
|
|
1537
|
+
waitFor: (opts) => {
|
|
1538
|
+
const timeout = opts?.timeout ?? 1e4;
|
|
1539
|
+
return new Promise((resolve, reject) => {
|
|
1540
|
+
const start = Date.now();
|
|
1541
|
+
const check = async () => {
|
|
1542
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1543
|
+
if (exists)
|
|
1544
|
+
return resolve();
|
|
1545
|
+
if (Date.now() - start > timeout)
|
|
1546
|
+
return reject(new Error(`Timeout: ${selector}`));
|
|
1547
|
+
setTimeout(check, 100);
|
|
1548
|
+
};
|
|
1549
|
+
check();
|
|
1550
|
+
});
|
|
1551
|
+
}
|
|
1552
|
+
};
|
|
1553
|
+
}
|
|
1554
|
+
url() {
|
|
1555
|
+
return this.view.url;
|
|
1556
|
+
}
|
|
1557
|
+
async title() {
|
|
1558
|
+
return this.view.title || await this.evaluate("document.title");
|
|
1559
|
+
}
|
|
1560
|
+
viewportSize() {
|
|
1561
|
+
return { width: 1280, height: 720 };
|
|
1562
|
+
}
|
|
1563
|
+
async waitForLoadState(state, opts) {
|
|
1564
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1565
|
+
}
|
|
1566
|
+
async waitForURL(pattern, opts) {
|
|
1567
|
+
const timeout = opts?.timeout ?? 30000;
|
|
1568
|
+
const start = Date.now();
|
|
1569
|
+
while (Date.now() - start < timeout) {
|
|
1570
|
+
const url = this.view.url;
|
|
1571
|
+
const matches = pattern instanceof RegExp ? pattern.test(url) : url.includes(pattern);
|
|
1572
|
+
if (matches)
|
|
1573
|
+
return;
|
|
1574
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1575
|
+
}
|
|
1576
|
+
throw new Error(`Timeout waiting for URL to match ${pattern}`);
|
|
1577
|
+
}
|
|
1578
|
+
async waitForSelector(selector, opts) {
|
|
1579
|
+
const timeout = opts?.timeout ?? 1e4;
|
|
1580
|
+
const start = Date.now();
|
|
1581
|
+
while (Date.now() - start < timeout) {
|
|
1582
|
+
const exists = await this.view.evaluate(`!!document.querySelector(${JSON.stringify(selector)})`);
|
|
1583
|
+
if (exists)
|
|
1584
|
+
return;
|
|
1585
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1586
|
+
}
|
|
1587
|
+
throw new Error(`Timeout waiting for ${selector}`);
|
|
1588
|
+
}
|
|
1589
|
+
async setContent(html) {
|
|
1590
|
+
await this.view.navigate(`data:text/html,${encodeURIComponent(html)}`);
|
|
1591
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
1592
|
+
}
|
|
1593
|
+
async content() {
|
|
1594
|
+
return this.view.evaluate("document.documentElement.outerHTML");
|
|
1595
|
+
}
|
|
1596
|
+
async addInitScript(script) {
|
|
1597
|
+
const expr = typeof script === "function" ? `(${script.toString()})()` : script;
|
|
1598
|
+
await this.view.evaluate(expr);
|
|
1599
|
+
}
|
|
1600
|
+
keyboard = {
|
|
1601
|
+
press: (key) => this.view.press(key)
|
|
1602
|
+
};
|
|
1603
|
+
context() {
|
|
1604
|
+
return {
|
|
1605
|
+
close: async () => {
|
|
1606
|
+
await this.close();
|
|
1607
|
+
},
|
|
1608
|
+
newPage: async () => {
|
|
1609
|
+
throw new Error("Multi-tab not supported in Bun.WebView. Use engine: 'playwright'");
|
|
1610
|
+
},
|
|
1611
|
+
cookies: async () => [],
|
|
1612
|
+
addCookies: async (_) => {},
|
|
1613
|
+
clearCookies: async () => {},
|
|
1614
|
+
newCDPSession: async () => {
|
|
1615
|
+
throw new Error("CDP session via context not available in Bun.WebView. Use view.cdp() when shipped.");
|
|
1616
|
+
},
|
|
1617
|
+
route: async (_pattern, _handler) => {
|
|
1618
|
+
throw new Error("Network interception not supported in Bun.WebView. Use engine: 'cdp' or 'playwright'.");
|
|
1619
|
+
},
|
|
1620
|
+
unrouteAll: async () => {},
|
|
1621
|
+
pages: () => [],
|
|
1622
|
+
addInitScript: async (script) => {
|
|
1623
|
+
await this.addInitScript(script);
|
|
1624
|
+
}
|
|
1625
|
+
};
|
|
1626
|
+
}
|
|
1627
|
+
on(event, handler) {
|
|
1628
|
+
if (!this._eventListeners.has(event))
|
|
1629
|
+
this._eventListeners.set(event, []);
|
|
1630
|
+
this._eventListeners.get(event).push(handler);
|
|
1631
|
+
return this;
|
|
1632
|
+
}
|
|
1633
|
+
off(event, handler) {
|
|
1634
|
+
const listeners = this._eventListeners.get(event) ?? [];
|
|
1635
|
+
this._eventListeners.set(event, listeners.filter((l) => l !== handler));
|
|
1636
|
+
return this;
|
|
1637
|
+
}
|
|
1638
|
+
_emit(event, ...args) {
|
|
1639
|
+
for (const handler of this._eventListeners.get(event) ?? []) {
|
|
1640
|
+
try {
|
|
1641
|
+
handler(...args);
|
|
1642
|
+
} catch {}
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1645
|
+
async pdf(_opts) {
|
|
1646
|
+
throw new Error("PDF generation not supported in Bun.WebView. Use engine: 'playwright'.");
|
|
1647
|
+
}
|
|
1648
|
+
coverage = {
|
|
1649
|
+
startJSCoverage: async () => {},
|
|
1650
|
+
stopJSCoverage: async () => [],
|
|
1651
|
+
startCSSCoverage: async () => {},
|
|
1652
|
+
stopCSSCoverage: async () => []
|
|
1653
|
+
};
|
|
1654
|
+
setSessionId(id) {
|
|
1655
|
+
this._sessionId = id;
|
|
1656
|
+
}
|
|
1657
|
+
getSessionId() {
|
|
1658
|
+
return this._sessionId;
|
|
1659
|
+
}
|
|
1660
|
+
getNativeView() {
|
|
1661
|
+
return this.view;
|
|
1662
|
+
}
|
|
1663
|
+
async close() {
|
|
1664
|
+
try {
|
|
1665
|
+
await this.view.close();
|
|
1666
|
+
} catch {}
|
|
1667
|
+
}
|
|
1668
|
+
[Symbol.asyncDispose]() {
|
|
1669
|
+
return this.close();
|
|
1670
|
+
}
|
|
1671
|
+
};
|
|
1672
|
+
});
|
|
1673
|
+
|
|
1220
1674
|
// src/lib/browser.ts
|
|
1221
1675
|
var exports_browser = {};
|
|
1222
1676
|
__export(exports_browser, {
|
|
@@ -1238,6 +1692,22 @@ async function launchBrowser(options) {
|
|
|
1238
1692
|
}
|
|
1239
1693
|
return launchLightpanda2({ viewport: options?.viewport });
|
|
1240
1694
|
}
|
|
1695
|
+
if (engine === "bun") {
|
|
1696
|
+
const { isBunWebViewAvailable: isBunWebViewAvailable2, BunWebViewSession: BunWebViewSession2 } = await Promise.resolve().then(() => (init_browser_bun(), exports_browser_bun));
|
|
1697
|
+
if (!isBunWebViewAvailable2()) {
|
|
1698
|
+
throw new BrowserError("Bun.WebView not available. Upgrade to Bun canary: bun upgrade --canary");
|
|
1699
|
+
}
|
|
1700
|
+
const session = new BunWebViewSession2({
|
|
1701
|
+
width: options?.viewport?.width ?? 1280,
|
|
1702
|
+
height: options?.viewport?.height ?? 720
|
|
1703
|
+
});
|
|
1704
|
+
return {
|
|
1705
|
+
newContext: async () => ({ newPage: async () => session, close: async () => {} }),
|
|
1706
|
+
close: async () => session.close(),
|
|
1707
|
+
contexts: () => [],
|
|
1708
|
+
_bunSession: session
|
|
1709
|
+
};
|
|
1710
|
+
}
|
|
1241
1711
|
const headless = options?.headless ?? true;
|
|
1242
1712
|
const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
|
|
1243
1713
|
try {
|
|
@@ -1259,6 +1729,12 @@ async function getPage(browser, options) {
|
|
|
1259
1729
|
const { getLightpandaPage: getLightpandaPage2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
|
|
1260
1730
|
return getLightpandaPage2(browser, options);
|
|
1261
1731
|
}
|
|
1732
|
+
if (engine === "bun") {
|
|
1733
|
+
const bunSession = browser._bunSession;
|
|
1734
|
+
if (bunSession)
|
|
1735
|
+
return bunSession;
|
|
1736
|
+
throw new BrowserError("Bun.WebView session not found on browser instance");
|
|
1737
|
+
}
|
|
1262
1738
|
const viewport = options?.viewport ?? DEFAULT_VIEWPORT;
|
|
1263
1739
|
try {
|
|
1264
1740
|
const context = await browser.newContext({
|
|
@@ -1278,6 +1754,12 @@ async function closeBrowser(browser, engine) {
|
|
|
1278
1754
|
const { closeLightpanda: closeLightpanda2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda));
|
|
1279
1755
|
return closeLightpanda2(browser);
|
|
1280
1756
|
}
|
|
1757
|
+
if (engine === "bun") {
|
|
1758
|
+
const bunSession = browser._bunSession;
|
|
1759
|
+
if (bunSession)
|
|
1760
|
+
await bunSession.close();
|
|
1761
|
+
return;
|
|
1762
|
+
}
|
|
1281
1763
|
try {
|
|
1282
1764
|
await browser.close();
|
|
1283
1765
|
} catch (error) {
|
|
@@ -1347,6 +1829,9 @@ async function launchBrowserEngine(engine, config) {
|
|
|
1347
1829
|
}
|
|
1348
1830
|
return launchLightpanda2({ viewport: config.viewport });
|
|
1349
1831
|
}
|
|
1832
|
+
if (engine === "bun") {
|
|
1833
|
+
return launchBrowser({ headless: config.headless, viewport: config.viewport, engine: "bun" });
|
|
1834
|
+
}
|
|
1350
1835
|
return chromium2.launch({
|
|
1351
1836
|
headless: config.headless,
|
|
1352
1837
|
args: ["--no-sandbox", "--disable-setuid-sandbox"]
|
|
@@ -1908,6 +2393,8 @@ async function runAgentLoop(options) {
|
|
|
1908
2393
|
Instructions: ${persona.instructions}` : "",
|
|
1909
2394
|
persona.traits.length > 0 ? `Traits: ${persona.traits.join(", ")}` : "",
|
|
1910
2395
|
persona.goals.length > 0 ? `Goals: ${persona.goals.join("; ")}` : "",
|
|
2396
|
+
persona.behaviors && persona.behaviors.length > 0 ? `Behaviors: ${persona.behaviors.join("; ")}` : "",
|
|
2397
|
+
persona.painPoints && persona.painPoints.length > 0 ? `Pain points: ${persona.painPoints.join("; ")}` : "",
|
|
1911
2398
|
"",
|
|
1912
2399
|
"Stay in character throughout the test. Your observations, choices, and priorities should reflect this persona."
|
|
1913
2400
|
].filter(Boolean).join(`
|
|
@@ -1958,6 +2445,15 @@ Instructions: ${persona.instructions}` : "",
|
|
|
1958
2445
|
const isOpenAICompat = "provider" in client;
|
|
1959
2446
|
try {
|
|
1960
2447
|
for (let turn = 0;turn < maxTurns; turn++) {
|
|
2448
|
+
if (persona && turn > 0 && turn % 5 === 0) {
|
|
2449
|
+
messages = [
|
|
2450
|
+
...messages,
|
|
2451
|
+
{
|
|
2452
|
+
role: "user",
|
|
2453
|
+
content: `[Reminder: You are ${persona.name} \u2014 ${persona.role}. Traits: ${persona.traits.join(", ")}. Stay in character.]`
|
|
2454
|
+
}
|
|
2455
|
+
];
|
|
2456
|
+
}
|
|
1961
2457
|
const response = isOpenAICompat ? await callOpenAICompatible({
|
|
1962
2458
|
baseUrl: client.baseUrl,
|
|
1963
2459
|
apiKey: client.apiKey,
|
|
@@ -2052,6 +2548,8 @@ function detectProvider(model) {
|
|
|
2052
2548
|
return "openai";
|
|
2053
2549
|
if (model.startsWith("gemini-"))
|
|
2054
2550
|
return "google";
|
|
2551
|
+
if (model.startsWith("llama-") || model.startsWith("qwen-") || model.includes("cerebras"))
|
|
2552
|
+
return "cerebras";
|
|
2055
2553
|
return "anthropic";
|
|
2056
2554
|
}
|
|
2057
2555
|
function createClient(apiKey) {
|
|
@@ -2145,6 +2643,12 @@ function createClientForModel(model, apiKey) {
|
|
|
2145
2643
|
throw new AIClientError("No Google API key. Set GOOGLE_API_KEY or pass it explicitly.");
|
|
2146
2644
|
return { provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", apiKey: key };
|
|
2147
2645
|
}
|
|
2646
|
+
if (provider === "cerebras") {
|
|
2647
|
+
const key = apiKey ?? process.env["CEREBRAS_API_KEY"];
|
|
2648
|
+
if (!key)
|
|
2649
|
+
throw new AIClientError("No Cerebras API key. Set CEREBRAS_API_KEY or pass it explicitly.");
|
|
2650
|
+
return { provider: "cerebras", baseUrl: "https://api.cerebras.ai/v1", apiKey: key };
|
|
2651
|
+
}
|
|
2148
2652
|
return createClient(apiKey);
|
|
2149
2653
|
}
|
|
2150
2654
|
var BROWSER_TOOLS;
|
|
@@ -2547,7 +3051,32 @@ function listScenarios(filter) {
|
|
|
2547
3051
|
params.push(filter.offset);
|
|
2548
3052
|
}
|
|
2549
3053
|
const rows = db2.query(sql).all(...params);
|
|
2550
|
-
|
|
3054
|
+
const scenarios = rows.map(scenarioFromRow);
|
|
3055
|
+
if (scenarios.length === 0)
|
|
3056
|
+
return scenarios;
|
|
3057
|
+
const scenarioIds = scenarios.map((s) => s.id);
|
|
3058
|
+
const placeholders = scenarioIds.map(() => "?").join(",");
|
|
3059
|
+
const statsRows = db2.query(`
|
|
3060
|
+
SELECT scenario_id,
|
|
3061
|
+
COUNT(*) as total,
|
|
3062
|
+
SUM(CASE WHEN status = 'passed' THEN 1 ELSE 0 END) as passed
|
|
3063
|
+
FROM (
|
|
3064
|
+
SELECT scenario_id, status
|
|
3065
|
+
FROM results
|
|
3066
|
+
WHERE scenario_id IN (${placeholders})
|
|
3067
|
+
ORDER BY created_at DESC
|
|
3068
|
+
)
|
|
3069
|
+
GROUP BY scenario_id
|
|
3070
|
+
`).all(...scenarioIds);
|
|
3071
|
+
const statsMap = new Map(statsRows.map((r) => [r.scenario_id, r]));
|
|
3072
|
+
return scenarios.map((s) => {
|
|
3073
|
+
const stats = statsMap.get(s.id);
|
|
3074
|
+
return {
|
|
3075
|
+
...s,
|
|
3076
|
+
flakinessScore: stats ? stats.passed / stats.total : null,
|
|
3077
|
+
recentRunCount: stats?.total ?? 0
|
|
3078
|
+
};
|
|
3079
|
+
});
|
|
2551
3080
|
}
|
|
2552
3081
|
function updateScenario(id, input, version) {
|
|
2553
3082
|
const db2 = getDatabase();
|
|
@@ -2623,6 +3152,10 @@ function updateScenario(id, input, version) {
|
|
|
2623
3152
|
}
|
|
2624
3153
|
return getScenario(existing.id);
|
|
2625
3154
|
}
|
|
3155
|
+
/**
 * Stamps a scenario row with the time and URL of a passing run.
 *
 * Sets `last_passed_at` to the value returned by `now()` and
 * `last_passed_url` to `url` on the `scenarios` row whose `id` matches.
 *
 * @param id  Scenario id used in the WHERE clause.
 * @param url URL stored in `last_passed_url`.
 */
function updateScenarioPassedCache(id, url) {
  const statement = getDatabase().query("UPDATE scenarios SET last_passed_at = ?, last_passed_url = ? WHERE id = ?");
  statement.run(now(), url, id);
}
|
|
2626
3159
|
function deleteScenario(id) {
|
|
2627
3160
|
const db2 = getDatabase();
|
|
2628
3161
|
const scenario = getScenario(id);
|
|
@@ -2706,6 +3239,10 @@ function updateResult(id, updates) {
|
|
|
2706
3239
|
sets.push("metadata = ?");
|
|
2707
3240
|
params.push(JSON.stringify(updates.metadata));
|
|
2708
3241
|
}
|
|
3242
|
+
if (updates.failureAnalysis !== undefined) {
|
|
3243
|
+
sets.push("failure_analysis = ?");
|
|
3244
|
+
params.push(updates.failureAnalysis !== null ? JSON.stringify(updates.failureAnalysis) : null);
|
|
3245
|
+
}
|
|
2709
3246
|
if (sets.length === 0) {
|
|
2710
3247
|
return existing;
|
|
2711
3248
|
}
|
|
@@ -2949,9 +3486,9 @@ init_browser();
|
|
|
2949
3486
|
init_browser_lightpanda();
|
|
2950
3487
|
|
|
2951
3488
|
// src/lib/screenshotter.ts
|
|
2952
|
-
import { mkdirSync as
|
|
2953
|
-
import { join as
|
|
2954
|
-
import { homedir as
|
|
3489
|
+
import { mkdirSync as mkdirSync3, existsSync as existsSync3, writeFileSync } from "fs";
|
|
3490
|
+
import { join as join4 } from "path";
|
|
3491
|
+
import { homedir as homedir4 } from "os";
|
|
2955
3492
|
/**
 * Converts free text into a lowercase, dash-separated slug.
 *
 * Accented letters are first decomposed (NFKD) and their combining marks
 * removed, so "Café" becomes "cafe" instead of losing the letter entirely.
 * Every remaining run of characters outside `a-z0-9` collapses to a single
 * "-", and leading/trailing dashes are trimmed. Pure-ASCII input produces
 * the same result as before this normalisation was added.
 *
 * @param text Source string.
 * @returns The slug; may be an empty string when no alphanumerics remain.
 */
function slugify(text) {
  return text
    .normalize("NFKD")
    // Drop the combining diacritical marks left behind by the decomposition.
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
|
|
@@ -2971,11 +3508,11 @@ function getScreenshotDir(baseDir, runId, scenarioSlug, projectName, timestamp)
|
|
|
2971
3508
|
const project = projectName ?? "default";
|
|
2972
3509
|
const dateDir = formatDate(now2);
|
|
2973
3510
|
const timeDir = `${formatTime(now2)}_${runId.slice(0, 8)}`;
|
|
2974
|
-
return
|
|
3511
|
+
return join4(baseDir, project, dateDir, timeDir, scenarioSlug);
|
|
2975
3512
|
}
|
|
2976
3513
|
/**
 * Creates `dirPath` (including missing parents) unless something already
 * exists at that path.
 *
 * @param dirPath Directory path to create.
 */
function ensureDir(dirPath) {
  if (existsSync3(dirPath))
    return;
  mkdirSync3(dirPath, { recursive: true });
}
|
|
2981
3518
|
function writeMetaSidecar(screenshotPath, meta) {
|
|
@@ -2987,21 +3524,21 @@ function writeMetaSidecar(screenshotPath, meta) {
|
|
|
2987
3524
|
/**
 * Writes `meta` as pretty-printed JSON to `_run-meta.json` inside `dir`.
 *
 * The directory is created first via `ensureDir`. Errors raised while
 * building the path, serialising, or writing are deliberately swallowed,
 * so a failed write never interrupts the caller.
 *
 * @param dir  Target directory.
 * @param meta Value to serialise.
 */
function writeRunMeta(dir, meta) {
  ensureDir(dir);
  try {
    const target = join4(dir, "_run-meta.json");
    const payload = JSON.stringify(meta, null, 2);
    writeFileSync(target, payload, "utf-8");
  } catch {}
}
|
|
2993
3530
|
/**
 * Writes `meta` as pretty-printed JSON to `_scenario-meta.json` inside `dir`.
 *
 * The directory is created first via `ensureDir`. Errors raised while
 * building the path, serialising, or writing are deliberately swallowed,
 * so a failed write never interrupts the caller.
 *
 * @param dir  Target directory.
 * @param meta Value to serialise.
 */
function writeScenarioMeta(dir, meta) {
  ensureDir(dir);
  try {
    const target = join4(dir, "_scenario-meta.json");
    const payload = JSON.stringify(meta, null, 2);
    writeFileSync(target, payload, "utf-8");
  } catch {}
}
|
|
2999
3536
|
async function generateThumbnail(page, screenshotDir, filename) {
|
|
3000
3537
|
try {
|
|
3001
|
-
const thumbDir =
|
|
3538
|
+
const thumbDir = join4(screenshotDir, "_thumbnail");
|
|
3002
3539
|
ensureDir(thumbDir);
|
|
3003
3540
|
const thumbFilename = filename.replace(/\.(png|jpeg)$/, ".thumb.$1");
|
|
3004
|
-
const thumbPath =
|
|
3541
|
+
const thumbPath = join4(thumbDir, thumbFilename);
|
|
3005
3542
|
const viewport = page.viewportSize();
|
|
3006
3543
|
if (viewport) {
|
|
3007
3544
|
await page.screenshot({
|
|
@@ -3015,7 +3552,7 @@ async function generateThumbnail(page, screenshotDir, filename) {
|
|
|
3015
3552
|
return null;
|
|
3016
3553
|
}
|
|
3017
3554
|
}
|
|
3018
|
-
var DEFAULT_BASE_DIR =
|
|
3555
|
+
var DEFAULT_BASE_DIR = join4(homedir4(), ".testers", "screenshots");
|
|
3019
3556
|
|
|
3020
3557
|
class Screenshotter {
|
|
3021
3558
|
baseDir;
|
|
@@ -3036,14 +3573,16 @@ class Screenshotter {
|
|
|
3036
3573
|
const action = options.description ?? options.action;
|
|
3037
3574
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
3038
3575
|
const filename = generateFilename(options.stepNumber, action);
|
|
3039
|
-
const filePath =
|
|
3576
|
+
const filePath = join4(dir, filename);
|
|
3040
3577
|
ensureDir(dir);
|
|
3041
|
-
|
|
3578
|
+
const screenshotOpts = {
|
|
3042
3579
|
path: filePath,
|
|
3043
3580
|
fullPage: this.fullPage,
|
|
3044
|
-
type: this.format
|
|
3045
|
-
|
|
3046
|
-
|
|
3581
|
+
type: this.format
|
|
3582
|
+
};
|
|
3583
|
+
if (this.format === "jpeg")
|
|
3584
|
+
screenshotOpts.quality = this.quality;
|
|
3585
|
+
await page.screenshot(screenshotOpts);
|
|
3047
3586
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
3048
3587
|
const pageUrl = page.url();
|
|
3049
3588
|
const timestamp = new Date().toISOString();
|
|
@@ -3071,14 +3610,16 @@ class Screenshotter {
|
|
|
3071
3610
|
const action = options.description ?? options.action;
|
|
3072
3611
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
3073
3612
|
const filename = generateFilename(options.stepNumber, action);
|
|
3074
|
-
const filePath =
|
|
3613
|
+
const filePath = join4(dir, filename);
|
|
3075
3614
|
ensureDir(dir);
|
|
3076
|
-
|
|
3615
|
+
const ssOpts2 = {
|
|
3077
3616
|
path: filePath,
|
|
3078
3617
|
fullPage: true,
|
|
3079
|
-
type: this.format
|
|
3080
|
-
|
|
3081
|
-
|
|
3618
|
+
type: this.format
|
|
3619
|
+
};
|
|
3620
|
+
if (this.format === "jpeg")
|
|
3621
|
+
ssOpts2.quality = this.quality;
|
|
3622
|
+
await page.screenshot(ssOpts2);
|
|
3082
3623
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
3083
3624
|
const pageUrl = page.url();
|
|
3084
3625
|
const timestamp = new Date().toISOString();
|
|
@@ -3106,13 +3647,15 @@ class Screenshotter {
|
|
|
3106
3647
|
const action = options.description ?? options.action;
|
|
3107
3648
|
const dir = getScreenshotDir(this.baseDir, options.runId, options.scenarioSlug, this.projectName, this.runTimestamp);
|
|
3108
3649
|
const filename = generateFilename(options.stepNumber, action);
|
|
3109
|
-
const filePath =
|
|
3650
|
+
const filePath = join4(dir, filename);
|
|
3110
3651
|
ensureDir(dir);
|
|
3111
|
-
|
|
3652
|
+
const ssOpts3 = {
|
|
3112
3653
|
path: filePath,
|
|
3113
|
-
type: this.format
|
|
3114
|
-
|
|
3115
|
-
|
|
3654
|
+
type: this.format
|
|
3655
|
+
};
|
|
3656
|
+
if (this.format === "jpeg")
|
|
3657
|
+
ssOpts3.quality = this.quality;
|
|
3658
|
+
await page.locator(selector).screenshot(ssOpts3);
|
|
3116
3659
|
const viewport = page.viewportSize() ?? { width: 0, height: 0 };
|
|
3117
3660
|
const pageUrl = page.url();
|
|
3118
3661
|
const timestamp = new Date().toISOString();
|
|
@@ -3140,6 +3683,9 @@ class Screenshotter {
|
|
|
3140
3683
|
// src/index.ts
|
|
3141
3684
|
init_ai_client();
|
|
3142
3685
|
|
|
3686
|
+
// src/lib/runner.ts
|
|
3687
|
+
init_types();
|
|
3688
|
+
|
|
3143
3689
|
// src/lib/judge.ts
|
|
3144
3690
|
init_ai_client();
|
|
3145
3691
|
init_types();
|
|
@@ -3200,11 +3746,13 @@ function resolveJudgeModel(config) {
|
|
|
3200
3746
|
apiKey = process.env["OPENAI_API_KEY"];
|
|
3201
3747
|
else if (provider === "google")
|
|
3202
3748
|
apiKey = process.env["GOOGLE_API_KEY"];
|
|
3749
|
+
else if (provider === "cerebras")
|
|
3750
|
+
apiKey = process.env["CEREBRAS_API_KEY"];
|
|
3203
3751
|
}
|
|
3204
3752
|
if (!apiKey) {
|
|
3205
|
-
apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
|
|
3753
|
+
apiKey = process.env["ANTHROPIC_API_KEY"] ?? process.env["CEREBRAS_API_KEY"] ?? process.env["OPENAI_API_KEY"] ?? process.env["GOOGLE_API_KEY"] ?? globalConfig.anthropicApiKey;
|
|
3206
3754
|
if (!apiKey)
|
|
3207
|
-
throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
|
|
3755
|
+
throw new AIClientError("No API key found for judge. Set ANTHROPIC_API_KEY, CEREBRAS_API_KEY, OPENAI_API_KEY, or GOOGLE_API_KEY.");
|
|
3208
3756
|
}
|
|
3209
3757
|
return { model, provider, apiKey };
|
|
3210
3758
|
}
|
|
@@ -3219,8 +3767,8 @@ reason: 1-2 sentences max`;
|
|
|
3219
3767
|
async function callJudge(prompt, config) {
|
|
3220
3768
|
const { model, provider, apiKey } = resolveJudgeModel(config);
|
|
3221
3769
|
const threshold = 0.7;
|
|
3222
|
-
if (provider === "openai" || provider === "google") {
|
|
3223
|
-
const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
|
|
3770
|
+
if (provider === "openai" || provider === "google" || provider === "cerebras") {
|
|
3771
|
+
const baseUrl = provider === "openai" ? "https://api.openai.com/v1" : provider === "cerebras" ? "https://api.cerebras.ai/v1" : "https://generativelanguage.googleapis.com/v1beta/openai";
|
|
3224
3772
|
const resp2 = await callOpenAICompatible({
|
|
3225
3773
|
baseUrl,
|
|
3226
3774
|
apiKey,
|
|
@@ -3625,1320 +4173,1723 @@ async function runPipelineScenario(scenario, options) {
|
|
|
3625
4173
|
// src/lib/runner.ts
|
|
3626
4174
|
init_runs();
|
|
3627
4175
|
|
|
3628
|
-
// src/
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
3632
|
-
const db2 = getDatabase();
|
|
3633
|
-
let row = db2.query("SELECT * FROM personas WHERE id = ?").get(id);
|
|
3634
|
-
if (row)
|
|
3635
|
-
return personaFromRow(row);
|
|
3636
|
-
row = db2.query("SELECT * FROM personas WHERE short_id = ?").get(id);
|
|
3637
|
-
if (row)
|
|
3638
|
-
return personaFromRow(row);
|
|
3639
|
-
return null;
|
|
3640
|
-
}
|
|
3641
|
-
|
|
3642
|
-
// src/lib/runner.ts
|
|
3643
|
-
init_browser();
|
|
3644
|
-
init_ai_client();
|
|
3645
|
-
init_config();
|
|
3646
|
-
|
|
3647
|
-
// src/lib/webhooks.ts
|
|
3648
|
-
init_database();
|
|
3649
|
-
function fromRow(row) {
|
|
3650
|
-
return {
|
|
3651
|
-
id: row.id,
|
|
3652
|
-
url: row.url,
|
|
3653
|
-
events: JSON.parse(row.events),
|
|
3654
|
-
projectId: row.project_id,
|
|
3655
|
-
secret: row.secret,
|
|
3656
|
-
active: row.active === 1,
|
|
3657
|
-
createdAt: row.created_at
|
|
3658
|
-
};
|
|
3659
|
-
}
|
|
3660
|
-
function createWebhook(input) {
|
|
3661
|
-
const db2 = getDatabase();
|
|
3662
|
-
const id = uuid();
|
|
3663
|
-
const events = input.events ?? ["failed"];
|
|
3664
|
-
const secret = input.secret ?? crypto.randomUUID().replace(/-/g, "");
|
|
3665
|
-
db2.query(`
|
|
3666
|
-
INSERT INTO webhooks (id, url, events, project_id, secret, active, created_at)
|
|
3667
|
-
VALUES (?, ?, ?, ?, ?, 1, ?)
|
|
3668
|
-
`).run(id, input.url, JSON.stringify(events), input.projectId ?? null, secret, now());
|
|
3669
|
-
return getWebhook(id);
|
|
3670
|
-
}
|
|
3671
|
-
function getWebhook(id) {
|
|
3672
|
-
const db2 = getDatabase();
|
|
3673
|
-
const row = db2.query("SELECT * FROM webhooks WHERE id = ?").get(id);
|
|
3674
|
-
if (!row) {
|
|
3675
|
-
const rows = db2.query("SELECT * FROM webhooks WHERE id LIKE ? || '%'").all(id);
|
|
3676
|
-
if (rows.length === 1)
|
|
3677
|
-
return fromRow(rows[0]);
|
|
4176
|
+
// src/lib/failure-analyzer.ts
|
|
4177
|
+
/**
 * Heuristically classifies a failed test from its error message and the
 * agent's reasoning text.
 *
 * Categories are checked in a fixed order and the first match wins:
 * selector_not_found, assertion_failed, timeout, auth_error, network_error,
 * eval_failed, then "unknown". The order matters because messages often
 * match several categories (e.g. a timeout while waiting for a selector is
 * reported as selector_not_found).
 *
 * @param error     Error message, or null/undefined.
 * @param reasoning Reasoning text, or null/undefined.
 * @returns null when both inputs are empty/blank; otherwise an object with
 *          `type`, `confidence` ("high" | "medium" | "low") and, depending on
 *          the type, `affectedElement`, `expected`, `actual`, `stepNumber`
 *          (each possibly undefined).
 */
function analyzeFailure(error, reasoning) {
  const combinedText = [error, reasoning].filter(Boolean).join(" ");
  if (!combinedText.trim())
    return null;
  const errorText = error ?? "";
  const reasoningText = reasoning ?? "";
  const anyMatch = (text, patterns) => patterns.some((pattern) => pattern.test(text));
  // The step number is only ever taken from the reasoning text ("step 3").
  const findStepNumber = () => {
    const stepMatch = reasoningText.match(/step\s+(\d+)/i);
    return stepMatch ? parseInt(stepMatch[1], 10) : undefined;
  };
  // Pulls expected/actual values out of one text. The first capture is lazy
  // so that unquoted values stop at the separator ("foo", not "foo,") and
  // an optional "but" is accepted before got/received/actual.
  const findExpectedActual = (text) => {
    const gotMatch = text.match(/expected[:\s]+(['"]?)([^'"]+?)\1[,\s]+(?:but\s+)?(?:got|received|actual)[:\s]+(['"]?)([^'"]+)\3/i);
    if (gotMatch)
      return { expected: gotMatch[2], actual: gotMatch[4] };
    // "expected <actual> to equal <expected>": the operands are swapped.
    const toEqualMatch = text.match(/expected[:\s]+(['"]?)([^'"]+?)\1\s+to\s+equal\s+(['"]?)([^'"]+)\3/i);
    if (toEqualMatch)
      return { expected: toEqualMatch[4], actual: toEqualMatch[2] };
    return null;
  };
  if (anyMatch(errorText, [/waiting for selector/i, /not found/i, /No element/i]) || anyMatch(reasoningText, [/waiting for selector/i, /could not find element/i, /element not found/i])) {
    // The first single-quoted token is taken to be the selector.
    const selectorMatch = errorText.match(/'([^']+)'/) ?? reasoningText.match(/'([^']+)'/);
    const affectedElement = selectorMatch ? selectorMatch[1] : undefined;
    return {
      type: "selector_not_found",
      affectedElement,
      stepNumber: findStepNumber(),
      confidence: affectedElement ? "high" : "medium"
    };
  }
  if (anyMatch(errorText, [/assert/i, /expected/i, /to equal/i, /to be/i, /\bgot\b/]) || anyMatch(reasoningText, [/assertion.*failed/i, /expected.*but.*got/i])) {
    // Prefer values from the error text; fall back to the reasoning text,
    // since this branch can be entered from the reasoning alone.
    const values = findExpectedActual(errorText) ?? findExpectedActual(reasoningText);
    const expected = values?.expected;
    const actual = values?.actual;
    return {
      type: "assertion_failed",
      expected,
      actual,
      stepNumber: findStepNumber(),
      confidence: expected && actual ? "high" : "medium"
    };
  }
  if (anyMatch(errorText, [/timeout/i, /timed out/i]) || anyMatch(reasoningText, [/Timeout/i, /timed out/i])) {
    return {
      type: "timeout",
      stepNumber: findStepNumber(),
      confidence: "high"
    };
  }
  if (anyMatch(errorText, [/\b401\b/, /\b403\b/, /login/i, /unauthorized/i, /\bauth\b/i]) || anyMatch(reasoningText, [/\b401\b/, /\b403\b/, /unauthorized/i, /authentication/i])) {
    return {
      type: "auth_error",
      confidence: "high"
    };
  }
  if (anyMatch(errorText, [/ECONNREFUSED/i, /ENOTFOUND/i, /fetch failed/i, /network/i]) || anyMatch(reasoningText, [/ECONNREFUSED/i, /fetch failed/i, /connection refused/i])) {
    return {
      type: "network_error",
      confidence: "high"
    };
  }
  if (anyMatch(errorText, [/\beval\b/i, /evaluate/i, /\bscript\b/i]) || anyMatch(reasoningText, [/\beval\b/i, /evaluate/i])) {
    return {
      type: "eval_failed",
      confidence: "medium"
    };
  }
  return {
    type: "unknown",
    confidence: "low"
  };
}
|
|
3735
|
-
|
|
3736
|
-
|
|
3737
|
-
|
|
3738
|
-
|
|
3739
|
-
|
|
3740
|
-
|
|
3741
|
-
|
|
3742
|
-
|
|
3743
|
-
|
|
3744
|
-
|
|
3745
|
-
|
|
3746
|
-
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
3750
|
-
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
3758
|
-
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
3762
|
-
|
|
3763
|
-
|
|
3764
|
-
|
|
3765
|
-
|
|
3766
|
-
|
|
3767
|
-
|
|
3768
|
-
|
|
3769
|
-
|
|
3770
|
-
|
|
3771
|
-
|
|
4249
|
+
|
|
4250
|
+
// node_modules/chalk/source/vendor/ansi-styles/index.js
|
|
4251
|
+
var ANSI_BACKGROUND_OFFSET = 10;
|
|
4252
|
+
var wrapAnsi16 = (offset = 0) => (code) => `\x1B[${code + offset}m`;
|
|
4253
|
+
var wrapAnsi256 = (offset = 0) => (code) => `\x1B[${38 + offset};5;${code}m`;
|
|
4254
|
+
var wrapAnsi16m = (offset = 0) => (red, green, blue) => `\x1B[${38 + offset};2;${red};${green};${blue}m`;
|
|
4255
|
+
var styles = {
|
|
4256
|
+
modifier: {
|
|
4257
|
+
reset: [0, 0],
|
|
4258
|
+
bold: [1, 22],
|
|
4259
|
+
dim: [2, 22],
|
|
4260
|
+
italic: [3, 23],
|
|
4261
|
+
underline: [4, 24],
|
|
4262
|
+
overline: [53, 55],
|
|
4263
|
+
inverse: [7, 27],
|
|
4264
|
+
hidden: [8, 28],
|
|
4265
|
+
strikethrough: [9, 29]
|
|
4266
|
+
},
|
|
4267
|
+
color: {
|
|
4268
|
+
black: [30, 39],
|
|
4269
|
+
red: [31, 39],
|
|
4270
|
+
green: [32, 39],
|
|
4271
|
+
yellow: [33, 39],
|
|
4272
|
+
blue: [34, 39],
|
|
4273
|
+
magenta: [35, 39],
|
|
4274
|
+
cyan: [36, 39],
|
|
4275
|
+
white: [37, 39],
|
|
4276
|
+
blackBright: [90, 39],
|
|
4277
|
+
gray: [90, 39],
|
|
4278
|
+
grey: [90, 39],
|
|
4279
|
+
redBright: [91, 39],
|
|
4280
|
+
greenBright: [92, 39],
|
|
4281
|
+
yellowBright: [93, 39],
|
|
4282
|
+
blueBright: [94, 39],
|
|
4283
|
+
magentaBright: [95, 39],
|
|
4284
|
+
cyanBright: [96, 39],
|
|
4285
|
+
whiteBright: [97, 39]
|
|
4286
|
+
},
|
|
4287
|
+
bgColor: {
|
|
4288
|
+
bgBlack: [40, 49],
|
|
4289
|
+
bgRed: [41, 49],
|
|
4290
|
+
bgGreen: [42, 49],
|
|
4291
|
+
bgYellow: [43, 49],
|
|
4292
|
+
bgBlue: [44, 49],
|
|
4293
|
+
bgMagenta: [45, 49],
|
|
4294
|
+
bgCyan: [46, 49],
|
|
4295
|
+
bgWhite: [47, 49],
|
|
4296
|
+
bgBlackBright: [100, 49],
|
|
4297
|
+
bgGray: [100, 49],
|
|
4298
|
+
bgGrey: [100, 49],
|
|
4299
|
+
bgRedBright: [101, 49],
|
|
4300
|
+
bgGreenBright: [102, 49],
|
|
4301
|
+
bgYellowBright: [103, 49],
|
|
4302
|
+
bgBlueBright: [104, 49],
|
|
4303
|
+
bgMagentaBright: [105, 49],
|
|
4304
|
+
bgCyanBright: [106, 49],
|
|
4305
|
+
bgWhiteBright: [107, 49]
|
|
3772
4306
|
}
|
|
3773
|
-
}
|
|
3774
|
-
|
|
3775
|
-
|
|
3776
|
-
|
|
3777
|
-
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
4307
|
+
};
|
|
4308
|
+
var modifierNames = Object.keys(styles.modifier);
|
|
4309
|
+
var foregroundColorNames = Object.keys(styles.color);
|
|
4310
|
+
var backgroundColorNames = Object.keys(styles.bgColor);
|
|
4311
|
+
var colorNames = [...foregroundColorNames, ...backgroundColorNames];
|
|
4312
|
+
/**
 * Expands the module-level `styles` table in place and returns it.
 *
 * For every `[open, close]` code pair in each group, an `{ open, close }`
 * escape-sequence object is stored both on the group and directly on
 * `styles`. The groups themselves, the `codes` map (open code -> close
 * code) and the colour-conversion helpers are attached as non-enumerable
 * properties.
 */
function assembleStyles() {
  const codes = new Map;
  for (const [groupName, group] of Object.entries(styles)) {
    for (const [styleName, pair] of Object.entries(group)) {
      const [openCode, closeCode] = pair;
      const entry = {
        open: `\x1B[${openCode}m`,
        close: `\x1B[${closeCode}m`
      };
      styles[styleName] = entry;
      group[styleName] = entry;
      codes.set(openCode, closeCode);
    }
    // Re-define the group as non-enumerable so only style names enumerate.
    Object.defineProperty(styles, groupName, {
      value: group,
      enumerable: false
    });
  }
  Object.defineProperty(styles, "codes", {
    value: codes,
    enumerable: false
  });
  styles.color.close = "\x1B[39m";
  styles.bgColor.close = "\x1B[49m";
  styles.color.ansi = wrapAnsi16();
  styles.color.ansi256 = wrapAnsi256();
  styles.color.ansi16m = wrapAnsi16m();
  styles.bgColor.ansi = wrapAnsi16(ANSI_BACKGROUND_OFFSET);
  styles.bgColor.ansi256 = wrapAnsi256(ANSI_BACKGROUND_OFFSET);
  styles.bgColor.ansi16m = wrapAnsi16m(ANSI_BACKGROUND_OFFSET);
  Object.defineProperties(styles, {
    rgbToAnsi256: {
      value(red, green, blue) {
        const isGray = red === green && green === blue;
        if (!isGray) {
          return 16 + 36 * Math.round(red / 255 * 5) + 6 * Math.round(green / 255 * 5) + Math.round(blue / 255 * 5);
        }
        // Equal channels map onto the 24-step grayscale ramp (232..255),
        // with the extremes clamped to codes 16 and 231.
        if (red < 8) {
          return 16;
        }
        if (red > 248) {
          return 231;
        }
        return Math.round((red - 8) / 247 * 24) + 232;
      },
      enumerable: false
    },
    hexToRgb: {
      value(hex) {
        const matches = /[a-f\d]{6}|[a-f\d]{3}/i.exec(hex.toString(16));
        if (!matches) {
          return [0, 0, 0];
        }
        let [colorString] = matches;
        if (colorString.length === 3) {
          // Expand shorthand "abc" to "aabbcc".
          colorString = [...colorString].map((character) => character + character).join("");
        }
        const packed = Number.parseInt(colorString, 16);
        return [
          packed >> 16 & 255,
          packed >> 8 & 255,
          packed & 255
        ];
      },
      enumerable: false
    },
    hexToAnsi256: {
      value: (hex) => styles.rgbToAnsi256(...styles.hexToRgb(hex)),
      enumerable: false
    },
    ansi256ToAnsi: {
      value(code) {
        if (code < 8) {
          return 30 + code;
        }
        if (code < 16) {
          return 90 + (code - 8);
        }
        let red;
        let green;
        let blue;
        if (code >= 232) {
          red = ((code - 232) * 10 + 8) / 255;
          green = red;
          blue = red;
        } else {
          const cubeIndex = code - 16;
          const remainder = cubeIndex % 36;
          red = Math.floor(cubeIndex / 36) / 5;
          green = Math.floor(remainder / 6) / 5;
          blue = remainder % 6 / 5;
        }
        const value = Math.max(red, green, blue) * 2;
        if (value === 0) {
          return 30;
        }
        const base = 30 + (Math.round(blue) << 2 | Math.round(green) << 1 | Math.round(red));
        // A brightness value of exactly 2 selects the bright variant (+60).
        return value === 2 ? base + 60 : base;
      },
      enumerable: false
    },
    rgbToAnsi: {
      value: (red, green, blue) => styles.ansi256ToAnsi(styles.rgbToAnsi256(red, green, blue)),
      enumerable: false
    },
    hexToAnsi: {
      value: (hex) => styles.ansi256ToAnsi(styles.hexToAnsi256(hex)),
      enumerable: false
    }
  });
  return styles;
}
|
|
4424
|
+
var ansiStyles = assembleStyles();
|
|
4425
|
+
var ansi_styles_default = ansiStyles;
|
|
3798
4426
|
|
|
3799
|
-
//
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
const
|
|
3805
|
-
const
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
level: "error",
|
|
3809
|
-
source: "sdk",
|
|
3810
|
-
service: "testers",
|
|
3811
|
-
message: `[testers] Scenario failed: ${scenario?.name ?? result.scenarioId}${result.error ? ` \u2014 ${result.error}` : ""}`,
|
|
3812
|
-
metadata: {
|
|
3813
|
-
run_id: run.id,
|
|
3814
|
-
scenario_id: result.scenarioId,
|
|
3815
|
-
scenario_name: scenario?.name,
|
|
3816
|
-
url: run.url,
|
|
3817
|
-
status: result.status,
|
|
3818
|
-
duration_ms: result.durationMs
|
|
3819
|
-
}
|
|
3820
|
-
};
|
|
3821
|
-
});
|
|
3822
|
-
try {
|
|
3823
|
-
await fetch(`${logsUrl.replace(/\/$/, "")}/api/logs`, {
|
|
3824
|
-
method: "POST",
|
|
3825
|
-
headers: { "Content-Type": "application/json" },
|
|
3826
|
-
body: JSON.stringify(entries)
|
|
3827
|
-
});
|
|
3828
|
-
} catch {}
|
|
4427
|
+
// node_modules/chalk/source/vendor/supports-color/index.js
|
|
4428
|
+
import process2 from "process";
|
|
4429
|
+
import os from "os";
|
|
4430
|
+
import tty from "tty";
|
|
4431
|
+
/**
 * Reports whether `flag` appears in `argv` before any "--" terminator.
 *
 * A flag that already starts with "-" is looked up verbatim; a
 * single-character flag gets a "-" prefix and anything else gets "--".
 *
 * @param flag Flag name, with or without leading dashes.
 * @param argv Argument list; defaults to `Deno.args` when `globalThis.Deno`
 *             exists, otherwise `process.argv`.
 * @returns true when the flag is present and not after "--".
 */
function hasFlag(flag, argv = globalThis.Deno ? globalThis.Deno.args : process2.argv) {
  let prefix = "--";
  if (flag.startsWith("-")) {
    prefix = "";
  } else if (flag.length === 1) {
    prefix = "-";
  }
  const flagIndex = argv.indexOf(prefix + flag);
  if (flagIndex === -1) {
    return false;
  }
  const terminatorIndex = argv.indexOf("--");
  return terminatorIndex === -1 || flagIndex < terminatorIndex;
}
|
|
3830
|
-
|
|
3831
|
-
|
|
3832
|
-
|
|
3833
|
-
|
|
3834
|
-
|
|
3835
|
-
|
|
3836
|
-
init_types();
|
|
3837
|
-
function resolveTodosDbPath() {
|
|
3838
|
-
const envPath = process.env["TODOS_DB_PATH"];
|
|
3839
|
-
if (envPath)
|
|
3840
|
-
return envPath;
|
|
3841
|
-
return join4(homedir4(), ".todos", "todos.db");
|
|
4437
|
+
var { env } = process2;
|
|
4438
|
+
var flagForceColor;
|
|
4439
|
+
if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
|
|
4440
|
+
flagForceColor = 0;
|
|
4441
|
+
} else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
|
|
4442
|
+
flagForceColor = 1;
|
|
3842
4443
|
}
|
|
3843
|
-
function
|
|
3844
|
-
|
|
3845
|
-
|
|
3846
|
-
|
|
4444
|
+
/**
 * Reads the `FORCE_COLOR` environment variable.
 *
 * @returns undefined when the variable is not set; 1 for "true" or an empty
 *          string; 0 for "false"; otherwise the value parsed as a base-10
 *          integer and capped at 3.
 */
function envForceColor() {
  if (!("FORCE_COLOR" in env)) {
    return;
  }
  const raw = env.FORCE_COLOR;
  if (raw === "true") {
    return 1;
  }
  if (raw === "false") {
    return 0;
  }
  if (raw.length === 0) {
    return 1;
  }
  return Math.min(Number.parseInt(raw, 10), 3);
}
|
|
3852
|
-
function
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
|
|
3856
|
-
|
|
3857
|
-
|
|
3858
|
-
|
|
3859
|
-
|
|
3860
|
-
|
|
3861
|
-
|
|
4455
|
+
/**
 * Turns a numeric colour level into a capability descriptor.
 *
 * @param level Colour level.
 * @returns false when `level` is exactly 0; otherwise an object carrying the
 *          level plus `hasBasic` (always true), `has256` (level >= 2) and
 *          `has16m` (level >= 3).
 */
function translateLevel(level) {
  return level === 0 ? false : {
    level,
    hasBasic: true,
    has256: level >= 2,
    has16m: level >= 3
  };
}
|
|
4466
|
+
/**
 * Works out the colour level (0-3) from the environment, CLI flags and
 * stream type. Checks run in a fixed order and the first match returns.
 *
 * Side effect: when `envForceColor()` yields a value, the module-level
 * `flagForceColor` is overwritten with it.
 *
 * @param haveStream  Truthy when a stream is being inspected.
 * @param options.streamIsTTY Whether that stream is a TTY.
 * @param options.sniffFlags  When true (default), CLI flags are consulted.
 * @returns The detected colour level.
 */
function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
  const envForced = envForceColor();
  if (envForced !== undefined) {
    flagForceColor = envForced;
  }
  const forceColor = sniffFlags ? flagForceColor : envForced;
  if (forceColor === 0) {
    return 0;
  }
  if (sniffFlags) {
    const wantsTrueColor = hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor");
    if (wantsTrueColor) {
      return 3;
    }
    if (hasFlag("color=256")) {
      return 2;
    }
  }
  if ("TF_BUILD" in env && "AGENT_NAME" in env) {
    return 1;
  }
  if (haveStream && !streamIsTTY && forceColor === undefined) {
    return 0;
  }
  // Fallback level used whenever detection is inconclusive.
  const floor = forceColor || 0;
  if (env.TERM === "dumb") {
    return floor;
  }
  if (process2.platform === "win32") {
    const releaseParts = os.release().split(".");
    const major = Number(releaseParts[0]);
    const build = Number(releaseParts[2]);
    if (major >= 10 && build >= 10586) {
      return build >= 14931 ? 3 : 2;
    }
    return 1;
  }
  if ("CI" in env) {
    const trueColorCi = ["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"];
    if (trueColorCi.some((key) => key in env)) {
      return 3;
    }
    const basicCi = ["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"];
    if (basicCi.some((sign) => sign in env) || env.CI_NAME === "codeship") {
      return 1;
    }
    return floor;
  }
  if ("TEAMCITY_VERSION" in env) {
    return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env.TEAMCITY_VERSION) ? 1 : 0;
  }
  if (env.COLORTERM === "truecolor" || env.TERM === "xterm-kitty" || env.TERM === "xterm-ghostty" || env.TERM === "wezterm") {
    return 3;
  }
  if ("TERM_PROGRAM" in env) {
    const version = Number.parseInt((env.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
    if (env.TERM_PROGRAM === "iTerm.app") {
      return version >= 3 ? 3 : 2;
    }
    if (env.TERM_PROGRAM === "Apple_Terminal") {
      return 2;
    }
    // Any other TERM_PROGRAM falls through to the TERM-based checks below.
  }
  if (/-256(color)?$/i.test(env.TERM)) {
    return 2;
  }
  if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env.TERM)) {
    return 1;
  }
  if ("COLORTERM" in env) {
    return 1;
  }
  return floor;
}
|
|
4546
|
+
function createSupportsColor(stream, options = {}) {
|
|
4547
|
+
const level = _supportsColor(stream, {
|
|
4548
|
+
streamIsTTY: stream && stream.isTTY,
|
|
4549
|
+
...options
|
|
4550
|
+
});
|
|
4551
|
+
return translateLevel(level);
|
|
4552
|
+
}
|
|
4553
|
+
var supportsColor = {
|
|
4554
|
+
stdout: createSupportsColor({ isTTY: tty.isatty(1) }),
|
|
4555
|
+
stderr: createSupportsColor({ isTTY: tty.isatty(2) })
|
|
4556
|
+
};
|
|
4557
|
+
var supports_color_default = supportsColor;
|
|
3947
4558
|
|
|
3948
|
-
//
|
|
3949
|
-
|
|
3950
|
-
|
|
3951
|
-
|
|
3952
|
-
|
|
3953
|
-
if (!projectId)
|
|
3954
|
-
return { created: 0, skipped: 0 };
|
|
3955
|
-
let db2 = null;
|
|
3956
|
-
try {
|
|
3957
|
-
db2 = connectToTodos();
|
|
3958
|
-
} catch {
|
|
3959
|
-
return { created: 0, skipped: 0 };
|
|
4559
|
+
// node_modules/chalk/source/utilities.js
|
|
4560
|
+
function stringReplaceAll(string, substring, replacer) {
|
|
4561
|
+
let index = string.indexOf(substring);
|
|
4562
|
+
if (index === -1) {
|
|
4563
|
+
return string;
|
|
3960
4564
|
}
|
|
3961
|
-
const
|
|
3962
|
-
let
|
|
3963
|
-
let
|
|
3964
|
-
|
|
3965
|
-
|
|
3966
|
-
|
|
3967
|
-
|
|
3968
|
-
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
continue;
|
|
3972
|
-
}
|
|
3973
|
-
const id = crypto.randomUUID();
|
|
3974
|
-
const now2 = new Date().toISOString();
|
|
3975
|
-
const description = [
|
|
3976
|
-
`Test failure detected by open-testers.`,
|
|
3977
|
-
``,
|
|
3978
|
-
`**Run:** ${run.id}`,
|
|
3979
|
-
`**URL:** ${run.url}`,
|
|
3980
|
-
`**Scenario:** ${scenario?.name ?? result.scenarioId}`,
|
|
3981
|
-
`**Status:** ${result.status}`,
|
|
3982
|
-
result.error ? `**Error:** ${result.error}` : null,
|
|
3983
|
-
result.reasoning ? `**Reasoning:** ${result.reasoning.slice(0, 500)}` : null,
|
|
3984
|
-
`**Duration:** ${result.durationMs ? `${(result.durationMs / 1000).toFixed(1)}s` : "N/A"}`,
|
|
3985
|
-
`**Tokens:** ${result.tokensUsed ?? 0}`
|
|
3986
|
-
].filter(Boolean).join(`
|
|
3987
|
-
`);
|
|
3988
|
-
try {
|
|
3989
|
-
db2.query(`
|
|
3990
|
-
INSERT INTO tasks (id, short_id, title, description, status, priority, tags, project_id, version, created_at, updated_at)
|
|
3991
|
-
VALUES (?, ?, ?, ?, 'pending', 'high', ?, ?, 1, ?, ?)
|
|
3992
|
-
`).run(id, `BUG-${id.slice(0, 6)}`, title, description, JSON.stringify(["bug", "testers", "auto-created"]), projectId, now2, now2);
|
|
3993
|
-
created++;
|
|
3994
|
-
} catch {
|
|
3995
|
-
skipped++;
|
|
3996
|
-
}
|
|
3997
|
-
}
|
|
3998
|
-
} finally {
|
|
3999
|
-
db2.close();
|
|
4000
|
-
}
|
|
4001
|
-
return { created, skipped };
|
|
4565
|
+
const substringLength = substring.length;
|
|
4566
|
+
let endIndex = 0;
|
|
4567
|
+
let returnValue = "";
|
|
4568
|
+
do {
|
|
4569
|
+
returnValue += string.slice(endIndex, index) + substring + replacer;
|
|
4570
|
+
endIndex = index + substringLength;
|
|
4571
|
+
index = string.indexOf(substring, endIndex);
|
|
4572
|
+
} while (index !== -1);
|
|
4573
|
+
returnValue += string.slice(endIndex);
|
|
4574
|
+
return returnValue;
|
|
4002
4575
|
}
|
|
4003
|
-
|
|
4004
|
-
|
|
4005
|
-
|
|
4006
|
-
|
|
4007
|
-
|
|
4008
|
-
|
|
4009
|
-
|
|
4010
|
-
|
|
4011
|
-
|
|
4012
|
-
|
|
4013
|
-
|
|
4014
|
-
|
|
4015
|
-
|
|
4016
|
-
|
|
4017
|
-
const extra = failedResults.length > 5 ? ` \u2026 and ${failedResults.length - 5} more` : "";
|
|
4018
|
-
const message = [
|
|
4019
|
-
`\uD83D\uDEA8 **Testers run failed** \u2014 ${failedCount}/${total} scenarios failed`,
|
|
4020
|
-
``,
|
|
4021
|
-
`**URL:** ${run.url}`,
|
|
4022
|
-
`**Run ID:** \`${run.id}\``,
|
|
4023
|
-
`**Pass rate:** ${passedCount}/${total}`,
|
|
4024
|
-
``,
|
|
4025
|
-
`**Failures:**`,
|
|
4026
|
-
...failureLines,
|
|
4027
|
-
extra
|
|
4028
|
-
].filter((l) => l !== "").join(`
|
|
4029
|
-
`);
|
|
4030
|
-
try {
|
|
4031
|
-
await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
|
|
4032
|
-
method: "POST",
|
|
4033
|
-
headers: { "Content-Type": "application/json" },
|
|
4034
|
-
body: JSON.stringify({ content: message, from: "testers" })
|
|
4035
|
-
});
|
|
4036
|
-
} catch {}
|
|
4576
|
+
function stringEncaseCRLFWithFirstIndex(string, prefix, postfix, index) {
|
|
4577
|
+
let endIndex = 0;
|
|
4578
|
+
let returnValue = "";
|
|
4579
|
+
do {
|
|
4580
|
+
const gotCR = string[index - 1] === "\r";
|
|
4581
|
+
returnValue += string.slice(endIndex, gotCR ? index - 1 : index) + prefix + (gotCR ? `\r
|
|
4582
|
+
` : `
|
|
4583
|
+
`) + postfix;
|
|
4584
|
+
endIndex = index + 1;
|
|
4585
|
+
index = string.indexOf(`
|
|
4586
|
+
`, endIndex);
|
|
4587
|
+
} while (index !== -1);
|
|
4588
|
+
returnValue += string.slice(endIndex);
|
|
4589
|
+
return returnValue;
|
|
4037
4590
|
}
|
|
4038
4591
|
|
|
4039
|
-
//
|
|
4040
|
-
var
|
|
4041
|
-
|
|
4042
|
-
|
|
4592
|
+
// node_modules/chalk/source/index.js
|
|
4593
|
+
var { stdout: stdoutColor, stderr: stderrColor } = supports_color_default;
|
|
4594
|
+
var GENERATOR = Symbol("GENERATOR");
|
|
4595
|
+
var STYLER = Symbol("STYLER");
|
|
4596
|
+
var IS_EMPTY = Symbol("IS_EMPTY");
|
|
4597
|
+
var levelMapping = [
|
|
4598
|
+
"ansi",
|
|
4599
|
+
"ansi",
|
|
4600
|
+
"ansi256",
|
|
4601
|
+
"ansi16m"
|
|
4602
|
+
];
|
|
4603
|
+
var styles2 = Object.create(null);
|
|
4604
|
+
var applyOptions = (object, options = {}) => {
|
|
4605
|
+
if (options.level && !(Number.isInteger(options.level) && options.level >= 0 && options.level <= 3)) {
|
|
4606
|
+
throw new Error("The `level` option should be an integer from 0 to 3");
|
|
4607
|
+
}
|
|
4608
|
+
const colorLevel = stdoutColor ? stdoutColor.level : 0;
|
|
4609
|
+
object.level = options.level === undefined ? colorLevel : options.level;
|
|
4610
|
+
};
|
|
4611
|
+
var chalkFactory = (options) => {
|
|
4612
|
+
const chalk = (...strings) => strings.join(" ");
|
|
4613
|
+
applyOptions(chalk, options);
|
|
4614
|
+
Object.setPrototypeOf(chalk, createChalk.prototype);
|
|
4615
|
+
return chalk;
|
|
4616
|
+
};
|
|
4617
|
+
function createChalk(options) {
|
|
4618
|
+
return chalkFactory(options);
|
|
4043
4619
|
}
|
|
4044
|
-
|
|
4045
|
-
|
|
4046
|
-
|
|
4620
|
+
Object.setPrototypeOf(createChalk.prototype, Function.prototype);
|
|
4621
|
+
for (const [styleName, style] of Object.entries(ansi_styles_default)) {
|
|
4622
|
+
styles2[styleName] = {
|
|
4623
|
+
get() {
|
|
4624
|
+
const builder = createBuilder(this, createStyler(style.open, style.close, this[STYLER]), this[IS_EMPTY]);
|
|
4625
|
+
Object.defineProperty(this, styleName, { value: builder });
|
|
4626
|
+
return builder;
|
|
4627
|
+
}
|
|
4628
|
+
};
|
|
4047
4629
|
}
|
|
4048
|
-
|
|
4049
|
-
|
|
4050
|
-
const
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
}
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4630
|
+
styles2.visible = {
|
|
4631
|
+
get() {
|
|
4632
|
+
const builder = createBuilder(this, this[STYLER], true);
|
|
4633
|
+
Object.defineProperty(this, "visible", { value: builder });
|
|
4634
|
+
return builder;
|
|
4635
|
+
}
|
|
4636
|
+
};
|
|
4637
|
+
var getModelAnsi = (model, level, type, ...arguments_) => {
|
|
4638
|
+
if (model === "rgb") {
|
|
4639
|
+
if (level === "ansi16m") {
|
|
4640
|
+
return ansi_styles_default[type].ansi16m(...arguments_);
|
|
4641
|
+
}
|
|
4642
|
+
if (level === "ansi256") {
|
|
4643
|
+
return ansi_styles_default[type].ansi256(ansi_styles_default.rgbToAnsi256(...arguments_));
|
|
4644
|
+
}
|
|
4645
|
+
return ansi_styles_default[type].ansi(ansi_styles_default.rgbToAnsi(...arguments_));
|
|
4646
|
+
}
|
|
4647
|
+
if (model === "hex") {
|
|
4648
|
+
return getModelAnsi("rgb", level, type, ...ansi_styles_default.hexToRgb(...arguments_));
|
|
4649
|
+
}
|
|
4650
|
+
return ansi_styles_default[type][model](...arguments_);
|
|
4651
|
+
};
|
|
4652
|
+
var usedModels = ["rgb", "hex", "ansi256"];
|
|
4653
|
+
for (const model of usedModels) {
|
|
4654
|
+
styles2[model] = {
|
|
4655
|
+
get() {
|
|
4656
|
+
const { level } = this;
|
|
4657
|
+
return function(...arguments_) {
|
|
4658
|
+
const styler = createStyler(getModelAnsi(model, levelMapping[level], "color", ...arguments_), ansi_styles_default.color.close, this[STYLER]);
|
|
4659
|
+
return createBuilder(this, styler, this[IS_EMPTY]);
|
|
4660
|
+
};
|
|
4661
|
+
}
|
|
4662
|
+
};
|
|
4663
|
+
const bgModel = "bg" + model[0].toUpperCase() + model.slice(1);
|
|
4664
|
+
styles2[bgModel] = {
|
|
4665
|
+
get() {
|
|
4666
|
+
const { level } = this;
|
|
4667
|
+
return function(...arguments_) {
|
|
4668
|
+
const styler = createStyler(getModelAnsi(model, levelMapping[level], "bgColor", ...arguments_), ansi_styles_default.bgColor.close, this[STYLER]);
|
|
4669
|
+
return createBuilder(this, styler, this[IS_EMPTY]);
|
|
4670
|
+
};
|
|
4671
|
+
}
|
|
4672
|
+
};
|
|
4073
4673
|
}
|
|
4074
|
-
|
|
4075
|
-
|
|
4076
|
-
|
|
4077
|
-
|
|
4674
|
+
var proto = Object.defineProperties(() => {}, {
|
|
4675
|
+
...styles2,
|
|
4676
|
+
level: {
|
|
4677
|
+
enumerable: true,
|
|
4678
|
+
get() {
|
|
4679
|
+
return this[GENERATOR].level;
|
|
4680
|
+
},
|
|
4681
|
+
set(level) {
|
|
4682
|
+
this[GENERATOR].level = level;
|
|
4683
|
+
}
|
|
4078
4684
|
}
|
|
4079
|
-
|
|
4080
|
-
|
|
4081
|
-
|
|
4082
|
-
|
|
4083
|
-
|
|
4084
|
-
|
|
4085
|
-
|
|
4086
|
-
}
|
|
4087
|
-
|
|
4088
|
-
|
|
4089
|
-
const result = createResult({
|
|
4090
|
-
runId,
|
|
4091
|
-
scenarioId: scenario.id,
|
|
4092
|
-
model,
|
|
4093
|
-
stepsTotal: scenario.steps.length || 10,
|
|
4094
|
-
personaId: persona?.id ?? null,
|
|
4095
|
-
personaName: persona?.name ?? null
|
|
4096
|
-
});
|
|
4097
|
-
emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
|
|
4098
|
-
let browser = null;
|
|
4099
|
-
let page = null;
|
|
4100
|
-
try {
|
|
4101
|
-
browser = await launchBrowser({ headless: !(options.headed ?? false), engine: options.engine });
|
|
4102
|
-
page = await getPage(browser, {
|
|
4103
|
-
viewport: config.browser.viewport
|
|
4104
|
-
});
|
|
4105
|
-
const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
|
|
4106
|
-
const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
|
|
4107
|
-
await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
|
|
4108
|
-
const stepStartTimes = new Map;
|
|
4109
|
-
const agentResult = await withTimeout(runAgentLoop({
|
|
4110
|
-
client,
|
|
4111
|
-
page,
|
|
4112
|
-
scenario,
|
|
4113
|
-
screenshotter,
|
|
4114
|
-
model,
|
|
4115
|
-
runId,
|
|
4116
|
-
maxTurns: 30,
|
|
4117
|
-
a11y: options.a11y,
|
|
4118
|
-
persona: persona ? {
|
|
4119
|
-
name: persona.name,
|
|
4120
|
-
role: persona.role,
|
|
4121
|
-
description: persona.description,
|
|
4122
|
-
instructions: persona.instructions,
|
|
4123
|
-
traits: persona.traits,
|
|
4124
|
-
goals: persona.goals
|
|
4125
|
-
} : null,
|
|
4126
|
-
onStep: (stepEvent) => {
|
|
4127
|
-
let stepDurationMs;
|
|
4128
|
-
if (stepEvent.type === "tool_call") {
|
|
4129
|
-
stepStartTimes.set(stepEvent.stepNumber, Date.now());
|
|
4130
|
-
} else if (stepEvent.type === "tool_result") {
|
|
4131
|
-
const startTime = stepStartTimes.get(stepEvent.stepNumber);
|
|
4132
|
-
if (startTime !== undefined) {
|
|
4133
|
-
stepDurationMs = Date.now() - startTime;
|
|
4134
|
-
stepStartTimes.delete(stepEvent.stepNumber);
|
|
4135
|
-
}
|
|
4136
|
-
}
|
|
4137
|
-
emit({
|
|
4138
|
-
type: `step:${stepEvent.type}`,
|
|
4139
|
-
scenarioId: scenario.id,
|
|
4140
|
-
scenarioName: scenario.name,
|
|
4141
|
-
runId,
|
|
4142
|
-
toolName: stepEvent.toolName,
|
|
4143
|
-
toolInput: stepEvent.toolInput,
|
|
4144
|
-
toolResult: stepEvent.toolResult,
|
|
4145
|
-
thinking: stepEvent.thinking,
|
|
4146
|
-
stepNumber: stepEvent.stepNumber,
|
|
4147
|
-
stepDurationMs
|
|
4148
|
-
});
|
|
4149
|
-
}
|
|
4150
|
-
}), scenarioTimeout, scenario.name);
|
|
4151
|
-
if (options.engine !== "lightpanda") {
|
|
4152
|
-
for (const ss of agentResult.screenshots) {
|
|
4153
|
-
try {
|
|
4154
|
-
createScreenshot({
|
|
4155
|
-
resultId: result.id,
|
|
4156
|
-
stepNumber: ss.stepNumber,
|
|
4157
|
-
action: ss.action,
|
|
4158
|
-
filePath: ss.filePath,
|
|
4159
|
-
width: ss.width,
|
|
4160
|
-
height: ss.height,
|
|
4161
|
-
description: ss.description,
|
|
4162
|
-
pageUrl: ss.pageUrl,
|
|
4163
|
-
thumbnailPath: ss.thumbnailPath
|
|
4164
|
-
});
|
|
4165
|
-
emit({ type: "screenshot:captured", screenshotPath: ss.filePath, scenarioId: scenario.id, runId });
|
|
4166
|
-
} catch {}
|
|
4167
|
-
}
|
|
4168
|
-
}
|
|
4169
|
-
const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : "";
|
|
4170
|
-
const updatedResult = updateResult(result.id, {
|
|
4171
|
-
status: agentResult.status,
|
|
4172
|
-
reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
|
|
4173
|
-
stepsCompleted: agentResult.stepsCompleted,
|
|
4174
|
-
durationMs: Date.now() - new Date(result.createdAt).getTime(),
|
|
4175
|
-
tokensUsed: agentResult.tokensUsed,
|
|
4176
|
-
costCents: estimateCost(model, agentResult.tokensUsed)
|
|
4177
|
-
});
|
|
4178
|
-
const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
|
|
4179
|
-
emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
|
|
4180
|
-
return updatedResult;
|
|
4181
|
-
} catch (error) {
|
|
4182
|
-
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
4183
|
-
const updatedResult = updateResult(result.id, {
|
|
4184
|
-
status: "error",
|
|
4185
|
-
error: errorMsg,
|
|
4186
|
-
durationMs: Date.now() - new Date(result.createdAt).getTime()
|
|
4187
|
-
});
|
|
4188
|
-
emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
|
|
4189
|
-
return updatedResult;
|
|
4190
|
-
} finally {
|
|
4191
|
-
if (browser)
|
|
4192
|
-
await closeBrowser(browser, options.engine);
|
|
4685
|
+
});
|
|
4686
|
+
var createStyler = (open, close, parent) => {
|
|
4687
|
+
let openAll;
|
|
4688
|
+
let closeAll;
|
|
4689
|
+
if (parent === undefined) {
|
|
4690
|
+
openAll = open;
|
|
4691
|
+
closeAll = close;
|
|
4692
|
+
} else {
|
|
4693
|
+
openAll = parent.openAll + open;
|
|
4694
|
+
closeAll = close + parent.closeAll;
|
|
4193
4695
|
}
|
|
4194
|
-
|
|
4195
|
-
|
|
4196
|
-
|
|
4197
|
-
|
|
4198
|
-
|
|
4199
|
-
|
|
4200
|
-
const flakinessThreshold = options.flakinessThreshold ?? 0.95;
|
|
4201
|
-
const run = createRun({
|
|
4202
|
-
url: options.url,
|
|
4203
|
-
model,
|
|
4204
|
-
headed: options.headed,
|
|
4205
|
-
parallel,
|
|
4206
|
-
projectId: options.projectId,
|
|
4207
|
-
samples,
|
|
4208
|
-
flakinessThreshold
|
|
4209
|
-
});
|
|
4210
|
-
updateRun(run.id, { status: "running", total: scenarios.length });
|
|
4211
|
-
let sortedScenarios = scenarios;
|
|
4212
|
-
try {
|
|
4213
|
-
const { topologicalSort: topologicalSort2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
|
|
4214
|
-
const scenarioIds = scenarios.map((s) => s.id);
|
|
4215
|
-
const sortedIds = topologicalSort2(scenarioIds);
|
|
4216
|
-
const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
|
|
4217
|
-
sortedScenarios = sortedIds.map((id) => scenarioMap.get(id)).filter((s) => s !== undefined);
|
|
4218
|
-
for (const s of scenarios) {
|
|
4219
|
-
if (!sortedIds.includes(s.id))
|
|
4220
|
-
sortedScenarios.push(s);
|
|
4221
|
-
}
|
|
4222
|
-
} catch {}
|
|
4223
|
-
const results = [];
|
|
4224
|
-
const failedScenarioIds = new Set;
|
|
4225
|
-
const canRun = async (scenario) => {
|
|
4226
|
-
try {
|
|
4227
|
-
const { getDependencies: getDependencies2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
|
|
4228
|
-
const deps = getDependencies2(scenario.id);
|
|
4229
|
-
for (const depId of deps) {
|
|
4230
|
-
if (failedScenarioIds.has(depId))
|
|
4231
|
-
return false;
|
|
4232
|
-
}
|
|
4233
|
-
} catch {}
|
|
4234
|
-
return true;
|
|
4696
|
+
return {
|
|
4697
|
+
open,
|
|
4698
|
+
close,
|
|
4699
|
+
openAll,
|
|
4700
|
+
closeAll,
|
|
4701
|
+
parent
|
|
4235
4702
|
};
|
|
4236
|
-
|
|
4237
|
-
|
|
4238
|
-
|
|
4239
|
-
|
|
4240
|
-
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
let attempt = 1;
|
|
4249
|
-
while ((result.status === "failed" || result.status === "error") && attempt <= maxRetries) {
|
|
4250
|
-
emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id, retryAttempt: attempt + 1, maxRetries: maxRetries + 1 });
|
|
4251
|
-
result = await runSingleScenario(scenario, run.id, options);
|
|
4252
|
-
attempt++;
|
|
4253
|
-
}
|
|
4254
|
-
if (samples > 1) {
|
|
4255
|
-
const sampleResults = [result];
|
|
4256
|
-
for (let s = 1;s < samples; s++) {
|
|
4257
|
-
emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id });
|
|
4258
|
-
const sampleResult = await runSingleScenario(scenario, run.id, options);
|
|
4259
|
-
sampleResults.push(sampleResult);
|
|
4260
|
-
}
|
|
4261
|
-
const passCount = sampleResults.filter((r) => r.status === "passed").length;
|
|
4262
|
-
const passRate = passCount / samples;
|
|
4263
|
-
if (passCount > 0 && passCount < samples && passRate < flakinessThreshold) {
|
|
4264
|
-
result = updateResult(result.id, {
|
|
4265
|
-
status: "flaky",
|
|
4266
|
-
reasoning: `Flaky: ${passCount}/${samples} samples passed (${Math.round(passRate * 100)}% pass rate, threshold ${Math.round(flakinessThreshold * 100)}%)`,
|
|
4267
|
-
metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
|
|
4268
|
-
});
|
|
4269
|
-
} else if (passCount === 0) {
|
|
4270
|
-
result = updateResult(result.id, {
|
|
4271
|
-
metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
|
|
4272
|
-
});
|
|
4273
|
-
} else if (passCount === samples) {
|
|
4274
|
-
result = updateResult(result.id, {
|
|
4275
|
-
metadata: { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) }
|
|
4276
|
-
});
|
|
4277
|
-
}
|
|
4278
|
-
}
|
|
4279
|
-
results.push(result);
|
|
4280
|
-
if (result.status === "failed" || result.status === "error" || result.status === "flaky") {
|
|
4281
|
-
failedScenarioIds.add(scenario.id);
|
|
4282
|
-
}
|
|
4283
|
-
}
|
|
4284
|
-
} else {
|
|
4285
|
-
const queue = [...sortedScenarios];
|
|
4286
|
-
const running = [];
|
|
4287
|
-
const processNext = async () => {
|
|
4288
|
-
const scenario = queue.shift();
|
|
4289
|
-
if (!scenario)
|
|
4290
|
-
return;
|
|
4291
|
-
if (!await canRun(scenario)) {
|
|
4292
|
-
const result2 = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
|
|
4293
|
-
const skipped = updateResult(result2.id, { status: "skipped", error: "Skipped: dependency failed" });
|
|
4294
|
-
results.push(skipped);
|
|
4295
|
-
failedScenarioIds.add(scenario.id);
|
|
4296
|
-
await processNext();
|
|
4297
|
-
return;
|
|
4298
|
-
}
|
|
4299
|
-
const result = await runSingleScenario(scenario, run.id, options);
|
|
4300
|
-
results.push(result);
|
|
4301
|
-
if (result.status === "failed" || result.status === "error") {
|
|
4302
|
-
failedScenarioIds.add(scenario.id);
|
|
4303
|
-
}
|
|
4304
|
-
await processNext();
|
|
4305
|
-
};
|
|
4306
|
-
const workers = Math.min(parallel, sortedScenarios.length);
|
|
4307
|
-
for (let i = 0;i < workers; i++) {
|
|
4308
|
-
running.push(processNext());
|
|
4309
|
-
}
|
|
4310
|
-
await Promise.all(running);
|
|
4703
|
+
};
|
|
4704
|
+
var createBuilder = (self, _styler, _isEmpty) => {
|
|
4705
|
+
const builder = (...arguments_) => applyStyle(builder, arguments_.length === 1 ? "" + arguments_[0] : arguments_.join(" "));
|
|
4706
|
+
Object.setPrototypeOf(builder, proto);
|
|
4707
|
+
builder[GENERATOR] = self;
|
|
4708
|
+
builder[STYLER] = _styler;
|
|
4709
|
+
builder[IS_EMPTY] = _isEmpty;
|
|
4710
|
+
return builder;
|
|
4711
|
+
};
|
|
4712
|
+
var applyStyle = (self, string) => {
|
|
4713
|
+
if (self.level <= 0 || !string) {
|
|
4714
|
+
return self[IS_EMPTY] ? "" : string;
|
|
4311
4715
|
}
|
|
4312
|
-
let
|
|
4313
|
-
if (
|
|
4314
|
-
|
|
4315
|
-
|
|
4316
|
-
|
|
4317
|
-
|
|
4318
|
-
|
|
4319
|
-
|
|
4320
|
-
|
|
4716
|
+
let styler = self[STYLER];
|
|
4717
|
+
if (styler === undefined) {
|
|
4718
|
+
return string;
|
|
4719
|
+
}
|
|
4720
|
+
const { openAll, closeAll } = styler;
|
|
4721
|
+
if (string.includes("\x1B")) {
|
|
4722
|
+
while (styler !== undefined) {
|
|
4723
|
+
string = stringReplaceAll(string, styler.close, styler.open);
|
|
4724
|
+
styler = styler.parent;
|
|
4321
4725
|
}
|
|
4322
4726
|
}
|
|
4323
|
-
const
|
|
4324
|
-
|
|
4325
|
-
|
|
4326
|
-
|
|
4327
|
-
status: finalStatus,
|
|
4328
|
-
passed,
|
|
4329
|
-
failed,
|
|
4330
|
-
total: scenarios.length,
|
|
4331
|
-
finished_at: new Date().toISOString()
|
|
4332
|
-
});
|
|
4333
|
-
emit({ type: "run:complete", runId: run.id });
|
|
4334
|
-
const eventType = finalRun.status === "failed" ? "failed" : "completed";
|
|
4335
|
-
dispatchWebhooks(eventType, finalRun).catch(() => {});
|
|
4336
|
-
if (finalRun.status === "failed") {
|
|
4337
|
-
const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
|
|
4338
|
-
pushFailedRunToLogs(finalRun, failedResults, scenarios).catch(() => {});
|
|
4339
|
-
createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
|
|
4340
|
-
notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
|
|
4727
|
+
const lfIndex = string.indexOf(`
|
|
4728
|
+
`);
|
|
4729
|
+
if (lfIndex !== -1) {
|
|
4730
|
+
string = stringEncaseCRLFWithFirstIndex(string, closeAll, openAll, lfIndex);
|
|
4341
4731
|
}
|
|
4342
|
-
return
|
|
4343
|
-
}
|
|
4344
|
-
|
|
4345
|
-
|
|
4346
|
-
|
|
4347
|
-
|
|
4348
|
-
|
|
4349
|
-
|
|
4350
|
-
|
|
4351
|
-
|
|
4352
|
-
|
|
4353
|
-
|
|
4354
|
-
|
|
4732
|
+
return openAll + string + closeAll;
|
|
4733
|
+
};
|
|
4734
|
+
Object.defineProperties(createChalk.prototype, styles2);
|
|
4735
|
+
var chalk = createChalk();
|
|
4736
|
+
var chalkStderr = createChalk({ level: stderrColor ? stderrColor.level : 0 });
|
|
4737
|
+
var source_default = chalk;
|
|
4738
|
+
|
|
4739
|
+
// src/lib/costs.ts
|
|
4740
|
+
init_database();
|
|
4741
|
+
init_config();
|
|
4742
|
+
function getDateFilter(period) {
|
|
4743
|
+
switch (period) {
|
|
4744
|
+
case "day":
|
|
4745
|
+
return "AND r.created_at >= date('now', 'start of day')";
|
|
4746
|
+
case "week":
|
|
4747
|
+
return "AND r.created_at >= date('now', '-7 days')";
|
|
4748
|
+
case "month":
|
|
4749
|
+
return "AND r.created_at >= date('now', '-30 days')";
|
|
4750
|
+
case "all":
|
|
4751
|
+
return "";
|
|
4355
4752
|
}
|
|
4356
|
-
|
|
4357
|
-
|
|
4358
|
-
|
|
4359
|
-
|
|
4360
|
-
|
|
4361
|
-
|
|
4753
|
+
}
|
|
4754
|
+
function getPeriodDays(period) {
|
|
4755
|
+
switch (period) {
|
|
4756
|
+
case "day":
|
|
4757
|
+
return 1;
|
|
4758
|
+
case "week":
|
|
4759
|
+
return 7;
|
|
4760
|
+
case "month":
|
|
4761
|
+
return 30;
|
|
4762
|
+
case "all":
|
|
4763
|
+
return 30;
|
|
4362
4764
|
}
|
|
4363
|
-
return runBatch(scenarios, options);
|
|
4364
4765
|
}
|
|
4365
|
-
function
|
|
4766
|
+
function loadBudgetConfig() {
|
|
4366
4767
|
const config = loadConfig();
|
|
4367
|
-
const
|
|
4368
|
-
|
|
4369
|
-
|
|
4370
|
-
|
|
4371
|
-
|
|
4372
|
-
}
|
|
4373
|
-
|
|
4374
|
-
|
|
4375
|
-
|
|
4376
|
-
|
|
4377
|
-
|
|
4768
|
+
const budget = config.budget;
|
|
4769
|
+
return {
|
|
4770
|
+
maxPerRunCents: budget?.maxPerRunCents ?? 50,
|
|
4771
|
+
maxPerDayCents: budget?.maxPerDayCents ?? 500,
|
|
4772
|
+
warnAtPercent: budget?.warnAtPercent ?? 0.8
|
|
4773
|
+
};
|
|
4774
|
+
}
|
|
4775
|
+
function getCostSummary(options) {
|
|
4776
|
+
const db2 = getDatabase();
|
|
4777
|
+
const period = options?.period ?? "month";
|
|
4778
|
+
const projectId = options?.projectId;
|
|
4779
|
+
const dateFilter = getDateFilter(period);
|
|
4780
|
+
const projectFilter = projectId ? "AND ru.project_id = ?" : "";
|
|
4781
|
+
const projectParams = projectId ? [projectId] : [];
|
|
4782
|
+
const totalsRow = db2.query(`SELECT
|
|
4783
|
+
COALESCE(SUM(r.cost_cents), 0) as total_cost,
|
|
4784
|
+
COALESCE(SUM(r.tokens_used), 0) as total_tokens,
|
|
4785
|
+
COUNT(DISTINCT r.run_id) as run_count
|
|
4786
|
+
FROM results r
|
|
4787
|
+
JOIN runs ru ON r.run_id = ru.id
|
|
4788
|
+
WHERE 1=1 ${dateFilter} ${projectFilter}`).get(...projectParams);
|
|
4789
|
+
const modelRows = db2.query(`SELECT
|
|
4790
|
+
r.model,
|
|
4791
|
+
COALESCE(SUM(r.cost_cents), 0) as cost_cents,
|
|
4792
|
+
COALESCE(SUM(r.tokens_used), 0) as tokens,
|
|
4793
|
+
COUNT(DISTINCT r.run_id) as runs
|
|
4794
|
+
FROM results r
|
|
4795
|
+
JOIN runs ru ON r.run_id = ru.id
|
|
4796
|
+
WHERE 1=1 ${dateFilter} ${projectFilter}
|
|
4797
|
+
GROUP BY r.model
|
|
4798
|
+
ORDER BY cost_cents DESC`).all(...projectParams);
|
|
4799
|
+
const byModel = {};
|
|
4800
|
+
for (const row of modelRows) {
|
|
4801
|
+
byModel[row.model] = {
|
|
4802
|
+
costCents: row.cost_cents,
|
|
4803
|
+
tokens: row.tokens,
|
|
4804
|
+
runs: row.runs
|
|
4805
|
+
};
|
|
4378
4806
|
}
|
|
4379
|
-
const
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4386
|
-
|
|
4387
|
-
|
|
4388
|
-
|
|
4389
|
-
|
|
4807
|
+
const scenarioRows = db2.query(`SELECT
|
|
4808
|
+
r.scenario_id,
|
|
4809
|
+
COALESCE(s.name, r.scenario_id) as name,
|
|
4810
|
+
COALESCE(SUM(r.cost_cents), 0) as cost_cents,
|
|
4811
|
+
COALESCE(SUM(r.tokens_used), 0) as tokens,
|
|
4812
|
+
COUNT(DISTINCT r.run_id) as runs
|
|
4813
|
+
FROM results r
|
|
4814
|
+
JOIN runs ru ON r.run_id = ru.id
|
|
4815
|
+
LEFT JOIN scenarios s ON r.scenario_id = s.id
|
|
4816
|
+
WHERE 1=1 ${dateFilter} ${projectFilter}
|
|
4817
|
+
GROUP BY r.scenario_id
|
|
4818
|
+
ORDER BY cost_cents DESC
|
|
4819
|
+
LIMIT 10`).all(...projectParams);
|
|
4820
|
+
const byScenario = scenarioRows.map((row) => ({
|
|
4821
|
+
scenarioId: row.scenario_id,
|
|
4822
|
+
name: row.name,
|
|
4823
|
+
costCents: row.cost_cents,
|
|
4824
|
+
tokens: row.tokens,
|
|
4825
|
+
runs: row.runs
|
|
4826
|
+
}));
|
|
4827
|
+
const runCount = totalsRow.run_count;
|
|
4828
|
+
const avgCostPerRun = runCount > 0 ? totalsRow.total_cost / runCount : 0;
|
|
4829
|
+
const periodDays = getPeriodDays(period);
|
|
4830
|
+
const estimatedMonthlyCents = periodDays > 0 ? totalsRow.total_cost / periodDays * 30 : 0;
|
|
4831
|
+
return {
|
|
4832
|
+
period,
|
|
4833
|
+
totalCostCents: totalsRow.total_cost,
|
|
4834
|
+
totalTokens: totalsRow.total_tokens,
|
|
4835
|
+
runCount,
|
|
4836
|
+
byModel,
|
|
4837
|
+
byScenario,
|
|
4838
|
+
avgCostPerRun,
|
|
4839
|
+
estimatedMonthlyCents
|
|
4840
|
+
};
|
|
4841
|
+
}
|
|
4842
|
+
var COST_PER_SCENARIO_CENTS = {
|
|
4843
|
+
haiku: 5,
|
|
4844
|
+
sonnet: 30,
|
|
4845
|
+
opus: 150,
|
|
4846
|
+
"claude-haiku": 5,
|
|
4847
|
+
"claude-sonnet": 30,
|
|
4848
|
+
"claude-opus": 150,
|
|
4849
|
+
"gpt-4o-mini": 3,
|
|
4850
|
+
"gpt-4o": 25,
|
|
4851
|
+
"gemini-2.0-flash": 2,
|
|
4852
|
+
"gemini-1.5-pro": 20,
|
|
4853
|
+
"llama-3.1-8b": 1,
|
|
4854
|
+
"llama-3.3-70b": 3
|
|
4855
|
+
};
|
|
4856
|
+
function modelToCostKey(model) {
|
|
4857
|
+
const exact = COST_PER_SCENARIO_CENTS[model];
|
|
4858
|
+
if (exact !== undefined)
|
|
4859
|
+
return exact;
|
|
4860
|
+
const lower = model.toLowerCase();
|
|
4861
|
+
if (lower.includes("opus"))
|
|
4862
|
+
return COST_PER_SCENARIO_CENTS["opus"];
|
|
4863
|
+
if (lower.includes("sonnet"))
|
|
4864
|
+
return COST_PER_SCENARIO_CENTS["sonnet"];
|
|
4865
|
+
if (lower.includes("haiku"))
|
|
4866
|
+
return COST_PER_SCENARIO_CENTS["haiku"];
|
|
4867
|
+
if (lower.includes("gpt-4o-mini"))
|
|
4868
|
+
return COST_PER_SCENARIO_CENTS["gpt-4o-mini"];
|
|
4869
|
+
if (lower.includes("gpt-4o"))
|
|
4870
|
+
return COST_PER_SCENARIO_CENTS["gpt-4o"];
|
|
4871
|
+
if (lower.includes("gemini-2.0-flash") || lower.includes("gemini-flash"))
|
|
4872
|
+
return COST_PER_SCENARIO_CENTS["gemini-2.0-flash"];
|
|
4873
|
+
if (lower.includes("gemini-1.5-pro") || lower.includes("gemini-pro"))
|
|
4874
|
+
return COST_PER_SCENARIO_CENTS["gemini-1.5-pro"];
|
|
4875
|
+
if (lower.includes("llama-3.3") || lower.includes("llama3.3"))
|
|
4876
|
+
return COST_PER_SCENARIO_CENTS["llama-3.3-70b"];
|
|
4877
|
+
if (lower.includes("llama"))
|
|
4878
|
+
return COST_PER_SCENARIO_CENTS["llama-3.1-8b"];
|
|
4879
|
+
return 10;
|
|
4880
|
+
}
|
|
4881
|
+
function estimateRunCostCents(scenarioCount, model, samples = 1) {
|
|
4882
|
+
const costPerScenario = modelToCostKey(model);
|
|
4883
|
+
return scenarioCount * costPerScenario * Math.max(1, samples);
|
|
4884
|
+
}
|
|
4885
|
+
/**
 * Decide whether a run with the given estimated cost fits the configured budget.
 *
 * Checks are applied in order:
 *   1. the estimate against the per-run limit,
 *   2. today's spend plus the estimate against the daily limit,
 *   3. the same projection against the warning threshold
 *      (`maxPerDayCents * warnAtPercent`).
 *
 * @param {number} estimatedCostCents - Estimated cost of the run, in cents.
 * @returns {{allowed: boolean, warning?: string}} `allowed: false` with a warning when a
 *   limit is exceeded; `allowed: true` with a warning when past the warning threshold;
 *   otherwise just `allowed: true`.
 */
function checkBudget(estimatedCostCents) {
  const limits = loadBudgetConfig();
  const perRunLimit = limits.maxPerRunCents;
  if (estimatedCostCents > perRunLimit) {
    const warning = `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(perRunLimit)})`;
    return { allowed: false, warning };
  }

  // Today's spend is only looked up once the per-run check has passed.
  const spentToday = getCostSummary({ period: "day" }).totalCostCents;
  const projectedDaily = spentToday + estimatedCostCents;
  const dailyLimit = limits.maxPerDayCents;
  if (projectedDaily > dailyLimit) {
    const warning = `Daily spending (${formatDollars(spentToday)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(dailyLimit)})`;
    return { allowed: false, warning };
  }

  const warnThreshold = dailyLimit * limits.warnAtPercent;
  if (projectedDaily > warnThreshold) {
    const usedPercent = Math.round(projectedDaily / dailyLimit * 100);
    const warning = `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(dailyLimit)} (${usedPercent}%)`;
    return { allowed: true, warning };
  }
  return { allowed: true };
}
|
|
4444
|
-
function
|
|
4445
|
-
|
|
4446
|
-
"claude-haiku-4-5-20251001": 0.1,
|
|
4447
|
-
"claude-sonnet-4-6-20260311": 0.9,
|
|
4448
|
-
"claude-opus-4-6-20260311": 3
|
|
4449
|
-
};
|
|
4450
|
-
const costPer1M = costs[model] ?? 0.5;
|
|
4451
|
-
return tokens / 1e6 * costPer1M * 100;
|
|
4909
|
+
/**
 * Render an amount given in cents as a dollar string with two decimals.
 *
 * @param {number} cents - Amount in cents.
 * @returns {string} For example `"$12.34"` for `1234`.
 */
function formatDollars(cents) {
  const dollars = cents / 100;
  return "$" + dollars.toFixed(2);
}
|
|
4453
|
-
|
|
4454
|
-
|
|
4455
|
-
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
|
|
4462
|
-
|
|
4463
|
-
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
|
|
4468
|
-
|
|
4469
|
-
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
|
|
4477
|
-
cyan: [36, 39],
|
|
4478
|
-
white: [37, 39],
|
|
4479
|
-
blackBright: [90, 39],
|
|
4480
|
-
gray: [90, 39],
|
|
4481
|
-
grey: [90, 39],
|
|
4482
|
-
redBright: [91, 39],
|
|
4483
|
-
greenBright: [92, 39],
|
|
4484
|
-
yellowBright: [93, 39],
|
|
4485
|
-
blueBright: [94, 39],
|
|
4486
|
-
magentaBright: [95, 39],
|
|
4487
|
-
cyanBright: [96, 39],
|
|
4488
|
-
whiteBright: [97, 39]
|
|
4489
|
-
},
|
|
4490
|
-
bgColor: {
|
|
4491
|
-
bgBlack: [40, 49],
|
|
4492
|
-
bgRed: [41, 49],
|
|
4493
|
-
bgGreen: [42, 49],
|
|
4494
|
-
bgYellow: [43, 49],
|
|
4495
|
-
bgBlue: [44, 49],
|
|
4496
|
-
bgMagenta: [45, 49],
|
|
4497
|
-
bgCyan: [46, 49],
|
|
4498
|
-
bgWhite: [47, 49],
|
|
4499
|
-
bgBlackBright: [100, 49],
|
|
4500
|
-
bgGray: [100, 49],
|
|
4501
|
-
bgGrey: [100, 49],
|
|
4502
|
-
bgRedBright: [101, 49],
|
|
4503
|
-
bgGreenBright: [102, 49],
|
|
4504
|
-
bgYellowBright: [103, 49],
|
|
4505
|
-
bgBlueBright: [104, 49],
|
|
4506
|
-
bgMagentaBright: [105, 49],
|
|
4507
|
-
bgCyanBright: [106, 49],
|
|
4508
|
-
bgWhiteBright: [107, 49]
|
|
4912
|
+
/**
 * Format a token count compactly: plain digits below 1,000, one-decimal "K"
 * for thousands and one-decimal "M" for millions.
 *
 * @param {number} tokens - Token count.
 * @returns {string} For example `"999"`, `"1.5K"`, `"2.5M"`.
 */
function formatTokens(tokens) {
  // Counts just under one million (e.g. 999,999) round to "1000.0" at one
  // decimal of thousands; switch to the "M" unit for them so the output reads
  // "1.0M" rather than "1000.0K".
  if (tokens >= 999950) {
    return `${(tokens / 1e6).toFixed(1)}M`;
  }
  if (tokens >= 1000) {
    return `${(tokens / 1000).toFixed(1)}K`;
  }
  return String(tokens);
}
|
|
4919
|
+
/**
 * Render a cost summary as multi-line terminal text.
 *
 * The output has a headline section (total, average per run, monthly
 * estimate), followed by a "By Model" table when `summary.byModel` has
 * entries and a per-scenario table when `summary.byScenario` is non-empty.
 * Headings and amounts are styled through `source_default`.
 *
 * @param {object} summary - Reads `period`, `totalCostCents`, `totalTokens`,
 *   `runCount`, `avgCostPerRun`, `estimatedMonthlyCents`, `byModel` (object
 *   keyed by model) and `byScenario` (array).
 * @returns {string} Lines joined with newlines, with a leading and trailing empty line.
 */
function formatCostsTerminal(summary) {
  const rule = (width) => "\u2500".repeat(width);
  const money = (cents) => source_default.yellow(formatDollars(cents));

  const out = [
    "",
    source_default.bold(` Cost Summary (${summary.period})`),
    "",
    ` Total: ${money(summary.totalCostCents)} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`,
    ` Avg/run: ${money(summary.avgCostPerRun)}`,
    ` Est/month: ${money(summary.estimatedMonthlyCents)}`
  ];

  const perModel = Object.entries(summary.byModel);
  if (perModel.length > 0) {
    out.push("");
    out.push(source_default.bold(" By Model"));
    out.push(` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`);
    out.push(` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)}`);
    perModel.forEach(([model, data]) => {
      out.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
    });
  }

  if (summary.byScenario.length > 0) {
    out.push("");
    out.push(source_default.bold(" Scenarios by Cost (most expensive first)"));
    out.push(` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`);
    out.push(` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)} ${rule(10)}`);
    summary.byScenario.forEach((entry) => {
      // Names longer than 38 characters are cut to 35 plus an ellipsis.
      let label = entry.name;
      if (label.length > 38) {
        label = label.slice(0, 35) + "...";
      }
      let avgPerRun = 0;
      if (entry.runs > 0) {
        avgPerRun = entry.costCents / entry.runs;
      }
      out.push(` ${label.padEnd(40)} ${formatDollars(entry.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(entry.runs).padEnd(6)} ${formatTokens(entry.tokens)}`);
    });
  }

  out.push("");
  return out.join("\n");
}
|
|
4952
|
+
/**
 * Serialize a cost summary as pretty-printed JSON (two-space indent).
 *
 * @param {object} summary - Cost summary to serialize.
 * @returns {string} JSON text.
 */
function formatCostsJSON(summary) {
  const indentWidth = 2;
  return JSON.stringify(summary, null, indentWidth);
}
|
|
4955
|
+
|
|
4956
|
+
// src/db/personas.ts
|
|
4957
|
+
init_types();
|
|
4958
|
+
init_database();
|
|
4959
|
+
/**
 * Look up a persona by full id, falling back to its short id.
 *
 * @param {string} id - Persona `id` or `short_id`.
 * @returns {object|null} The persona built by `personaFromRow`, or `null` when neither column matches.
 */
function getPersona(id) {
  const database = getDatabase();
  // Tried in order; the short-id query only runs when the full-id query finds nothing.
  const lookups = [
    "SELECT * FROM personas WHERE id = ?",
    "SELECT * FROM personas WHERE short_id = ?"
  ];
  for (const sql of lookups) {
    const match = database.query(sql).get(id);
    if (match) {
      return personaFromRow(match);
    }
  }
  return null;
}
|
|
4969
|
+
|
|
4970
|
+
// src/lib/runner.ts
|
|
4971
|
+
init_browser();
|
|
4972
|
+
init_ai_client();
|
|
4973
|
+
init_config();
|
|
4974
|
+
|
|
4975
|
+
// src/lib/webhooks.ts
|
|
4976
|
+
init_database();
|
|
4977
|
+
/**
 * Convert a `webhooks` table row into the camel-cased webhook object.
 *
 * @param {object} row - Row with `id`, `url`, `events` (JSON text), `project_id`,
 *   `secret`, `active` and `created_at`.
 * @returns {object} Webhook with `events` parsed from JSON and `active` true only when the column equals 1.
 */
function fromRow(row) {
  const { id, url, secret } = row;
  const events = JSON.parse(row.events);
  const active = row.active === 1;
  return {
    id,
    url,
    events,
    projectId: row.project_id,
    secret,
    active,
    createdAt: row.created_at
  };
}
|
|
4988
|
+
/**
 * Insert a new active webhook and return it as read back by `getWebhook`.
 *
 * @param {object} input - `url` is stored as given; `events` defaults to `["failed"]`;
 *   `secret` defaults to a random UUID with dashes removed; `projectId` defaults to `null`.
 * @returns {object|null} The stored webhook.
 */
function createWebhook(input) {
  const database = getDatabase();
  const webhookId = uuid();
  const subscribedEvents = input.events ?? ["failed"];
  const signingSecret = input.secret ?? crypto.randomUUID().replace(/-/g, "");
  const insertSql = [
    "",
    "INSERT INTO webhooks (id, url, events, project_id, secret, active, created_at)",
    "VALUES (?, ?, ?, ?, ?, 1, ?)",
    ""
  ].join("\n");
  database.query(insertSql).run(
    webhookId,
    input.url,
    JSON.stringify(subscribedEvents),
    input.projectId ?? null,
    signingSecret,
    now()
  );
  return getWebhook(webhookId);
}
|
|
4999
|
+
/**
 * Fetch a webhook by exact id, or by id prefix when the prefix is unambiguous.
 *
 * @param {string} id - Full webhook id or a leading fragment of one.
 * @returns {object|null} The webhook, or `null` when there is no exact match and
 *   the prefix matches zero or more than one row.
 */
function getWebhook(id) {
  const database = getDatabase();
  const exact = database.query("SELECT * FROM webhooks WHERE id = ?").get(id);
  if (exact) {
    return fromRow(exact);
  }
  const candidates = database.query("SELECT * FROM webhooks WHERE id LIKE ? || '%'").all(id);
  return candidates.length === 1 ? fromRow(candidates[0]) : null;
}
|
|
5010
|
+
/**
 * List active webhooks, newest first.
 *
 * @param {string} [projectId] - When truthy, restricts the result to webhooks of
 *   that project plus webhooks with no project.
 * @returns {object[]} Webhooks converted with `fromRow`.
 */
function listWebhooks(projectId) {
  const database = getDatabase();
  const sqlParts = ["SELECT * FROM webhooks WHERE active = 1"];
  const bindings = [];
  if (projectId) {
    sqlParts.push(" AND (project_id = ? OR project_id IS NULL)");
    bindings.push(projectId);
  }
  sqlParts.push(" ORDER BY created_at DESC");
  const rows = database.query(sqlParts.join("")).all(...bindings);
  return rows.map((row) => fromRow(row));
}
|
|
5022
|
+
/**
 * Delete a webhook identified by id (resolved through `getWebhook`).
 *
 * @param {string} id - Webhook id as accepted by `getWebhook`.
 * @returns {boolean} `false` when no webhook was resolved, `true` after the delete statement ran.
 */
function deleteWebhook(id) {
  const database = getDatabase();
  const target = getWebhook(id);
  if (target) {
    // Delete by the resolved id, not by the lookup string the caller passed.
    database.query("DELETE FROM webhooks WHERE id = ?").run(target.id);
    return true;
  }
  return false;
}
|
|
5030
|
+
/**
 * Sign a webhook body with HMAC-SHA256 keyed by the webhook secret.
 *
 * The previous implementation produced a 32-bit rolling hash while labelling
 * it `sha256=`; that value is easy to forge and cannot be verified by a
 * receiver that computes a standard HMAC. This version emits a real
 * HMAC-SHA256 digest.
 *
 * NOTE(review): receivers that re-implemented the old rolling hash would need
 * to switch to HMAC-SHA256 verification.
 *
 * @param {string} body - Exact request body that will be sent.
 * @param {string} secret - Shared secret of the webhook.
 * @returns {string} `sha256=` followed by the 64-character lowercase hex digest.
 */
function signPayload(body, secret) {
  const digest = createWebhookHmac("sha256", secret).update(body, "utf8").digest("hex");
  return `sha256=${digest}`;
}
|
|
5040
|
+
/**
 * Build the Slack message for a run payload: one coloured attachment holding a
 * single mrkdwn section.
 *
 * @param {object} payload - Reads `run.status`, `run.url`, `run.passed`,
 *   `run.total`, `run.failed` and, when present, `schedule.name`.
 * @returns {object} `{ attachments: [{ color, blocks: [section] }] }`; green with a
 *   check mark when the run status is "passed", red with an ":x:" otherwise.
 */
function formatSlackPayload(payload) {
  const { run, schedule } = payload;
  const didPass = run.status === "passed";
  const summaryParts = [
    `${didPass ? ":white_check_mark:" : ":x:"} *Test Run ${run.status.toUpperCase()}*\n`,
    `URL: ${run.url}\n`,
    `Results: ${run.passed}/${run.total} passed`,
    run.failed > 0 ? ` (${run.failed} failed)` : "",
    schedule ? `\nSchedule: ${schedule.name}` : ""
  ];
  const section = {
    type: "section",
    text: {
      type: "mrkdwn",
      text: summaryParts.join("")
    }
  };
  return {
    attachments: [
      {
        color: didPass ? "#22c55e" : "#ef4444",
        blocks: [section]
      }
    ]
  };
}
|
|
5063
|
+
/**
 * Deliver a run event to every subscribed webhook, one after another.
 *
 * Webhooks are taken from `listWebhooks(run.projectId)`. A webhook receives the
 * event when its `events` list contains the event name or "*". URLs containing
 * "hooks.slack.com" get the Slack-formatted body; all others get the raw
 * payload. When the webhook has a secret, the body signature is sent in the
 * `X-Testers-Signature` header. A non-OK response is retried once after five
 * seconds; delivery errors are ignored so one failing webhook does not stop
 * the remaining ones.
 *
 * @param {string} event - Event name.
 * @param {object} run - Run record; `id`, `url`, `status`, `passed`, `failed`, `total` are sent.
 * @param {object} [schedule] - Optional schedule included in the payload.
 * @returns {Promise<void>}
 */
async function dispatchWebhooks(event, run, schedule) {
  const targets = listWebhooks(run.projectId ?? undefined);
  const { id, url, status, passed, failed, total } = run;
  const payload = {
    event,
    run: { id, url, status, passed, failed, total },
    schedule,
    timestamp: new Date().toISOString()
  };
  for (const hook of targets) {
    const isSubscribed = hook.events.includes(event) || hook.events.includes("*");
    if (!isSubscribed) {
      continue;
    }
    const toSlack = hook.url.includes("hooks.slack.com");
    const body = JSON.stringify(toSlack ? formatSlackPayload(payload) : payload);
    const headers = { "Content-Type": "application/json" };
    if (hook.secret) {
      headers["X-Testers-Signature"] = signPayload(body, hook.secret);
    }
    const post = () => fetch(hook.url, { method: "POST", headers, body });
    try {
      const firstAttempt = await post();
      if (!firstAttempt.ok) {
        // Single delayed retry; its outcome is not inspected.
        await new Promise((wake) => setTimeout(wake, 5000));
        await post();
      }
    } catch {}
  }
}
|
|
5102
|
+
/**
 * Send a synthetic "test" event to a webhook and report whether it was accepted.
 *
 * The body is built the same way `dispatchWebhooks` builds it: URLs containing
 * "hooks.slack.com" receive the Slack-formatted message, everything else the
 * raw payload. Previously Slack URLs were sent the raw payload, so the test
 * did not exercise the body that real deliveries use.
 *
 * @param {string} id - Webhook id as accepted by `getWebhook`.
 * @returns {Promise<boolean>} `true` when the endpoint answered with an OK status;
 *   `false` when the webhook is unknown, the response is not OK, or the request throws.
 */
async function testWebhook(id) {
  const webhook = getWebhook(id);
  if (!webhook) {
    return false;
  }
  const testPayload = {
    event: "test",
    run: { id: "test-run", url: "http://localhost:3000", status: "passed", passed: 3, failed: 0, total: 3 },
    timestamp: new Date().toISOString()
  };
  try {
    const isSlack = webhook.url.includes("hooks.slack.com");
    const body = JSON.stringify(isSlack ? formatSlackPayload(testPayload) : testPayload);
    const headers = { "Content-Type": "application/json" };
    if (webhook.secret) {
      headers["X-Testers-Signature"] = signPayload(body, webhook.secret);
    }
    const response = await fetch(webhook.url, { method: "POST", headers, body });
    return response.ok;
  } catch {
    // Network or serialization failures count as a failed test delivery.
    return false;
  }
}
|
|
5126
|
+
|
|
5127
|
+
// src/lib/logs-integration.ts
|
|
5128
|
+
/**
 * Post one error-level log entry per failed result to `${LOGS_URL}/api/logs`.
 *
 * Does nothing when the `LOGS_URL` environment variable is unset or empty.
 * Request failures are ignored.
 *
 * @param {object} run - Run record; `id` and `url` are attached to each entry.
 * @param {object[]} failedResults - Results; `scenarioId`, `status`, `error`, `durationMs` are read.
 * @param {object[]} scenarios - Scenarios used to resolve a result's `scenarioId` to a name.
 * @returns {Promise<void>}
 */
async function pushFailedRunToLogs(run, failedResults, scenarios) {
  const logsUrl = process.env.LOGS_URL;
  if (!logsUrl) {
    return;
  }
  const scenariosById = new Map();
  for (const scenario of scenarios) {
    scenariosById.set(scenario.id, scenario);
  }
  const entries = [];
  for (const result of failedResults) {
    const scenarioName = scenariosById.get(result.scenarioId)?.name;
    const errorSuffix = result.error ? ` \u2014 ${result.error}` : "";
    entries.push({
      level: "error",
      source: "sdk",
      service: "testers",
      // Falls back to the scenario id when the scenario has no known name.
      message: `[testers] Scenario failed: ${scenarioName ?? result.scenarioId}${errorSuffix}`,
      metadata: {
        run_id: run.id,
        scenario_id: result.scenarioId,
        scenario_name: scenarioName,
        url: run.url,
        status: result.status,
        duration_ms: result.durationMs
      }
    });
  }
  const endpoint = `${logsUrl.replace(/\/$/, "")}/api/logs`;
  try {
    await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(entries)
    });
  } catch {}
}
|
|
5158
|
+
|
|
5159
|
+
// src/lib/todos-connector.ts
|
|
5160
|
+
import { Database as Database2 } from "bun:sqlite";
import { createHmac as createWebhookHmac } from "crypto";
import { existsSync as existsSync4 } from "fs";
import { homedir as homedir5 } from "os";
import { join as join5 } from "path";
|
|
5164
|
+
init_types();
|
|
5165
|
+
/**
 * Resolve the location of the todos SQLite database.
 *
 * @returns {string} The `TODOS_DB_PATH` environment variable when it is set and
 *   non-empty, otherwise `<home>/.todos/todos.db`.
 */
function resolveTodosDbPath() {
  const override = process.env["TODOS_DB_PATH"];
  return override ? override : join5(homedir5(), ".todos", "todos.db");
}
|
|
5171
|
+
/**
 * Open the todos database in read-only mode.
 *
 * @returns {object} A `Database2` connection opened with `{ readonly: true }`
 *   and `PRAGMA foreign_keys = ON` applied.
 * @throws {TodosConnectionError} When no file exists at the resolved database path.
 */
function connectToTodos() {
  const todosPath = resolveTodosDbPath();
  const isMissing = !existsSync4(todosPath);
  if (isMissing) {
    throw new TodosConnectionError(`Todos database not found at ${todosPath}. Install @hasna/todos or set TODOS_DB_PATH.`);
  }
  const connection = new Database2(todosPath, { readonly: true });
  connection.exec("PRAGMA foreign_keys = ON");
  return connection;
}
|
|
5180
|
+
/**
 * Read tasks from the todos database, ordered by priority
 * (critical, high, medium, low).
 *
 * @param {object} [options]
 * @param {string} [options.status] - Exact status to match; when omitted, only
 *   "pending" and "in_progress" tasks are returned.
 * @param {string} [options.priority] - Exact priority to match.
 * @param {string} [options.projectName] - Project name; resolved to a project id.
 *   A name that matches no project yields an empty result (previously the
 *   filter was silently dropped and tasks from every project were returned).
 * @param {string[]} [options.tags] - When non-empty, keeps only tasks having at
 *   least one of these tags (task tags are stored as a JSON array).
 * @returns {object[]} Matching task rows.
 * @throws Whatever `connectToTodos` throws when the database is unavailable.
 */
function pullTasks(options = {}) {
  const db2 = connectToTodos();
  try {
    let query = "SELECT id, short_id, title, description, status, priority, tags, project_id FROM tasks WHERE 1=1";
    const params = [];
    if (options.status) {
      query += " AND status = ?";
      params.push(options.status);
    } else {
      query += " AND status IN ('pending', 'in_progress')";
    }
    if (options.priority) {
      query += " AND priority = ?";
      params.push(options.priority);
    }
    if (options.projectName) {
      const project = db2.query("SELECT id FROM projects WHERE name = ?").get(options.projectName);
      if (!project) {
        // A project filter that matches nothing must not widen the result set.
        return [];
      }
      query += " AND project_id = ?";
      params.push(project.id);
    }
    query += " ORDER BY CASE priority WHEN 'critical' THEN 0 WHEN 'high' THEN 1 WHEN 'medium' THEN 2 WHEN 'low' THEN 3 END";
    const tasks = db2.query(query).all(...params);
    const wantedTags = options.tags;
    if (wantedTags && wantedTags.length > 0) {
      return tasks.filter((task) => {
        const taskTags = JSON.parse(task.tags || "[]");
        return wantedTags.some((tag) => taskTags.includes(tag));
      });
    }
    return tasks;
  } finally {
    // Runs on every exit path, including the early return and thrown errors.
    db2.close();
  }
}
|
|
5215
|
+
/**
 * Convert a todos task row into the input object for creating a scenario.
 *
 * - `name` is the task title with a leading ticket prefix such as "ABC-12: " removed.
 * - `steps` are the description lines that start with a number followed by "." or ")".
 * - `priority` is kept when it is low/medium/high/critical, otherwise "medium".
 * - `tags` are parsed from the task's JSON `tags` column (empty list when blank).
 *
 * @param {object} task - Task row (`id`, `short_id`, `title`, `description`, `priority`, `tags`).
 * @param {string} [projectId] - Project to attach the scenario to.
 * @returns {object} Scenario input including `metadata.todosTaskId` and `metadata.todosShortId`.
 */
function taskToScenarioInput(task, projectId) {
  const tags = JSON.parse(task.tags || "[]");
  const knownPriorities = ["low", "medium", "high", "critical"];
  const priority = knownPriorities.includes(task.priority) ? task.priority : "medium";
  const steps = !task.description
    ? []
    : task.description
        .split("\n")
        .map((line) => line.match(/^\s*\d+[\.\)]\s*(.+)/))
        .filter((found) => found?.[1])
        .map((found) => found[1].trim());
  const name = task.title.replace(/^(OPE\d+-\d+|[A-Z]+-\d+):\s*/, "");
  const metadata = { todosTaskId: task.id, todosShortId: task.short_id };
  return {
    name,
    description: task.description || task.title,
    steps,
    tags,
    priority,
    projectId,
    metadata
  };
}
|
|
5239
|
+
/**
 * Import todos tasks as scenarios, skipping tasks that already have one.
 *
 * Tasks come from `pullTasks` (tags default to qa/test/testing). A task is
 * skipped when a scenario listed for `options.projectId` already carries its
 * id in `metadata.todosTaskId`.
 *
 * @param {object} [options] - `projectName`, `tags`, `priority` filter the tasks;
 *   `projectId` scopes the existing-scenario check and is set on new scenarios.
 * @returns {{imported: number, skipped: number}} Counts of created and skipped tasks.
 */
function importFromTodos(options = {}) {
  const { projectName, priority, projectId } = options;
  const tasks = pullTasks({
    projectName,
    tags: options.tags ?? ["qa", "test", "testing"],
    priority
  });
  const alreadyImported = new Set();
  for (const scenario of listScenarios({ projectId })) {
    const linkedTaskId = scenario.metadata?.todosTaskId;
    if (linkedTaskId) {
      alreadyImported.add(linkedTaskId);
    }
  }
  const counts = { imported: 0, skipped: 0 };
  for (const task of tasks) {
    if (alreadyImported.has(task.id)) {
      counts.skipped += 1;
    } else {
      createScenario(taskToScenarioInput(task, projectId));
      counts.imported += 1;
    }
  }
  return counts;
}
|
|
5260
|
+
/**
 * Mark a todos task as completed.
 *
 * The task is located by id prefix (`id LIKE taskId%`, first row returned by
 * the query). The update sets status, `completed_at`, `updated_at` and bumps
 * `version`, guarded by the version that was just read.
 *
 * @param {string} taskId - Task id or leading fragment of one.
 * @returns {boolean} `false` when the database file or the task does not exist;
 *   `true` once the update statement has been executed.
 */
function markTodoDone(taskId) {
  const todosPath = resolveTodosDbPath();
  if (!existsSync4(todosPath)) {
    return false;
  }
  const connection = new Database2(todosPath);
  try {
    const found = connection.query("SELECT id, version FROM tasks WHERE id LIKE ? || '%'").get(taskId);
    if (found) {
      const { id, version } = found;
      connection
        .query("UPDATE tasks SET status = 'completed', completed_at = datetime('now'), version = version + 1, updated_at = datetime('now') WHERE id = ? AND version = ?")
        .run(id, version);
      return true;
    }
    return false;
  } finally {
    connection.close();
  }
}
|
|
4627
|
-
var ansiStyles = assembleStyles();
|
|
4628
|
-
var ansi_styles_default = ansiStyles;
|
|
4629
5275
|
|
|
4630
|
-
//
|
|
4631
|
-
|
|
4632
|
-
|
|
4633
|
-
|
|
4634
|
-
|
|
4635
|
-
|
|
4636
|
-
|
|
4637
|
-
|
|
4638
|
-
|
|
5276
|
+
// src/lib/failure-pipeline.ts
|
|
5277
|
+
/**
 * Create one "BUG" task in the todos database for each failed result.
 *
 * Nothing is created when there are no failures, when the
 * `TESTERS_TODOS_PROJECT_ID` environment variable is unset, or when the todos
 * database cannot be opened. A failure is skipped when an open (not completed
 * or cancelled) task with the same title already exists, or when its insert fails.
 *
 * The database is opened writable here. The previous code used
 * `connectToTodos()`, which opens the file with `readonly: true`, so every
 * INSERT threw and each failure was counted as skipped.
 *
 * @param {object} run - Run record; `id` and `url` go into the task description.
 * @param {object[]} failedResults - Failed results (`scenarioId`, `status`,
 *   `error`, `reasoning`, `durationMs`, `tokensUsed` are read).
 * @param {object[]} scenarios - Scenarios used to resolve names by id.
 * @returns {Promise<{created: number, skipped: number}>} Counts of inserted and skipped tasks.
 */
async function createFailureTasks(run, failedResults, scenarios) {
  if (failedResults.length === 0)
    return { created: 0, skipped: 0 };
  const projectId = process.env["TESTERS_TODOS_PROJECT_ID"];
  if (!projectId)
    return { created: 0, skipped: 0 };
  let db2 = null;
  try {
    const dbPath = resolveTodosDbPath();
    if (!existsSync4(dbPath))
      return { created: 0, skipped: 0 };
    // Writable connection: this function inserts rows.
    db2 = new Database2(dbPath);
    db2.exec("PRAGMA foreign_keys = ON");
  } catch {
    // Best effort: an unavailable todos database must not fail the run.
    try {
      db2?.close();
    } catch {}
    return { created: 0, skipped: 0 };
  }
  const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
  const insertSql = [
    "",
    "INSERT INTO tasks (id, short_id, title, description, status, priority, tags, project_id, version, created_at, updated_at)",
    "VALUES (?, ?, ?, ?, 'pending', 'high', ?, ?, 1, ?, ?)",
    ""
  ].join("\n");
  let created = 0;
  let skipped = 0;
  try {
    for (const result of failedResults) {
      const scenario = scenarioMap.get(result.scenarioId);
      const title = `BUG: [testers] ${scenario?.name ?? result.scenarioId} failed`;
      // De-duplicate on title so repeated failing runs do not pile up open tasks.
      const existing = db2.query("SELECT id FROM tasks WHERE title = ? AND status NOT IN ('completed', 'cancelled') LIMIT 1").get(title);
      if (existing) {
        skipped++;
        continue;
      }
      const id = crypto.randomUUID();
      const now2 = new Date().toISOString();
      const description = [
        `Test failure detected by open-testers.`,
        ``,
        `**Run:** ${run.id}`,
        `**URL:** ${run.url}`,
        `**Scenario:** ${scenario?.name ?? result.scenarioId}`,
        `**Status:** ${result.status}`,
        result.error ? `**Error:** ${result.error}` : null,
        result.reasoning ? `**Reasoning:** ${result.reasoning.slice(0, 500)}` : null,
        `**Duration:** ${result.durationMs ? `${(result.durationMs / 1000).toFixed(1)}s` : "N/A"}`,
        `**Tokens:** ${result.tokensUsed ?? 0}`
      ].filter(Boolean).join("\n");
      try {
        db2.query(insertSql).run(id, `BUG-${id.slice(0, 6)}`, title, description, JSON.stringify(["bug", "testers", "auto-created"]), projectId, now2, now2);
        created++;
      } catch {
        // A failed insert is counted and does not stop the remaining failures.
        skipped++;
      }
    }
  } finally {
    db2.close();
  }
  return { created, skipped };
}
|
|
4640
|
-
|
|
4641
|
-
|
|
4642
|
-
|
|
4643
|
-
|
|
4644
|
-
|
|
4645
|
-
|
|
5331
|
+
/**
 * Post a failure summary for a run to a conversations space.
 *
 * Does nothing unless both `TESTERS_CONVERSATIONS_URL` and
 * `TESTERS_CONVERSATIONS_SPACE` are set. The message lists at most five
 * failures (each error cut to 120 characters) and a "… and N more" line for
 * the rest. Request failures are ignored.
 *
 * The blank separator lines of the message are kept. Previously the
 * empty-string filter that removed the absent "more" line removed them too,
 * unlike the sibling run notification, which filters a `null` sentinel.
 *
 * @param {object} run - Run record (`id`, `url`, `total`, `passed` are read).
 * @param {object[]} failedResults - Failed results (`scenarioId`, `error` are read).
 * @param {object[]} scenarios - Scenarios used to resolve names by id.
 * @returns {Promise<void>}
 */
async function notifyFailureToConversations(run, failedResults, scenarios) {
  const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
  const space = process.env["TESTERS_CONVERSATIONS_SPACE"];
  if (!baseUrl || !space)
    return;
  const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
  const total = run.total;
  const failedCount = failedResults.length;
  const passedCount = run.passed;
  const failureLines = failedResults.slice(0, 5).map((r) => {
    const name = scenarioMap.get(r.scenarioId)?.name ?? r.scenarioId;
    const err = r.error ? ` \u2014 ${r.error.slice(0, 120)}` : "";
    return ` \u274C ${name}${err}`;
  });
  // `null` marks "no line here" so intentional blank lines survive the filter.
  const extra = failedResults.length > 5 ? ` \u2026 and ${failedResults.length - 5} more` : null;
  const message = [
    `\uD83D\uDEA8 **Testers run failed** \u2014 ${failedCount}/${total} scenarios failed`,
    ``,
    `**URL:** ${run.url}`,
    `**Run ID:** \`${run.id}\``,
    `**Pass rate:** ${passedCount}/${total}`,
    ``,
    `**Failures:**`,
    ...failureLines,
    extra
  ].filter((l) => l !== null).join("\n");
  try {
    await fetch(`${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ content: message, from: "testers" })
    });
  } catch {}
}
|
|
4647
|
-
function
|
|
4648
|
-
|
|
4649
|
-
|
|
4650
|
-
|
|
4651
|
-
|
|
4652
|
-
|
|
4653
|
-
|
|
5366
|
+
/**
 * Post a run summary (passed or failed) to a conversations space.
 *
 * Does nothing unless `TESTERS_CONVERSATIONS_URL` is set and a space is given
 * through `options.spaceId` or `TESTERS_CONVERSATIONS_SPACE`. For failed runs
 * the message lists up to five failing results by the first eight characters
 * of their scenario id, each error cut to 100 characters. Request failures
 * are ignored.
 *
 * @param {object} run - Run record (`id`, `url`, `status`, `passed`, `total`,
 *   `model`, `startedAt`, `finishedAt` are read).
 * @param {object[]} results - Results of the run (`status`, `scenarioId`, `error` are read).
 * @param {object} [options] - `spaceId` overrides the environment space.
 * @returns {Promise<void>}
 */
async function notifyRunToConversations(run, results, options) {
  const baseUrl = process.env["TESTERS_CONVERSATIONS_URL"];
  const space = options?.spaceId ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
  if (!baseUrl || !space) {
    return;
  }

  const passRate = run.total > 0 ? (run.passed / run.total * 100).toFixed(0) : "0";
  let statusIcon = "\u26A0\uFE0F";
  if (run.status === "passed") {
    statusIcon = "\u2705";
  } else if (run.status === "failed") {
    statusIcon = "\u274C";
  }
  let durationSec = null;
  if (run.finishedAt && run.startedAt) {
    const elapsedMs = new Date(run.finishedAt).getTime() - new Date(run.startedAt).getTime();
    durationSec = (elapsedMs / 1000).toFixed(1);
  }

  const lines = [
    `${statusIcon} **Testers run ${run.status.toUpperCase()}** \u2014 ${run.passed}/${run.total} scenarios (${passRate}% pass rate)`,
    ``,
    `**URL:** ${run.url}`,
    `**Run ID:** \`${run.id}\``,
    `**Model:** ${run.model}`
  ];
  if (durationSec) {
    lines.push(`**Duration:** ${durationSec}s`);
  }

  if (run.status === "failed") {
    const failedResults = results.filter((r) => r.status === "failed" || r.status === "error");
    const shown = failedResults.slice(0, 5);
    if (shown.length > 0) {
      lines.push(``, `**Failures:**`);
      for (const r of shown) {
        const err = r.error ? ` \u2014 ${r.error.slice(0, 100)}` : "";
        lines.push(` \u274C ${r.scenarioId.slice(0, 8)}${err}`);
      }
      const hiddenCount = failedResults.length - 5;
      if (hiddenCount > 0) {
        lines.push(` \u2026 and ${hiddenCount} more`);
      }
    }
  }

  const endpoint = `${baseUrl.replace(/\/$/, "")}/api/spaces/${encodeURIComponent(space)}/messages`;
  try {
    await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ content: lines.join("\n"), from: "testers" })
    });
  } catch {}
}
|
|
4658
|
-
|
|
4659
|
-
|
|
4660
|
-
|
|
4661
|
-
|
|
4662
|
-
|
|
4663
|
-
level,
|
|
4664
|
-
hasBasic: true,
|
|
4665
|
-
has256: level >= 2,
|
|
4666
|
-
has16m: level >= 3
|
|
4667
|
-
};
|
|
5405
|
+
|
|
5406
|
+
// src/lib/runner.ts
|
|
5407
|
+
var eventHandler = null;
|
|
5408
|
+
/**
 * Register the handler that receives run events.
 *
 * Only one handler is kept: each call replaces the previous one.
 *
 * @param {Function|null} handler - Called with each event passed to `emit`.
 * @returns {void}
 */
function onRunEvent(handler) {
  eventHandler = handler;
}
|
|
4669
|
-
function
|
|
4670
|
-
|
|
4671
|
-
|
|
4672
|
-
|
|
5411
|
+
/**
 * Forward an event to the registered run-event handler, if there is one.
 *
 * @param {object} event - Event object handed to the handler unchanged.
 * @returns {void}
 */
function emit(event) {
  if (!eventHandler) {
    return;
  }
  eventHandler(event);
}
|
|
5415
|
+
/**
 * Race a promise against a deadline.
 *
 * At 80% of the deadline a "scenario:timeout_warning" event is emitted. When
 * the deadline passes first, the returned promise rejects with a timeout
 * error; otherwise it settles exactly like `promise`. Both timers are
 * cancelled as soon as `promise` settles.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} ms - Deadline in milliseconds.
 * @param {string} label - Scenario name used in the warning event and the error message.
 * @returns {Promise<*>} Resolves or rejects with the outcome of `promise`, or
 *   rejects with the timeout error.
 */
function withTimeout(promise, ms, label) {
  return new Promise((resolve, reject) => {
    const warningAt = Math.floor(ms * 0.8);

    const announceWarning = () => {
      emit({
        type: "scenario:timeout_warning",
        scenarioName: label,
        timeoutMs: ms,
        elapsedMs: warningAt
      });
    };
    const warningTimer = setTimeout(announceWarning, warningAt);

    const expire = () => {
      clearTimeout(warningTimer);
      reject(new Error(`Scenario '${label}' timed out after ${ms}ms. Try: testers run --timeout ${ms * 2} or simplify the scenario steps.`));
    };
    const deadlineTimer = setTimeout(expire, ms);

    const cancelTimers = () => {
      clearTimeout(deadlineTimer);
      clearTimeout(warningTimer);
    };
    promise.then(
      (value) => {
        cancelTimers();
        resolve(value);
      },
      (reason) => {
        cancelTimers();
        reject(reason);
      }
    );
  });
}
|
|
5441
|
+
async function runSingleScenario(scenario, runId, options) {
|
|
5442
|
+
const scenarioType = scenario.scenarioType ?? "browser";
|
|
5443
|
+
if (scenarioType === "eval") {
|
|
5444
|
+
return runEvalScenario(scenario, { runId, baseUrl: options.url });
|
|
5445
|
+
}
|
|
5446
|
+
const config = loadConfig();
|
|
5447
|
+
if (options.selfHeal !== undefined)
|
|
5448
|
+
config.selfHeal = options.selfHeal;
|
|
5449
|
+
let effectiveOptions = options;
|
|
5450
|
+
if (options.minimal) {
|
|
5451
|
+
effectiveOptions = {
|
|
5452
|
+
...options,
|
|
5453
|
+
engine: options.engine ?? "playwright"
|
|
5454
|
+
};
|
|
5455
|
+
try {
|
|
5456
|
+
const { isLightpandaAvailable: isLightpandaAvailable2 } = await Promise.resolve().then(() => (init_browser_lightpanda(), exports_browser_lightpanda)).catch(() => ({ isLightpandaAvailable: () => false }));
|
|
5457
|
+
if (isLightpandaAvailable2())
|
|
5458
|
+
effectiveOptions = { ...effectiveOptions, engine: "lightpanda" };
|
|
5459
|
+
} catch {}
|
|
4673
5460
|
}
|
|
4674
|
-
const
|
|
4675
|
-
if (
|
|
4676
|
-
|
|
5461
|
+
const model = resolveModel2(effectiveOptions.minimal ? "quick" : effectiveOptions.model ?? scenario.model ?? config.defaultModel);
|
|
5462
|
+
if (options.cacheMaxAgeMs && options.cacheMaxAgeMs > 0 && scenario.lastPassedAt && scenario.lastPassedUrl === options.url) {
|
|
5463
|
+
const age = Date.now() - new Date(scenario.lastPassedAt).getTime();
|
|
5464
|
+
if (age < options.cacheMaxAgeMs) {
|
|
5465
|
+
const cached = createResult({ runId, scenarioId: scenario.id, model, stepsTotal: 0 });
|
|
5466
|
+
return updateResult(cached.id, {
|
|
5467
|
+
status: "passed",
|
|
5468
|
+
reasoning: `Cache hit: passed ${Math.round(age / 1000)}s ago at ${options.url}`,
|
|
5469
|
+
stepsCompleted: 0,
|
|
5470
|
+
durationMs: 0,
|
|
5471
|
+
tokensUsed: 0
|
|
5472
|
+
});
|
|
5473
|
+
}
|
|
4677
5474
|
}
|
|
4678
|
-
|
|
4679
|
-
|
|
4680
|
-
|
|
5475
|
+
const client = createClientForModel(model, effectiveOptions.apiKey ?? config.anthropicApiKey);
|
|
5476
|
+
const screenshotter = new Screenshotter({
|
|
5477
|
+
baseDir: effectiveOptions.screenshotDir ?? config.screenshots.dir
|
|
5478
|
+
});
|
|
5479
|
+
const resolvedPersonaId = options.personaId ?? scenario.personaId;
|
|
5480
|
+
const persona = resolvedPersonaId ? getPersona(resolvedPersonaId) : null;
|
|
5481
|
+
const result = createResult({
|
|
5482
|
+
runId,
|
|
5483
|
+
scenarioId: scenario.id,
|
|
5484
|
+
model,
|
|
5485
|
+
stepsTotal: scenario.steps.length || 10,
|
|
5486
|
+
personaId: persona?.id ?? null,
|
|
5487
|
+
personaName: persona?.name ?? null
|
|
5488
|
+
});
|
|
5489
|
+
emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
|
|
5490
|
+
let browser = null;
|
|
5491
|
+
let page = null;
|
|
5492
|
+
try {
|
|
5493
|
+
browser = await launchBrowser({ headless: !(effectiveOptions.headed ?? false), engine: effectiveOptions.engine });
|
|
5494
|
+
page = await getPage(browser, {
|
|
5495
|
+
viewport: config.browser.viewport
|
|
5496
|
+
});
|
|
5497
|
+
const targetUrl = scenario.targetPath ? `${options.url.replace(/\/$/, "")}${scenario.targetPath}` : options.url;
|
|
5498
|
+
const scenarioTimeout = scenario.timeoutMs ?? options.timeout ?? config.browser.timeout ?? 60000;
|
|
5499
|
+
const consoleErrors = [];
|
|
5500
|
+
page.on("console", (msg) => {
|
|
5501
|
+
if (msg.type() === "error")
|
|
5502
|
+
consoleErrors.push(msg.text());
|
|
5503
|
+
});
|
|
5504
|
+
page.on("pageerror", (err) => {
|
|
5505
|
+
consoleErrors.push(err.message);
|
|
5506
|
+
});
|
|
5507
|
+
await page.goto(targetUrl, { timeout: Math.min(scenarioTimeout, 30000) });
|
|
5508
|
+
const stepStartTimes = new Map;
|
|
5509
|
+
const agentResult = await withTimeout(runAgentLoop({
|
|
5510
|
+
client,
|
|
5511
|
+
page,
|
|
5512
|
+
scenario,
|
|
5513
|
+
screenshotter,
|
|
5514
|
+
model,
|
|
5515
|
+
runId,
|
|
5516
|
+
maxTurns: effectiveOptions.minimal ? 10 : 30,
|
|
5517
|
+
a11y: effectiveOptions.a11y,
|
|
5518
|
+
persona: persona ? {
|
|
5519
|
+
name: persona.name,
|
|
5520
|
+
role: persona.role,
|
|
5521
|
+
description: persona.description,
|
|
5522
|
+
instructions: persona.instructions,
|
|
5523
|
+
traits: persona.traits,
|
|
5524
|
+
goals: persona.goals,
|
|
5525
|
+
behaviors: persona.behaviors,
|
|
5526
|
+
painPoints: persona.painPoints
|
|
5527
|
+
} : null,
|
|
5528
|
+
onStep: (stepEvent) => {
|
|
5529
|
+
let stepDurationMs;
|
|
5530
|
+
if (stepEvent.type === "tool_call") {
|
|
5531
|
+
stepStartTimes.set(stepEvent.stepNumber, Date.now());
|
|
5532
|
+
} else if (stepEvent.type === "tool_result") {
|
|
5533
|
+
const startTime = stepStartTimes.get(stepEvent.stepNumber);
|
|
5534
|
+
if (startTime !== undefined) {
|
|
5535
|
+
stepDurationMs = Date.now() - startTime;
|
|
5536
|
+
stepStartTimes.delete(stepEvent.stepNumber);
|
|
5537
|
+
}
|
|
5538
|
+
}
|
|
5539
|
+
emit({
|
|
5540
|
+
type: `step:${stepEvent.type}`,
|
|
5541
|
+
scenarioId: scenario.id,
|
|
5542
|
+
scenarioName: scenario.name,
|
|
5543
|
+
runId,
|
|
5544
|
+
toolName: stepEvent.toolName,
|
|
5545
|
+
toolInput: stepEvent.toolInput,
|
|
5546
|
+
toolResult: stepEvent.toolResult,
|
|
5547
|
+
thinking: stepEvent.thinking,
|
|
5548
|
+
stepNumber: stepEvent.stepNumber,
|
|
5549
|
+
stepDurationMs
|
|
5550
|
+
});
|
|
5551
|
+
}
|
|
5552
|
+
}), scenarioTimeout, scenario.name);
|
|
5553
|
+
if (options.engine !== "lightpanda" && options.engine !== "bun") {
|
|
5554
|
+
for (const ss of agentResult.screenshots) {
|
|
5555
|
+
try {
|
|
5556
|
+
createScreenshot({
|
|
5557
|
+
resultId: result.id,
|
|
5558
|
+
stepNumber: ss.stepNumber,
|
|
5559
|
+
action: ss.action,
|
|
5560
|
+
filePath: ss.filePath,
|
|
5561
|
+
width: ss.width,
|
|
5562
|
+
height: ss.height,
|
|
5563
|
+
description: ss.description,
|
|
5564
|
+
pageUrl: ss.pageUrl,
|
|
5565
|
+
thumbnailPath: ss.thumbnailPath
|
|
5566
|
+
});
|
|
5567
|
+
emit({ type: "screenshot:captured", screenshotPath: ss.filePath, scenarioId: scenario.id, runId });
|
|
5568
|
+
} catch {}
|
|
5569
|
+
}
|
|
4681
5570
|
}
|
|
4682
|
-
|
|
4683
|
-
|
|
5571
|
+
const lightpandaNote = options.engine === "lightpanda" ? " (Running with Lightpanda \u2014 no screenshots)" : options.engine === "bun" ? " (Running with Bun.WebView \u2014 native, ~11x faster)" : "";
|
|
5572
|
+
let updatedResult = updateResult(result.id, {
|
|
5573
|
+
status: agentResult.status,
|
|
5574
|
+
reasoning: agentResult.reasoning ? agentResult.reasoning + lightpandaNote : lightpandaNote || undefined,
|
|
5575
|
+
stepsCompleted: agentResult.stepsCompleted,
|
|
5576
|
+
durationMs: Date.now() - new Date(result.createdAt).getTime(),
|
|
5577
|
+
tokensUsed: agentResult.tokensUsed,
|
|
5578
|
+
costCents: estimateCost(model, agentResult.tokensUsed)
|
|
5579
|
+
});
|
|
5580
|
+
if (agentResult.status === "failed" || agentResult.status === "error") {
|
|
5581
|
+
const failureAnalysis = analyzeFailure(null, agentResult.reasoning ?? null);
|
|
5582
|
+
if (failureAnalysis) {
|
|
5583
|
+
updatedResult = updateResult(result.id, { failureAnalysis });
|
|
5584
|
+
}
|
|
4684
5585
|
}
|
|
5586
|
+
if (agentResult.status === "passed") {
|
|
5587
|
+
try {
|
|
5588
|
+
updateScenarioPassedCache(scenario.id, options.url);
|
|
5589
|
+
} catch {}
|
|
5590
|
+
}
|
|
5591
|
+
const eventType = agentResult.status === "passed" ? "scenario:pass" : "scenario:fail";
|
|
5592
|
+
emit({ type: eventType, scenarioId: scenario.id, scenarioName: scenario.name, resultId: result.id, runId });
|
|
5593
|
+
return updatedResult;
|
|
5594
|
+
} catch (error) {
|
|
5595
|
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
5596
|
+
let updatedResult = updateResult(result.id, {
|
|
5597
|
+
status: "error",
|
|
5598
|
+
error: errorMsg,
|
|
5599
|
+
durationMs: Date.now() - new Date(result.createdAt).getTime()
|
|
5600
|
+
});
|
|
5601
|
+
const failureAnalysis = analyzeFailure(errorMsg, null);
|
|
5602
|
+
if (failureAnalysis) {
|
|
5603
|
+
updatedResult = updateResult(result.id, { failureAnalysis });
|
|
5604
|
+
}
|
|
5605
|
+
emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: errorMsg, runId });
|
|
5606
|
+
return updatedResult;
|
|
5607
|
+
} finally {
|
|
5608
|
+
if (browser)
|
|
5609
|
+
await closeBrowser(browser, effectiveOptions.engine);
|
|
4685
5610
|
}
|
|
4686
|
-
|
|
4687
|
-
|
|
4688
|
-
|
|
4689
|
-
|
|
4690
|
-
|
|
4691
|
-
|
|
4692
|
-
const
|
|
4693
|
-
if (
|
|
4694
|
-
|
|
4695
|
-
|
|
4696
|
-
|
|
4697
|
-
|
|
4698
|
-
|
|
4699
|
-
|
|
5611
|
+
}
|
|
5612
|
+
/**
 * Runs a set of scenarios as one recorded run and returns the finished run
 * together with every per-scenario result.
 *
 * Flow: resolve model / parallelism / sampling settings, enforce the cost cap,
 * create the run record, order scenarios by their declared dependencies,
 * execute them through a shared worker pool, optionally re-run everything for
 * additional personas, then finalize the run and fire the best-effort
 * notifications.
 *
 * Retry (`options.retry`) and multi-sample flakiness detection
 * (`options.samples`, `options.flakinessThreshold`) apply regardless of the
 * degree of parallelism; previously they were only honored when scenarios ran
 * sequentially, even though the budget estimate already accounted for samples.
 *
 * @param {Array<object>} scenarios - Scenarios to execute (each needs `id` and `name`).
 * @param {object} options - Run options (`url`, `model`, `minimal`, `parallel`,
 *   `samples`, `flakinessThreshold`, `retry`, `maxCostCents`, `skipBudgetCheck`,
 *   `personaIds`, `headed`, `projectId`, ...). Passed through to `runSingleScenario`.
 * @returns {Promise<{run: object, results: Array<object>}>} The finalized run and all results.
 * @throws {BudgetExceededError} When the estimated cost exceeds the configured cap.
 */
async function runBatch(scenarios, options) {
  const config = loadConfig();
  const model = resolveModel2(options.minimal ? "quick" : options.model ?? config.defaultModel);
  // Minimal mode always fans out to at least 5 workers.
  const parallel = options.minimal ? Math.max(5, options.parallel ?? 1) : options.parallel ?? 1;
  const samples = options.samples ?? 1;
  const flakinessThreshold = options.flakinessThreshold ?? 0.95;
  const maxRetries = options.retry ?? 0;

  if (!options.skipBudgetCheck) {
    const cap = options.maxCostCents ?? config.defaultMaxCostCents;
    if (cap !== undefined && cap > 0) {
      const estimated = estimateRunCostCents(scenarios.length, model, samples);
      if (estimated > cap) {
        throw new BudgetExceededError(estimated, cap);
      }
    }
  }

  const run = createRun({
    url: options.url,
    model,
    headed: options.headed,
    parallel,
    projectId: options.projectId,
    samples,
    flakinessThreshold
  });
  updateRun(run.id, { status: "running", total: scenarios.length });

  // Order scenarios so dependencies come first. Scenarios the sorter does not
  // return are appended in their original order.
  let sortedScenarios = scenarios;
  try {
    const { topologicalSort: topologicalSort2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
    const scenarioMap = new Map(scenarios.map((s) => [s.id, s]));
    const sortedIds = topologicalSort2(scenarios.map((s) => s.id));
    const sortedIdSet = new Set(sortedIds);
    sortedScenarios = [
      ...sortedIds.map((id) => scenarioMap.get(id)).filter((s) => s !== undefined),
      ...scenarios.filter((s) => !sortedIdSet.has(s.id))
    ];
  } catch {
    // Best effort: if dependency ordering is unavailable, keep the caller's order.
  }

  const results = [];
  const failedScenarioIds = new Set();
  const isFailure = (status) => status === "failed" || status === "error";

  // A scenario may run unless one of its dependencies has already failed
  // (or was skipped). Lookup failures are treated as "no dependencies".
  // NOTE(review): with more than one worker a dependent scenario can start
  // while its dependency is still running; only already-recorded failures
  // are visible here.
  const canRun = async (scenario) => {
    try {
      const { getDependencies: getDependencies2 } = await Promise.resolve().then(() => (init_flows(), exports_flows));
      for (const depId of getDependencies2(scenario.id)) {
        if (failedScenarioIds.has(depId)) {
          return false;
        }
      }
    } catch {
      // Best effort: an unreadable dependency graph must not block the run.
    }
    return true;
  };

  // Records a "skipped" result for a scenario whose dependency failed and
  // marks it failed so that its own dependents are skipped as well.
  const skipForFailedDependency = (scenario) => {
    const placeholder = createResult({ runId: run.id, scenarioId: scenario.id, model, stepsTotal: 0 });
    const skipped = updateResult(placeholder.id, { status: "skipped", error: "Skipped: dependency failed" });
    results.push(skipped);
    failedScenarioIds.add(scenario.id);
    emit({ type: "scenario:error", scenarioId: scenario.id, scenarioName: scenario.name, error: "Dependency failed \u2014 skipped", runId: run.id });
  };

  // Runs one scenario, retrying failures up to `maxRetries` times, then (when
  // samples > 1) runs the extra samples and annotates the result with
  // pass-rate metadata, downgrading it to "flaky" when the rate is mixed and
  // below the threshold.
  const runWithRetriesAndSamples = async (scenario) => {
    let result = await runSingleScenario(scenario, run.id, options);
    for (let attempt = 1; isFailure(result.status) && attempt <= maxRetries; attempt++) {
      emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id, retryAttempt: attempt + 1, maxRetries: maxRetries + 1 });
      result = await runSingleScenario(scenario, run.id, options);
    }
    if (samples <= 1) {
      return result;
    }

    const sampleResults = [result];
    for (let s = 1; s < samples; s++) {
      emit({ type: "scenario:start", scenarioId: scenario.id, scenarioName: scenario.name, runId: run.id });
      sampleResults.push(await runSingleScenario(scenario, run.id, options));
    }
    const passCount = sampleResults.filter((r) => r.status === "passed").length;
    const passRate = passCount / samples;
    const metadata = { samples, passCount, passRate, sampleResultIds: sampleResults.map((r) => r.id) };

    if (passCount > 0 && passCount < samples && passRate < flakinessThreshold) {
      return updateResult(result.id, {
        status: "flaky",
        reasoning: `Flaky: ${passCount}/${samples} samples passed (${Math.round(passRate * 100)}% pass rate, threshold ${Math.round(flakinessThreshold * 100)}%)`,
        metadata
      });
    }
    if (passCount === 0 || passCount === samples) {
      return updateResult(result.id, { metadata });
    }
    // Mixed outcome at or above the threshold: result is left untouched.
    return result;
  };

  const processScenario = async (scenario) => {
    if (!await canRun(scenario)) {
      skipForFailedDependency(scenario);
      return;
    }
    const result = await runWithRetriesAndSamples(scenario);
    results.push(result);
    if (isFailure(result.status) || result.status === "flaky") {
      failedScenarioIds.add(scenario.id);
    }
  };

  // Shared queue drained by `workerCount` workers; one worker is plain
  // sequential execution in dependency order.
  const queue = [...sortedScenarios];
  const worker = async () => {
    for (let scenario = queue.shift(); scenario; scenario = queue.shift()) {
      await processScenario(scenario);
    }
  };
  const workerCount = Math.max(1, Math.min(parallel, sortedScenarios.length));
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  // The first persona id is not re-run here; every additional persona gets a
  // full sequential pass over all scenarios.
  if (options.personaIds && options.personaIds.length > 1) {
    for (const personaId of options.personaIds.slice(1)) {
      for (const scenario of sortedScenarios) {
        results.push(await runSingleScenario(scenario, run.id, { ...options, personaId }));
      }
    }
  }

  const passed = results.filter((r) => r.status === "passed").length;
  const failedResults = results.filter((r) => isFailure(r.status));
  const failed = failedResults.length;
  const finalRun = updateRun(run.id, {
    status: failed > 0 ? "failed" : "passed",
    passed,
    failed,
    total: scenarios.length,
    finished_at: new Date().toISOString()
  });
  emit({ type: "run:complete", runId: run.id });

  // Everything below is fire-and-forget: notification failures must never
  // affect the outcome of the run itself.
  const runFailed = finalRun.status === "failed";
  dispatchWebhooks(runFailed ? "failed" : "completed", finalRun).catch(() => {});
  if (runFailed) {
    pushFailedRunToLogs(finalRun, failedResults, scenarios).catch(() => {});
    createFailureTasks(finalRun, failedResults, scenarios).catch(() => {});
    notifyFailureToConversations(finalRun, failedResults, scenarios).catch(() => {});
  }
  const conversationsSpaceId = config.conversationsSpace ?? process.env["TESTERS_CONVERSATIONS_SPACE"];
  if (conversationsSpaceId) {
    notifyRunToConversations(finalRun, results, { spaceId: conversationsSpaceId }).catch(() => {});
  }
  return { run: finalRun, results };
}
|
|
4756
|
-
|
|
4757
|
-
|
|
4758
|
-
|
|
4759
|
-
};
|
|
4760
|
-
|
|
4761
|
-
|
|
4762
|
-
|
|
4763
|
-
|
|
4764
|
-
|
|
4765
|
-
|
|
4766
|
-
|
|
5774
|
+
/**
 * Selects scenarios and runs them as a batch.
 *
 * When `options.scenarioIds` is non-empty, the project's scenarios are
 * filtered to those whose `id` or `shortId` is listed; otherwise scenarios are
 * listed by project, tags and priority. If nothing matches, an empty run is
 * recorded as "passed" and returned without executing anything.
 *
 * @param {object} options - Filter and run options; forwarded to `runBatch`.
 * @returns {Promise<{run: object, results: Array<object>}>} The run and its results.
 */
async function runByFilter(options) {
  const wantsExplicitIds = options.scenarioIds && options.scenarioIds.length > 0;
  const scenarios = wantsExplicitIds
    ? listScenarios({ projectId: options.projectId }).filter(
        (candidate) => options.scenarioIds.includes(candidate.id) || options.scenarioIds.includes(candidate.shortId)
      )
    : listScenarios({
        projectId: options.projectId,
        tags: options.tags,
        priority: options.priority
      });

  if (scenarios.length !== 0) {
    return runBatch(scenarios, options);
  }

  // Nothing matched: still record a (trivially passed) run so callers get an id.
  const settings = loadConfig();
  const resolvedModel = resolveModel2(options.model ?? settings.defaultModel);
  const emptyRun = createRun({ url: options.url, model: resolvedModel, projectId: options.projectId });
  updateRun(emptyRun.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
  return { run: getRun(emptyRun.id), results: [] };
}
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
5795
|
+
/**
 * Creates a run record, starts executing the selected scenarios in the
 * background, and returns immediately with the run id.
 *
 * Scenario selection mirrors the id / tag / priority filtering used for
 * synchronous runs. The budget cap is checked before the run is created.
 * Execution happens in a detached async task that updates the run record,
 * emits `run:complete`, and dispatches webhooks when it finishes or fails.
 *
 * @param {object} options - Filter and run options (`url`, `model`,
 *   `scenarioIds`, `tags`, `priority`, `parallel`, `samples`, `maxCostCents`,
 *   `skipBudgetCheck`, `headed`, `projectId`, ...).
 * @returns {{runId: string, scenarioCount: number}} Id of the created run and
 *   the number of scenarios scheduled.
 * @throws {BudgetExceededError} When the estimated cost exceeds the configured cap.
 */
function startRunAsync(options) {
  const config = loadConfig();
  const model = resolveModel2(options.model ?? config.defaultModel);

  let scenarios;
  if (options.scenarioIds && options.scenarioIds.length > 0) {
    const all = listScenarios({ projectId: options.projectId });
    scenarios = all.filter((s) => options.scenarioIds.includes(s.id) || options.scenarioIds.includes(s.shortId));
  } else {
    scenarios = listScenarios({
      projectId: options.projectId,
      tags: options.tags,
      priority: options.priority
    });
  }

  if (!options.skipBudgetCheck) {
    const cap = options.maxCostCents ?? config.defaultMaxCostCents;
    if (cap !== undefined && cap > 0 && scenarios.length > 0) {
      const samples = options.samples ?? 1;
      const estimated = estimateRunCostCents(scenarios.length, model, samples);
      if (estimated > cap) {
        throw new BudgetExceededError(estimated, cap);
      }
    }
  }

  const parallel = options.parallel ?? 1;
  const run = createRun({
    url: options.url,
    model,
    headed: options.headed,
    parallel,
    projectId: options.projectId
  });
  if (scenarios.length === 0) {
    updateRun(run.id, { status: "passed", total: 0, finished_at: new Date().toISOString() });
    return { runId: run.id, scenarioCount: 0 };
  }
  updateRun(run.id, { status: "running", total: scenarios.length });

  // Background task: drains the scenario queue and finalizes the run record.
  const executeInBackground = async () => {
    const results = [];
    try {
      // Shared queue drained by `workerCount` workers; a single worker is
      // plain sequential execution. A loop (not recursion) keeps the promise
      // chain flat for long queues.
      const queue = [...scenarios];
      const worker = async () => {
        for (let scenario = queue.shift(); scenario; scenario = queue.shift()) {
          results.push(await runSingleScenario(scenario, run.id, options));
        }
      };
      const workerCount = Math.max(1, Math.min(parallel, scenarios.length));
      await Promise.all(Array.from({ length: workerCount }, () => worker()));

      const passed = results.filter((r) => r.status === "passed").length;
      const failed = results.filter((r) => r.status === "failed" || r.status === "error").length;
      updateRun(run.id, {
        status: failed > 0 ? "failed" : "passed",
        passed,
        failed,
        total: scenarios.length,
        finished_at: new Date().toISOString()
      });
      emit({ type: "run:complete", runId: run.id });
      const asyncRun = getRun(run.id);
      if (asyncRun) {
        dispatchWebhooks(asyncRun.status === "failed" ? "failed" : "completed", asyncRun).catch(() => {});
      }
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);
      updateRun(run.id, {
        status: "failed",
        finished_at: new Date().toISOString()
      });
      emit({ type: "run:complete", runId: run.id, error: errorMsg });
      const failedRun = getRun(run.id);
      if (failedRun) {
        dispatchWebhooks("failed", failedRun).catch(() => {});
      }
    }
  };

  // Deliberately not awaited. The terminal catch is a last-resort guard: if
  // the failure bookkeeping above itself throws, nothing is left to report to
  // from a detached task, and an unhandled rejection could take down the host
  // process.
  executeInBackground().catch(() => {});
  return { runId: run.id, scenarioCount: scenarios.length };
}
|
|
4877
|
-
|
|
4878
|
-
|
|
4879
|
-
|
|
4880
|
-
|
|
4881
|
-
|
|
4882
|
-
return this[GENERATOR].level;
|
|
4883
|
-
},
|
|
4884
|
-
set(level) {
|
|
4885
|
-
this[GENERATOR].level = level;
|
|
4886
|
-
}
|
|
4887
|
-
}
|
|
4888
|
-
});
|
|
4889
|
-
var createStyler = (open, close, parent) => {
|
|
4890
|
-
let openAll;
|
|
4891
|
-
let closeAll;
|
|
4892
|
-
if (parent === undefined) {
|
|
4893
|
-
openAll = open;
|
|
4894
|
-
closeAll = close;
|
|
4895
|
-
} else {
|
|
4896
|
-
openAll = parent.openAll + open;
|
|
4897
|
-
closeAll = close + parent.closeAll;
|
|
4898
|
-
}
|
|
4899
|
-
return {
|
|
4900
|
-
open,
|
|
4901
|
-
close,
|
|
4902
|
-
openAll,
|
|
4903
|
-
closeAll,
|
|
4904
|
-
parent
|
|
5884
|
+
/**
 * Estimates the cost, in cents, of a number of tokens for a model.
 *
 * Rates are expressed per one million tokens; models missing from the table
 * fall back to a rate of 0.5.
 *
 * @param {string} model - Model identifier.
 * @param {number} tokens - Number of tokens used.
 * @returns {number} Estimated cost in cents.
 */
function estimateCost(model, tokens) {
  const ratePerMillionByModel = {
    "claude-haiku-4-5-20251001": 0.1,
    "claude-sonnet-4-6-20260311": 0.9,
    "claude-opus-4-6-20260311": 3
  };
  const rate = ratePerMillionByModel[model] ?? 0.5;
  const millionsOfTokens = tokens / 1e6;
  // Multiplication order is kept as (tokens / 1e6) * rate * 100 so floating
  // point results stay bit-identical.
  return millionsOfTokens * rate * 100;
}
|
|
4942
5893
|
// src/lib/reporter.ts
|
|
4943
5894
|
init_database();
|
|
4944
5895
|
function useEmoji() {
|
|
@@ -5148,7 +6099,8 @@ function formatScenarioList(scenarios) {
|
|
|
5148
6099
|
lastStatusIcon = source_default.dim("~");
|
|
5149
6100
|
passRateStr = stats.passRate === "\u2014" ? source_default.dim("\u2014") : source_default.dim(stats.passRate);
|
|
5150
6101
|
}
|
|
5151
|
-
|
|
6102
|
+
const flakinessStr = s.flakinessScore !== null && s.flakinessScore !== undefined && s.flakinessScore < 0.8 ? source_default.yellow(` \u26A1 flaky (${Math.round(s.flakinessScore * 100)}%)`) : "";
|
|
6103
|
+
lines.push(` ${source_default.cyan(s.shortId)} ${s.name} ${priorityColor(s.priority)}${tags}${flakinessStr} ${lastStatusIcon} ${passRateStr}`);
|
|
5152
6104
|
}
|
|
5153
6105
|
lines.push("");
|
|
5154
6106
|
return lines.join(`
|
|
@@ -5402,11 +6354,11 @@ class Scheduler {
|
|
|
5402
6354
|
}
|
|
5403
6355
|
}
|
|
5404
6356
|
// src/lib/init.ts
|
|
5405
|
-
import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as
|
|
5406
|
-
import { join as
|
|
5407
|
-
import { homedir as
|
|
6357
|
+
import { existsSync as existsSync5, readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync4 } from "fs";
|
|
6358
|
+
import { join as join6, basename } from "path";
|
|
6359
|
+
import { homedir as homedir6 } from "os";
|
|
5408
6360
|
function detectFramework(dir) {
|
|
5409
|
-
const pkgPath =
|
|
6361
|
+
const pkgPath = join6(dir, "package.json");
|
|
5410
6362
|
if (!existsSync5(pkgPath))
|
|
5411
6363
|
return null;
|
|
5412
6364
|
let pkg;
|
|
@@ -5626,10 +6578,10 @@ function initProject(options) {
|
|
|
5626
6578
|
const project = ensureProject(name, projectPath);
|
|
5627
6579
|
const starterInputs = getStarterScenarios(framework ?? { name: "Unknown", features: [] }, project.id);
|
|
5628
6580
|
const scenarios = starterInputs.map((input) => createScenario(input));
|
|
5629
|
-
const configDir =
|
|
5630
|
-
const configPath =
|
|
6581
|
+
const configDir = join6(homedir6(), ".testers");
|
|
6582
|
+
const configPath = join6(configDir, "config.json");
|
|
5631
6583
|
if (!existsSync5(configDir)) {
|
|
5632
|
-
|
|
6584
|
+
mkdirSync4(configDir, { recursive: true });
|
|
5633
6585
|
}
|
|
5634
6586
|
let config = {};
|
|
5635
6587
|
if (existsSync5(configPath)) {
|
|
@@ -6276,179 +7228,6 @@ function generateLatestReport() {
|
|
|
6276
7228
|
throw new Error("No runs found");
|
|
6277
7229
|
return generateHtmlReport(runs[0].id);
|
|
6278
7230
|
}
|
|
6279
|
-
// src/lib/costs.ts
|
|
6280
|
-
init_database();
|
|
6281
|
-
init_config();
|
|
6282
|
-
/**
 * Maps a reporting period to the SQL fragment that restricts `r.created_at`.
 *
 * @param {string} period - One of "day", "week", "month" or "all".
 * @returns {string|undefined} SQL `AND ...` fragment ("" for "all");
 *   `undefined` for an unrecognized period.
 */
function getDateFilter(period) {
  const fragmentByPeriod = new Map([
    ["day", "AND r.created_at >= date('now', 'start of day')"],
    ["week", "AND r.created_at >= date('now', '-7 days')"],
    ["month", "AND r.created_at >= date('now', '-30 days')"],
    ["all", ""]
  ]);
  return fragmentByPeriod.get(period);
}
|
|
6294
|
-
/**
 * Returns the number of days a reporting period is treated as spanning.
 *
 * @param {string} period - One of "day", "week", "month" or "all".
 * @returns {number|undefined} 1, 7 or 30 ("all" is also treated as 30);
 *   `undefined` for an unrecognized period.
 */
function getPeriodDays(period) {
  const daysByPeriod = new Map([
    ["day", 1],
    ["week", 7],
    ["month", 30],
    ["all", 30]
  ]);
  return daysByPeriod.get(period);
}
|
|
6306
|
-
/**
 * Reads the budget section of the loaded configuration, substituting defaults
 * for any missing value.
 *
 * @returns {{maxPerRunCents: number, maxPerDayCents: number, warnAtPercent: number}}
 *   Budget limits; defaults are 50, 500 and 0.8 respectively.
 */
function loadBudgetConfig() {
  const limits = loadConfig().budget;
  const maxPerRunCents = limits?.maxPerRunCents ?? 50;
  const maxPerDayCents = limits?.maxPerDayCents ?? 500;
  const warnAtPercent = limits?.warnAtPercent ?? 0.8;
  return { maxPerRunCents, maxPerDayCents, warnAtPercent };
}
|
|
6315
|
-
/**
 * Aggregates recorded cost and token usage for a period, optionally limited
 * to one project.
 *
 * Runs three queries over `results` joined with `runs`: overall totals, a
 * per-model breakdown, and the ten most expensive scenarios.
 *
 * @param {{period?: string, projectId?: string}} [options] - `period`
 *   defaults to "month"; `projectId` restricts the summary to one project.
 * @returns {object} Summary with `period`, `totalCostCents`, `totalTokens`,
 *   `runCount`, `byModel`, `byScenario`, `avgCostPerRun` and
 *   `estimatedMonthlyCents`.
 */
function getCostSummary(options) {
  const database = getDatabase();
  const period = options?.period ?? "month";
  const projectId = options?.projectId;
  const dateFilter = getDateFilter(period);

  // The project id is bound as a parameter; only fixed fragments are interpolated.
  let projectFilter = "";
  let projectParams = [];
  if (projectId) {
    projectFilter = "AND ru.project_id = ?";
    projectParams = [projectId];
  }

  const totalsRow = database.query(`SELECT
COALESCE(SUM(r.cost_cents), 0) as total_cost,
COALESCE(SUM(r.tokens_used), 0) as total_tokens,
COUNT(DISTINCT r.run_id) as run_count
FROM results r
JOIN runs ru ON r.run_id = ru.id
WHERE 1=1 ${dateFilter} ${projectFilter}`).get(...projectParams);

  const modelRows = database.query(`SELECT
r.model,
COALESCE(SUM(r.cost_cents), 0) as cost_cents,
COALESCE(SUM(r.tokens_used), 0) as tokens,
COUNT(DISTINCT r.run_id) as runs
FROM results r
JOIN runs ru ON r.run_id = ru.id
WHERE 1=1 ${dateFilter} ${projectFilter}
GROUP BY r.model
ORDER BY cost_cents DESC`).all(...projectParams);
  const byModel = {};
  for (const { model, cost_cents: costCents, tokens, runs } of modelRows) {
    byModel[model] = { costCents, tokens, runs };
  }

  const scenarioRows = database.query(`SELECT
r.scenario_id,
COALESCE(s.name, r.scenario_id) as name,
COALESCE(SUM(r.cost_cents), 0) as cost_cents,
COALESCE(SUM(r.tokens_used), 0) as tokens,
COUNT(DISTINCT r.run_id) as runs
FROM results r
JOIN runs ru ON r.run_id = ru.id
LEFT JOIN scenarios s ON r.scenario_id = s.id
WHERE 1=1 ${dateFilter} ${projectFilter}
GROUP BY r.scenario_id
ORDER BY cost_cents DESC
LIMIT 10`).all(...projectParams);
  const byScenario = scenarioRows.map(({ scenario_id: scenarioId, name, cost_cents: costCents, tokens, runs }) => ({
    scenarioId,
    name,
    costCents,
    tokens,
    runs
  }));

  const runCount = totalsRow.run_count;
  const totalCostCents = totalsRow.total_cost;
  let avgCostPerRun = 0;
  if (runCount > 0) {
    avgCostPerRun = totalCostCents / runCount;
  }
  // Scale the period's spend to a 30-day month.
  const periodDays = getPeriodDays(period);
  let estimatedMonthlyCents = 0;
  if (periodDays > 0) {
    estimatedMonthlyCents = totalCostCents / periodDays * 30;
  }

  return {
    period,
    totalCostCents,
    totalTokens: totalsRow.total_tokens,
    runCount,
    byModel,
    byScenario,
    avgCostPerRun,
    estimatedMonthlyCents
  };
}
|
|
6382
|
-
/**
 * Decides whether a run with the given estimated cost fits the configured
 * budget.
 *
 * Checks, in order: the per-run limit, the daily limit (today's recorded
 * spend plus this run), and finally the warning threshold of the daily limit.
 *
 * @param {number} estimatedCostCents - Estimated cost of the upcoming run, in cents.
 * @returns {{allowed: boolean, warning?: string}} `allowed: false` with a
 *   reason when a limit would be exceeded; `allowed: true` with a warning
 *   when the warning threshold is crossed; otherwise just `allowed: true`.
 */
function checkBudget(estimatedCostCents) {
  const { maxPerRunCents, maxPerDayCents, warnAtPercent } = loadBudgetConfig();

  // Per-run limit is checked first so no database query is needed to reject.
  if (estimatedCostCents > maxPerRunCents) {
    const warning = `Estimated cost (${formatDollars(estimatedCostCents)}) exceeds per-run limit (${formatDollars(maxPerRunCents)})`;
    return { allowed: false, warning };
  }

  const spentToday = getCostSummary({ period: "day" }).totalCostCents;
  const projectedDaily = spentToday + estimatedCostCents;

  if (projectedDaily > maxPerDayCents) {
    const warning = `Daily spending (${formatDollars(spentToday)}) + this run (${formatDollars(estimatedCostCents)}) would exceed daily limit (${formatDollars(maxPerDayCents)})`;
    return { allowed: false, warning };
  }

  const warnThreshold = maxPerDayCents * warnAtPercent;
  if (projectedDaily > warnThreshold) {
    const warning = `Approaching daily limit: ${formatDollars(projectedDaily)} of ${formatDollars(maxPerDayCents)} (${Math.round(projectedDaily / maxPerDayCents * 100)}%)`;
    return { allowed: true, warning };
  }

  return { allowed: true };
}
|
|
6406
|
-
/**
 * Formats an amount given in cents as a dollar string with two decimals,
 * e.g. 1234 -> "$12.34".
 *
 * @param {number} cents - Amount in cents; may be fractional (averages).
 * @returns {string} The amount as "$D.CC", prefixed with "-" when negative.
 */
function formatDollars(cents) {
  // Round to whole cents BEFORE dividing: (100.5 / 100).toFixed(2) yields
  // "1.00" because 1.005 is not exactly representable in binary floating point.
  const wholeCents = Math.round(Math.abs(cents));
  // Put the sign in front of the currency symbol and never emit "-$0.00".
  const sign = cents < 0 && wholeCents > 0 ? "-" : "";
  return `${sign}$${(wholeCents / 100).toFixed(2)}`;
}
|
|
6409
|
-
/**
 * Formats a token count compactly: millions as "1.2M", thousands as "3.4K",
 * anything smaller as the plain number.
 *
 * @param {number} tokens - Number of tokens.
 * @returns {string} Human-readable token count.
 */
function formatTokens(tokens) {
  if (tokens >= 1e6) {
    return `${(tokens / 1e6).toFixed(1)}M`;
  }
  if (tokens >= 1000) {
    const thousands = (tokens / 1000).toFixed(1);
    // Values just below one million (e.g. 999999) round up to "1000.0";
    // promote them to the next unit instead of printing "1000.0K".
    return thousands === "1000.0" ? "1.0M" : `${thousands}K`;
  }
  return String(tokens);
}
|
|
6416
|
-
/**
 * Renders a cost summary as a multi-line, colorized string for terminal
 * output: headline totals, then an optional per-model table and an optional
 * per-scenario table (each omitted when it has no rows).
 *
 * @param {object} summary - Summary with the fields this function reads:
 *   `period`, `totalCostCents`, `totalTokens`, `runCount`, `avgCostPerRun`,
 *   `estimatedMonthlyCents`, `byModel` and `byScenario`.
 * @returns {string} The report, lines joined with "\n".
 */
function formatCostsTerminal(summary) {
  const rule = (width) => "\u2500".repeat(width);
  const highlight = (cents) => source_default.yellow(formatDollars(cents));

  const out = [
    "",
    source_default.bold(` Cost Summary (${summary.period})`),
    "",
    ` Total: ${highlight(summary.totalCostCents)} (${formatTokens(summary.totalTokens)} tokens across ${summary.runCount} runs)`,
    ` Avg/run: ${highlight(summary.avgCostPerRun)}`,
    ` Est/month: ${highlight(summary.estimatedMonthlyCents)}`
  ];

  const modelEntries = Object.entries(summary.byModel);
  if (modelEntries.length > 0) {
    out.push(
      "",
      source_default.bold(" By Model"),
      ` ${"Model".padEnd(40)} ${"Cost".padEnd(12)} ${"Tokens".padEnd(12)} Runs`,
      ` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)}`
    );
    modelEntries.forEach(([model, data]) => {
      out.push(` ${model.padEnd(40)} ${formatDollars(data.costCents).padEnd(12)} ${formatTokens(data.tokens).padEnd(12)} ${data.runs}`);
    });
  }

  if (summary.byScenario.length > 0) {
    out.push(
      "",
      source_default.bold(" Scenarios by Cost (most expensive first)"),
      ` ${"Scenario".padEnd(40)} ${"Total Cost".padEnd(12)} ${"Avg/Run".padEnd(12)} ${"Runs".padEnd(6)} Tokens`,
      ` ${rule(40)} ${rule(12)} ${rule(12)} ${rule(6)} ${rule(10)}`
    );
    summary.byScenario.forEach((scenario) => {
      // Long names are cut to 35 characters plus an ellipsis to fit the column.
      let label = scenario.name;
      if (label.length > 38) {
        label = `${label.slice(0, 35)}...`;
      }
      let avgPerRun = 0;
      if (scenario.runs > 0) {
        avgPerRun = scenario.costCents / scenario.runs;
      }
      out.push(` ${label.padEnd(40)} ${formatDollars(scenario.costCents).padEnd(12)} ${formatDollars(avgPerRun).padEnd(12)} ${String(scenario.runs).padEnd(6)} ${formatTokens(scenario.tokens)}`);
    });
  }

  out.push("");
  return out.join("\n");
}
|
|
6449
|
-
/**
 * Serializes a cost summary as pretty-printed JSON (two-space indentation).
 *
 * @param {object} summary - The cost summary to serialize.
 * @returns {string} The JSON text.
 */
function formatCostsJSON(summary) {
  const indentWidth = 2;
  const json = JSON.stringify(summary, null, indentWidth);
  return json;
}
|
|
6452
7231
|
// src/lib/watch.ts
|
|
6453
7232
|
import { watch } from "fs";
|
|
6454
7233
|
import { resolve } from "path";
|