@bbearai/ai-executor 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-WT22IQMS.mjs +175 -0
- package/dist/chunk-WT22IQMS.mjs.map +1 -0
- package/dist/cli.js +622 -129
- package/dist/cli.js.map +1 -1
- package/dist/index.d.mts +533 -8
- package/dist/index.d.ts +533 -8
- package/dist/index.js +1613 -131
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1411 -130
- package/dist/index.mjs.map +1 -1
- package/dist/report-generator-EVZEB33O.mjs +7 -0
- package/dist/report-generator-EVZEB33O.mjs.map +1 -0
- package/package.json +5 -1
package/dist/cli.js
CHANGED
|
@@ -28,10 +28,88 @@ var import_supabase_js = require("@supabase/supabase-js");
|
|
|
28
28
|
|
|
29
29
|
// src/runner.ts
|
|
30
30
|
var import_sdk = __toESM(require("@anthropic-ai/sdk"));
|
|
31
|
-
var import_zod = require("zod");
|
|
32
31
|
|
|
33
32
|
// src/browser.ts
|
|
34
33
|
var import_stagehand = require("@browserbasehq/stagehand");
|
|
34
|
+
|
|
35
|
+
// src/supabase-auth.ts
|
|
36
|
+
/**
 * Returns the first dot-separated label of the hostname of a Supabase URL
 * (e.g. "abcd" for "https://abcd.supabase.co").
 *
 * @param {string} supabaseUrl - Absolute URL; parsed with the WHATWG URL constructor.
 * @returns {string} The leading hostname label.
 * @throws {TypeError} If `supabaseUrl` cannot be parsed as a URL.
 */
function extractProjectRef(supabaseUrl) {
  const [leadingLabel] = new URL(supabaseUrl).hostname.split(".");
  return leadingLabel;
}
|
|
42
|
+
/**
 * Signs in with email and password by POSTing to
 * `<supabaseUrl>/auth/v1/token?grant_type=password`.
 *
 * @param {{supabaseUrl: string, anonKey: string, email: string, password: string}} auth
 *   Connection details and credentials; one trailing slash on `supabaseUrl` is dropped.
 * @returns {Promise<object>} The parsed JSON response body (the session).
 * @throws {Error} When the response is not OK (message carries the status and
 *   the first 200 characters of the body) or when the body has no `access_token`.
 */
async function authenticateSupabase(auth) {
  const baseUrl = auth.supabaseUrl.replace(/\/$/, "");
  const credentials = {
    email: auth.email,
    password: auth.password
  };
  const response = await fetch(`${baseUrl}/auth/v1/token?grant_type=password`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "apikey": auth.anonKey
    },
    body: JSON.stringify(credentials)
  });
  if (!response.ok) {
    // Reading the error body is best-effort; an unreadable body becomes "".
    const detail = await response.text().catch(() => "");
    throw new Error(`Supabase auth failed (${response.status}): ${detail.slice(0, 200)}`);
  }
  const session = await response.json();
  if (session.access_token) {
    return session;
  }
  throw new Error("Supabase auth returned no access_token");
}
|
|
67
|
+
/**
 * Stores a Supabase session in the page's localStorage under the key
 * `sb-<projectRef>-auth-token`.
 *
 * If the page has no URL yet (empty or "about:blank") it is first navigated to
 * the Supabase URL; a failure of that navigation is ignored.
 *
 * NOTE(review): localStorage is scoped per origin, and the only navigation done
 * here targets the Supabase URL itself — confirm callers rely on that origin.
 *
 * @param {object} page - Page exposing `url()`, `goto()` and `evaluate()`.
 * @param {{supabaseUrl: string}} auth - Supplies the Supabase base URL.
 * @param {object} session - Session whose token fields are persisted.
 * @returns {Promise<void>}
 */
async function injectSupabaseAuth(page, auth, session) {
  const projectRef = extractProjectRef(auth.supabaseUrl);
  const {
    access_token,
    refresh_token,
    expires_in,
    expires_at,
    token_type,
    user
  } = session;
  const entry = {
    key: `sb-${projectRef}-auth-token`,
    value: JSON.stringify({
      access_token,
      refresh_token,
      expires_in,
      expires_at,
      token_type,
      user
    })
  };

  const currentUrl = page.url();
  const needsNavigation = !currentUrl || currentUrl === "about:blank";
  if (needsNavigation) {
    const landingUrl = auth.supabaseUrl.replace(/\/$/, "");
    const gotoOptions = { waitUntil: "domcontentloaded", timeoutMs: 1e4 };
    // Best-effort: a failed navigation is deliberately not reported.
    await page.goto(landingUrl, gotoOptions).catch(() => {
    });
  }

  // The callback runs in the page, so it may only use what is passed in.
  await page.evaluate(({ key, value }) => {
    localStorage.setItem(key, value);
  }, entry);
}
|
|
93
|
+
/**
 * Checks an access token by requesting `<supabaseUrl>/auth/v1/user` with it.
 *
 * @param {{supabaseUrl: string, anonKey: string}} auth - Base URL and API key.
 * @param {string} accessToken - Token sent as a Bearer credential.
 * @returns {Promise<boolean>} The `ok` flag of the HTTP response.
 */
async function verifySupabaseSession(auth, accessToken) {
  const endpoint = `${auth.supabaseUrl.replace(/\/$/, "")}/auth/v1/user`;
  const headers = {
    "Authorization": `Bearer ${accessToken}`,
    "apikey": auth.anonKey
  };
  const { ok } = await fetch(endpoint, { headers });
  return ok;
}
|
|
103
|
+
/**
 * Runs the Supabase login sequence: authenticate, write the session into the
 * page, then verify the access token against the auth API.
 *
 * @param {object} page - Page that receives the session.
 * @param {object} auth - Supabase URL, anon key and credentials.
 * @returns {Promise<void>}
 * @throws {Error} If authentication fails or the token is rejected on verification.
 */
async function performSupabaseAuth(page, auth) {
  const session = await authenticateSupabase(auth);
  await injectSupabaseAuth(page, auth, session);
  const tokenAccepted = await verifySupabaseSession(auth, session.access_token);
  if (tokenAccepted) {
    return;
  }
  throw new Error("Supabase auth verification failed \u2014 session token rejected");
}
|
|
111
|
+
|
|
112
|
+
// src/browser.ts
|
|
35
113
|
var DEFAULT_MODEL = "anthropic/claude-sonnet-4-20250514";
|
|
36
114
|
async function createStagehandSession(config, anthropicApiKey) {
|
|
37
115
|
const modelName = config.model ?? DEFAULT_MODEL;
|
|
@@ -44,6 +122,11 @@ async function createStagehandSession(config, anthropicApiKey) {
|
|
|
44
122
|
modelName,
|
|
45
123
|
apiKey: anthropicApiKey
|
|
46
124
|
},
|
|
125
|
+
// Bypass pino logger — its pino-pretty transport uses worker threads
|
|
126
|
+
// which fail in Vercel's serverless environment
|
|
127
|
+
logger: (msg) => {
|
|
128
|
+
if ((msg.level ?? 0) >= 40) console.warn("[Stagehand]", msg.message);
|
|
129
|
+
},
|
|
47
130
|
localBrowserLaunchOptions: config.provider === "local" ? {
|
|
48
131
|
headless: config.headless ?? true,
|
|
49
132
|
viewport
|
|
@@ -67,6 +150,21 @@ async function createStagehandSession(config, anthropicApiKey) {
|
|
|
67
150
|
}
|
|
68
151
|
};
|
|
69
152
|
}
|
|
153
|
+
/**
 * Registers an init script on the Stagehand browser context that sets
 * `window.__bugbear_suppress` and the `__bugbear_suppress` localStorage entry.
 *
 * Best-effort: does nothing when the context has no `addInitScript`, and any
 * error is swallowed.
 *
 * @param {object} stagehand - Object whose `context` may expose `addInitScript`.
 * @returns {Promise<void>}
 */
async function suppressBugBearWidget(stagehand) {
  // Handed to the browser context, so it must not close over outer variables.
  const markSuppressed = () => {
    window.__bugbear_suppress = true;
    try {
      localStorage.setItem("__bugbear_suppress", "true");
    } catch {
    }
  };

  try {
    const browserContext = stagehand.context;
    if (!browserContext?.addInitScript) {
      return;
    }
    await browserContext.addInitScript(markSuppressed);
  } catch {
    // Suppression is optional; failures are intentionally ignored.
  }
}
|
|
70
168
|
async function injectAuth(page, auth, stagehand) {
|
|
71
169
|
if (auth.type === "cookie") {
|
|
72
170
|
for (const c of auth.cookies) {
|
|
@@ -92,23 +190,27 @@ async function injectAuth(page, auth, stagehand) {
|
|
|
92
190
|
}, auth.items);
|
|
93
191
|
} else if (auth.type === "form-login") {
|
|
94
192
|
await performFormLogin(page, auth, stagehand);
|
|
193
|
+
} else if (auth.type === "supabase-native") {
|
|
194
|
+
await performSupabaseAuth(page, auth);
|
|
95
195
|
}
|
|
96
196
|
}
|
|
97
197
|
async function performFormLogin(page, auth, stagehand) {
|
|
98
198
|
await page.goto(auth.loginUrl, { waitUntil: "domcontentloaded" });
|
|
99
199
|
await page.waitForLoadState("networkidle", 15e3).catch(() => {
|
|
100
200
|
});
|
|
201
|
+
await fillLoginCredentials(page, auth);
|
|
101
202
|
if (stagehand) {
|
|
102
203
|
await stagehand.act(
|
|
103
|
-
|
|
104
|
-
)
|
|
204
|
+
"Click the login, sign-in, or submit button to submit the form."
|
|
205
|
+
).catch(() => {
|
|
206
|
+
});
|
|
105
207
|
} else {
|
|
106
|
-
await
|
|
208
|
+
await clickSubmitButton(page);
|
|
107
209
|
}
|
|
108
210
|
await page.waitForLoadState("networkidle", 15e3).catch(() => {
|
|
109
211
|
});
|
|
110
212
|
}
|
|
111
|
-
async function
|
|
213
|
+
async function fillLoginCredentials(page, auth) {
|
|
112
214
|
await page.waitForSelector(
|
|
113
215
|
'input[type="email"], input[type="text"][name*="email"], input[name*="user"], input[type="text"]',
|
|
114
216
|
{ timeout: 15e3 }
|
|
@@ -142,6 +244,8 @@ async function manualFormLogin(page, auth) {
|
|
|
142
244
|
} else {
|
|
143
245
|
throw new Error("Could not find password input on login page");
|
|
144
246
|
}
|
|
247
|
+
}
|
|
248
|
+
async function clickSubmitButton(page) {
|
|
145
249
|
const submitSelectors = [
|
|
146
250
|
'button[type="submit"]',
|
|
147
251
|
'input[type="submit"]'
|
|
@@ -166,21 +270,23 @@ async function generateRunSummary(anthropic, testTitle, steps, model) {
|
|
|
166
270
|
(s) => `Step ${s.stepNumber}: ${s.action}
|
|
167
271
|
Expected: ${s.expectedResult}
|
|
168
272
|
Actual: ${s.actualResult}
|
|
169
|
-
Result: ${s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
|
|
273
|
+
Result: ${s.skipped ? "SKIPPED" : s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
|
|
170
274
|
Error: ${s.error}` : ""}`
|
|
171
275
|
).join("\n\n");
|
|
172
|
-
const passCount = steps.filter((s) => s.passed).length;
|
|
173
|
-
const failCount = steps.filter((s) => !s.passed).length;
|
|
276
|
+
const passCount = steps.filter((s) => s.passed && !s.skipped).length;
|
|
277
|
+
const failCount = steps.filter((s) => !s.passed && !s.skipped).length;
|
|
278
|
+
const skipCount = steps.filter((s) => s.skipped).length;
|
|
279
|
+
const skipNote = skipCount > 0 ? " Some steps were skipped due to page state recovery \u2014 these are not failures, just steps that could not be executed." : "";
|
|
174
280
|
const response = await anthropic.messages.create({
|
|
175
281
|
model,
|
|
176
282
|
max_tokens: 512,
|
|
177
283
|
messages: [
|
|
178
284
|
{
|
|
179
285
|
role: "user",
|
|
180
|
-
content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything)
|
|
286
|
+
content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything).${skipNote} Be concise and factual.
|
|
181
287
|
|
|
182
288
|
Test: ${testTitle}
|
|
183
|
-
Results: ${passCount} passed, ${failCount} failed out of ${steps.length} steps
|
|
289
|
+
Results: ${passCount} passed, ${failCount} failed, ${skipCount} skipped out of ${steps.length} steps
|
|
184
290
|
|
|
185
291
|
${stepsText}`
|
|
186
292
|
}
|
|
@@ -189,7 +295,317 @@ ${stepsText}`
|
|
|
189
295
|
return response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
|
|
190
296
|
}
|
|
191
297
|
|
|
298
|
+
// src/vision-evaluator.ts
|
|
299
|
+
// Model id used by evaluateStep when the caller does not set `input.model`.
var DEFAULT_MODEL2 = "claude-sonnet-4-20250514";
|
|
300
|
+
/**
 * Asks the model to judge a test step from BEFORE/AFTER screenshots.
 *
 * Sends one user message containing both screenshots (base64 PNG) and the
 * evaluation prompt, joins the text blocks of the reply and hands the result
 * to `parseEvaluation`.
 *
 * @param {object} input
 * @param {object} input.anthropic - Client exposing `messages.create`.
 * @param {{toString: Function}} input.screenshotBefore - Encoded via `toString("base64")`.
 * @param {{toString: Function}} input.screenshotAfter - Encoded via `toString("base64")`.
 * @param {string} input.action - Action text placed in the prompt.
 * @param {string} input.expectedResult - Expected outcome placed in the prompt.
 * @param {string} [input.evaluationHint] - Optional extra hint line.
 * @param {string} [input.model] - Model id; falls back to DEFAULT_MODEL2.
 * @returns {Promise<object>} Whatever `parseEvaluation` returns for the reply text.
 */
async function evaluateStep(input) {
  const toTextBlock = (text) => ({ type: "text", text });
  const toImageBlock = (screenshot) => ({
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: screenshot.toString("base64")
    }
  });

  const model = input.model ?? DEFAULT_MODEL2;
  let hintClause = "";
  if (input.evaluationHint) {
    hintClause = `
EVALUATION HINT: ${input.evaluationHint}`;
  }

  const content = [
    toTextBlock("BEFORE screenshot (page state before the action):"),
    toImageBlock(input.screenshotBefore),
    toTextBlock("AFTER screenshot (page state after the action):"),
    toImageBlock(input.screenshotAfter),
    toTextBlock(`You are a QA test evaluator. Compare the BEFORE and AFTER screenshots to evaluate this test step.

ACTION PERFORMED: ${input.action}
EXPECTED RESULT: ${input.expectedResult}${hintClause}

Analyze the visual differences between the two screenshots and determine if the expected result was achieved.

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"passed": true/false,
"confidence": 0.0-1.0,
"actualResult": "Brief description of what actually changed between the screenshots"
}

Confidence guide:
- 0.95-1.0: Clearly achieved/not achieved, obvious visual evidence
- 0.8-0.94: Very likely, strong visual indicators
- 0.6-0.79: Probable but some ambiguity
- Below 0.6: Uncertain, hard to tell from screenshots alone`)
  ];

  const response = await input.anthropic.messages.create({
    model,
    max_tokens: 512,
    messages: [{ role: "user", content }]
  });

  // Only text blocks carry the verdict; other block types are ignored.
  let replyText = "";
  for (const block of response.content) {
    if (block.type === "text") {
      replyText += block.text;
    }
  }
  return parseEvaluation(replyText);
}
|
|
364
|
+
/**
 * Turns the model's reply into an evaluation object.
 *
 * First tries to parse the trimmed reply as JSON. If that fails, looks for an
 * embedded `{...}` span containing "passed", "confidence" and "actualResult"
 * in that order and parses it. If both fail, returns a failed evaluation with
 * confidence 0.3 that quotes the first 200 characters of the reply.
 *
 * @param {string} text - Raw reply text.
 * @returns {{passed: boolean, confidence: number, actualResult: string}}
 */
function parseEvaluation(text) {
  const parseAndValidate = (candidate) => validateEvaluation(JSON.parse(candidate));

  try {
    return parseAndValidate(text.trim());
  } catch {
    const embedded = text.match(/\{[\s\S]*"passed"[\s\S]*"confidence"[\s\S]*"actualResult"[\s\S]*\}/);
    if (embedded) {
      try {
        return parseAndValidate(embedded[0]);
      } catch {
        // Fall through to the unparseable-response result below.
      }
    }
  }

  return {
    passed: false,
    confidence: 0.3,
    actualResult: `Vision evaluation returned unparseable response: ${text.slice(0, 200)}`
  };
}
|
|
384
|
+
/**
 * Normalises a parsed evaluation so every field has the expected type.
 *
 * - `passed`: kept only if it is a boolean, otherwise false.
 * - `confidence`: clamped to [0, 1] if it is a number, otherwise 0.5.
 * - `actualResult`: kept only if it is a string, otherwise a placeholder.
 *
 * @param {object} parsed - Value produced by JSON.parse.
 * @returns {{passed: boolean, confidence: number, actualResult: string}}
 * @throws {TypeError} If `parsed` is null or undefined.
 */
function validateEvaluation(parsed) {
  const { passed, confidence, actualResult } = parsed;

  let boundedConfidence = 0.5;
  if (typeof confidence === "number") {
    boundedConfidence = Math.max(0, Math.min(1, confidence));
  }

  let description = "No description provided";
  if (typeof actualResult === "string") {
    description = actualResult;
  }

  return {
    passed: passed === true,
    confidence: boundedConfidence,
    actualResult: description
  };
}
|
|
391
|
+
|
|
392
|
+
// src/action-executor.ts
|
|
393
|
+
/**
 * Executes one test step, preferring the deterministic path.
 *
 * When the step has both `selector` and `actionType`, it is run through
 * `executePlaywrightAction`; if that throws, the step is retried through
 * Stagehand. Steps without both fields go straight to Stagehand.
 *
 * @param {object} page - Page used for the deterministic path.
 * @param {object} stagehand - Stagehand instance used for the AI path.
 * @param {object} step - Step with `action` and optional `selector`/`actionType`.
 * @returns {Promise<{deterministic: boolean, error?: string}>} `error` is set
 *   only when the path(s) attempted all failed.
 */
async function executeAction(page, stagehand, step) {
  const hasDeterministicRecipe = Boolean(step.selector && step.actionType);
  if (!hasDeterministicRecipe) {
    return executeStagehandAction(stagehand, step);
  }

  try {
    await executePlaywrightAction(page, step);
    return { deterministic: true };
  } catch (playwrightErr) {
    const { error: fallbackError } = await executeStagehandAction(stagehand, step);
    let error;
    if (fallbackError) {
      const playwrightMessage = playwrightErr instanceof Error ? playwrightErr.message : String(playwrightErr);
      error = `Playwright failed (${playwrightMessage}), Stagehand fallback also failed: ${fallbackError}`;
    }
    // A successful fallback reports no error; the Playwright failure is dropped.
    return { deterministic: false, error };
  }
}
|
|
408
|
+
/**
 * Performs a step deterministically from its `actionType` and `selector`.
 *
 * Supported action types: "click", "fill", "select", "navigate", "scroll",
 * "wait" and "assert" (a no-op here). For every type except "wait", a truthy
 * `waitMs` adds a pause after the action.
 *
 * Every action that cannot take effect throws, so the caller can fall back to
 * another strategy instead of recording a success that never happened:
 * "scroll" throws when the selector matches nothing, and "select" throws when
 * the element is missing or does not accept the requested value.
 *
 * @param {object} page - Page exposing `locator`, `evaluate`, `goto`,
 *   `waitForSelector` and `waitForTimeout`.
 * @param {{actionType: string, selector?: string, value?: string, waitMs?: number}} step
 * @returns {Promise<void>}
 * @throws {Error} On an unknown action type, a missing navigate URL, a missing
 *   target element, or any error raised by the page.
 */
async function executePlaywrightAction(page, step) {
  const { actionType, selector, value, waitMs } = step;
  switch (actionType) {
    case "click": {
      await page.locator(selector).click();
      break;
    }
    case "fill": {
      await page.locator(selector).fill(value ?? "");
      break;
    }
    case "select": {
      await page.evaluate(
        ({ sel, val }) => {
          const el = document.querySelector(sel);
          if (!el) throw new Error(`Select element not found: ${sel}`);
          el.value = val;
          // A <select> ignores values that match no option; report that
          // instead of dispatching a change event for a selection that did not happen.
          if (el.value !== String(val)) throw new Error(`Select option not found: ${val}`);
          el.dispatchEvent(new Event("change", { bubbles: true }));
        },
        { sel: selector, val: value ?? "" }
      );
      break;
    }
    case "navigate": {
      const url = value ?? selector ?? "";
      if (!url) throw new Error("Navigate action requires a value or selector with the URL");
      await page.goto(url, { waitUntil: "domcontentloaded", timeoutMs: 15e3 });
      break;
    }
    case "scroll": {
      await page.evaluate((sel) => {
        const el = document.querySelector(sel);
        // Previously a missing element was silently ignored.
        if (!el) throw new Error(`Scroll element not found: ${sel}`);
        el.scrollIntoView({ behavior: "smooth", block: "center" });
      }, selector);
      break;
    }
    case "wait": {
      if (selector) {
        await page.waitForSelector(selector, { timeout: waitMs ?? 1e4 });
      } else if (waitMs) {
        await page.waitForTimeout(waitMs);
      }
      break;
    }
    case "assert": {
      // Nothing to do: assertions are evaluated elsewhere.
      break;
    }
    default: {
      throw new Error(`Unknown actionType: ${actionType}`);
    }
  }
  // For "wait" the delay was already consumed above.
  if (waitMs && actionType !== "wait") {
    await page.waitForTimeout(waitMs);
  }
}
|
|
465
|
+
/**
 * Runs a step's natural-language `action` through `stagehand.act`.
 *
 * Never throws: a failure is reported through the `error` field instead.
 *
 * @param {object} stagehand - Object exposing `act(instruction)`.
 * @param {{action: string}} step - Step whose `action` is passed to `act`.
 * @returns {Promise<{deterministic: false, error?: string}>}
 */
async function executeStagehandAction(stagehand, step) {
  const outcome = { deterministic: false };
  try {
    await stagehand.act(step.action);
  } catch (err) {
    outcome.error = err instanceof Error ? err.message : String(err);
  }
  return outcome;
}
|
|
476
|
+
|
|
477
|
+
// src/selector-discovery.ts
|
|
478
|
+
/**
 * Derives a CSS selector for the element the last action touched.
 *
 * The element is `document.__bbLastClicked` if set, otherwise
 * `document.activeElement`. Strategies are tried in this order: test id
 * attribute, id, role (optionally with a name), aria-label, and finally a
 * short tag/class path of at most four levels.
 *
 * The emitted selector always refers to the attribute the value was actually
 * read from (`data-testid` vs `data-test-id`, `aria-label` vs `name`), and
 * attribute values, ids and class names are quoted or filtered so the result
 * is a valid selector even when they contain characters such as `.`, `:`, `/`
 * or `"`.
 *
 * @param {object} page - Page exposing `evaluate(fn)`.
 * @returns {Promise<{selector: string, strategy: string, suggestedActionType: (string|undefined), tagName: string, textContent: string}|null>}
 *   `null` when there is no usable element or the evaluation fails.
 */
async function discoverSelector(page) {
  try {
    // The callback executes inside the page, so every helper lives inside it.
    return await page.evaluate(() => {
      const SIMPLE_IDENT = /^[A-Za-z_][\w-]*$/;
      const quote = (raw) => `"${String(raw).replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;

      const el = document.__bbLastClicked ?? document.activeElement;
      if (!el || el === document.body || el === document.documentElement) return null;

      const tagName = el.tagName?.toLowerCase() ?? "unknown";
      const textContent = (el.textContent ?? "").trim().slice(0, 100);
      const role = el.getAttribute("role");
      const ariaLabel = el.getAttribute("aria-label");
      const testIdAttr = ["data-testid", "data-test-id"].find((attr) => el.getAttribute(attr));

      let selector = "";
      let strategy = "css-path";
      if (testIdAttr) {
        selector = `[${testIdAttr}=${quote(el.getAttribute(testIdAttr))}]`;
        strategy = "data-testid";
      } else if (el.id && !/^:r[0-9a-z]+:?$/.test(el.id) && !/^react-/.test(el.id)) {
        // `#id` is only safe for plain identifiers; anything else is matched by attribute.
        selector = SIMPLE_IDENT.test(el.id) ? `#${el.id}` : `[id=${quote(el.id)}]`;
        strategy = "id";
      } else if (role) {
        const nameAttr = ariaLabel != null ? "aria-label" : "name";
        const name = ariaLabel ?? el.getAttribute("name") ?? "";
        selector = name ? `[role=${quote(role)}][${nameAttr}=${quote(name)}]` : `[role=${quote(role)}]`;
        strategy = "role";
      } else if (ariaLabel) {
        selector = `[aria-label=${quote(ariaLabel)}]`;
        strategy = "aria-label";
      } else {
        const parts = [];
        let current = el;
        while (current && current !== document.body) {
          let part = current.tagName.toLowerCase();
          if (current.className && typeof current.className === "string") {
            // Skip generated (`_`-prefixed), very long, or non-identifier
            // classes (e.g. `md:flex`, `w-1/2`) that would break the selector.
            const usable = current.className.split(/\s+/).find(
              (c) => c && !c.startsWith("_") && c.length < 30 && SIMPLE_IDENT.test(c)
            );
            if (usable) {
              part += `.${usable}`;
            }
          }
          parts.unshift(part);
          current = current.parentElement;
          if (parts.length >= 4) break;
        }
        selector = parts.join(" > ");
        strategy = "css-path";
      }

      let suggestedActionType;
      if (tagName === "button" || tagName === "a" || role === "button") {
        suggestedActionType = "click";
      } else if (tagName === "input" || tagName === "textarea") {
        const type = el.getAttribute("type") ?? "text";
        suggestedActionType = type === "checkbox" || type === "radio" ? "click" : "fill";
      } else if (tagName === "select") {
        suggestedActionType = "select";
      }
      return { selector, strategy, suggestedActionType, tagName, textContent };
    });
  } catch {
    // Discovery is advisory only; any failure yields "nothing discovered".
    return null;
  }
}
|
|
547
|
+
/**
 * Installs a capturing click listener in the page that records the most
 * recent click target on `document.__bbLastClicked`.
 *
 * Best-effort: any error from `page.evaluate` is swallowed.
 *
 * @param {object} page - Page exposing `evaluate(fn)`.
 * @returns {Promise<void>}
 */
async function installClickTracker(page) {
  // Runs inside the page; must not reference anything from this scope.
  const attachTracker = () => {
    const rememberTarget = (event) => {
      document.__bbLastClicked = event.target;
    };
    document.addEventListener("click", rememberTarget, { capture: true });
  };

  try {
    await page.evaluate(attachTracker);
  } catch {
    // Tracking is optional; failures are intentionally ignored.
  }
}
|
|
557
|
+
|
|
558
|
+
// src/cost.ts
|
|
559
|
+
// Assumed token usage per kind of model call, used for estimates only.
var TOKEN_PROFILE = {
  /** act(): input and output tokens per step. */
  actInput: 2000,
  actOutput: 200,
  /** extract(): input and output tokens per step. */
  extractInput: 3000,
  extractOutput: 500,
  /** Summary: input and output tokens, counted once per run. */
  summaryInput: 2000,
  summaryOutput: 500
};

/**
 * Estimates the token usage of a run with `stepCount` steps: each step is
 * charged one act() and one extract(), and the summary is charged once.
 *
 * @param {number} stepCount - Number of steps in the run.
 * @returns {{inputTokens: number, outputTokens: number}}
 */
function getTokenEstimate(stepCount) {
  const {
    actInput,
    actOutput,
    extractInput,
    extractOutput,
    summaryInput,
    summaryOutput
  } = TOKEN_PROFILE;
  const inputPerStep = actInput + extractInput;
  const outputPerStep = actOutput + extractOutput;
  return {
    inputTokens: stepCount * inputPerStep + summaryInput,
    outputTokens: stepCount * outputPerStep + summaryOutput
  };
}
|
|
576
|
+
|
|
192
577
|
// src/runner.ts
|
|
578
|
+
// Time limit in milliseconds (30 000) passed to withTimeout for the vision evaluation call.
var AI_OPERATION_TIMEOUT_MS = 3e4;
|
|
579
|
+
// Retries per step when `config.retry.maxRetries` is not provided.
var DEFAULT_MAX_RETRIES = 2;
|
|
580
|
+
// Pause in milliseconds (2 000) between retries when `config.retry.retryDelayMs` is not provided.
var DEFAULT_RETRY_DELAY_MS = 2e3;
|
|
581
|
+
/**
 * Tells whether an error message matches one of the known failure signatures
 * (timeouts, connection errors, navigation/page/session loss) that are worth
 * retrying.
 *
 * @param {string} error - Error message to classify.
 * @returns {boolean} True if any signature matches.
 */
function isRetryableError(error) {
  const retryableSignatures = [
    /timed?\s*out/i,
    /ECONNREFUSED/i,
    /ECONNRESET/i,
    /ENOTFOUND/i,
    /net::ERR_/i,
    /navigation failed/i,
    /page crashed/i,
    /context was destroyed/i,
    /target closed/i,
    /session closed/i,
    /browser disconnected/i,
    /execution context/i
  ];
  for (const signature of retryableSignatures) {
    if (signature.test(error)) {
      return true;
    }
  }
  return false;
}
|
|
598
|
+
/**
 * Races a promise against a timer.
 *
 * Settles like `promise` if it settles first; otherwise rejects with
 * "<operation> timed out after <timeoutMs>ms". The timer is always cleared
 * once the race settles. The underlying operation is not cancelled.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds.
 * @param {string} operation - Label used in the timeout error message.
 * @returns {Promise<*>} The value `promise` resolves to.
 * @throws {Error} On timeout, or whatever `promise` rejects with.
 */
async function withTimeout(promise, timeoutMs, operation) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
    timer = setTimeout(expire, timeoutMs);
  });
  return Promise.race([promise, deadline]).finally(() => {
    clearTimeout(timer);
  });
}
|
|
193
609
|
async function runTest(config) {
|
|
194
610
|
const anthropic = new import_sdk.default({ apiKey: config.anthropicApiKey });
|
|
195
611
|
const startTime = Date.now();
|
|
@@ -198,60 +614,71 @@ async function runTest(config) {
|
|
|
198
614
|
headless: true
|
|
199
615
|
};
|
|
200
616
|
config.onStatusChange?.("initializing");
|
|
201
|
-
|
|
202
|
-
const { stagehand, page } = session;
|
|
617
|
+
let session;
|
|
203
618
|
const stepResults = [];
|
|
204
619
|
let pendingConsoleLogs = [];
|
|
205
620
|
let pendingNetworkErrors = [];
|
|
206
621
|
let stepStartTime = Date.now();
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
const
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
pendingNetworkErrors.push({
|
|
223
|
-
method,
|
|
224
|
-
url: url.slice(0, 500),
|
|
225
|
-
status: 0,
|
|
226
|
-
statusText: failure?.errorText ?? "Request failed",
|
|
227
|
-
timestamp: Date.now() - stepStartTime
|
|
228
|
-
});
|
|
229
|
-
});
|
|
230
|
-
rawPage.on("response", (res) => {
|
|
231
|
-
const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
|
|
232
|
-
if (status >= 400) {
|
|
233
|
-
const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
|
|
234
|
-
const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
|
|
235
|
-
const req = typeof res.request === "function" ? res.request() : res.request;
|
|
236
|
-
const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
|
|
237
|
-
pendingNetworkErrors.push({
|
|
238
|
-
method,
|
|
239
|
-
url: url.slice(0, 500),
|
|
240
|
-
status,
|
|
241
|
-
statusText,
|
|
242
|
-
timestamp: Date.now() - stepStartTime
|
|
622
|
+
try {
|
|
623
|
+
session = await createStagehandSession(browserConfig, config.anthropicApiKey);
|
|
624
|
+
const { stagehand, page } = session;
|
|
625
|
+
await suppressBugBearWidget(stagehand);
|
|
626
|
+
const rawPage = page;
|
|
627
|
+
try {
|
|
628
|
+
rawPage.on("console", (msg) => {
|
|
629
|
+
const level = msg.type?.() ?? msg.type ?? "log";
|
|
630
|
+
const mappedLevel = level === "error" ? "error" : level === "warn" || level === "warning" ? "warning" : level === "info" ? "info" : level === "debug" ? "debug" : "log";
|
|
631
|
+
pendingConsoleLogs.push({
|
|
632
|
+
level: mappedLevel,
|
|
633
|
+
text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 2e3),
|
|
634
|
+
source: typeof msg.location === "function" ? msg.location()?.url : void 0,
|
|
635
|
+
timestamp: Date.now() - stepStartTime
|
|
636
|
+
});
|
|
243
637
|
});
|
|
638
|
+
} catch {
|
|
244
639
|
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
640
|
+
try {
|
|
641
|
+
rawPage.on("requestfailed", (req) => {
|
|
642
|
+
const url = typeof req.url === "function" ? req.url() : String(req.url ?? "");
|
|
643
|
+
const method = typeof req.method === "function" ? req.method() : String(req.method ?? "GET");
|
|
644
|
+
const failure = typeof req.failure === "function" ? req.failure() : req.failure;
|
|
645
|
+
pendingNetworkErrors.push({
|
|
646
|
+
method,
|
|
647
|
+
url: url.slice(0, 500),
|
|
648
|
+
status: 0,
|
|
649
|
+
statusText: failure?.errorText ?? "Request failed",
|
|
650
|
+
timestamp: Date.now() - stepStartTime
|
|
651
|
+
});
|
|
652
|
+
});
|
|
653
|
+
} catch {
|
|
654
|
+
}
|
|
655
|
+
try {
|
|
656
|
+
rawPage.on("response", (res) => {
|
|
657
|
+
const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
|
|
658
|
+
if (status >= 400) {
|
|
659
|
+
const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
|
|
660
|
+
const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
|
|
661
|
+
const req = typeof res.request === "function" ? res.request() : res.request;
|
|
662
|
+
const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
|
|
663
|
+
pendingNetworkErrors.push({
|
|
664
|
+
method,
|
|
665
|
+
url: url.slice(0, 500),
|
|
666
|
+
status,
|
|
667
|
+
statusText,
|
|
668
|
+
timestamp: Date.now() - stepStartTime
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
});
|
|
672
|
+
} catch {
|
|
673
|
+
}
|
|
674
|
+
if (config.auth?.type === "form-login" || config.auth?.type === "supabase-native") {
|
|
248
675
|
config.onStatusChange?.("authenticating");
|
|
249
676
|
await injectAuth(page, config.auth, stagehand);
|
|
250
677
|
}
|
|
251
678
|
config.onStatusChange?.("navigating");
|
|
252
679
|
const targetUrl = config.testCase.targetRoute ? `${config.targetUrl.replace(/\/$/, "")}${config.testCase.targetRoute}` : config.targetUrl;
|
|
253
680
|
await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
|
|
254
|
-
if (config.auth && config.auth.type !== "form-login") {
|
|
681
|
+
if (config.auth && config.auth.type !== "form-login" && config.auth.type !== "supabase-native") {
|
|
255
682
|
config.onStatusChange?.("authenticating");
|
|
256
683
|
await injectAuth(page, config.auth, stagehand);
|
|
257
684
|
if (config.auth.type === "localStorage") {
|
|
@@ -265,79 +692,143 @@ async function runTest(config) {
|
|
|
265
692
|
}
|
|
266
693
|
await page.waitForLoadState("networkidle").catch(() => {
|
|
267
694
|
});
|
|
695
|
+
await page.evaluate(() => {
|
|
696
|
+
window.__bugbear_suppress = true;
|
|
697
|
+
try {
|
|
698
|
+
localStorage.setItem("__bugbear_suppress", "true");
|
|
699
|
+
} catch {
|
|
700
|
+
}
|
|
701
|
+
}).catch(() => {
|
|
702
|
+
});
|
|
703
|
+
await installClickTracker(page);
|
|
268
704
|
pendingConsoleLogs = [];
|
|
269
705
|
pendingNetworkErrors = [];
|
|
270
706
|
config.onStatusChange?.("executing");
|
|
271
707
|
const steps = config.testCase.steps;
|
|
708
|
+
const maxRetries = config.retry?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
709
|
+
const retryDelayMs = config.retry?.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS;
|
|
710
|
+
const resilientMode = config.resilientMode ?? true;
|
|
272
711
|
for (let i = 0; i < steps.length; i++) {
|
|
273
712
|
const step = steps[i];
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
actSucceeded =
|
|
284
|
-
await page
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
713
|
+
const retryHistory = [];
|
|
714
|
+
let finalResult;
|
|
715
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
716
|
+
stepStartTime = Date.now();
|
|
717
|
+
pendingConsoleLogs = [];
|
|
718
|
+
pendingNetworkErrors = [];
|
|
719
|
+
const screenshotBefore = await page.screenshot({ type: "png" });
|
|
720
|
+
let error;
|
|
721
|
+
let screenshotAfter = screenshotBefore;
|
|
722
|
+
let actSucceeded = false;
|
|
723
|
+
const actionResult = await executeAction(page, stagehand, step);
|
|
724
|
+
error = actionResult.error;
|
|
725
|
+
actSucceeded = !error;
|
|
726
|
+
if (actSucceeded) {
|
|
727
|
+
await page.waitForLoadState("networkidle").catch(() => {
|
|
728
|
+
});
|
|
729
|
+
await page.waitForTimeout(step.waitMs ?? 500);
|
|
730
|
+
}
|
|
290
731
|
screenshotAfter = await page.screenshot({ type: "png" }).catch(() => screenshotBefore);
|
|
732
|
+
let evaluation = {
|
|
733
|
+
passed: false,
|
|
734
|
+
confidence: 0,
|
|
735
|
+
actualResult: error ?? "Action execution failed"
|
|
736
|
+
};
|
|
737
|
+
if (actSucceeded) {
|
|
738
|
+
try {
|
|
739
|
+
const visionResult = await withTimeout(
|
|
740
|
+
evaluateStep({
|
|
741
|
+
anthropic,
|
|
742
|
+
screenshotBefore,
|
|
743
|
+
screenshotAfter,
|
|
744
|
+
action: step.action,
|
|
745
|
+
expectedResult: step.expectedResult,
|
|
746
|
+
evaluationHint: step.evaluationHint,
|
|
747
|
+
model: config.model
|
|
748
|
+
}),
|
|
749
|
+
AI_OPERATION_TIMEOUT_MS,
|
|
750
|
+
"Vision evaluation"
|
|
751
|
+
);
|
|
752
|
+
evaluation = {
|
|
753
|
+
passed: visionResult.passed,
|
|
754
|
+
confidence: visionResult.confidence,
|
|
755
|
+
actualResult: visionResult.actualResult
|
|
756
|
+
};
|
|
757
|
+
} catch (evalErr) {
|
|
758
|
+
evaluation = {
|
|
759
|
+
passed: false,
|
|
760
|
+
confidence: 0.2,
|
|
761
|
+
actualResult: `Vision evaluation error: ${evalErr instanceof Error ? evalErr.message : String(evalErr)}`
|
|
762
|
+
};
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
let discoveredActions = [];
|
|
766
|
+
if (actSucceeded && !actionResult.deterministic) {
|
|
767
|
+
const discovered = await discoverSelector(page);
|
|
768
|
+
if (discovered) {
|
|
769
|
+
discoveredActions = [{
|
|
770
|
+
type: discovered.suggestedActionType ?? "click",
|
|
771
|
+
selector: discovered.selector,
|
|
772
|
+
description: `Discovered via ${discovered.strategy}: ${discovered.tagName}${discovered.textContent ? ` "${discovered.textContent.slice(0, 50)}"` : ""}`
|
|
773
|
+
}];
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
const consoleLogs = pendingConsoleLogs.slice(0, 50);
|
|
777
|
+
const networkErrors = pendingNetworkErrors.slice(0, 30);
|
|
778
|
+
finalResult = {
|
|
779
|
+
stepNumber: step.stepNumber,
|
|
780
|
+
action: step.action,
|
|
781
|
+
expectedResult: step.expectedResult,
|
|
782
|
+
actualResult: evaluation.actualResult,
|
|
783
|
+
passed: evaluation.passed,
|
|
784
|
+
confidence: evaluation.confidence,
|
|
785
|
+
screenshotBefore,
|
|
786
|
+
screenshotAfter,
|
|
787
|
+
actionsTaken: discoveredActions,
|
|
788
|
+
error,
|
|
789
|
+
durationMs: Date.now() - stepStartTime,
|
|
790
|
+
consoleLogs,
|
|
791
|
+
networkErrors,
|
|
792
|
+
retryCount: attempt,
|
|
793
|
+
retryHistory,
|
|
794
|
+
skipped: false
|
|
795
|
+
};
|
|
796
|
+
const shouldRetry = !evaluation.passed && error && isRetryableError(error) && attempt < maxRetries;
|
|
797
|
+
if (!shouldRetry) break;
|
|
798
|
+
retryHistory.push({
|
|
799
|
+
attempt,
|
|
800
|
+
error,
|
|
801
|
+
confidence: evaluation.confidence,
|
|
802
|
+
timestamp: Date.now()
|
|
803
|
+
});
|
|
804
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
|
|
291
805
|
}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
actualResult: error ?? "Action execution failed"
|
|
296
|
-
};
|
|
297
|
-
if (actSucceeded) {
|
|
806
|
+
if (resilientMode && finalResult && !finalResult.passed) {
|
|
807
|
+
finalResult.skipped = true;
|
|
808
|
+
finalResult.skipReason = "Step failed, recovered page state";
|
|
298
809
|
try {
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
actualResult: import_zod.z.string().describe("Description of what actually happened on the page")
|
|
810
|
+
config.onStatusChange?.("navigating");
|
|
811
|
+
await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
|
|
812
|
+
await page.waitForLoadState("networkidle").catch(() => {
|
|
303
813
|
});
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
};
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
};
|
|
814
|
+
await installClickTracker(page);
|
|
815
|
+
await page.evaluate(() => {
|
|
816
|
+
window.__bugbear_suppress = true;
|
|
817
|
+
try {
|
|
818
|
+
localStorage.setItem("__bugbear_suppress", "true");
|
|
819
|
+
} catch {
|
|
820
|
+
}
|
|
821
|
+
}).catch(() => {
|
|
822
|
+
});
|
|
823
|
+
pendingConsoleLogs = [];
|
|
824
|
+
pendingNetworkErrors = [];
|
|
825
|
+
config.onStatusChange?.("executing");
|
|
826
|
+
} catch (recoveryErr) {
|
|
827
|
+
finalResult.skipReason = `Step failed, recovery also failed: ${recoveryErr instanceof Error ? recoveryErr.message : String(recoveryErr)}`;
|
|
319
828
|
}
|
|
320
829
|
}
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
const result = {
|
|
324
|
-
stepNumber: step.stepNumber,
|
|
325
|
-
action: step.action,
|
|
326
|
-
expectedResult: step.expectedResult,
|
|
327
|
-
actualResult: evaluation.actualResult,
|
|
328
|
-
passed: evaluation.passed,
|
|
329
|
-
confidence: evaluation.confidence,
|
|
330
|
-
screenshotBefore,
|
|
331
|
-
screenshotAfter,
|
|
332
|
-
actionsTaken: [],
|
|
333
|
-
// Stagehand handles actions internally
|
|
334
|
-
error,
|
|
335
|
-
durationMs: Date.now() - stepStartTime,
|
|
336
|
-
consoleLogs,
|
|
337
|
-
networkErrors
|
|
338
|
-
};
|
|
339
|
-
stepResults.push(result);
|
|
340
|
-
config.onStepComplete?.(result, i, steps.length);
|
|
830
|
+
stepResults.push(finalResult);
|
|
831
|
+
config.onStepComplete?.(finalResult, i, steps.length);
|
|
341
832
|
}
|
|
342
833
|
config.onStatusChange?.("completed");
|
|
343
834
|
const model = config.model ?? "claude-sonnet-4-20250514";
|
|
@@ -351,11 +842,7 @@ async function runTest(config) {
|
|
|
351
842
|
totalDurationMs: Date.now() - startTime,
|
|
352
843
|
summary,
|
|
353
844
|
screenshotUrls: [],
|
|
354
|
-
tokenUsage:
|
|
355
|
-
// Stagehand tracks tokens internally; these are approximate
|
|
356
|
-
inputTokens: steps.length * 3e3,
|
|
357
|
-
outputTokens: steps.length * 500
|
|
358
|
-
},
|
|
845
|
+
tokenUsage: getTokenEstimate(steps.length),
|
|
359
846
|
browserSessionId: session.sessionId
|
|
360
847
|
};
|
|
361
848
|
} catch (err) {
|
|
@@ -367,23 +854,29 @@ async function runTest(config) {
|
|
|
367
854
|
totalDurationMs: Date.now() - startTime,
|
|
368
855
|
summary: `Test execution failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
369
856
|
screenshotUrls: [],
|
|
370
|
-
tokenUsage:
|
|
371
|
-
|
|
372
|
-
outputTokens: stepResults.length * 500
|
|
373
|
-
},
|
|
374
|
-
browserSessionId: session.sessionId
|
|
857
|
+
tokenUsage: getTokenEstimate(stepResults.length),
|
|
858
|
+
browserSessionId: session?.sessionId ?? "unknown"
|
|
375
859
|
};
|
|
376
860
|
} finally {
|
|
377
|
-
|
|
861
|
+
if (session?.page) {
|
|
862
|
+
const rawPage = session.page;
|
|
863
|
+
rawPage.removeAllListeners?.("console");
|
|
864
|
+
rawPage.removeAllListeners?.("requestfailed");
|
|
865
|
+
rawPage.removeAllListeners?.("response");
|
|
866
|
+
}
|
|
867
|
+
await session?.close();
|
|
378
868
|
}
|
|
379
869
|
}
|
|
380
870
|
function determineOverallResult(steps) {
|
|
381
871
|
if (steps.length === 0) return "error";
|
|
382
|
-
const
|
|
383
|
-
const
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
872
|
+
const nonSkipped = steps.filter((s) => !s.skipped);
|
|
873
|
+
const skippedCount = steps.length - nonSkipped.length;
|
|
874
|
+
if (nonSkipped.length === 0) return "error";
|
|
875
|
+
const allNonSkippedPassed = nonSkipped.every((s) => s.passed);
|
|
876
|
+
const hasErrors = nonSkipped.some((s) => s.error);
|
|
877
|
+
if (skippedCount > 0 && allNonSkippedPassed) return "passed_with_skips";
|
|
878
|
+
if (allNonSkippedPassed) return "passed";
|
|
879
|
+
if (nonSkipped.every((s) => !s.passed) || hasErrors) return "failed";
|
|
387
880
|
return "partial";
|
|
388
881
|
}
|
|
389
882
|
|