@bbearai/ai-executor 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
6
  var __getProtoOf = Object.getPrototypeOf;
7
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
// Bundler lazy-initialiser. `fn` is an object holding a single init function
// keyed by its module path. The returned __init runs that function at most
// once: `fn = 0` is evaluated as the call argument, which clears `fn` so later
// calls skip the body; the result is cached in `res` and returned every time.
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
8
11
  var __export = (target, all) => {
9
12
  for (var name in all)
10
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -27,22 +30,299 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
27
30
  ));
28
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
32
 
33
+ // src/report-generator.ts
34
+ var report_generator_exports = {};
35
+ __export(report_generator_exports, {
36
+ generateExplorationReport: () => generateExplorationReport
37
+ });
38
/**
 * Build the structured report for one exploratory QA session.
 *
 * Actions whose category is not "normal" become findings; "normal" actions are
 * listed as passed checks. One model call produces the narrative summary.
 *
 * @param {object} anthropic - Client exposing `messages.create(...)`.
 * @param {object} input - `{ projectName, featureDescription, targetUrl, actions, model }`.
 * @returns {Promise<{report: object, tokenUsage: {inputTokens: number, outputTokens: number}}>}
 */
async function generateExplorationReport(anthropic, input) {
  const { projectName, featureDescription, targetUrl, actions, model } = input;
  // One severity default, used both in the report and in the prompt, so the
  // model never sees "[UNDEFINED]" for a finding the report calls "medium".
  const severityOf = (action) => action.severity || "medium";
  const findings = actions.filter((a) => a.category !== "normal");
  const passed = actions.filter((a) => a.category === "normal");
  const route = extractRoute(targetUrl);
  const actionableFindings = findings.map((f) => ({
    title: buildFindingTitle(f),
    category: f.category,
    severity: severityOf(f),
    confidence: f.confidence,
    // Default to empty lists: buildSuggestedPrompt iterates both.
    networkRequests: f.networkRequests ?? [],
    consoleErrors: (f.consoleLogs ?? []).filter((l) => l.level === "error"),
    domContext: f.domContext,
    url: targetUrl,
    route,
    reproSteps: buildReproSteps(actions, f.actionNumber),
    screenshotUrl: "",
    // Filled in by API route after upload
    actionPerformed: f.action,
    expectedBehavior: "Normal application behavior",
    actualBehavior: f.description
  }));
  const tested = passed.map((a) => ({
    description: a.action,
    route,
    status: "passed"
  }));
  const notTested = detectUntestable(actions);
  const highOrCritical = findings.filter((f) => f.severity === "critical" || f.severity === "high").length;
  const findingDetails = findings.map((f) => `- [${severityOf(f).toUpperCase()}] ${f.category}: ${f.description}`).join("\n");
  const summaryResponse = await anthropic.messages.create({
    model,
    max_tokens: 500,
    messages: [
      {
        role: "user",
        content: `Summarize this exploratory QA session in 2-3 sentences.

Feature tested: "${featureDescription}"
URL: ${targetUrl}
Actions performed: ${actions.length}
Findings: ${findings.length} (${highOrCritical} high/critical)
Passed checks: ${passed.length}

Finding details:
${findingDetails}

Be concise and factual. Focus on what was tested and the most important findings.`
      }
    ]
  });
  // Join every text block (as generateRunSummary does) rather than reading
  // content[0], which throws on an empty response and misses later text.
  const summaryText = (summaryResponse.content ?? []).filter((block) => block.type === "text").map((block) => block.text).join("");
  const summary = summaryText || "Exploration complete.";
  const suggestedPrompt = buildSuggestedPrompt(
    featureDescription,
    actionableFindings,
    tested,
    notTested
  );
  // A missing duration counts as 0 so the total never becomes NaN.
  const totalDuration = actions.reduce((sum, a) => sum + (a.durationMs ?? 0), 0);
  return {
    report: {
      projectName,
      featureDescription,
      targetUrl,
      exploredAt: (/* @__PURE__ */ new Date()).toISOString(),
      duration: `${Math.round(totalDuration / 1e3)}s`,
      actionsUsed: actions.length,
      actionBudget: actions.length,
      findings: actionableFindings,
      tested,
      notTested,
      summary,
      suggestedPrompt
    },
    tokenUsage: {
      inputTokens: summaryResponse.usage.input_tokens,
      outputTokens: summaryResponse.usage.output_tokens
    }
  };
}
115
/**
 * Compose a short finding title: a label for the category followed by the
 * first 80 characters of the description. Categories with no (or an empty)
 * label fall back to the raw category name.
 */
function buildFindingTitle(action) {
  const labelByCategory = {
    console_error: "JS Error",
    broken_interaction: "Broken",
    visual_anomaly: "Visual",
    input_handling: "Validation",
    normal: ""
  };
  const { category, description } = action;
  const label = labelByCategory[category] || category;
  const excerpt = description.slice(0, 80);
  return `${label}: ${excerpt}`;
}
125
/**
 * Return the pathname of `url`, or `url` unchanged when it cannot be parsed
 * as an absolute URL.
 */
function extractRoute(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url;
  }
  return parsed.pathname;
}
132
/**
 * List, as "<number>. <action>" strings, every action whose number is at or
 * before the target action, in their original order.
 */
function buildReproSteps(allActions, targetActionNumber) {
  const steps = [];
  for (const entry of allActions) {
    if (entry.actionNumber <= targetActionNumber) {
      steps.push(`${entry.actionNumber}. ${entry.action}`);
    }
  }
  return steps;
}
135
/**
 * Keyword heuristic: scan the lower-cased text of every action and
 * description for hints of areas the explorer could not exercise, and return
 * one `{ description, reason }` entry per matched area.
 */
function detectUntestable(actions) {
  const rules = [
    {
      keywords: ["file upload", "drag and drop"],
      description: "File upload functionality",
      reason: "AI cannot interact with OS file dialogs"
    },
    {
      keywords: ["captcha", "recaptcha"],
      description: "CAPTCHA verification",
      reason: "AI cannot solve CAPTCHAs"
    },
    {
      keywords: ["disabled", "permission"],
      description: "Permission-gated features",
      reason: "Current auth may not have required permissions"
    }
  ];
  const haystack = actions.map((a) => `${a.action} ${a.description}`).join(" ").toLowerCase();
  return rules
    .filter((rule) => rule.keywords.some((word) => haystack.includes(word)))
    .map(({ description, reason }) => ({ description, reason }));
}
158
/**
 * Render a copy-pasteable "fix these issues" prompt from the findings.
 *
 * With no findings, a one-line "no issues found" message is returned instead.
 * Each finding lists its first console error, first failed (status >= 400)
 * network request, DOM selector, route and repro steps; areas that were not
 * tested are appended at the end.
 */
function buildSuggestedPrompt(featureDescription, findings, tested, notTested) {
  if (findings.length === 0) {
    return `Exploratory QA tested "${featureDescription}" with ${tested.length} interactions \u2014 no issues found.`;
  }
  const chunks = [
    `Fix these ${findings.length} issue(s) found during exploratory QA testing of "${featureDescription}":\n\n`
  ];
  findings.forEach((finding, index) => {
    chunks.push(`${index + 1}. [${finding.severity.toUpperCase()}] ${finding.title}\n`);
    if (finding.consoleErrors.length > 0) {
      const firstError = finding.consoleErrors[0];
      chunks.push(` Console: ${firstError.text}\n`);
      if (firstError.source) {
        chunks.push(` Source: ${firstError.source}\n`);
      }
    }
    const failedRequest = finding.networkRequests.find((r) => r.status >= 400);
    if (failedRequest) {
      chunks.push(` API: ${failedRequest.method} ${failedRequest.url} \u2192 ${failedRequest.status}\n`);
      if (failedRequest.responseBody) {
        chunks.push(` Response: ${failedRequest.responseBody.slice(0, 200)}\n`);
      }
    }
    if (finding.domContext?.selector) {
      chunks.push(` Element: ${finding.domContext.selector}\n`);
    }
    chunks.push(` Route: ${finding.route}\n`);
    chunks.push(` Repro: ${finding.reproSteps.join(" \u2192 ")}\n\n`);
  });
  if (notTested.length > 0) {
    chunks.push("Not tested (requires manual review):\n");
    for (const skipped of notTested) {
      chunks.push(`- ${skipped.description}: ${skipped.reason}\n`);
    }
  }
  return chunks.join("").trim();
}
207
+ var init_report_generator = __esm({
208
+ "src/report-generator.ts"() {
209
+ "use strict";
210
+ }
211
+ });
212
+
30
213
  // src/index.ts
31
214
  var index_exports = {};
32
215
  __export(index_exports, {
216
+ Semaphore: () => Semaphore,
217
+ analyzeFailure: () => analyzeFailure,
218
+ authenticateSupabase: () => authenticateSupabase,
33
219
  createStagehandSession: () => createStagehandSession,
220
+ discoverSelector: () => discoverSelector,
221
+ estimateBatchCost: () => estimateBatchCost,
222
+ estimateCost: () => estimateCost,
223
+ estimateTestCost: () => estimateTestCost,
224
+ evaluateStep: () => evaluateStep,
225
+ executeAction: () => executeAction,
226
+ generateExplorationReport: () => generateExplorationReport,
34
227
  generateRunSummary: () => generateRunSummary,
228
+ getTokenEstimate: () => getTokenEstimate,
35
229
  injectAuth: () => injectAuth,
36
- runTest: () => runTest
230
+ injectSupabaseAuth: () => injectSupabaseAuth,
231
+ installClickTracker: () => installClickTracker,
232
+ performSupabaseAuth: () => performSupabaseAuth,
233
+ rollupFailureClassification: () => rollupFailureClassification,
234
+ runExploration: () => runExploration,
235
+ runTest: () => runTest,
236
+ suppressBugBearWidget: () => suppressBugBearWidget,
237
+ triageReport: () => triageReport,
238
+ verifySupabaseSession: () => verifySupabaseSession
37
239
  });
38
240
  module.exports = __toCommonJS(index_exports);
39
241
 
40
242
  // src/runner.ts
41
243
  var import_sdk = __toESM(require("@anthropic-ai/sdk"));
42
- var import_zod = require("zod");
43
244
 
44
245
  // src/browser.ts
45
246
  var import_stagehand = require("@browserbasehq/stagehand");
247
+
248
+ // src/supabase-auth.ts
249
/**
 * Return the first hostname label of the given URL
 * (e.g. "abc" for "https://abc.supabase.co"). Throws if the URL is invalid.
 */
function extractProjectRef(supabaseUrl) {
  const { hostname } = new URL(supabaseUrl);
  const [projectRef] = hostname.split(".");
  return projectRef;
}
255
/**
 * Sign in with email + password against the project's
 * `/auth/v1/token?grant_type=password` endpoint.
 *
 * @param {object} auth - Reads `supabaseUrl`, `anonKey`, `email`, `password`.
 * @returns {Promise<object>} The parsed session JSON.
 * @throws {Error} On a non-OK response (status plus the first 200 characters
 *   of the body) or when the response carries no `access_token`.
 */
async function authenticateSupabase(auth) {
  const baseUrl = auth.supabaseUrl.replace(/\/$/, "");
  const endpoint = `${baseUrl}/auth/v1/token?grant_type=password`;
  const credentials = { email: auth.email, password: auth.password };
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "apikey": auth.anonKey
    },
    body: JSON.stringify(credentials)
  });
  if (!response.ok) {
    // Reading the body is best-effort; an unreadable body becomes "".
    const detail = await response.text().catch(() => "");
    throw new Error(
      `Supabase auth failed (${response.status}): ${detail.slice(0, 200)}`
    );
  }
  const session = await response.json();
  if (session.access_token) {
    return session;
  }
  throw new Error("Supabase auth returned no access_token");
}
280
/**
 * Write the session into the page's localStorage under
 * `sb-<first hostname label>-auth-token`.
 *
 * When the page has no URL yet (or is on about:blank) it is first navigated
 * to the Supabase URL; a navigation failure is deliberately ignored.
 *
 * NOTE(review): localStorage is scoped per origin, so the value lands on
 * whatever origin the page is on when this runs — confirm callers navigate to
 * the application origin first if the app is expected to read it.
 */
async function injectSupabaseAuth(page, auth, session) {
  const [projectRef] = new URL(auth.supabaseUrl).hostname.split(".");
  const entry = {
    key: `sb-${projectRef}-auth-token`,
    value: JSON.stringify({
      access_token: session.access_token,
      refresh_token: session.refresh_token,
      expires_in: session.expires_in,
      expires_at: session.expires_at,
      token_type: session.token_type,
      user: session.user
    })
  };
  const currentUrl = page.url();
  const needsNavigation = currentUrl === "about:blank" || !currentUrl;
  if (needsNavigation) {
    const landingUrl = auth.supabaseUrl.replace(/\/$/, "");
    const navigation = page.goto(landingUrl, {
      waitUntil: "domcontentloaded",
      timeoutMs: 1e4
    });
    await navigation.catch(() => {
    });
  }
  await page.evaluate(
    ({ key, value }) => {
      localStorage.setItem(key, value);
    },
    entry
  );
}
306
/**
 * Check an access token by requesting `/auth/v1/user` with it.
 *
 * @returns {Promise<boolean>} `response.ok` of that request.
 */
async function verifySupabaseSession(auth, accessToken) {
  const baseUrl = auth.supabaseUrl.replace(/\/$/, "");
  const headers = {
    "Authorization": `Bearer ${accessToken}`,
    "apikey": auth.anonKey
  };
  const { ok } = await fetch(`${baseUrl}/auth/v1/user`, { headers });
  return ok;
}
316
/**
 * Full Supabase login flow: authenticate, inject the session into the page,
 * then verify the access token.
 *
 * @throws {Error} When authentication fails or the token is rejected.
 */
async function performSupabaseAuth(page, auth) {
  const session = await authenticateSupabase(auth);
  await injectSupabaseAuth(page, auth, session);
  const tokenAccepted = await verifySupabaseSession(auth, session.access_token);
  if (tokenAccepted) {
    return;
  }
  throw new Error("Supabase auth verification failed \u2014 session token rejected");
}
324
+
325
+ // src/browser.ts
46
326
  var DEFAULT_MODEL = "anthropic/claude-sonnet-4-20250514";
47
327
  async function createStagehandSession(config, anthropicApiKey) {
48
328
  const modelName = config.model ?? DEFAULT_MODEL;
@@ -55,6 +335,11 @@ async function createStagehandSession(config, anthropicApiKey) {
55
335
  modelName,
56
336
  apiKey: anthropicApiKey
57
337
  },
338
+ // Bypass pino logger — its pino-pretty transport uses worker threads
339
+ // which fail in Vercel's serverless environment
340
+ logger: (msg) => {
341
+ if ((msg.level ?? 0) >= 40) console.warn("[Stagehand]", msg.message);
342
+ },
58
343
  localBrowserLaunchOptions: config.provider === "local" ? {
59
344
  headless: config.headless ?? true,
60
345
  viewport
@@ -78,6 +363,21 @@ async function createStagehandSession(config, anthropicApiKey) {
78
363
  }
79
364
  };
80
365
  }
366
/**
 * Best-effort: register an init script on the Stagehand context that sets
 * `window.__bugbear_suppress` and the matching localStorage flag.
 * Any failure is swallowed on purpose.
 */
async function suppressBugBearWidget(stagehand) {
  // Passed to addInitScript, so it must not reference anything from this scope.
  const markSuppressed = () => {
    window.__bugbear_suppress = true;
    try {
      localStorage.setItem("__bugbear_suppress", "true");
    } catch {
    }
  };
  try {
    const context = stagehand.context;
    if (!context?.addInitScript) {
      return;
    }
    await context.addInitScript(markSuppressed);
  } catch {
  }
}
81
381
  async function injectAuth(page, auth, stagehand) {
82
382
  if (auth.type === "cookie") {
83
383
  for (const c of auth.cookies) {
@@ -103,23 +403,123 @@ async function injectAuth(page, auth, stagehand) {
103
403
  }, auth.items);
104
404
  } else if (auth.type === "form-login") {
105
405
  await performFormLogin(page, auth, stagehand);
406
+ } else if (auth.type === "supabase-native") {
407
+ await performSupabaseAuth(page, auth);
106
408
  }
107
409
  }
410
/**
 * Create a start/stop recorder for a page's network traffic.
 *
 * While active it records every response (except images, stylesheets, fonts
 * and media) as a request entry, and every response with status >= 400 or
 * failed request as an error entry.
 *
 * @param {object} page - Emitter with `on`/`off` for "response" and "requestfailed".
 * @returns {{start(): void, stop(): void, getRequests(): object[], getErrors(): object[]}}
 */
function createNetworkCapture(page) {
  const requests = [];
  const errors = [];
  let active = false;
  let startTimestamp = Date.now();
  // Bumped on every start(); lets a handler that was awaiting a response body
  // notice that a newer capture session began and drop its stale entry.
  let session = 0;
  // Members may be methods or plain properties depending on the page
  // implementation; read either form.
  const read = (target, member) => {
    const value = target?.[member];
    return typeof value === "function" ? value.call(target) : value;
  };
  const onResponse = async (response) => {
    if (!active) return;
    const startedIn = session;
    const req = read(response, "request");
    const resourceType = read(req, "resourceType");
    if (["image", "stylesheet", "font", "media"].includes(resourceType)) return;
    const entry = {
      method: String(read(req, "method") ?? "GET"),
      url: String(read(response, "url") ?? "").slice(0, 500),
      status: Number(read(response, "status") ?? 0),
      timestamp: (/* @__PURE__ */ new Date()).toISOString()
    };
    if (entry.status >= 400) {
      try {
        const body = await response.text();
        entry.responseBody = body.slice(0, 500);
      } catch {
        // Body unavailable: keep the entry without it.
      }
      if (startedIn !== session) return;
      errors.push({
        method: entry.method,
        url: entry.url,
        status: entry.status,
        statusText: String(read(response, "statusText") ?? ""),
        timestamp: Date.now() - startTimestamp
      });
    }
    if (["POST", "PUT", "PATCH"].includes(entry.method)) {
      try {
        const postData = read(req, "postData");
        if (postData) entry.requestBody = String(postData).slice(0, 500);
      } catch {
        // Request body unavailable: keep the entry without it.
      }
    }
    requests.push(entry);
  };
  const onRequestFailed = (req) => {
    if (!active) return;
    const failure = read(req, "failure");
    errors.push({
      method: String(read(req, "method") ?? "GET"),
      url: String(read(req, "url") ?? "").slice(0, 500),
      status: 0,
      statusText: failure?.errorText ?? "Request failed",
      timestamp: Date.now() - startTimestamp
    });
  };
  // Per-event state: `supported` turns false once `on` throws; `attached`
  // prevents a second start() from registering the same listener twice.
  const listeners = [
    { event: "response", handler: onResponse, supported: true, attached: false },
    { event: "requestfailed", handler: onRequestFailed, supported: true, attached: false }
  ];
  return {
    start() {
      active = true;
      session += 1;
      requests.length = 0;
      errors.length = 0;
      startTimestamp = Date.now();
      for (const listener of listeners) {
        if (!listener.supported || listener.attached) continue;
        try {
          page.on(listener.event, listener.handler);
          listener.attached = true;
        } catch {
          listener.supported = false;
        }
      }
    },
    stop() {
      active = false;
      for (const listener of listeners) {
        if (!listener.attached) continue;
        try {
          page.off(listener.event, listener.handler);
          listener.attached = false;
        } catch {
          // Could not detach; the handler stays registered but is gated by
          // `active`, and `attached` stays true so it is not added again.
        }
      }
    },
    getRequests: () => [...requests],
    getErrors: () => [...errors]
  };
}
108
506
  async function performFormLogin(page, auth, stagehand) {
109
507
  await page.goto(auth.loginUrl, { waitUntil: "domcontentloaded" });
110
508
  await page.waitForLoadState("networkidle", 15e3).catch(() => {
111
509
  });
510
+ await fillLoginCredentials(page, auth);
112
511
  if (stagehand) {
113
512
  await stagehand.act(
114
- `Fill in the email/username field with "${auth.email}" and the password field with "${auth.password}", then click the login/sign-in button to submit the form.`
115
- );
513
+ "Click the login, sign-in, or submit button to submit the form."
514
+ ).catch(() => {
515
+ });
116
516
  } else {
117
- await manualFormLogin(page, auth);
517
+ await clickSubmitButton(page);
118
518
  }
119
519
  await page.waitForLoadState("networkidle", 15e3).catch(() => {
120
520
  });
121
521
  }
122
- async function manualFormLogin(page, auth) {
522
+ async function fillLoginCredentials(page, auth) {
123
523
  await page.waitForSelector(
124
524
  'input[type="email"], input[type="text"][name*="email"], input[name*="user"], input[type="text"]',
125
525
  { timeout: 15e3 }
@@ -153,6 +553,8 @@ async function manualFormLogin(page, auth) {
153
553
  } else {
154
554
  throw new Error("Could not find password input on login page");
155
555
  }
556
+ }
557
+ async function clickSubmitButton(page) {
156
558
  const submitSelectors = [
157
559
  'button[type="submit"]',
158
560
  'input[type="submit"]'
@@ -177,21 +579,23 @@ async function generateRunSummary(anthropic, testTitle, steps, model) {
177
579
  (s) => `Step ${s.stepNumber}: ${s.action}
178
580
  Expected: ${s.expectedResult}
179
581
  Actual: ${s.actualResult}
180
- Result: ${s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
582
+ Result: ${s.skipped ? "SKIPPED" : s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
181
583
  Error: ${s.error}` : ""}`
182
584
  ).join("\n\n");
183
- const passCount = steps.filter((s) => s.passed).length;
184
- const failCount = steps.filter((s) => !s.passed).length;
585
+ const passCount = steps.filter((s) => s.passed && !s.skipped).length;
586
+ const failCount = steps.filter((s) => !s.passed && !s.skipped).length;
587
+ const skipCount = steps.filter((s) => s.skipped).length;
588
+ const skipNote = skipCount > 0 ? " Some steps were skipped due to page state recovery \u2014 these are not failures, just steps that could not be executed." : "";
185
589
  const response = await anthropic.messages.create({
186
590
  model,
187
591
  max_tokens: 512,
188
592
  messages: [
189
593
  {
190
594
  role: "user",
191
- content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything). Be concise and factual.
595
+ content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything).${skipNote} Be concise and factual.
192
596
 
193
597
  Test: ${testTitle}
194
- Results: ${passCount} passed, ${failCount} failed out of ${steps.length} steps
598
+ Results: ${passCount} passed, ${failCount} failed, ${skipCount} skipped out of ${steps.length} steps
195
599
 
196
600
  ${stepsText}`
197
601
  }
@@ -200,7 +604,355 @@ ${stepsText}`
200
604
  return response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
201
605
  }
202
606
 
607
+ // src/vision-evaluator.ts
608
+ var DEFAULT_MODEL2 = "claude-sonnet-4-20250514";
609
/**
 * Ask the vision model whether a test step achieved its expected result by
 * comparing a BEFORE and an AFTER screenshot.
 *
 * @param {object} input - Reads `anthropic`, `model`, `evaluationHint`,
 *   `screenshotBefore`, `screenshotAfter`, `action`, `expectedResult`.
 *   Screenshots are base64-encoded via `.toString("base64")` and sent as PNG.
 * @returns {Promise<{passed: boolean, confidence: number, actualResult: string}>}
 */
async function evaluateStep(input) {
  const model = input.model ?? DEFAULT_MODEL2;
  const hintClause = input.evaluationHint ? `
EVALUATION HINT: ${input.evaluationHint}` : "";
  const textBlock = (text) => ({ type: "text", text });
  const imageBlock = (screenshot) => ({
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: screenshot.toString("base64")
    }
  });
  const instructions = `You are a QA test evaluator. Compare the BEFORE and AFTER screenshots to evaluate this test step.

ACTION PERFORMED: ${input.action}
EXPECTED RESULT: ${input.expectedResult}${hintClause}

Analyze the visual differences between the two screenshots and determine if the expected result was achieved.

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"passed": true/false,
"confidence": 0.0-1.0,
"actualResult": "Brief description of what actually changed between the screenshots"
}

Confidence guide:
- 0.95-1.0: Clearly achieved/not achieved, obvious visual evidence
- 0.8-0.94: Very likely, strong visual indicators
- 0.6-0.79: Probable but some ambiguity
- Below 0.6: Uncertain, hard to tell from screenshots alone`;
  const response = await input.anthropic.messages.create({
    model,
    max_tokens: 512,
    messages: [
      {
        role: "user",
        content: [
          textBlock("BEFORE screenshot (page state before the action):"),
          imageBlock(input.screenshotBefore),
          textBlock("AFTER screenshot (page state after the action):"),
          imageBlock(input.screenshotAfter),
          textBlock(instructions)
        ]
      }
    ]
  });
  let reply = "";
  for (const block of response.content) {
    if (block.type === "text") {
      reply += block.text;
    }
  }
  return parseEvaluation(reply);
}
673
/**
 * Parse the evaluator's reply into `{ passed, confidence, actualResult }`.
 *
 * Tries the whole reply as JSON first, then the span from the first "{" to
 * the last "}", which covers Markdown fences and surrounding prose. Keys may
 * appear in any order. An unparseable reply yields a failed, low-confidence
 * result that quotes the first 200 characters of the reply.
 */
function parseEvaluation(text) {
  const candidates = [text.trim()];
  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  if (open !== -1 && close > open) {
    candidates.push(text.slice(open, close + 1));
  }
  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      // Only a plain JSON object can carry the expected fields.
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        return validateEvaluation(parsed);
      }
    } catch {
      // Not valid JSON: try the next candidate.
    }
  }
  return {
    passed: false,
    confidence: 0.3,
    actualResult: `Vision evaluation returned unparseable response: ${text.slice(0, 200)}`
  };
}
/**
 * Coerce a parsed evaluation into the expected shape: a non-boolean `passed`
 * becomes false, `confidence` is clamped to [0, 1] (0.5 when missing or not a
 * finite number), and a non-string `actualResult` gets a placeholder.
 */
function validateEvaluation(parsed) {
  const { passed, confidence, actualResult } = parsed;
  return {
    passed: typeof passed === "boolean" ? passed : false,
    confidence: Number.isFinite(confidence) ? Math.max(0, Math.min(1, confidence)) : 0.5,
    actualResult: typeof actualResult === "string" ? actualResult : "No description provided"
  };
}
700
+
701
+ // src/action-executor.ts
702
/**
 * Run one step. Steps with both a selector and an action type are tried
 * deterministically first; if that throws, or the step lacks either field,
 * the step's natural-language action is handed to Stagehand.
 *
 * @returns {Promise<{deterministic: boolean, error?: string}>} `error` is set
 *   only when the Stagehand path failed as well.
 */
async function executeAction(page, stagehand, step) {
  const hasDeterministicRecipe = step.selector && step.actionType;
  if (!hasDeterministicRecipe) {
    return executeStagehandAction(stagehand, step);
  }
  try {
    await executePlaywrightAction(page, step);
    return { deterministic: true };
  } catch (err) {
    const fallback = await executeStagehandAction(stagehand, step);
    let error = void 0;
    if (fallback.error) {
      const primaryMessage = err instanceof Error ? err.message : String(err);
      error = `Playwright failed (${primaryMessage}), Stagehand fallback also failed: ${fallback.error}`;
    }
    return { deterministic: false, error };
  }
}
717
/**
 * Perform a step directly on the page according to `step.actionType`
 * ("click", "fill", "select", "navigate", "scroll", "wait", "assert").
 *
 * After any action other than "wait", a truthy `step.waitMs` adds a pause.
 *
 * @throws {Error} For an unknown action type, a "navigate" without a URL, or
 *   whatever the underlying page call throws.
 */
async function executePlaywrightAction(page, step) {
  const { actionType, selector, value, waitMs } = step;
  const handlers = new Map([
    ["click", () => page.locator(selector).click()],
    ["fill", () => page.locator(selector).fill(value ?? "")],
    ["select", () => page.evaluate(
      ({ sel, val }) => {
        const el = document.querySelector(sel);
        if (!el) throw new Error(`Select element not found: ${sel}`);
        el.value = val;
        el.dispatchEvent(new Event("change", { bubbles: true }));
      },
      { sel: selector, val: value ?? "" }
    )],
    ["navigate", () => {
      const url = value ?? selector ?? "";
      if (!url) throw new Error("Navigate action requires a value or selector with the URL");
      return page.goto(url, { waitUntil: "domcontentloaded", timeoutMs: 15e3 });
    }],
    ["scroll", () => page.evaluate((sel) => {
      const el = document.querySelector(sel);
      if (el) el.scrollIntoView({ behavior: "smooth", block: "center" });
    }, selector)],
    ["wait", () => {
      if (selector) {
        return page.waitForSelector(selector, { timeout: waitMs ?? 1e4 });
      }
      if (waitMs) {
        return page.waitForTimeout(waitMs);
      }
      return void 0;
    }],
    // "assert" has nothing to perform here.
    ["assert", () => void 0]
  ]);
  const perform = handlers.get(actionType);
  if (!perform) {
    throw new Error(`Unknown actionType: ${actionType}`);
  }
  await perform();
  const needsSettlePause = waitMs && actionType !== "wait";
  if (needsSettlePause) {
    await page.waitForTimeout(waitMs);
  }
}
774
/**
 * Hand the step's natural-language action to `stagehand.act`.
 * Never throws: a failure is reported through the returned `error` message.
 *
 * @returns {Promise<{deterministic: false, error?: string}>}
 */
async function executeStagehandAction(stagehand, step) {
  const outcome = { deterministic: false };
  try {
    await stagehand.act(step.action);
  } catch (err) {
    outcome.error = err instanceof Error ? err.message : String(err);
  }
  return outcome;
}
785
+
786
+ // src/selector-discovery.ts
787
/**
 * Derive a reusable CSS selector for the element the last action touched:
 * the element recorded in `document.__bbLastClicked`, else the focused one.
 *
 * Strategy order: test id attribute, stable id, role (+ label), aria-label,
 * then a short tag/class path of at most four levels.
 *
 * @returns {Promise<{selector: string, strategy: string, suggestedActionType?: string,
 *   tagName: string, textContent: string} | null>} null when no usable element
 *   exists or the page evaluation fails.
 */
async function discoverSelector(page) {
  try {
    const result = await page.evaluate(() => {
      // Everything in this callback runs inside the page, so helpers must be
      // defined here rather than in the enclosing module.
      const quoteAttr = (raw) => String(raw).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
      const escapeIdent = (raw) => {
        if (typeof CSS !== "undefined" && typeof CSS.escape === "function") {
          return CSS.escape(raw);
        }
        // Minimal fallback for environments without CSS.escape.
        return String(raw).replace(/[^a-zA-Z0-9_\-\u0080-\uFFFF]/g, (ch) => `\\${ch}`).replace(/^[0-9]/, (digit) => `\\3${digit} `);
      };
      const el = document.__bbLastClicked ?? document.activeElement;
      if (!el || el === document.body || el === document.documentElement) return null;
      const tagName = el.tagName?.toLowerCase() ?? "unknown";
      const textContent = (el.textContent ?? "").trim().slice(0, 100);
      let selector = "";
      let strategy = "css-path";
      // Use the attribute that is actually present, so a `data-test-id`
      // element is not given a `data-testid` selector that cannot match it.
      const testIdAttr = ["data-testid", "data-test-id"].find((attr) => el.getAttribute(attr));
      const role = el.getAttribute("role");
      const ariaLabel = el.getAttribute("aria-label");
      if (testIdAttr) {
        selector = `[${testIdAttr}="${quoteAttr(el.getAttribute(testIdAttr))}"]`;
        strategy = "data-testid";
      } else if (el.id && !/^:r[0-9a-z]+:?$/.test(el.id) && !/^react-/.test(el.id)) {
        selector = `#${escapeIdent(el.id)}`;
        strategy = "id";
      } else if (role) {
        const nameAttr = el.getAttribute("name");
        selector = `[role="${quoteAttr(role)}"]`;
        if (ariaLabel) {
          selector += `[aria-label="${quoteAttr(ariaLabel)}"]`;
        } else if (nameAttr) {
          // The label came from `name`, so match on `name`, not aria-label.
          selector += `[name="${quoteAttr(nameAttr)}"]`;
        }
        strategy = "role";
      } else if (ariaLabel) {
        selector = `[aria-label="${quoteAttr(ariaLabel)}"]`;
        strategy = "aria-label";
      } else {
        const parts = [];
        let current = el;
        while (current && current !== document.body) {
          let part = current.tagName.toLowerCase();
          if (current.className && typeof current.className === "string") {
            const classes = current.className.split(/\s+/).filter(
              (c) => c && !c.startsWith("_") && c.length < 30
            );
            if (classes.length > 0) {
              // Utility classes such as "md:flex" need escaping to be valid.
              part += `.${escapeIdent(classes[0])}`;
            }
          }
          parts.unshift(part);
          current = current.parentElement;
          if (parts.length >= 4) break;
        }
        selector = parts.join(" > ");
        strategy = "css-path";
      }
      let suggestedActionType;
      if (tagName === "button" || tagName === "a" || role === "button") {
        suggestedActionType = "click";
      } else if (tagName === "input" || tagName === "textarea") {
        const type = el.getAttribute("type") ?? "text";
        suggestedActionType = type === "checkbox" || type === "radio" ? "click" : "fill";
      } else if (tagName === "select") {
        suggestedActionType = "select";
      }
      return { selector, strategy, suggestedActionType, tagName, textContent };
    });
    return result;
  } catch {
    // Best-effort: no selector is not an error for the caller.
    return null;
  }
}
856
/**
 * Best-effort: register a capture-phase click listener in the current
 * document that stores the clicked element in `document.__bbLastClicked`.
 * Any evaluation failure is swallowed.
 */
async function installClickTracker(page) {
  // Runs inside the page, so it must not reference anything from this scope.
  const registerTracker = () => {
    const rememberTarget = (e) => {
      document.__bbLastClicked = e.target;
    };
    document.addEventListener("click", rememberTarget, { capture: true });
  };
  try {
    await page.evaluate(registerTracker);
  } catch {
  }
}
866
+
867
+ // src/cost.ts
868
// Price per one million tokens, in US dollars, keyed by model id or alias.
var MODEL_PRICING = {
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-haiku-4-20250514": { input: 0.8, output: 4 },
  "claude-opus-4-20250514": { input: 15, output: 75 },
  // Aliases
  "sonnet": { input: 3, output: 15 },
  "haiku": { input: 0.8, output: 4 },
  "opus": { input: 15, output: 75 }
};
var DEFAULT_MODEL3 = "claude-sonnet-4-20250514";
var TOKEN_PROFILE = {
  /** act() — screenshot + DOM context → action decision */
  actInput: 2e3,
  actOutput: 200,
  /** extract() — screenshot + extraction schema → structured result */
  extractInput: 3e3,
  extractOutput: 500,
  /** summary — all step results → narrative summary (once per run) */
  summaryInput: 2e3,
  summaryOutput: 500
};
/**
 * Price a token count for a model.
 *
 * @param {number} inputTokens
 * @param {number} outputTokens
 * @param {string} [model] - Model id or alias; unknown models are priced at
 *   the default model's rate but reported under the name that was passed.
 * @returns {{cents: number, formatted: string, tokens: object, model: string}}
 */
function estimateCost(inputTokens, outputTokens, model) {
  const resolvedModel = model ?? DEFAULT_MODEL3;
  // Own-property check so names like "toString" or "constructor" do not
  // resolve to inherited Object members and produce NaN costs.
  const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_PRICING, resolvedModel);
  const pricing = MODEL_PRICING[isKnownModel ? resolvedModel : DEFAULT_MODEL3];
  const inputCost = inputTokens / 1e6 * pricing.input;
  const outputCost = outputTokens / 1e6 * pricing.output;
  const totalDollars = inputCost + outputCost;
  // Cents rounded to two decimal places.
  const cents = Math.round(totalDollars * 100 * 100) / 100;
  return {
    cents,
    formatted: `$${totalDollars.toFixed(4)}`,
    tokens: { inputTokens, outputTokens },
    model: resolvedModel
  };
}
/** Estimated cost of a single test with `stepCount` steps. */
function estimateTestCost(stepCount, model) {
  const { inputTokens, outputTokens } = getTokenEstimate(stepCount);
  return estimateCost(inputTokens, outputTokens, model);
}
/** Estimated cost of a batch; each entry of `testCases` provides `stepCount`. */
function estimateBatchCost(testCases, model) {
  let totalInput = 0;
  let totalOutput = 0;
  for (const tc of testCases) {
    const { inputTokens, outputTokens } = getTokenEstimate(tc.stepCount);
    totalInput += inputTokens;
    totalOutput += outputTokens;
  }
  return estimateCost(totalInput, totalOutput, model);
}
/**
 * Token estimate for one test run: one act + one extract per step, plus a
 * single summary call. The other estimators all derive from this formula.
 */
function getTokenEstimate(stepCount) {
  return {
    inputTokens: stepCount * (TOKEN_PROFILE.actInput + TOKEN_PROFILE.extractInput) + TOKEN_PROFILE.summaryInput,
    outputTokens: stepCount * (TOKEN_PROFILE.actOutput + TOKEN_PROFILE.extractOutput) + TOKEN_PROFILE.summaryOutput
  };
}
923
+
203
924
  // src/runner.ts
925
+ var AI_OPERATION_TIMEOUT_MS = 3e4;
926
+ var DEFAULT_MAX_RETRIES = 2;
927
+ var DEFAULT_RETRY_DELAY_MS = 2e3;
928
/**
 * Decide whether an error message looks transient (timeouts, connection or
 * navigation failures, a crashed/closed browser context) and so worth a retry.
 *
 * @param {string} error - The error message to classify.
 * @returns {boolean}
 */
function isRetryableError(error) {
  const transientSignatures = [
    /timed?\s*out/i,
    /ECONNREFUSED/i,
    /ECONNRESET/i,
    /ENOTFOUND/i,
    /net::ERR_/i,
    /navigation failed/i,
    /page crashed/i,
    /context was destroyed/i,
    /target closed/i,
    /session closed/i,
    /browser disconnected/i,
    /execution context/i
  ];
  for (const signature of transientSignatures) {
    if (signature.test(error)) {
      return true;
    }
  }
  return false;
}
945
/**
 * Race `promise` against a timer. Resolves or rejects with the promise's own
 * outcome if it settles first; otherwise rejects with
 * "<operation> timed out after <timeoutMs>ms". The timer is always cleared.
 */
async function withTimeout(promise, timeoutMs, operation) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
    timer = setTimeout(expire, timeoutMs);
  });
  return Promise.race([promise, deadline]).finally(() => {
    clearTimeout(timer);
  });
}
204
956
  async function runTest(config) {
205
957
  const anthropic = new import_sdk.default({ apiKey: config.anthropicApiKey });
206
958
  const startTime = Date.now();
@@ -209,60 +961,71 @@ async function runTest(config) {
209
961
  headless: true
210
962
  };
211
963
  config.onStatusChange?.("initializing");
212
- const session = await createStagehandSession(browserConfig, config.anthropicApiKey);
213
- const { stagehand, page } = session;
964
+ let session;
214
965
  const stepResults = [];
215
966
  let pendingConsoleLogs = [];
216
967
  let pendingNetworkErrors = [];
217
968
  let stepStartTime = Date.now();
218
- const rawPage = page;
219
- rawPage.on("console", (msg) => {
220
- const level = msg.type?.() ?? msg.type ?? "log";
221
- const mappedLevel = level === "error" ? "error" : level === "warn" || level === "warning" ? "warning" : level === "info" ? "info" : level === "debug" ? "debug" : "log";
222
- pendingConsoleLogs.push({
223
- level: mappedLevel,
224
- text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 2e3),
225
- source: typeof msg.location === "function" ? msg.location()?.url : void 0,
226
- timestamp: Date.now() - stepStartTime
227
- });
228
- });
229
- rawPage.on("requestfailed", (req) => {
230
- const url = typeof req.url === "function" ? req.url() : String(req.url ?? "");
231
- const method = typeof req.method === "function" ? req.method() : String(req.method ?? "GET");
232
- const failure = typeof req.failure === "function" ? req.failure() : req.failure;
233
- pendingNetworkErrors.push({
234
- method,
235
- url: url.slice(0, 500),
236
- status: 0,
237
- statusText: failure?.errorText ?? "Request failed",
238
- timestamp: Date.now() - stepStartTime
239
- });
240
- });
241
- rawPage.on("response", (res) => {
242
- const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
243
- if (status >= 400) {
244
- const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
245
- const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
246
- const req = typeof res.request === "function" ? res.request() : res.request;
247
- const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
248
- pendingNetworkErrors.push({
249
- method,
250
- url: url.slice(0, 500),
251
- status,
252
- statusText,
253
- timestamp: Date.now() - stepStartTime
969
+ try {
970
+ session = await createStagehandSession(browserConfig, config.anthropicApiKey);
971
+ const { stagehand, page } = session;
972
+ await suppressBugBearWidget(stagehand);
973
+ const rawPage = page;
974
+ try {
975
+ rawPage.on("console", (msg) => {
976
+ const level = msg.type?.() ?? msg.type ?? "log";
977
+ const mappedLevel = level === "error" ? "error" : level === "warn" || level === "warning" ? "warning" : level === "info" ? "info" : level === "debug" ? "debug" : "log";
978
+ pendingConsoleLogs.push({
979
+ level: mappedLevel,
980
+ text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 2e3),
981
+ source: typeof msg.location === "function" ? msg.location()?.url : void 0,
982
+ timestamp: Date.now() - stepStartTime
983
+ });
254
984
  });
985
+ } catch {
255
986
  }
256
- });
257
- try {
258
- if (config.auth?.type === "form-login") {
987
+ try {
988
+ rawPage.on("requestfailed", (req) => {
989
+ const url = typeof req.url === "function" ? req.url() : String(req.url ?? "");
990
+ const method = typeof req.method === "function" ? req.method() : String(req.method ?? "GET");
991
+ const failure = typeof req.failure === "function" ? req.failure() : req.failure;
992
+ pendingNetworkErrors.push({
993
+ method,
994
+ url: url.slice(0, 500),
995
+ status: 0,
996
+ statusText: failure?.errorText ?? "Request failed",
997
+ timestamp: Date.now() - stepStartTime
998
+ });
999
+ });
1000
+ } catch {
1001
+ }
1002
+ try {
1003
+ rawPage.on("response", (res) => {
1004
+ const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
1005
+ if (status >= 400) {
1006
+ const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
1007
+ const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
1008
+ const req = typeof res.request === "function" ? res.request() : res.request;
1009
+ const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
1010
+ pendingNetworkErrors.push({
1011
+ method,
1012
+ url: url.slice(0, 500),
1013
+ status,
1014
+ statusText,
1015
+ timestamp: Date.now() - stepStartTime
1016
+ });
1017
+ }
1018
+ });
1019
+ } catch {
1020
+ }
1021
+ if (config.auth?.type === "form-login" || config.auth?.type === "supabase-native") {
259
1022
  config.onStatusChange?.("authenticating");
260
1023
  await injectAuth(page, config.auth, stagehand);
261
1024
  }
262
1025
  config.onStatusChange?.("navigating");
263
1026
  const targetUrl = config.testCase.targetRoute ? `${config.targetUrl.replace(/\/$/, "")}${config.testCase.targetRoute}` : config.targetUrl;
264
1027
  await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
265
- if (config.auth && config.auth.type !== "form-login") {
1028
+ if (config.auth && config.auth.type !== "form-login" && config.auth.type !== "supabase-native") {
266
1029
  config.onStatusChange?.("authenticating");
267
1030
  await injectAuth(page, config.auth, stagehand);
268
1031
  if (config.auth.type === "localStorage") {
@@ -276,79 +1039,143 @@ async function runTest(config) {
276
1039
  }
277
1040
  await page.waitForLoadState("networkidle").catch(() => {
278
1041
  });
1042
+ await page.evaluate(() => {
1043
+ window.__bugbear_suppress = true;
1044
+ try {
1045
+ localStorage.setItem("__bugbear_suppress", "true");
1046
+ } catch {
1047
+ }
1048
+ }).catch(() => {
1049
+ });
1050
+ await installClickTracker(page);
279
1051
  pendingConsoleLogs = [];
280
1052
  pendingNetworkErrors = [];
281
1053
  config.onStatusChange?.("executing");
282
1054
  const steps = config.testCase.steps;
1055
+ const maxRetries = config.retry?.maxRetries ?? DEFAULT_MAX_RETRIES;
1056
+ const retryDelayMs = config.retry?.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS;
1057
+ const resilientMode = config.resilientMode ?? true;
283
1058
  for (let i = 0; i < steps.length; i++) {
284
1059
  const step = steps[i];
285
- stepStartTime = Date.now();
286
- pendingConsoleLogs = [];
287
- pendingNetworkErrors = [];
288
- const screenshotBefore = await page.screenshot({ type: "png" });
289
- let error;
290
- let screenshotAfter = screenshotBefore;
291
- let actSucceeded = false;
292
- try {
293
- await stagehand.act(step.action);
294
- actSucceeded = true;
295
- await page.waitForLoadState("networkidle").catch(() => {
296
- });
297
- await page.waitForTimeout(500);
298
- screenshotAfter = await page.screenshot({ type: "png" });
299
- } catch (err) {
300
- error = err instanceof Error ? err.message : String(err);
1060
+ const retryHistory = [];
1061
+ let finalResult;
1062
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1063
+ stepStartTime = Date.now();
1064
+ pendingConsoleLogs = [];
1065
+ pendingNetworkErrors = [];
1066
+ const screenshotBefore = await page.screenshot({ type: "png" });
1067
+ let error;
1068
+ let screenshotAfter = screenshotBefore;
1069
+ let actSucceeded = false;
1070
+ const actionResult = await executeAction(page, stagehand, step);
1071
+ error = actionResult.error;
1072
+ actSucceeded = !error;
1073
+ if (actSucceeded) {
1074
+ await page.waitForLoadState("networkidle").catch(() => {
1075
+ });
1076
+ await page.waitForTimeout(step.waitMs ?? 500);
1077
+ }
301
1078
  screenshotAfter = await page.screenshot({ type: "png" }).catch(() => screenshotBefore);
1079
+ let evaluation = {
1080
+ passed: false,
1081
+ confidence: 0,
1082
+ actualResult: error ?? "Action execution failed"
1083
+ };
1084
+ if (actSucceeded) {
1085
+ try {
1086
+ const visionResult = await withTimeout(
1087
+ evaluateStep({
1088
+ anthropic,
1089
+ screenshotBefore,
1090
+ screenshotAfter,
1091
+ action: step.action,
1092
+ expectedResult: step.expectedResult,
1093
+ evaluationHint: step.evaluationHint,
1094
+ model: config.model
1095
+ }),
1096
+ AI_OPERATION_TIMEOUT_MS,
1097
+ "Vision evaluation"
1098
+ );
1099
+ evaluation = {
1100
+ passed: visionResult.passed,
1101
+ confidence: visionResult.confidence,
1102
+ actualResult: visionResult.actualResult
1103
+ };
1104
+ } catch (evalErr) {
1105
+ evaluation = {
1106
+ passed: false,
1107
+ confidence: 0.2,
1108
+ actualResult: `Vision evaluation error: ${evalErr instanceof Error ? evalErr.message : String(evalErr)}`
1109
+ };
1110
+ }
1111
+ }
1112
+ let discoveredActions = [];
1113
+ if (actSucceeded && !actionResult.deterministic) {
1114
+ const discovered = await discoverSelector(page);
1115
+ if (discovered) {
1116
+ discoveredActions = [{
1117
+ type: discovered.suggestedActionType ?? "click",
1118
+ selector: discovered.selector,
1119
+ description: `Discovered via ${discovered.strategy}: ${discovered.tagName}${discovered.textContent ? ` "${discovered.textContent.slice(0, 50)}"` : ""}`
1120
+ }];
1121
+ }
1122
+ }
1123
+ const consoleLogs = pendingConsoleLogs.slice(0, 50);
1124
+ const networkErrors = pendingNetworkErrors.slice(0, 30);
1125
+ finalResult = {
1126
+ stepNumber: step.stepNumber,
1127
+ action: step.action,
1128
+ expectedResult: step.expectedResult,
1129
+ actualResult: evaluation.actualResult,
1130
+ passed: evaluation.passed,
1131
+ confidence: evaluation.confidence,
1132
+ screenshotBefore,
1133
+ screenshotAfter,
1134
+ actionsTaken: discoveredActions,
1135
+ error,
1136
+ durationMs: Date.now() - stepStartTime,
1137
+ consoleLogs,
1138
+ networkErrors,
1139
+ retryCount: attempt,
1140
+ retryHistory,
1141
+ skipped: false
1142
+ };
1143
+ const shouldRetry = !evaluation.passed && error && isRetryableError(error) && attempt < maxRetries;
1144
+ if (!shouldRetry) break;
1145
+ retryHistory.push({
1146
+ attempt,
1147
+ error,
1148
+ confidence: evaluation.confidence,
1149
+ timestamp: Date.now()
1150
+ });
1151
+ await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
302
1152
  }
303
- let evaluation = {
304
- passed: false,
305
- confidence: 0,
306
- actualResult: error ?? "Action execution failed"
307
- };
308
- if (actSucceeded) {
1153
+ if (resilientMode && finalResult && !finalResult.passed) {
1154
+ finalResult.skipped = true;
1155
+ finalResult.skipReason = "Step failed, recovered page state";
309
1156
  try {
310
- const verificationSchema = import_zod.z.object({
311
- passed: import_zod.z.boolean().describe("Whether the expected result was achieved"),
312
- confidence: import_zod.z.number().min(0).max(1).describe("Confidence in the assessment (0.9+ = very sure, 0.7-0.9 = likely, below 0.7 = uncertain)"),
313
- actualResult: import_zod.z.string().describe("Description of what actually happened on the page")
1157
+ config.onStatusChange?.("navigating");
1158
+ await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
1159
+ await page.waitForLoadState("networkidle").catch(() => {
1160
+ });
1161
+ await installClickTracker(page);
1162
+ await page.evaluate(() => {
1163
+ window.__bugbear_suppress = true;
1164
+ try {
1165
+ localStorage.setItem("__bugbear_suppress", "true");
1166
+ } catch {
1167
+ }
1168
+ }).catch(() => {
314
1169
  });
315
- const verification = await stagehand.extract(
316
- `You are evaluating a QA test step. The action "${step.action}" was just performed. Check if this expected result was achieved: "${step.expectedResult}". Look at the current page state and describe what actually happened. Be precise and factual in your assessment.`,
317
- verificationSchema
318
- );
319
- evaluation = {
320
- passed: verification.passed,
321
- confidence: verification.confidence,
322
- actualResult: verification.actualResult
323
- };
324
- } catch (evalErr) {
325
- evaluation = {
326
- passed: false,
327
- confidence: 0.2,
328
- actualResult: `Verification error: ${evalErr instanceof Error ? evalErr.message : String(evalErr)}`
329
- };
1170
+ pendingConsoleLogs = [];
1171
+ pendingNetworkErrors = [];
1172
+ config.onStatusChange?.("executing");
1173
+ } catch (recoveryErr) {
1174
+ finalResult.skipReason = `Step failed, recovery also failed: ${recoveryErr instanceof Error ? recoveryErr.message : String(recoveryErr)}`;
330
1175
  }
331
1176
  }
332
- const consoleLogs = pendingConsoleLogs.slice(0, 50);
333
- const networkErrors = pendingNetworkErrors.slice(0, 30);
334
- const result = {
335
- stepNumber: step.stepNumber,
336
- action: step.action,
337
- expectedResult: step.expectedResult,
338
- actualResult: evaluation.actualResult,
339
- passed: evaluation.passed,
340
- confidence: evaluation.confidence,
341
- screenshotBefore,
342
- screenshotAfter,
343
- actionsTaken: [],
344
- // Stagehand handles actions internally
345
- error,
346
- durationMs: Date.now() - stepStartTime,
347
- consoleLogs,
348
- networkErrors
349
- };
350
- stepResults.push(result);
351
- config.onStepComplete?.(result, i, steps.length);
1177
+ stepResults.push(finalResult);
1178
+ config.onStepComplete?.(finalResult, i, steps.length);
352
1179
  }
353
1180
  config.onStatusChange?.("completed");
354
1181
  const model = config.model ?? "claude-sonnet-4-20250514";
@@ -362,11 +1189,7 @@ async function runTest(config) {
362
1189
  totalDurationMs: Date.now() - startTime,
363
1190
  summary,
364
1191
  screenshotUrls: [],
365
- tokenUsage: {
366
- // Stagehand tracks tokens internally; these are approximate
367
- inputTokens: steps.length * 3e3,
368
- outputTokens: steps.length * 500
369
- },
1192
+ tokenUsage: getTokenEstimate(steps.length),
370
1193
  browserSessionId: session.sessionId
371
1194
  };
372
1195
  } catch (err) {
@@ -378,30 +1201,689 @@ async function runTest(config) {
378
1201
  totalDurationMs: Date.now() - startTime,
379
1202
  summary: `Test execution failed: ${err instanceof Error ? err.message : String(err)}`,
380
1203
  screenshotUrls: [],
381
- tokenUsage: {
382
- inputTokens: stepResults.length * 3e3,
383
- outputTokens: stepResults.length * 500
384
- },
385
- browserSessionId: session.sessionId
1204
+ tokenUsage: getTokenEstimate(stepResults.length),
1205
+ browserSessionId: session?.sessionId ?? "unknown"
386
1206
  };
387
1207
  } finally {
388
- await session.close();
1208
+ if (session?.page) {
1209
+ const rawPage = session.page;
1210
+ rawPage.removeAllListeners?.("console");
1211
+ rawPage.removeAllListeners?.("requestfailed");
1212
+ rawPage.removeAllListeners?.("response");
1213
+ }
1214
+ await session?.close();
389
1215
  }
390
1216
  }
391
1217
/**
 * Collapse per-step results into one overall verdict.
 *
 * Skipped steps are ignored when judging pass/fail:
 * - "error"             no steps at all, or every step was skipped
 * - "passed_with_skips" all executed steps passed and at least one was skipped
 * - "passed"            all steps executed and passed
 * - "failed"            every executed step failed, or any executed step has an error
 * - "partial"           a mix of passes and failures without errors
 *
 * @param {Array<{passed: boolean, skipped?: boolean, error?: string}>} steps
 * @returns {"error"|"passed_with_skips"|"passed"|"failed"|"partial"}
 */
function determineOverallResult(steps) {
  const executed = steps.filter((step) => !step.skipped);
  // An empty run and an all-skipped run both have nothing to judge.
  if (executed.length === 0) {
    return "error";
  }
  const passedCount = executed.filter((step) => step.passed).length;
  if (passedCount === executed.length) {
    return executed.length < steps.length ? "passed_with_skips" : "passed";
  }
  const anyError = executed.some((step) => step.error);
  if (passedCount === 0 || anyError) {
    return "failed";
  }
  return "partial";
}
1229
+
1230
+ // src/explorer.ts
1231
+ var import_sdk2 = __toESM(require("@anthropic-ai/sdk"));
1232
// Model used by runExploration when config.model is omitted; the "anthropic/"
// prefix is stripped before the name is sent to the Anthropic SDK.
var DEFAULT_MODEL4 = "anthropic/claude-sonnet-4-20250514";
// Deadline, in milliseconds, for each observe/decide/evaluate AI call during exploration.
var AI_OPERATION_TIMEOUT_MS2 = 60 * 1e3;
1234
/**
 * Await `promise`, but give up once `timeoutMs` milliseconds have elapsed.
 *
 * @param {Promise<*>} promise - Operation being guarded.
 * @param {number} timeoutMs - Maximum wait in milliseconds.
 * @param {string} operation - Human-readable label for the timeout error.
 * @returns {Promise<*>} The value of `promise` when it wins the race.
 * @throws {Error} `"<operation> timed out after <timeoutMs>ms"` when the deadline wins,
 *   or whatever `promise` rejects with.
 */
async function withTimeout2(promise, timeoutMs, operation) {
  let handle;
  const expiry = new Promise((_unused, fail) => {
    handle = setTimeout(() => {
      fail(new Error(`${operation} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });
  try {
    const winner = await Promise.race([promise, expiry]);
    return winner;
  } finally {
    // Release the timer on every path, success or failure.
    clearTimeout(handle);
  }
}
1245
/**
 * Run an AI-driven exploratory session against `config.targetUrl`.
 *
 * For up to `actionBudget` iterations the loop observes the page, asks the
 * model for the next action, performs it through Stagehand, and asks the
 * model to classify the outcome. A structured report is generated afterwards.
 *
 * @param {object} config
 * @param {string} config.targetUrl - Page to open first.
 * @param {string} config.featureDescription - Feature under test, used in prompts.
 * @param {number} config.actionBudget - Maximum number of actions to attempt.
 * @param {object} [config.auth] - When set, passed to `injectAuth` after the first navigation.
 * @param {object} config.browserConfig - Forwarded to `createStagehandSession`.
 * @param {string} config.anthropicApiKey - API key for the Anthropic client and Stagehand.
 * @param {string} [config.model] - Model name; an "anthropic/" prefix is stripped for SDK calls.
 * @param {Function} [config.onActionComplete] - Called as `(action, index)` after each recorded action.
 * @returns {Promise<object>} Result with `overallResult` ("findings", "clean" or "error"),
 *   `actions`, `report`, `totalDurationMs`, `tokenUsage` and `browserSessionId`.
 *   Failures after the session is created are reported as an "error" result
 *   rather than thrown; a failure to create the session itself still rejects.
 */
async function runExploration(config) {
  const {
    targetUrl,
    featureDescription,
    actionBudget,
    auth,
    browserConfig,
    anthropicApiKey,
    model = DEFAULT_MODEL4,
    onActionComplete
  } = config;
  const anthropic = new import_sdk2.default({ apiKey: anthropicApiKey });
  const startTime = Date.now();
  const actions = [];
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  const session = await createStagehandSession(browserConfig, anthropicApiKey);
  const { stagehand, page } = session;
  try {
    // Runs inside the try so that a failure here still reaches the finally
    // block and the browser session is closed instead of leaked.
    await suppressBugBearWidget(stagehand);
    await page.goto(targetUrl, { waitUntil: "networkidle", timeoutMs: 3e4 });
    if (auth) {
      await injectAuth(page, auth, stagehand);
      await page.waitForLoadState("networkidle").catch(() => {
      });
    }
    const networkCapture = createNetworkCapture(page);
    let consoleLogs = [];
    let actionStartTime = Date.now();
    const rawPage = page;
    // Only errors and warnings are kept; timestamps are relative to the current action.
    rawPage.on("console", (msg) => {
      const level = msg.type?.() ?? msg.type ?? "log";
      if (["error", "warning", "warn"].includes(level)) {
        consoleLogs.push({
          level: level === "warn" ? "warning" : level,
          text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 500),
          source: typeof msg.location === "function" ? msg.location()?.url : void 0,
          timestamp: Date.now() - actionStartTime
        });
      }
    });
    // Running text log fed back to the model so it does not repeat actions.
    const actionLog = [];
    for (let i = 0; i < actionBudget; i++) {
      actionStartTime = Date.now();
      consoleLogs = [];
      const observations = await withTimeout2(
        stagehand.observe(),
        AI_OPERATION_TIMEOUT_MS2,
        "Page observation"
      );
      const decisionResponse = await withTimeout2(
        anthropic.messages.create({
          model: model.replace("anthropic/", ""),
          max_tokens: 300,
          system: buildDecisionPrompt(featureDescription, actionBudget - i, actionLog),
          messages: [
            {
              role: "user",
              content: `Current page URL: ${page.url()}

Visible interactive elements:
${formatObservations(observations)}

What single action should I perform next?`
            }
          ]
        }),
        AI_OPERATION_TIMEOUT_MS2,
        "Action decision"
      );
      const actionText = extractText(decisionResponse);
      totalInputTokens += decisionResponse.usage.input_tokens;
      totalOutputTokens += decisionResponse.usage.output_tokens;
      // The model signals that it has nothing left to try.
      if (actionText.toLowerCase().includes("[done]") || actionText.toLowerCase().includes("no more actions")) {
        break;
      }
      const screenshotBefore = await page.screenshot({ type: "png" });
      networkCapture.start();
      try {
        await stagehand.act(actionText);
      } catch (actError) {
        networkCapture.stop();
        // The page may be in a bad state after a failed action; fall back to
        // the "before" image rather than aborting the whole exploration.
        const screenshotAfter2 = await page.screenshot({ type: "png" }).catch(() => screenshotBefore);
        const action2 = {
          actionNumber: i + 1,
          action: actionText,
          category: "broken_interaction",
          severity: "medium",
          confidence: 0.9,
          description: `Action failed: ${actError instanceof Error ? actError.message : String(actError)}`,
          screenshotBefore,
          screenshotAfter: screenshotAfter2,
          networkRequests: networkCapture.getRequests(),
          consoleLogs: [...consoleLogs],
          durationMs: Date.now() - actionStartTime
        };
        actions.push(action2);
        actionLog.push(`[${i + 1}] ${actionText} -> FAILED: ${action2.description}`);
        onActionComplete?.(action2, i);
        continue;
      }
      await page.waitForLoadState("networkidle").catch(() => {
      });
      await page.waitForTimeout(500);
      networkCapture.stop();
      const screenshotAfter = await page.screenshot({ type: "png" });
      const capturedRequests = networkCapture.getRequests();
      const networkErrors = networkCapture.getErrors();
      const evalResponse = await withTimeout2(
        anthropic.messages.create({
          model: model.replace("anthropic/", ""),
          max_tokens: 400,
          system: buildEvaluationPrompt(),
          messages: [
            {
              role: "user",
              content: buildEvaluationContext(actionText, consoleLogs, networkErrors, page.url())
            }
          ]
        }),
        AI_OPERATION_TIMEOUT_MS2,
        "Action evaluation"
      );
      totalInputTokens += evalResponse.usage.input_tokens;
      totalOutputTokens += evalResponse.usage.output_tokens;
      const evaluation = parseEvaluation2(extractText(evalResponse));
      const action = {
        actionNumber: i + 1,
        action: actionText,
        category: evaluation.category,
        severity: evaluation.severity,
        confidence: evaluation.confidence,
        description: evaluation.description,
        screenshotBefore,
        screenshotAfter,
        networkRequests: capturedRequests,
        consoleLogs: [...consoleLogs],
        domContext: evaluation.domContext,
        durationMs: Date.now() - actionStartTime
      };
      actions.push(action);
      const logEntry = evaluation.category === "normal" ? `[${i + 1}] ${actionText} -> OK` : `[${i + 1}] ${actionText} -> FINDING (${evaluation.category}): ${evaluation.description}`;
      actionLog.push(logEntry);
      onActionComplete?.(action, i);
    }
    // Lazily initialise the report-generator module (bundler __esm wrapper).
    const { generateExplorationReport: generateExplorationReport2 } = await Promise.resolve().then(() => (init_report_generator(), report_generator_exports));
    const report = await generateExplorationReport2(anthropic, {
      projectName: "",
      featureDescription,
      targetUrl,
      actions,
      model: model.replace("anthropic/", "")
    });
    totalInputTokens += report.tokenUsage.inputTokens;
    totalOutputTokens += report.tokenUsage.outputTokens;
    const findings = actions.filter((a) => a.category !== "normal");
    return {
      overallResult: findings.length > 0 ? "findings" : "clean",
      actions,
      report: report.report,
      totalDurationMs: Date.now() - startTime,
      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens },
      browserSessionId: session.sessionId
    };
  } catch (error) {
    // Report the failure as a result, keeping whatever actions were recorded.
    return {
      overallResult: "error",
      actions,
      report: {
        projectName: "",
        featureDescription,
        targetUrl,
        exploredAt: (/* @__PURE__ */ new Date()).toISOString(),
        duration: `${Math.round((Date.now() - startTime) / 1e3)}s`,
        actionsUsed: actions.length,
        actionBudget,
        findings: [],
        tested: [],
        notTested: [{ description: "Exploration aborted due to error", reason: String(error) }],
        summary: `Exploration failed after ${actions.length} actions: ${error instanceof Error ? error.message : String(error)}`,
        suggestedPrompt: ""
      },
      totalDurationMs: Date.now() - startTime,
      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens },
      browserSessionId: session.sessionId
    };
  } finally {
    if (session.page) {
      const rawPage = session.page;
      rawPage.removeAllListeners?.("console");
    }
    await session.close();
  }
}
1439
/**
 * Build the system prompt that asks the model to choose the next exploratory action.
 *
 * @param {string} featureDescription - Feature being explored, quoted in the prompt.
 * @param {number} remainingBudget - Number of actions still available.
 * @param {string[]} actionLog - One line per action already taken; listed in
 *   the prompt only when non-empty.
 * @returns {string} The complete prompt text.
 */
function buildDecisionPrompt(featureDescription, remainingBudget, actionLog) {
  let history = "";
  if (actionLog.length > 0) {
    history = `\nActions already taken:\n${actionLog.join("\n")}`;
  }
  const promptLines = [
    `You are an exploratory QA tester examining the feature: "${featureDescription}".`,
    "Your goal is to find bugs by interacting with the page like a real user would.",
    "",
    "Strategy for choosing your next action:",
    "1. Try the happy path first (normal usage)",
    "2. Then try edge cases: empty inputs, very long text, special characters",
    "3. Click buttons and links to verify they work",
    "4. Submit forms with missing required fields",
    "5. Look for visual problems: overlapping text, broken layouts, missing images",
    "",
    `You have ${remainingBudget} actions left. Prioritize high-risk interactions.`,
    history,
    "",
    "DO NOT repeat an action you've already performed.",
    `Respond with a single action description. If there's nothing left to test, respond with "[DONE]".`
  ];
  return promptLines.join("\n");
}
1458
/**
 * Build the fixed system prompt used to classify the outcome of one action.
 *
 * The prompt asks for a JSON object with the fields that `parseEvaluation2`
 * reads (`category`, `severity`, `confidence`, `description`,
 * `expectedBehavior`, `domSelector`).
 *
 * @returns {string} The prompt text.
 */
function buildEvaluationPrompt() {
  const promptLines = [
    "You are evaluating the result of a QA test action. Categorize what happened.",
    "",
    "Respond in this exact JSON format:",
    "{",
    '"category": "normal" | "console_error" | "broken_interaction" | "visual_anomaly" | "input_handling",',
    '"severity": "critical" | "high" | "medium" | "low",',
    '"confidence": 0.0-1.0,',
    '"description": "What happened",',
    '"expectedBehavior": "What should have happened",',
    '"domSelector": "CSS selector of the element involved (if applicable)"',
    "}",
    "",
    "Category definitions:",
    "- normal: Expected behavior, no issues found",
    "- console_error: JavaScript exception or failed network request (4xx/5xx)",
    "- broken_interaction: Action had no visible effect, button didn't respond, navigation failed",
    "- visual_anomaly: Layout break, text overflow, missing/broken images, overlapping elements",
    "- input_handling: Missing validation, accepted clearly invalid input, no error feedback",
    "",
    'Only report genuine issues. If behavior seems correct, use "normal".',
    'For "normal" results, severity and domSelector are not required.'
  ];
  return promptLines.join("\n");
}
1481
/**
 * Assemble the user message describing what happened after an action.
 *
 * @param {string} action - Action text that was performed.
 * @param {Array<{level: string, text: string}>} consoleLogs - Console entries captured during the action.
 * @param {Array<{method: string, url: string, status: number, statusText: string}>} networkErrors - Failed requests.
 * @param {string} currentUrl - Page URL after the action.
 * @returns {string} Context text; the console and network sections are
 *   included only when their lists are non-empty.
 */
function buildEvaluationContext(action, consoleLogs, networkErrors, currentUrl) {
  const pieces = [`Action performed: "${action}"\nCurrent URL: ${currentUrl}\n`];
  if (consoleLogs.length > 0) {
    const consoleLines = consoleLogs.map((entry) => `[${entry.level}] ${entry.text}`);
    pieces.push(`\nConsole output:\n${consoleLines.join("\n")}\n`);
  }
  if (networkErrors.length > 0) {
    const failureLines = networkErrors.map(
      (failure) => `${failure.method} ${failure.url} -> ${failure.status} ${failure.statusText}`
    );
    pieces.push(`\nFailed network requests:\n${failureLines.join("\n")}\n`);
  }
  return pieces.join("");
}
1499
/**
 * Render at most the first 30 observations as a numbered, newline-separated list.
 *
 * @param {Array<{selector: string, description: string}>} observations
 * @returns {string} Lines of the form `N. [selector] description`.
 */
function formatObservations(observations) {
  const lines = [];
  for (const [index, item] of observations.slice(0, 30).entries()) {
    lines.push(`${index + 1}. [${item.selector}] ${item.description}`);
  }
  return lines.join("\n");
}
1502
/**
 * Return the text of the first text content block in a Messages API response.
 *
 * Responses can carry no blocks at all or start with a non-text block, so the
 * first block is not assumed to exist or to be text.
 *
 * @param {{content?: Array<{type: string, text?: string}>}} response - Model response.
 * @returns {string} Text of the first `type === "text"` block, or "" when there is none.
 */
function extractText(response) {
  const textBlock = response.content?.find((block) => block?.type === "text");
  return textBlock?.text ?? "";
}
1506
/**
 * Parse the model's evaluation reply into a normalized result object.
 *
 * The outermost `{...}` span in `text` is parsed as JSON. Missing fields get
 * defaults (`category` "normal", `confidence` 0.5, `description` = raw text).
 * If no JSON can be extracted or parsed, a low-confidence "normal" result
 * carrying the raw text is returned instead of throwing.
 *
 * @param {string} text - Raw model reply.
 * @returns {{category: string, severity?: string, confidence: number,
 *   description: string, expectedBehavior?: string, domContext?: object}}
 */
function parseEvaluation2(text) {
  try {
    const braceSpan = text.match(/\{[\s\S]*\}/);
    if (!braceSpan) {
      throw new Error("No JSON found");
    }
    const payload = JSON.parse(braceSpan[0]);
    let domContext;
    if (payload.domSelector) {
      domContext = { selector: payload.domSelector, elementText: "", nearbyText: "" };
    }
    const hasNumericConfidence = typeof payload.confidence === "number";
    return {
      category: payload.category || "normal",
      severity: payload.severity,
      confidence: hasNumericConfidence ? payload.confidence : 0.5,
      description: payload.description || text,
      expectedBehavior: payload.expectedBehavior,
      domContext
    };
  } catch {
    // Deliberate best-effort: an unreadable reply is treated as a low-confidence non-finding.
    return { category: "normal", confidence: 0.3, description: text };
  }
}
1523
+
1524
+ // src/index.ts
1525
+ init_report_generator();
1526
+
1527
+ // src/report-triager.ts
1528
// Model used for triage when the caller does not specify one.
var DEFAULT_MODEL5 = "claude-sonnet-4-20250514";
/**
 * Ask the model to triage a bug report and return the parsed result.
 *
 * @param {object} input
 * @param {object} input.anthropic - Client exposing `messages.create`.
 * @param {object} input.report - Report to triage (see `buildTriagePrompt`).
 * @param {Array<object>} input.recentReports - Candidates for duplicate detection.
 * @param {string} [input.model] - Model override; defaults to `DEFAULT_MODEL5`.
 * @returns {Promise<object>} Result of `parseTriageResult` applied to the
 *   concatenated text blocks of the reply.
 */
async function triageReport(input) {
  const chosenModel = input.model ?? DEFAULT_MODEL5;
  const { report, recentReports } = input;
  const request = {
    model: chosenModel,
    max_tokens: 1024,
    messages: [{ role: "user", content: buildTriagePrompt(report, recentReports) }]
  };
  const reply = await input.anthropic.messages.create(request);
  // Join every text block; non-text blocks are ignored.
  let replyText = "";
  for (const block of reply.content) {
    if (block.type === "text") {
      replyText += block.text;
    }
  }
  return parseTriageResult(replyText);
}
1541
/**
 * Build the triage prompt for one bug report.
 *
 * Sections are emitted only for data that is present: source, app context,
 * console errors/warnings (first 10), network errors (first 10), device info
 * and error fingerprint. Recent reports, when given, are listed for duplicate
 * detection with descriptions truncated to 150 characters.
 *
 * @param {object} report - Report fields (`title`, `description`, `report_source`,
 *   `app_context`, `enhanced_context`, `device_info`, `error_fingerprint`).
 * @param {Array<{id: string, title?: string, description: string, error_fingerprint?: string}>} recentReports
 * @returns {string} Prompt text instructing the model to answer with a JSON object.
 */
function buildTriagePrompt(report, recentReports) {
  // Render "Label: value" for every listed key whose value is truthy.
  const describeFields = (source, labelledKeys) => labelledKeys.filter(([key]) => source[key]).map(([key, label]) => `${label}: ${source[key]}`);
  const hasEntries = (value) => value && Object.keys(value).length > 0;

  const sections = [
    `REPORT TITLE: ${report.title ?? "(no title)"}`,
    `DESCRIPTION: ${report.description}`
  ];
  if (report.report_source) {
    sections.push(`SOURCE: ${report.report_source}`);
  }
  if (hasEntries(report.app_context)) {
    const contextParts = describeFields(report.app_context, [
      ["currentRoute", "Route"],
      ["currentUrl", "URL"],
      ["componentName", "Component"],
      ["userAction", "User action"]
    ]);
    if (contextParts.length > 0) {
      sections.push(`APP CONTEXT:\n${contextParts.join("\n")}`);
    }
  }
  if (report.enhanced_context) {
    const { consoleLogs, networkErrors } = report.enhanced_context;
    if (consoleLogs && consoleLogs.length > 0) {
      const relevant = consoleLogs.filter((entry) => entry.level === "error" || entry.level === "warning");
      const errorText = relevant.slice(0, 10).map((entry) => `[${entry.level}] ${entry.text}`).join("\n");
      if (errorText) {
        sections.push(`CONSOLE ERRORS:\n${errorText}`);
      }
    }
    if (networkErrors && networkErrors.length > 0) {
      const networkText = networkErrors.slice(0, 10).map((failure) => `${failure.method} ${failure.url} \u2192 ${failure.status}`).join("\n");
      sections.push(`NETWORK ERRORS:\n${networkText}`);
    }
  }
  if (hasEntries(report.device_info)) {
    const deviceParts = describeFields(report.device_info, [
      ["platform", "Platform"],
      ["browser", "Browser"],
      ["os", "OS"],
      ["screenSize", "Screen"]
    ]);
    if (deviceParts.length > 0) {
      sections.push(`DEVICE:\n${deviceParts.join(", ")}`);
    }
  }
  if (report.error_fingerprint) {
    sections.push(`ERROR FINGERPRINT: ${report.error_fingerprint}`);
  }

  let recentSection = "";
  if (recentReports.length > 0) {
    const recentLines = [];
    for (const recent of recentReports) {
      const shortDescription = recent.description.slice(0, 150);
      const fingerprint = recent.error_fingerprint ? ` [fingerprint: ${recent.error_fingerprint}]` : "";
      recentLines.push(`- ID: ${recent.id} | "${recent.title ?? "(no title)"}" | ${shortDescription}${fingerprint}`);
    }
    recentSection = `\nRECENT REPORTS (check for duplicates):\n${recentLines.join("\n")}`;
  }

  const reportBody = sections.join("\n\n");
  return `You are a QA triage specialist. Analyze this bug report and provide structured triage.

${reportBody}
${recentSection}

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"suggested_severity": "critical" | "high" | "medium" | "low",
"severity_confidence": 0.0-1.0,
"suggested_category": "ui_ux" | "functional" | "crash" | "security" | "other",
"category_confidence": 0.0-1.0,
"root_cause_analysis": "Brief analysis of the likely root cause",
"duplicate_of": null or "uuid-of-matching-report",
"duplicate_confidence": 0.0-1.0,
"triage_notes": "Summary of triage reasoning"
}

Severity guide:
- critical: App crash, data loss, security vulnerability, blocks core workflow
- high: Major feature broken, significant UX degradation, affects many users
- medium: Feature partially broken, workaround exists, moderate impact
- low: Minor cosmetic issue, edge case, minimal user impact

Category guide:
- crash: App crashes, unhandled exceptions, white screen of death
- security: Auth bypass, data exposure, injection vulnerabilities
- functional: Feature doesn't work as expected, logic errors, broken flows
- ui_ux: Visual glitches, layout issues, confusing UX, accessibility problems
- other: Performance, documentation, configuration issues

Duplicate detection:
- Compare error fingerprints first (exact match = very high confidence)
- Then compare descriptions semantically (similar symptoms on same route/feature)
- Only flag as duplicate if confidence \u2265 0.80`;
}
1638
/**
 * Severity labels accepted from the triage model. `validateTriageResult`
 * replaces any other value with "medium". Frozen so the whitelist cannot be
 * widened by an accidental in-place mutation.
 */
var VALID_SEVERITIES = Object.freeze(["critical", "high", "medium", "low"]);
/**
 * Category labels accepted from the triage model. `validateTriageResult`
 * replaces any other value with "other". Frozen for the same reason.
 */
var VALID_CATEGORIES = Object.freeze(["ui_ux", "functional", "crash", "security", "other"]);
1640
/**
 * Turns the raw model reply into a triage result.
 *
 * Tries, in order: (1) the whole trimmed reply as JSON, (2) the widest
 * `{ ... }` span that mentions both "suggested_severity" and
 * "suggested_category". If neither yields a validated result, a fixed
 * low-confidence placeholder is returned that quotes the first 200
 * characters of the reply.
 *
 * @param {string} text - Raw text returned by the model.
 * @returns {object} Validated triage result or the fallback placeholder.
 */
function parseTriageResult(text) {
  // Each attempt is best-effort: a parse or validation error just means
  // "try the next strategy", so it is reported as null rather than thrown.
  const attempt = (readCandidate) => {
    try {
      return validateTriageResult(JSON.parse(readCandidate()));
    } catch {
      return null;
    }
  };

  const whole = attempt(() => text.trim());
  if (whole !== null) return whole;

  const embedded = text.match(/\{[\s\S]*"suggested_severity"[\s\S]*"suggested_category"[\s\S]*\}/);
  if (embedded) {
    const extracted = attempt(() => embedded[0]);
    if (extracted !== null) return extracted;
  }

  return {
    suggested_severity: "medium",
    severity_confidence: 0.3,
    suggested_category: "other",
    category_confidence: 0.3,
    root_cause_analysis: `Triage returned unparseable response: ${text.slice(0, 200)}`,
    duplicate_of: null,
    duplicate_confidence: 0,
    triage_notes: "Auto-triage failed to parse AI response"
  };
}
1665
/**
 * Normalises a parsed triage payload into the full result shape.
 *
 * Severity and category must be one of the whitelisted labels, otherwise
 * they become "medium" / "other". Confidence fields go through
 * `clampConfidence`. Text fields must be strings or they are replaced by a
 * placeholder; `duplicate_of` becomes null unless it is a string.
 *
 * @param {object} parsed - Value produced by JSON.parse on the model reply.
 * @returns {object} Triage result with every field populated.
 */
function validateTriageResult(parsed) {
  const pickLabel = (allowed, candidate, fallback) => (allowed.includes(candidate) ? candidate : fallback);
  const pickString = (candidate, fallback) => (typeof candidate === "string" ? candidate : fallback);

  return {
    suggested_severity: pickLabel(VALID_SEVERITIES, parsed.suggested_severity, "medium"),
    severity_confidence: clampConfidence(parsed.severity_confidence),
    suggested_category: pickLabel(VALID_CATEGORIES, parsed.suggested_category, "other"),
    category_confidence: clampConfidence(parsed.category_confidence),
    root_cause_analysis: pickString(parsed.root_cause_analysis, "No analysis provided"),
    duplicate_of: pickString(parsed.duplicate_of, null),
    duplicate_confidence: clampConfidence(parsed.duplicate_confidence),
    triage_notes: pickString(parsed.triage_notes, "No notes provided")
  };
}
1679
/**
 * Coerces a model-supplied confidence into the closed range [0, 1].
 *
 * @param {unknown} value - Candidate confidence.
 * @returns {number} 0.5 when `value` is not a number or is NaN; otherwise
 *   `value` clamped to [0, 1] (so +/-Infinity become 1 / 0).
 */
function clampConfidence(value) {
  // NaN has typeof "number" and would survive Math.min/Math.max unchanged,
  // so it has to be rejected explicitly alongside non-numbers.
  if (typeof value !== "number" || Number.isNaN(value)) return 0.5;
  return Math.min(1, Math.max(0, value));
}
1683
+
1684
+ // src/failure-analyzer.ts
1685
// Model used when the caller does not pass `input.model`.
var DEFAULT_MODEL6 = "claude-sonnet-4-20250514";
/**
 * Asks the model to classify a failed test step.
 *
 * Sends one user message made of: a caption plus the "before" screenshot, a
 * caption plus the "after" screenshot, and the text prompt produced by
 * `buildFailurePrompt`. The text blocks of the reply are concatenated and
 * handed to `parseFailureAnalysis`.
 *
 * @param {object} input - Reads `anthropic` (client exposing
 *   `messages.create`), `step`, `result` (with `screenshotBefore` /
 *   `screenshotAfter`, each converted via `.toString("base64")`), and the
 *   optional `model`, `discoveredSelector`, `consoleLogs`, `networkErrors`.
 * @returns {Promise<object>} Whatever `parseFailureAnalysis` returns.
 */
async function analyzeFailure(input) {
  const model = input.model ?? DEFAULT_MODEL6;
  const { step, result, discoveredSelector, consoleLogs, networkErrors } = input;

  const caption = (text) => ({ type: "text", text });
  const pngImage = (screenshot) => ({
    type: "image",
    source: { type: "base64", media_type: "image/png", data: screenshot.toString("base64") }
  });

  const content = [
    caption("BEFORE screenshot (page state before the failed action):"),
    pngImage(result.screenshotBefore),
    caption("AFTER screenshot (page state after the failed action):"),
    pngImage(result.screenshotAfter),
    caption(buildFailurePrompt(step, result, discoveredSelector, consoleLogs, networkErrors))
  ];

  const response = await input.anthropic.messages.create({
    model,
    max_tokens: 1024,
    messages: [{ role: "user", content }]
  });

  // Only text blocks carry the JSON verdict; other block types are ignored.
  const textBlocks = response.content.filter((block) => block.type === "text");
  const replyText = textBlocks.map((block) => block.text).join("");
  return parseFailureAnalysis(replyText, step);
}
1709
/** Maps a per-step failure classification to the label used for a whole run. */
var STEP_TO_RUN = {
  real_bug: "bug",
  test_maintenance: "test_issue",
  ai_limitation: "ai_limitation",
  flaky: "flaky",
  unknown: "unknown"
};
/**
 * Collapses the classifications of a run's failed steps into one run label.
 *
 * Rules, in order:
 *  - no steps                     -> "unknown"
 *  - any step is "real_bug"       -> "bug"
 *  - otherwise the most frequent step classification wins (on a tie, the
 *    one that appears first in the input), translated through STEP_TO_RUN.
 *
 * A winning classification that is not a key of STEP_TO_RUN yields
 * "unknown" rather than `undefined` or an inherited Object.prototype member.
 *
 * @param {string[]} stepClassifications - Per-step classifications.
 * @returns {string} Run-level label.
 */
function rollupFailureClassification(stepClassifications) {
  if (stepClassifications.length === 0) return "unknown";
  if (stepClassifications.includes("real_bug")) return "bug";

  const counts = /* @__PURE__ */ new Map();
  for (const classification of stepClassifications) {
    counts.set(classification, (counts.get(classification) ?? 0) + 1);
  }

  let winner = "unknown";
  let winnerCount = 0;
  // Strict ">" keeps the first-seen classification on ties (Map iterates
  // in insertion order).
  for (const [classification, count] of counts) {
    if (count > winnerCount) {
      winnerCount = count;
      winner = classification;
    }
  }

  // Own-property check: unrecognised labels (or names such as "constructor")
  // must not leak undefined / prototype members to callers.
  return Object.prototype.hasOwnProperty.call(STEP_TO_RUN, winner) ? STEP_TO_RUN[winner] : "unknown";
}
1736
/**
 * Builds the text prompt that asks the model to classify a failed step.
 *
 * The prompt is a fixed instruction block wrapped around a list of context
 * sections separated by blank lines. The step number/action, expected result
 * and actual result are always present. Selector, action type, error,
 * discovered selector, console output and network errors are added only when
 * the corresponding data is supplied.
 *
 * @param {object} step - Reads `stepNumber`, `action`, `expectedResult`,
 *   and the optional `selector` / `actionType`.
 * @param {object} result - Reads `actualResult` and the optional `error`.
 * @param {object} [discoveredSelector] - Reads `selector`, `strategy` and
 *   the optional `textContent`.
 * @param {Array<object>} [consoleLogs] - Entries with `level` and `text`;
 *   only "error" and "warning" entries are used, at most 8.
 * @param {Array<object>} [networkErrors] - Entries with `method`, `url`,
 *   `status`, `statusText`; at most 8 are used.
 * @returns {string} The complete prompt.
 */
function buildFailurePrompt(step, result, discoveredSelector, consoleLogs, networkErrors) {
  const context = [
    `FAILED STEP #${step.stepNumber}: ${step.action}`,
    `EXPECTED: ${step.expectedResult}`,
    `ACTUAL: ${result.actualResult}`
  ];

  if (step.selector) {
    context.push(`SELECTOR USED: ${step.selector}`);
  }
  if (step.actionType) {
    context.push(`ACTION TYPE: ${step.actionType}`);
  }
  if (result.error) {
    context.push(`ERROR: ${result.error}`);
  }

  if (discoveredSelector) {
    const textSuffix = discoveredSelector.textContent ? ` \u2014 text: "${discoveredSelector.textContent}"` : "";
    context.push(`DISCOVERED SELECTOR (what Stagehand actually clicked): ${discoveredSelector.selector} (via ${discoveredSelector.strategy})${textSuffix}`);
  }

  if (consoleLogs && consoleLogs.length > 0) {
    // The section is labelled "errors" but deliberately includes warnings.
    const relevant = consoleLogs.filter((entry) => entry.level === "error" || entry.level === "warning");
    const rendered = relevant.slice(0, 8).map((entry) => `[${entry.level}] ${entry.text}`).join("\n");
    // Skip the section entirely when nothing survived the level filter.
    if (rendered) {
      context.push(`CONSOLE ERRORS:\n${rendered}`);
    }
  }

  if (networkErrors && networkErrors.length > 0) {
    const rendered = networkErrors.slice(0, 8).map((failure) => `${failure.method} ${failure.url} \u2192 ${failure.status} ${failure.statusText}`).join("\n");
    context.push(`NETWORK ERRORS:\n${rendered}`);
  }

  return `You are a QA failure analyst. A test step failed. Analyze the before/after screenshots and the context below to classify this failure.

${context.join("\n\n")}

Classify into ONE of these categories:
- **real_bug**: The application has an actual defect. Indicators: API errors (4xx/5xx), JavaScript exceptions, missing/broken UI elements that SHOULD be there, incorrect behavior, data not saving.
- **test_maintenance**: The test is stale \u2014 the app changed but the test wasn't updated. Indicators: element moved/renamed, selector no longer matches, page restructured but app works correctly, the discovered selector differs from the test's selector.
- **ai_limitation**: The AI executor itself could not complete this step \u2014 NOT an app bug. Indicators: already logged in so can't reach the login page, a QA/testing widget or overlay appeared and blocked the real UI, the test requires measuring something the AI can't (contrast ratios, pixel measurements), the AI landed on a completely wrong page and never reached the test target, authentication redirect prevented navigation, a popup or modal unrelated to the test blocked interaction.
- **flaky**: Timing or intermittent issue. Indicators: timeout errors, "element not found" but the element IS visible in screenshots, network hiccup, race condition.
- **unknown**: Can't determine with confidence.

For **test_maintenance** failures, suggest a corrected step (selector, action, value).

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"classification": "real_bug" | "test_maintenance" | "ai_limitation" | "flaky" | "unknown",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why this classification",
"suggested_fix": null | {
"corrected_action": "Updated natural language action (if changed)",
"corrected_selector": "Updated CSS selector (if selector changed)",
"corrected_actionType": "Updated action type (if changed)",
"corrected_value": "Updated value (if changed)"
}
}`;
}
1783
/**
 * Step-level failure classifications accepted from the model.
 * `validateFailureAnalysis` replaces any other value with "unknown".
 * Frozen so the whitelist cannot be widened by an accidental mutation.
 */
var VALID_CLASSIFICATIONS = Object.freeze(["real_bug", "test_maintenance", "ai_limitation", "flaky", "unknown"]);
1784
/**
 * Turns the raw model reply into a failure analysis for `step`.
 *
 * Tries, in order: (1) the whole trimmed reply as JSON, (2) the widest
 * `{ ... }` span that mentions both "classification" and "confidence".
 * If neither yields a validated analysis, an "unknown" placeholder with
 * confidence 0.3 is returned, quoting the first 200 characters of the reply.
 *
 * @param {string} text - Raw text returned by the model.
 * @param {object} step - The failed step, forwarded to `validateFailureAnalysis`.
 * @returns {object} Validated analysis or the fallback placeholder.
 */
function parseFailureAnalysis(text, step) {
  // Parse/validation errors only mean "try the next strategy", so each
  // attempt reports failure as null instead of throwing.
  const attempt = (readCandidate) => {
    try {
      return validateFailureAnalysis(JSON.parse(readCandidate()), step);
    } catch {
      return null;
    }
  };

  const whole = attempt(() => text.trim());
  if (whole !== null) return whole;

  const embedded = text.match(/\{[\s\S]*"classification"[\s\S]*"confidence"[\s\S]*\}/);
  if (embedded) {
    const extracted = attempt(() => embedded[0]);
    if (extracted !== null) return extracted;
  }

  return {
    classification: "unknown",
    confidence: 0.3,
    reasoning: `Failure analysis returned unparseable response: ${text.slice(0, 200)}`
  };
}
1804
/**
 * Normalises a parsed failure-analysis payload.
 *
 * `classification` must be one of VALID_CLASSIFICATIONS or it becomes
 * "unknown"; `confidence` goes through `clampConfidence2`; `reasoning` must
 * be a string or a placeholder is used. When the payload carries a truthy,
 * object-typed `suggested_fix`, a fix record is attached that echoes the
 * step's number and original action and copies each `corrected_*` field
 * only if it is a string (otherwise the field is present but undefined).
 *
 * @param {object} parsed - Value produced by JSON.parse on the model reply.
 * @param {object} step - The failed step; reads `stepNumber` and `action`.
 * @returns {object} Analysis with `classification`, `confidence`,
 *   `reasoning` and, optionally, `suggested_fix`.
 */
function validateFailureAnalysis(parsed, step) {
  const stringOrUndefined = (candidate) => (typeof candidate === "string" ? candidate : void 0);

  const analysis = {
    classification: VALID_CLASSIFICATIONS.includes(parsed.classification) ? parsed.classification : "unknown",
    confidence: clampConfidence2(parsed.confidence),
    reasoning: typeof parsed.reasoning === "string" ? parsed.reasoning : "No reasoning provided"
  };

  const proposedFix = parsed.suggested_fix;
  if (!proposedFix || typeof proposedFix !== "object") {
    return analysis;
  }

  analysis.suggested_fix = {
    stepNumber: step.stepNumber,
    original_action: step.action,
    corrected_action: stringOrUndefined(proposedFix.corrected_action),
    corrected_selector: stringOrUndefined(proposedFix.corrected_selector),
    corrected_actionType: stringOrUndefined(proposedFix.corrected_actionType),
    corrected_value: stringOrUndefined(proposedFix.corrected_value)
  };
  return analysis;
}
1824
/**
 * Coerces a model-supplied confidence into the closed range [0, 1].
 *
 * @param {unknown} value - Candidate confidence.
 * @returns {number} 0.5 when `value` is not a number or is NaN; otherwise
 *   `value` clamped to [0, 1] (so +/-Infinity become 1 / 0).
 */
function clampConfidence2(value) {
  // NaN has typeof "number" and would survive Math.min/Math.max unchanged,
  // so it has to be rejected explicitly alongside non-numbers.
  if (typeof value !== "number" || Number.isNaN(value)) return 0.5;
  return Math.min(1, Math.max(0, value));
}
1828
+
1829
+ // src/concurrency.ts
1830
/**
 * Counting semaphore for limiting how many async tasks run at once.
 *
 * `acquire()` resolves immediately while fewer than `max` slots are held and
 * otherwise waits in FIFO order. `release()` hands the slot straight to the
 * oldest waiter if there is one, or frees it.
 */
var Semaphore = class {
  /**
   * @param {number} max - Maximum number of slots that may be held at once.
   * @throws {Error} If `max` is not >= 1 (including NaN or undefined).
   */
  constructor(max) {
    // `!(max >= 1)` instead of `max < 1`: NaN and undefined compare false
    // both ways, and a NaN limit would make every acquire() wait forever.
    if (!(max >= 1)) throw new Error("Semaphore max must be >= 1");
    this.max = max;
    this.current = 0;
    this.queue = [];
  }
  /**
   * Takes a slot, waiting if all `max` slots are in use.
   * @returns {Promise<void>} Resolves once the caller holds a slot.
   */
  async acquire() {
    if (this.current < this.max) {
      this.current++;
      return;
    }
    return new Promise((resolve) => {
      this.queue.push(resolve);
    });
  }
  /**
   * Gives a slot back. If someone is waiting, the slot is transferred to
   * them, so `current` is left unchanged. A release with no slot held is
   * ignored so it cannot push `current` below zero and silently raise the
   * effective limit.
   */
  release() {
    const next = this.queue.shift();
    if (next) {
      next();
      return;
    }
    if (this.current > 0) {
      this.current--;
    }
  }
  /** Number of slots currently in use */
  get active() {
    return this.current;
  }
  /** Number of waiters in the queue */
  get waiting() {
    return this.queue.length;
  }
};
400
1863
  // Annotate the CommonJS export names for ESM import in node:
401
1864
  0 && (module.exports = {
1865
+ Semaphore,
1866
+ analyzeFailure,
1867
+ authenticateSupabase,
402
1868
  createStagehandSession,
1869
+ discoverSelector,
1870
+ estimateBatchCost,
1871
+ estimateCost,
1872
+ estimateTestCost,
1873
+ evaluateStep,
1874
+ executeAction,
1875
+ generateExplorationReport,
403
1876
  generateRunSummary,
1877
+ getTokenEstimate,
404
1878
  injectAuth,
405
- runTest
1879
+ injectSupabaseAuth,
1880
+ installClickTracker,
1881
+ performSupabaseAuth,
1882
+ rollupFailureClassification,
1883
+ runExploration,
1884
+ runTest,
1885
+ suppressBugBearWidget,
1886
+ triageReport,
1887
+ verifySupabaseSession
406
1888
  });
407
1889
  //# sourceMappingURL=index.js.map