@bbearai/ai-executor 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,9 +1,91 @@
1
+ import {
2
+ generateExplorationReport
3
+ } from "./chunk-WT22IQMS.mjs";
4
+
1
5
  // src/runner.ts
2
6
  import Anthropic from "@anthropic-ai/sdk";
3
- import { z } from "zod";
4
7
 
5
8
  // src/browser.ts
6
9
  import { Stagehand } from "@browserbasehq/stagehand";
10
+
11
+ // src/supabase-auth.ts
12
/**
 * Derives the Supabase project ref from a project URL.
 *
 * The ref is taken to be the first dot-separated label of the URL's hostname
 * (e.g. "abcd" for "https://abcd.supabase.co").
 *
 * @param {string} supabaseUrl - Absolute Supabase project URL.
 * @returns {string} First hostname label.
 * @throws {TypeError} If `supabaseUrl` is not a parseable absolute URL.
 */
function extractProjectRef(supabaseUrl) {
  const { hostname } = new URL(supabaseUrl);
  const [projectRef] = hostname.split(".");
  return projectRef;
}
18
/**
 * Signs in against the Supabase auth REST endpoint using email + password.
 *
 * @param {{ supabaseUrl: string, anonKey: string, email: string, password: string }} auth
 *   Connection details and credentials; a single trailing slash on
 *   `supabaseUrl` is ignored.
 * @returns {Promise<object>} The parsed JSON session (guaranteed to carry a
 *   truthy `access_token`).
 * @throws {Error} When the HTTP response is not OK (message includes the
 *   status and the first 200 characters of the body) or when the session
 *   has no `access_token`.
 */
async function authenticateSupabase(auth) {
  const baseUrl = auth.supabaseUrl.replace(/\/$/, "");
  const credentials = { email: auth.email, password: auth.password };
  const response = await fetch(`${baseUrl}/auth/v1/token?grant_type=password`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "apikey": auth.anonKey
    },
    body: JSON.stringify(credentials)
  });

  if (response.ok) {
    const session = await response.json();
    if (session.access_token) {
      return session;
    }
    throw new Error("Supabase auth returned no access_token");
  }

  // Reading the error body is best-effort; an unreadable body becomes "".
  const detail = await response.text().catch(() => "");
  throw new Error(
    `Supabase auth failed (${response.status}): ${detail.slice(0, 200)}`
  );
}
43
/**
 * Stores a Supabase session in the page's localStorage under the
 * `sb-<ref>-auth-token` key, where `<ref>` is the first hostname label of
 * `auth.supabaseUrl`.
 *
 * If the page has no URL yet (or is on "about:blank") it is first navigated
 * to the Supabase URL; a failure of that navigation is ignored.
 *
 * NOTE(review): localStorage is scoped per origin, so the value lands on
 * whatever origin the page is on when this runs — confirm this matches the
 * origin of the application under test.
 *
 * @param {object} page - Browser page exposing `url()`, `goto()` and `evaluate()`.
 * @param {{ supabaseUrl: string }} auth - Supabase connection details.
 * @param {object} session - Session object returned by the auth endpoint.
 * @returns {Promise<void>}
 */
async function injectSupabaseAuth(page, auth, session) {
  const [projectRef] = new URL(auth.supabaseUrl).hostname.split(".");
  const {
    access_token,
    refresh_token,
    expires_in,
    expires_at,
    token_type,
    user
  } = session;
  const entry = {
    key: `sb-${projectRef}-auth-token`,
    value: JSON.stringify({
      access_token,
      refresh_token,
      expires_in,
      expires_at,
      token_type,
      user
    })
  };

  const currentUrl = page.url();
  const onBlankPage = !currentUrl || currentUrl === "about:blank";
  if (onBlankPage) {
    const navigation = page.goto(auth.supabaseUrl.replace(/\/$/, ""), {
      waitUntil: "domcontentloaded",
      timeoutMs: 1e4
    });
    // Navigation is best-effort; the storage write below is attempted regardless.
    await navigation.catch(() => {
    });
  }

  // The callback runs inside the page, so it must only use its argument.
  await page.evaluate(({ key, value }) => {
    localStorage.setItem(key, value);
  }, entry);
}
69
/**
 * Checks an access token by requesting the current user from the Supabase
 * auth REST endpoint.
 *
 * @param {{ supabaseUrl: string, anonKey: string }} auth - Supabase connection details.
 * @param {string} accessToken - Token sent as a Bearer credential.
 * @returns {Promise<boolean>} The `ok` flag of the HTTP response.
 */
async function verifySupabaseSession(auth, accessToken) {
  const endpoint = `${auth.supabaseUrl.replace(/\/$/, "")}/auth/v1/user`;
  const headers = {
    "Authorization": `Bearer ${accessToken}`,
    "apikey": auth.anonKey
  };
  const { ok } = await fetch(endpoint, { headers });
  return ok;
}
79
/**
 * Full Supabase sign-in flow for a page: authenticate, store the session in
 * the page, then confirm the access token is accepted.
 *
 * @param {object} page - Browser page the session is injected into.
 * @param {object} auth - Supabase connection details and credentials.
 * @returns {Promise<void>}
 * @throws {Error} If authentication fails or the token is rejected on verification.
 */
async function performSupabaseAuth(page, auth) {
  const session = await authenticateSupabase(auth);
  await injectSupabaseAuth(page, auth, session);

  const tokenAccepted = await verifySupabaseSession(auth, session.access_token);
  if (tokenAccepted) {
    return;
  }
  throw new Error("Supabase auth verification failed \u2014 session token rejected");
}
87
+
88
+ // src/browser.ts
7
89
  var DEFAULT_MODEL = "anthropic/claude-sonnet-4-20250514";
8
90
  async function createStagehandSession(config, anthropicApiKey) {
9
91
  const modelName = config.model ?? DEFAULT_MODEL;
@@ -16,6 +98,11 @@ async function createStagehandSession(config, anthropicApiKey) {
16
98
  modelName,
17
99
  apiKey: anthropicApiKey
18
100
  },
101
+ // Bypass pino logger — its pino-pretty transport uses worker threads
102
+ // which fail in Vercel's serverless environment
103
+ logger: (msg) => {
104
+ if ((msg.level ?? 0) >= 40) console.warn("[Stagehand]", msg.message);
105
+ },
19
106
  localBrowserLaunchOptions: config.provider === "local" ? {
20
107
  headless: config.headless ?? true,
21
108
  viewport
@@ -39,6 +126,21 @@ async function createStagehandSession(config, anthropicApiKey) {
39
126
  }
40
127
  };
41
128
  }
129
/**
 * Registers an init script on the Stagehand browser context that sets the
 * `__bugbear_suppress` flag on `window` and in localStorage.
 *
 * Entirely best-effort: a missing context, a context without
 * `addInitScript`, or any thrown error is silently ignored.
 *
 * @param {object} stagehand - Stagehand instance; its `context` is used if present.
 * @returns {Promise<void>}
 */
async function suppressBugBearWidget(stagehand) {
  try {
    const browserContext = stagehand.context;
    if (!browserContext?.addInitScript) {
      return;
    }
    await browserContext.addInitScript(() => {
      window.__bugbear_suppress = true;
      try {
        localStorage.setItem("__bugbear_suppress", "true");
      } catch {
        // Storage may be unavailable on the current page; the window flag still applies.
      }
    });
  } catch {
    // Suppression is optional; never fail the caller over it.
  }
}
42
144
  async function injectAuth(page, auth, stagehand) {
43
145
  if (auth.type === "cookie") {
44
146
  for (const c of auth.cookies) {
@@ -64,23 +166,123 @@ async function injectAuth(page, auth, stagehand) {
64
166
  }, auth.items);
65
167
  } else if (auth.type === "form-login") {
66
168
  await performFormLogin(page, auth, stagehand);
169
+ } else if (auth.type === "supabase-native") {
170
+ await performSupabaseAuth(page, auth);
67
171
  }
68
172
  }
173
/**
 * Creates a start/stop recorder for a page's network traffic.
 *
 * While active it records every response that is not an image, stylesheet,
 * font or media resource, and separately records errors: responses with
 * status >= 400 and failed requests (status 0).
 *
 * Changes from the previous implementation:
 *  - `start()` is idempotent with respect to listener registration, so calling
 *    it twice no longer records every response twice.
 *  - The async response handler never rejects; a malformed response object is
 *    skipped instead of surfacing as an unhandled promise rejection.
 *
 * @param {object} page - Page-like event source exposing `on(event, fn)` and
 *   `off(event, fn)` for "response" and "requestfailed".
 * @returns {{ start(): void, stop(): void, getRequests(): object[], getErrors(): object[] }}
 *   `getRequests`/`getErrors` return shallow copies of the recorded entries.
 */
function createNetworkCapture(page) {
  const IGNORED_RESOURCE_TYPES = ["image", "stylesheet", "font", "media"];
  const BODY_METHODS = ["POST", "PUT", "PATCH"];
  const MAX_FIELD_LENGTH = 500;

  const requests = [];
  const errors = [];
  let active = false;
  let startTimestamp = Date.now();

  // Request/response members are read whether they are exposed as methods or
  // as plain properties.
  const readMember = (target, key) => {
    const member = target?.[key];
    return typeof member === "function" ? target[key]() : member;
  };

  const onResponse = async (response) => {
    if (!active) return;
    try {
      const req = readMember(response, "request");
      if (IGNORED_RESOURCE_TYPES.includes(readMember(req, "resourceType"))) return;

      const entry = {
        method: String(readMember(req, "method") ?? "GET"),
        url: String(readMember(response, "url") ?? "").slice(0, MAX_FIELD_LENGTH),
        status: Number(readMember(response, "status") ?? 0),
        timestamp: (/* @__PURE__ */ new Date()).toISOString()
      };
      const { status } = entry;

      if (status >= 400) {
        try {
          const body = await response.text();
          entry.responseBody = body.slice(0, MAX_FIELD_LENGTH);
        } catch {
          // The body may be unavailable; the entry is still recorded.
        }
        errors.push({
          method: entry.method,
          url: entry.url,
          status,
          statusText: String(readMember(response, "statusText") ?? ""),
          // Milliseconds since start(), unlike the ISO timestamp on request entries.
          timestamp: Date.now() - startTimestamp
        });
      }

      if (BODY_METHODS.includes(entry.method)) {
        try {
          const postData = readMember(req, "postData");
          if (postData) entry.requestBody = String(postData).slice(0, MAX_FIELD_LENGTH);
        } catch {
          // Request payload is optional diagnostic data.
        }
      }

      requests.push(entry);
    } catch {
      // Capture is diagnostic only: an unexpected response shape must not
      // turn into an unhandled rejection from this event handler.
    }
  };

  const onRequestFailed = (req) => {
    if (!active) return;
    const failure = readMember(req, "failure");
    errors.push({
      method: String(readMember(req, "method") ?? "GET"),
      url: String(readMember(req, "url") ?? "").slice(0, MAX_FIELD_LENGTH),
      status: 0,
      statusText: failure?.errorText ?? "Request failed",
      timestamp: Date.now() - startTimestamp
    });
  };

  // `supported` flips to false the first time page.on() throws for an event,
  // after which that event is never subscribed again.
  const listeners = [
    { event: "response", handler: onResponse, supported: true, attached: false },
    { event: "requestfailed", handler: onRequestFailed, supported: true, attached: false }
  ];

  return {
    start() {
      active = true;
      requests.length = 0;
      errors.length = 0;
      startTimestamp = Date.now();
      for (const listener of listeners) {
        if (!listener.supported || listener.attached) continue;
        try {
          page.on(listener.event, listener.handler);
          listener.attached = true;
        } catch {
          listener.supported = false;
        }
      }
    },
    stop() {
      active = false;
      for (const listener of listeners) {
        if (!listener.attached) continue;
        try {
          page.off(listener.event, listener.handler);
          listener.attached = false;
        } catch {
          // If detaching fails the handler stays registered; `active` keeps it
          // inert and `attached` stays true so start() does not register it twice.
        }
      }
    },
    getRequests: () => [...requests],
    getErrors: () => [...errors]
  };
}
69
269
  async function performFormLogin(page, auth, stagehand) {
70
270
  await page.goto(auth.loginUrl, { waitUntil: "domcontentloaded" });
71
271
  await page.waitForLoadState("networkidle", 15e3).catch(() => {
72
272
  });
273
+ await fillLoginCredentials(page, auth);
73
274
  if (stagehand) {
74
275
  await stagehand.act(
75
- `Fill in the email/username field with "${auth.email}" and the password field with "${auth.password}", then click the login/sign-in button to submit the form.`
76
- );
276
+ "Click the login, sign-in, or submit button to submit the form."
277
+ ).catch(() => {
278
+ });
77
279
  } else {
78
- await manualFormLogin(page, auth);
280
+ await clickSubmitButton(page);
79
281
  }
80
282
  await page.waitForLoadState("networkidle", 15e3).catch(() => {
81
283
  });
82
284
  }
83
- async function manualFormLogin(page, auth) {
285
+ async function fillLoginCredentials(page, auth) {
84
286
  await page.waitForSelector(
85
287
  'input[type="email"], input[type="text"][name*="email"], input[name*="user"], input[type="text"]',
86
288
  { timeout: 15e3 }
@@ -114,6 +316,8 @@ async function manualFormLogin(page, auth) {
114
316
  } else {
115
317
  throw new Error("Could not find password input on login page");
116
318
  }
319
+ }
320
+ async function clickSubmitButton(page) {
117
321
  const submitSelectors = [
118
322
  'button[type="submit"]',
119
323
  'input[type="submit"]'
@@ -138,21 +342,23 @@ async function generateRunSummary(anthropic, testTitle, steps, model) {
138
342
  (s) => `Step ${s.stepNumber}: ${s.action}
139
343
  Expected: ${s.expectedResult}
140
344
  Actual: ${s.actualResult}
141
- Result: ${s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
345
+ Result: ${s.skipped ? "SKIPPED" : s.passed ? "PASS" : "FAIL"} (confidence: ${Math.round(s.confidence * 100)}%)${s.error ? `
142
346
  Error: ${s.error}` : ""}`
143
347
  ).join("\n\n");
144
- const passCount = steps.filter((s) => s.passed).length;
145
- const failCount = steps.filter((s) => !s.passed).length;
348
+ const passCount = steps.filter((s) => s.passed && !s.skipped).length;
349
+ const failCount = steps.filter((s) => !s.passed && !s.skipped).length;
350
+ const skipCount = steps.filter((s) => s.skipped).length;
351
+ const skipNote = skipCount > 0 ? " Some steps were skipped due to page state recovery \u2014 these are not failures, just steps that could not be executed." : "";
146
352
  const response = await anthropic.messages.create({
147
353
  model,
148
354
  max_tokens: 512,
149
355
  messages: [
150
356
  {
151
357
  role: "user",
152
- content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything). Be concise and factual.
358
+ content: `Summarize this AI test execution in 2-3 sentences. Focus on what was tested, what passed, and what failed (if anything).${skipNote} Be concise and factual.
153
359
 
154
360
  Test: ${testTitle}
155
- Results: ${passCount} passed, ${failCount} failed out of ${steps.length} steps
361
+ Results: ${passCount} passed, ${failCount} failed, ${skipCount} skipped out of ${steps.length} steps
156
362
 
157
363
  ${stepsText}`
158
364
  }
@@ -161,7 +367,355 @@ ${stepsText}`
161
367
  return response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
162
368
  }
163
369
 
370
+ // src/vision-evaluator.ts
371
+ var DEFAULT_MODEL2 = "claude-sonnet-4-20250514";
372
/**
 * Asks the Anthropic model to judge a test step from its BEFORE and AFTER
 * screenshots.
 *
 * @param {object} input
 * @param {object} input.anthropic - Client exposing `messages.create()`.
 * @param {string} [input.model] - Model id; defaults to `DEFAULT_MODEL2`.
 * @param {string} input.action - Description of the action that was performed.
 * @param {string} input.expectedResult - What the step was expected to achieve.
 * @param {string} [input.evaluationHint] - Optional extra guidance appended to the prompt.
 * @param {{ toString(encoding: string): string }} input.screenshotBefore - PNG data
 *   (presumably a Buffer — it is serialised with `toString("base64")`).
 * @param {{ toString(encoding: string): string }} input.screenshotAfter - PNG data, as above.
 * @returns {Promise<{ passed: boolean, confidence: number, actualResult: string }>}
 *   The result of `parseEvaluation` applied to the model's text output.
 */
async function evaluateStep(input) {
  const model = input.model ?? DEFAULT_MODEL2;
  const hintClause = input.evaluationHint ? `
EVALUATION HINT: ${input.evaluationHint}` : "";

  const textBlock = (text) => ({ type: "text", text });
  const pngBlock = (screenshot) => ({
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: screenshot.toString("base64")
    }
  });

  const instructions = `You are a QA test evaluator. Compare the BEFORE and AFTER screenshots to evaluate this test step.

ACTION PERFORMED: ${input.action}
EXPECTED RESULT: ${input.expectedResult}${hintClause}

Analyze the visual differences between the two screenshots and determine if the expected result was achieved.

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"passed": true/false,
"confidence": 0.0-1.0,
"actualResult": "Brief description of what actually changed between the screenshots"
}

Confidence guide:
- 0.95-1.0: Clearly achieved/not achieved, obvious visual evidence
- 0.8-0.94: Very likely, strong visual indicators
- 0.6-0.79: Probable but some ambiguity
- Below 0.6: Uncertain, hard to tell from screenshots alone`;

  const response = await input.anthropic.messages.create({
    model,
    max_tokens: 512,
    messages: [
      {
        role: "user",
        content: [
          textBlock("BEFORE screenshot (page state before the action):"),
          pngBlock(input.screenshotBefore),
          textBlock("AFTER screenshot (page state after the action):"),
          pngBlock(input.screenshotAfter),
          textBlock(instructions)
        ]
      }
    ]
  });

  // Only text blocks carry the verdict; anything else in the reply is ignored.
  const replyText = response.content
    .filter((block) => block.type === "text")
    .map((block) => block.text)
    .join("");
  return parseEvaluation(replyText);
}
436
/**
 * Parses the vision model's reply into an evaluation result.
 *
 * The whole (trimmed) reply is tried as JSON first. If that fails, the text
 * between the first "{" and the last "}" is tried, which tolerates prose or
 * Markdown code fences around the object. Unlike the previous regex-based
 * fallback, this does not depend on the order of the keys inside the object.
 *
 * @param {string} text - Raw text returned by the model.
 * @returns {{ passed: boolean, confidence: number, actualResult: string }}
 *   The validated evaluation, or a failing result with confidence 0.3 whose
 *   `actualResult` quotes the first 200 characters of the unparseable reply.
 */
function parseEvaluation(text) {
  const raw = typeof text === "string" ? text : String(text ?? "");
  const candidates = [raw.trim()];

  const firstBrace = raw.indexOf("{");
  const lastBrace = raw.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    candidates.push(raw.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      // Only a JSON object can be an evaluation; bare numbers, strings and
      // null fall through to the "unparseable" result.
      if (parsed !== null && typeof parsed === "object") {
        return validateEvaluation(parsed);
      }
    } catch {
      // Not valid JSON in this form; try the next candidate.
    }
  }

  return {
    passed: false,
    confidence: 0.3,
    actualResult: `Vision evaluation returned unparseable response: ${raw.slice(0, 200)}`
  };
}

/**
 * Coerces a parsed object into a well-formed evaluation.
 *
 * @param {object} parsed - Object decoded from the model's JSON.
 * @returns {{ passed: boolean, confidence: number, actualResult: string }}
 *   `passed` defaults to false, `confidence` is clamped to [0, 1] and
 *   defaults to 0.5, `actualResult` defaults to "No description provided".
 */
function validateEvaluation(parsed) {
  return {
    passed: typeof parsed.passed === "boolean" ? parsed.passed : false,
    confidence: typeof parsed.confidence === "number" ? Math.max(0, Math.min(1, parsed.confidence)) : 0.5,
    actualResult: typeof parsed.actualResult === "string" ? parsed.actualResult : "No description provided"
  };
}
463
+
464
+ // src/action-executor.ts
465
/**
 * Runs one test step, preferring a deterministic page action.
 *
 * When the step carries both a `selector` and an `actionType` it is executed
 * via `executePlaywrightAction`; if that throws, the step is retried through
 * Stagehand. Steps lacking either field go straight to Stagehand.
 *
 * @param {object} page - Browser page.
 * @param {object} stagehand - Stagehand instance used for AI-driven actions.
 * @param {{ selector?: string, actionType?: string, action: string }} step
 * @returns {Promise<{ deterministic: boolean, error?: string }>}
 *   `deterministic` is true only when the direct page action succeeded;
 *   `error` is set only when every attempted path failed.
 */
async function executeAction(page, stagehand, step) {
  const hasDeterministicRecipe = Boolean(step.selector && step.actionType);
  if (!hasDeterministicRecipe) {
    return executeStagehandAction(stagehand, step);
  }

  try {
    await executePlaywrightAction(page, step);
  } catch (err) {
    const playwrightMessage = () => (err instanceof Error ? err.message : String(err));
    const fallbackResult = await executeStagehandAction(stagehand, step);
    let error = void 0;
    if (fallbackResult.error) {
      error = `Playwright failed (${playwrightMessage()}), Stagehand fallback also failed: ${fallbackResult.error}`;
    }
    return { deterministic: false, error };
  }
  return { deterministic: true };
}
480
/**
 * Performs a step directly against the page, without AI involvement.
 *
 * Supported `actionType` values: "click", "fill", "select", "navigate",
 * "scroll", "wait" and "assert" (a no-op here). For every type except
 * "wait", a truthy `waitMs` adds a pause after the action.
 *
 * @param {object} page - Browser page exposing `locator`, `evaluate`, `goto`,
 *   `waitForSelector` and `waitForTimeout`.
 * @param {{ actionType: string, selector?: string, value?: string, waitMs?: number }} step
 * @returns {Promise<void>}
 * @throws {Error} For an unknown `actionType`, a navigate step with no URL, or
 *   whatever the underlying page call throws.
 */
async function executePlaywrightAction(page, step) {
  const { actionType, selector, value, waitMs } = step;

  const handlers = new Map([
    ["click", () => page.locator(selector).click()],
    ["fill", () => page.locator(selector).fill(value ?? "")],
    ["select", () => page.evaluate(
      // Runs inside the page: set the value and notify listeners via "change".
      ({ sel, val }) => {
        const el = document.querySelector(sel);
        if (!el) throw new Error(`Select element not found: ${sel}`);
        el.value = val;
        el.dispatchEvent(new Event("change", { bubbles: true }));
      },
      { sel: selector, val: value ?? "" }
    )],
    ["navigate", async () => {
      // The URL may be supplied in either field; `value` wins.
      const url = value ?? selector ?? "";
      if (!url) throw new Error("Navigate action requires a value or selector with the URL");
      await page.goto(url, { waitUntil: "domcontentloaded", timeoutMs: 15e3 });
    }],
    ["scroll", () => page.evaluate((sel) => {
      const el = document.querySelector(sel);
      if (el) el.scrollIntoView({ behavior: "smooth", block: "center" });
    }, selector)],
    ["wait", async () => {
      if (selector) {
        await page.waitForSelector(selector, { timeout: waitMs ?? 1e4 });
        return;
      }
      if (waitMs) {
        await page.waitForTimeout(waitMs);
      }
    }],
    ["assert", () => {
      // Nothing to do on the page for an assert step.
    }]
  ]);

  const run = handlers.get(actionType);
  if (!run) {
    throw new Error(`Unknown actionType: ${actionType}`);
  }
  await run();

  const needsTrailingPause = actionType !== "wait" && waitMs;
  if (needsTrailingPause) {
    await page.waitForTimeout(waitMs);
  }
}
537
/**
 * Runs a step through Stagehand's natural-language `act()`.
 *
 * Never throws: a failure is reported through the `error` field instead.
 *
 * @param {object} stagehand - Stagehand instance exposing `act()`.
 * @param {{ action: string }} step - Step whose `action` text is sent to Stagehand.
 * @returns {Promise<{ deterministic: false, error?: string }>}
 */
async function executeStagehandAction(stagehand, step) {
  const outcome = { deterministic: false };
  try {
    await stagehand.act(step.action);
  } catch (failure) {
    outcome.error = failure instanceof Error ? failure.message : String(failure);
  }
  return outcome;
}
548
+
549
+ // src/selector-discovery.ts
550
/**
 * Inspects the last-clicked (or currently focused) element in the page and
 * proposes a CSS selector for it.
 *
 * Selector strategies, in priority order: test-id attribute, stable `id`,
 * `role` (+ accessible name), `aria-label`, then a short CSS path of up to
 * four ancestors.
 *
 * Fixes over the previous implementation:
 *  - an element found via `data-test-id` now gets a `[data-test-id=…]`
 *    selector (it used to get `[data-testid=…]`, which does not match it);
 *  - a role element named only by its `name` attribute now gets
 *    `[name=…]` (it used to get `[aria-label=…]`, which does not match it);
 *  - attribute values are quote-escaped, and ids / class names go through
 *    `CSS.escape` when the page provides it.
 *
 * @param {object} page - Browser page exposing `evaluate()`.
 * @returns {Promise<{ selector: string, strategy: string, suggestedActionType: (string|undefined), tagName: string, textContent: string } | null>}
 *   `null` when there is no usable element or the page evaluation fails.
 */
async function discoverSelector(page) {
  try {
    // Everything in this callback runs inside the page, so helpers must be
    // declared within it.
    return await page.evaluate(() => {
      const el = document.__bbLastClicked ?? document.activeElement;
      if (!el || el === document.body || el === document.documentElement) return null;

      const quoteAttr = (raw) => String(raw).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
      const escapeIdent = (raw) => typeof CSS !== "undefined" && typeof CSS.escape === "function" ? CSS.escape(raw) : raw;

      const tagName = el.tagName?.toLowerCase() ?? "unknown";
      const textContent = (el.textContent ?? "").trim().slice(0, 100);
      let selector = "";
      let strategy = "css-path";

      const dataTestId = el.getAttribute("data-testid");
      const testId = dataTestId ?? el.getAttribute("data-test-id");
      // Remember which spelling the element actually carries.
      const testIdAttr = dataTestId != null ? "data-testid" : "data-test-id";
      const role = el.getAttribute("role");
      const ariaLabel = el.getAttribute("aria-label");

      if (testId) {
        selector = `[${testIdAttr}="${quoteAttr(testId)}"]`;
        strategy = "data-testid";
      } else if (el.id && !/^:r[0-9a-z]+:?$/.test(el.id) && !/^react-/.test(el.id)) {
        // Ids matching the patterns above (":r…:" or "react-…") are skipped.
        selector = `#${escapeIdent(el.id)}`;
        strategy = "id";
      } else if (role) {
        const name = ariaLabel ?? el.getAttribute("name") ?? "";
        const nameAttr = ariaLabel != null ? "aria-label" : "name";
        selector = name ? `[role="${quoteAttr(role)}"][${nameAttr}="${quoteAttr(name)}"]` : `[role="${quoteAttr(role)}"]`;
        strategy = "role";
      } else if (ariaLabel) {
        selector = `[aria-label="${quoteAttr(ariaLabel)}"]`;
        strategy = "aria-label";
      } else {
        const parts = [];
        let current = el;
        while (current && current !== document.body) {
          let part = current.tagName.toLowerCase();
          if (current.className && typeof current.className === "string") {
            const classes = current.className.split(/\s+/).filter(
              (c) => c && !c.startsWith("_") && c.length < 30
            );
            if (classes.length > 0) {
              part += `.${escapeIdent(classes[0])}`;
            }
          }
          parts.unshift(part);
          current = current.parentElement;
          if (parts.length >= 4) break;
        }
        selector = parts.join(" > ");
        strategy = "css-path";
      }

      let suggestedActionType;
      if (tagName === "button" || tagName === "a" || role === "button") {
        suggestedActionType = "click";
      } else if (tagName === "input" || tagName === "textarea") {
        const type = el.getAttribute("type") ?? "text";
        suggestedActionType = type === "checkbox" || type === "radio" ? "click" : "fill";
      } else if (tagName === "select") {
        suggestedActionType = "select";
      }

      return { selector, strategy, suggestedActionType, tagName, textContent };
    });
  } catch {
    // Discovery is advisory; any page error simply yields "no selector".
    return null;
  }
}
619
/**
 * Installs a capturing click listener in the page's current document that
 * stores each click's target on `document.__bbLastClicked` (read back by
 * `discoverSelector`).
 *
 * Best-effort: if the page evaluation fails, nothing is installed and no
 * error is raised.
 *
 * @param {object} page - Browser page exposing `evaluate()`.
 * @returns {Promise<void>}
 */
async function installClickTracker(page) {
  // Runs inside the page, so it must not reference anything from this scope.
  const trackClicks = () => {
    const rememberTarget = (event) => {
      document.__bbLastClicked = event.target;
    };
    document.addEventListener("click", rememberTarget, { capture: true });
  };

  try {
    await page.evaluate(trackClicks);
  } catch {
    // Tracking is optional; selector discovery falls back to the focused element.
  }
}
629
+
630
+ // src/cost.ts
631
/**
 * Price table keyed by model id or short alias. Values are multiplied by
 * (tokens / 1e6) to obtain dollars, i.e. they are dollars per million tokens.
 */
var MODEL_PRICING = {
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-haiku-4-20250514": { input: 0.8, output: 4 },
  "claude-opus-4-20250514": { input: 15, output: 75 },
  // Aliases
  "sonnet": { input: 3, output: 15 },
  "haiku": { input: 0.8, output: 4 },
  "opus": { input: 15, output: 75 }
};
/** Model assumed when the caller does not name one or names an unknown one. */
var DEFAULT_MODEL3 = "claude-sonnet-4-20250514";
/** Assumed token usage per operation, used for up-front estimates. */
var TOKEN_PROFILE = {
  /** act() — screenshot + DOM context → action decision */
  actInput: 2e3,
  actOutput: 200,
  /** extract() — screenshot + extraction schema → structured result */
  extractInput: 3e3,
  extractOutput: 500,
  /** summary — all step results → narrative summary (once per run) */
  summaryInput: 2e3,
  summaryOutput: 500
};

/**
 * Looks up pricing for a model id.
 *
 * Only the table's own keys are considered, so names such as "toString" do
 * not resolve to inherited Object members. A provider-prefixed id
 * ("anthropic/claude-opus-4-20250514") is also matched by its last path
 * segment. Anything else falls back to the default model's pricing.
 */
function resolveModelPricing(model) {
  const hasPricing = (key) => Object.prototype.hasOwnProperty.call(MODEL_PRICING, key);
  const name = String(model);
  if (hasPricing(name)) return MODEL_PRICING[name];
  const bareName = name.slice(name.lastIndexOf("/") + 1);
  if (hasPricing(bareName)) return MODEL_PRICING[bareName];
  return MODEL_PRICING[DEFAULT_MODEL3];
}

/**
 * Converts token counts into a cost estimate.
 *
 * @param {number} inputTokens - Number of input tokens.
 * @param {number} outputTokens - Number of output tokens.
 * @param {string} [model] - Model id or alias; defaults to `DEFAULT_MODEL3`.
 *   Unknown models are priced as the default model.
 * @returns {{ cents: number, formatted: string, tokens: { inputTokens: number, outputTokens: number }, model: string }}
 *   `cents` is rounded to two decimals; `formatted` is dollars with four
 *   decimals; `model` echoes the requested model (or the default).
 */
function estimateCost(inputTokens, outputTokens, model) {
  const resolvedModel = model ?? DEFAULT_MODEL3;
  const pricing = resolveModelPricing(resolvedModel);
  const inputCost = inputTokens / 1e6 * pricing.input;
  const outputCost = outputTokens / 1e6 * pricing.output;
  const totalDollars = inputCost + outputCost;
  const cents = Math.round(totalDollars * 100 * 100) / 100;
  return {
    cents,
    formatted: `$${totalDollars.toFixed(4)}`,
    tokens: { inputTokens, outputTokens },
    model: resolvedModel
  };
}

/**
 * Estimates the cost of one test run with `stepCount` steps.
 *
 * @param {number} stepCount - Number of steps in the test.
 * @param {string} [model] - Model id or alias.
 * @returns {ReturnType<typeof estimateCost>}
 */
function estimateTestCost(stepCount, model) {
  const { inputTokens, outputTokens } = getTokenEstimate(stepCount);
  return estimateCost(inputTokens, outputTokens, model);
}

/**
 * Estimates the combined cost of several test cases.
 *
 * @param {Array<{ stepCount: number }>} testCases - Test cases to price.
 * @param {string} [model] - Model id or alias.
 * @returns {ReturnType<typeof estimateCost>} Estimate over the summed tokens.
 */
function estimateBatchCost(testCases, model) {
  let totalInput = 0;
  let totalOutput = 0;
  for (const tc of testCases) {
    const { inputTokens, outputTokens } = getTokenEstimate(tc.stepCount);
    totalInput += inputTokens;
    totalOutput += outputTokens;
  }
  return estimateCost(totalInput, totalOutput, model);
}

/**
 * Estimated token usage for a test: one act() and one extract() per step,
 * plus a single summary for the run.
 *
 * @param {number} stepCount - Number of steps in the test.
 * @returns {{ inputTokens: number, outputTokens: number }}
 */
function getTokenEstimate(stepCount) {
  return {
    inputTokens: stepCount * (TOKEN_PROFILE.actInput + TOKEN_PROFILE.extractInput) + TOKEN_PROFILE.summaryInput,
    outputTokens: stepCount * (TOKEN_PROFILE.actOutput + TOKEN_PROFILE.extractOutput) + TOKEN_PROFILE.summaryOutput
  };
}
686
+
164
687
  // src/runner.ts
688
// Time budget, in milliseconds, for a single AI operation
// (presumably handed to withTimeout — confirm at the call sites).
var AI_OPERATION_TIMEOUT_MS = 3e4;
// Fallback for config.retry.maxRetries when the caller does not set it.
var DEFAULT_MAX_RETRIES = 2;
// Fallback for config.retry.retryDelayMs when the caller does not set it.
var DEFAULT_RETRY_DELAY_MS = 2e3;

/**
 * Decides whether an error message describes a transient failure (timeouts,
 * connection problems, a crashed/closed page or browser) that is worth
 * retrying.
 *
 * @param {string} error - Error message text.
 * @returns {boolean} True if the message matches any known transient pattern.
 */
function isRetryableError(error) {
  const transientSignatures = [
    /timed?\s*out/i,
    /ECONNREFUSED/i,
    /ECONNRESET/i,
    /ENOTFOUND/i,
    /net::ERR_/i,
    /navigation failed/i,
    /page crashed/i,
    /context was destroyed/i,
    /target closed/i,
    /session closed/i,
    /browser disconnected/i,
    /execution context/i
  ];
  for (const signature of transientSignatures) {
    if (signature.test(error)) {
      return true;
    }
  }
  return false;
}
708
/**
 * Races a promise against a deadline.
 *
 * @param {Promise<*>} promise - The operation to wait for.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} operation - Label used in the timeout error message.
 * @returns {Promise<*>} The settled value of `promise` if it wins the race.
 * @throws {Error} "<operation> timed out after <timeoutMs>ms" if the deadline
 *   elapses first; otherwise whatever `promise` rejects with.
 */
async function withTimeout(promise, timeoutMs, operation) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => {
      reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
    };
    timer = setTimeout(expire, timeoutMs);
  });

  // The timer is cleared however the race settles, so it cannot keep the
  // process alive after the operation finishes.
  return Promise.race([promise, deadline]).finally(() => {
    clearTimeout(timer);
  });
}
165
719
  async function runTest(config) {
166
720
  const anthropic = new Anthropic({ apiKey: config.anthropicApiKey });
167
721
  const startTime = Date.now();
@@ -170,60 +724,71 @@ async function runTest(config) {
170
724
  headless: true
171
725
  };
172
726
  config.onStatusChange?.("initializing");
173
- const session = await createStagehandSession(browserConfig, config.anthropicApiKey);
174
- const { stagehand, page } = session;
727
+ let session;
175
728
  const stepResults = [];
176
729
  let pendingConsoleLogs = [];
177
730
  let pendingNetworkErrors = [];
178
731
  let stepStartTime = Date.now();
179
- const rawPage = page;
180
- rawPage.on("console", (msg) => {
181
- const level = msg.type?.() ?? msg.type ?? "log";
182
- const mappedLevel = level === "error" ? "error" : level === "warn" || level === "warning" ? "warning" : level === "info" ? "info" : level === "debug" ? "debug" : "log";
183
- pendingConsoleLogs.push({
184
- level: mappedLevel,
185
- text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 2e3),
186
- source: typeof msg.location === "function" ? msg.location()?.url : void 0,
187
- timestamp: Date.now() - stepStartTime
188
- });
189
- });
190
- rawPage.on("requestfailed", (req) => {
191
- const url = typeof req.url === "function" ? req.url() : String(req.url ?? "");
192
- const method = typeof req.method === "function" ? req.method() : String(req.method ?? "GET");
193
- const failure = typeof req.failure === "function" ? req.failure() : req.failure;
194
- pendingNetworkErrors.push({
195
- method,
196
- url: url.slice(0, 500),
197
- status: 0,
198
- statusText: failure?.errorText ?? "Request failed",
199
- timestamp: Date.now() - stepStartTime
200
- });
201
- });
202
- rawPage.on("response", (res) => {
203
- const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
204
- if (status >= 400) {
205
- const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
206
- const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
207
- const req = typeof res.request === "function" ? res.request() : res.request;
208
- const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
209
- pendingNetworkErrors.push({
210
- method,
211
- url: url.slice(0, 500),
212
- status,
213
- statusText,
214
- timestamp: Date.now() - stepStartTime
732
+ try {
733
+ session = await createStagehandSession(browserConfig, config.anthropicApiKey);
734
+ const { stagehand, page } = session;
735
+ await suppressBugBearWidget(stagehand);
736
+ const rawPage = page;
737
+ try {
738
+ rawPage.on("console", (msg) => {
739
+ const level = msg.type?.() ?? msg.type ?? "log";
740
+ const mappedLevel = level === "error" ? "error" : level === "warn" || level === "warning" ? "warning" : level === "info" ? "info" : level === "debug" ? "debug" : "log";
741
+ pendingConsoleLogs.push({
742
+ level: mappedLevel,
743
+ text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 2e3),
744
+ source: typeof msg.location === "function" ? msg.location()?.url : void 0,
745
+ timestamp: Date.now() - stepStartTime
746
+ });
215
747
  });
748
+ } catch {
216
749
  }
217
- });
218
- try {
219
- if (config.auth?.type === "form-login") {
750
+ try {
751
+ rawPage.on("requestfailed", (req) => {
752
+ const url = typeof req.url === "function" ? req.url() : String(req.url ?? "");
753
+ const method = typeof req.method === "function" ? req.method() : String(req.method ?? "GET");
754
+ const failure = typeof req.failure === "function" ? req.failure() : req.failure;
755
+ pendingNetworkErrors.push({
756
+ method,
757
+ url: url.slice(0, 500),
758
+ status: 0,
759
+ statusText: failure?.errorText ?? "Request failed",
760
+ timestamp: Date.now() - stepStartTime
761
+ });
762
+ });
763
+ } catch {
764
+ }
765
+ try {
766
+ rawPage.on("response", (res) => {
767
+ const status = typeof res.status === "function" ? res.status() : Number(res.status ?? 0);
768
+ if (status >= 400) {
769
+ const url = typeof res.url === "function" ? res.url() : String(res.url ?? "");
770
+ const statusText = typeof res.statusText === "function" ? res.statusText() : String(res.statusText ?? "");
771
+ const req = typeof res.request === "function" ? res.request() : res.request;
772
+ const method = req ? typeof req.method === "function" ? req.method() : String(req.method ?? "GET") : "GET";
773
+ pendingNetworkErrors.push({
774
+ method,
775
+ url: url.slice(0, 500),
776
+ status,
777
+ statusText,
778
+ timestamp: Date.now() - stepStartTime
779
+ });
780
+ }
781
+ });
782
+ } catch {
783
+ }
784
+ if (config.auth?.type === "form-login" || config.auth?.type === "supabase-native") {
220
785
  config.onStatusChange?.("authenticating");
221
786
  await injectAuth(page, config.auth, stagehand);
222
787
  }
223
788
  config.onStatusChange?.("navigating");
224
789
  const targetUrl = config.testCase.targetRoute ? `${config.targetUrl.replace(/\/$/, "")}${config.testCase.targetRoute}` : config.targetUrl;
225
790
  await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
226
- if (config.auth && config.auth.type !== "form-login") {
791
+ if (config.auth && config.auth.type !== "form-login" && config.auth.type !== "supabase-native") {
227
792
  config.onStatusChange?.("authenticating");
228
793
  await injectAuth(page, config.auth, stagehand);
229
794
  if (config.auth.type === "localStorage") {
@@ -237,79 +802,143 @@ async function runTest(config) {
237
802
  }
238
803
  await page.waitForLoadState("networkidle").catch(() => {
239
804
  });
805
+ await page.evaluate(() => {
806
+ window.__bugbear_suppress = true;
807
+ try {
808
+ localStorage.setItem("__bugbear_suppress", "true");
809
+ } catch {
810
+ }
811
+ }).catch(() => {
812
+ });
813
+ await installClickTracker(page);
240
814
  pendingConsoleLogs = [];
241
815
  pendingNetworkErrors = [];
242
816
  config.onStatusChange?.("executing");
243
817
  const steps = config.testCase.steps;
818
+ const maxRetries = config.retry?.maxRetries ?? DEFAULT_MAX_RETRIES;
819
+ const retryDelayMs = config.retry?.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS;
820
+ const resilientMode = config.resilientMode ?? true;
244
821
  for (let i = 0; i < steps.length; i++) {
245
822
  const step = steps[i];
246
- stepStartTime = Date.now();
247
- pendingConsoleLogs = [];
248
- pendingNetworkErrors = [];
249
- const screenshotBefore = await page.screenshot({ type: "png" });
250
- let error;
251
- let screenshotAfter = screenshotBefore;
252
- let actSucceeded = false;
253
- try {
254
- await stagehand.act(step.action);
255
- actSucceeded = true;
256
- await page.waitForLoadState("networkidle").catch(() => {
257
- });
258
- await page.waitForTimeout(500);
259
- screenshotAfter = await page.screenshot({ type: "png" });
260
- } catch (err) {
261
- error = err instanceof Error ? err.message : String(err);
823
+ const retryHistory = [];
824
+ let finalResult;
825
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
826
+ stepStartTime = Date.now();
827
+ pendingConsoleLogs = [];
828
+ pendingNetworkErrors = [];
829
+ const screenshotBefore = await page.screenshot({ type: "png" });
830
+ let error;
831
+ let screenshotAfter = screenshotBefore;
832
+ let actSucceeded = false;
833
+ const actionResult = await executeAction(page, stagehand, step);
834
+ error = actionResult.error;
835
+ actSucceeded = !error;
836
+ if (actSucceeded) {
837
+ await page.waitForLoadState("networkidle").catch(() => {
838
+ });
839
+ await page.waitForTimeout(step.waitMs ?? 500);
840
+ }
262
841
  screenshotAfter = await page.screenshot({ type: "png" }).catch(() => screenshotBefore);
842
+ let evaluation = {
843
+ passed: false,
844
+ confidence: 0,
845
+ actualResult: error ?? "Action execution failed"
846
+ };
847
+ if (actSucceeded) {
848
+ try {
849
+ const visionResult = await withTimeout(
850
+ evaluateStep({
851
+ anthropic,
852
+ screenshotBefore,
853
+ screenshotAfter,
854
+ action: step.action,
855
+ expectedResult: step.expectedResult,
856
+ evaluationHint: step.evaluationHint,
857
+ model: config.model
858
+ }),
859
+ AI_OPERATION_TIMEOUT_MS,
860
+ "Vision evaluation"
861
+ );
862
+ evaluation = {
863
+ passed: visionResult.passed,
864
+ confidence: visionResult.confidence,
865
+ actualResult: visionResult.actualResult
866
+ };
867
+ } catch (evalErr) {
868
+ evaluation = {
869
+ passed: false,
870
+ confidence: 0.2,
871
+ actualResult: `Vision evaluation error: ${evalErr instanceof Error ? evalErr.message : String(evalErr)}`
872
+ };
873
+ }
874
+ }
875
+ let discoveredActions = [];
876
+ if (actSucceeded && !actionResult.deterministic) {
877
+ const discovered = await discoverSelector(page);
878
+ if (discovered) {
879
+ discoveredActions = [{
880
+ type: discovered.suggestedActionType ?? "click",
881
+ selector: discovered.selector,
882
+ description: `Discovered via ${discovered.strategy}: ${discovered.tagName}${discovered.textContent ? ` "${discovered.textContent.slice(0, 50)}"` : ""}`
883
+ }];
884
+ }
885
+ }
886
+ const consoleLogs = pendingConsoleLogs.slice(0, 50);
887
+ const networkErrors = pendingNetworkErrors.slice(0, 30);
888
+ finalResult = {
889
+ stepNumber: step.stepNumber,
890
+ action: step.action,
891
+ expectedResult: step.expectedResult,
892
+ actualResult: evaluation.actualResult,
893
+ passed: evaluation.passed,
894
+ confidence: evaluation.confidence,
895
+ screenshotBefore,
896
+ screenshotAfter,
897
+ actionsTaken: discoveredActions,
898
+ error,
899
+ durationMs: Date.now() - stepStartTime,
900
+ consoleLogs,
901
+ networkErrors,
902
+ retryCount: attempt,
903
+ retryHistory,
904
+ skipped: false
905
+ };
906
+ const shouldRetry = !evaluation.passed && error && isRetryableError(error) && attempt < maxRetries;
907
+ if (!shouldRetry) break;
908
+ retryHistory.push({
909
+ attempt,
910
+ error,
911
+ confidence: evaluation.confidence,
912
+ timestamp: Date.now()
913
+ });
914
+ await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
263
915
  }
264
- let evaluation = {
265
- passed: false,
266
- confidence: 0,
267
- actualResult: error ?? "Action execution failed"
268
- };
269
- if (actSucceeded) {
916
+ if (resilientMode && finalResult && !finalResult.passed) {
917
+ finalResult.skipped = true;
918
+ finalResult.skipReason = "Step failed, recovered page state";
270
919
  try {
271
- const verificationSchema = z.object({
272
- passed: z.boolean().describe("Whether the expected result was achieved"),
273
- confidence: z.number().min(0).max(1).describe("Confidence in the assessment (0.9+ = very sure, 0.7-0.9 = likely, below 0.7 = uncertain)"),
274
- actualResult: z.string().describe("Description of what actually happened on the page")
920
+ config.onStatusChange?.("navigating");
921
+ await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeoutMs: 3e4 });
922
+ await page.waitForLoadState("networkidle").catch(() => {
923
+ });
924
+ await installClickTracker(page);
925
+ await page.evaluate(() => {
926
+ window.__bugbear_suppress = true;
927
+ try {
928
+ localStorage.setItem("__bugbear_suppress", "true");
929
+ } catch {
930
+ }
931
+ }).catch(() => {
275
932
  });
276
- const verification = await stagehand.extract(
277
- `You are evaluating a QA test step. The action "${step.action}" was just performed. Check if this expected result was achieved: "${step.expectedResult}". Look at the current page state and describe what actually happened. Be precise and factual in your assessment.`,
278
- verificationSchema
279
- );
280
- evaluation = {
281
- passed: verification.passed,
282
- confidence: verification.confidence,
283
- actualResult: verification.actualResult
284
- };
285
- } catch (evalErr) {
286
- evaluation = {
287
- passed: false,
288
- confidence: 0.2,
289
- actualResult: `Verification error: ${evalErr instanceof Error ? evalErr.message : String(evalErr)}`
290
- };
933
+ pendingConsoleLogs = [];
934
+ pendingNetworkErrors = [];
935
+ config.onStatusChange?.("executing");
936
+ } catch (recoveryErr) {
937
+ finalResult.skipReason = `Step failed, recovery also failed: ${recoveryErr instanceof Error ? recoveryErr.message : String(recoveryErr)}`;
291
938
  }
292
939
  }
293
- const consoleLogs = pendingConsoleLogs.slice(0, 50);
294
- const networkErrors = pendingNetworkErrors.slice(0, 30);
295
- const result = {
296
- stepNumber: step.stepNumber,
297
- action: step.action,
298
- expectedResult: step.expectedResult,
299
- actualResult: evaluation.actualResult,
300
- passed: evaluation.passed,
301
- confidence: evaluation.confidence,
302
- screenshotBefore,
303
- screenshotAfter,
304
- actionsTaken: [],
305
- // Stagehand handles actions internally
306
- error,
307
- durationMs: Date.now() - stepStartTime,
308
- consoleLogs,
309
- networkErrors
310
- };
311
- stepResults.push(result);
312
- config.onStepComplete?.(result, i, steps.length);
940
+ stepResults.push(finalResult);
941
+ config.onStepComplete?.(finalResult, i, steps.length);
313
942
  }
314
943
  config.onStatusChange?.("completed");
315
944
  const model = config.model ?? "claude-sonnet-4-20250514";
@@ -323,11 +952,7 @@ async function runTest(config) {
323
952
  totalDurationMs: Date.now() - startTime,
324
953
  summary,
325
954
  screenshotUrls: [],
326
- tokenUsage: {
327
- // Stagehand tracks tokens internally; these are approximate
328
- inputTokens: steps.length * 3e3,
329
- outputTokens: steps.length * 500
330
- },
955
+ tokenUsage: getTokenEstimate(steps.length),
331
956
  browserSessionId: session.sessionId
332
957
  };
333
958
  } catch (err) {
@@ -339,29 +964,685 @@ async function runTest(config) {
339
964
  totalDurationMs: Date.now() - startTime,
340
965
  summary: `Test execution failed: ${err instanceof Error ? err.message : String(err)}`,
341
966
  screenshotUrls: [],
342
- tokenUsage: {
343
- inputTokens: stepResults.length * 3e3,
344
- outputTokens: stepResults.length * 500
345
- },
346
- browserSessionId: session.sessionId
967
+ tokenUsage: getTokenEstimate(stepResults.length),
968
+ browserSessionId: session?.sessionId ?? "unknown"
347
969
  };
348
970
  } finally {
349
- await session.close();
971
+ if (session?.page) {
972
+ const rawPage = session.page;
973
+ rawPage.removeAllListeners?.("console");
974
+ rawPage.removeAllListeners?.("requestfailed");
975
+ rawPage.removeAllListeners?.("response");
976
+ }
977
+ await session?.close();
350
978
  }
351
979
  }
352
980
/**
 * Collapse per-step outcomes into a single verdict for the run.
 *
 * Steps flagged `skipped` are excluded from the pass/fail decision.
 *
 * @param {Array<{passed?: boolean, skipped?: boolean, error?: unknown}>} steps
 * @returns {"error"|"passed"|"passed_with_skips"|"failed"|"partial"}
 *   - "error": no steps at all, or every step was skipped
 *   - "passed": nothing skipped and every step passed
 *   - "passed_with_skips": every non-skipped step passed, at least one skipped
 *   - "failed": every non-skipped step failed, or a non-skipped step has `error`
 *   - "partial": a mix of passing and failing steps without errors
 */
function determineOverallResult(steps) {
  if (steps.length === 0) {
    return "error";
  }
  const executed = steps.filter((step) => !step.skipped);
  if (executed.length === 0) {
    return "error";
  }
  const failedCount = executed.filter((step) => !step.passed).length;
  if (failedCount === 0) {
    // Everything that actually ran passed; only the label depends on skips.
    return executed.length < steps.length ? "passed_with_skips" : "passed";
  }
  const sawError = executed.some((step) => step.error);
  if (failedCount === executed.length || sawError) {
    return "failed";
  }
  return "partial";
}
992
+
993
+ // src/explorer.ts
994
+ import Anthropic2 from "@anthropic-ai/sdk";
995
// Default model identifier used by the explorer when the caller supplies none.
var DEFAULT_MODEL4 = "anthropic/claude-sonnet-4-20250514";
// Upper bound, in milliseconds, applied to each awaited AI operation.
var AI_OPERATION_TIMEOUT_MS2 = 6e4;

/**
 * Race `promise` against a timer.
 *
 * @param {Promise<T>} promise - the operation to wait for
 * @param {number} timeoutMs - how long to wait before giving up
 * @param {string} operation - label used in the timeout error message
 * @returns {Promise<T>} settles like `promise`, or rejects with
 *   `Error("<operation> timed out after <timeoutMs>ms")` if the timer fires first
 * @template T
 */
async function withTimeout2(promise, timeoutMs, operation) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });
  // Always clear the timer so a settled race does not keep the event loop alive.
  return Promise.race([promise, deadline]).finally(() => {
    clearTimeout(timer);
  });
}
1008
/**
 * Run a budgeted, AI-driven exploratory session against `config.targetUrl`.
 *
 * Each iteration observes the page, asks the model for one action, performs it
 * through Stagehand, then asks the model to categorise the outcome. After the
 * loop a report is generated from the collected actions.
 *
 * @param {object} config
 * @param {string} config.targetUrl - page to open first
 * @param {string} config.featureDescription - feature under exploration (fed to the prompts)
 * @param {number} config.actionBudget - maximum number of loop iterations
 * @param {object} [config.auth] - when set, passed to `injectAuth` after the first navigation
 * @param {object} [config.browserConfig] - forwarded to `createStagehandSession`
 * @param {string} config.anthropicApiKey
 * @param {string} [config.model] - defaults to DEFAULT_MODEL4; an "anthropic/" prefix is stripped
 * @param {(action: object, index: number) => void} [config.onActionComplete]
 * @returns {Promise<object>} result with `overallResult` ("findings" | "clean" | "error"),
 *   `actions`, `report`, `totalDurationMs`, `tokenUsage` and `browserSessionId`.
 *   Failures after setup are reported as an "error" result rather than thrown.
 * @throws whatever `createStagehandSession` or `suppressBugBearWidget` throws
 *   (setup failures reject, as before; the session is now closed first).
 */
async function runExploration(config) {
  const {
    targetUrl,
    featureDescription,
    actionBudget,
    auth,
    browserConfig,
    anthropicApiKey,
    model = DEFAULT_MODEL4,
    onActionComplete
  } = config;
  const anthropic = new Anthropic2({ apiKey: anthropicApiKey });
  const startTime = Date.now();
  const actions = [];
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  const session = await createStagehandSession(browserConfig, anthropicApiKey);
  const { stagehand, page } = session;
  // Tracks whether widget suppression finished, so the catch below can tell
  // "setup failed" (rethrow, as the original did) from "exploration failed".
  let setupComplete = false;
  try {
    // Inside the try so the finally block closes the session if this throws.
    await suppressBugBearWidget(stagehand);
    setupComplete = true;
    const modelId = model.replace("anthropic/", "");
    await page.goto(targetUrl, { waitUntil: "networkidle", timeoutMs: 3e4 });
    if (auth) {
      await injectAuth(page, auth, stagehand);
      await page.waitForLoadState("networkidle").catch(() => {
      });
    }
    const networkCapture = createNetworkCapture(page);
    let consoleLogs = [];
    let actionStartTime = Date.now();
    page.on("console", (msg) => {
      // `type` may be a method or a plain property depending on the page object;
      // calling a non-function would throw inside the listener.
      const rawLevel = typeof msg.type === "function" ? msg.type() : msg.type;
      const level = rawLevel ?? "log";
      if (["error", "warning", "warn"].includes(level)) {
        consoleLogs.push({
          level: level === "warn" ? "warning" : level,
          text: (typeof msg.text === "function" ? msg.text() : String(msg.text ?? msg)).slice(0, 500),
          source: typeof msg.location === "function" ? msg.location()?.url : void 0,
          timestamp: Date.now() - actionStartTime
        });
      }
    });
    // One-line summaries of past actions, fed back into the decision prompt.
    const actionLog = [];
    for (let i = 0; i < actionBudget; i++) {
      actionStartTime = Date.now();
      consoleLogs = [];
      const observations = await withTimeout2(
        stagehand.observe(),
        AI_OPERATION_TIMEOUT_MS2,
        "Page observation"
      );
      const decisionResponse = await withTimeout2(
        anthropic.messages.create({
          model: modelId,
          max_tokens: 300,
          system: buildDecisionPrompt(featureDescription, actionBudget - i, actionLog),
          messages: [
            {
              role: "user",
              content: `Current page URL: ${page.url()}

Visible interactive elements:
${formatObservations(observations)}

What single action should I perform next?`
            }
          ]
        }),
        AI_OPERATION_TIMEOUT_MS2,
        "Action decision"
      );
      const actionText = extractText(decisionResponse);
      totalInputTokens += decisionResponse.usage.input_tokens;
      totalOutputTokens += decisionResponse.usage.output_tokens;
      const loweredAction = actionText.toLowerCase();
      if (loweredAction.includes("[done]") || loweredAction.includes("no more actions")) {
        break;
      }
      const screenshotBefore = await page.screenshot({ type: "png" });
      networkCapture.start();
      try {
        await stagehand.act(actionText);
      } catch (actError) {
        networkCapture.stop();
        // The page may be in a bad state after a failed action; fall back to the
        // "before" image instead of letting a screenshot error abort the run.
        const failedScreenshot = await page.screenshot({ type: "png" }).catch(() => screenshotBefore);
        const failedAction = {
          actionNumber: i + 1,
          action: actionText,
          category: "broken_interaction",
          severity: "medium",
          confidence: 0.9,
          description: `Action failed: ${actError instanceof Error ? actError.message : String(actError)}`,
          screenshotBefore,
          screenshotAfter: failedScreenshot,
          networkRequests: networkCapture.getRequests(),
          consoleLogs: [...consoleLogs],
          durationMs: Date.now() - actionStartTime
        };
        actions.push(failedAction);
        actionLog.push(`[${i + 1}] ${actionText} -> FAILED: ${failedAction.description}`);
        onActionComplete?.(failedAction, i);
        continue;
      }
      await page.waitForLoadState("networkidle").catch(() => {
      });
      await page.waitForTimeout(500);
      networkCapture.stop();
      const screenshotAfter = await page.screenshot({ type: "png" });
      const capturedRequests = networkCapture.getRequests();
      const networkErrors = networkCapture.getErrors();
      const evalResponse = await withTimeout2(
        anthropic.messages.create({
          model: modelId,
          max_tokens: 400,
          system: buildEvaluationPrompt(),
          messages: [
            {
              role: "user",
              content: buildEvaluationContext(actionText, consoleLogs, networkErrors, page.url())
            }
          ]
        }),
        AI_OPERATION_TIMEOUT_MS2,
        "Action evaluation"
      );
      totalInputTokens += evalResponse.usage.input_tokens;
      totalOutputTokens += evalResponse.usage.output_tokens;
      const evaluation = parseEvaluation2(extractText(evalResponse));
      const action = {
        actionNumber: i + 1,
        action: actionText,
        category: evaluation.category,
        severity: evaluation.severity,
        confidence: evaluation.confidence,
        description: evaluation.description,
        screenshotBefore,
        screenshotAfter,
        networkRequests: capturedRequests,
        consoleLogs: [...consoleLogs],
        domContext: evaluation.domContext,
        durationMs: Date.now() - actionStartTime
      };
      actions.push(action);
      const logEntry = evaluation.category === "normal" ? `[${i + 1}] ${actionText} -> OK` : `[${i + 1}] ${actionText} -> FINDING (${evaluation.category}): ${evaluation.description}`;
      actionLog.push(logEntry);
      onActionComplete?.(action, i);
    }
    const { generateExplorationReport: generateExplorationReport2 } = await import("./report-generator-EVZEB33O.mjs");
    const report = await generateExplorationReport2(anthropic, {
      projectName: "",
      featureDescription,
      targetUrl,
      actions,
      model: modelId
    });
    totalInputTokens += report.tokenUsage.inputTokens;
    totalOutputTokens += report.tokenUsage.outputTokens;
    const findings = actions.filter((a) => a.category !== "normal");
    return {
      overallResult: findings.length > 0 ? "findings" : "clean",
      actions,
      report: report.report,
      totalDurationMs: Date.now() - startTime,
      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens },
      browserSessionId: session.sessionId
    };
  } catch (error) {
    if (!setupComplete) {
      // Preserve the original contract: a widget-suppression failure rejects.
      throw error;
    }
    return {
      overallResult: "error",
      actions,
      report: {
        projectName: "",
        featureDescription,
        targetUrl,
        exploredAt: (/* @__PURE__ */ new Date()).toISOString(),
        duration: `${Math.round((Date.now() - startTime) / 1e3)}s`,
        actionsUsed: actions.length,
        actionBudget,
        findings: [],
        tested: [],
        notTested: [{ description: "Exploration aborted due to error", reason: String(error) }],
        summary: `Exploration failed after ${actions.length} actions: ${error instanceof Error ? error.message : String(error)}`,
        suggestedPrompt: ""
      },
      totalDurationMs: Date.now() - startTime,
      tokenUsage: { inputTokens: totalInputTokens, outputTokens: totalOutputTokens },
      browserSessionId: session.sessionId
    };
  } finally {
    if (session.page) {
      session.page.removeAllListeners?.("console");
    }
    await session.close();
  }
}
1202
/**
 * Build the system prompt that asks the model for the next exploratory action.
 *
 * @param {string} featureDescription - feature name/description quoted in the prompt
 * @param {number} remainingBudget - number of actions still available
 * @param {string[]} actionLog - summaries of actions already taken; listed
 *   under "Actions already taken:" when non-empty
 * @returns {string} the prompt text
 */
function buildDecisionPrompt(featureDescription, remainingBudget, actionLog) {
  // The history block carries its own leading newline so an empty log leaves
  // exactly the blank lines the prompt has without it.
  const history = actionLog.length > 0 ? "\nActions already taken:\n" + actionLog.join("\n") : "";
  const lines = [
    `You are an exploratory QA tester examining the feature: "${featureDescription}".`,
    "Your goal is to find bugs by interacting with the page like a real user would.",
    "",
    "Strategy for choosing your next action:",
    "1. Try the happy path first (normal usage)",
    "2. Then try edge cases: empty inputs, very long text, special characters",
    "3. Click buttons and links to verify they work",
    "4. Submit forms with missing required fields",
    "5. Look for visual problems: overlapping text, broken layouts, missing images",
    "",
    `You have ${remainingBudget} actions left. Prioritize high-risk interactions.`,
    history,
    "",
    "DO NOT repeat an action you've already performed.",
    "Respond with a single action description. If there's nothing left to test, respond with \"[DONE]\"."
  ];
  return lines.join("\n");
}
1221
/**
 * Build the fixed system prompt used to categorise the outcome of one action.
 *
 * The prompt asks for a JSON object with `category`, `severity`, `confidence`,
 * `description`, `expectedBehavior` and `domSelector`.
 *
 * @returns {string} the prompt text (identical on every call)
 */
function buildEvaluationPrompt() {
  const header = [
    "You are evaluating the result of a QA test action. Categorize what happened.",
    "",
    "Respond in this exact JSON format:"
  ];
  const jsonShape = [
    "{",
    "\"category\": \"normal\" | \"console_error\" | \"broken_interaction\" | \"visual_anomaly\" | \"input_handling\",",
    "\"severity\": \"critical\" | \"high\" | \"medium\" | \"low\",",
    "\"confidence\": 0.0-1.0,",
    "\"description\": \"What happened\",",
    "\"expectedBehavior\": \"What should have happened\",",
    "\"domSelector\": \"CSS selector of the element involved (if applicable)\"",
    "}"
  ];
  const categoryGuide = [
    "",
    "Category definitions:",
    "- normal: Expected behavior, no issues found",
    "- console_error: JavaScript exception or failed network request (4xx/5xx)",
    "- broken_interaction: Action had no visible effect, button didn't respond, navigation failed",
    "- visual_anomaly: Layout break, text overflow, missing/broken images, overlapping elements",
    "- input_handling: Missing validation, accepted clearly invalid input, no error feedback"
  ];
  const footer = [
    "",
    "Only report genuine issues. If behavior seems correct, use \"normal\".",
    "For \"normal\" results, severity and domSelector are not required."
  ];
  return [...header, ...jsonShape, ...categoryGuide, ...footer].join("\n");
}
1244
/**
 * Assemble the user message describing what happened after an action.
 *
 * @param {string} action - the action text that was performed
 * @param {Array<{level: string, text: string}>} consoleLogs - captured console entries
 * @param {Array<{method: string, url: string, status: number, statusText: string}>} networkErrors
 * @param {string} currentUrl - page URL after the action
 * @returns {string} the action and URL, followed by a "Console output:" section
 *   and a "Failed network requests:" section, each only when its list is non-empty
 */
function buildEvaluationContext(action, consoleLogs, networkErrors, currentUrl) {
  const parts = [`Action performed: "${action}"\nCurrent URL: ${currentUrl}\n`];
  if (consoleLogs.length > 0) {
    const logLines = consoleLogs.map((entry) => `[${entry.level}] ${entry.text}`);
    parts.push(`\nConsole output:\n${logLines.join("\n")}\n`);
  }
  if (networkErrors.length > 0) {
    const errorLines = networkErrors.map(
      (failure) => `${failure.method} ${failure.url} -> ${failure.status} ${failure.statusText}`
    );
    parts.push(`\nFailed network requests:\n${errorLines.join("\n")}\n`);
  }
  return parts.join("");
}
1262
/**
 * Render observed page elements as a numbered list for the decision prompt.
 *
 * @param {Array<{selector: string, description: string}>} observations
 * @returns {string} one "N. [selector] description" line per observation,
 *   limited to the first 30 entries
 */
function formatObservations(observations) {
  const shown = observations.slice(0, 30);
  const renderLine = (item, index) => `${index + 1}. [${item.selector}] ${item.description}`;
  return shown.map(renderLine).join("\n");
}
1265
/**
 * Extract the text of a model response.
 *
 * Concatenates every content block whose `type` is "text", matching how the
 * triage and failure-analysis code in this file read responses. Previously only
 * `content[0]` was inspected, which threw on an empty `content` array and
 * returned "" whenever the first block was not a text block.
 *
 * @param {{content?: Array<{type: string, text?: string}>}} response
 * @returns {string} the joined text, or "" when there are no text blocks
 */
function extractText(response) {
  const blocks = response.content ?? [];
  return blocks.filter((block) => block.type === "text").map((block) => block.text).join("");
}
1269
/**
 * Parse the model's evaluation reply into a structured result.
 *
 * The first `{` through the last `}` of `text` is treated as JSON. If no such
 * span exists or it does not parse, a low-confidence "normal" result carrying
 * the raw text as its description is returned.
 *
 * @param {string} text - raw model reply
 * @returns {{category: string, severity?: string, confidence: number,
 *   description: string, expectedBehavior?: string,
 *   domContext?: {selector: string, elementText: string, nearbyText: string}}}
 */
function parseEvaluation2(text) {
  const fallback = { category: "normal", confidence: 0.3, description: text };
  const candidate = /\{[\s\S]*\}/.exec(text);
  if (candidate === null) {
    return fallback;
  }
  let data;
  try {
    data = JSON.parse(candidate[0]);
  } catch {
    return fallback;
  }
  const hasNumericConfidence = typeof data.confidence === "number";
  let domContext = void 0;
  if (data.domSelector) {
    domContext = { selector: data.domSelector, elementText: "", nearbyText: "" };
  }
  return {
    category: data.category || "normal",
    severity: data.severity,
    confidence: hasNumericConfidence ? data.confidence : 0.5,
    description: data.description || text,
    expectedBehavior: data.expectedBehavior,
    domContext
  };
}
1286
+
1287
+ // src/report-triager.ts
1288
// Model used for triage when `input.model` is not provided.
var DEFAULT_MODEL5 = "claude-sonnet-4-20250514";

/**
 * Ask the model to triage a bug report and return the parsed result.
 *
 * @param {object} input
 * @param {object} input.anthropic - client exposing `messages.create`
 * @param {object} input.report - the report to triage
 * @param {object[]} input.recentReports - earlier reports offered for duplicate detection
 * @param {string} [input.model] - overrides DEFAULT_MODEL5
 * @returns {Promise<object>} the value produced by `parseTriageResult`
 */
async function triageReport(input) {
  const { report, recentReports } = input;
  const request = {
    model: input.model ?? DEFAULT_MODEL5,
    max_tokens: 1024,
    messages: [{ role: "user", content: buildTriagePrompt(report, recentReports) }]
  };
  const response = await input.anthropic.messages.create(request);
  // Only text blocks carry the JSON answer; other block types are ignored.
  const pieces = [];
  for (const block of response.content) {
    if (block.type === "text") {
      pieces.push(block.text);
    }
  }
  return parseTriageResult(pieces.join(""));
}
1301
/**
 * Build the triage prompt for one bug report.
 *
 * Sections are added only for the data present on `report`: title and
 * description (always), source, app context, console/network errors from
 * `enhanced_context`, device info and error fingerprint. Recent reports, when
 * given, are listed so the model can flag duplicates.
 *
 * Missing data is tolerated: a null/undefined `recentReports` is treated as an
 * empty list, a recent report without a description contributes an empty
 * snippet, and a missing report description is shown as "(no description)".
 *
 * @param {object} report - the report being triaged
 * @param {Array<{id: string, title?: string|null, description?: string|null,
 *   error_fingerprint?: string|null}>} [recentReports]
 * @returns {string} the full prompt text
 */
function buildTriagePrompt(report, recentReports) {
  const priorReports = recentReports ?? [];
  const sections = [];
  sections.push(`REPORT TITLE: ${report.title ?? "(no title)"}`);
  sections.push(`DESCRIPTION: ${report.description ?? "(no description)"}`);
  if (report.report_source) {
    sections.push(`SOURCE: ${report.report_source}`);
  }
  if (report.app_context && Object.keys(report.app_context).length > 0) {
    const ctx = report.app_context;
    const parts = [];
    if (ctx.currentRoute) parts.push(`Route: ${ctx.currentRoute}`);
    if (ctx.currentUrl) parts.push(`URL: ${ctx.currentUrl}`);
    if (ctx.componentName) parts.push(`Component: ${ctx.componentName}`);
    if (ctx.userAction) parts.push(`User action: ${ctx.userAction}`);
    if (parts.length > 0) {
      sections.push(`APP CONTEXT:
${parts.join("\n")}`);
    }
  }
  if (report.enhanced_context) {
    const enhanced = report.enhanced_context;
    const consoleLogs = enhanced.consoleLogs;
    if (consoleLogs && consoleLogs.length > 0) {
      // Only errors and warnings are forwarded, capped at 10 entries.
      const errors = consoleLogs.filter((l) => l.level === "error" || l.level === "warning").slice(0, 10).map((l) => `[${l.level}] ${l.text}`).join("\n");
      if (errors) {
        sections.push(`CONSOLE ERRORS:
${errors}`);
      }
    }
    const networkErrors = enhanced.networkErrors;
    if (networkErrors && networkErrors.length > 0) {
      const netErrors = networkErrors.slice(0, 10).map((e) => `${e.method} ${e.url} \u2192 ${e.status}`).join("\n");
      sections.push(`NETWORK ERRORS:
${netErrors}`);
    }
  }
  if (report.device_info && Object.keys(report.device_info).length > 0) {
    const device = report.device_info;
    const parts = [];
    if (device.platform) parts.push(`Platform: ${device.platform}`);
    if (device.browser) parts.push(`Browser: ${device.browser}`);
    if (device.os) parts.push(`OS: ${device.os}`);
    if (device.screenSize) parts.push(`Screen: ${device.screenSize}`);
    if (parts.length > 0) {
      sections.push(`DEVICE:
${parts.join(", ")}`);
    }
  }
  if (report.error_fingerprint) {
    sections.push(`ERROR FINGERPRINT: ${report.error_fingerprint}`);
  }
  let recentSection = "";
  if (priorReports.length > 0) {
    const recentLines = priorReports.map((r) => {
      // Descriptions are truncated to keep the duplicate list compact.
      const desc = (r.description ?? "").slice(0, 150);
      const fp = r.error_fingerprint ? ` [fingerprint: ${r.error_fingerprint}]` : "";
      return `- ID: ${r.id} | "${r.title ?? "(no title)"}" | ${desc}${fp}`;
    });
    recentSection = `
RECENT REPORTS (check for duplicates):
${recentLines.join("\n")}`;
  }
  return `You are a QA triage specialist. Analyze this bug report and provide structured triage.

${sections.join("\n\n")}
${recentSection}

Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
{
"suggested_severity": "critical" | "high" | "medium" | "low",
"severity_confidence": 0.0-1.0,
"suggested_category": "ui_ux" | "functional" | "crash" | "security" | "other",
"category_confidence": 0.0-1.0,
"root_cause_analysis": "Brief analysis of the likely root cause",
"duplicate_of": null or "uuid-of-matching-report",
"duplicate_confidence": 0.0-1.0,
"triage_notes": "Summary of triage reasoning"
}

Severity guide:
- critical: App crash, data loss, security vulnerability, blocks core workflow
- high: Major feature broken, significant UX degradation, affects many users
- medium: Feature partially broken, workaround exists, moderate impact
- low: Minor cosmetic issue, edge case, minimal user impact

Category guide:
- crash: App crashes, unhandled exceptions, white screen of death
- security: Auth bypass, data exposure, injection vulnerabilities
- functional: Feature doesn't work as expected, logic errors, broken flows
- ui_ux: Visual glitches, layout issues, confusing UX, accessibility problems
- other: Performance, documentation, configuration issues

Duplicate detection:
- Compare error fingerprints first (exact match = very high confidence)
- Then compare descriptions semantically (similar symptoms on same route/feature)
- Only flag as duplicate if confidence \u2265 0.80`;
}
1398
// Allowed values for `suggested_severity` / `suggested_category`; anything else
// is replaced by "medium" / "other" during validation.
var VALID_SEVERITIES = ["critical", "high", "medium", "low"];
var VALID_CATEGORIES = ["ui_ux", "functional", "crash", "security", "other"];

/**
 * Parse the model's triage reply.
 *
 * Tries the whole reply as JSON first, then the widest `{...}` span that
 * mentions both "suggested_severity" and "suggested_category" (covers replies
 * wrapped in markdown fences or prose). Only JSON *objects* are accepted:
 * numbers, strings, arrays and null fall through to the explicit
 * "failed to parse" fallback instead of being reported as a defaulted triage.
 * A non-string `text` no longer throws.
 *
 * @param {string} text - raw model reply
 * @returns {{suggested_severity: string, severity_confidence: number,
 *   suggested_category: string, category_confidence: number,
 *   root_cause_analysis: string, duplicate_of: string|null,
 *   duplicate_confidence: number, triage_notes: string}}
 */
function parseTriageResult(text) {
  const raw = typeof text === "string" ? text : String(text ?? "");
  const candidates = [raw.trim()];
  const embedded = raw.match(/\{[\s\S]*"suggested_severity"[\s\S]*"suggested_category"[\s\S]*\}/);
  if (embedded) {
    candidates.push(embedded[0]);
  }
  for (const candidate of candidates) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON in this form; try the next candidate.
      continue;
    }
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      return validateTriageResult(parsed);
    }
  }
  return {
    suggested_severity: "medium",
    severity_confidence: 0.3,
    suggested_category: "other",
    category_confidence: 0.3,
    root_cause_analysis: `Triage returned unparseable response: ${raw.slice(0, 200)}`,
    duplicate_of: null,
    duplicate_confidence: 0,
    triage_notes: "Auto-triage failed to parse AI response"
  };
}

/**
 * Coerce a parsed triage object into the expected shape.
 *
 * Unknown severity/category values become "medium"/"other", confidences are
 * clamped to [0, 1], and non-string text fields get placeholder strings.
 *
 * @param {object} parsed - JSON object returned by the model
 * @returns {object} a triage result with every field populated
 */
function validateTriageResult(parsed) {
  const severity = VALID_SEVERITIES.includes(parsed.suggested_severity) ? parsed.suggested_severity : "medium";
  const category = VALID_CATEGORIES.includes(parsed.suggested_category) ? parsed.suggested_category : "other";
  return {
    suggested_severity: severity,
    severity_confidence: clampConfidence(parsed.severity_confidence),
    suggested_category: category,
    category_confidence: clampConfidence(parsed.category_confidence),
    root_cause_analysis: typeof parsed.root_cause_analysis === "string" ? parsed.root_cause_analysis : "No analysis provided",
    duplicate_of: typeof parsed.duplicate_of === "string" ? parsed.duplicate_of : null,
    duplicate_confidence: clampConfidence(parsed.duplicate_confidence),
    triage_notes: typeof parsed.triage_notes === "string" ? parsed.triage_notes : "No notes provided"
  };
}

/**
 * Clamp a confidence value to the range [0, 1].
 *
 * @param {unknown} value
 * @returns {number} the clamped number, or 0.5 when `value` is not a number (or is NaN)
 */
function clampConfidence(value) {
  if (typeof value !== "number" || Number.isNaN(value)) return 0.5;
  return Math.max(0, Math.min(1, value));
}
1443
+
1444
+ // src/failure-analyzer.ts
1445
// Model used for failure analysis when `input.model` is not provided.
var DEFAULT_MODEL6 = "claude-sonnet-4-20250514";

/**
 * Ask the model to classify a failed test step from its screenshots and context.
 *
 * The request contains the before and after screenshots as base64 PNG images,
 * each preceded by a caption, followed by the text from `buildFailurePrompt`.
 *
 * @param {object} input
 * @param {object} input.anthropic - client exposing `messages.create`
 * @param {object} input.step - the test step that failed
 * @param {object} input.result - step result; `screenshotBefore`/`screenshotAfter`
 *   must support `toString("base64")` (presumably Buffers — confirm with caller)
 * @param {object} [input.discoveredSelector]
 * @param {object[]} [input.consoleLogs]
 * @param {object[]} [input.networkErrors]
 * @param {string} [input.model] - overrides DEFAULT_MODEL6
 * @returns {Promise<object>} the value produced by `parseFailureAnalysis`
 */
async function analyzeFailure(input) {
  const { step, result, discoveredSelector, consoleLogs, networkErrors } = input;
  const caption = (text) => ({ type: "text", text });
  const pngImage = (screenshot) => ({
    type: "image",
    source: { type: "base64", media_type: "image/png", data: screenshot.toString("base64") }
  });
  const content = [
    caption("BEFORE screenshot (page state before the failed action):"),
    pngImage(result.screenshotBefore),
    caption("AFTER screenshot (page state after the failed action):"),
    pngImage(result.screenshotAfter),
    caption(buildFailurePrompt(step, result, discoveredSelector, consoleLogs, networkErrors))
  ];
  const response = await input.anthropic.messages.create({
    model: input.model ?? DEFAULT_MODEL6,
    max_tokens: 1024,
    messages: [{ role: "user", content }]
  });
  const pieces = [];
  for (const block of response.content) {
    if (block.type === "text") {
      pieces.push(block.text);
    }
  }
  return parseFailureAnalysis(pieces.join(""), step);
}
1469
// Maps a per-step failure classification to the run-level label.
var STEP_TO_RUN = {
  real_bug: "bug",
  test_maintenance: "test_issue",
  ai_limitation: "ai_limitation",
  flaky: "flaky",
  unknown: "unknown"
};

/**
 * Roll per-step failure classifications up into one run-level classification.
 *
 * Rules, in order:
 *  1. no classifications -> "unknown"
 *  2. any "real_bug" -> "bug"
 *  3. otherwise the most frequent classification wins (the first one seen wins
 *     a tie), mapped through STEP_TO_RUN
 *
 * A winning label that STEP_TO_RUN does not know now yields "unknown" instead
 * of `undefined`.
 *
 * @param {string[]} stepClassifications
 * @returns {"bug"|"test_issue"|"ai_limitation"|"flaky"|"unknown"}
 */
function rollupFailureClassification(stepClassifications) {
  if (stepClassifications.length === 0) return "unknown";
  if (stepClassifications.includes("real_bug")) return "bug";
  const counts = /* @__PURE__ */ new Map();
  for (const c of stepClassifications) {
    counts.set(c, (counts.get(c) ?? 0) + 1);
  }
  // Map iteration follows insertion order, so ties go to the earliest label.
  let best = "unknown";
  let bestCount = 0;
  for (const [cls, count] of counts) {
    if (count > bestCount) {
      bestCount = count;
      best = cls;
    }
  }
  // Own-property check so labels such as "constructor" cannot resolve to
  // something inherited from Object.prototype.
  return Object.prototype.hasOwnProperty.call(STEP_TO_RUN, best) ? STEP_TO_RUN[best] : "unknown";
}
1496
/**
 * Build the text prompt that asks a failure analyst model to classify a
 * failed test step.
 *
 * The prompt consists of a fixed instruction header, a context section
 * (one entry per available fact, joined by blank lines), the list of
 * allowed classification categories and the JSON reply shape.
 *
 * @param step Failed step; reads stepNumber, action, expectedResult and the
 *   optional selector / actionType.
 * @param result Step outcome; reads actualResult and the optional error.
 * @param discoveredSelector Optional; reads selector, strategy and the
 *   optional textContent.
 * @param consoleLogs Optional array; reads level and text of each entry.
 * @param networkErrors Optional array; reads method, url, status and
 *   statusText of each entry.
 * @returns The complete prompt string. The prompt text refers to
 *   before/after screenshots; this function itself does not attach them.
 */
+ function buildFailurePrompt(step, result, discoveredSelector, consoleLogs, networkErrors) {
1497
+ const sections = [];
1498
+ sections.push(`FAILED STEP #${step.stepNumber}: ${step.action}`);
1499
+ sections.push(`EXPECTED: ${step.expectedResult}`);
1500
+ sections.push(`ACTUAL: ${result.actualResult}`);
1501
// The next three facts are optional and only listed when truthy.
+ if (step.selector) sections.push(`SELECTOR USED: ${step.selector}`);
1502
+ if (step.actionType) sections.push(`ACTION TYPE: ${step.actionType}`);
1503
+ if (result.error) sections.push(`ERROR: ${result.error}`);
1504
// The element text is appended only when textContent is truthy.
+ if (discoveredSelector) {
1505
+ sections.push(`DISCOVERED SELECTOR (what Stagehand actually clicked): ${discoveredSelector.selector} (via ${discoveredSelector.strategy})${discoveredSelector.textContent ? ` \u2014 text: "${discoveredSelector.textContent}"` : ""}`);
1506
+ }
1507
// Only "error" and "warning" console entries are kept, capped at the first
// 8 of them; the section is skipped when none remain after filtering.
+ if (consoleLogs && consoleLogs.length > 0) {
1508
+ const errors = consoleLogs.filter((l) => l.level === "error" || l.level === "warning").slice(0, 8).map((l) => `[${l.level}] ${l.text}`).join("\n");
1509
+ if (errors) sections.push(`CONSOLE ERRORS:
1510
+ ${errors}`);
1511
+ }
1512
// At most the first 8 network errors are listed, one per line.
+ if (networkErrors && networkErrors.length > 0) {
1513
+ const netErrors = networkErrors.slice(0, 8).map((e) => `${e.method} ${e.url} \u2192 ${e.status} ${e.statusText}`).join("\n");
1514
+ sections.push(`NETWORK ERRORS:
1515
+ ${netErrors}`);
1516
+ }
1517
// Final prompt: instruction header, the collected sections separated by
// blank lines, the category definitions and the required JSON reply shape.
+ return `You are a QA failure analyst. A test step failed. Analyze the before/after screenshots and the context below to classify this failure.
1518
+
1519
+ ${sections.join("\n\n")}
1520
+
1521
+ Classify into ONE of these categories:
1522
+ - **real_bug**: The application has an actual defect. Indicators: API errors (4xx/5xx), JavaScript exceptions, missing/broken UI elements that SHOULD be there, incorrect behavior, data not saving.
1523
+ - **test_maintenance**: The test is stale \u2014 the app changed but the test wasn't updated. Indicators: element moved/renamed, selector no longer matches, page restructured but app works correctly, the discovered selector differs from the test's selector.
1524
+ - **ai_limitation**: The AI executor itself could not complete this step \u2014 NOT an app bug. Indicators: already logged in so can't reach the login page, a QA/testing widget or overlay appeared and blocked the real UI, the test requires measuring something the AI can't (contrast ratios, pixel measurements), the AI landed on a completely wrong page and never reached the test target, authentication redirect prevented navigation, a popup or modal unrelated to the test blocked interaction.
1525
+ - **flaky**: Timing or intermittent issue. Indicators: timeout errors, "element not found" but the element IS visible in screenshots, network hiccup, race condition.
1526
+ - **unknown**: Can't determine with confidence.
1527
+
1528
+ For **test_maintenance** failures, suggest a corrected step (selector, action, value).
1529
+
1530
+ Respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
1531
+ {
1532
+ "classification": "real_bug" | "test_maintenance" | "ai_limitation" | "flaky" | "unknown",
1533
+ "confidence": 0.0-1.0,
1534
+ "reasoning": "Brief explanation of why this classification",
1535
+ "suggested_fix": null | {
1536
+ "corrected_action": "Updated natural language action (if changed)",
1537
+ "corrected_selector": "Updated CSS selector (if selector changed)",
1538
+ "corrected_actionType": "Updated action type (if changed)",
1539
+ "corrected_value": "Updated value (if changed)"
1540
+ }
1541
+ }`;
1542
+ }
1543
/**
 * Step-level classifications accepted from the failure analyst.
 * Frozen so the shared allow-list cannot be altered at runtime.
 */
var VALID_CLASSIFICATIONS = Object.freeze([
  "real_bug",
  "test_maintenance",
  "ai_limitation",
  "flaky",
  "unknown"
]);
1544
/**
 * Turn the failure analyst's raw reply into a validated analysis object.
 *
 * Candidates are tried in order and the first that parses and validates wins:
 *  1. the whole reply, trimmed, as JSON;
 *  2. the span from the first "{" to the last "}" of the reply, provided it
 *     mentions both "classification" and "confidence" in any order. This
 *     covers replies wrapped in prose or markdown fences.
 *
 * If nothing works, a low-confidence "unknown" analysis is returned whose
 * reasoning quotes the first 200 characters of the reply.
 *
 * @param {string} text Raw reply text. Non-string values are tolerated and
 *   lead to the fallback result instead of an exception.
 * @param step The failed step, forwarded to validateFailureAnalysis.
 * @returns The validated analysis, or the "unknown" fallback.
 */
function parseFailureAnalysis(text, step) {
  // A missing or non-string reply must still produce the fallback rather
  // than throwing from string methods.
  const raw = typeof text === "string" ? text : String(text ?? "");

  const candidates = [raw.trim()];
  const start = raw.indexOf("{");
  const end = raw.lastIndexOf("}");
  if (start !== -1 && end > start) {
    const embedded = raw.slice(start, end + 1);
    // JSON key order carries no meaning, so accept the keys in either order.
    if (embedded.includes('"classification"') && embedded.includes('"confidence"')) {
      candidates.push(embedded);
    }
  }

  for (const candidate of candidates) {
    try {
      return validateFailureAnalysis(JSON.parse(candidate), step);
    } catch {
      // Not valid JSON, or not a usable object: fall through to the next
      // candidate and ultimately to the fallback below.
    }
  }

  return {
    classification: "unknown",
    confidence: 0.3,
    reasoning: `Failure analysis returned unparseable response: ${raw.slice(0, 200)}`
  };
}
1564
/**
 * Normalise a parsed analyst reply into a well-formed analysis object.
 *
 * - classification: kept when listed in VALID_CLASSIFICATIONS, otherwise
 *   "unknown".
 * - confidence: passed through clampConfidence2.
 * - reasoning: kept when it is a string, otherwise "No reasoning provided".
 * - suggested_fix: attached only when the reply carries a plain object with
 *   at least one string correction; non-string corrections become undefined.
 *
 * @param parsed Parsed JSON reply; must be non-null.
 * @param step The failed step; stepNumber and action are copied into the fix.
 * @returns The normalised analysis.
 * @throws {TypeError} If `parsed` is null or undefined.
 */
function validateFailureAnalysis(parsed, step) {
  const classification = VALID_CLASSIFICATIONS.includes(parsed.classification)
    ? parsed.classification
    : "unknown";
  const result = {
    classification,
    confidence: clampConfidence2(parsed.confidence),
    reasoning: typeof parsed.reasoning === "string" ? parsed.reasoning : "No reasoning provided"
  };

  const fix = parsed.suggested_fix;
  // Arrays are typeof "object" too, but can never carry named corrections.
  if (fix && typeof fix === "object" && !Array.isArray(fix)) {
    const asString = (value) => (typeof value === "string" ? value : void 0);
    const corrections = {
      corrected_action: asString(fix.corrected_action),
      corrected_selector: asString(fix.corrected_selector),
      corrected_actionType: asString(fix.corrected_actionType),
      corrected_value: asString(fix.corrected_value)
    };
    // A fix with nothing to correct is noise; only report one that actually
    // changes something.
    if (Object.values(corrections).some((value) => value !== void 0)) {
      result.suggested_fix = {
        stepNumber: step.stepNumber,
        original_action: step.action,
        ...corrections
      };
    }
  }
  return result;
}
1584
/**
 * Coerce a confidence value into the closed range [0, 1].
 *
 * @param value Candidate confidence.
 * @returns {number} 0.5 when `value` is not a number or is NaN; otherwise
 *   `value` clamped to [0, 1] (so +/-Infinity become 1 / 0).
 */
function clampConfidence2(value) {
  // NaN is typeof "number" and would survive Math.min/Math.max unchanged,
  // so it has to be rejected explicitly.
  if (typeof value !== "number" || Number.isNaN(value)) return 0.5;
  return Math.max(0, Math.min(1, value));
}
1588
+
1589
+ // src/concurrency.ts
1590
/**
 * Counting semaphore that limits how many holders run at once.
 *
 * `acquire()` resolves immediately while fewer than `max` slots are in use;
 * otherwise the caller waits in a FIFO queue. `release()` hands the slot
 * straight to the oldest waiter if there is one, or frees it otherwise.
 */
var Semaphore = class {
  /**
   * @param {number} max Maximum number of concurrent holders; must be >= 1.
   * @throws {Error} If `max` is not >= 1. This includes undefined and NaN,
   *   which would otherwise make every acquire() wait forever.
   */
  constructor(max) {
    // "!(max >= 1)" rather than "max < 1": comparisons with NaN/undefined
    // are always false, so the negated form is the one that rejects them.
    if (!(max >= 1)) throw new Error("Semaphore max must be >= 1");
    this.max = max;
    this.current = 0;
    this.queue = [];
  }
  /**
   * Take a slot, waiting for one to be released if all are in use.
   * @returns {Promise<void>} Resolves once the caller holds a slot.
   */
  async acquire() {
    if (this.current < this.max) {
      this.current++;
      return;
    }
    return new Promise((resolve) => {
      this.queue.push(resolve);
    });
  }
  /**
   * Give a slot back. If someone is waiting, the slot is transferred to the
   * oldest waiter and the in-use count stays the same; otherwise the count
   * is decremented. Releasing with no slot in use is a no-op, so an
   * unbalanced release cannot raise the effective limit above `max`.
   */
  release() {
    const next = this.queue.shift();
    if (next) {
      next();
      return;
    }
    if (this.current > 0) {
      this.current--;
    }
  }
  /** Number of slots currently in use */
  get active() {
    return this.current;
  }
  /** Number of waiters in the queue */
  get waiting() {
    return this.queue.length;
  }
};
361
1623
  export {
1624
+ Semaphore,
1625
+ analyzeFailure,
1626
+ authenticateSupabase,
362
1627
  createStagehandSession,
1628
+ discoverSelector,
1629
+ estimateBatchCost,
1630
+ estimateCost,
1631
+ estimateTestCost,
1632
+ evaluateStep,
1633
+ executeAction,
1634
+ generateExplorationReport,
363
1635
  generateRunSummary,
1636
+ getTokenEstimate,
364
1637
  injectAuth,
365
- runTest
1638
+ injectSupabaseAuth,
1639
+ installClickTracker,
1640
+ performSupabaseAuth,
1641
+ rollupFailureClassification,
1642
+ runExploration,
1643
+ runTest,
1644
+ suppressBugBearWidget,
1645
+ triageReport,
1646
+ verifySupabaseSession
366
1647
  };
367
1648
  //# sourceMappingURL=index.mjs.map