@bbearai/ai-executor 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-WT22IQMS.mjs +175 -0
- package/dist/chunk-WT22IQMS.mjs.map +1 -0
- package/dist/cli.js +622 -129
- package/dist/cli.js.map +1 -1
- package/dist/index.d.mts +533 -8
- package/dist/index.d.ts +533 -8
- package/dist/index.js +1613 -131
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1411 -130
- package/dist/index.mjs.map +1 -1
- package/dist/report-generator-EVZEB33O.mjs +7 -0
- package/dist/report-generator-EVZEB33O.mjs.map +1 -0
- package/package.json +5 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Stagehand, Page } from '@browserbasehq/stagehand';
|
|
2
1
|
import Anthropic from '@anthropic-ai/sdk';
|
|
2
|
+
import { Stagehand, Page } from '@browserbasehq/stagehand';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* @bbearai/ai-executor - Type definitions
|
|
@@ -50,11 +50,33 @@ interface FormLoginAuth {
|
|
|
50
50
|
/** Password to enter */
|
|
51
51
|
password: string;
|
|
52
52
|
}
|
|
53
|
-
|
|
53
|
+
interface SupabaseNativeAuth {
|
|
54
|
+
type: 'supabase-native';
|
|
55
|
+
/** Supabase project URL (e.g. https://xyz.supabase.co) */
|
|
56
|
+
supabaseUrl: string;
|
|
57
|
+
/** Supabase anon/public key (required for GoTrue REST API) */
|
|
58
|
+
anonKey: string;
|
|
59
|
+
/** Email for Supabase auth */
|
|
60
|
+
email: string;
|
|
61
|
+
/** Password for Supabase auth */
|
|
62
|
+
password: string;
|
|
63
|
+
}
|
|
64
|
+
type AuthConfig = CookieAuth | LocalStorageAuth | FormLoginAuth | SupabaseNativeAuth;
|
|
65
|
+
type StepActionType = 'click' | 'fill' | 'select' | 'navigate' | 'scroll' | 'wait' | 'assert';
|
|
54
66
|
interface TestStep {
|
|
55
67
|
stepNumber: number;
|
|
56
68
|
action: string;
|
|
57
69
|
expectedResult: string;
|
|
70
|
+
/** Deterministic action type — when set with selector, bypasses AI */
|
|
71
|
+
actionType?: StepActionType;
|
|
72
|
+
/** CSS selector or data-testid for deterministic execution */
|
|
73
|
+
selector?: string;
|
|
74
|
+
/** Value for fill/select/navigate actions */
|
|
75
|
+
value?: string;
|
|
76
|
+
/** Explicit wait after action (ms) */
|
|
77
|
+
waitMs?: number;
|
|
78
|
+
/** Hint for the vision evaluator on what to look for */
|
|
79
|
+
evaluationHint?: string;
|
|
58
80
|
}
|
|
59
81
|
interface TestCaseInput {
|
|
60
82
|
id: string;
|
|
@@ -99,6 +121,13 @@ interface NetworkError {
|
|
|
99
121
|
/** Timestamp relative to step start (ms) */
|
|
100
122
|
timestamp: number;
|
|
101
123
|
}
|
|
124
|
+
/** A single retry attempt record */
|
|
125
|
+
interface RetryAttempt {
|
|
126
|
+
attempt: number;
|
|
127
|
+
error: string;
|
|
128
|
+
confidence: number;
|
|
129
|
+
timestamp: number;
|
|
130
|
+
}
|
|
102
131
|
interface StepResult {
|
|
103
132
|
stepNumber: number;
|
|
104
133
|
action: string;
|
|
@@ -121,6 +150,20 @@ interface StepResult {
|
|
|
121
150
|
consoleLogs: ConsoleEntry[];
|
|
122
151
|
/** Failed/errored network requests during this step */
|
|
123
152
|
networkErrors: NetworkError[];
|
|
153
|
+
/** Number of retry attempts (0 = succeeded on first try) */
|
|
154
|
+
retryCount: number;
|
|
155
|
+
/** History of failed retry attempts before the final result */
|
|
156
|
+
retryHistory: RetryAttempt[];
|
|
157
|
+
/** Whether this step was skipped in resilient mode after failing */
|
|
158
|
+
skipped: boolean;
|
|
159
|
+
/** Reason the step was skipped */
|
|
160
|
+
skipReason?: string;
|
|
161
|
+
}
|
|
162
|
+
interface RetryConfig {
|
|
163
|
+
/** Maximum retry attempts per step (default: 2) */
|
|
164
|
+
maxRetries?: number;
|
|
165
|
+
/** Delay between retries in ms (default: 2000) */
|
|
166
|
+
retryDelayMs?: number;
|
|
124
167
|
}
|
|
125
168
|
interface TestRunConfig {
|
|
126
169
|
/** Base URL of the application under test */
|
|
@@ -141,13 +184,17 @@ interface TestRunConfig {
|
|
|
141
184
|
anonKey: string;
|
|
142
185
|
projectId: string;
|
|
143
186
|
};
|
|
187
|
+
/** Retry configuration for transient failures */
|
|
188
|
+
retry?: RetryConfig;
|
|
189
|
+
/** Enable skip-and-recover mode: failed steps are skipped and page state is recovered (default: true) */
|
|
190
|
+
resilientMode?: boolean;
|
|
144
191
|
/** Callback for real-time progress updates */
|
|
145
192
|
onStepComplete?: (result: StepResult, index: number, total: number) => void;
|
|
146
193
|
/** Callback for status changes */
|
|
147
194
|
onStatusChange?: (status: TestRunStatus) => void;
|
|
148
195
|
}
|
|
149
196
|
type TestRunStatus = 'initializing' | 'navigating' | 'authenticating' | 'executing' | 'evaluating' | 'completed' | 'error';
|
|
150
|
-
type OverallResult = 'passed' | 'failed' | 'error' | 'partial';
|
|
197
|
+
type OverallResult = 'passed' | 'failed' | 'error' | 'partial' | 'passed_with_skips';
|
|
151
198
|
interface TestRunResult {
|
|
152
199
|
testCaseId: string;
|
|
153
200
|
testCaseTitle: string;
|
|
@@ -166,15 +213,102 @@ interface TestRunResult {
|
|
|
166
213
|
/** Browserbase session ID (if applicable) */
|
|
167
214
|
browserSessionId?: string;
|
|
168
215
|
}
|
|
216
|
+
type FindingCategory = 'console_error' | 'broken_interaction' | 'visual_anomaly' | 'input_handling';
|
|
217
|
+
type FindingSeverity = 'critical' | 'high' | 'medium' | 'low';
|
|
218
|
+
interface ExplorationConfig {
|
|
219
|
+
targetUrl: string;
|
|
220
|
+
featureDescription: string;
|
|
221
|
+
actionBudget: number;
|
|
222
|
+
auth?: AuthConfig;
|
|
223
|
+
browserConfig: BrowserConfig;
|
|
224
|
+
anthropicApiKey: string;
|
|
225
|
+
model?: string;
|
|
226
|
+
onActionComplete?: (action: ExplorationAction, index: number) => void;
|
|
227
|
+
}
|
|
228
|
+
interface ExplorationAction {
|
|
229
|
+
actionNumber: number;
|
|
230
|
+
action: string;
|
|
231
|
+
category: FindingCategory | 'normal';
|
|
232
|
+
severity?: FindingSeverity;
|
|
233
|
+
confidence: number;
|
|
234
|
+
description: string;
|
|
235
|
+
screenshotBefore: Buffer;
|
|
236
|
+
screenshotAfter: Buffer;
|
|
237
|
+
networkRequests: CapturedRequest[];
|
|
238
|
+
consoleLogs: ConsoleEntry[];
|
|
239
|
+
domContext?: DomContext;
|
|
240
|
+
durationMs: number;
|
|
241
|
+
}
|
|
242
|
+
interface CapturedRequest {
|
|
243
|
+
method: string;
|
|
244
|
+
url: string;
|
|
245
|
+
status: number;
|
|
246
|
+
responseBody?: string;
|
|
247
|
+
requestBody?: string;
|
|
248
|
+
timestamp: string;
|
|
249
|
+
}
|
|
250
|
+
interface DomContext {
|
|
251
|
+
selector: string;
|
|
252
|
+
elementText: string;
|
|
253
|
+
nearbyText: string;
|
|
254
|
+
}
|
|
255
|
+
interface ActionableFinding {
|
|
256
|
+
title: string;
|
|
257
|
+
category: FindingCategory;
|
|
258
|
+
severity: FindingSeverity;
|
|
259
|
+
confidence: number;
|
|
260
|
+
networkRequests: CapturedRequest[];
|
|
261
|
+
consoleErrors: ConsoleEntry[];
|
|
262
|
+
domContext?: DomContext;
|
|
263
|
+
url: string;
|
|
264
|
+
route: string;
|
|
265
|
+
reproSteps: string[];
|
|
266
|
+
screenshotUrl: string;
|
|
267
|
+
actionPerformed: string;
|
|
268
|
+
expectedBehavior: string;
|
|
269
|
+
actualBehavior: string;
|
|
270
|
+
}
|
|
271
|
+
interface ExplorationReport {
|
|
272
|
+
projectName: string;
|
|
273
|
+
featureDescription: string;
|
|
274
|
+
targetUrl: string;
|
|
275
|
+
exploredAt: string;
|
|
276
|
+
duration: string;
|
|
277
|
+
actionsUsed: number;
|
|
278
|
+
actionBudget: number;
|
|
279
|
+
findings: ActionableFinding[];
|
|
280
|
+
tested: {
|
|
281
|
+
description: string;
|
|
282
|
+
route: string;
|
|
283
|
+
status: 'passed';
|
|
284
|
+
}[];
|
|
285
|
+
notTested: {
|
|
286
|
+
description: string;
|
|
287
|
+
reason: string;
|
|
288
|
+
}[];
|
|
289
|
+
summary: string;
|
|
290
|
+
suggestedPrompt: string;
|
|
291
|
+
}
|
|
292
|
+
interface ExplorationResult {
|
|
293
|
+
overallResult: 'clean' | 'findings' | 'error';
|
|
294
|
+
actions: ExplorationAction[];
|
|
295
|
+
report: ExplorationReport;
|
|
296
|
+
totalDurationMs: number;
|
|
297
|
+
tokenUsage: {
|
|
298
|
+
inputTokens: number;
|
|
299
|
+
outputTokens: number;
|
|
300
|
+
};
|
|
301
|
+
browserSessionId?: string;
|
|
302
|
+
}
|
|
169
303
|
|
|
170
304
|
/**
|
|
171
305
|
* Test Runner
|
|
172
306
|
*
|
|
173
|
-
* Orchestrates the full test execution lifecycle
|
|
307
|
+
* Orchestrates the full test execution lifecycle:
|
|
174
308
|
* 1. Launch Stagehand browser session
|
|
175
309
|
* 2. Navigate to target URL
|
|
176
|
-
* 3. Inject authentication
|
|
177
|
-
* 4. For each step: act() → screenshot →
|
|
310
|
+
* 3. Inject authentication (supports supabase-native, cookie, localStorage, form-login)
|
|
311
|
+
* 4. For each step: act() → screenshot → vision evaluate → record
|
|
178
312
|
* 5. Generate summary
|
|
179
313
|
* 6. Return structured results
|
|
180
314
|
*/
|
|
@@ -184,6 +318,40 @@ interface TestRunResult {
|
|
|
184
318
|
*/
|
|
185
319
|
declare function runTest(config: TestRunConfig): Promise<TestRunResult>;
|
|
186
320
|
|
|
321
|
+
/**
|
|
322
|
+
* Exploratory Testing Runner
|
|
323
|
+
*
|
|
324
|
+
* Implements the observe->act->evaluate loop for autonomous
|
|
325
|
+
* feature exploration. The AI navigates a feature area,
|
|
326
|
+
* tries edge cases, and reports findings.
|
|
327
|
+
*/
|
|
328
|
+
|
|
329
|
+
declare function runExploration(config: ExplorationConfig): Promise<ExplorationResult>;
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Exploration Report Generator
|
|
333
|
+
*
|
|
334
|
+
* Transforms raw exploration actions into a structured report
|
|
335
|
+
* optimized for Claude Code consumption. The suggestedPrompt
|
|
336
|
+
* is designed to be pasted directly into Claude Code to fix issues.
|
|
337
|
+
*/
|
|
338
|
+
|
|
339
|
+
interface ReportInput {
|
|
340
|
+
projectName: string;
|
|
341
|
+
featureDescription: string;
|
|
342
|
+
targetUrl: string;
|
|
343
|
+
actions: ExplorationAction[];
|
|
344
|
+
model: string;
|
|
345
|
+
}
|
|
346
|
+
interface ReportOutput {
|
|
347
|
+
report: ExplorationReport;
|
|
348
|
+
tokenUsage: {
|
|
349
|
+
inputTokens: number;
|
|
350
|
+
outputTokens: number;
|
|
351
|
+
};
|
|
352
|
+
}
|
|
353
|
+
declare function generateExplorationReport(anthropic: Anthropic, input: ReportInput): Promise<ReportOutput>;
|
|
354
|
+
|
|
187
355
|
/**
|
|
188
356
|
* Browser Provider
|
|
189
357
|
*
|
|
@@ -204,17 +372,309 @@ interface StagehandSession {
|
|
|
204
372
|
* and manages Browserbase or local browser sessions.
|
|
205
373
|
*/
|
|
206
374
|
declare function createStagehandSession(config: BrowserConfig, anthropicApiKey: string): Promise<StagehandSession>;
|
|
375
|
+
/**
|
|
376
|
+
* Suppress the BugBear widget in the browser session.
|
|
377
|
+
*
|
|
378
|
+
* Uses Playwright's addInitScript() on the browser context to set a suppression
|
|
379
|
+
* flag before any page script runs. This prevents the widget from rendering and
|
|
380
|
+
* interfering with test execution (clicking the widget instead of app UI, popups
|
|
381
|
+
* covering elements, etc.).
|
|
382
|
+
*
|
|
383
|
+
* The flag is checked by BugBearProvider and BugBearPanel in @bbearai/react.
|
|
384
|
+
*/
|
|
385
|
+
declare function suppressBugBearWidget(stagehand: Stagehand): Promise<void>;
|
|
207
386
|
/**
|
|
208
387
|
* Inject authentication into the browser session.
|
|
209
388
|
* Uses Stagehand's Page API and CDP for cookie injection.
|
|
210
389
|
*/
|
|
211
390
|
declare function injectAuth(page: Page, auth: AuthConfig, stagehand?: Stagehand): Promise<void>;
|
|
391
|
+
interface NetworkCapture {
|
|
392
|
+
start: () => void;
|
|
393
|
+
stop: () => void;
|
|
394
|
+
getRequests: () => CapturedRequest[];
|
|
395
|
+
getErrors: () => NetworkError[];
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
/**
|
|
399
|
+
* Supabase Native Auth
|
|
400
|
+
*
|
|
401
|
+
* Authenticates against Supabase GoTrue API directly, bypassing fragile
|
|
402
|
+
* form-based login. Injects the session into localStorage so the app
|
|
403
|
+
* picks it up on page load — no DOM interaction required.
|
|
404
|
+
*/
|
|
405
|
+
|
|
406
|
+
/** Supabase GoTrue session shape (subset we need) */
|
|
407
|
+
interface GoTrueSession {
|
|
408
|
+
access_token: string;
|
|
409
|
+
refresh_token: string;
|
|
410
|
+
expires_in: number;
|
|
411
|
+
expires_at: number;
|
|
412
|
+
token_type: string;
|
|
413
|
+
user: {
|
|
414
|
+
id: string;
|
|
415
|
+
email: string;
|
|
416
|
+
role: string;
|
|
417
|
+
aud: string;
|
|
418
|
+
};
|
|
419
|
+
}
|
|
420
|
+
/**
|
|
421
|
+
* Authenticate via Supabase GoTrue REST API and return the session.
|
|
422
|
+
*/
|
|
423
|
+
declare function authenticateSupabase(auth: SupabaseNativeAuth): Promise<GoTrueSession>;
|
|
424
|
+
/**
|
|
425
|
+
* Inject a Supabase session into the browser's localStorage.
|
|
426
|
+
*
|
|
427
|
+
* The app's Supabase client reads from `sb-<ref>-auth-token` on load.
|
|
428
|
+
* We inject the token into localStorage so the app authenticates on
|
|
429
|
+
* the next page load — no DOM interaction needed.
|
|
430
|
+
*/
|
|
431
|
+
declare function injectSupabaseAuth(page: Page, auth: SupabaseNativeAuth, session: GoTrueSession): Promise<void>;
|
|
432
|
+
/**
|
|
433
|
+
* Verify the session is valid by calling the Supabase user endpoint.
|
|
434
|
+
*/
|
|
435
|
+
declare function verifySupabaseSession(auth: SupabaseNativeAuth, accessToken: string): Promise<boolean>;
|
|
436
|
+
/**
|
|
437
|
+
* Full Supabase auth flow: authenticate → inject → verify.
|
|
438
|
+
*/
|
|
439
|
+
declare function performSupabaseAuth(page: Page, auth: SupabaseNativeAuth): Promise<void>;
|
|
440
|
+
|
|
441
|
+
/**
|
|
442
|
+
* Vision-Based Step Evaluator
|
|
443
|
+
*
|
|
444
|
+
* Replaces Stagehand's extract() with direct Claude Messages API calls
|
|
445
|
+
* using before/after screenshots. This gives Claude visual context
|
|
446
|
+
* instead of just DOM text, catching visual regressions, layout shifts,
|
|
447
|
+
* and rendering issues that DOM-only evaluation misses.
|
|
448
|
+
*/
|
|
449
|
+
|
|
450
|
+
interface StepEvaluationInput {
|
|
451
|
+
anthropic: Anthropic;
|
|
452
|
+
screenshotBefore: Buffer;
|
|
453
|
+
screenshotAfter: Buffer;
|
|
454
|
+
action: string;
|
|
455
|
+
expectedResult: string;
|
|
456
|
+
/** Optional hint to guide what the evaluator should look for */
|
|
457
|
+
evaluationHint?: string;
|
|
458
|
+
model?: string;
|
|
459
|
+
}
|
|
460
|
+
interface StepEvaluation {
|
|
461
|
+
passed: boolean;
|
|
462
|
+
confidence: number;
|
|
463
|
+
actualResult: string;
|
|
464
|
+
}
|
|
465
|
+
/**
|
|
466
|
+
* Evaluate a test step by comparing before/after screenshots using Claude Vision.
|
|
467
|
+
*
|
|
468
|
+
* Sends both screenshots as image content blocks along with a structured
|
|
469
|
+
* evaluation prompt. Returns a typed assessment with pass/fail, confidence,
|
|
470
|
+
* and a description of what actually happened.
|
|
471
|
+
*/
|
|
472
|
+
declare function evaluateStep(input: StepEvaluationInput): Promise<StepEvaluation>;
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* Action Executor
|
|
476
|
+
*
|
|
477
|
+
* Executes test step actions using a tiered approach:
|
|
478
|
+
* 1. If step has selector + actionType → Playwright direct (deterministic, fast)
|
|
479
|
+
* 2. If step has only natural language → Stagehand AI fallback
|
|
480
|
+
* 3. If Playwright action fails → Fall back to Stagehand with the natural-language action
|
|
481
|
+
*
|
|
482
|
+
* This eliminates AI flakiness for steps that have been enriched with
|
|
483
|
+
* selectors while preserving AI flexibility for natural-language-only steps.
|
|
484
|
+
*/
|
|
485
|
+
|
|
486
|
+
interface ActionResult {
|
|
487
|
+
/** Whether the action was executed via Playwright (true) or Stagehand AI (false) */
|
|
488
|
+
deterministic: boolean;
|
|
489
|
+
/** Error message if action failed */
|
|
490
|
+
error?: string;
|
|
491
|
+
}
|
|
492
|
+
/**
|
|
493
|
+
* Execute a test step action, preferring deterministic Playwright
|
|
494
|
+
* when the step has a selector, falling back to Stagehand AI otherwise.
|
|
495
|
+
*/
|
|
496
|
+
declare function executeAction(page: Page, stagehand: Stagehand, step: TestStep): Promise<ActionResult>;
|
|
497
|
+
|
|
498
|
+
/**
|
|
499
|
+
* Selector Discovery
|
|
500
|
+
*
|
|
501
|
+
* After Stagehand successfully executes a natural-language step,
|
|
502
|
+
* attempts to discover which element was interacted with. Records
|
|
503
|
+
* the best available selector so the test case can be enriched
|
|
504
|
+
* for deterministic execution next time.
|
|
505
|
+
*
|
|
506
|
+
* Discovery data is stored in ai_step_results.actions_taken (JSONB).
|
|
507
|
+
*/
|
|
508
|
+
|
|
509
|
+
interface DiscoveredSelector {
|
|
510
|
+
/** The selector that was discovered */
|
|
511
|
+
selector: string;
|
|
512
|
+
/** How the selector was derived */
|
|
513
|
+
strategy: 'data-testid' | 'role' | 'aria-label' | 'id' | 'css-path';
|
|
514
|
+
/** Suggested actionType based on the element */
|
|
515
|
+
suggestedActionType?: StepActionType;
|
|
516
|
+
/** Element tag name */
|
|
517
|
+
tagName: string;
|
|
518
|
+
/** Visible text content (truncated) */
|
|
519
|
+
textContent?: string;
|
|
520
|
+
}
|
|
521
|
+
/**
|
|
522
|
+
* Attempt to discover the selector for the last-interacted element.
|
|
523
|
+
*
|
|
524
|
+
* Uses page.evaluate() to find the currently focused or last-clicked
|
|
525
|
+
* element and extract the best available selector for it.
|
|
526
|
+
*
|
|
527
|
+
* Returns null if no element can be identified.
|
|
528
|
+
*/
|
|
529
|
+
declare function discoverSelector(page: Page): Promise<DiscoveredSelector | null>;
|
|
530
|
+
/**
|
|
531
|
+
* Install a click tracker on the page.
|
|
532
|
+
*
|
|
533
|
+
* Records the last-clicked element in `document.__bbLastClicked`
|
|
534
|
+
* so discoverSelector() can find it after Stagehand's act().
|
|
535
|
+
* Should be called once after page navigation.
|
|
536
|
+
*/
|
|
537
|
+
declare function installClickTracker(page: Page): Promise<void>;
|
|
538
|
+
|
|
539
|
+
/**
|
|
540
|
+
* Report Auto-Triager
|
|
541
|
+
*
|
|
542
|
+
* Uses Claude to analyze incoming bug reports and auto-assign:
|
|
543
|
+
* - Severity (critical/high/medium/low)
|
|
544
|
+
* - Category (ui_ux/functional/crash/security/other)
|
|
545
|
+
* - Duplicate detection against recent reports
|
|
546
|
+
* - Root cause analysis
|
|
547
|
+
*
|
|
548
|
+
* Results are stored in reports.ai_analysis (JSONB).
|
|
549
|
+
*/
|
|
550
|
+
|
|
551
|
+
interface TriageReportInput {
|
|
552
|
+
title?: string | null;
|
|
553
|
+
description: string;
|
|
554
|
+
app_context?: Record<string, unknown> | null;
|
|
555
|
+
enhanced_context?: Record<string, unknown> | null;
|
|
556
|
+
device_info?: Record<string, unknown> | null;
|
|
557
|
+
navigation_history?: unknown[] | null;
|
|
558
|
+
screenshot_urls?: string[] | null;
|
|
559
|
+
error_fingerprint?: string | null;
|
|
560
|
+
report_source?: string | null;
|
|
561
|
+
}
|
|
562
|
+
interface RecentReportSummary {
|
|
563
|
+
id: string;
|
|
564
|
+
title?: string | null;
|
|
565
|
+
description: string;
|
|
566
|
+
error_fingerprint?: string | null;
|
|
567
|
+
severity?: string | null;
|
|
568
|
+
category?: string | null;
|
|
569
|
+
status: string;
|
|
570
|
+
}
|
|
571
|
+
interface TriageInput {
|
|
572
|
+
anthropic: Anthropic;
|
|
573
|
+
report: TriageReportInput;
|
|
574
|
+
recentReports: RecentReportSummary[];
|
|
575
|
+
model?: string;
|
|
576
|
+
}
|
|
577
|
+
type TriageSeverity = 'critical' | 'high' | 'medium' | 'low';
|
|
578
|
+
type TriageCategory = 'ui_ux' | 'functional' | 'crash' | 'security' | 'other';
|
|
579
|
+
interface TriageResult {
|
|
580
|
+
suggested_severity: TriageSeverity;
|
|
581
|
+
severity_confidence: number;
|
|
582
|
+
suggested_category: TriageCategory;
|
|
583
|
+
category_confidence: number;
|
|
584
|
+
root_cause_analysis: string;
|
|
585
|
+
duplicate_of: string | null;
|
|
586
|
+
duplicate_confidence: number;
|
|
587
|
+
triage_notes: string;
|
|
588
|
+
}
|
|
589
|
+
/**
|
|
590
|
+
* Analyze a report using Claude and return triage suggestions.
|
|
591
|
+
*/
|
|
592
|
+
declare function triageReport(input: TriageInput): Promise<TriageResult>;
|
|
593
|
+
|
|
594
|
+
/**
|
|
595
|
+
* Failure Analyzer
|
|
596
|
+
*
|
|
597
|
+
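* When an AI test step fails, analyzes the failure to classify it as one of the following (the FailureClassification type additionally allows 'ai_limitation', which is not described in this list):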
|
|
598
|
+
* - real_bug: Actual application defect (API error, broken feature, crash)
|
|
599
|
+
* - test_maintenance: Test is stale (selector changed, page restructured)
|
|
600
|
+
* - flaky: Timing issue, intermittent network failure, race condition
|
|
601
|
+
* - unknown: Can't determine with sufficient confidence
|
|
602
|
+
*
|
|
603
|
+
* For test_maintenance failures, suggests corrected selectors/actions
|
|
604
|
+
* that can be auto-applied to heal the test case.
|
|
605
|
+
*/
|
|
606
|
+
|
|
607
|
+
type FailureClassification = 'real_bug' | 'test_maintenance' | 'ai_limitation' | 'flaky' | 'unknown';
|
|
608
|
+
/** Run-level classification (aggregated from step-level classifications) */
|
|
609
|
+
type RunFailureClassification = 'bug' | 'test_issue' | 'ai_limitation' | 'flaky' | 'unknown';
|
|
610
|
+
interface FailureAnalysis {
|
|
611
|
+
classification: FailureClassification;
|
|
612
|
+
confidence: number;
|
|
613
|
+
reasoning: string;
|
|
614
|
+
suggested_fix?: {
|
|
615
|
+
stepNumber: number;
|
|
616
|
+
original_action: string;
|
|
617
|
+
corrected_action?: string;
|
|
618
|
+
corrected_selector?: string;
|
|
619
|
+
corrected_actionType?: string;
|
|
620
|
+
corrected_value?: string;
|
|
621
|
+
};
|
|
622
|
+
}
|
|
623
|
+
interface FailureAnalysisInput {
|
|
624
|
+
anthropic: Anthropic;
|
|
625
|
+
step: {
|
|
626
|
+
stepNumber: number;
|
|
627
|
+
action: string;
|
|
628
|
+
expectedResult: string;
|
|
629
|
+
selector?: string;
|
|
630
|
+
actionType?: string;
|
|
631
|
+
value?: string;
|
|
632
|
+
};
|
|
633
|
+
result: {
|
|
634
|
+
actualResult: string;
|
|
635
|
+
error?: string;
|
|
636
|
+
screenshotBefore: Buffer;
|
|
637
|
+
screenshotAfter: Buffer;
|
|
638
|
+
};
|
|
639
|
+
discoveredSelector?: {
|
|
640
|
+
selector: string;
|
|
641
|
+
strategy: string;
|
|
642
|
+
tagName?: string;
|
|
643
|
+
textContent?: string;
|
|
644
|
+
};
|
|
645
|
+
consoleLogs?: Array<{
|
|
646
|
+
level: string;
|
|
647
|
+
text: string;
|
|
648
|
+
}>;
|
|
649
|
+
networkErrors?: Array<{
|
|
650
|
+
method: string;
|
|
651
|
+
url: string;
|
|
652
|
+
status: number;
|
|
653
|
+
statusText: string;
|
|
654
|
+
}>;
|
|
655
|
+
model?: string;
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Analyze a failed test step to classify the failure and suggest fixes.
|
|
659
|
+
*/
|
|
660
|
+
declare function analyzeFailure(input: FailureAnalysisInput): Promise<FailureAnalysis>;
|
|
661
|
+
/**
|
|
662
|
+
* Roll up step-level failure classifications into a single run-level classification.
|
|
663
|
+
*
|
|
664
|
+
* Priority:
|
|
665
|
+
* 1. ANY step = real_bug → 'bug'
|
|
666
|
+
* 2. ALL steps = ai_limitation → 'ai_limitation'
|
|
667
|
+
* 3. ALL steps = test_maintenance → 'test_issue'
|
|
668
|
+
* 4. ALL steps = flaky → 'flaky'
|
|
669
|
+
* 5. Otherwise → most common classification (mapped to run-level)
|
|
670
|
+
*/
|
|
671
|
+
declare function rollupFailureClassification(stepClassifications: FailureClassification[]): RunFailureClassification;
|
|
212
672
|
|
|
213
673
|
/**
|
|
214
674
|
* Result Evaluator
|
|
215
675
|
*
|
|
216
676
|
* Generates AI summaries of test run results.
|
|
217
|
-
* Step-level evaluation is
|
|
677
|
+
* Step-level evaluation is handled by vision-evaluator.ts (Claude Vision).
|
|
218
678
|
*/
|
|
219
679
|
|
|
220
680
|
/**
|
|
@@ -228,6 +688,71 @@ declare function generateRunSummary(anthropic: Anthropic, testTitle: string, ste
|
|
|
228
688
|
passed: boolean;
|
|
229
689
|
confidence: number;
|
|
230
690
|
error?: string;
|
|
691
|
+
skipped?: boolean;
|
|
231
692
|
}>, model: string): Promise<string>;
|
|
232
693
|
|
|
233
|
-
|
|
694
|
+
/**
|
|
695
|
+
* Simple counting semaphore for controlling concurrent browser sessions.
|
|
696
|
+
*
|
|
697
|
+
* Usage:
|
|
698
|
+
* const sem = new Semaphore(3);
|
|
699
|
+
* await sem.acquire();
|
|
700
|
+
* try { ... } finally { sem.release(); }
|
|
701
|
+
*/
|
|
702
|
+
declare class Semaphore {
|
|
703
|
+
private readonly max;
|
|
704
|
+
private current;
|
|
705
|
+
private queue;
|
|
706
|
+
constructor(max: number);
|
|
707
|
+
acquire(): Promise<void>;
|
|
708
|
+
release(): void;
|
|
709
|
+
/** Number of slots currently in use */
|
|
710
|
+
get active(): number;
|
|
711
|
+
/** Number of waiters in the queue */
|
|
712
|
+
get waiting(): number;
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/**
|
|
716
|
+
* AI Test Execution Cost Estimation
|
|
717
|
+
*
|
|
718
|
+
* Provides pre-run cost estimates and post-run cost calculations
|
|
719
|
+
* based on per-model token pricing and calibrated usage profiles.
|
|
720
|
+
*/
|
|
721
|
+
interface CostEstimate {
|
|
722
|
+
/** Cost in cents (USD) */
|
|
723
|
+
cents: number;
|
|
724
|
+
/** Formatted string (e.g., "$0.12") */
|
|
725
|
+
formatted: string;
|
|
726
|
+
/** Token breakdown */
|
|
727
|
+
tokens: {
|
|
728
|
+
inputTokens: number;
|
|
729
|
+
outputTokens: number;
|
|
730
|
+
};
|
|
731
|
+
/** Model used for estimate */
|
|
732
|
+
model: string;
|
|
733
|
+
}
|
|
734
|
+
/**
|
|
735
|
+
* Calculate actual cost from known token counts.
|
|
736
|
+
*/
|
|
737
|
+
declare function estimateCost(inputTokens: number, outputTokens: number, model?: string): CostEstimate;
|
|
738
|
+
/**
|
|
739
|
+
* Pre-run cost estimate based on step count.
|
|
740
|
+
* Each step involves: act() + step evaluation (formerly extract(), now the vision evaluator). Plus one summary at the end.
|
|
741
|
+
*/
|
|
742
|
+
declare function estimateTestCost(stepCount: number, model?: string): CostEstimate;
|
|
743
|
+
/**
|
|
744
|
+
* Estimate cost for a batch of test cases.
|
|
745
|
+
*/
|
|
746
|
+
declare function estimateBatchCost(testCases: Array<{
|
|
747
|
+
stepCount: number;
|
|
748
|
+
}>, model?: string): CostEstimate;
|
|
749
|
+
/**
|
|
750
|
+
* Get calibrated token estimates for a test with N steps.
|
|
751
|
+
* More accurate than the old hardcoded 3000/500 per step.
|
|
752
|
+
*/
|
|
753
|
+
declare function getTokenEstimate(stepCount: number): {
|
|
754
|
+
inputTokens: number;
|
|
755
|
+
outputTokens: number;
|
|
756
|
+
};
|
|
757
|
+
|
|
758
|
+
export { type ActionResult, type ActionableFinding, type AuthConfig, type BrowserConfig, type BrowserProvider, type CapturedRequest, type ConsoleEntry, type CookieAuth, type CostEstimate, type DiscoveredSelector, type DomContext, type ExplorationAction, type ExplorationConfig, type ExplorationReport, type ExplorationResult, type FailureAnalysis, type FailureAnalysisInput, type FailureClassification, type FindingCategory, type FindingSeverity, type FormLoginAuth, type LocalStorageAuth, type NetworkCapture, type NetworkError, type OverallResult, type RecentReportSummary, type RetryAttempt, type RetryConfig, type RunFailureClassification, Semaphore, type StagehandSession, type StepAction, type StepActionType, type StepEvaluation, type StepEvaluationInput, type StepResult, type SupabaseNativeAuth, type TestCaseInput, type TestRunConfig, type TestRunResult, type TestRunStatus, type TestStep, type TriageCategory, type TriageInput, type TriageReportInput, type TriageResult, type TriageSeverity, analyzeFailure, authenticateSupabase, createStagehandSession, discoverSelector, estimateBatchCost, estimateCost, estimateTestCost, evaluateStep, executeAction, generateExplorationReport, generateRunSummary, getTokenEstimate, injectAuth, injectSupabaseAuth, installClickTracker, performSupabaseAuth, rollupFailureClassification, runExploration, runTest, suppressBugBearWidget, triageReport, verifySupabaseSession };
|