autokap 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +266 -305
  7. package/package.json +23 -16
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
@@ -1,4783 +0,0 @@
1
- import OpenAI from 'openai';
2
- import * as fs from 'fs';
3
- import * as path from 'path';
4
- import * as os from 'os';
5
- import { Browser } from './browser.js';
6
- import { analyzeReplayCandidate, executeAction, resolveReplayActionArgs, runAgent } from './agent.js';
7
- import { observePlanningContext, planFromScript } from './video-planner.js';
8
- import { videoVerificationTools } from './video-tools.js';
9
- import { buildCursorOverlayScript, buildVideoPromptContentParts, buildVideoVerificationSystemPrompt, buildVideoStepVerificationUserMessage, buildStepFixerSystemPrompt, buildStepFixerUserMessage, } from './video-prompts.js';
10
- import { buildVideoObservationSnapshot, captureVideoObservationSummary } from './video-observation.js';
11
- import { buildStepUsageFromSnapshot, extractLlmUsageSnapshot, normalizeLlmUsageSnapshot, } from './llm-usage.js';
12
- import { detectVariantStateDeterministic, evaluateRequestedThemeState, scoreLocaleSignals, } from './video-variant-state.js';
13
- import { animatedClick, animatedHover, humanType, moveMouse } from './mouse-animation.js';
14
- import { dismissOverlaysWithLogging } from './overlay-utils.js';
15
- import { logger } from './logger.js';
16
- import { evaluateActionSecurity, evaluateResolvedActionSecurity } from './security.js';
17
- import { createAbortError, getAbortMessage, isAbortError, throwIfAborted } from './abort.js';
18
/** True only when the VIDEO_AGENT_CACHE_LAYOUT_V2 environment variable is exactly the string '1'. */
const VIDEO_AGENT_CACHE_LAYOUT_V2 = Object.is(process.env.VIDEO_AGENT_CACHE_LAYOUT_V2, '1');
19
/**
 * Builds an OpenAI SDK client pointed at the OpenRouter endpoint.
 *
 * @param {string} apiKey - Key passed through to the client as `apiKey`.
 * @returns {OpenAI} Client configured with the OpenRouter base URL and the
 *   two attribution headers below.
 */
function createClient(apiKey) {
    const defaultHeaders = {
        'HTTP-Referer': 'https://github.com/screenshot-agent',
        'X-Title': 'Screenshot Agent',
    };
    const clientOptions = {
        baseURL: 'https://openrouter.ai/api/v1',
        apiKey,
        defaultHeaders,
    };
    return new OpenAI(clientOptions);
}
29
- // ── Helpers ──────────────────────────────────────────────────────────
30
/**
 * Writes a message to the shared logger and mirrors it to an optional callback.
 *
 * @param {string} msg - Text to log.
 * @param {string} [level='info'] - 'error', 'success' and 'ai' map to the
 *   matching logger method; any other value is logged through `logger.info`.
 * @param {(entry: {level: string, message: string, timestamp: number}) => void} [cb]
 *   Receives the structured entry after the logger call.
 */
function log(msg, level = 'info', cb) {
    // The timestamp is taken before the logger runs, as in the structured entry contract.
    const entry = { level, message: msg, timestamp: Date.now() };
    switch (level) {
        case 'error':
            logger.error(msg);
            break;
        case 'success':
            logger.success(msg);
            break;
        case 'ai':
            logger.ai(msg);
            break;
        default:
            logger.info(msg);
            break;
    }
    cb?.(entry);
}
42
/**
 * Closes the browser when the given abort signal fires.
 *
 * @param {AbortSignal | undefined} signal - Optional cancellation signal.
 * @param {{ close: () => Promise<unknown> }} browser - Object whose `close()` is invoked on abort.
 * @returns {() => void} Detach function that removes the abort listener (a no-op
 *   when no signal was given or the signal had already fired).
 */
function attachAbortToBrowser(signal, browser) {
    const detachNothing = () => { };
    if (!signal) {
        return detachNothing;
    }
    const closeOnAbort = () => {
        // Fire-and-forget: a rejection here just means the browser was already closed.
        void browser.close().catch(() => { });
    };
    if (signal.aborted) {
        // Already cancelled: close right away, there is nothing to listen for.
        closeOnAbort();
        return detachNothing;
    }
    signal.addEventListener('abort', closeOnAbort, { once: true });
    return () => {
        signal.removeEventListener('abort', closeOnAbort);
    };
}
60
/**
 * Creates an abort signal scoped to one dry-run sub-phase. The signal fires
 * when the parent signal aborts or when the optional timeout elapses,
 * whichever happens first.
 *
 * @param {object} params
 * @param {AbortSignal} [params.parentSignal] - Outer cancellation signal to mirror.
 * @param {number | null} [params.timeoutMs] - Sub-phase timeout in milliseconds;
 *   `null` or `undefined` means no timeout.
 * @param {number} params.stepIndex - Zero-based step index, used in the timeout message.
 * @param {string} params.subphase - Sub-phase name, used in the timeout message.
 * @returns {{ signal: AbortSignal | undefined, cleanup: () => void }} `signal` is
 *   `undefined` when there is neither a parent signal nor a timeout. `cleanup`
 *   clears the timer and detaches the parent listener.
 */
function createDryRunSubphaseSignal(params) {
    const { parentSignal, stepIndex, subphase } = params;
    // A missing timeout must behave like `null`; otherwise `setTimeout(fn, undefined)`
    // would fire on the next tick and abort the sub-phase immediately.
    const timeoutMs = params.timeoutMs ?? null;
    if (!parentSignal && timeoutMs === null) {
        return { signal: undefined, cleanup: () => { } };
    }
    const controller = new AbortController();
    let timeout = null;
    const abort = (reason) => {
        if (controller.signal.aborted) {
            return;
        }
        if (reason instanceof Error) {
            controller.abort(reason);
            return;
        }
        const hasText = typeof reason === 'string' && reason.trim().length > 0;
        controller.abort(createAbortError(hasText ? reason : 'Video dry-run cancelled.'));
    };
    const onParentAbort = () => {
        abort(parentSignal?.reason ?? getAbortMessage(parentSignal, 'Video dry-run cancelled.'));
    };
    if (parentSignal) {
        if (parentSignal.aborted) {
            onParentAbort();
        }
        else {
            parentSignal.addEventListener('abort', onParentAbort, { once: true });
        }
    }
    if (timeoutMs !== null) {
        timeout = setTimeout(() => {
            abort(createAbortError(`Dry-run step ${stepIndex + 1} ${subphase} timed out after ${timeoutMs}ms.`));
        }, timeoutMs);
    }
    return {
        signal: controller.signal,
        cleanup: () => {
            if (timeout !== null) {
                clearTimeout(timeout);
            }
            if (parentSignal) {
                parentSignal.removeEventListener('abort', onParentAbort);
            }
        },
    };
}
106
/**
 * Chooses the message to report for a failed operation.
 *
 * @param {AbortSignal | undefined} signal - When aborted, its abort message wins.
 * @param {unknown} error - The caught value; only `Error` instances with a
 *   non-empty message are used.
 * @param {string} fallback - Returned when neither source yields a message.
 * @returns {string} The selected message.
 */
function getAbortAwareErrorMessage(signal, error, fallback) {
    const wasAborted = Boolean(signal?.aborted);
    if (wasAborted) {
        return getAbortMessage(signal, fallback);
    }
    const hasOwnMessage = error instanceof Error && Boolean(error.message);
    return hasOwnMessage ? error.message : fallback;
}
115
/**
 * Computes how many milliseconds are left in the current internal phase.
 *
 * @param {{ internalPhaseStartedAt?: number, internalPhaseTimeoutMs?: number }} config
 * @returns {number | null} Remaining time clamped at 0, or `null` when either
 *   the start time or the timeout is missing (or falsy).
 */
function getRemainingPhaseBudgetMs(config) {
    const { internalPhaseStartedAt: startedAt, internalPhaseTimeoutMs: budgetMs } = config;
    if (!startedAt || !budgetMs) {
        return null;
    }
    const elapsedMs = Date.now() - startedAt;
    return Math.max(0, budgetMs - elapsedMs);
}
121
/**
 * Renders a step target as a short `key=value, key=value` string for logs and prompts.
 *
 * @param {object | null | undefined} target - Target descriptor; falsy yields ''.
 * @returns {string} Comma-separated summary of the populated fields, in the
 *   order label, href, role, tag, selector, index.
 */
function summarizeVideoTarget(target) {
    if (!target) {
        return '';
    }
    const textFields = [
        ['label', target.label],
        ['href', target.href],
        ['role', target.role],
        ['tag', target.tag],
        ['selector', target.selector],
    ];
    const pieces = [];
    for (const [fieldName, fieldValue] of textFields) {
        if (fieldValue) {
            pieces.push(`${fieldName}=${fieldValue}`);
        }
    }
    // index is checked by type so that index 0 is still reported.
    if (typeof target.index === 'number') {
        pieces.push(`index=${target.index}`);
    }
    return pieces.join(', ');
}
134
/**
 * Picks the most specific available description of what a step acts on.
 * Preference order: structured target summary, raw selector, coordinates,
 * then the step's free-text description.
 *
 * @param {object} step - Video step.
 * @returns {string} The first non-empty candidate in the order above.
 */
function summarizeStepTarget(step) {
    const targetSummary = summarizeVideoTarget(step.target);
    if (targetSummary) {
        return targetSummary;
    }
    if (step.selector) {
        return step.selector;
    }
    if (step.coordinates) {
        return `coords=${step.coordinates.x},${step.coordinates.y}`;
    }
    return step.description;
}
140
/**
 * Runs one dry-run sub-phase under a time budget derived from the remaining
 * phase budget, logging start, end and failure.
 *
 * @param {object} params
 * @param {object} params.config - Run configuration (phase budget fields, `abortSignal`).
 * @param {object} params.callbacks - `onLog` receives structured log entries.
 * @param {number} params.stepIndex - Zero-based step index (logged 1-based).
 * @param {string} params.subphase - Sub-phase name; selects the timeout cap.
 * @param {number} [params.minBudgetMs] - Minimum remaining budget required to
 *   start (default 1500, never below 1200).
 * @param {(timeoutMs: number | null, signal: AbortSignal | undefined) => Promise<*>} params.run
 *   The work to execute.
 * @returns {Promise<*>} Whatever `params.run` resolves to.
 * @throws {Error} When the remaining budget is below the minimum, or when
 *   `params.run` rejects. If the local signal fired but the outer abort signal
 *   did not, the rejection is replaced by an abort error carrying the message.
 */
async function runDryRunSubphase(params) {
    const stepNumber = params.stepIndex + 1;
    const remaining = getRemainingPhaseBudgetMs(params.config);
    const minBudgetMs = Math.max(1200, params.minBudgetMs ?? 1500);
    log(`[Dry-run] stepStart step=${stepNumber} subphase=${params.subphase} remainingBudgetMs=${remaining ?? 'unbounded'}`, 'info', params.callbacks.onLog);
    if (remaining !== null && remaining < minBudgetMs) {
        throw new Error(`Dry-run budget exhausted before step ${stepNumber} ${params.subphase} (remaining ${remaining}ms).`);
    }
    let localTimeout = null;
    if (remaining !== null) {
        // Per-subphase ceiling; the effective timeout also leaves 250ms of
        // headroom in the phase budget and is never shorter than 800ms.
        let capMs = 20_000;
        if (params.subphase === 'repair_lane') {
            capMs = 40_000;
        }
        else if (params.subphase === 'prepared_variant') {
            capMs = 60_000;
        }
        else if (params.subphase === 'verification_llm' || params.subphase === 'llm_fixer') {
            capMs = 30_000;
        }
        localTimeout = Math.max(800, Math.min(remaining - 250, capMs));
    }
    const fallbackMessage = localTimeout === null
        ? `Dry-run step ${stepNumber} ${params.subphase} cancelled.`
        : `Dry-run step ${stepNumber} ${params.subphase} timed out after ${localTimeout}ms.`;
    const scoped = createDryRunSubphaseSignal({
        parentSignal: params.config.abortSignal,
        timeoutMs: localTimeout,
        stepIndex: params.stepIndex,
        subphase: params.subphase,
    });
    const { signal } = scoped;
    try {
        const outcome = await params.run(localTimeout, signal);
        log(`[Dry-run] ${params.subphase}End step=${stepNumber} remainingBudgetMs=${getRemainingPhaseBudgetMs(params.config) ?? 'unbounded'}`, 'info', params.callbacks.onLog);
        return outcome;
    }
    catch (error) {
        const message = getAbortAwareErrorMessage(signal, error, fallbackMessage);
        log(`[Dry-run] ${params.subphase}Failed step=${stepNumber} error=${message}`, 'error', params.callbacks.onLog);
        // Local abort without an outer abort: surface it as an abort error.
        const locallyAborted = signal?.aborted && !params.config.abortSignal?.aborted;
        if (locallyAborted) {
            throw createAbortError(message);
        }
        throw error;
    }
    finally {
        scoped.cleanup();
    }
}
181
/**
 * Derives a video-step target descriptor from an executed agent action.
 *
 * @param {{ params: object }} action - Executed action; values are read from `action.params`.
 * @param {*} coherenceKey - Stored on the target unchanged.
 * @returns {object | undefined} The target, or `undefined` when the action
 *   carries no selector, href, label, coordinates or numeric index.
 */
function buildVideoStepTargetFromAction(action, coherenceKey) {
    const args = action.params;
    const stringOrUndefined = (value) => (typeof value === 'string' ? value : undefined);
    const numberOrUndefined = (value) => (typeof value === 'number' ? value : undefined);

    // Explicit x/y take precedence over the element's centre point.
    let coordinates;
    if (typeof args.x === 'number' && typeof args.y === 'number') {
        coordinates = { x: args.x, y: args.y };
    }
    else if (typeof args.elementCx === 'number' && typeof args.elementCy === 'number') {
        coordinates = { x: args.elementCx, y: args.elementCy };
    }

    const target = {
        selector: stringOrUndefined(args.selector),
        href: stringOrUndefined(args.href),
        label: stringOrUndefined(args.elementLabel),
        tag: stringOrUndefined(args.elementTag),
        role: stringOrUndefined(args.elementRole),
        coordinates,
        index: numberOrUndefined(args.index),
        coherenceKey,
    };
    if (target.selector || target.href || target.label || target.coordinates) {
        return target;
    }
    return typeof target.index === 'number' ? target : undefined;
}
202
/**
 * Converts the actions an agent executed into replayable video steps.
 *
 * Actions with `success === false` or an action name outside the supported
 * set are dropped. The first resulting step reuses the original step id; later
 * ones get `<id>-repair-<n>` where n is the 1-based position.
 *
 * @param {object} params
 * @param {Array<object>} params.actions - Executed actions (`action`, `params`, `success`).
 * @param {object} params.originalStep - Step being repaired; supplies defaults.
 * @param {*} params.coherenceKey - Passed through to each step's target.
 * @returns {Array<object>} Video steps in execution order.
 */
function mapExecutedActionsToVideoSteps(params) {
    const { originalStep, coherenceKey } = params;
    const supportedActions = new Set([
        'navigate_to', 'dismiss_overlays', 'click', 'type_text', 'select_option',
        'scroll', 'press_key', 'wait', 'hover', 'safe_expand',
    ]);
    const steps = [];
    for (const action of params.actions) {
        if (action.success === false || !supportedActions.has(action.action)) {
            continue;
        }
        const args = action.params;
        const position = steps.length;
        const target = buildVideoStepTargetFromAction(action, coherenceKey);
        const baseStep = {
            ...originalStep,
            id: position === 0 ? originalStep.id : `${originalStep.id}-repair-${position + 1}`,
            description: typeof args.elementLabel === 'string'
                ? `${action.action} ${args.elementLabel}`.trim()
                : originalStep.description,
            target,
            selector: target?.selector ?? (typeof args.selector === 'string' ? args.selector : undefined),
            coordinates: target?.coordinates,
        };
        let typeFields;
        switch (action.action) {
            case 'navigate_to':
                typeFields = { type: 'navigate', url: String(args.url ?? originalStep.url ?? '') };
                break;
            case 'dismiss_overlays':
                typeFields = { type: 'dismiss_overlays' };
                break;
            case 'type_text':
                typeFields = { type: 'type', text: String(args.text ?? originalStep.text ?? '') };
                break;
            case 'select_option':
                typeFields = {
                    type: 'select_option',
                    optionLabel: typeof args.optionLabel === 'string' ? args.optionLabel : originalStep.optionLabel,
                    optionValue: typeof args.optionValue === 'string' ? args.optionValue : originalStep.optionValue,
                    optionIndex: typeof args.optionIndex === 'number' ? args.optionIndex : originalStep.optionIndex,
                };
                break;
            case 'scroll':
                typeFields = {
                    type: 'scroll',
                    direction: args.direction ?? originalStep.direction ?? 'down',
                    amount: typeof args.amount === 'number' ? args.amount : (originalStep.amount ?? 400),
                };
                break;
            case 'press_key':
                typeFields = { type: 'key', key: String(args.key ?? originalStep.key ?? 'Enter') };
                break;
            case 'hover':
            case 'safe_expand':
                typeFields = { type: 'hover' };
                break;
            case 'wait':
                typeFields = { type: 'wait', waitMs: Number(args.ms ?? originalStep.waitMs ?? 500) };
                break;
            case 'click':
            default:
                typeFields = { type: 'click' };
                break;
        }
        steps.push({ ...baseStep, ...typeFields });
    }
    return steps;
}
253
/**
 * Builds the prompt given to the screenshot agent when it is asked to repair
 * a single blocked clip step on the live page.
 *
 * @param {object} params
 * @param {string} params.script - Overall clip goal.
 * @param {object} params.step - The blocked step (description, expectedPageAfter, target hints).
 * @param {object} [params.nextStep] - Following step; its description tells the agent when to stop.
 * @param {string} [params.lang] - Requested UI language.
 * @param {string} [params.theme] - Requested UI theme.
 * @returns {string} Context lines, a blank line, then the fixed instruction list.
 */
function buildScreenshotRepairLanePrompt(params) {
    const { step, nextStep } = params;
    const targetHints = summarizeStepTarget(step);
    const contextLines = [
        `Overall clip goal: ${params.script}`,
        `Current blocked step: ${step.description}`,
        nextStep
            ? `Stop as soon as the UI needed for the next step is visible: ${nextStep.description}`
            : 'Stop as soon as this step has clearly succeeded.',
    ];
    if (params.lang) {
        contextLines.push(`Requested UI language: ${params.lang}`);
    }
    if (params.theme) {
        contextLines.push(`Requested UI theme: ${params.theme}`);
    }
    if (step.expectedPageAfter) {
        contextLines.push(`Expected destination contract: ${JSON.stringify(step.expectedPageAfter)}`);
    }
    if (targetHints) {
        contextLines.push(`Current step target hints: ${targetHints}`);
    }
    const instructionLines = [
        'Use the existing browser state on the current page. Perform ONLY the minimal actions needed to make this step succeed.',
        '- Re-read the live page and use visible interactive elements or search_text instead of guessing stale selectors.',
        '- If a menu, dialog, or popover must be opened for the step to succeed, open it and stop immediately once that resulting UI is visible.',
        '- Do NOT perform later clip actions.',
        '- Do NOT navigate away unless the current step explicitly requires it.',
        '- Call ready_to_capture immediately when the exact post-step UI state is visible.',
    ];
    // Empty context entries are dropped, matching the blank-line-free context block.
    const details = contextLines.filter(Boolean).join('\n');
    return `${details}\n\n${instructionLines.join('\n')}`;
}
272
/**
 * Builds the prompt for the "variant preflight" agent run, which restores the
 * requested UI language/theme on the already-open page before recording.
 *
 * @param {object} params
 * @param {string} params.script - Overall clip goal.
 * @param {string} params.currentUrl - URL currently open.
 * @param {string} [params.preparedStartUrl] - URL family the agent must end on.
 * @param {string} [params.lang] - Requested fixed UI language.
 * @param {string} [params.theme] - Requested fixed UI theme.
 * @param {string} [params.observedSummary] - Description of the observed mismatch.
 * @param {string} [params.langInstructions] - Explicit language-switch instructions;
 *   replaces the generic language hint.
 * @param {string} [params.themeInstructions] - Explicit theme-switch instructions;
 *   replaces the generic theme hint.
 * @returns {string} Context paragraphs separated by blank lines, followed by the
 *   fixed instruction list.
 */
function buildPreparedVariantRepairPrompt(params) {
    const { lang, theme } = params;
    let languageGuidance = '';
    if (params.langInstructions) {
        languageGuidance = `Follow these language-switch instructions exactly:\n${params.langInstructions}`;
    }
    else if (lang) {
        languageGuidance = `Use the in-app language selector and confirm the fixed app chrome is in "${lang}".`;
    }
    let themeGuidance = '';
    if (params.themeInstructions) {
        themeGuidance = `Follow these theme-switch instructions exactly:\n${params.themeInstructions}`;
    }
    else if (theme) {
        themeGuidance = `If the fixed UI theme is not "${theme}", use the in-app theme selector, appearance menu, or persisted preference to switch it.`;
    }
    const contextParagraphs = [
        'Variant preflight only.',
        `Overall clip goal: ${params.script}`,
        `Current page URL: ${params.currentUrl}`,
        params.preparedStartUrl ? `Required final page URL family: ${params.preparedStartUrl}` : '',
        lang ? `Requested fixed UI language: ${lang}` : '',
        theme ? `Requested fixed UI theme: ${theme}` : '',
        params.observedSummary ? `Observed variant mismatch: ${params.observedSummary}` : '',
        languageGuidance,
        themeGuidance,
    ];
    const instructionLines = [
        'Use the existing authenticated browser state. The correct project/page is already open, but the fixed UI variant is not reliably restored in this fresh browser.',
        '- Keep the SAME project/entity/page open while restoring the requested language/theme.',
        '- Dismiss cookie/analytics banners or overlays first if they block the UI.',
        '- Prefer visible locale/theme controls, user menus, or settings/sidebar entries. If a combined variant chip or label like "fr light" is visible, use that control directly.',
        '- Do NOT start by searching for tiny raw tokens like "fr", "en", "light", or "dark" unless a settings page, language picker, or variant menu is already open.',
        '- Use visible controls, menus, settings, or search_text to find the language/theme switch. Do not guess stale selectors.',
        '- If the app briefly detours through settings or another route to switch language/theme, return to the same project/page before finishing.',
        '- Ignore user-generated content that may remain in another language, including project names, preset names, assistant text, and imported data labels.',
        '- Do NOT perform the recorded clip interaction itself (for example do not click "New", do not open "New preset", do not select "Hero").',
        '- Call ready_to_capture only when the current page is back on the correct project/entity and the visible app chrome matches the requested language/theme.',
        '- If you cannot restore the requested language/theme on this page, call give_up.',
    ];
    const details = contextParagraphs.filter(Boolean).join('\n\n');
    return `${details}\n\n${instructionLines.join('\n')}`;
}
306
/**
 * Replays previously recorded preparation actions on the current page so the
 * dry-run starts from the prepared UI state.
 *
 * Does nothing when no actions were recorded or when the replay analysis
 * reports a skip reason. Unresolvable actions are logged and skipped; the
 * first action that fails to execute stops the replay.
 *
 * @param {object} browser - Browser wrapper (`currentPage`, `getInteractiveElements`).
 * @param {object} config - Run configuration (`preparedReplayActions`, URLs, `abortSignal`, ...).
 * @param {{ onLog?: Function }} callbacks - Log sink.
 * @returns {Promise<void>}
 */
async function replayPreparedActions(browser, config, callbacks) {
    const recordedActions = config.preparedReplayActions;
    if (!recordedActions || recordedActions.length === 0) {
        return;
    }
    const analysis = analyzeReplayCandidate(recordedActions, {
        currentUrl: browser.currentPage.url(),
        targetUrl: config.preparedStartUrl ?? config.url,
        currentViewport: browser.currentPage.viewportSize(),
        isAuthenticated: !!config.credentials,
        currentDialogCount: null,
        pageIdentity: config.preparedObservationSnapshot?.pageIdentity ?? null,
    });
    if (analysis.skipReason) {
        log(`[Dry-run] replayPreparedState skipped: ${analysis.skipReason}`, 'info', callbacks.onLog);
        return;
    }
    log(`[Dry-run] replayPreparedState replaying ${analysis.replayableActions.length} action(s)`, 'info', callbacks.onLog);
    // Only these action kinds are resolved against the live element list.
    const elementBoundActions = ['click', 'type_text', 'select_option', 'scroll', 'hover', 'safe_expand'];
    for (const recorded of analysis.replayableActions) {
        throwIfAborted(config.abortSignal, 'Video dry-run cancelled.');
        let liveElements = [];
        if (elementBoundActions.includes(recorded.action)) {
            liveElements = await browser.getInteractiveElements({ timeoutMs: 3000 }).catch(() => []);
        }
        const resolution = resolveReplayActionArgs(recorded, liveElements);
        if (!resolution.args) {
            log(`[Dry-run] replayPreparedState unresolved action "${recorded.action}": ${resolution.reason}`, 'info', callbacks.onLog);
            continue;
        }
        const outcome = await executeAction(browser, recorded.action, resolution.args);
        if (!outcome.success) {
            log(`[Dry-run] replayPreparedState action "${recorded.action}" failed: ${outcome.error}`, 'info', callbacks.onLog);
            break;
        }
    }
}
340
/**
 * Runs the screenshot agent on the current page to get a blocked step
 * unstuck, then converts what the agent did into replayable video steps.
 *
 * @param {object} params
 * @param {object} params.browser - Browser wrapper already on the relevant page.
 * @param {object} params.config - Video run configuration.
 * @param {object} params.originalStep - The step that failed.
 * @param {object} [params.nextStep] - The following step, used as a stop hint.
 * @param {AbortSignal} [params.abortSignal] - Overrides `config.abortSignal` when set.
 * @returns {Promise<{steps: object[], actions: object[], resolvedTargetSummary: string} | null>}
 *   `null` when the agent did not succeed or produced no mappable action.
 */
async function runScreenshotRepairLane(params) {
    const { browser, config, originalStep } = params;
    const prompt = buildScreenshotRepairLanePrompt({
        script: config.script,
        step: originalStep,
        nextStep: params.nextStep,
        lang: config.lang,
        theme: config.theme,
    });
    const agentConfig = {
        url: browser.currentPage.url(),
        prompt,
        dark: config.theme === 'dark',
        langs: config.lang ? [config.lang] : ['en'],
        outputDir: '',
        headed: false,
        viewport: config.viewport,
        // Never more than 12 agent iterations, even if more step retries are configured.
        maxIterations: Math.min(config.maxStepRetries ?? 12, 12),
        model: config.model,
        credentials: config.credentials,
        currentLang: config.lang,
        currentTheme: config.theme,
        selectorMemory: config.selectorMemory,
        abortSignal: params.abortSignal ?? config.abortSignal,
    };
    const agentResult = await runAgent(browser, agentConfig, config.apiKey);
    if (!agentResult.success) {
        return null;
    }
    const steps = mapExecutedActionsToVideoSteps({
        actions: agentResult.actions,
        originalStep,
        coherenceKey: config.preparedCoherenceKey,
    });
    if (steps.length === 0) {
        return null;
    }
    return {
        steps,
        actions: agentResult.actions,
        resolvedTargetSummary: summarizeStepTarget(steps[0] ?? originalStep),
    };
}
381
/**
 * Runs the agent in "language_preflight" mode to restore the requested
 * language/theme on the current page, retrying once with the fallback model
 * when the primary model does not succeed.
 *
 * @param {object} params
 * @param {object} params.browser - Browser wrapper already on the target page.
 * @param {object} params.config - Video run configuration (`model`, `fallbackModel`, ...).
 * @param {string} [params.observedSummary] - Description of the observed variant mismatch.
 * @param {AbortSignal} [params.abortSignal] - Overrides `config.abortSignal` when set.
 * @returns {Promise<object>} The agent result of the primary run, or of the
 *   fallback run when one was made.
 */
async function runPreparedVariantRepairLane(params) {
    const { browser, config } = params;
    const prompt = buildPreparedVariantRepairPrompt({
        script: config.script,
        currentUrl: browser.currentPage.url(),
        preparedStartUrl: config.preparedStartUrl,
        lang: config.lang,
        theme: config.theme,
        langInstructions: config.langInstructions,
        themeInstructions: config.themeInstructions,
        observedSummary: params.observedSummary,
    });
    // Built per attempt so that `url` reflects the page at the time of each run.
    const agentConfigFor = (model) => {
        return {
            url: browser.currentPage.url(),
            prompt,
            dark: config.theme === 'dark',
            langs: config.lang ? [config.lang] : ['en'],
            outputDir: '',
            headed: false,
            viewport: config.viewport,
            maxIterations: Math.min(config.maxStepRetries ?? 12, 12),
            model,
            credentials: config.credentials,
            langInstructions: config.langInstructions,
            themeInstructions: config.themeInstructions,
            currentLang: config.lang,
            currentTheme: config.theme,
            selectorMemory: config.selectorMemory,
            abortSignal: params.abortSignal ?? config.abortSignal,
            runMode: 'language_preflight',
            currentObjective: 'repair',
        };
    };
    const primaryResult = await runAgent(browser, agentConfigFor(config.model), config.apiKey);
    const canFallBack = Boolean(config.fallbackModel) && config.fallbackModel !== config.model;
    if (primaryResult.success || !canFallBack) {
        return primaryResult;
    }
    // Primary model did not complete the repair and a distinct fallback is configured.
    logger.info(`[repair-lane] Primary model failed (${primaryResult.actions.length} actions, success=${primaryResult.success}), retrying with fallback model: ${config.fallbackModel}`);
    return runAgent(browser, agentConfigFor(config.fallbackModel), config.apiKey);
}
421
/**
 * Seeds the browser's sessionStorage, replacing existing content, when there
 * is at least one entry to write.
 *
 * @param {{ prepareSessionStorage: Function }} browser - Browser wrapper.
 * @param {Record<string, string> | null | undefined} sessionStorage - Entries to install.
 * @returns {Promise<void>}
 */
async function prepareVideoSessionStorage(browser, sessionStorage) {
    const hasEntries = Boolean(sessionStorage) && Object.keys(sessionStorage).length > 0;
    if (hasEntries) {
        await browser.prepareSessionStorage(sessionStorage, { replace: true });
    }
}
426
/**
 * Records where the page currently is: its URL and rounded scroll offsets.
 *
 * @param {object} page - Page handle exposing `evaluate()` and `url()`.
 * @returns {Promise<{url: string, scrollX: number, scrollY: number}>} Scroll
 *   offsets default to 0 when the in-page evaluation rejects.
 */
async function captureVideoStepCheckpoint(page) {
    const readScroll = () => ({
        x: Math.round(window.scrollX),
        y: Math.round(window.scrollY),
    });
    const { x: scrollX, y: scrollY } = await page
        .evaluate(readScroll)
        .catch(() => ({ x: 0, y: 0 }));
    return { url: page.url(), scrollX, scrollY };
}
437
/**
 * Returns the browser to a previously captured checkpoint: navigates back if
 * the URL differs, restores the scroll position, then pauses briefly.
 *
 * @param {object} browser - Browser wrapper (`currentPage`, `navigateTo`).
 * @param {{url: string, scrollX: number, scrollY: number}} checkpoint - Target state.
 * @returns {Promise<void>}
 */
export async function restoreVideoStepCheckpoint(browser, checkpoint) {
    const { url: targetUrl, scrollX, scrollY } = checkpoint;
    let page = browser.currentPage;
    if (page.url() !== targetUrl) {
        await browser.navigateTo(targetUrl);
        // Re-read the page handle after navigating.
        page = browser.currentPage;
        // Best effort: a page that never reaches network idle must not block the restore.
        await page.waitForLoadState('networkidle', { timeout: 4000 }).catch(() => { });
    }
    const scrollTo = ({ x, y }) => window.scrollTo({ left: x, top: y, behavior: 'instant' });
    await page.evaluate(scrollTo, { x: scrollX, y: scrollY }).catch(() => { });
    await page.waitForTimeout(250);
}
448
/**
 * Decides whether enough of a rectangle is vertically visible inside the
 * viewport's safe area to count as "on screen".
 *
 * The safe area excludes `topInset`/`bottomInset` plus a 16px margin on each
 * side. The required visible height is the smaller of "at least 28px or 45% of
 * the element" and "at least 28px or all but 4px of the element", and is never
 * more than the element's own height, so that elements shorter than 28px can
 * still qualify when fully visible.
 *
 * @param {object} params
 * @param {{top: number, bottom: number, left: number, right: number, width: number, height: number}} params.rect
 *   Rectangle in viewport coordinates.
 * @param {number} params.viewportWidth - Viewport width in px.
 * @param {number} params.viewportHeight - Viewport height in px.
 * @param {number} [params.topInset=0] - Height covered at the top of the viewport.
 * @param {number} [params.bottomInset=0] - Height covered at the bottom of the viewport.
 * @returns {boolean} True when the visible height reaches the required minimum.
 */
export function isRectMeaningfullyVisibleInViewport(params) {
    const { rect, viewportWidth, viewportHeight, topInset = 0, bottomInset = 0 } = params;
    const topSafeY = topInset + 16;
    const bottomSafeY = viewportHeight - bottomInset - 16;
    const isDegenerate = rect.width <= 0 || rect.height <= 0;
    const isOutsideVertically = rect.bottom <= topSafeY || rect.top >= bottomSafeY;
    const isOutsideHorizontally = rect.right <= 0 || rect.left >= viewportWidth;
    if (isDegenerate || isOutsideVertically || isOutsideHorizontally) {
        return false;
    }
    const visibleHeight = Math.min(rect.bottom, bottomSafeY) - Math.max(rect.top, topSafeY);
    // Capping at rect.height fixes the case where a sub-28px element could
    // never satisfy the 28px floor, even when entirely inside the safe area.
    const minVisibleHeight = Math.min(rect.height, Math.max(28, rect.height * 0.45), Math.max(28, rect.height - 4));
    return visibleHeight >= minVisibleHeight;
}
464
/**
 * Checks, inside the page, whether the located element is meaningfully
 * visible in the viewport once fixed/sticky bars at the top and bottom are
 * taken into account.
 *
 * The callback passed to `locator.evaluate` is self-contained because it runs
 * in the page context. It (1) scans all elements under `document.body` for
 * visible fixed/sticky bars that span the horizontal centre, are at least 35%
 * of the viewport width and at most 35% of its height, and touch the top or
 * bottom edge (within 24px); (2) treats those bars plus a 16px margin as
 * unusable space; (3) requires a minimum visible height of the element within
 * the remaining area. That minimum is capped at the element's own height so
 * that elements shorter than 28px can qualify when fully visible.
 *
 * @param {object} locator - Locator exposing `evaluate(fn)`.
 * @returns {Promise<boolean>} False when the node is not an HTMLElement, is
 *   not sufficiently visible, or the evaluation throws.
 */
async function isLocatorMeaningfullyInViewport(locator) {
    try {
        return await locator.evaluate((node) => {
            if (!(node instanceof HTMLElement)) {
                return false;
            }
            const detectOcclusionInsets = () => {
                let top = 0;
                let bottom = 0;
                const viewportHeight = window.innerHeight;
                const viewportWidth = window.innerWidth;
                const viewportCenterX = viewportWidth / 2;
                for (const candidate of Array.from(document.body?.querySelectorAll('*') ?? [])) {
                    if (!(candidate instanceof HTMLElement)) {
                        continue;
                    }
                    const style = window.getComputedStyle(candidate);
                    if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity || '1') === 0) {
                        continue;
                    }
                    if (style.position !== 'fixed' && style.position !== 'sticky') {
                        continue;
                    }
                    if (candidate.getAttribute('aria-hidden') === 'true') {
                        continue;
                    }
                    const rect = candidate.getBoundingClientRect();
                    if (rect.width <= 0 || rect.height <= 0) {
                        continue;
                    }
                    // Only wide bars that cross the horizontal centre count as occluders.
                    if (rect.width < viewportWidth * 0.35) {
                        continue;
                    }
                    if (rect.left > viewportCenterX || rect.right < viewportCenterX) {
                        continue;
                    }
                    // Very tall fixed elements are ignored rather than treated as bars.
                    if (rect.height > viewportHeight * 0.35) {
                        continue;
                    }
                    if (rect.top <= 24 && rect.bottom > 0) {
                        top = Math.max(top, rect.bottom);
                    }
                    if (rect.bottom >= viewportHeight - 24 && rect.top < viewportHeight) {
                        bottom = Math.max(bottom, viewportHeight - rect.top);
                    }
                }
                return { top, bottom };
            };
            const rect = node.getBoundingClientRect();
            const { top, bottom } = detectOcclusionInsets();
            const topSafeY = top + 16;
            const bottomSafeY = window.innerHeight - bottom - 16;
            if (rect.width <= 0 ||
                rect.height <= 0 ||
                rect.bottom <= topSafeY ||
                rect.right <= 0 ||
                rect.top >= bottomSafeY ||
                rect.left >= window.innerWidth) {
                return false;
            }
            const visibleHeight = Math.min(rect.bottom, bottomSafeY) - Math.max(rect.top, topSafeY);
            // Capping at rect.height fixes the case where a sub-28px element
            // could never satisfy the 28px floor, even when entirely visible.
            const minVisibleHeight = Math.min(rect.height, Math.max(28, rect.height * 0.45), Math.max(28, rect.height - 4));
            return visibleHeight >= minVisibleHeight;
        });
    }
    catch {
        // Evaluation failed; report "not in viewport".
        return false;
    }
}
525
/**
 * Reads a boolean feature flag from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {boolean} [defaultEnabled=true] - Value used when the variable is unset.
 * @returns {boolean} `defaultEnabled` when unset; otherwise false only for
 *   '0', 'false', 'off' or 'no' (case-insensitive, surrounding whitespace ignored).
 */
function isFeatureEnabled(name, defaultEnabled = true) {
    const raw = process.env[name];
    if (raw === undefined || raw === null) {
        return defaultEnabled;
    }
    const disablingValues = ['0', 'false', 'off', 'no'];
    return !disablingValues.includes(raw.trim().toLowerCase());
}
532
/**
 * Splits a comma-separated selector list into individual candidates.
 *
 * Only top-level commas separate candidates: commas inside parentheses
 * (`:is(a, b)`), attribute brackets (`[title="a,b"]`) or quoted strings stay
 * part of their selector. If the input has unbalanced quotes or brackets, the
 * function falls back to a plain split on every comma.
 *
 * @param {string | null | undefined} selector - Raw selector list.
 * @returns {string[]} Trimmed, non-empty candidates in their original order.
 */
function splitSelectorCandidates(selector) {
    if (!selector) {
        return [];
    }
    const tidy = (parts) => parts.map((item) => item.trim()).filter(Boolean);
    const candidates = [];
    let current = '';
    let depth = 0;
    let quote = null;
    let balanced = true;
    for (let i = 0; i < selector.length; i += 1) {
        const ch = selector[i];
        if (ch === '\\' && i + 1 < selector.length) {
            // Keep an escaped character verbatim; it never opens or closes anything.
            current += ch + selector[i + 1];
            i += 1;
            continue;
        }
        if (quote !== null) {
            if (ch === quote) {
                quote = null;
            }
        }
        else if (ch === '"' || ch === '\'') {
            quote = ch;
        }
        else if (ch === '(' || ch === '[') {
            depth += 1;
        }
        else if (ch === ')' || ch === ']') {
            if (depth === 0) {
                balanced = false;
                break;
            }
            depth -= 1;
        }
        else if (ch === ',' && depth === 0) {
            candidates.push(current);
            current = '';
            continue;
        }
        current += ch;
    }
    if (!balanced || depth !== 0 || quote !== null) {
        // Not a well-formed selector list (e.g. a stray apostrophe in a text
        // selector): fall back to a plain split on every comma.
        return tidy(selector.split(','));
    }
    candidates.push(current);
    return tidy(candidates);
}
540
/**
 * Detects selectors that rely on the automation's own `data-ak-*` markers.
 *
 * @param {string | null | undefined} selector - Selector to inspect.
 * @returns {boolean} True when the selector contains a `[data-ak-...]`
 *   attribute selector or the text `data-ak-interactive-index` (case-insensitive).
 */
function containsInternalAutomationSelector(selector) {
    if (!selector) {
        return false;
    }
    const internalMarkerPattern = /\[data-ak-[^\]]+\]|data-ak-interactive-index/i;
    return internalMarkerPattern.test(selector);
}
543
/**
 * Removes duplicates and internal automation selectors from a selector list,
 * keeping the first occurrence of each remaining selector.
 *
 * @param {Iterable<string>} selectors - Candidate selectors.
 * @returns {string[]} Unique, externally usable selectors in first-seen order.
 */
function dedupeSelectors(selectors) {
    // A Set keeps insertion order, which gives first-occurrence-wins.
    const kept = new Set();
    for (const candidate of selectors) {
        if (!containsInternalAutomationSelector(candidate)) {
            kept.add(candidate);
        }
    }
    return [...kept];
}
556
/**
 * Selects the timeout that applies to the current run mode.
 *
 * @param {string} mode - 'recording' selects the recording timeout; anything else the dry-run one.
 * @param {number} recordingMs - Timeout used while recording.
 * @param {number} dryRunMs - Timeout used otherwise.
 * @returns {number} The selected timeout.
 */
function pickModeTimeout(mode, recordingMs, dryRunMs) {
    if (mode === 'recording') {
        return recordingMs;
    }
    return dryRunMs;
}
559
/**
 * Tells whether a step's type is one of the element-targeting kinds
 * (click, type, select_option, hover, assert_element).
 *
 * @param {{ type: string }} step - Video step.
 * @returns {boolean} True for the types listed above.
 */
function shouldUseDeterministicRecovery(step) {
    const elementTargetingTypes = ['click', 'type', 'select_option', 'hover', 'assert_element'];
    return elementTargetingTypes.includes(step.type);
}
566
/**
 * Builds a stable text signature for a step from its type, its main
 * type-specific field and a normalized description.
 *
 * The description is lower-cased, has every run of whitespace collapsed to a
 * single space, is trimmed, and is cut to 120 characters. (The previous
 * pattern `/\\s+/` matched a literal backslash followed by "s", so whitespace
 * was never actually collapsed.)
 *
 * @param {object} step - Video step; `type` and `description` are required.
 * @returns {string} Pipe-separated signature.
 */
function buildStepSignature(step) {
    const normalizedDescription = step.description
        .toLowerCase()
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, 120);
    switch (step.type) {
        case 'navigate':
            return `navigate|${step.url ?? ''}|${normalizedDescription}`;
        case 'assert_url':
            return `assert_url|${step.urlPattern ?? ''}|${normalizedDescription}`;
        case 'assert_text':
            return `assert_text|${step.text ?? ''}|${normalizedDescription}`;
        case 'assert_page':
            return `assert_page|${JSON.stringify(step.pageExpectation ?? {})}|${normalizedDescription}`;
        case 'select_option':
            return `select_option|${step.selector ?? ''}|${step.optionLabel ?? step.optionValue ?? step.optionIndex ?? ''}|${normalizedDescription}`;
        default:
            return `${step.type}|${step.selector ?? ''}|${normalizedDescription}`;
    }
}
580
/**
 * Returns the step with its selector replaced, without mutating the input.
 *
 * @param {object} step - Video step.
 * @param {string | null | undefined} selector - New selector; falsy leaves the step as is.
 * @returns {object} The same step object when `selector` is falsy, otherwise a shallow copy.
 */
function applyStepSelector(step, selector) {
    return selector ? { ...step, selector } : step;
}
585
/**
 * Normalizes a URL string for equality checks: trims it, drops trailing
 * slashes and lower-cases it.
 *
 * @param {string} rawUrl - URL text.
 * @returns {string} Normalized form.
 */
function normalizeUrlForComparison(rawUrl) {
    const trimmed = rawUrl.trim();
    const withoutTrailingSlashes = trimmed.replace(/\/+$/, '');
    return withoutTrailingSlashes.toLowerCase();
}
588
/**
 * Clamps an output scale factor into the supported range.
 *
 * @param {*} value - Requested scale.
 * @returns {number} 2 when `value` is not a finite number; otherwise the value
 *   limited to the range 0.5 to 4.
 */
function normalizeOutputScale(value) {
    const MIN_SCALE = 0.5;
    const MAX_SCALE = 4;
    if (!Number.isFinite(value)) {
        return 2;
    }
    const cappedAbove = Math.min(MAX_SCALE, Number(value));
    return Math.max(MIN_SCALE, cappedAbove);
}
593
/**
 * Reduces a language tag to its lower-case primary subtag
 * (e.g. 'fr_FR' and 'FR-ca' both become 'fr').
 *
 * @param {string | null | undefined} value - Language tag.
 * @returns {string | null} Primary subtag, or `null` when the input is empty
 *   or has no primary subtag.
 */
function normalizeLangTag(value) {
    if (!value) {
        return null;
    }
    const hyphenated = value.trim().toLowerCase().replace('_', '-');
    const [primary] = hyphenated.split('-');
    return primary || null;
}
598
/**
 * Produces a comparison key for a URL string: trimmed, without trailing
 * slashes, lower-cased.
 *
 * @param {string} rawUrl - URL text.
 * @returns {string} Comparison key.
 */
function normalizeComparableUrl(rawUrl) {
    const trailingSlashes = /\/+$/;
    const stripped = rawUrl.trim().replace(trailingSlashes, '');
    return stripped.toLowerCase();
}
601
/**
 * Parses a URL without throwing.
 *
 * @param {string} rawUrl - Absolute or relative URL.
 * @param {string | URL} [base] - Base used to resolve a relative `rawUrl`.
 * @returns {URL | null} Parsed URL, or `null` when parsing fails.
 */
function parseUrlMaybe(rawUrl, base) {
    let parsed = null;
    try {
        parsed = new URL(rawUrl, base);
    }
    catch {
        // Invalid or unresolvable URL: fall through with null.
    }
    return parsed;
}
609
/**
 * Splits a URL path into decoded, trimmed, lower-cased, non-empty segments.
 *
 * A segment that is not valid percent-encoding (for example a truncated
 * `%E0%A4%A`) is kept in its raw form instead of letting
 * `decodeURIComponent` throw a `URIError` for the whole path.
 *
 * @param {string} pathname - Path portion of a URL.
 * @returns {string[]} Normalized segments in order.
 */
function normalizePathSegments(pathname) {
    const decodeSegment = (segment) => {
        try {
            return decodeURIComponent(segment);
        }
        catch {
            // Malformed escape sequence: compare on the raw text instead.
            return segment;
        }
    };
    return pathname
        .split('/')
        .map((segment) => decodeSegment(segment).trim().toLowerCase())
        .filter(Boolean);
}
615
/**
 * Drops a leading segment that is shaped like a locale code ("xx" or
 * "xx-yy", case-insensitive).
 *
 * @param {string[]} segments - Path segments.
 * @returns {string[]} The same array when nothing is stripped, otherwise a
 *   copy without the first segment.
 */
function stripLocalePrefix(segments) {
    if (segments.length === 0) {
        return segments;
    }
    const startsWithLocale = /^[a-z]{2}(?:-[a-z]{2})?$/i.test(segments[0]);
    return startsWithLocale ? segments.slice(1) : segments;
}
620
/**
 * Compares two path segments, accepting an exact match or one segment being
 * the other followed by a "-suffix" (e.g. "pricing" vs "pricing-plans").
 *
 * @param {string} expected - Segment from the expected path.
 * @param {string} actual - Segment from the actual path.
 * @returns {boolean} Whether the segments are considered equivalent.
 */
function routeSegmentMatches(expected, actual) {
    return expected === actual
        || actual.startsWith(`${expected}-`)
        || expected.startsWith(`${actual}-`);
}
627
/**
 * Checks whether the expected path occurs as a contiguous run of segments
 * inside the actual path, after both are normalized and stripped of a
 * leading locale segment. Segments are compared with routeSegmentMatches.
 *
 * @param {string} actualPath - Pathname of the current URL.
 * @param {string} expectedPath - Pathname that should be present.
 * @returns {boolean} True when the expected run is found. An empty expected
 *   path only matches an empty actual path.
 */
function pathFamilyMatches(actualPath, expectedPath) {
    const actual = stripLocalePrefix(normalizePathSegments(actualPath));
    const expected = stripLocalePrefix(normalizePathSegments(expectedPath));
    if (expected.length === 0) {
        return actual.length === 0;
    }
    const lastStart = actual.length - expected.length;
    let start = 0;
    while (start <= lastStart) {
        let index = 0;
        while (index < expected.length
            && routeSegmentMatches(expected[index], actual[start + index] ?? '')) {
            index += 1;
        }
        if (index === expected.length) {
            return true;
        }
        start += 1;
    }
    return false;
}
639
/**
 * Tests a URL against an expected pattern.
 *
 * - "equals": both sides are compared after normalizeComparableUrl.
 * - "regex": `expected` is compiled as a regular expression; an invalid
 *   pattern yields false.
 * - anything else ("contains"): a plain substring hit wins; otherwise both
 *   values are parsed as URLs (the expectation relative to the actual URL),
 *   hosts are compared ignoring a leading "www.", and the paths are compared
 *   with pathFamilyMatches.
 *
 * @param {string} actualUrl - URL currently observed.
 * @param {string} expected - Pattern or URL to look for.
 * @param {string} [mode='contains'] - Matching mode.
 * @returns {boolean} Whether the URL satisfies the expectation.
 */
function urlPatternMatches(actualUrl, expected, mode = 'contains') {
    switch (mode) {
        case 'equals':
            return normalizeComparableUrl(actualUrl) === normalizeComparableUrl(expected);
        case 'regex':
            try {
                return new RegExp(expected).test(actualUrl);
            }
            catch {
                return false;
            }
        default:
            break;
    }
    if (actualUrl.includes(expected)) {
        return true;
    }
    const actualParsed = parseUrlMaybe(actualUrl);
    const expectedParsed = parseUrlMaybe(expected, actualUrl);
    if (!actualParsed || !expectedParsed) {
        return false;
    }
    const withoutWww = (hostname) => hostname.replace(/^www\./, '');
    const hostsDiffer = withoutWww(actualParsed.hostname) !== withoutWww(expectedParsed.hostname);
    if (expectedParsed.hostname && hostsDiffer) {
        return false;
    }
    return pathFamilyMatches(actualParsed.pathname, expectedParsed.pathname);
}
665
/**
 * Splits a "code:href" entry at its first colon.
 *
 * @param {string} entry - Text such as "fr: https://example.com/fr".
 * @returns {{ code: string, href: string }|null} Trimmed parts, or null when
 *   the colon is missing or leading, or when either part is empty.
 */
function parseHreflangEntry(entry) {
    const colonAt = entry.indexOf(':');
    if (colonAt <= 0) {
        return null;
    }
    const code = entry.slice(0, colonAt).trim();
    const href = entry.slice(colonAt + 1).trim();
    return code && href ? { code, href } : null;
}
675
/**
 * Records a candidate URL in the map, keyed by its normalized form, keeping
 * only the highest-scoring entry per key.
 *
 * Nothing is recorded when the URL is empty, unparseable, or not http(s).
 *
 * @param {Map<string, { url: string, source: string, score: number }>} candidates - Map mutated in place.
 * @param {string|null|undefined} url - Candidate URL.
 * @param {string} source - Label describing where the candidate came from.
 * @param {number} score - Candidate score; a higher score replaces an existing entry.
 * @returns {void}
 */
function addLocaleActivationCandidate(candidates, url, source, score) {
    const parsed = url ? parseUrlMaybe(url) : null;
    if (!parsed || !/^https?:$/i.test(parsed.protocol)) {
        return;
    }
    const href = parsed.toString();
    const key = normalizeComparableUrl(href);
    const previous = candidates.get(key);
    const shouldStore = !previous || previous.score < score;
    if (shouldStore) {
        candidates.set(key, { url: href, source, score });
    }
}
687
/**
 * Rewrites a URL so its path starts with the requested language segment,
 * replacing an existing locale-shaped first segment if there is one.
 *
 * The remaining segments come from normalizePathSegments, and a trailing
 * slash on the original path is kept.
 *
 * @param {string} rawUrl - Absolute URL to rewrite.
 * @param {string} requestedLang - Segment to place first in the path.
 * @returns {string|null} Rewritten URL, or null when `rawUrl` cannot be parsed.
 */
function buildLocalePrefixedUrl(rawUrl, requestedLang) {
    const parsed = parseUrlMaybe(rawUrl);
    if (!parsed) {
        return null;
    }
    const pathname = parsed.pathname || '/';
    const keepTrailingSlash = pathname === '/' || pathname.endsWith('/');
    const remainder = stripLocalePrefix(normalizePathSegments(pathname));
    const joined = [requestedLang].concat(remainder).join('/');
    const suffix = keepTrailingSlash ? '/' : '';
    // Collapse doubled slashes that appear when there are no remaining segments.
    parsed.pathname = `/${joined}${suffix}`.replace(/\/{2,}/g, '/');
    return parsed.toString();
}
697
/**
 * Builds variants of a URL in which an existing `lang`, `hl` or `locale`
 * query parameter is set to the requested language. Parameters that are not
 * already present are not added.
 *
 * @param {string} rawUrl - Absolute URL to derive variants from.
 * @param {string} requestedLang - Value to assign to the parameter.
 * @returns {string[]} One URL per matching parameter; empty when the URL
 *   cannot be parsed or has none of the parameters.
 */
function buildLocaleQueryUrls(rawUrl, requestedLang) {
    const parsed = parseUrlMaybe(rawUrl);
    if (!parsed) {
        return [];
    }
    const baseHref = parsed.toString();
    return ['lang', 'hl', 'locale']
        .filter((key) => parsed.searchParams.has(key))
        .map((key) => {
            const next = new URL(baseHref);
            next.searchParams.set(key, requestedLang);
            return next.toString();
        });
}
711
/**
 * Gathers URLs that might activate the requested language, best first.
 *
 * Sources, in descending score:
 * - hreflang entries whose primary subtag matches (120 for an exact match
 *   with the requested tag, 116 for a bare-language match, 112 otherwise);
 * - locale-prefixed paths built from the current URL (82) or from the
 *   canonical/seed URLs (76);
 * - existing locale query parameters rewritten to the language (62).
 *
 * @param {{ hreflangs: string[], url: string, canonicalUrl?: string }} signals - Page signals.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string[]} [seedUrls=[]] - Extra URLs to derive candidates from.
 * @returns {Array<{ url: string, source: string, score: number }>} Candidates sorted by score, highest first.
 */
function collectLocaleActivationCandidates(signals, requestedLang, seedUrls = []) {
    const lang = normalizeLangTag(requestedLang);
    if (!lang) {
        return [];
    }
    const requestedExact = requestedLang?.trim().toLowerCase() ?? null;
    const candidates = new Map();
    const scoreHreflangCode = (rawCode) => {
        if (rawCode === requestedExact) {
            return 120;
        }
        return rawCode === lang ? 116 : 112;
    };
    for (const entry of signals.hreflangs) {
        const parsedEntry = parseHreflangEntry(entry);
        if (!parsedEntry || normalizeLangTag(parsedEntry.code) !== lang) {
            continue;
        }
        const rawCode = parsedEntry.code.trim().toLowerCase();
        addLocaleActivationCandidate(candidates, parsedEntry.href, `hreflang ${parsedEntry.code}`, scoreHreflangCode(rawCode));
    }
    const sourceUrls = [signals.url, signals.canonicalUrl, ...seedUrls];
    for (const rawUrl of sourceUrls) {
        if (!rawUrl) {
            continue;
        }
        const prefixedUrl = buildLocalePrefixedUrl(rawUrl, lang);
        const isCurrentUrl = normalizeComparableUrl(rawUrl) === normalizeComparableUrl(signals.url);
        addLocaleActivationCandidate(candidates, prefixedUrl, `locale path from ${rawUrl}`, isCurrentUrl ? 82 : 76);
        for (const queryUrl of buildLocaleQueryUrls(rawUrl, lang)) {
            addLocaleActivationCandidate(candidates, queryUrl, `locale query from ${rawUrl}`, 62);
        }
    }
    return Array.from(candidates.values()).sort((left, right) => right.score - left.score);
}
738
/**
 * Normalizes text for token comparison: strips combining diacritics,
 * collapses whitespace runs, trims and lower-cases.
 *
 * @param {string|null|undefined} value - Text to normalize.
 * @returns {string} Normalized text; empty string for null/undefined.
 */
function normalizeVariantToken(value) {
    const text = value ?? '';
    // NFD splits accented letters into base letter + combining mark, which the
    // next replace removes ("é" -> "e").
    const withoutDiacritics = text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    return withoutDiacritics.replace(/\s+/g, ' ').trim().toLowerCase();
}
746
/**
 * Labels and codes accepted as referring to each language, keyed by the
 * lower-case primary language subtag.
 */
const LANGUAGE_LABEL_ALIASES = {
    fr: [
        'fr', 'fr-fr', 'fr_fr',
        'france', 'french', 'francais', 'français',
    ],
    en: [
        'en', 'en-us', 'en_gb',
        'english', 'anglais',
    ],
    de: [
        'de', 'de-de', 'de_de',
        'german', 'deutsch',
    ],
    es: [
        'es', 'es-es', 'es_es',
        'spanish', 'espanol', 'español',
    ],
    it: [
        'it', 'it-it', 'it_it',
        'italian', 'italiano',
    ],
    pt: [
        'pt', 'pt-pt', 'pt-br', 'pt_pt', 'pt_br',
        'portuguese', 'portugues', 'português',
    ],
    nl: [
        'nl', 'nl-nl',
        'dutch', 'nederlands',
    ],
};
755
/**
 * Labels accepted as referring to each theme, keyed by theme name.
 */
const THEME_LABEL_ALIASES = {
    light: [
        'light',
        'clair',
        'claro',
        'hell',
        'day',
    ],
    dark: [
        'dark',
        'sombre',
        'oscuro',
        'dunkel',
        'night',
    ],
};
759
/**
 * Lists the tokens that may denote the requested language: the known aliases
 * for its primary subtag (or just the subtag when no aliases exist), plus the
 * normalized requested tag in its original, underscore and hyphen spellings.
 *
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @returns {string[]} Unique, non-empty tokens; empty when no language is requested.
 */
function buildRequestedLanguageTokens(requestedLang) {
    const lang = normalizeLangTag(requestedLang);
    if (!lang) {
        return [];
    }
    const tokens = new Set(LANGUAGE_LABEL_ALIASES[lang] ?? [lang]);
    tokens.add(lang);
    if (requestedLang) {
        const spellings = [
            requestedLang,
            requestedLang.replace('-', '_'),
            requestedLang.replace('_', '-'),
        ];
        for (const spelling of spellings) {
            tokens.add(normalizeVariantToken(spelling));
        }
    }
    return Array.from(tokens).filter(Boolean);
}
772
/**
 * Lists the normalized tokens that may denote the requested theme: the theme
 * name itself followed by its known aliases.
 *
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {string[]} Unique, non-empty tokens; empty when no theme is
 *   requested. A theme without an alias entry yields just its own name.
 */
function buildRequestedThemeTokens(requestedTheme) {
    if (!requestedTheme) {
        return [];
    }
    // Themes outside the alias table (e.g. "system") used to crash here because
    // spreading `undefined` throws. Array.isArray also rejects inherited
    // Object.prototype members such as "constructor".
    const knownAliases = THEME_LABEL_ALIASES[requestedTheme];
    const aliases = Array.isArray(knownAliases) ? knownAliases : [];
    const normalized = [requestedTheme, ...aliases]
        .map((token) => normalizeVariantToken(token))
        .filter(Boolean);
    return [...new Set(normalized)];
}
777
/**
 * Detects sign-in related wording (English and French) in a piece of text
 * after normalizing it with normalizeVariantToken.
 *
 * @param {string|null|undefined} text - Text to inspect.
 * @returns {boolean} True when an authentication keyword appears as a whole word.
 */
function isLikelyAuthPreparationText(text) {
    const normalized = normalizeVariantToken(text);
    return Boolean(normalized)
        && /\b(sign in|signin|log in|login|authenticate|auth|connexion|connecter|se connecter|mot de passe|password|email)\b/i.test(normalized);
}
783
/**
 * Heuristically decides whether a URL refers to a language or theme variant.
 *
 * The pathname, query string, individual path segments and every query
 * key/value are normalized and checked for the requested language tokens,
 * the requested theme tokens, or a generic variant keyword.
 *
 * @param {string} rawUrl - Absolute or relative URL (relative input is
 *   resolved against a placeholder origin).
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {boolean} True when any URL part looks variant-related.
 */
function urlLooksVariantRelated(rawUrl, requestedLang, requestedTheme) {
    const parsed = parseUrlMaybe(rawUrl, 'https://example.com');
    if (!parsed) {
        return false;
    }
    const localeTokens = buildRequestedLanguageTokens(requestedLang);
    const themeTokens = buildRequestedThemeTokens(requestedTheme);
    const queryBits = [];
    for (const [key, value] of parsed.searchParams.entries()) {
        queryBits.push(key, value);
    }
    const urlBits = [
        parsed.pathname,
        parsed.search,
        ...normalizePathSegments(parsed.pathname),
        ...queryBits,
    ]
        .map((value) => normalizeVariantToken(value))
        .filter(Boolean);
    const bitLooksRelated = (bit) => {
        if (localeTokens.some((token) => bit.includes(token))) {
            return true;
        }
        if (themeTokens.some((token) => bit.includes(token))) {
            return true;
        }
        return /\b(lang|locale|language|theme|appearance|mode|color scheme|couleur|langue)\b/.test(bit);
    };
    return urlBits.some(bitLooksRelated);
}
801
/**
 * Heuristically decides whether a plan step is relevant to reaching the
 * requested language/theme variant.
 *
 * A step qualifies when any of these holds, checked in order:
 * 1. one of its text fields contains a requested language token, a requested
 *    theme token, or a generic variant keyword;
 * 2. its `url` or `urlPattern` looks variant-related;
 * 3. one of its text fields reads like sign-in preparation;
 * 4. its page expectations declare a locale or theme.
 *
 * @param {object} step - Plan step.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {boolean} Whether the step should be kept as variant-relevant.
 */
function stepLooksVariantRelevant(step, requestedLang, requestedTheme) {
    const localeTokens = buildRequestedLanguageTokens(requestedLang);
    const themeTokens = buildRequestedThemeTokens(requestedTheme);
    const genericVariantTokens = [
        'lang',
        'language',
        'locale',
        'theme',
        'appearance',
        'mode',
        'color scheme',
        'langue',
        'theme',
        'thème',
        'apparence',
        'clair',
        'sombre',
    ].map(normalizeVariantToken);
    const expectedNow = step.pageExpectation;
    const expectedAfter = step.expectedPageAfter;
    const rawFields = [
        step.description,
        step.selector,
        step.url,
        step.urlPattern,
        step.text,
        step.optionLabel,
        step.optionValue,
        expectedNow?.locale,
        expectedNow?.theme,
        expectedAfter?.locale,
        expectedAfter?.theme,
        ...(expectedNow?.urlPatterns ?? []),
        ...(expectedNow?.textPatterns ?? []),
        ...(expectedNow?.titlePatterns ?? []),
        ...(expectedAfter?.urlPatterns ?? []),
        ...(expectedAfter?.textPatterns ?? []),
        ...(expectedAfter?.titlePatterns ?? []),
    ];
    const textFields = rawFields
        .filter((value) => typeof value === 'string' && value.trim().length > 0)
        .map(normalizeVariantToken);
    const fieldMentionsVariant = (value) => {
        if (localeTokens.some((token) => value.includes(token))) {
            return true;
        }
        if (themeTokens.some((token) => value.includes(token))) {
            return true;
        }
        return genericVariantTokens.some((token) => token && value.includes(token));
    };
    if (textFields.some(fieldMentionsVariant)) {
        return true;
    }
    if (step.url && urlLooksVariantRelated(step.url, requestedLang, requestedTheme)) {
        return true;
    }
    if (step.urlPattern && urlLooksVariantRelated(step.urlPattern, requestedLang, requestedTheme)) {
        return true;
    }
    if (textFields.some(isLikelyAuthPreparationText)) {
        return true;
    }
    return Boolean(expectedNow?.locale
        || expectedNow?.theme
        || expectedAfter?.locale
        || expectedAfter?.theme);
}
856
/**
 * Filters a step list down to the steps worth replaying when preparing a
 * language/theme variant.
 *
 * `dismiss_overlays` and `wait` steps are always kept; every other step is
 * kept only when stepLooksVariantRelevant accepts it.
 *
 * @param {object[]} steps - Plan steps.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {object[]} New array with the retained steps in their original order.
 */
export function sanitizeVariantPrefixSteps(steps, requestedLang, requestedTheme) {
    const alwaysKeptTypes = ['dismiss_overlays', 'wait'];
    return steps.filter((step) => alwaysKeptTypes.includes(step.type)
        || stepLooksVariantRelevant(step, requestedLang, requestedTheme));
}
863
/**
 * Scores how strongly a piece of text matches a list of tokens.
 *
 * The text is normalized with normalizeVariantToken; each non-empty token
 * then adds 28 when it equals the whole text, or 18 when it merely occurs
 * inside it.
 *
 * @param {string|null|undefined} text - Text to score.
 * @param {string[]} tokens - Tokens to look for (expected to be normalized already).
 * @returns {number} Accumulated score; 0 for empty text or an empty token list.
 */
function scoreTokenMatch(text, tokens) {
    const normalized = normalizeVariantToken(text);
    if (!normalized || tokens.length === 0) {
        return 0;
    }
    let score = 0;
    for (const token of tokens) {
        if (!token) {
            continue;
        }
        if (normalized === token) {
            score += 28;
        }
        else if (normalized.includes(token)) {
            score += 18;
        }
        // The former third branch (+14 for a whole-word hit after splitting on
        // non-alphanumerics) could never run: any token equal to a split piece
        // is by construction a substring and is caught by the branch above.
    }
    return score;
}
880
/**
 * Scores how well a detected locale control fits the requested language.
 *
 * Token-match scores for the control's label, value and href are summed,
 * `select` controls get a +10 bonus, and the best-scoring option (by label
 * or value) is added on top.
 *
 * @param {object} control - Detected variant control.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @returns {number} Score; 0 when the control is not a locale control or no
 *   language is requested.
 */
function scoreLocaleControl(control, requestedLang) {
    if (control.kind !== 'locale' || !requestedLang) {
        return 0;
    }
    const tokens = buildRequestedLanguageTokens(requestedLang);
    const fieldScore = scoreTokenMatch(control.label, tokens)
        + scoreTokenMatch(control.value ?? '', tokens)
        + scoreTokenMatch(control.href ?? '', tokens);
    const mechanismBonus = control.mechanism === 'select' ? 10 : 0;
    let bestOptionScore = 0;
    if (control.options?.length) {
        bestOptionScore = control.options.reduce((best, option) => Math.max(best, scoreTokenMatch(option.label, tokens), scoreTokenMatch(option.value ?? '', tokens)), 0);
    }
    return fieldScore + mechanismBonus + bestOptionScore;
}
896
/**
 * Scores how well a detected theme control fits the requested theme.
 *
 * Token-match scores for the control's label, value and href are summed,
 * `toggle` controls get a +8 bonus, and the best-scoring option (by label or
 * value) is added on top.
 *
 * @param {object} control - Detected variant control.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {number} Score; 0 when the control is not a theme control or no
 *   theme is requested.
 */
function scoreThemeControl(control, requestedTheme) {
    if (control.kind !== 'theme' || !requestedTheme) {
        return 0;
    }
    const tokens = buildRequestedThemeTokens(requestedTheme);
    const fieldScore = scoreTokenMatch(control.label, tokens)
        + scoreTokenMatch(control.value ?? '', tokens)
        + scoreTokenMatch(control.href ?? '', tokens);
    const mechanismBonus = control.mechanism === 'toggle' ? 8 : 0;
    let bestOptionScore = 0;
    if (control.options?.length) {
        bestOptionScore = control.options.reduce((best, option) => Math.max(best, scoreTokenMatch(option.label, tokens), scoreTokenMatch(option.value ?? '', tokens)), 0);
    }
    return fieldScore + mechanismBonus + bestOptionScore;
}
912
/**
 * Lists the spellings of the requested language that could be written into
 * a storage entry: the primary subtag, then the requested tag in hyphenated,
 * lower-case, upper-case and underscore forms.
 *
 * When the existing stored sample is exactly two upper-case letters, the
 * upper-cased primary subtag is offered as well.
 *
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} sample - Value currently found in storage.
 * @returns {string[]} Unique, non-empty candidate values in insertion order.
 */
function buildRequestedLanguageValues(requestedLang, sample) {
    const lang = normalizeLangTag(requestedLang);
    if (!lang) {
        return [];
    }
    const values = new Set([lang]);
    if (requestedLang) {
        const hyphenated = requestedLang.replace('_', '-');
        const lowered = hyphenated.toLowerCase();
        const spellings = [
            hyphenated,
            lowered,
            hyphenated.toUpperCase(),
            hyphenated.replace('-', '_'),
            lowered.replace('-', '_'),
        ];
        spellings.forEach((spelling) => values.add(spelling));
    }
    const sampleIsUpperPair = /^[A-Z]{2}$/.test((sample ?? '').trim());
    if (sampleIsUpperPair) {
        values.add(lang.toUpperCase());
    }
    return Array.from(values).filter(Boolean);
}
931
/**
 * Lists the values worth writing into a storage entry for a given hint.
 *
 * @param {{ kind: string, valueSample?: string }} hint - Storage hint.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @returns {string[]} Language spellings for locale hints, the theme name for
 *   theme hints (when a theme is requested), otherwise an empty list.
 */
function buildStorageReplacementCandidates(hint, requestedLang, requestedTheme) {
    switch (hint.kind) {
        case 'locale':
            return buildRequestedLanguageValues(requestedLang, hint.valueSample);
        case 'theme':
            return requestedTheme ? [requestedTheme] : [];
        default:
            return [];
    }
}
940
/**
 * Gives the current page time to settle after a variant change.
 *
 * Waits (up to 3 s each, ignoring failures) for the `domcontentloaded` and
 * `networkidle` load states, then pauses a further 350 ms.
 *
 * @param {{ currentPage: object }} browser - Browser wrapper exposing the active page.
 * @returns {Promise<void>}
 */
async function settleAfterVariantMutation(browser) {
    const page = browser.currentPage;
    // A load state that never arrives is not an error here; the wait is best effort.
    const ignoreFailure = () => { };
    for (const state of ['domcontentloaded', 'networkidle']) {
        await page.waitForLoadState(state, { timeout: 3000 }).catch(ignoreFailure);
    }
    await page.waitForTimeout(350);
}
946
/**
 * Operates a single variant control on the current page.
 *
 * For `select` controls with known options, the option whose label or value
 * best matches the requested language/theme is selected (by value when it
 * has one, otherwise by label). Every other control is clicked; if the click
 * fails on a `link` control that has an href, the href is navigated to
 * instead.
 *
 * @param {object} browser - Browser wrapper exposing `currentPage` and `navigateTo`.
 * @param {object} control - Detected variant control.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @param {{ onLog?: Function }} callbacks - Logging callbacks.
 * @returns {Promise<boolean>} False when no select option matches, true otherwise.
 * @throws Propagates the visibility-wait failure and any click failure that
 *   has no href fallback.
 */
async function tryActivateVariantControl(browser, control, requestedLang, requestedTheme, callbacks) {
    const locator = browser.currentPage.locator(control.selector).first();
    await locator.waitFor({ state: 'visible', timeout: 3000 });
    if (control.mechanism === 'select' && control.options?.length) {
        const tokens = control.kind === 'locale'
            ? buildRequestedLanguageTokens(requestedLang)
            : buildRequestedThemeTokens(requestedTheme);
        // Pick the first option with the highest score; ties keep the earlier option.
        let best = null;
        let bestScore = -Infinity;
        for (const option of control.options) {
            const optionScore = Math.max(scoreTokenMatch(option.label, tokens), scoreTokenMatch(option.value ?? '', tokens));
            if (optionScore > bestScore) {
                best = option;
                bestScore = optionScore;
            }
        }
        if (!best || bestScore <= 0) {
            return false;
        }
        const choice = best.value ? { value: best.value } : { label: best.label };
        await locator.selectOption(choice, { timeout: 4000 });
        await settleAfterVariantMutation(browser);
        return true;
    }
    try {
        await locator.click({ timeout: 4000 });
    }
    catch (err) {
        const canFollowHref = control.href && control.mechanism === 'link';
        if (!canFollowHref) {
            throw err;
        }
        log(`Variant control click failed for ${control.label}; falling back to href navigation ${control.href}.`, 'info', callbacks.onLog);
        await browser.navigateTo(control.href);
        return true;
    }
    await settleAfterVariantMutation(browser);
    return true;
}
984
/**
 * Tries to reach the requested language/theme by operating on-page controls.
 *
 * The six best-scoring controls are tried in turn. After each activation the
 * variant state is re-detected; on the top-level call, a non-select control
 * that did not settle the state triggers one nested attempt using the
 * freshly detected page signals.
 *
 * @param {object} browser - Browser wrapper.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @param {{ variantControls: object[] }} signals - Page signals listing candidate controls.
 * @param {{ onLog?: Function }} callbacks - Logging callbacks.
 * @param {number} [depth=0] - Recursion depth; nesting happens only at depth 0.
 * @returns {Promise<{ detected: object, controlUsed?: object }>} Latest
 *   detected state, plus the control that produced it on success.
 */
async function attemptVariantControlsActivation(browser, requestedLang, requestedTheme, signals, callbacks, depth = 0) {
    const isSatisfied = (state) => (requestedLang ? state.lang.active : true)
        && (requestedTheme ? state.theme.active : true);
    let detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
    if (isSatisfied(detected)) {
        return { detected };
    }
    // Only score a control for the dimensions that are still inactive.
    const scoreControl = (control) => {
        const localeScore = !detected.lang.active ? scoreLocaleControl(control, requestedLang) : 0;
        const themeScore = !detected.theme.active ? scoreThemeControl(control, requestedTheme) : 0;
        return Math.max(localeScore, themeScore);
    };
    const rankedControls = signals.variantControls
        .map((control) => ({ control, score: scoreControl(control) }))
        .filter((entry) => entry.score > 0)
        .sort((left, right) => right.score - left.score)
        .slice(0, 6);
    for (const { control } of rankedControls) {
        try {
            log(`Variant preflight trying ${control.kind} control ${control.selector} (${control.label}).`, 'info', callbacks.onLog);
            const activated = await tryActivateVariantControl(browser, control, requestedLang, requestedTheme, callbacks);
            if (!activated) {
                continue;
            }
            await dismissOverlaysWithLogging(browser, {
                context: `variant control activation (${control.selector})`,
                onLog: callbacks.onLog,
            });
            detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
            if (isSatisfied(detected)) {
                log(`Variant preflight activated state via ${control.kind} control ${control.selector}.`, 'success', callbacks.onLog);
                return { detected, controlUsed: control };
            }
            const mayNest = depth === 0 && control.mechanism !== 'select';
            if (mayNest) {
                const nestedAttempt = await attemptVariantControlsActivation(browser, requestedLang, requestedTheme, detected.pageSignals, callbacks, depth + 1);
                if (isSatisfied(nestedAttempt.detected)) {
                    return nestedAttempt;
                }
                detected = nestedAttempt.detected;
            }
        }
        catch (err) {
            log(`Variant preflight control activation failed for ${control.selector}: ${err.message}`, 'info', callbacks.onLog);
        }
    }
    return { detected };
}
1027
/**
 * Writes a language/theme candidate into an existing web-storage entry on the
 * current page.
 *
 * The callback handed to `page.evaluate` is written to be self-contained,
 * since it runs in the page (presumably Playwright serializes it — confirm).
 * Behavior inside the page:
 * - a missing entry is left alone (returns false);
 * - an entry already equal to the candidate is left alone (returns true);
 * - a JSON entry is rewritten in place: a top-level string that looks like a
 *   locale/theme value is replaced, and inside objects only values stored
 *   under whitelisted keys are replaced, so unrelated fields survive;
 * - a JSON object/array with no eligible slot is NOT overwritten (returns false);
 * - a JSON entry whose eligible slots already hold the candidate is left
 *   alone (returns true);
 * - anything else (non-JSON text, JSON primitives) is replaced by the raw candidate.
 *
 * @param {{ currentPage: { evaluate: Function } }} browser - Browser wrapper.
 * @param {{ storage: string, key: string, kind: string }} hint - Entry to
 *   update; `storage` is "localStorage" or anything else for sessionStorage.
 * @param {string} candidateValue - Value to write.
 * @returns {Promise<boolean>} True when the entry holds (or already held) the
 *   candidate, false when nothing could be written safely.
 */
async function writeVariantStorageCandidate(browser, hint, candidateValue) {
    const page = browser.currentPage;
    return page.evaluate(({ storageName, key, candidate, kind }) => {
        const storage = storageName === 'localStorage' ? window.localStorage : window.sessionStorage;
        const current = storage.getItem(key);
        if (current == null)
            return false;
        if (current === candidate)
            return true;
        const LOCALE_KEY_WHITELIST = ['lang', 'locale', 'language', 'i18n', 'intl', 'i18n-locale', 'next-i18next', 'NEXT_LOCALE', 'nuxt-i18n-locale'];
        const THEME_KEY_WHITELIST = ['theme', 'color-scheme', 'colorScheme', 'dark-mode', 'darkMode', 'appearance'];
        const whitelist = kind === 'locale' ? LOCALE_KEY_WHITELIST : THEME_KEY_WHITELIST;
        // A key is allowed when it equals a whitelisted name or contains one as
        // a whole word (delimited by non-alphanumerics), case-insensitively.
        const isAllowedKey = (entryKey) => {
            const lower = entryKey.toLowerCase();
            return whitelist.some((allowed) => {
                const normalizedAllowed = allowed.toLowerCase();
                if (lower === normalizedAllowed)
                    return true;
                const re = new RegExp(`(?:^|[^a-zA-Z0-9])${normalizedAllowed.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')}(?:$|[^a-zA-Z0-9])`, 'i');
                return re.test(lower);
            });
        };
        const looksLikeTargetValue = (value) => {
            if (kind === 'locale') {
                return /^[a-z]{2,3}(?:[-_][a-zA-Z]{2,4})?$/.test(value.trim());
            }
            return /^(light|dark|auto|system|dim|high-contrast|1|0|true|false|enabled|disabled|on|off)$/i.test(value.trim());
        };
        // Returns { matched, changed, value }:
        //   matched - at least one slot eligible for the candidate was found;
        //   changed - at least one eligible slot held a different value.
        // `allowBareString` says whether a plain string at this position may be
        // replaced. It is true at the top level and under whitelisted keys only.
        // Previously the recursive fallback replaced every matching string under
        // any key, which defeated the whitelist (e.g. {"currency":"usd"} became
        // {"currency":"fr"}).
        const rewrite = (input, allowBareString) => {
            if (typeof input === 'string') {
                if (!allowBareString || !looksLikeTargetValue(input))
                    return { matched: false, changed: false, value: input };
                return { matched: true, changed: input !== candidate, value: candidate };
            }
            if (Array.isArray(input)) {
                let matched = false;
                let changed = false;
                const next = input.map((entry) => {
                    const rewritten = rewrite(entry, allowBareString);
                    matched = matched || rewritten.matched;
                    changed = changed || rewritten.changed;
                    return rewritten.value;
                });
                return { matched, changed, value: next };
            }
            if (input && typeof input === 'object') {
                let matched = false;
                let changed = false;
                const next = {};
                for (const [entryKey, entryValue] of Object.entries(input)) {
                    const rewritten = rewrite(entryValue, isAllowedKey(entryKey));
                    matched = matched || rewritten.matched;
                    changed = changed || rewritten.changed;
                    next[entryKey] = rewritten.value;
                }
                return { matched, changed, value: next };
            }
            return { matched: false, changed: false, value: input };
        };
        let parsed;
        let isJson = true;
        try {
            parsed = JSON.parse(current);
        }
        catch {
            // Not JSON: the entry is treated as a plain string below.
            isJson = false;
        }
        if (isJson) {
            const rewritten = rewrite(parsed, true);
            if (rewritten.changed) {
                storage.setItem(key, JSON.stringify(rewritten.value));
                return true;
            }
            // Every eligible slot already holds the candidate: nothing to write.
            if (rewritten.matched)
                return true;
            // Structured data without an eligible slot must not be clobbered by
            // the bare candidate string; report that nothing was written.
            if (parsed !== null && typeof parsed === 'object')
                return false;
        }
        storage.setItem(key, candidate);
        return true;
    }, {
        storageName: hint.storage,
        key: hint.key,
        candidate: candidateValue,
        kind: hint.kind,
    });
}
1108
/**
 * Tries to reach the requested language/theme by rewriting web-storage
 * entries and reloading the page.
 *
 * Storage hints are scored for the dimensions that are still inactive; the
 * six best are tried. For each candidate value the entry is written, the
 * page reloaded (falling back to a navigation to the current URL if the
 * reload fails), overlays dismissed, and the variant state re-detected.
 *
 * @param {object} browser - Browser wrapper.
 * @param {string|null|undefined} requestedLang - Requested language tag.
 * @param {string|null|undefined} requestedTheme - Requested theme name.
 * @param {{ storageHints: object[] }} signals - Page signals listing storage hints.
 * @param {{ onLog?: Function }} callbacks - Logging callbacks.
 * @returns {Promise<{ detected: object, storageUsed?: object }>} Latest
 *   detected state, plus the hint that produced it on success.
 */
async function attemptVariantStorageActivation(browser, requestedLang, requestedTheme, signals, callbacks) {
    const isSatisfied = (state) => (requestedLang ? state.lang.active : true)
        && (requestedTheme ? state.theme.active : true);
    let detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
    if (isSatisfied(detected)) {
        return { detected };
    }
    const scoreHint = (hint) => {
        const haystack = `${hint.key} ${hint.valueSample}`;
        if (hint.kind === 'locale' && !detected.lang.active) {
            return 20 + scoreTokenMatch(haystack, buildRequestedLanguageTokens(requestedLang));
        }
        if (hint.kind === 'theme' && !detected.theme.active) {
            return 20 + scoreTokenMatch(haystack, buildRequestedThemeTokens(requestedTheme));
        }
        return 0;
    };
    const rankedHints = signals.storageHints
        .map((hint) => ({ hint, score: scoreHint(hint) }))
        .filter((entry) => entry.score > 0)
        .sort((left, right) => right.score - left.score)
        .slice(0, 6);
    for (const { hint } of rankedHints) {
        const candidates = buildStorageReplacementCandidates(hint, requestedLang, requestedTheme);
        for (const candidate of candidates) {
            try {
                log(`Variant preflight trying ${hint.kind} storage ${hint.storage}.${hint.key}=${candidate}.`, 'info', callbacks.onLog);
                const written = await writeVariantStorageCandidate(browser, hint, candidate);
                if (!written) {
                    continue;
                }
                // If the reload itself fails, re-open the current URL instead.
                const reloadFallback = async () => {
                    await browser.navigateTo(browser.currentPage.url());
                };
                await browser.currentPage
                    .reload({ waitUntil: 'domcontentloaded', timeout: 15000 })
                    .catch(reloadFallback);
                await settleAfterVariantMutation(browser);
                await dismissOverlaysWithLogging(browser, {
                    context: `variant storage activation (${hint.storage}.${hint.key})`,
                    onLog: callbacks.onLog,
                });
                detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
                if (isSatisfied(detected)) {
                    log(`Variant preflight activated state via ${hint.storage}.${hint.key}.`, 'success', callbacks.onLog);
                    return { detected, storageUsed: hint };
                }
            }
            catch (err) {
                log(`Variant preflight storage activation failed for ${hint.storage}.${hint.key}: ${err.message}`, 'info', callbacks.onLog);
            }
        }
    }
    return { detected };
}
1154
/**
 * Reports whether the step's type is one of the page-acting types listed below.
 *
 * @param {{ type: string }} step - Plan step.
 * @returns {boolean} True for navigate, click, select_option, type, key and drag steps.
 */
function isConcreteVariantActivationStep(step) {
    const concreteTypes = ['navigate', 'click', 'select_option', 'type', 'key', 'drag'];
    return concreteTypes.includes(step.type);
}
1162
/**
 * Scores how well the current page satisfies a page expectation.
 *
 * Each kind of expectation that is present contributes a fixed weight:
 * url 0.35, text 0.2, title 0.15, selectors 0.15, locale 0.1, nav 0.08,
 * breadcrumb 0.07, theme 0.05. Confidence is matched weight divided by the
 * total weight of the expectations present (0 when none are present).
 *
 * Text patterns are looked up in three stages: the collected page signals,
 * then a visible-DOM query per pattern, then the live body text.
 *
 * @param {object} page - Page handle exposing `locator` and `evaluate`.
 * @param {object} signals - Collected page signals (url, title, visibleText,
 *   headings, navLabels, breadcrumbLabels, ...).
 * @param {object} expectation - Expected page traits; `minConfidence`
 *   defaults to 0.65.
 * @returns {Promise<{ ok: boolean, confidence: number, reasons: string[] }>}
 *   Verdict, confidence ratio and one reason string per evaluated trait.
 */
async function evaluatePageExpectation(page, signals, expectation) {
    const reasons = [];
    let matchedWeight = 0;
    let totalWeight = 0;
    const containsIgnoringCase = (actual, expected) => actual.toLowerCase().includes(expected.toLowerCase());
    const evaluatePatterns = (actualValues, expectedValues, weight, matcher = containsIgnoringCase, label = 'signal') => {
        if (!expectedValues || expectedValues.length === 0) {
            return;
        }
        totalWeight += weight;
        const hit = expectedValues.some((expected) => actualValues.some((actual) => matcher(actual, expected)));
        if (hit) {
            matchedWeight += weight;
        }
        reasons.push(hit ? `${label} matched` : `${label} missing`);
    };
    evaluatePatterns([signals.url], expectation.urlPatterns, 0.35, (actual, expected) => urlPatternMatches(actual, expected), 'url');
    evaluatePatterns([signals.title], expectation.titlePatterns, 0.15, undefined, 'title');
    const textPatterns = expectation.textPatterns;
    if (textPatterns && textPatterns.length > 0) {
        totalWeight += 0.2;
        // Stage 1: text already captured in the page signals.
        const capturedTexts = [signals.visibleText, ...signals.headings];
        let textHit = textPatterns.some((expected) => capturedTexts.some((actual) => actual.toLowerCase().includes(expected.toLowerCase())));
        // Stage 2: look for a visible DOM target named by the pattern.
        if (!textHit) {
            for (const expected of textPatterns) {
                try {
                    const domState = await queryNamedTargetDomState(page, expected, false, 'dry_run');
                    if (domState.found && domState.visible) {
                        textHit = true;
                        reasons.push('text matched via visible DOM');
                        break;
                    }
                }
                catch {
                    // Fall through to body-text fallback.
                }
            }
        }
        // Stage 3: search the live body text.
        if (!textHit) {
            try {
                const bodyText = await page.evaluate(() => document.body?.innerText ?? '');
                textHit = textPatterns.some((expected) => matchStringWithMode(bodyText, expected, 'contains'));
                if (textHit) {
                    reasons.push('text matched via body text');
                }
            }
            catch {
                // Ignore and keep mismatch below.
            }
        }
        if (!textHit) {
            reasons.push('text missing');
        }
        else {
            matchedWeight += 0.2;
            const alreadyExplained = reasons.some((reason) => reason.startsWith('text matched'));
            if (!alreadyExplained) {
                reasons.push('text matched');
            }
        }
    }
    evaluatePatterns(signals.navLabels, expectation.navPatterns, 0.08, undefined, 'nav');
    evaluatePatterns(signals.breadcrumbLabels, expectation.breadcrumbPatterns, 0.07, undefined, 'breadcrumb');
    if (expectation.locale) {
        totalWeight += 0.1;
        const locale = scoreLocaleSignals(signals, expectation.locale);
        const detail = locale.reasons.join(', ') || expectation.locale;
        if (locale.score >= 0.6) {
            matchedWeight += 0.1;
            reasons.push(`locale ok (${detail})`);
        }
        else {
            reasons.push(`locale weak (${detail})`);
        }
    }
    if (expectation.theme) {
        totalWeight += 0.05;
        const themeState = evaluateRequestedThemeState(signals, expectation.theme);
        if (themeState.active) {
            matchedWeight += 0.05;
            reasons.push(`theme ok (${themeState.reason})`);
        }
        else {
            reasons.push(`theme expected ${expectation.theme}, got ${themeState.detected ?? 'unknown'} (${themeState.reason})`);
        }
    }
    const selectors = expectation.selectors;
    if (selectors && selectors.length > 0) {
        totalWeight += 0.15;
        // A selector that throws (e.g. invalid syntax) counts as not visible.
        const isSelectorVisible = async (selector) => {
            try {
                return await page.locator(selector).first().isVisible({ timeout: 800 });
            }
            catch {
                return false;
            }
        };
        let selectorHit = false;
        for (const selector of selectors) {
            if (await isSelectorVisible(selector)) {
                selectorHit = true;
                break;
            }
        }
        if (selectorHit) {
            matchedWeight += 0.15;
        }
        reasons.push(selectorHit ? 'selector matched' : 'selector missing');
    }
    const confidence = totalWeight > 0 ? matchedWeight / totalWeight : 0;
    const threshold = expectation.minConfidence ?? 0.65;
    return {
        ok: confidence >= threshold,
        confidence,
        reasons,
    };
}
1275
/**
 * Words that mark a step description as mutating or high-risk.
 * Applied only to step.description, which is planner-controlled text.
 */
const VIDEO_DANGEROUS_ACTION_RE = /\b(delete|remove|destroy|deactivate|disable|disconnect|unlink|revoke|archive|trash|wipe|terminate|logout|log out|sign out|buy|purchase|checkout|pay|order|book|subscribe|upgrade|upload|import|publish|post|share|invite|merge|commit|save|confirm|approve)\b/i;
/**
 * Words that mark step content (selector, text, url, optionLabel) as
 * destructive. Deliberately limited to truly destructive words so product
 * names such as "good buy" or URLs containing "book" do not trigger it.
 */
const VIDEO_DANGEROUS_CONTENT_RE = /\b(delete|remove|destroy|wipe|terminate|trash|deactivate|disable|disconnect|revoke|archive|logout|log out|sign out)\b/i;
/** Words suggesting a draggable comparison or timeline widget. */
const VIDEO_DRAG_HINT_RE = /\b(slider|compare|comparison|before|after|timeline|scrubber|handle|split|reveal)\b/i;
/** Length limit for video step text (usage is outside this section). */
const MAX_VIDEO_TEXT_LENGTH = 256;
1282
/**
 * Substitutes `{{credential.loginUrl}}`, `{{credential.email}}` and
 * `{{credential.password}}` placeholders with the matching credential.
 *
 * @param {string|null|undefined} value - Text that may contain placeholders.
 * @param {{ loginUrl?: string, email?: string, password?: string }} [credentials] - Values to insert.
 * @returns {string|null|undefined} The text with every resolvable placeholder
 *   replaced. Placeholders whose credential is not a string are left as-is,
 *   and falsy input is returned unchanged.
 */
function resolveCredentialTemplates(value, credentials) {
    if (!value) {
        return value;
    }
    const placeholderPattern = /\{\{credential\.(loginUrl|email|password)\}\}/g;
    return value.replace(placeholderPattern, (placeholder, field) => {
        const credential = credentials?.[field];
        return typeof credential === 'string' ? credential : placeholder;
    });
}
1295
/**
 * Fails when a value still contains an unresolved credential placeholder.
 *
 * @param {string} field - Field name used in the error message.
 * @param {string|null|undefined} value - Value to check.
 * @returns {void}
 * @throws {Error} When a `{{credential.*}}` placeholder remains in `value`.
 */
function ensureNoCredentialTemplate(field, value) {
    const hasUnresolvedPlaceholder = Boolean(value)
        && /\{\{credential\.(loginUrl|email|password)\}\}/.test(value);
    if (hasUnresolvedPlaceholder) {
        throw new Error(`Missing credential value for ${field}`);
    }
}
1300
/**
 * Returns a copy of the step with credential placeholders resolved in its
 * `url`, `urlPattern` and `text` fields.
 *
 * @param {object} step - Plan step.
 * @param {{ loginUrl?: string, email?: string, password?: string }} [credentials] - Values to insert.
 * @returns {object} Shallow copy of the step with the three fields resolved.
 * @throws {Error} When any of the three fields still contains a placeholder
 *   after resolution.
 */
function resolveStepCredentials(step, credentials) {
    const templatedFields = ['url', 'urlPattern', 'text'];
    const resolved = { ...step };
    for (const field of templatedFields) {
        resolved[field] = resolveCredentialTemplates(step[field], credentials);
    }
    // Validate only after all fields are resolved, in the same field order.
    for (const field of templatedFields) {
        ensureNoCredentialTemplate(`step.${field}`, resolved[field]);
    }
    return resolved;
}
1312
/**
 * Reports whether the step's type is one of the types whose page reaction
 * is captured: navigate, click, select_option, scroll, hover, drag and key.
 *
 * @param {{ type: string }} step - Plan step.
 * @returns {boolean} True for the types listed above.
 */
function shouldCaptureVideoStepReaction(step) {
    const observedTypes = ['navigate', 'click', 'select_option', 'scroll', 'hover', 'drag', 'key'];
    return observedTypes.includes(step.type);
}
1321
/**
 * Reports whether a step is expected to produce an observable page reaction.
 *
 * Scroll and drag are intentionally left out: scrolling to an element that
 * is already visible, or dragging a static handle, may change nothing, and
 * that is not an error.
 *
 * @param {{ type: string }} step - Plan step.
 * @returns {boolean} True for navigate, click and select_option steps.
 */
function shouldRequireVideoStepReaction(step) {
    const reactionRequiredTypes = ['navigate', 'click', 'select_option'];
    return reactionRequiredTypes.includes(step.type);
}
1328
/**
 * Picks the reaction-wait timings for a step based on its type.
 *
 * @param {{ type: string }} step - Plan step.
 * @returns {{ timeoutMs: number, settleMs: number }} A fresh options object:
 *   navigate 2000/300, click/select_option/key 1500/250, hover 800/200,
 *   everything else (including scroll and drag) 1200/200.
 */
function getVideoStepReactionOptions(step) {
    const { type } = step;
    if (type === 'navigate') {
        return { timeoutMs: 2000, settleMs: 300 };
    }
    if (type === 'click' || type === 'select_option' || type === 'key') {
        return { timeoutMs: 1500, settleMs: 250 };
    }
    if (type === 'hover') {
        return { timeoutMs: 800, settleMs: 200 };
    }
    // scroll, drag and any unlisted type share the same timings.
    return { timeoutMs: 1200, settleMs: 200 };
}
1345
- export async function evaluateVideoStepSafety(step, config, page, currentUrl, interactiveElements) {
1346
- let activeStep;
1347
- try {
1348
- activeStep = specializeStepSelectorForIntent(resolveStepCredentials(step, config.credentials));
1349
- }
1350
- catch (err) {
1351
- return {
1352
- allowed: false,
1353
- reason: err.message,
1354
- };
1355
- }
1356
- const actionIntent = (activeStep.description ?? '').toLowerCase();
1357
- const contentIntent = [
1358
- activeStep.selector,
1359
- activeStep.toSelector,
1360
- activeStep.text,
1361
- activeStep.url,
1362
- activeStep.urlPattern,
1363
- activeStep.optionLabel,
1364
- activeStep.optionValue,
1365
- ]
1366
- .filter(Boolean)
1367
- .join(' ')
1368
- .toLowerCase();
1369
- const descriptionRiskApplies = !(activeStep.type === 'scroll'
1370
- || activeStep.type === 'highlight'
1371
- || activeStep.type === 'hover'
1372
- || activeStep.type === 'wait'
1373
- || activeStep.type === 'assert_url'
1374
- || activeStep.type === 'assert_text'
1375
- || activeStep.type === 'assert_element'
1376
- || activeStep.type === 'assert_page');
1377
- if (descriptionRiskApplies && VIDEO_DANGEROUS_ACTION_RE.test(actionIntent)) {
1378
- return {
1379
- allowed: false,
1380
- reason: `Blocked by video safety policy: step description appears mutating or high-risk (${activeStep.description}).`,
1381
- };
1382
- }
1383
- if (VIDEO_DANGEROUS_CONTENT_RE.test(contentIntent)) {
1384
- return {
1385
- allowed: false,
1386
- reason: `Blocked by video safety policy: step content appears destructive (${activeStep.description}).`,
1387
- };
1388
- }
1389
- const actionContext = {
1390
- rootUrl: config.url,
1391
- currentUrl,
1392
- credentials: config.credentials,
1393
- interactiveElements,
1394
- currentLang: config.lang,
1395
- currentTheme: config.theme,
1396
- };
1397
- if (activeStep.type === 'navigate') {
1398
- const decision = evaluateActionSecurity('navigate_to', { url: activeStep.url }, actionContext);
1399
- return decision.allowed
1400
- ? { allowed: true }
1401
- : { allowed: false, reason: decision.reason ?? 'Blocked by navigation policy.' };
1402
- }
1403
- if (activeStep.type === 'click' || activeStep.type === 'hover') {
1404
- const target = await inspectVideoTarget(page, activeStep.selector, activeStep.coordinates);
1405
- // If the target can't be resolved yet (e.g. inside a popover that hasn't rendered),
1406
- // allow the step — executePlanStep has retries and fallbacks to find it later.
1407
- // Only block if the target IS resolved and the security check flags it as dangerous.
1408
- if (!target)
1409
- return { allowed: true };
1410
- const decision = evaluateResolvedActionSecurity(activeStep.type === 'hover' ? 'hover' : 'click', {
1411
- selector: activeStep.selector,
1412
- x: activeStep.coordinates?.x,
1413
- y: activeStep.coordinates?.y,
1414
- }, actionContext, target);
1415
- return decision.allowed
1416
- ? { allowed: true }
1417
- : { allowed: false, reason: decision.reason ?? 'Blocked by video interaction policy.' };
1418
- }
1419
- if (activeStep.type === 'key') {
1420
- const decision = evaluateActionSecurity('press_key', { key: activeStep.key }, actionContext);
1421
- return decision.allowed
1422
- ? { allowed: true }
1423
- : { allowed: false, reason: decision.reason ?? `Blocked by video safety policy: keyboard key "${activeStep.key ?? ''}" is not allowed.` };
1424
- }
1425
- if (activeStep.type === 'type') {
1426
- const text = activeStep.text ?? '';
1427
- if (text.length > MAX_VIDEO_TEXT_LENGTH) {
1428
- return {
1429
- allowed: false,
1430
- reason: `Blocked by video safety policy: typed text exceeds ${MAX_VIDEO_TEXT_LENGTH} characters.`,
1431
- };
1432
- }
1433
- if (/[\r\n]/.test(text)) {
1434
- return {
1435
- allowed: false,
1436
- reason: 'Blocked by video safety policy: multi-line text entry is not allowed.',
1437
- };
1438
- }
1439
- const target = await inspectVideoTarget(page, activeStep.selector, activeStep.coordinates);
1440
- const decision = evaluateResolvedActionSecurity('type_text', {
1441
- selector: activeStep.selector,
1442
- x: activeStep.coordinates?.x,
1443
- y: activeStep.coordinates?.y,
1444
- text,
1445
- }, actionContext, target);
1446
- return decision.allowed
1447
- ? { allowed: true }
1448
- : { allowed: false, reason: decision.reason ?? 'Blocked by video typing policy.' };
1449
- }
1450
- if (activeStep.type === 'select_option') {
1451
- const target = await inspectVideoTarget(page, activeStep.selector, activeStep.coordinates);
1452
- const decision = evaluateResolvedActionSecurity('select_option', {
1453
- selector: activeStep.selector,
1454
- optionLabel: activeStep.optionLabel,
1455
- optionValue: activeStep.optionValue,
1456
- optionIndex: activeStep.optionIndex,
1457
- }, actionContext, target);
1458
- return decision.allowed
1459
- ? { allowed: true }
1460
- : { allowed: false, reason: decision.reason ?? 'Blocked by video select policy.' };
1461
- }
1462
- if (activeStep.type === 'drag') {
1463
- const dragIntent = [actionIntent, contentIntent].join(' ');
1464
- if (!VIDEO_DRAG_HINT_RE.test(dragIntent)) {
1465
- return {
1466
- allowed: false,
1467
- reason: 'Blocked by video safety policy: drag is reserved for visual sliders or comparison reveals.',
1468
- };
1469
- }
1470
- if ((!activeStep.selector && !activeStep.coordinates) || (!activeStep.toSelector && !activeStep.toCoordinates)) {
1471
- return {
1472
- allowed: false,
1473
- reason: 'Blocked by video safety policy: drag requires a valid start and end target.',
1474
- };
1475
- }
1476
- }
1477
- return { allowed: true };
1478
- }
1479
- function matchStringWithMode(actual, expected, mode = 'contains') {
1480
- const normalize = (value) => value
1481
- .normalize('NFKC')
1482
- .replace(/[\u200b-\u200d\ufeff]/g, '')
1483
- .replace(/[\u00a0\u202f]/g, ' ')
1484
- .replace(/[’‘]/g, "'")
1485
- .replace(/\s+/g, ' ')
1486
- .trim()
1487
- .toLowerCase();
1488
- if (mode === 'equals')
1489
- return normalize(actual) === normalize(expected);
1490
- if (mode === 'contains')
1491
- return normalize(actual).includes(normalize(expected));
1492
- try {
1493
- return new RegExp(expected).test(actual);
1494
- }
1495
- catch {
1496
- return false;
1497
- }
1498
- }
1499
- function extractDescriptionEntityPhrases(description) {
1500
- if (!description)
1501
- return [];
1502
- const matches = [
1503
- ...Array.from(description.matchAll(/\b(?:open|click|highlight|hover|verify|scroll(?:\s+\w+){0,2}\s+to|go to|reveal)\s+(?:the\s+)?([a-z0-9][a-z0-9+.\-/'’]*(?:\s+[a-z0-9][a-z0-9+.\-/'’]*){0,4})\s+(?:page|section|link|button|tab|card)\b/gi)).map((match) => match[1]?.trim() ?? ''),
1504
- ...Array.from(description.matchAll(/\b(?:ouvrir|cliquer(?:\s+sur)?|survoler|v[eé]rifier|faire\s+d[eé]filer(?:\s+jusqu['’](?:a|à)|\s+jusqu(?:e)?\s+(?:a|à))?|aller(?:\s+vers)?|r[eé]v[eé]ler)\s+(?:la\s+|le\s+|les\s+)?(?:section|page|onglet|carte|lien|bouton)\s+([a-z0-9][a-z0-9+.\-/'’]*(?:\s+[a-z0-9][a-z0-9+.\-/'’]*){0,5})\b/gi)).map((match) => match[1]?.trim() ?? ''),
1505
- ];
1506
- return dedupeSelectors(matches)
1507
- .filter((value) => value.length >= 3);
1508
- }
1509
- function extractQuotedPhrases(text) {
1510
- if (!text)
1511
- return [];
1512
- return Array.from(text.matchAll(/(?:^|[\s([{"“”«»,:;])(["'])([^"'“”«»]{3,}?)\1(?=$|[\s)\]}“”«».,!?;:])/g))
1513
- .map((match) => match[2]?.trim() ?? '')
1514
- .filter((value) => value.length >= 3);
1515
- }
1516
- export function extractNamedScrollTarget(description) {
1517
- if (!description)
1518
- return null;
1519
- const quoted = extractQuotedPhrases(description)[0];
1520
- if (quoted)
1521
- return quoted;
1522
- const patterns = [
1523
- /\b(?:scroll(?:\s+\w+){0,2}\s+to|go to)\s+(?:the\s+)?(?:section\s+)?([a-z0-9][a-z0-9\s+.\-/'’]{2,80}?)(?:\s+section)?(?:[.!?,:;]|$)/i,
1524
- /\b(?:faire\s+d[eé]filer|aller)\s+(?:jusqu['’](?:a|à)|jusqu(?:e)?\s+(?:a|à)|vers)\s+(?:la\s+|le\s+)?(?:section\s+)?([a-z0-9][a-z0-9\s+.\-/'’]{2,80})(?:[.!?,:;]|$)/i,
1525
- /\b(?:section|rubrique)\s+([a-z0-9][a-z0-9\s+.\-/'’]{2,80})(?:[.!?,:;]|$)/i,
1526
- ];
1527
- for (const pattern of patterns) {
1528
- const match = description.match(pattern);
1529
- const raw = match?.[1]?.trim();
1530
- if (!raw)
1531
- continue;
1532
- const cleaned = raw
1533
- .replace(/^(?:see|view|reach)\s+(?:the\s+)?/i, '')
1534
- .replace(/^(?:la|le|les|the)\s+/i, '')
1535
- .replace(/\s+(?:section|rubrique)$/i, '')
1536
- .trim();
1537
- if (cleaned.length >= 3)
1538
- return cleaned;
1539
- }
1540
- return null;
1541
- }
1542
- function resolveNamedScrollTarget(step) {
1543
- const direct = extractNamedScrollTarget(step.description);
1544
- if (direct)
1545
- return direct;
1546
- const intentFallback = extractDescriptionEntityPhrases(step.description)[0];
1547
- return intentFallback?.trim() || null;
1548
- }
1549
- async function queryNamedTargetDomState(page, target, scrollIntoView, mode) {
1550
- const trimmedTarget = target.trim();
1551
- if (!trimmedTarget)
1552
- return { found: false, visible: false };
1553
- return page.evaluate(({ namedTarget, smooth, shouldScroll }) => {
1554
- const normalize = (value) => value
1555
- .normalize('NFD')
1556
- .replace(/[\u0300-\u036f]/g, '')
1557
- .replace(/[\u00a0\u202f]/g, ' ')
1558
- .replace(/[’‘]/g, "'")
1559
- .replace(/\s+/g, ' ')
1560
- .trim()
1561
- .toLowerCase();
1562
- const needle = normalize(namedTarget);
1563
- if (!needle)
1564
- return { found: false, visible: false };
1565
- const needleTokens = needle.split(' ').filter((token) => token.length > 2);
1566
- const needlePhrases = needleTokens.length >= 2
1567
- ? Array.from({ length: needleTokens.length - 1 }, (_, index) => `${needleTokens[index]} ${needleTokens[index + 1]}`)
1568
- : [];
1569
- const viewportHeight = window.innerHeight;
1570
- const viewportWidth = window.innerWidth;
1571
- const viewportCenterX = viewportWidth / 2;
1572
- const detectOcclusionInsets = () => {
1573
- let top = 0;
1574
- let bottom = 0;
1575
- for (const node of Array.from(document.body?.querySelectorAll('*') ?? [])) {
1576
- if (!(node instanceof HTMLElement))
1577
- continue;
1578
- const style = window.getComputedStyle(node);
1579
- if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity || '1') === 0) {
1580
- continue;
1581
- }
1582
- if (style.position !== 'fixed' && style.position !== 'sticky')
1583
- continue;
1584
- if (node.getAttribute('aria-hidden') === 'true')
1585
- continue;
1586
- const rect = node.getBoundingClientRect();
1587
- if (rect.width <= 0 || rect.height <= 0)
1588
- continue;
1589
- if (rect.width < viewportWidth * 0.35)
1590
- continue;
1591
- if (rect.left > viewportCenterX || rect.right < viewportCenterX)
1592
- continue;
1593
- if (rect.height > viewportHeight * 0.35)
1594
- continue;
1595
- if (rect.top <= 24 && rect.bottom > 0) {
1596
- top = Math.max(top, rect.bottom);
1597
- }
1598
- if (rect.bottom >= viewportHeight - 24 && rect.top < viewportHeight) {
1599
- bottom = Math.max(bottom, viewportHeight - rect.top);
1600
- }
1601
- }
1602
- return { top, bottom };
1603
- };
1604
- const isRenderable = (element) => {
1605
- if (element.getAttribute('aria-hidden') === 'true')
1606
- return false;
1607
- const style = window.getComputedStyle(element);
1608
- if (style.display === 'none' ||
1609
- style.visibility === 'hidden' ||
1610
- style.contentVisibility === 'hidden' ||
1611
- parseFloat(style.opacity || '1') === 0) {
1612
- return false;
1613
- }
1614
- const rect = element.getBoundingClientRect();
1615
- return rect.width > 0 && rect.height > 0;
1616
- };
1617
- const root = document.querySelector('main') ?? document.body ?? document.documentElement;
1618
- const candidates = [];
1619
- const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
1620
- let current = walker.currentNode;
1621
- while (current) {
1622
- if (current instanceof HTMLElement &&
1623
- isRenderable(current) &&
1624
- current.tagName !== 'SCRIPT' &&
1625
- current.tagName !== 'STYLE' &&
1626
- current.tagName !== 'NOSCRIPT' &&
1627
- current.tagName !== 'TEMPLATE') {
1628
- candidates.push(current);
1629
- }
1630
- current = walker.nextNode();
1631
- }
1632
- const rootText = normalize((root.textContent ?? '').slice(0, 200000));
1633
- const rootContainsNeedle = rootText.includes(needle);
1634
- let bestMatch = null;
1635
- for (const candidate of candidates) {
1636
- const text = normalize(candidate.textContent ?? '');
1637
- if (!text)
1638
- continue;
1639
- const exactTextMatch = text.includes(needle);
1640
- const tokenHits = needleTokens.filter((token) => text.includes(token)).length;
1641
- const phraseHits = needlePhrases.filter((phrase) => text.includes(phrase)).length;
1642
- const partialMatchThreshold = Math.max(2, Math.ceil(needleTokens.length * 0.55));
1643
- if (!exactTextMatch && phraseHits === 0 && tokenHits < partialMatchThreshold)
1644
- continue;
1645
- const rect = candidate.getBoundingClientRect();
1646
- const isHeading = /^H[1-6]$/.test(candidate.tagName) || candidate.getAttribute('role') === 'heading';
1647
- const isSectionLike = /^(SECTION|ARTICLE|ASIDE)$/i.test(candidate.tagName)
1648
- || candidate.getAttribute('role') === 'region';
1649
- const sectionAncestor = candidate.closest('section,article,[role="region"]');
1650
- const tagName = candidate.tagName.toUpperCase();
1651
- const isGenericContainer = tagName === 'MAIN' || tagName === 'BODY' || tagName === 'HTML';
1652
- const isMassiveContainer = rect.height > viewportHeight * 1.6
1653
- || text.length > Math.max(needle.length * 24, 2400);
1654
- if (isGenericContainer)
1655
- continue;
1656
- if (isMassiveContainer && !isHeading && !isSectionLike)
1657
- continue;
1658
- const exactMatch = exactTextMatch && (text === needle || text.startsWith(`${needle} `));
1659
- const textLengthPenalty = Math.min(18, text.length / Math.max(needle.length, 1));
1660
- const proximityPenalty = Math.abs(rect.top) / 2000;
1661
- const score = (exactMatch ? 16 : 0) +
1662
- (exactTextMatch ? 10 : 0) +
1663
- (isHeading ? 10 : 0) +
1664
- (isSectionLike ? 6 : 0) +
1665
- (sectionAncestor ? 5 : 0) +
1666
- phraseHits * 4 +
1667
- tokenHits +
1668
- (isMassiveContainer ? 4 : 0) -
1669
- textLengthPenalty -
1670
- proximityPenalty;
1671
- if (!bestMatch || score > bestMatch.score) {
1672
- bestMatch = { element: candidate, score };
1673
- }
1674
- }
1675
- if (!bestMatch) {
1676
- return { found: rootContainsNeedle, visible: false };
1677
- }
1678
- const { top: topInset, bottom: bottomInset } = detectOcclusionInsets();
1679
- const topSafeY = topInset + 16;
1680
- const bottomSafeY = viewportHeight - bottomInset - 16;
1681
- const isInViewport = (rect) => {
1682
- if (rect.width <= 0 ||
1683
- rect.height <= 0 ||
1684
- rect.bottom <= topSafeY ||
1685
- rect.right <= 0 ||
1686
- rect.top >= bottomSafeY ||
1687
- rect.left >= window.innerWidth) {
1688
- return false;
1689
- }
1690
- const visibleHeight = Math.min(rect.bottom, bottomSafeY) - Math.max(rect.top, topSafeY);
1691
- const minVisibleHeight = Math.min(Math.max(28, rect.height * 0.45), Math.max(28, rect.height - 4));
1692
- return visibleHeight >= minVisibleHeight;
1693
- };
1694
- const anchor = bestMatch.element;
1695
- const beforeRect = anchor.getBoundingClientRect();
1696
- if (shouldScroll && !isInViewport(beforeRect)) {
1697
- const desiredTop = Math.max(0, topSafeY);
1698
- const targetTop = Math.max(0, Math.round(window.scrollY + beforeRect.top - desiredTop));
1699
- window.scrollTo({ top: targetTop, behavior: smooth ? 'smooth' : 'instant' });
1700
- }
1701
- const afterRect = anchor.getBoundingClientRect();
1702
- return { found: true, visible: isInViewport(afterRect) };
1703
- }, { namedTarget: trimmedTarget, smooth: mode === 'recording', shouldScroll: scrollIntoView });
1704
- }
1705
- async function scrollNamedTargetIntoViewFromDom(page, target, mode) {
1706
- const state = await queryNamedTargetDomState(page, target, true, mode);
1707
- return state.found;
1708
- }
1709
- /**
1710
- * Extract target text hints from a step description.
1711
- * E.g. "Click 'Nouveau preset'" → ["Nouveau preset"]
1712
- * E.g. "Click top 'New' / 'Nouveau' button" → ["New", "Nouveau"]
1713
- */
1714
- function extractDescriptionTextHints(description) {
1715
- const matches = Array.from(description.matchAll(/[''""]([^''"]+)[''""]/g));
1716
- return matches.map(m => m[1]).filter(Boolean);
1717
- }
1718
- /**
1719
- * Find an interactive element on the live page by matching its visible text
1720
- * against the step description — same approach as the screenshot agent.
1721
- *
1722
- * Uses `browser.getInteractiveElements()` to see the real DOM (including
1723
- * popovers, modals, dropdowns that appeared after previous interactions).
1724
- */
1725
- async function findElementByDescription(browser, description) {
1726
- if (!description)
1727
- return null;
1728
- const textHints = extractDescriptionTextHints(description);
1729
- if (textHints.length === 0)
1730
- return null;
1731
- const elements = await browser.getInteractiveElements();
1732
- const viewport = browser.currentPage.viewportSize();
1733
- // Score each visible element by text similarity to the description hints
1734
- let bestMatch = null;
1735
- for (const el of elements) {
1736
- if (!el.visible || !el.boundingBox)
1737
- continue;
1738
- const elText = (el.text || el.ariaLabel || '').toLowerCase().trim();
1739
- if (!elText)
1740
- continue;
1741
- let score = 0;
1742
- for (const hint of textHints) {
1743
- const hintLower = hint.toLowerCase();
1744
- if (elText === hintLower) {
1745
- score += 100; // Exact match
1746
- }
1747
- else if (elText.includes(hintLower)) {
1748
- score += 60; // Contains
1749
- }
1750
- else if (hintLower.includes(elText)) {
1751
- score += 40; // Hint contains element text
1752
- }
1753
- }
1754
- if (score === 0)
1755
- continue;
1756
- // Prefer visible, in-viewport elements
1757
- const bb = el.boundingBox;
1758
- const cx = bb.x + bb.width / 2;
1759
- const cy = bb.y + bb.height / 2;
1760
- if (viewport && (cx < 0 || cy < 0 || cx > viewport.width || cy > viewport.height)) {
1761
- score -= 20; // Off-screen penalty
1762
- }
1763
- if (!bestMatch || score > bestMatch.score) {
1764
- bestMatch = { x: cx, y: cy, score };
1765
- }
1766
- }
1767
- return bestMatch;
1768
- }
1769
- function extractSelectorHints(selector, description) {
1770
- const textHints = Array.from(selector.matchAll(/:has-text\((['"])(.*?)\1\)|text=(['"])(.*?)\3|\[aria-label\*?=(['"]?)([^'"\]]+)\5\]/gi))
1771
- .map((match) => match[2] || match[4] || match[6] || '')
1772
- .filter(Boolean);
1773
- const hrefHints = Array.from(selector.matchAll(/\[href\*?=(['"]?)([^'"\]]+)\1\]/gi))
1774
- .map((match) => match[2] || '')
1775
- .filter(Boolean);
1776
- if (description) {
1777
- const quoted = extractQuotedPhrases(description);
1778
- textHints.push(...quoted);
1779
- textHints.push(...extractDescriptionEntityPhrases(description));
1780
- }
1781
- return {
1782
- textHints: dedupeSelectors(textHints),
1783
- hrefHints: dedupeSelectors(hrefHints),
1784
- };
1785
- }
1786
- function escapeSelectorText(value) {
1787
- return value
1788
- .replace(/\\/g, '\\\\')
1789
- .replace(/"/g, '\\"');
1790
- }
1791
- function sanitizeIntentPhrase(value) {
1792
- return value
1793
- .trim()
1794
- .replace(/^[^a-z0-9]+/i, '')
1795
- .replace(/^(?:see|view|reach)\s+(?:the\s+)?/i, '')
1796
- .replace(/\b(?:de|du|des|d|le|la|les|the|a|an|to|for|pour|vers|sur|au|aux)\s+/i, '')
1797
- .replace(/^(?:l|d)['’]/i, '')
1798
- .replace(/\b(?:page|section|link|button|tab|card|onglet|carte|lien|bouton|rubrique)\b$/i, '')
1799
- .trim();
1800
- }
1801
- function slugifyIntentValue(value) {
1802
- return normalizeVariantToken(value)
1803
- .replace(/[^a-z0-9]+/g, '-')
1804
- .replace(/^-+|-+$/g, '');
1805
- }
1806
- function extractSpecificRouteSlugs(step) {
1807
- const rawPatterns = [
1808
- step.url,
1809
- step.urlPattern,
1810
- ...(step.pageExpectation?.urlPatterns ?? []),
1811
- ...(step.expectedPageAfter?.urlPatterns ?? []),
1812
- ]
1813
- .filter((value) => !!value);
1814
- const slugs = [];
1815
- for (const raw of rawPatterns) {
1816
- const parsed = parseUrlMaybe(raw, 'https://example.com');
1817
- const pathname = parsed?.pathname ?? raw;
1818
- for (const segment of normalizePathSegments(pathname)) {
1819
- if (segment.length < 3)
1820
- continue;
1821
- if (/^[a-z]{2}$/i.test(segment))
1822
- continue;
1823
- if (segment === 'iphone' || segment === 'mac' || segment === 'ipad' || segment === 'watch')
1824
- continue;
1825
- if (segment.includes('-') || /\d/.test(segment)) {
1826
- slugs.push(segment);
1827
- }
1828
- }
1829
- }
1830
- return dedupeSelectors(slugs);
1831
- }
1832
- function isSpecificIntentPhrase(value) {
1833
- const normalized = normalizeVariantToken(sanitizeIntentPhrase(value));
1834
- if (!normalized)
1835
- return false;
1836
- const tokens = normalized.split(/\s+/).filter(Boolean);
1837
- const meaningfulTokens = tokens.filter((token) => !/^(?:de|du|des|d|le|la|les|the|a|an|to|for|pour|vers|sur|au|aux|l)$/.test(token));
1838
- if (meaningfulTokens.length === 0)
1839
- return false;
1840
- return /\d/.test(normalized) || normalized.includes('-') || tokens.length >= 2;
1841
- }
1842
- function collectStepIntentTargets(step) {
1843
- const selectorHints = step.selector ? extractSelectorHints(step.selector, step.description) : { textHints: [], hrefHints: [] };
1844
- const quoted = step.description
1845
- ? extractQuotedPhrases(step.description)
1846
- : [];
1847
- const expectationPhrases = [
1848
- ...(step.pageExpectation?.titlePatterns ?? []),
1849
- ...(step.pageExpectation?.textPatterns ?? []),
1850
- ...(step.expectedPageAfter?.titlePatterns ?? []),
1851
- ...(step.expectedPageAfter?.textPatterns ?? []),
1852
- ];
1853
- const descriptionEntities = extractDescriptionEntityPhrases(step.description);
1854
- const phrases = dedupeSelectors([
1855
- ...quoted,
1856
- ...descriptionEntities,
1857
- ...expectationPhrases,
1858
- ...selectorHints.textHints,
1859
- ].map(sanitizeIntentPhrase).filter(Boolean)).filter(isSpecificIntentPhrase);
1860
- const slugs = dedupeSelectors([
1861
- ...extractSpecificRouteSlugs(step),
1862
- ...selectorHints.hrefHints.map(slugifyIntentValue),
1863
- ...phrases.map(slugifyIntentValue),
1864
- ]).filter((slug) => slug.length >= 3 && (slug.includes('-') || /\d/.test(slug)));
1865
- return {
1866
- phrases,
1867
- slugs,
1868
- strict: phrases.length > 0 || slugs.length > 0,
1869
- };
1870
- }
1871
- function selectorPreservesStepIntent(selector, step) {
1872
- const targets = collectStepIntentTargets(step);
1873
- if (!targets.strict)
1874
- return true;
1875
- const normalizedSelector = normalizeVariantToken(selector);
1876
- return targets.phrases.some((phrase) => normalizedSelector.includes(normalizeVariantToken(phrase)))
1877
- || targets.slugs.some((slug) => normalizedSelector.includes(slug));
1878
- }
1879
- function buildIntentSelectorCandidates(step) {
1880
- const targets = collectStepIntentTargets(step);
1881
- const selectors = [];
1882
- if (step.type === 'scroll') {
1883
- const namedTarget = resolveNamedScrollTarget(step);
1884
- const phrases = namedTarget ? dedupeSelectors([namedTarget, ...targets.phrases]) : targets.phrases;
1885
- for (const phrase of phrases) {
1886
- const escaped = escapeSelectorText(phrase);
1887
- selectors.push(`section:has-text("${escaped}")`);
1888
- selectors.push(`main section:has-text("${escaped}")`);
1889
- selectors.push(`[role="region"]:has-text("${escaped}")`);
1890
- selectors.push(`h1:has-text("${escaped}")`);
1891
- selectors.push(`h2:has-text("${escaped}")`);
1892
- selectors.push(`h3:has-text("${escaped}")`);
1893
- selectors.push(`p:has-text("${escaped}")`);
1894
- selectors.push(`text="${escaped}"`);
1895
- }
1896
- for (const slug of targets.slugs) {
1897
- const escaped = escapeSelectorText(slug);
1898
- selectors.push(`section[id*="${escaped}"]`);
1899
- selectors.push(`[id*="${escaped}"]`);
1900
- selectors.push(`[data-testid*="${escaped}"]`);
1901
- selectors.push(`[href*="${escaped}"]`);
1902
- }
1903
- return dedupeSelectors(selectors);
1904
- }
1905
- for (const slug of targets.slugs) {
1906
- const escaped = escapeSelectorText(slug);
1907
- selectors.push(`a[href*="${escaped}"]`);
1908
- selectors.push(`[href*="${escaped}"]`);
1909
- selectors.push(`[data-testid*="${escaped}"]`);
1910
- selectors.push(`[aria-label*="${escaped}"]`);
1911
- }
1912
- for (const phrase of targets.phrases) {
1913
- const escaped = escapeSelectorText(phrase);
1914
- selectors.push(`a:has-text("${escaped}")`);
1915
- selectors.push(`button:has-text("${escaped}")`);
1916
- // Modern component libraries (cmdk, Radix, Headless UI, etc.) render interactive
1917
- // items as <div> elements with role attributes or data-cmdk-item. These are not
1918
- // matched by a: or button: selectors. Add role-based and generic text selectors.
1919
- selectors.push(`[role="menuitem"]:has-text("${escaped}")`);
1920
- selectors.push(`[role="option"]:has-text("${escaped}")`);
1921
- selectors.push(`[role="listitem"]:has-text("${escaped}")`);
1922
- selectors.push(`[data-cmdk-item]:has-text("${escaped}")`);
1923
- selectors.push(`[aria-label*="${escaped}"]`);
1924
- selectors.push(`[title*="${escaped}"]`);
1925
- // Generic text selector as last resort — Playwright's text= matches any element
1926
- selectors.push(`text="${escaped}"`);
1927
- }
1928
- return dedupeSelectors(selectors);
1929
- }
1930
- function specializeStepSelectorForIntent(step) {
1931
- if (step.type !== 'click'
1932
- && step.type !== 'hover'
1933
- && step.type !== 'highlight'
1934
- && step.type !== 'assert_element'
1935
- && step.type !== 'scroll') {
1936
- return step;
1937
- }
1938
- if (!step.selector) {
1939
- const intentSelectors = buildIntentSelectorCandidates(step);
1940
- if (intentSelectors.length === 0)
1941
- return step;
1942
- return { ...step, selector: intentSelectors.join(', ') };
1943
- }
1944
- const originalSelectors = splitSelectorCandidates(step.selector);
1945
- const intentSelectors = buildIntentSelectorCandidates(step);
1946
- if (intentSelectors.length === 0)
1947
- return step;
1948
- const filteredOriginalSelectors = originalSelectors.filter((selector) => selectorPreservesStepIntent(selector, step));
1949
- const combined = dedupeSelectors([...intentSelectors, ...filteredOriginalSelectors]);
1950
- if (combined.length === 0)
1951
- return step;
1952
- return { ...step, selector: combined.join(', ') };
1953
- }
1954
- async function getElementCenter(browser, selector, description, mode = 'dry_run') {
1955
- if (containsInternalAutomationSelector(selector))
1956
- return null;
1957
- const page = browser.currentPage;
1958
- const selectorCandidates = splitSelectorCandidates(selector);
1959
- const selectors = selectorCandidates.length > 0 ? selectorCandidates : [selector];
1960
- const { textHints, hrefHints } = extractSelectorHints(selector, description);
1961
- const viewport = page.viewportSize();
1962
- const candidateLimit = mode === 'recording' ? 4 : 8;
1963
- let bestCandidate = null;
1964
- for (const candidateSelector of selectors) {
1965
- let locator;
1966
- try {
1967
- locator = page.locator(candidateSelector);
1968
- // Use a short timeout for counting to avoid hanging on complex selectors
1969
- // that trigger Playwright's auto-waiting in some edge cases.
1970
- const countPromise = locator.count();
1971
- const rawCount = await Promise.race([
1972
- countPromise,
1973
- new Promise((resolve) => setTimeout(() => resolve(0), 3000)),
1974
- ]);
1975
- const count = Math.min(rawCount, candidateLimit);
1976
- for (let index = 0; index < count; index += 1) {
1977
- const candidate = locator.nth(index);
1978
- const meta = await candidate.evaluate((node) => {
1979
- if (!(node instanceof HTMLElement))
1980
- return null;
1981
- const rect = node.getBoundingClientRect();
1982
- const style = window.getComputedStyle(node);
1983
- const visible = style.display !== 'none'
1984
- && style.visibility !== 'hidden'
1985
- && parseFloat(style.opacity || '1') > 0
1986
- && rect.width > 1
1987
- && rect.height > 1;
1988
- const text = (node.innerText || node.textContent || node.getAttribute('aria-label') || '').replace(/\s+/g, ' ').trim();
1989
- const href = node instanceof HTMLAnchorElement ? node.href : node.getAttribute('href');
1990
- return {
1991
- rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
1992
- visible,
1993
- text,
1994
- href,
1995
- inNav: !!node.closest('nav, header, [role="navigation"]'),
1996
- tag: node.tagName.toLowerCase(),
1997
- };
1998
- }).catch(() => null);
1999
- if (!meta || !meta.visible)
2000
- continue;
2001
- const centerX = meta.rect.x + meta.rect.width / 2;
2002
- const centerY = meta.rect.y + meta.rect.height / 2;
2003
- let score = 0;
2004
- if (viewport) {
2005
- const fullyVisible = meta.rect.x >= 0
2006
- && meta.rect.y >= 0
2007
- && meta.rect.x + meta.rect.width <= viewport.width
2008
- && meta.rect.y + meta.rect.height <= viewport.height;
2009
- score += fullyVisible ? 16 : 6;
2010
- }
2011
- if (meta.inNav)
2012
- score += 8;
2013
- if (meta.tag === 'a')
2014
- score += 4;
2015
- if (meta.rect.width > 600 || meta.rect.height > 180)
2016
- score -= 4;
2017
- const candidateText = `${meta.text || ''}`.toLowerCase();
2018
- const candidateHref = `${meta.href || ''}`.toLowerCase();
2019
- for (const hint of textHints) {
2020
- const normalizedHint = hint.toLowerCase();
2021
- if (!normalizedHint)
2022
- continue;
2023
- if (candidateText === normalizedHint)
2024
- score += 18;
2025
- else if (candidateText.includes(normalizedHint))
2026
- score += 12;
2027
- }
2028
- for (const hint of hrefHints) {
2029
- const normalizedHint = hint.toLowerCase();
2030
- if (!normalizedHint)
2031
- continue;
2032
- if (candidateHref.includes(normalizedHint))
2033
- score += 14;
2034
- }
2035
- if (!bestCandidate || score > bestCandidate.score) {
2036
- bestCandidate = { x: centerX, y: centerY, score };
2037
- }
2038
- }
2039
- }
2040
- catch {
2041
- // Try next selector candidate.
2042
- }
2043
- }
2044
- if (bestCandidate) {
2045
- return { x: bestCandidate.x, y: bestCandidate.y };
2046
- }
2047
- try {
2048
- const locator = page.locator(selector).first();
2049
- await locator.waitFor({ state: 'visible', timeout: pickModeTimeout(mode, 900, 3000) });
2050
- const box = await locator.boundingBox({ timeout: pickModeTimeout(mode, 700, 1500) });
2051
- if (!box)
2052
- return null;
2053
- return {
2054
- x: box.x + box.width / 2,
2055
- y: box.y + box.height / 2,
2056
- };
2057
- }
2058
- catch {
2059
- return null;
2060
- }
2061
- }
2062
/**
 * Describe the DOM element that a video step targets.
 *
 * Resolution order:
 *   1. Each candidate produced by `splitSelectorCandidates(selector)` (or the
 *      raw selector when that yields nothing) is tried through a Playwright
 *      locator, then through a plain `document.querySelector` lookup.
 *   2. If no selector candidate yields a description, the element found at
 *      `coordinates` via `document.elementFromPoint` is described instead.
 *
 * @param page - Playwright page to inspect.
 * @param {string} [selector] - Selector (possibly several candidates) for the target.
 * @param {{x: number, y: number}} [coordinates] - Viewport point used as a fallback.
 * @returns {Promise<object|null>} Element description (`tag`, `role`, `text`,
 *   ARIA attributes, `href`, `inputType`, rounded `boundingBox`, `selector`,
 *   `visible`, `visibilityState`, and `index: -1`), or `null` when nothing
 *   resolves to an `HTMLElement`.
 */
export async function inspectVideoTarget(page, selector, coordinates) {
    // Executed inside the browser by Playwright, so it must stay self-contained:
    // it may only use its arguments and page globals, never outer closures.
    const describeElement = (node, resolvedSelector) => {
        if (!(node instanceof HTMLElement))
            return null;
        const rect = node.getBoundingClientRect();
        const vw = window.innerWidth;
        const vh = window.innerHeight;
        // Size of the part of the element that intersects the viewport.
        const visibleWidth = Math.max(0, Math.min(rect.right, vw) - Math.max(rect.left, 0));
        const visibleHeight = Math.max(0, Math.min(rect.bottom, vh) - Math.max(rect.top, 0));
        const inViewport = visibleWidth > 0 && visibleHeight > 0;
        const fullyVisible = rect.top >= 0 && rect.left >= 0 && rect.bottom <= vh && rect.right <= vw;
        // Text carried by the element's direct text nodes only (descendants excluded).
        const getOwnText = (el) => {
            let text = '';
            for (const child of el.childNodes) {
                if (child.nodeType === Node.TEXT_NODE) {
                    text += `${(child.textContent || '').trim()} `;
                }
            }
            return text.trim();
        };
        // Short label, capped at 80 characters.
        const text = (() => {
            if (node.tagName === 'INPUT' || node.tagName === 'TEXTAREA') {
                const input = node;
                return (input.placeholder || input.value || '').slice(0, 80);
            }
            const shallow = getOwnText(node);
            if (shallow)
                return shallow.slice(0, 80);
            return ((node.textContent || '').trim() || node.getAttribute('title') || node.getAttribute('aria-label') || '').slice(0, 80);
        })();
        // Explicit role attribute wins; otherwise derive one from the tag name.
        const role = node.getAttribute('role')
            || (node.tagName === 'A' ? 'link' : '')
            || (node.tagName === 'BUTTON' ? 'button' : '')
            || (node.tagName === 'INPUT' ? `input[${node.type}]` : '')
            || node.tagName.toLowerCase();
        const inputType = (node.tagName === 'INPUT' || node.tagName === 'SELECT' || node.tagName === 'TEXTAREA')
            ? node.type || node.tagName.toLowerCase()
            : null;
        return {
            index: -1,
            tag: node.tagName.toLowerCase(),
            role,
            text,
            ariaLabel: node.getAttribute('aria-label'),
            ariaControls: node.getAttribute('aria-controls'),
            ariaExpanded: node.getAttribute('aria-expanded'),
            ariaHasPopup: node.getAttribute('aria-haspopup'),
            href: node.tagName === 'A' ? node.href : null,
            inputType,
            boundingBox: {
                x: Math.round(rect.x),
                y: Math.round(rect.y),
                width: Math.round(rect.width),
                height: Math.round(rect.height),
            },
            selector: resolvedSelector,
            visible: inViewport,
            visibilityState: !inViewport ? 'offscreen' : fullyVisible ? 'full' : 'partial',
        };
    };
    // Describe an element that is already held as an ElementHandle.
    const inspectElementHandle = async (element, selectorValue = '') => {
        return element.evaluate(describeElement, selectorValue);
    };
    // Resolve a node in the page (by CSS selector or by viewport point) and
    // describe it with the same routine used for locator-resolved handles.
    const inspectNode = async (mode, value) => {
        const handle = await page.evaluateHandle(({ mode, value }) => (mode === 'selector'
            ? document.querySelector(value)
            : document.elementFromPoint(value.x, value.y)), { mode, value });
        try {
            const element = handle.asElement();
            if (!element)
                return null;
            return await inspectElementHandle(element, mode === 'selector' ? value : '');
        }
        finally {
            // Always release the handle, including when the lookup found nothing.
            await handle.dispose().catch(() => { });
        }
    };
    if (selector) {
        const selectorCandidates = splitSelectorCandidates(selector);
        const candidates = selectorCandidates.length > 0 ? selectorCandidates : [selector];
        for (const candidate of candidates) {
            try {
                const locator = page.locator(candidate).first();
                const count = await locator.count();
                if (count === 0)
                    continue;
                const handle = await locator.elementHandle();
                if (handle) {
                    const target = await inspectElementHandle(handle, candidate).catch(() => null);
                    await handle.dispose().catch(() => { });
                    if (target)
                        return target;
                }
            }
            catch {
                // Fall through to the DOM query fallback below.
            }
            const target = await inspectNode('selector', candidate).catch(() => null);
            if (target)
                return target;
        }
    }
    if (coordinates) {
        const target = await inspectNode('coordinates', coordinates).catch(() => null);
        if (target)
            return target;
    }
    return null;
}
2220
- // ── Step execution ────────────────────────────────────────────────────
2221
/**
 * Execute an assertion step (`assert_url`, `assert_text`, `assert_element`
 * or `assert_page`) against the browser's current page.
 *
 * Does nothing when the `VIDEO_ASSERT_STEPS` feature flag is disabled.
 * Polled assertions are re-evaluated every 250 ms until they hold or
 * `step.timeoutMs` (default 6000 ms) elapses.
 *
 * @param browser - Browser wrapper exposing `currentPage` and `capturePageSignals()`.
 * @param step - Assertion step; the fields read depend on `step.type`.
 * @returns {Promise<void>} Resolves when the assertion holds.
 * @throws {Error} When the assertion does not hold before the timeout, when a
 *   required field (`selector`, `pageExpectation`) is missing, or when
 *   `step.type` is not a supported assert type.
 */
async function executeAssertStep(browser, step) {
    if (!isFeatureEnabled('VIDEO_ASSERT_STEPS', true)) {
        return;
    }
    const page = browser.currentPage;
    const timeoutMs = step.timeoutMs ?? 6000;
    const deadline = Date.now() + timeoutMs;
    // Poll `predicate` until it returns truthy or the deadline passes.
    // `describeFailure` is invoked only on failure, so the message reports the
    // page state at the moment of failure, not the state before polling began.
    const waitForCondition = async (predicate, describeFailure) => {
        for (;;) {
            try {
                if (await predicate())
                    return;
            }
            catch {
                // Keep retrying until timeout
            }
            // The deadline is tested after the predicate, so the condition is
            // evaluated at least once even with a zero or tiny timeout.
            if (Date.now() >= deadline)
                break;
            await page.waitForTimeout(250);
        }
        throw new Error(describeFailure());
    };
    if (step.type === 'assert_url') {
        const expected = step.urlPattern ?? step.url ?? '';
        const mode = step.matchMode ?? 'contains';
        await waitForCondition(async () => {
            const currentUrl = page.url();
            if (mode === 'equals') {
                return normalizeUrlForComparison(currentUrl) === normalizeUrlForComparison(expected);
            }
            return urlPatternMatches(currentUrl, expected, mode);
        }, () => `assert_url failed (mode=${mode}, expected=${expected}, actual=${page.url()})`);
        return;
    }
    if (step.type === 'assert_text') {
        const expectedText = step.text ?? '';
        const mode = step.matchMode ?? 'contains';
        await waitForCondition(async () => {
            if (step.scopeSelector) {
                // Restrict the comparison to the first element matching the scope.
                const locator = page.locator(step.scopeSelector).first();
                const text = (await locator.innerText({ timeout: 1500 })).trim();
                return matchStringWithMode(text, expectedText, mode);
            }
            const bodyText = await page.evaluate(() => document.body?.innerText ?? '');
            return matchStringWithMode(bodyText.trim(), expectedText, mode);
        }, () => `assert_text failed (mode=${mode}, expected=${expectedText})`);
        return;
    }
    if (step.type === 'assert_element') {
        if (!step.selector)
            throw new Error('assert_element step missing selector');
        const state = step.state ?? 'visible';
        const locator = page.locator(step.selector).first();
        // Playwright performs its own waiting here; no manual polling needed.
        await locator.waitFor({ state, timeout: timeoutMs });
        return;
    }
    if (step.type === 'assert_page') {
        if (!step.pageExpectation)
            throw new Error('assert_page step missing pageExpectation');
        await waitForCondition(async () => {
            const signals = await browser.capturePageSignals();
            const verdict = await evaluatePageExpectation(page, signals, step.pageExpectation);
            return verdict.ok;
        }, () => {
            const expectation = JSON.stringify(step.pageExpectation);
            return `assert_page failed (expectation=${expectation}, actualUrl=${page.url()})`;
        });
        return;
    }
    throw new Error(`Unsupported assert step type: ${step.type}`);
}
2290
/**
 * Execute a single video step in the browser.
 * In 'dry_run' mode: instant execution (no animation).
 * In 'recording' mode: animated mouse + human-like typing.
 *
 * Errors raised while executing the step are not thrown; they are reported
 * through the `error` field of the result so the caller can decide how to
 * recover. A result without `error` means the step succeeded.
 *
 * @param browser - Browser wrapper (`currentPage`, `navigateTo`, `captureObservation`,
 *   `waitForPageReaction`, `capturePageSignals`, `dismissOverlays`, ...).
 * @param step - Plan step to execute; credentials are resolved into it first.
 * @param {'dry_run'|'recording'} mode - Execution mode.
 * @param {{x: number, y: number}} currentMousePos - Last known cursor position.
 * @param credentials - Credentials passed to `resolveStepCredentials`.
 * @returns {Promise<{newMousePos: object, error?: string, reactionSummary?: string, reactionChanged?: boolean}>}
 */
async function executePlanStep(browser, step, mode, currentMousePos, credentials) {
    const page = browser.currentPage;
    const activeStep = resolveStepCredentials(step, credentials);
    // Only capture observation in dry-run mode — reaction checks add latency
    // (polling every 120ms) with no benefit during recording.
    const beforeObservation = mode === 'dry_run' && shouldCaptureVideoStepReaction(activeStep)
        ? await browser.captureObservation().catch(() => null)
        : null;
    try {
        if (activeStep.waitMs && activeStep.type !== 'wait') {
            await page.waitForTimeout(activeStep.waitMs);
        }
        if (containsInternalAutomationSelector(activeStep.selector) || containsInternalAutomationSelector(activeStep.toSelector)) {
            throw new Error('Internal automation selector is not allowed in executable steps');
        }
        if (activeStep.type === 'assert_page' && activeStep.pageExpectation?.selectors?.some((selector) => containsInternalAutomationSelector(selector))) {
            throw new Error('Internal automation selector is not allowed in assert_page expectations');
        }
        if (activeStep.expectedPageAfter?.selectors?.some((selector) => containsInternalAutomationSelector(selector))) {
            throw new Error('Internal automation selector is not allowed in expectedPageAfter expectations');
        }
        switch (activeStep.type) {
            case 'navigate': {
                if (!activeStep.url)
                    throw new Error('navigate step missing url');
                // In recording mode: skip the navigation if we're already on the target page.
                // This prevents a visible "refresh" flicker caused by consecutive navigate steps
                // to the same URL (e.g. when the fixer replaces a highlight with a navigate but
                // the next step was originally also a navigate to the same destination).
                if (mode === 'recording') {
                    // Query/hash must be removed BEFORE trailing slashes, otherwise
                    // "/path/?q=1" normalizes to "/path/" and never equals "/path".
                    const normalizeUrl = (u) => u
                        .trim()
                        .toLowerCase()
                        .replace(/^https?:\/\//, '')
                        .replace(/^www\./, '')
                        .replace(/[?#].*$/, '')
                        .replace(/\/+$/, '');
                    if (normalizeUrl(page.url()) === normalizeUrl(activeStep.url)) {
                        await page.waitForTimeout(activeStep.postStepWaitMs ?? 500);
                        break;
                    }
                }
                const before = await browser.captureObservation().catch(() => undefined);
                await browser.navigateTo(activeStep.url);
                await browser.waitForPageReaction(before, {
                    timeoutMs: activeStep.postStepWaitMs ?? 2500,
                    settleMs: 350,
                });
                await dismissOverlaysWithLogging(browser, {
                    context: `execute step navigate (${activeStep.description})`,
                });
                break;
            }
            case 'dismiss_overlays': {
                await browser.dismissOverlays();
                await page.waitForTimeout(activeStep.postStepWaitMs ?? 600);
                break;
            }
            case 'wait': {
                // Cap explicit waits in recording mode — the postStepWaitMs on the
                // preceding action already provides settling time. Long waits were
                // designed for dry-run verification; in recording they create dead air.
                const rawWaitMs = activeStep.waitMs ?? 1000;
                const waitMs = mode === 'recording' ? Math.min(rawWaitMs, 700) : rawWaitMs;
                await page.waitForTimeout(waitMs);
                break;
            }
            case 'click': {
                let target = null;
                if (activeStep.selector) {
                    // Quick first attempt — if the element is present, find it immediately.
                    target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    // One retry after a short wait — popovers may take a moment to render.
                    if (!target) {
                        await page.waitForTimeout(400);
                        target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    }
                }
                // Fallback: use the same interactive element detection as the screenshot agent.
                if (!target) {
                    target = await findElementByDescription(browser, activeStep.description);
                }
                if (!target && activeStep.coordinates) {
                    target = activeStep.coordinates;
                }
                if (!target)
                    throw new Error(`No clickable target found (selector: ${activeStep.selector})`);
                const before = await browser.captureObservation().catch(() => undefined);
                if (mode === 'recording') {
                    await animatedClick(page, target, currentMousePos);
                }
                else {
                    await page.mouse.click(target.x, target.y);
                }
                // Dynamic wait — detects when popovers/modals/transitions are truly settled
                await browser.waitForPageReaction(before, {
                    timeoutMs: activeStep.postStepWaitMs ?? 1500,
                    settleMs: 250,
                });
                await dismissOverlaysWithLogging(browser, {
                    context: `execute step click (${activeStep.description})`,
                });
                currentMousePos = target;
                break;
            }
            case 'type': {
                // First click the field (with animation in recording mode)
                let target = null;
                if (activeStep.selector) {
                    target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    if (!target) {
                        await page.waitForTimeout(mode === 'recording' ? 120 : 400);
                        target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    }
                }
                if (!target && activeStep.coordinates) {
                    target = activeStep.coordinates;
                }
                if (target) {
                    if (mode === 'recording') {
                        await animatedClick(page, target, currentMousePos);
                    }
                    else {
                        await page.mouse.click(target.x, target.y);
                    }
                    currentMousePos = target;
                }
                else if (activeStep.selector) {
                    // Fallback: use Playwright fill with selector candidates
                    const selectorCandidates = activeStep.selector
                        .split(',')
                        .map((s) => s.trim())
                        .filter(Boolean);
                    const fillTargets = selectorCandidates.length > 0 ? selectorCandidates : [activeStep.selector];
                    let filled = false;
                    for (const candidate of fillTargets) {
                        try {
                            const locator = page.locator(candidate).first();
                            await locator.waitFor({ state: 'visible', timeout: pickModeTimeout(mode, 1200, 4500) });
                            await locator.fill(activeStep.text ?? '', { timeout: pickModeTimeout(mode, 1200, 2500) });
                            filled = true;
                            break;
                        }
                        catch {
                            // Try next selector candidate
                        }
                    }
                    if (filled) {
                        const waitMs = activeStep.postStepWaitMs ?? 600;
                        await page.waitForTimeout(waitMs);
                        break;
                    }
                    throw new Error(`Could not find element to type into (selector: ${activeStep.selector})`);
                }
                // Clear existing content and type
                await page.keyboard.press('Control+A');
                await page.waitForTimeout(100);
                if (mode === 'recording' && activeStep.text) {
                    await humanType(page, activeStep.text);
                }
                else if (activeStep.text) {
                    await page.keyboard.type(activeStep.text);
                }
                const waitMs = activeStep.postStepWaitMs ?? 500;
                await page.waitForTimeout(waitMs);
                break;
            }
            case 'select_option': {
                if (!activeStep.selector)
                    throw new Error('select_option step missing selector');
                // Precedence: label, then value, then index.
                const option = typeof activeStep.optionLabel === 'string' ? { label: activeStep.optionLabel }
                    : typeof activeStep.optionValue === 'string' ? { value: activeStep.optionValue }
                        : typeof activeStep.optionIndex === 'number' ? { index: activeStep.optionIndex }
                            : null;
                if (!option)
                    throw new Error('select_option step missing optionLabel/optionValue/optionIndex');
                const locator = page.locator(activeStep.selector).first();
                await locator.waitFor({ state: 'visible', timeout: pickModeTimeout(mode, 1200, 4500) });
                try {
                    await locator.scrollIntoViewIfNeeded({ timeout: pickModeTimeout(mode, 800, 1200) });
                }
                catch {
                    // Non-blocking
                }
                if (mode === 'recording') {
                    const target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    if (target) {
                        await animatedClick(page, target, currentMousePos);
                        currentMousePos = target;
                    }
                }
                await locator.selectOption(option, { timeout: pickModeTimeout(mode, 1500, 3000) });
                await page.waitForTimeout(activeStep.postStepWaitMs ?? 500);
                break;
            }
            case 'scroll': {
                // During recording, move the cursor to the center of the viewport before scrolling.
                // Without this, the cursor stays at its previous position (possibly off-screen)
                // and the viewer doesn't see an active cursor during scroll-only clips.
                if (mode === 'recording') {
                    // viewportSize() can be null (no fixed viewport); skip the purely
                    // cosmetic cursor move instead of failing the whole scroll step.
                    const viewport = page.viewportSize();
                    if (viewport) {
                        const vpCenter = { x: Math.round(viewport.width / 2), y: Math.round(viewport.height / 2) };
                        await moveMouse(page, currentMousePos, vpCenter, { durationMs: 300 });
                        currentMousePos = vpCenter;
                    }
                }
                // If a selector is provided, scroll that element to the center of the viewport.
                // This is far more precise than guessing pixel amounts.
                let scrollHandled = false;
                const namedTarget = resolveNamedScrollTarget(activeStep);
                const namedSearchSelectors = namedTarget
                    ? dedupeSelectors(buildIntentSelectorCandidates({ ...activeStep, type: 'scroll' })).slice(0, mode === 'recording' ? 6 : 12)
                    : [];
                const namedProbeVisibleTimeout = namedTarget
                    ? pickModeTimeout(mode, 120, 320)
                    : pickModeTimeout(mode, 250, 600);
                const namedProbeWaitMs = namedTarget
                    ? pickModeTimeout(mode, 220, 320)
                    : activeStep.postStepWaitMs ?? (mode === 'recording' ? 800 : 400);
                // True when the scroll target is visible and meaningfully inside the viewport.
                const isScrollTargetVisible = async (selector) => {
                    if (namedTarget) {
                        try {
                            const domState = await queryNamedTargetDomState(page, namedTarget, false, mode);
                            if (domState.visible) {
                                return true;
                            }
                        }
                        catch {
                            // Fall through to selector-based checks
                        }
                    }
                    const candidates = selector
                        ? splitSelectorCandidates(selector)
                        : activeStep.selector
                            ? splitSelectorCandidates(activeStep.selector)
                            : [];
                    const targetCandidates = namedTarget
                        ? dedupeSelectors([...candidates, ...namedSearchSelectors])
                        : candidates;
                    for (const candidate of targetCandidates) {
                        try {
                            const locator = page.locator(candidate).first();
                            if (await locator.isVisible({ timeout: namedProbeVisibleTimeout }) && await isLocatorMeaningfullyInViewport(locator)) {
                                return true;
                            }
                        }
                        catch {
                            // Try next candidate
                        }
                    }
                    return false;
                };
                if (activeStep.selector) {
                    const selectorCandidates = splitSelectorCandidates(activeStep.selector);
                    for (const candidate of selectorCandidates) {
                        try {
                            const locator = page.locator(candidate).first();
                            await locator.waitFor({ state: 'attached', timeout: namedTarget ? pickModeTimeout(mode, 350, 1200) : pickModeTimeout(mode, 1200, 3000) });
                            const behavior = mode === 'recording' ? 'smooth' : 'instant';
                            await locator.evaluate((el, b) => {
                                el.scrollIntoView({ block: 'center', behavior: b });
                            }, behavior);
                            await page.waitForTimeout(namedProbeWaitMs);
                            if (await isScrollTargetVisible(candidate)) {
                                scrollHandled = true;
                                break;
                            }
                        }
                        catch {
                            // Try next candidate
                        }
                    }
                    // Fallback: Playwright's built-in scrollIntoViewIfNeeded
                    if (!scrollHandled) {
                        for (const candidate of selectorCandidates) {
                            try {
                                const locator = page.locator(candidate).first();
                                await locator.scrollIntoViewIfNeeded({ timeout: namedTarget ? pickModeTimeout(mode, 350, 1200) : pickModeTimeout(mode, 900, 2000) });
                                await page.waitForTimeout(namedProbeWaitMs);
                                if (await isScrollTargetVisible(candidate)) {
                                    scrollHandled = true;
                                    break;
                                }
                            }
                            catch {
                                // Try next candidate
                            }
                        }
                    }
                }
                // Text-based fallback: extract quoted section name from description
                if (!scrollHandled) {
                    const quotedTarget = extractQuotedPhrases(activeStep.description)[0];
                    if (quotedTarget) {
                        try {
                            const textLocator = page.locator(`text="${quotedTarget}"`).first();
                            await textLocator.scrollIntoViewIfNeeded({ timeout: namedTarget ? pickModeTimeout(mode, 350, 1200) : pickModeTimeout(mode, 900, 2000) });
                            await page.waitForTimeout(namedProbeWaitMs);
                            scrollHandled = await isScrollTargetVisible(`text="${quotedTarget}"`);
                        }
                        catch {
                            // Fall through to pixel scroll
                        }
                    }
                }
                if (!scrollHandled && namedTarget) {
                    try {
                        const domGroundedScroll = await scrollNamedTargetIntoViewFromDom(page, namedTarget, mode);
                        if (domGroundedScroll) {
                            await page.waitForTimeout(namedProbeWaitMs);
                            scrollHandled = await isScrollTargetVisible();
                        }
                    }
                    catch {
                        // Fall through to incremental search
                    }
                }
                if (!scrollHandled && namedTarget) {
                    // Incremental search: page down roughly one viewport at a time,
                    // probing for the named target after every move.
                    const viewportHeight = page.viewportSize()?.height ?? 900;
                    const searchDeltaY = Math.round(viewportHeight * (mode === 'recording' ? 1.05 : 0.95));
                    const scrollMetrics = await page.evaluate(() => ({
                        scrollY: Math.round(window.scrollY),
                        maxScrollY: Math.max(0, Math.round(document.documentElement.scrollHeight - window.innerHeight)),
                    }));
                    const remainingScroll = Math.max(0, scrollMetrics.maxScrollY - scrollMetrics.scrollY);
                    const minimumAttempts = mode === 'recording' ? 6 : 10;
                    const maximumAttempts = mode === 'recording' ? 36 : 48;
                    const searchAttempts = Math.min(maximumAttempts, Math.max(minimumAttempts, Math.ceil(remainingScroll / Math.max(1, searchDeltaY)) + 1));
                    const searchScrollPauseMs = mode === 'recording' ? 320 : 220;
                    for (let searchAttempt = 0; searchAttempt < searchAttempts && !scrollHandled; searchAttempt += 1) {
                        for (const candidate of namedSearchSelectors) {
                            try {
                                const locator = page.locator(candidate).first();
                                if (await locator.count() === 0)
                                    continue;
                                await locator.scrollIntoViewIfNeeded({ timeout: pickModeTimeout(mode, 450, 1200) });
                                await page.waitForTimeout(namedProbeWaitMs);
                                if (await isScrollTargetVisible(candidate)) {
                                    scrollHandled = true;
                                    break;
                                }
                            }
                            catch {
                                // Try next selector candidate
                            }
                        }
                        if (scrollHandled)
                            break;
                        if (mode === 'recording') {
                            await page.evaluate(({ deltaY }) => window.scrollBy({ top: deltaY, behavior: 'smooth' }), { deltaY: searchDeltaY });
                            await page.waitForTimeout(searchScrollPauseMs);
                        }
                        else {
                            await page.evaluate(({ deltaY }) => window.scrollBy(0, deltaY), { deltaY: searchDeltaY });
                            await page.waitForTimeout(searchScrollPauseMs);
                        }
                        if (!scrollHandled) {
                            try {
                                const domGroundedScroll = await scrollNamedTargetIntoViewFromDom(page, namedTarget, mode);
                                if (domGroundedScroll) {
                                    await page.waitForTimeout(namedProbeWaitMs);
                                    scrollHandled = await isScrollTargetVisible();
                                }
                            }
                            catch {
                                // Continue the incremental search.
                            }
                        }
                    }
                }
                if (!scrollHandled && namedTarget) {
                    let diagnostic = '';
                    try {
                        const domState = await queryNamedTargetDomState(page, namedTarget, false, mode);
                        const metrics = await page.evaluate(() => ({
                            scrollY: Math.round(window.scrollY),
                            scrollHeight: Math.round(document.documentElement.scrollHeight),
                            viewportHeight: Math.round(window.innerHeight),
                        }));
                        diagnostic = ` (domFound=${domState.found}, domVisible=${domState.visible}, scrollY=${metrics.scrollY}, scrollHeight=${metrics.scrollHeight}, viewportHeight=${metrics.viewportHeight})`;
                    }
                    catch {
                        // Keep the original failure if diagnostic capture fails.
                    }
                    throw new Error(`Could not reveal named section target: ${namedTarget}${diagnostic}`);
                }
                // Pixel-based scroll (generic scroll with no specific named target)
                if (!scrollHandled) {
                    const direction = activeStep.direction ?? 'down';
                    const amount = activeStep.amount ?? 400;
                    const dx = direction === 'right' ? amount : direction === 'left' ? -amount : 0;
                    const dy = direction === 'down' ? amount : direction === 'up' ? -amount : 0;
                    if (mode === 'recording') {
                        await page.evaluate(({ deltaX, deltaY }) => {
                            window.scrollBy({ left: deltaX, top: deltaY, behavior: 'smooth' });
                        }, { deltaX: dx, deltaY: dy });
                        await page.waitForTimeout(500);
                    }
                    else {
                        await page.evaluate(({ deltaX, deltaY }) => window.scrollBy(deltaX, deltaY), { deltaX: dx, deltaY: dy });
                    }
                    const waitMs = activeStep.postStepWaitMs ?? 500;
                    await page.waitForTimeout(waitMs);
                }
                break;
            }
            case 'hover': {
                let target = null;
                if (activeStep.selector) {
                    target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    if (!target) {
                        await page.waitForTimeout(mode === 'recording' ? 120 : 400);
                        target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    }
                }
                if (!target && activeStep.coordinates) {
                    target = activeStep.coordinates;
                }
                if (!target)
                    throw new Error(`No hover target found (selector: ${activeStep.selector})`);
                if (mode === 'recording') {
                    await animatedHover(page, target, currentMousePos);
                }
                else {
                    await page.mouse.move(target.x, target.y);
                }
                currentMousePos = target;
                const waitMs = activeStep.postStepWaitMs ?? 300;
                await page.waitForTimeout(waitMs);
                break;
            }
            case 'drag': {
                let fromTarget = null;
                let toTarget = null;
                if (activeStep.selector) {
                    fromTarget = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                }
                if (!fromTarget && activeStep.coordinates) {
                    fromTarget = activeStep.coordinates;
                }
                if (activeStep.toSelector) {
                    toTarget = await getElementCenter(browser, activeStep.toSelector, activeStep.description, mode);
                }
                if (!toTarget && activeStep.toCoordinates) {
                    toTarget = activeStep.toCoordinates;
                }
                if (!fromTarget || !toTarget) {
                    throw new Error('drag step missing a valid start or end target');
                }
                if (mode === 'recording') {
                    await moveMouse(page, currentMousePos, fromTarget, { durationMs: 500 });
                    await page.waitForTimeout(120);
                    await page.mouse.down();
                    await moveMouse(page, fromTarget, toTarget, {
                        durationMs: activeStep.durationMs ?? 900,
                        steps: 40,
                    });
                    await page.mouse.up();
                }
                else {
                    await page.mouse.move(fromTarget.x, fromTarget.y);
                    await page.mouse.down();
                    await page.mouse.move(toTarget.x, toTarget.y, { steps: 10 });
                    await page.mouse.up();
                }
                currentMousePos = toTarget;
                await page.waitForTimeout(activeStep.postStepWaitMs ?? 700);
                break;
            }
            case 'highlight': {
                // Like hover, but with a longer pause to draw viewer attention
                let target = null;
                if (activeStep.selector) {
                    target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    if (!target) {
                        await page.waitForTimeout(mode === 'recording' ? 120 : 300);
                        target = await getElementCenter(browser, activeStep.selector, activeStep.description, mode);
                    }
                }
                if (!target && activeStep.coordinates) {
                    target = activeStep.coordinates;
                }
                if (!target) {
                    // Highlight is non-critical — skip silently if target not found
                    break;
                }
                if (mode === 'recording') {
                    await animatedHover(page, target, currentMousePos);
                    // Extra pause for emphasis
                    await page.waitForTimeout(150);
                }
                else {
                    await page.mouse.move(target.x, target.y);
                }
                currentMousePos = target;
                const waitMs = activeStep.postStepWaitMs ?? 400;
                await page.waitForTimeout(waitMs);
                break;
            }
            case 'key': {
                if (!activeStep.key)
                    throw new Error('key step missing key field');
                await page.keyboard.press(activeStep.key);
                const waitMs = activeStep.postStepWaitMs ?? 300;
                await page.waitForTimeout(waitMs);
                break;
            }
            case 'assert_url':
            case 'assert_text':
            case 'assert_element':
            case 'assert_page': {
                await executeAssertStep(browser, activeStep);
                break;
            }
        }
        // Destination checks only apply to step types that can change the page.
        const shouldEnforceExpectedPageAfter = !!activeStep.expectedPageAfter
            && (activeStep.type === 'navigate'
                || activeStep.type === 'click'
                || activeStep.type === 'select_option'
                || activeStep.type === 'type'
                || activeStep.type === 'key');
        if (shouldEnforceExpectedPageAfter && activeStep.expectedPageAfter) {
            const deadline = Date.now() + (activeStep.timeoutMs ?? pickModeTimeout(mode, 2500, 5000));
            let verdict = null;
            while (Date.now() < deadline) {
                const signals = await browser.capturePageSignals().catch(() => null);
                if (signals) {
                    verdict = await evaluatePageExpectation(page, signals, activeStep.expectedPageAfter);
                    if (verdict.ok) {
                        break;
                    }
                }
                await page.waitForTimeout(200);
            }
            if (!verdict?.ok) {
                const reason = verdict
                    ? `Expected destination not reached (confidence=${verdict.confidence.toFixed(2)}; ${verdict.reasons.join(', ')})`
                    : 'Expected destination not reached';
                return {
                    newMousePos: currentMousePos,
                    error: reason,
                };
            }
        }
        let reactionSummary;
        let reactionChanged;
        if (beforeObservation) {
            const reaction = await browser.waitForPageReaction(beforeObservation, getVideoStepReactionOptions(activeStep)).catch(() => null);
            if (reaction) {
                reactionSummary = reaction.summary;
                reactionChanged = reaction.changed;
                if (activeStep.type !== 'navigate' && reaction.before.url !== reaction.after.url) {
                    // The step navigated as a side effect: re-run overlay cleanup on the new page.
                    const cleanup = await dismissOverlaysWithLogging(browser, {
                        context: `reaction cleanup (${activeStep.description})`,
                    });
                    if (cleanup.dismissed) {
                        const cleanupReaction = await browser.waitForPageReaction(reaction.after, {
                            timeoutMs: 1200,
                            settleMs: 200,
                        }).catch(() => null);
                        reactionSummary = cleanupReaction
                            ? `${reaction.summary}; cleanup: ${cleanupReaction.summary}; page cleanup reapplied after navigation.`
                            : `${reaction.summary}; page cleanup reapplied after navigation.`;
                        reactionChanged = reaction.changed || cleanupReaction?.changed || reactionChanged;
                    }
                    else {
                        reactionSummary = `${reaction.summary}; page cleanup checked after navigation.`;
                    }
                }
                if (shouldRequireVideoStepReaction(activeStep) && !reactionChanged) {
                    return {
                        newMousePos: currentMousePos,
                        // Callers treat a falsy `error` as success, so never return an
                        // empty message for a failed reaction check.
                        error: reactionSummary || 'Step produced no observable page reaction',
                        reactionSummary,
                        reactionChanged,
                    };
                }
            }
        }
        return { newMousePos: currentMousePos, reactionSummary, reactionChanged };
    }
    catch (err) {
        // Non-Error throws (strings, plain objects) have no usable `message`;
        // stringify them so the failure is never mistaken for success.
        const message = err instanceof Error && err.message ? err.message : String(err);
        return {
            newMousePos: currentMousePos,
            error: message,
        };
    }
}
2877
/**
 * Decide whether debug artifacts should be captured for a step attempt.
 *
 * Capture is disabled entirely when the `VIDEO_DEBUG_ARTIFACTS` feature flag
 * is off. Otherwise the policy comes from `config.videoOptions.debugArtifacts`
 * (default `'failed_only'`):
 *   - `'off'`          never capture
 *   - `'all_dry_run'`  capture every dry-run step, plus any failed step
 *   - anything else    capture failed steps only
 *
 * @param config - Run configuration; only `videoOptions.debugArtifacts` is read.
 * @param {string} phase - Current phase; compared against `'dry_run'`.
 * @param {boolean} success - Whether the step attempt succeeded.
 * @returns {boolean} True when an artifact should be captured.
 */
function shouldCaptureStepArtifact(config, phase, success) {
    const artifactsEnabled = isFeatureEnabled('VIDEO_DEBUG_ARTIFACTS', true);
    if (!artifactsEnabled) {
        return false;
    }
    const failed = !success;
    const artifactPolicy = config.videoOptions?.debugArtifacts ?? 'failed_only';
    switch (artifactPolicy) {
        case 'off':
            return false;
        case 'all_dry_run':
            return phase === 'dry_run' || failed;
        default:
            return failed;
    }
}
2888
/**
 * Executes one plan step and, when it fails, runs a bounded recovery sequence:
 *   1. primary attempt with the step's full selector string,
 *   2. one attempt per remembered fallback selector not already covered,
 *   3. a single retry of the primary step after a 500 ms pause.
 *
 * Every attempt is first vetted by `evaluateVideoStepSafety`, recorded in
 * `traces`, and reported through `options.callbacks.onStepAttempt`.
 *
 * @param {object} browser - Browser wrapper; `currentPage`, `getInteractiveElements`
 *   and `takeScreenshotForAI` are used here.
 * @param {object} step - The plan step to execute.
 * @param {object} options - `{ config, callbacks, mode, stepIndex, currentMousePos, selectorFallbacks? }`.
 * @returns {Promise<object>} `{ success, reason?, step, newMousePos, traces }`, where `step`
 *   is the step variant that was last relevant (the fallback candidate on fallback success).
 */
async function executeStepWithDeterministicRecovery(browser, step, options) {
    const { config, callbacks, mode, stepIndex } = options;
    const cancelMessage = 'Video run cancelled.';
    throwIfAborted(config.abortSignal, cancelMessage);

    const page = browser.currentPage;
    const activeStep = specializeStepSelectorForIntent(step);
    const traces = [];
    let mousePos = options.currentMousePos;
    let attemptCount = 0;

    // Stores the attempt outcome and notifies the listener, in that order.
    const recordTrace = (strategy, selector, success, reason) => {
        const trace = {
            stepId: step.id,
            stepIndex,
            attemptIndex: attemptCount,
            strategy,
            selector,
            success,
            reason,
        };
        traces.push(trace);
        callbacks.onStepAttempt?.(trace);
    };

    // Forwards a screenshot to the live-preview listener when both exist.
    const emitPreview = (shot) => {
        if (shot && callbacks.onScreenshot) {
            callbacks.onScreenshot(shot.toString('base64'));
        }
    };

    const runAttempt = async (strategy, attemptStep, selector) => {
        throwIfAborted(config.abortSignal, cancelMessage);
        const interactiveElements = await browser.getInteractiveElements().catch(() => []);
        const safety = await evaluateVideoStepSafety(attemptStep, config, page, page.url(), interactiveElements);
        if (!safety.allowed) {
            recordTrace(strategy, selector, false, safety.reason);
            attemptCount += 1;
            return { success: false, reason: safety.reason };
        }

        // The outcome is not known yet, so `false` is passed as the success flag.
        const captureBefore = shouldCaptureStepArtifact(config, mode, false);
        const urlBefore = page.url();
        const beforeScreenshot = captureBefore
            ? await browser.takeScreenshotForAI().catch(() => undefined)
            : undefined;

        const execResult = await executePlanStep(browser, attemptStep, mode, mousePos, config.credentials);
        mousePos = execResult.newMousePos;
        const reason = execResult.error;
        const success = !reason;
        const urlAfter = page.url();
        recordTrace(strategy, selector, success, reason);

        if (shouldCaptureStepArtifact(config, mode, success)) {
            const afterScreenshot = await browser.takeScreenshotForAI().catch(() => undefined);
            callbacks.onStepArtifact?.({
                runId: config.runId,
                variantId: config.variantId,
                stepId: step.id,
                stepIndex,
                attemptIndex: attemptCount,
                phase: mode,
                strategy,
                urlBefore,
                urlAfter,
                selectorUsed: selector,
                reason,
                beforeScreenshot,
                afterScreenshot,
            });
            // Reuse the screenshot that was just captured for the live preview.
            emitPreview(afterScreenshot);
        }
        else {
            // No artifact needed: the pre-action screenshot doubles as the preview.
            emitPreview(beforeScreenshot);
        }
        attemptCount += 1;
        return { success, reason };
    };

    const selectorCandidates = splitSelectorCandidates(activeStep.selector);
    // The primary attempt receives the whole comma-separated selector list so the
    // element lookup inside executePlanStep can consider every candidate at once.
    const primary = await runAttempt('primary', activeStep, activeStep.selector);
    const recoveryApplies = !primary.success
        && shouldUseDeterministicRecovery(step)
        && isFeatureEnabled('VIDEO_DETERMINISTIC_RECOVERY', true);
    if (!recoveryApplies) {
        return {
            success: primary.success,
            reason: primary.reason,
            step: activeStep,
            newMousePos: mousePos,
            traces,
        };
    }

    // All selector candidates were already tried by the primary attempt, so only
    // remembered selectors that add something new are worth another attempt.
    const rememberedSelectors = dedupeSelectors(options.selectorFallbacks ?? []).filter((selector) => {
        if (!selectorPreservesStepIntent(selector, activeStep)) {
            return false;
        }
        return !selectorCandidates.includes(selector);
    });
    for (const selector of rememberedSelectors) {
        const candidateStep = applyStepSelector(step, selector);
        const outcome = await runAttempt('selector_fallback', candidateStep, selector);
        if (outcome.success) {
            return {
                success: true,
                step: candidateStep,
                newMousePos: mousePos,
                traces,
            };
        }
    }

    // One short pause and retry, in case the element appears after a brief animation.
    await page.waitForTimeout(500);
    const retry = await runAttempt('retry', activeStep, activeStep.selector);
    return {
        success: retry.success,
        reason: retry.reason,
        step: activeStep,
        newMousePos: mousePos,
        traces,
    };
}
3010
/**
 * Captures a verification bundle from the browser and derives the data the
 * step verifier consumes: a textual observation summary, an observation
 * snapshot, and the page URL/title.
 *
 * @param {object} browser - Browser wrapper exposing `captureVideoVerificationBundle`.
 * @param {object} callbacks - Callback bag; `onLog` receives progress messages.
 * @param {number|null|undefined} helperTimeoutMs - Optional timeout forwarded to the
 *   bundle capture; `null` is normalized to `undefined`.
 * @returns {Promise<object>} `{ bundle, observationSummary, observationSnapshot, pageContext }`.
 */
async function captureVideoVerificationArtifacts(browser, callbacks, helperTimeoutMs) {
    const logInfo = (message) => log(message, 'info', callbacks.onLog);

    const bundle = await browser.captureVideoVerificationBundle({
        onRetry: (message) => logInfo(`Verification snapshot retry: ${message}`),
        helperTimeoutMs: helperTimeoutMs ?? undefined,
    });
    logInfo(`Verification snapshot ready [${bundle.coherenceKey}] @ ${bundle.url}`);

    const summaryLimits = {
        maxAccessibilityChars: 2200,
        maxElements: 12,
        maxVisibleTextChars: 360,
    };
    const observationSummary = await captureVideoObservationSummary(bundle, summaryLimits);

    const observationSnapshot = buildVideoObservationSnapshot({
        coherenceKey: bundle.coherenceKey,
        interactiveElements: bundle.interactiveElements,
        pageSignals: bundle.pageSignals,
        pageIdentity: null,
    });
    const pageContext = {
        currentUrl: bundle.url,
        pageTitle: bundle.title,
    };
    return { bundle, observationSummary, observationSnapshot, pageContext };
}
3036
/**
 * Tries to confirm a step's outcome using only page state (DOM, URL, page
 * signals), without calling an LLM.
 *
 * Checks run in a fixed order: one type-specific check (assert_text,
 * assert_url, assert_page, assert_element, or a named scroll target), then
 * the step's `expectedPageAfter` expectation if it has one.
 *
 * @param {object} browser - Browser wrapper; `currentPage` and `capturePageSignals` are used.
 * @param {object} step - The plan step to verify.
 * @returns {Promise<{ok: true, reason: string, usage: object}|null>} A positive verdict
 *   from the first check that passes, or `null` when no check could confirm the step.
 */
export async function verifyVideoStepDeterministically(browser, step) {
    const page = browser.currentPage;

    // Shape of every positive verdict; no LLM tokens are spent on this path.
    const verified = (reason) => ({ ok: true, reason, usage: {} });

    const isNamedTargetVisible = async (name) => {
        const domState = await queryNamedTargetDomState(page, name, false, 'dry_run');
        return domState.found && domState.visible;
    };

    // Evaluates a page expectation against freshly captured signals.
    const checkExpectation = async (expectation, label) => {
        const signals = await browser.capturePageSignals().catch(() => null);
        if (!signals) {
            return null;
        }
        const verdict = await evaluatePageExpectation(page, signals, expectation);
        if (!verdict.ok) {
            return null;
        }
        return verified(`${label} verified deterministically (confidence=${verdict.confidence.toFixed(2)}).`);
    };

    switch (step.type) {
        case 'assert_text': {
            if (step.scopeSelector || !step.text) {
                break;
            }
            try {
                if (await isNamedTargetVisible(step.text)) {
                    return verified(`Asserted text "${step.text}" is already visible.`);
                }
            }
            catch {
                // Fall through to text matching.
            }
            try {
                const bodyText = await page.evaluate(() => document.body?.innerText ?? '');
                if (matchStringWithMode(bodyText, step.text, step.matchMode ?? 'contains')) {
                    return verified(`Asserted text "${step.text}" already matches the current page.`);
                }
            }
            catch {
                // Fall through to the remaining deterministic checks.
            }
            break;
        }
        case 'assert_url': {
            const expected = step.urlPattern ?? step.url ?? '';
            const mode = step.matchMode ?? 'contains';
            const currentUrl = page.url();
            let satisfied;
            if (mode === 'equals') {
                satisfied = normalizeUrlForComparison(currentUrl) === normalizeUrlForComparison(expected);
            }
            else {
                satisfied = urlPatternMatches(currentUrl, expected, mode);
            }
            if (satisfied) {
                return verified(`Current URL already satisfies ${expected}.`);
            }
            break;
        }
        case 'assert_page': {
            if (step.pageExpectation) {
                const outcome = await checkExpectation(step.pageExpectation, 'Page expectation');
                if (outcome) {
                    return outcome;
                }
            }
            break;
        }
        case 'assert_element': {
            if (!step.selector) {
                break;
            }
            try {
                const locator = page.locator(step.selector).first();
                const wantedState = step.state ?? 'visible';
                // Only the 'visible' state is checked here; other states fall through.
                if (wantedState === 'visible' && await locator.isVisible({ timeout: 400 })) {
                    return verified(`Element ${step.selector} is visible.`);
                }
            }
            catch {
                // Fall through to the remaining deterministic checks.
            }
            break;
        }
        case 'scroll': {
            const namedTarget = resolveNamedScrollTarget(step);
            if (!namedTarget) {
                break;
            }
            try {
                if (await isNamedTargetVisible(namedTarget)) {
                    return verified(`Named scroll target "${namedTarget}" is visible.`);
                }
            }
            catch {
                // Fall through to other verification paths.
            }
            break;
        }
        default:
            break;
    }

    if (step.expectedPageAfter) {
        const outcome = await checkExpectation(step.expectedPageAfter, 'Expected destination');
        if (outcome) {
            return outcome;
        }
    }
    return null;
}
3141
/**
 * Asks the LLM verifier whether a step succeeded, based on a screenshot and
 * the textual observation of the page.
 *
 * The model must answer through a tool call. `step_ok` yields `{ ok: true }`,
 * `give_up` yields `{ ok: false, giveUp: true, reason }`, and any other tool
 * yields `{ ok: false, reason, suggestion? }`. A missing tool call or any
 * non-abort error is reported as a give-up verdict.
 *
 * @param {object} client - Chat-completions client (`client.chat.completions.create`).
 * @param {string} model - Model identifier to request.
 * @param {object} step - The step being verified.
 * @param {Buffer} screenshot - Screenshot sent as a JPEG data URL.
 * @param {number} stepIndex - Index of the step within the plan.
 * @param {number} totalSteps - Total number of steps in the plan.
 * @param {*} videoScript - Script context passed to the system prompt builder.
 * @param {object} pageContext - Current URL/title context for the user message.
 * @param {*} observationSummary - Textual page observation for the user message.
 * @param {*} observationSnapshot - Structured page observation for the user message.
 * @param {AbortSignal} [signal] - Cancels the request; abort errors are rethrown.
 * @returns {Promise<object>} Verdict object; always carries `usage`.
 */
async function verifyStep(client, model, step, screenshot, stepIndex, totalSteps, videoScript, pageContext, observationSummary, observationSnapshot, signal) {
    throwIfAborted(signal, 'Video verification cancelled.');
    const userText = buildVideoStepVerificationUserMessage(step, stepIndex, totalSteps, pageContext, observationSummary, observationSnapshot);
    const base64 = screenshot.toString('base64');
    const imageUrl = `data:image/jpeg;base64,${base64}`;
    // Declared outside the try block so the tokens spent on a completed request
    // are still reported when decoding the answer fails afterwards.
    let usage = {};
    try {
        const response = await client.chat.completions.create({
            model,
            messages: [
                { role: 'system', content: buildVideoVerificationSystemPrompt(videoScript) },
                {
                    role: 'user',
                    content: buildVideoPromptContentParts({
                        text: userText,
                        imageUrl,
                        cacheLayoutV2: VIDEO_AGENT_CACHE_LAYOUT_V2,
                    }),
                },
            ],
            tools: videoVerificationTools,
            tool_choice: 'required',
            max_tokens: 300,
            provider: { zdr: true },
        }, { signal });
        usage = extractLlmUsageSnapshot(response);
        const call = response.choices?.[0]?.message?.tool_calls?.[0];
        if (!call || !('function' in call)) {
            return {
                ok: false,
                giveUp: true,
                reason: 'Verification failed: verifier did not return a structured decision.',
                usage,
            };
        }
        const decision = call.function.name;
        if (decision === 'step_ok') {
            // A positive verdict needs no arguments, so a malformed argument string
            // must not turn it into a failure.
            return { ok: true, usage };
        }
        // `?? {}` covers a literal JSON `null` payload.
        const args = JSON.parse(call.function.arguments || '{}') ?? {};
        if (decision === 'give_up') {
            return { ok: false, giveUp: true, reason: String(args.reason || 'Give up requested'), usage };
        }
        return {
            ok: false,
            reason: String(args.reason || 'Step failed'),
            suggestion: args.suggestion ? String(args.suggestion) : undefined,
            usage,
        };
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        return {
            ok: false,
            giveUp: true,
            reason: `Verification failed: ${err.message}`,
            usage,
        };
    }
}
3203
/**
 * When a dry-run step fails, call the LLM with the screenshot and failure context
 * to propose a replacement sequence of steps.
 *
 * The model answers with a JSON object, either `{ steps: [...] }` or a single
 * bare step object. Each proposed step is merged over the original step; the
 * first one keeps the original id. Proposals that reference internal
 * automation selectors are dropped.
 *
 * @param {object} client - Chat-completions client (`client.chat.completions.create`).
 * @param {string} model - Model identifier to request.
 * @param {object} step - The step that failed.
 * @param {string} failureReason - Why the step failed.
 * @param {string|undefined} suggestion - Optional hint from the verifier.
 * @param {Buffer} screenshot - Screenshot sent as a JPEG data URL.
 * @param {*} videoScript - Script context passed to the system prompt builder.
 * @param {*} observationSummary - Textual page observation for the user message.
 * @param {*} observationSnapshot - Structured page observation for the user message.
 * @param {AbortSignal} [signal] - Cancels the request; abort errors are rethrown.
 * @returns {Promise<{steps: object[], usage: object}>} Replacement steps (empty when no
 *   fix could be generated) plus the token usage of the request.
 */
async function fixStep(client, model, step, failureReason, suggestion, screenshot, videoScript, observationSummary, observationSnapshot, signal) {
    throwIfAborted(signal, 'Video fixer cancelled.');
    const base64 = screenshot.toString('base64');
    const imageUrl = `data:image/jpeg;base64,${base64}`;
    // Declared outside the try block so usage survives a malformed model answer.
    let usage = {};
    try {
        const response = await client.chat.completions.create({
            model,
            messages: [
                { role: 'system', content: buildStepFixerSystemPrompt(videoScript) },
                {
                    role: 'user',
                    content: buildVideoPromptContentParts({
                        text: buildStepFixerUserMessage(step, failureReason, suggestion, observationSummary, observationSnapshot),
                        imageUrl,
                        cacheLayoutV2: VIDEO_AGENT_CACHE_LAYOUT_V2,
                    }),
                },
            ],
            max_tokens: 600,
            response_format: { type: 'json_object' },
            provider: { zdr: true },
        }, { signal });
        usage = extractLlmUsageSnapshot(response);
        const content = response.choices?.[0]?.message?.content ?? '';
        if (!content) {
            return { steps: [], usage };
        }
        const parsed = JSON.parse(content);
        // Support both { steps: [...] } and a bare step object (backwards compat).
        let proposed = [];
        if (Array.isArray(parsed?.steps)) {
            proposed = parsed.steps;
        }
        else if (parsed?.type) {
            proposed = [parsed];
        }
        // Ignore entries that are not plain step objects. Spreading them would
        // either clone the failing step unchanged or throw and discard valid siblings.
        const rawSteps = proposed.filter((entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry));
        if (rawSteps.length === 0) {
            return { steps: [], usage };
        }
        const usesInternalSelector = (candidate) => containsInternalAutomationSelector(candidate.selector)
            || containsInternalAutomationSelector(candidate.toSelector)
            || (candidate.type === 'assert_page'
                && candidate.pageExpectation?.selectors?.some((selector) => containsInternalAutomationSelector(selector)))
            || candidate.expectedPageAfter?.selectors?.some((selector) => containsInternalAutomationSelector(selector));
        // Build step objects on top of the original; the first step keeps the original id.
        const fixedSteps = rawSteps
            .map((raw, idx) => ({
                ...step,
                ...raw,
                id: idx === 0 ? step.id : (raw.id ?? `${step.id}-fix-${idx + 1}`),
            }))
            .filter((candidate) => !usesInternalSelector(candidate));
        return { steps: fixedSteps, usage };
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        return { steps: [], usage };
    }
}
3269
/**
 * Asks the LLM to judge, from a screenshot, whether the requested language
 * and theme are active on the current page.
 *
 * @param {object} client - Chat-completions client (`client.chat.completions.create`).
 * @param {string} model - Model identifier to request.
 * @param {object} browser - Browser wrapper exposing `takeScreenshotForAI`.
 * @param {string|undefined} requestedLang - Requested language, if any.
 * @param {string|undefined} requestedTheme - Requested theme, if any.
 * @param {AbortSignal} [signal] - Cancels the request.
 * @returns {Promise<{langActive: boolean|undefined, themeActive: boolean|undefined, usage: object}>}
 *   Each flag is `undefined` when the model did not return a boolean for it.
 * @throws Errors from the screenshot capture or the request itself (including aborts).
 */
async function classifyVariantStateWithLLMFallback(client, model, browser, requestedLang, requestedTheme, signal) {
    throwIfAborted(signal, 'Video preflight cancelled.');
    const screenshot = await browser.takeScreenshotForAI();
    const base64 = screenshot.toString('base64');
    const imageUrl = `data:image/jpeg;base64,${base64}`;
    const response = await client.chat.completions.create({
        model,
        response_format: { type: 'json_object' },
        max_tokens: 180,
        messages: [
            {
                role: 'system',
                content: 'You classify page language and theme from screenshot + hints. Return strict JSON: {"langActive": boolean|null, "themeActive": boolean|null}.',
            },
            {
                role: 'user',
                content: buildVideoPromptContentParts({
                    text: `Requested language=${requestedLang ?? 'none'}, requested theme=${requestedTheme ?? 'none'}.`,
                    imageUrl,
                    cacheLayoutV2: VIDEO_AGENT_CACHE_LAYOUT_V2,
                }),
            },
        ],
        provider: { zdr: true },
    }, { signal });
    const usage = extractLlmUsageSnapshot(response);
    let parsed = {};
    try {
        // `?? {}` covers a literal JSON `null` answer.
        parsed = JSON.parse(response.choices?.[0]?.message?.content ?? '{}') ?? {};
    }
    catch {
        // Empty or malformed content means "no verdict". The request still
        // completed, so usage is returned instead of being lost to an exception.
        parsed = {};
    }
    return {
        langActive: typeof parsed.langActive === 'boolean' ? parsed.langActive : undefined,
        themeActive: typeof parsed.themeActive === 'boolean' ? parsed.themeActive : undefined,
        usage,
    };
}
3301
/**
 * Tells whether a step is marked as preparation-only.
 *
 * @param {object} step - Plan step; only `recordingIntent` is read.
 * @returns {boolean} True when `step.recordingIntent` is exactly 'prepare_only'.
 */
function isPrepareOnlyStep(step) {
    const { recordingIntent } = step;
    return recordingIntent === 'prepare_only';
}
3304
/**
 * Compares two URLs loosely, for deciding whether the browser is already on
 * the prepared start page.
 *
 * The URLs are equivalent when their normalized forms are identical, or when
 * both parse, their hostnames match after dropping a leading "www.", and
 * `pathFamilyMatches` accepts their pathnames.
 *
 * @param {string} a - First URL.
 * @param {string} b - Second URL.
 * @returns {boolean} Whether the two URLs are considered equivalent.
 */
function areEquivalentPreparedUrls(a, b) {
    if (normalizeComparableUrl(a) === normalizeComparableUrl(b)) {
        return true;
    }
    const first = parseUrlMaybe(a);
    const second = parseUrlMaybe(b);
    if (!first || !second) {
        return false;
    }
    const bareHost = ({ hostname }) => hostname.replace(/^www\./, '');
    return bareHost(first) === bareHost(second)
        && pathFamilyMatches(first.pathname, second.pathname);
}
3315
/**
 * Tries to activate the requested language by navigating to candidate locale
 * URLs, re-detecting the variant state after each navigation.
 *
 * At most four candidates are tried, excluding any that point at the page the
 * browser is already on. The function stops at the first candidate that makes
 * the language active.
 *
 * @param {object} browser - Browser wrapper; `currentPage` and `navigateTo` are used.
 * @param {string|undefined} requestedLang - Requested language; nothing is attempted when falsy.
 * @param {string|undefined} requestedTheme - Requested theme, forwarded to state detection.
 * @param {*} signals - Page signals handed to `collectLocaleActivationCandidates`.
 * @param {string[]} seedUrls - Seed URLs handed to `collectLocaleActivationCandidates`.
 * @param {object} callbacks - Callback bag; `onLog` receives progress messages.
 * @returns {Promise<{detected: object, candidateUsed?: object}>} Latest detected state, plus
 *   the candidate that activated the language when one succeeded.
 * @throws Abort errors raised while trying a candidate are rethrown so a cancelled run
 *   stops instead of moving on to the next candidate.
 */
async function attemptDeterministicLocaleActivation(browser, requestedLang, requestedTheme, signals, seedUrls, callbacks) {
    let detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
    if (!requestedLang || detected.lang.active) {
        return { detected };
    }
    // The current URL cannot change during the synchronous filter, so normalize it once.
    const currentUrl = normalizeComparableUrl(browser.currentPage.url());
    const candidates = collectLocaleActivationCandidates(signals, requestedLang, seedUrls)
        .filter((candidate) => normalizeComparableUrl(candidate.url) !== currentUrl)
        .slice(0, 4);
    if (candidates.length === 0) {
        log(`Variant preflight locale fallback found no candidate URL for ${requestedLang}.`, 'info', callbacks.onLog);
        return { detected };
    }
    for (const candidate of candidates) {
        try {
            log(`Variant preflight trying locale fallback via ${candidate.source}: ${candidate.url}`, 'info', callbacks.onLog);
            await browser.navigateTo(candidate.url);
            await dismissOverlaysWithLogging(browser, {
                context: `locale fallback (${candidate.source})`,
                onLog: callbacks.onLog,
            });
            detected = await detectVariantStateDeterministic(browser, requestedLang, requestedTheme);
            if (detected.lang.active) {
                log(`Variant preflight activated locale ${requestedLang} via ${candidate.source}.`, 'success', callbacks.onLog);
                return { detected, candidateUsed: candidate };
            }
        }
        catch (err) {
            // Cancellation must not be mistaken for a failed candidate.
            if (isAbortError(err)) {
                throw err;
            }
            log(`Variant preflight locale fallback failed via ${candidate.source}: ${err.message}`, 'info', callbacks.onLog);
        }
    }
    return { detected };
}
3347
/**
 * Confirms that the requested language/theme variant is active after the
 * prepared browser state has been rehydrated, and tries to restore it when
 * it is not.
 *
 * Recovery escalates in this order:
 *   1. variant controls, storage and locale-URL fallbacks,
 *   2. LLM classification of axes the deterministic detector marked ambiguous
 *      (only when a client is available),
 *   3. rebase to the prepared start URL followed by re-detection,
 *   4. the agent repair lane (only when a client is available), followed by
 *      rebase, re-detection and another ambiguity classification.
 *
 * @param {object} params - `{ browser, client, model, config, callbacks, context, abortSignal? }`.
 *   `context` equal to 'dry_run' selects the "Dry-run" log label; anything else "Recording".
 * @returns {Promise<object>} `{ ok: true, detected, usage }` on success, otherwise
 *   `{ ok: false, detected, reason, usage }`.
 * @throws Abort errors from the LLM fallback or the repair lane are rethrown.
 */
async function confirmPreparedVariantState(params) {
    const { browser, client, model, config, callbacks, context } = params;
    const contextLabel = context === 'dry_run' ? 'Dry-run' : 'Recording';
    const signal = params.abortSignal ?? config.abortSignal;
    const usage = [];
    let usageStepNumber = 1;

    const detectState = () => detectVariantStateDeterministic(browser, config.lang, config.theme);

    if (!config.lang && !config.theme) {
        // No variant requested: report the detected state without any recovery.
        const detectedOnly = await detectState();
        return { ok: true, detected: detectedOnly, usage };
    }

    let detected;
    let langActive;
    let themeActive;

    // An axis that was not requested counts as active.
    const syncActiveFlags = () => {
        langActive = !config.lang || detected.lang.active;
        themeActive = !config.theme || detected.theme.active;
    };
    const redetect = async () => {
        detected = await detectState();
        syncActiveFlags();
    };
    const confirmed = () => ({ ok: true, detected, usage });
    const dismissOverlays = (label) => dismissOverlaysWithLogging(browser, {
        context: `${contextLabel.toLowerCase()} ${label}`,
        onLog: callbacks.onLog,
    });

    // Navigates back to the prepared start URL when the browser drifted away from it.
    const rebaseIfNeeded = async (afterWhat) => {
        const rebaseUrl = config.preparedStartUrl ?? config.url;
        if (!rebaseUrl || areEquivalentPreparedUrls(browser.currentPage.url(), rebaseUrl)) {
            return false;
        }
        log(`[${contextLabel}] rebasing to ${rebaseUrl} after ${afterWhat}.`, 'info', callbacks.onLog);
        await browser.navigateTo(rebaseUrl);
        return true;
    };

    // Lets the LLM decide the axes the deterministic detector marked ambiguous.
    const resolveAmbiguityWithLlm = async () => {
        if (!client || !(detected.lang.ambiguous || detected.theme.ambiguous)) {
            return;
        }
        try {
            const fallback = await classifyVariantStateWithLLMFallback(client, model, browser, config.lang, config.theme, signal);
            usage.push(buildStepUsageFromSnapshot(normalizeLlmUsageSnapshot(fallback.usage), {
                stepNumber: usageStepNumber++,
                stepType: 'video_variant_classification',
                modelRequested: model,
                imagesInPrompt: 1,
            }));
            if (detected.lang.ambiguous && typeof fallback.langActive === 'boolean') {
                langActive = fallback.langActive;
            }
            if (detected.theme.ambiguous && typeof fallback.themeActive === 'boolean') {
                themeActive = fallback.themeActive;
            }
        }
        catch (err) {
            if (isAbortError(err)) {
                throw err;
            }
            // Keep the deterministic result when the fallback cannot complete.
        }
    };

    const describeAxis = (label, requested, state) => `${label} requested=${requested}; detected=${state.detected ?? 'unknown'}; active=${state.active}; ambiguous=${state.ambiguous}`;
    const variantInactive = () => (config.lang && !detected.lang.active) || (config.theme && !detected.theme.active);

    await dismissOverlays('prepared-state check');
    await redetect();
    if (langActive && themeActive) {
        return confirmed();
    }

    log(`[${contextLabel}] prepared variant mismatch detected after rehydration (lang=${config.lang ?? 'n/a'}:${detected.lang.detected ?? 'unknown'}, theme=${config.theme ?? 'n/a'}:${detected.theme.detected ?? 'unknown'}). Attempting local recovery.`, 'info', callbacks.onLog);

    // Local recovery: controls first, then storage, then locale URLs (language only).
    const controlFallback = await attemptVariantControlsActivation(browser, config.lang, config.theme, detected.pageSignals, callbacks);
    detected = controlFallback.detected;
    if (variantInactive()) {
        const storageFallback = await attemptVariantStorageActivation(browser, config.lang, config.theme, detected.pageSignals, callbacks);
        detected = storageFallback.detected;
    }
    if (config.lang && !detected.lang.active) {
        const localeFallback = await attemptDeterministicLocaleActivation(browser, config.lang, config.theme, detected.pageSignals, [config.preparedStartUrl ?? config.url, config.url], callbacks);
        detected = localeFallback.detected;
    }
    syncActiveFlags();
    await resolveAmbiguityWithLlm();

    if (await rebaseIfNeeded('local variant recovery')) {
        await dismissOverlays('prepared-state rebase');
        // Re-detection after the rebase replaces any earlier LLM verdict.
        await redetect();
    }
    if (langActive && themeActive) {
        return confirmed();
    }

    if (client) {
        try {
            log(`[${contextLabel}] escalating variant restoration to agent repair lane.`, 'info', callbacks.onLog);
            const repairResult = await runPreparedVariantRepairLane({
                browser,
                config,
                callbacks,
                abortSignal: signal,
                observedSummary: [
                    config.lang ? describeAxis('language', config.lang, detected.lang) : null,
                    config.theme ? describeAxis('theme', config.theme, detected.theme) : null,
                ].filter(Boolean).join(' | '),
            });
            if (repairResult.success) {
                await rebaseIfNeeded('variant repair lane');
                await dismissOverlays('variant repair lane');
                await redetect();
                await resolveAmbiguityWithLlm();
                if (langActive && themeActive) {
                    return confirmed();
                }
            }
        }
        catch (err) {
            if (isAbortError(err)) {
                throw err;
            }
            log(`[${contextLabel}] variant repair lane failed: ${err.message}`, 'info', callbacks.onLog);
        }
    }

    const reasons = [];
    if (config.lang && !langActive) {
        reasons.push(`requested lang=${config.lang}, detected=${detected.lang.detected ?? 'unknown'}`);
    }
    if (config.theme && !themeActive) {
        reasons.push(`requested theme=${config.theme}, detected=${detected.theme.detected ?? 'unknown'}`);
    }
    return {
        ok: false,
        detected,
        reason: `${contextLabel} prepared variant mismatch after rehydration: ${reasons.join('; ') || 'variant not active'}`,
        usage,
    };
}
3499
/**
 * Produces the list of steps to actually execute from a plan.
 *
 * Preparation-only steps are removed. When a prepared start URL is configured
 * and the first remaining step navigates to an equivalent URL, that navigate
 * step is dropped too, along with an immediately following wait of at most
 * 2500 ms.
 *
 * @param {object} plan - Plan whose `steps` array is filtered (not mutated).
 * @param {object} config - Run configuration; only `preparedStartUrl` is read.
 * @returns {object[]} A new array of steps to execute.
 */
function deriveExecutionSteps(plan, config) {
    const executable = plan.steps.filter((step) => !isPrepareOnlyStep(step));
    const [first, second] = executable;
    const startsAtPreparedUrl = config.preparedStartUrl
        && first?.type === 'navigate'
        && first.url
        && areEquivalentPreparedUrls(first.url, config.preparedStartUrl);
    if (!startsAtPreparedUrl) {
        return executable;
    }
    // Also drop a short settle-wait that only existed to follow the navigation.
    const hasShortWaitNext = second?.type === 'wait' && (second.waitMs ?? 0) <= 2500;
    return executable.slice(hasShortWaitNext ? 2 : 1);
}
3509
- export async function runVariantPreflight(config, prefixPlan, callbacks = {}) {
3510
- callbacks.onVariantPhase?.('preflight');
3511
- const client = createClient(config.apiKey);
3512
- throwIfAborted(config.abortSignal, 'Video preflight cancelled.');
3513
- const browser = await Browser.fromPool({
3514
- headed: false,
3515
- viewport: config.viewport,
3516
- deviceScaleFactor: normalizeOutputScale(config.outputScale),
3517
- lang: config.lang,
3518
- colorScheme: config.theme,
3519
- storageState: config.preparedStorageState,
3520
- });
3521
- const detachAbort = attachAbortToBrowser(config.abortSignal, browser);
3522
- let currentMousePos = { x: 0, y: 0 };
3523
- const usage = [];
3524
- let usageStepNumber = 1;
3525
- try {
3526
- throwIfAborted(config.abortSignal, 'Video preflight cancelled.');
3527
- if (config.theme) {
3528
- await browser.setColorScheme(config.theme);
3529
- }
3530
- if (config.lang) {
3531
- await browser.setLanguage(config.lang);
3532
- }
3533
- await prepareVideoSessionStorage(browser, config.preparedSessionStorage);
3534
- const entryUrl = config.preparedStartUrl ?? config.credentials?.loginUrl ?? config.url;
3535
- await browser.navigateTo(entryUrl);
3536
- const rawPrefixSteps = prefixPlan?.steps ?? [];
3537
- let detected = await detectVariantStateDeterministic(browser, config.lang, config.theme);
3538
- let prefixSteps = rawPrefixSteps;
3539
- if ((config.lang ? detected.lang.active : true) && (config.theme ? detected.theme.active : true)) {
3540
- if (rawPrefixSteps.length > 0) {
3541
- log('Variant already active on entry page. Skipping prefix plan execution.', 'info', callbacks.onLog);
3542
- }
3543
- prefixSteps = [];
3544
- }
3545
- else {
3546
- const sanitizedPrefixSteps = sanitizeVariantPrefixSteps(rawPrefixSteps, config.lang, config.theme);
3547
- if (sanitizedPrefixSteps.length !== rawPrefixSteps.length) {
3548
- log(`Variant prefix sanitized: skipped ${rawPrefixSteps.length - sanitizedPrefixSteps.length} non-variant step(s) before preflight.`, 'info', callbacks.onLog);
3549
- }
3550
- prefixSteps = sanitizedPrefixSteps;
3551
- }
3552
- const hasConcreteVariantActivation = prefixSteps.some(isConcreteVariantActivationStep);
3553
- if ((config.lang || config.theme) && !hasConcreteVariantActivation) {
3554
- log('Variant prefix plan contains no concrete activation step. Preflight will rely on deterministic locale/theme fallback if needed.', 'info', callbacks.onLog);
3555
- }
3556
- for (let index = 0; index < prefixSteps.length; index++) {
3557
- throwIfAborted(config.abortSignal, 'Video preflight cancelled.');
3558
- const step = prefixSteps[index];
3559
- const stepSignature = buildStepSignature(step);
3560
- const selectorMemory = config.selectorMemory?.[stepSignature] ?? [];
3561
- const execResult = await executeStepWithDeterministicRecovery(browser, step, {
3562
- mode: 'dry_run',
3563
- stepIndex: index,
3564
- currentMousePos,
3565
- selectorFallbacks: selectorMemory,
3566
- config,
3567
- callbacks,
3568
- });
3569
- currentMousePos = execResult.newMousePos;
3570
- if (!execResult.success) {
3571
- const isAssertStep = step.type === 'assert_url' || step.type === 'assert_text' || step.type === 'assert_element' || step.type === 'assert_page';
3572
- if (isAssertStep) {
3573
- // Assert steps in prefix plans are informational — the real variant validation
3574
- // is done by detectVariantStateDeterministic below. Don't abort preflight here.
3575
- log(`Prefix assert skipped (non-fatal): ${step.description}. ${execResult.reason ?? 'unknown reason'}`, 'info', callbacks.onLog);
3576
- }
3577
- else {
3578
- const failed = {
3579
- ok: false,
3580
- code: 'PREFLIGHT_VARIANT_NOT_ACTIVE',
3581
- reason: `Prefix step failed: ${step.description}. ${execResult.reason ?? 'unknown reason'}`,
3582
- usage,
3583
- };
3584
- callbacks.onPreflightResult?.(failed);
3585
- callbacks.onVariantPhase?.('failed');
3586
- return failed;
3587
- }
3588
- }
3589
- }
3590
- detected = await detectVariantStateDeterministic(browser, config.lang, config.theme);
3591
- if ((config.lang && !detected.lang.active) || (config.theme && !detected.theme.active)) {
3592
- const controlFallback = await attemptVariantControlsActivation(browser, config.lang, config.theme, detected.pageSignals, callbacks);
3593
- detected = controlFallback.detected;
3594
- }
3595
- if ((config.lang && !detected.lang.active) || (config.theme && !detected.theme.active)) {
3596
- const storageFallback = await attemptVariantStorageActivation(browser, config.lang, config.theme, detected.pageSignals, callbacks);
3597
- detected = storageFallback.detected;
3598
- }
3599
- if (config.lang && !detected.lang.active) {
3600
- const localeFallback = await attemptDeterministicLocaleActivation(browser, config.lang, config.theme, detected.pageSignals, [entryUrl, config.url], callbacks);
3601
- detected = localeFallback.detected;
3602
- }
3603
- let langActive = detected.lang.active;
3604
- let themeActive = detected.theme.active;
3605
- if ((detected.lang.ambiguous || detected.theme.ambiguous) && (config.lang || config.theme)) {
3606
- try {
3607
- const fallback = await classifyVariantStateWithLLMFallback(client, config.model, browser, config.lang, config.theme, config.abortSignal);
3608
- usage.push(buildStepUsageFromSnapshot(normalizeLlmUsageSnapshot(fallback.usage), {
3609
- stepNumber: usageStepNumber++,
3610
- stepType: 'video_variant_classification',
3611
- modelRequested: config.model,
3612
- imagesInPrompt: 1,
3613
- }));
3614
- if (detected.lang.ambiguous && typeof fallback.langActive === 'boolean') {
3615
- langActive = fallback.langActive;
3616
- }
3617
- if (detected.theme.ambiguous && typeof fallback.themeActive === 'boolean') {
3618
- themeActive = fallback.themeActive;
3619
- }
3620
- }
3621
- catch {
3622
- // Keep deterministic result
3623
- }
3624
- }
3625
- const rebaseUrl = config.preparedStartUrl ?? config.url;
3626
- if (rebaseUrl && !areEquivalentPreparedUrls(browser.currentPage.url(), rebaseUrl)) {
3627
- log(`Variant preflight rebasing to ${rebaseUrl} after variant detour.`, 'info', callbacks.onLog);
3628
- await browser.navigateTo(rebaseUrl);
3629
- await dismissOverlaysWithLogging(browser, {
3630
- context: 'variant preflight rebase',
3631
- onLog: callbacks.onLog,
3632
- });
3633
- detected = await detectVariantStateDeterministic(browser, config.lang, config.theme);
3634
- langActive = detected.lang.active;
3635
- themeActive = detected.theme.active;
3636
- if ((detected.lang.ambiguous || detected.theme.ambiguous) && (config.lang || config.theme)) {
3637
- try {
3638
- const fallback = await classifyVariantStateWithLLMFallback(client, config.model, browser, config.lang, config.theme, config.abortSignal);
3639
- usage.push(buildStepUsageFromSnapshot(normalizeLlmUsageSnapshot(fallback.usage), {
3640
- stepNumber: usageStepNumber++,
3641
- stepType: 'video_variant_classification',
3642
- modelRequested: config.model,
3643
- imagesInPrompt: 1,
3644
- }));
3645
- if (detected.lang.ambiguous && typeof fallback.langActive === 'boolean') {
3646
- langActive = fallback.langActive;
3647
- }
3648
- if (detected.theme.ambiguous && typeof fallback.themeActive === 'boolean') {
3649
- themeActive = fallback.themeActive;
3650
- }
3651
- }
3652
- catch {
3653
- // Keep deterministic result after rebase.
3654
- }
3655
- }
3656
- }
3657
- let ok = langActive && themeActive;
3658
- // If deterministic fallbacks failed, escalate to the agent repair lane.
3659
- // This mirrors the behavior of confirmPreparedVariantState in the dry-run,
3660
- // where the repair lane is the last resort before declaring failure.
3661
- if (!ok && (config.lang || config.theme)) {
3662
- try {
3663
- log('Variant preflight escalating to agent repair lane.', 'info', callbacks.onLog);
3664
- const repairResult = await runPreparedVariantRepairLane({
3665
- browser,
3666
- config,
3667
- callbacks,
3668
- abortSignal: config.abortSignal,
3669
- observedSummary: [
3670
- config.lang
3671
- ? `language requested=${config.lang}; detected=${detected.lang.detected ?? 'unknown'}; active=${detected.lang.active}; ambiguous=${detected.lang.ambiguous}`
3672
- : null,
3673
- config.theme
3674
- ? `theme requested=${config.theme}; detected=${detected.theme.detected ?? 'unknown'}; active=${detected.theme.active}; ambiguous=${detected.theme.ambiguous}`
3675
- : null,
3676
- ].filter(Boolean).join(' | '),
3677
- });
3678
- if (repairResult.success) {
3679
- const rebaseAfterRepair = config.preparedStartUrl ?? config.url;
3680
- if (rebaseAfterRepair && !areEquivalentPreparedUrls(browser.currentPage.url(), rebaseAfterRepair)) {
3681
- log(`Variant preflight rebasing to ${rebaseAfterRepair} after agent repair.`, 'info', callbacks.onLog);
3682
- await browser.navigateTo(rebaseAfterRepair);
3683
- }
3684
- await dismissOverlaysWithLogging(browser, {
3685
- context: 'variant preflight agent repair',
3686
- onLog: callbacks.onLog,
3687
- });
3688
- detected = await detectVariantStateDeterministic(browser, config.lang, config.theme);
3689
- langActive = detected.lang.active;
3690
- themeActive = detected.theme.active;
3691
- if ((detected.lang.ambiguous || detected.theme.ambiguous) && (config.lang || config.theme)) {
3692
- try {
3693
- const fallback = await classifyVariantStateWithLLMFallback(client, config.model, browser, config.lang, config.theme, config.abortSignal);
3694
- usage.push(buildStepUsageFromSnapshot(normalizeLlmUsageSnapshot(fallback.usage), {
3695
- stepNumber: usageStepNumber++,
3696
- stepType: 'video_variant_classification',
3697
- modelRequested: config.model,
3698
- imagesInPrompt: 1,
3699
- }));
3700
- if (detected.lang.ambiguous && typeof fallback.langActive === 'boolean') {
3701
- langActive = fallback.langActive;
3702
- }
3703
- if (detected.theme.ambiguous && typeof fallback.themeActive === 'boolean') {
3704
- themeActive = fallback.themeActive;
3705
- }
3706
- }
3707
- catch {
3708
- // Keep deterministic result after repair.
3709
- }
3710
- }
3711
- ok = langActive && themeActive;
3712
- }
3713
- }
3714
- catch (err) {
3715
- if (isAbortError(err)) {
3716
- throw err;
3717
- }
3718
- log(`Variant preflight agent repair failed: ${err.message}`, 'error', callbacks.onLog);
3719
- }
3720
- }
3721
- if (!ok) {
3722
- const reasons = [];
3723
- if (!langActive && config.lang) {
3724
- reasons.push(`requested lang=${config.lang}, detected=${detected.lang.detected ?? 'unknown'}`);
3725
- }
3726
- if (!themeActive && config.theme) {
3727
- reasons.push(`requested theme=${config.theme}, detected=${detected.theme.detected ?? 'unknown'}`);
3728
- }
3729
- const failed = {
3730
- ok: false,
3731
- code: 'PREFLIGHT_VARIANT_NOT_ACTIVE',
3732
- reason: reasons.join(' | ') || 'Variant state not active',
3733
- detectedLang: detected.lang.detected,
3734
- detectedTheme: detected.theme.detected,
3735
- usage,
3736
- };
3737
- callbacks.onPreflightResult?.(failed);
3738
- callbacks.onVariantPhase?.('failed');
3739
- return failed;
3740
- }
3741
- const success = {
3742
- ok: true,
3743
- detectedLang: detected.lang.detected,
3744
- detectedTheme: detected.theme.detected,
3745
- finalUrl: browser.currentPage.url(),
3746
- storageState: await browser.exportStorageState().catch(() => undefined),
3747
- sessionStorage: await browser.exportSessionStorage().catch(() => undefined),
3748
- pageSignals: detected.pageSignals,
3749
- observationSummary: await captureVideoObservationSummary(browser, {
3750
- maxAccessibilityChars: 3500,
3751
- maxElements: 18,
3752
- maxVisibleTextChars: 500,
3753
- }).catch(() => undefined),
3754
- observationSnapshot: buildVideoObservationSnapshot({
3755
- coherenceKey: config.preparedCoherenceKey,
3756
- interactiveElements: await browser.getInteractiveElements({ timeoutMs: 3000 }).catch(() => []),
3757
- pageSignals: detected.pageSignals,
3758
- pageIdentity: config.preparedObservationSnapshot?.pageIdentity ?? null,
3759
- }),
3760
- usage,
3761
- };
3762
- callbacks.onPreflightResult?.(success);
3763
- return success;
3764
- }
3765
- catch (err) {
3766
- if (isAbortError(err)) {
3767
- throw err;
3768
- }
3769
- const failed = {
3770
- ok: false,
3771
- code: 'PREFLIGHT_VARIANT_NOT_ACTIVE',
3772
- reason: `Preflight execution failed: ${err.message}`,
3773
- usage,
3774
- };
3775
- callbacks.onPreflightResult?.(failed);
3776
- callbacks.onVariantPhase?.('failed');
3777
- return failed;
3778
- }
3779
- finally {
3780
- detachAbort();
3781
- await browser.close().catch(() => {
3782
- // Browser may already be closed by cancellation.
3783
- });
3784
- }
3785
- }
3786
- async function runDryRun(plan, config, client, callbacks, videoScript) {
3787
- log('Starting dry-run verification...', 'info', callbacks.onLog);
3788
- throwIfAborted(config.abortSignal, 'Video dry-run cancelled.');
3789
- const browser = await Browser.fromPool({
3790
- headed: false,
3791
- viewport: config.viewport,
3792
- deviceScaleFactor: normalizeOutputScale(config.outputScale),
3793
- lang: config.lang,
3794
- colorScheme: config.theme,
3795
- storageState: config.preparedStorageState,
3796
- });
3797
- const detachAbort = attachAbortToBrowser(config.abortSignal, browser);
3798
- const verificationUsage = [];
3799
- const executionSteps = deriveExecutionSteps(plan, config);
3800
- const preservedPrepareSteps = plan.steps.filter(isPrepareOnlyStep);
3801
- // Collect the final (possibly patched) steps for the recording phase
3802
- const fixedPlanSteps = [];
3803
- const memoryUpdates = [];
3804
- let abortReason;
3805
- let failure;
3806
- let currentMousePos = { x: 0, y: 0 };
3807
- let stepNumber = 1;
3808
- try {
3809
- throwIfAborted(config.abortSignal, 'Video dry-run cancelled.');
3810
- if (config.theme) {
3811
- await browser.setColorScheme(config.theme);
3812
- }
3813
- if (config.lang) {
3814
- await browser.setLanguage(config.lang);
3815
- }
3816
- await prepareVideoSessionStorage(browser, config.preparedSessionStorage);
3817
- if (config.preparedStartUrl) {
3818
- await browser.navigateTo(config.preparedStartUrl);
3819
- }
3820
- else if (config.credentials?.loginUrl) {
3821
- // Pre-login if credentials provided
3822
- await browser.navigateTo(config.credentials.loginUrl);
3823
- }
3824
- else if (executionSteps[0]?.type !== 'navigate') {
3825
- await browser.navigateTo(plan.startUrl);
3826
- }
3827
- await replayPreparedActions(browser, config, callbacks);
3828
- try {
3829
- const preparedVariant = await runDryRunSubphase({
3830
- config,
3831
- callbacks,
3832
- stepIndex: 0,
3833
- subphase: 'prepared_variant',
3834
- minBudgetMs: 5_000,
3835
- run: async (_timeoutMs, signal) => confirmPreparedVariantState({
3836
- browser,
3837
- client,
3838
- model: config.model,
3839
- config,
3840
- callbacks,
3841
- context: 'dry_run',
3842
- abortSignal: signal,
3843
- }),
3844
- });
3845
- verificationUsage.push(...preparedVariant.usage);
3846
- if (!preparedVariant.ok) {
3847
- failure = {
3848
- stepIndex: 0,
3849
- subphase: 'prepared_variant',
3850
- reason: preparedVariant.reason,
3851
- resolvedTargetSummary: 'prepared variant state',
3852
- };
3853
- abortReason = preparedVariant.reason;
3854
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
3855
- return {
3856
- success: false,
3857
- patchedPlan: { ...plan, steps: [...preservedPrepareSteps, ...fixedPlanSteps] },
3858
- verificationUsage,
3859
- memoryUpdates,
3860
- abortReason,
3861
- failure,
3862
- };
3863
- }
3864
- }
3865
- catch (error) {
3866
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
3867
- failure = {
3868
- stepIndex: 0,
3869
- subphase: 'prepared_variant',
3870
- reason,
3871
- resolvedTargetSummary: 'prepared variant state',
3872
- };
3873
- abortReason = reason;
3874
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
3875
- return {
3876
- success: false,
3877
- patchedPlan: { ...plan, steps: [...preservedPrepareSteps, ...fixedPlanSteps] },
3878
- verificationUsage,
3879
- memoryUpdates,
3880
- abortReason,
3881
- failure,
3882
- };
3883
- }
3884
- for (let i = 0; i < executionSteps.length; i++) {
3885
- throwIfAborted(config.abortSignal, 'Video dry-run cancelled.');
3886
- const originalStep = executionSteps[i];
3887
- callbacks.onStep?.(i, executionSteps.length, originalStep.description, 'dry_run');
3888
- log(`[Dry-run ${i + 1}/${executionSteps.length}] ${originalStep.description} (target=${summarizeStepTarget(originalStep) || 'none'})`, 'info', callbacks.onLog);
3889
- const isAssertStep = originalStep.type === 'assert_url'
3890
- || originalStep.type === 'assert_text'
3891
- || originalStep.type === 'assert_element'
3892
- || originalStep.type === 'assert_page';
3893
- if (isAssertStep) {
3894
- const preAssertVerification = await verifyVideoStepDeterministically(browser, originalStep);
3895
- if (preAssertVerification?.ok) {
3896
- callbacks.onStepResult?.(i, true, preAssertVerification.reason);
3897
- log(`Step ${i + 1} deterministically verified: ${preAssertVerification.reason ?? 'ok'}`, 'info', callbacks.onLog);
3898
- continue;
3899
- }
3900
- }
3901
- const preStepCheckpoint = await captureVideoStepCheckpoint(browser.currentPage);
3902
- const markFailure = (subphase, reason, step = originalStep) => {
3903
- failure = {
3904
- stepIndex: i,
3905
- subphase,
3906
- reason,
3907
- resolvedTargetSummary: summarizeStepTarget(step),
3908
- };
3909
- };
3910
- const stepSignature = buildStepSignature(originalStep);
3911
- const selectorMemory = config.selectorMemory?.[stepSignature] ?? [];
3912
- let deterministicExec;
3913
- try {
3914
- deterministicExec = await runDryRunSubphase({
3915
- config,
3916
- callbacks,
3917
- stepIndex: i,
3918
- subphase: 'deterministic_exec',
3919
- run: async () => executeStepWithDeterministicRecovery(browser, originalStep, {
3920
- mode: 'dry_run',
3921
- stepIndex: i,
3922
- currentMousePos,
3923
- selectorFallbacks: selectorMemory,
3924
- config,
3925
- callbacks,
3926
- }),
3927
- });
3928
- }
3929
- catch (error) {
3930
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
3931
- markFailure('deterministic_exec', reason);
3932
- abortReason = `Unresolved dry-run step ${i + 1}: ${reason}`;
3933
- callbacks.onStepResult?.(i, false, reason);
3934
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
3935
- break;
3936
- }
3937
- currentMousePos = deterministicExec.newMousePos;
3938
- let activeStep = deterministicExec.step;
3939
- if (deterministicExec.success
3940
- && activeStep.selector
3941
- && activeStep.selector !== originalStep.selector
3942
- && !containsInternalAutomationSelector(activeStep.selector)) {
3943
- memoryUpdates.push({
3944
- stepSignature,
3945
- selector: activeStep.selector,
3946
- target: activeStep.target,
3947
- source: 'deterministic',
3948
- success: true,
3949
- });
3950
- }
3951
- let screenshot = null;
3952
- let observationSummary;
3953
- let verificationArtifacts = null;
3954
- let verification;
3955
- if (!deterministicExec.success) {
3956
- verification = {
3957
- ok: false,
3958
- reason: deterministicExec.reason ?? 'Step execution failed',
3959
- usage: {},
3960
- };
3961
- }
3962
- else if (isAssertStep) {
3963
- verification = { ok: true, usage: {} };
3964
- }
3965
- else {
3966
- // For clip mode, skip expensive LLM verification on intermediate steps.
3967
- // Clips are short (2-4 steps) and often involve stateful UI flows (open menu →
3968
- // click item → select option). The 8-12s LLM verification pause between steps
3969
- // causes popovers/dropdowns to close, breaking the subsequent step.
3970
- // Only the final step gets full LLM verification; intermediate steps use
3971
- // deterministic checks or auto-accept when the execution succeeded.
3972
- const isClipMode = config.mode === 'clip';
3973
- const isLastStep = i >= executionSteps.length - 1;
3974
- const skipLlmVerification = isClipMode && !isLastStep && deterministicExec.success;
3975
- const deterministicVerification = await verifyVideoStepDeterministically(browser, activeStep);
3976
- if (deterministicVerification?.ok) {
3977
- verification = deterministicVerification;
3978
- log(`Step ${i + 1} deterministically verified: ${deterministicVerification.reason ?? 'ok'}`, 'info', callbacks.onLog);
3979
- }
3980
- else if (skipLlmVerification) {
3981
- // Clip intermediate step: execution succeeded, accept without LLM to avoid
3982
- // the 8-12s pause that would close popovers/dropdowns opened by this step.
3983
- verification = { ok: true, usage: {} };
3984
- log(`Step ${i + 1} auto-accepted (clip intermediate step — execution succeeded, skipping LLM verification to preserve UI state).`, 'info', callbacks.onLog);
3985
- }
3986
- else {
3987
- try {
3988
- verificationArtifacts = await runDryRunSubphase({
3989
- config,
3990
- callbacks,
3991
- stepIndex: i,
3992
- subphase: 'verification_snapshot',
3993
- run: async (timeoutMs) => captureVideoVerificationArtifacts(browser, callbacks, timeoutMs),
3994
- });
3995
- }
3996
- catch (error) {
3997
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
3998
- markFailure('verification_snapshot', reason, activeStep);
3999
- verification = {
4000
- ok: false,
4001
- giveUp: true,
4002
- reason,
4003
- usage: {},
4004
- };
4005
- callbacks.onStepResult?.(i, verification.ok, verification.reason);
4006
- abortReason = verification.reason;
4007
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4008
- break;
4009
- }
4010
- if (!verificationArtifacts) {
4011
- const reason = 'Verification artifacts were not captured.';
4012
- markFailure('verification_snapshot', reason, activeStep);
4013
- verification = {
4014
- ok: false,
4015
- giveUp: true,
4016
- reason,
4017
- usage: {},
4018
- };
4019
- callbacks.onStepResult?.(i, verification.ok, verification.reason);
4020
- abortReason = verification.reason;
4021
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4022
- break;
4023
- }
4024
- const verifiedArtifacts = verificationArtifacts;
4025
- const verifiedScreenshot = verifiedArtifacts.bundle.screenshot;
4026
- const verifiedPageContext = verifiedArtifacts.pageContext;
4027
- const verifiedObservationSnapshot = verifiedArtifacts.observationSnapshot;
4028
- screenshot = verifiedScreenshot;
4029
- observationSummary = verifiedArtifacts.observationSummary;
4030
- try {
4031
- verification = await runDryRunSubphase({
4032
- config,
4033
- callbacks,
4034
- stepIndex: i,
4035
- subphase: 'verification_llm',
4036
- run: async (_timeoutMs, signal) => verifyStep(client, config.model, activeStep, verifiedScreenshot, i, executionSteps.length, videoScript, verifiedPageContext, observationSummary, verifiedObservationSnapshot, signal),
4037
- });
4038
- }
4039
- catch (error) {
4040
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4041
- markFailure('verification_llm', reason, activeStep);
4042
- verification = {
4043
- ok: false,
4044
- giveUp: true,
4045
- reason,
4046
- usage: {},
4047
- };
4048
- callbacks.onStepResult?.(i, verification.ok, verification.reason);
4049
- abortReason = verification.reason;
4050
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4051
- break;
4052
- }
4053
- verificationUsage.push({
4054
- stepNumber: stepNumber++,
4055
- stepType: 'video_step_verification',
4056
- generationId: verification.usage.generationId ?? null,
4057
- modelRequested: config.model,
4058
- modelUsed: verification.usage.modelUsed ?? null,
4059
- promptTokens: verification.usage.promptTokens ?? null,
4060
- completionTokens: verification.usage.completionTokens ?? null,
4061
- totalTokens: verification.usage.totalTokens ?? null,
4062
- cacheReadTokens: verification.usage.cacheReadTokens ?? null,
4063
- cacheWriteTokens: verification.usage.cacheWriteTokens ?? null,
4064
- imagesInPrompt: 1,
4065
- });
4066
- }
4067
- }
4068
- callbacks.onStepResult?.(i, verification.ok, verification.reason);
4069
- if (verification.giveUp) {
4070
- abortReason = verification.reason;
4071
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4072
- break;
4073
- }
4074
- if (verification.ok) {
4075
- log(`Step ${i + 1} verified OK`, 'success', callbacks.onLog);
4076
- fixedPlanSteps.push(activeStep);
4077
- continue;
4078
- }
4079
- log(`Step ${i + 1} failed: ${verification.reason}. Generating fix...`, 'error', callbacks.onLog);
4080
- const failedSelector = deterministicExec.traces[deterministicExec.traces.length - 1]?.selector ?? activeStep.selector;
4081
- if (failedSelector && !containsInternalAutomationSelector(failedSelector)) {
4082
- memoryUpdates.push({
4083
- stepSignature,
4084
- selector: failedSelector,
4085
- target: activeStep.target,
4086
- source: 'deterministic',
4087
- success: false,
4088
- });
4089
- }
4090
- try {
4091
- await runDryRunSubphase({
4092
- config,
4093
- callbacks,
4094
- stepIndex: i,
4095
- subphase: 'checkpoint_restore',
4096
- run: async () => restoreVideoStepCheckpoint(browser, preStepCheckpoint),
4097
- });
4098
- }
4099
- catch (error) {
4100
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4101
- markFailure('checkpoint_restore', reason, activeStep);
4102
- abortReason = `Unresolved dry-run step ${i + 1}: ${reason}`;
4103
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4104
- break;
4105
- }
4106
- try {
4107
- const repairLaneResult = await runDryRunSubphase({
4108
- config,
4109
- callbacks,
4110
- stepIndex: i,
4111
- subphase: 'repair_lane',
4112
- run: async (_timeoutMs, signal) => runScreenshotRepairLane({
4113
- browser,
4114
- config,
4115
- callbacks,
4116
- originalStep: activeStep,
4117
- nextStep: executionSteps[i + 1],
4118
- abortSignal: signal,
4119
- }),
4120
- });
4121
- if (repairLaneResult?.steps.length) {
4122
- fixedPlanSteps.push(...repairLaneResult.steps);
4123
- const repairedTarget = repairLaneResult.steps[0]?.target;
4124
- const repairedSelector = repairLaneResult.steps[0]?.selector;
4125
- if (repairedSelector && !containsInternalAutomationSelector(repairedSelector)) {
4126
- memoryUpdates.push({
4127
- stepSignature,
4128
- selector: repairedSelector,
4129
- target: repairedTarget,
4130
- source: 'manual',
4131
- success: true,
4132
- });
4133
- }
4134
- callbacks.onStepResult?.(i, true, 'Repaired via screenshot agent');
4135
- log(`Repair lane successful — step ${i + 1} replaced with ${repairLaneResult.steps.length} step(s)`, 'success', callbacks.onLog);
4136
- continue;
4137
- }
4138
- }
4139
- catch (error) {
4140
- markFailure('repair_lane', getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.'), activeStep);
4141
- }
4142
- if (!verificationArtifacts) {
4143
- try {
4144
- verificationArtifacts = await runDryRunSubphase({
4145
- config,
4146
- callbacks,
4147
- stepIndex: i,
4148
- subphase: 'verification_snapshot',
4149
- run: async (timeoutMs) => captureVideoVerificationArtifacts(browser, callbacks, timeoutMs),
4150
- });
4151
- }
4152
- catch (error) {
4153
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4154
- markFailure('verification_snapshot', reason, activeStep);
4155
- abortReason = `Unresolved dry-run step ${i + 1}: ${reason}`;
4156
- log(`Could not capture a stable verification snapshot for step ${i + 1}. Aborting dry-run...`, 'error', callbacks.onLog);
4157
- break;
4158
- }
4159
- }
4160
- screenshot = verificationArtifacts.bundle.screenshot;
4161
- observationSummary = verificationArtifacts.observationSummary;
4162
- // Ask LLM to produce a replacement step sequence only after deterministic recovery failed
4163
- let fixResult;
4164
- try {
4165
- fixResult = await runDryRunSubphase({
4166
- config,
4167
- callbacks,
4168
- stepIndex: i,
4169
- subphase: 'llm_fixer',
4170
- run: async (_timeoutMs, signal) => fixStep(client, config.model, activeStep, verification.reason ?? 'Step failed', verification.suggestion ?? '', screenshot, videoScript, observationSummary, verificationArtifacts.observationSnapshot, signal),
4171
- });
4172
- }
4173
- catch (error) {
4174
- const reason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4175
- markFailure('llm_fixer', reason, activeStep);
4176
- abortReason = `Unresolved dry-run step ${i + 1}: ${reason}`;
4177
- log(`Could not generate a valid fix for step ${i + 1}. Aborting dry-run...`, 'error', callbacks.onLog);
4178
- break;
4179
- }
4180
- // Track fix LLM call cost
4181
- if (fixResult.usage.generationId || fixResult.usage.totalTokens) {
4182
- verificationUsage.push({
4183
- stepNumber: stepNumber++,
4184
- stepType: 'video_step_fix',
4185
- generationId: fixResult.usage.generationId ?? null,
4186
- modelRequested: config.model,
4187
- modelUsed: fixResult.usage.modelUsed ?? null,
4188
- promptTokens: fixResult.usage.promptTokens ?? null,
4189
- completionTokens: fixResult.usage.completionTokens ?? null,
4190
- totalTokens: fixResult.usage.totalTokens ?? null,
4191
- cacheReadTokens: fixResult.usage.cacheReadTokens ?? null,
4192
- cacheWriteTokens: fixResult.usage.cacheWriteTokens ?? null,
4193
- imagesInPrompt: 1,
4194
- });
4195
- }
4196
- if (fixResult.steps.length === 0) {
4197
- markFailure('llm_fixer', verification.reason ?? 'no valid fix generated', activeStep);
4198
- abortReason = `Unresolved dry-run step ${i + 1}: ${verification.reason ?? 'no valid fix generated'}`;
4199
- log(`Could not generate a valid fix for step ${i + 1}. Aborting dry-run...`, 'error', callbacks.onLog);
4200
- break;
4201
- }
4202
- log(`Fix proposed (${fixResult.steps.length} step(s)) — rolling back and retrying...`, 'ai', callbacks.onLog);
4203
- try {
4204
- await runDryRunSubphase({
4205
- config,
4206
- callbacks,
4207
- stepIndex: i,
4208
- subphase: 'checkpoint_restore',
4209
- run: async () => restoreVideoStepCheckpoint(browser, preStepCheckpoint),
4210
- });
4211
- }
4212
- catch (error) {
4213
- const abortMessage = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4214
- markFailure('checkpoint_restore', abortMessage, activeStep);
4215
- if (config.abortSignal?.aborted) {
4216
- abortReason = abortMessage;
4217
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4218
- break;
4219
- }
4220
- }
4221
- const executedFixSteps = [];
4222
- let fixExecutionFailed = false;
4223
- let fixExecutionReason;
4224
- for (let fixIndex = 0; fixIndex < fixResult.steps.length; fixIndex++) {
4225
- const candidate = fixResult.steps[fixIndex];
4226
- const fixSignature = buildStepSignature(candidate);
4227
- const fixMemorySelectors = config.selectorMemory?.[fixSignature] ?? [];
4228
- let fixExec;
4229
- try {
4230
- fixExec = await runDryRunSubphase({
4231
- config,
4232
- callbacks,
4233
- stepIndex: i,
4234
- subphase: 'deterministic_exec',
4235
- run: async () => executeStepWithDeterministicRecovery(browser, candidate, {
4236
- mode: 'dry_run',
4237
- stepIndex: i,
4238
- currentMousePos,
4239
- selectorFallbacks: fixMemorySelectors,
4240
- config,
4241
- callbacks,
4242
- }),
4243
- });
4244
- }
4245
- catch (error) {
4246
- fixExecutionFailed = true;
4247
- fixExecutionReason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4248
- break;
4249
- }
4250
- currentMousePos = fixExec.newMousePos;
4251
- executedFixSteps.push(fixExec.step);
4252
- if (!fixExec.success) {
4253
- fixExecutionFailed = true;
4254
- fixExecutionReason = fixExec.reason ?? 'Fix step execution failed';
4255
- break;
4256
- }
4257
- }
4258
- let fixVerificationOk = !fixExecutionFailed;
4259
- if (!fixExecutionFailed) {
4260
- if (isAssertStep) {
4261
- throwIfAborted(config.abortSignal, 'Video dry-run cancelled.');
4262
- const deterministicAssertVerification = await verifyVideoStepDeterministically(browser, activeStep);
4263
- if (deterministicAssertVerification?.ok) {
4264
- fixVerificationOk = true;
4265
- log(`Step ${i + 1} fix deterministically verified: ${deterministicAssertVerification.reason ?? 'ok'}`, 'info', callbacks.onLog);
4266
- }
4267
- else {
4268
- const assertVerification = await executeStepWithDeterministicRecovery(browser, activeStep, {
4269
- mode: 'dry_run',
4270
- stepIndex: i,
4271
- currentMousePos,
4272
- selectorFallbacks: selectorMemory,
4273
- config,
4274
- callbacks,
4275
- });
4276
- currentMousePos = assertVerification.newMousePos;
4277
- fixVerificationOk = assertVerification.success;
4278
- if (!assertVerification.success) {
4279
- fixExecutionReason = assertVerification.reason ?? 'Assertion still failing after fix';
4280
- }
4281
- }
4282
- }
4283
- else {
4284
- const deterministicFixVerification = await verifyVideoStepDeterministically(browser, originalStep);
4285
- if (deterministicFixVerification?.ok) {
4286
- fixVerificationOk = true;
4287
- log(`Step ${i + 1} fix deterministically verified: ${deterministicFixVerification.reason ?? 'ok'}`, 'info', callbacks.onLog);
4288
- }
4289
- else {
4290
- try {
4291
- const fixArtifacts = await runDryRunSubphase({
4292
- config,
4293
- callbacks,
4294
- stepIndex: i,
4295
- subphase: 'verification_snapshot',
4296
- run: async (timeoutMs) => captureVideoVerificationArtifacts(browser, callbacks, timeoutMs),
4297
- });
4298
- const fixVerification = await runDryRunSubphase({
4299
- config,
4300
- callbacks,
4301
- stepIndex: i,
4302
- subphase: 'verification_llm',
4303
- run: async (_timeoutMs, signal) => verifyStep(client, config.model, originalStep, fixArtifacts.bundle.screenshot, i, executionSteps.length, videoScript, fixArtifacts.pageContext, fixArtifacts.observationSummary, fixArtifacts.observationSnapshot, signal),
4304
- });
4305
- verificationUsage.push({
4306
- stepNumber: stepNumber++,
4307
- stepType: 'video_step_verification',
4308
- generationId: fixVerification.usage.generationId ?? null,
4309
- modelRequested: config.model,
4310
- modelUsed: fixVerification.usage.modelUsed ?? null,
4311
- promptTokens: fixVerification.usage.promptTokens ?? null,
4312
- completionTokens: fixVerification.usage.completionTokens ?? null,
4313
- totalTokens: fixVerification.usage.totalTokens ?? null,
4314
- cacheReadTokens: fixVerification.usage.cacheReadTokens ?? null,
4315
- cacheWriteTokens: fixVerification.usage.cacheWriteTokens ?? null,
4316
- imagesInPrompt: 1,
4317
- });
4318
- fixVerificationOk = fixVerification.ok;
4319
- if (!fixVerification.ok) {
4320
- fixExecutionReason = fixVerification.reason ?? 'Fix verification failed';
4321
- }
4322
- }
4323
- catch (error) {
4324
- fixVerificationOk = false;
4325
- fixExecutionReason = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4326
- markFailure('verification_llm', fixExecutionReason, originalStep);
4327
- }
4328
- }
4329
- }
4330
- }
4331
- if (fixVerificationOk) {
4332
- fixedPlanSteps.push(...executedFixSteps);
4333
- const fixedSelector = executedFixSteps[0]?.selector;
4334
- if (fixedSelector && !containsInternalAutomationSelector(fixedSelector)) {
4335
- memoryUpdates.push({
4336
- stepSignature,
4337
- selector: fixedSelector,
4338
- target: executedFixSteps[0]?.target,
4339
- source: 'llm_fix',
4340
- success: true,
4341
- });
4342
- }
4343
- log(`Fix successful — step ${i + 1} replaced with ${executedFixSteps.length} step(s) in plan`, 'success', callbacks.onLog);
4344
- callbacks.onStepResult?.(i, true, 'Fixed and verified');
4345
- }
4346
- else {
4347
- if (executedFixSteps[0]?.selector && !containsInternalAutomationSelector(executedFixSteps[0].selector)) {
4348
- memoryUpdates.push({
4349
- stepSignature,
4350
- selector: executedFixSteps[0].selector,
4351
- target: executedFixSteps[0].target,
4352
- source: 'llm_fix',
4353
- success: false,
4354
- });
4355
- }
4356
- markFailure('llm_fixer', fixExecutionReason ?? verification.reason ?? 'fix failed', executedFixSteps[0] ?? activeStep);
4357
- log(`Fix also failed for step ${i + 1}. Aborting dry-run... ${fixExecutionReason ?? ''}`, 'error', callbacks.onLog);
4358
- abortReason = `Unresolved dry-run step ${i + 1}: ${fixExecutionReason ?? verification.reason ?? 'fix failed'}`;
4359
- try {
4360
- await restoreVideoStepCheckpoint(browser, preStepCheckpoint);
4361
- }
4362
- catch {
4363
- // Keep the original dry-run failure reason.
4364
- }
4365
- break;
4366
- }
4367
- }
4368
- // Apply the fixed plan so the recording phase uses corrected steps
4369
- plan.steps = preservedPrepareSteps.length > 0
4370
- ? [...preservedPrepareSteps, ...fixedPlanSteps]
4371
- : fixedPlanSteps;
4372
- }
4373
- catch (error) {
4374
- const abortMessage = getAbortAwareErrorMessage(config.abortSignal, error, 'Video dry-run cancelled.');
4375
- if (config.abortSignal?.aborted || isAbortError(error)) {
4376
- abortReason = abortMessage;
4377
- log(`Aborting dry-run: ${abortReason}`, 'error', callbacks.onLog);
4378
- }
4379
- else {
4380
- throw error;
4381
- }
4382
- }
4383
- finally {
4384
- detachAbort();
4385
- await browser.close().catch(() => {
4386
- // Browser may already be closed by cancellation.
4387
- });
4388
- }
4389
- return {
4390
- success: !abortReason,
4391
- abortReason,
4392
- failure,
4393
- verificationUsage,
4394
- patchedPlan: {
4395
- ...plan,
4396
- steps: preservedPrepareSteps.length > 0
4397
- ? [...preservedPrepareSteps, ...fixedPlanSteps]
4398
- : fixedPlanSteps,
4399
- },
4400
- memoryUpdates,
4401
- };
4402
- }
4403
- // ── Phase 3: Video recording ──────────────────────────────────────────
4404
/**
 * Phase 3: replay the plan in a video-recording browser and save the WebM file.
 *
 * Non-abort failures are logged and reported through a null `videoPath`;
 * they are not thrown. When no video was saved (failure or abort) the
 * temporary recording directory is removed, because its path is never
 * handed to the caller and would otherwise be leaked.
 *
 * @param {object} plan - Plan to record; passed to `deriveExecutionSteps`, and `startUrl` is read for pre-navigation.
 * @param {object} config - Run configuration (viewport, abortSignal, credentials, prepared* state, mode, ...).
 * @param {object} callbacks - Optional hooks read here: `onLog`, `onStep`, `onStepResult`.
 * @returns {Promise<{videoPath: (string|null), durationMs: number, stepsExecuted: number, setupDurationSec: number, usage: Array}>}
 * @throws Re-throws any error for which `isAbortError` returns true.
 */
async function runRecording(plan, config, callbacks) {
    log('Starting video recording...', 'info', callbacks.onLog);
    // Create temp directory for the video file
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'autokap-video-'));
    const outputPath = path.join(tempDir, 'recording.webm');
    const startTime = Date.now();
    const executionSteps = deriveExecutionSteps(plan, config);
    let stepsExecuted = 0;
    let setupDurationSec = 0;
    let videoPath = null;
    const usage = [];
    let browser = null;
    let detachAbort = () => { };
    try {
        throwIfAborted(config.abortSignal, 'Video recording cancelled.');
        const client = createClient(config.apiKey);
        browser = await Browser.forVideoRecording({
            headed: false,
            viewport: config.viewport,
            deviceScaleFactor: normalizeOutputScale(config.outputScale),
            lang: config.lang,
            colorScheme: config.theme,
            storageState: config.preparedStorageState,
        }, tempDir, buildCursorOverlayScript(config.videoOptions?.cursorTheme ?? 'minimal'));
        detachAbort = attachAbortToBrowser(config.abortSignal, browser);
        if (config.theme) {
            await browser.setColorScheme(config.theme);
        }
        await prepareVideoSessionStorage(browser, config.preparedSessionStorage);
        // Pre-navigate only when step 1 won't navigate to the same URL (avoids visible reload flicker).
        const normalizeForDedup = (u) => u.toLowerCase().replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/+$/, '').replace(/[?#].*$/, '').trim();
        const firstStepUrl = executionSteps[0]?.type === 'navigate' ? executionSteps[0].url : null;
        const preNavUrl = config.preparedStartUrl
            ?? config.credentials?.loginUrl
            ?? (executionSteps[0]?.type !== 'navigate' ? plan.startUrl : null);
        if (preNavUrl && (!firstStepUrl || normalizeForDedup(preNavUrl) !== normalizeForDedup(firstStepUrl))) {
            await browser.navigateTo(preNavUrl);
        }
        // When storageState restored auth and we already navigated to preparedStartUrl,
        // replaying intermediate navigation actions is redundant and adds visible setup time.
        if (!config.preparedStorageState || !config.preparedStartUrl) {
            await replayPreparedActions(browser, config, callbacks);
        }
        // Skip variant confirmation in recording when the dry-run already confirmed it.
        if (!config.dryRunVariantConfirmed) {
            const preparedVariant = await confirmPreparedVariantState({
                browser,
                client,
                model: config.model,
                config,
                callbacks,
                context: 'recording',
            });
            usage.push(...preparedVariant.usage);
            if (!preparedVariant.ok) {
                throw new Error(preparedVariant.reason);
            }
        }
        // Wait for the page to be fully loaded before recording any steps.
        // The video is already recording (Playwright limitation), so this "setup"
        // time is trimmed away by the post-processing trimStart.
        await browser.currentPage.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => { });
        await dismissOverlaysWithLogging(browser, {
            context: 'recording setup first dismiss',
            onLog: callbacks.onLog,
        });
        // Brief settle time for late-loading elements (fonts, lazy images)
        await browser.currentPage.waitForTimeout(400);
        // Second dismiss pass — some overlays appear after networkidle (delayed popups)
        await dismissOverlaysWithLogging(browser, {
            context: 'recording setup second dismiss',
            onLog: callbacks.onLog,
        });
        // Randomize cursor starting position within the central area of the viewport
        // so it never appears from the same top-left corner on every recording.
        const vpW = config.viewport.width;
        const vpH = config.viewport.height;
        const startX = Math.floor(vpW * 0.3 + Math.random() * vpW * 0.4);
        const startY = Math.floor(vpH * 0.3 + Math.random() * vpH * 0.4);
        await browser.currentPage.mouse.move(startX, startY);
        let currentMousePos = { x: startX, y: startY };
        // Record how long the setup took — this will be trimmed from the final video
        setupDurationSec = (Date.now() - startTime) / 1000;
        // Assert steps are only useful during dry-run verification; skip them while recording.
        const recordableSteps = executionSteps.filter((s) => s.type !== 'assert_url' && s.type !== 'assert_text' && s.type !== 'assert_element' && s.type !== 'assert_page');
        let firstNavDone = false;
        for (let i = 0; i < recordableSteps.length; i++) {
            throwIfAborted(config.abortSignal, 'Video recording cancelled.');
            const step = recordableSteps[i];
            callbacks.onStep?.(i, recordableSteps.length, step.description, 'recording');
            log(`[Recording ${i + 1}/${recordableSteps.length}] ${step.description}`, 'info', callbacks.onLog);
            const result = await executePlanStep(browser, step, 'recording', currentMousePos, config.credentials);
            // Auto-dismiss overlays (cookie banners, consent walls) after the first
            // navigation so they don't persist throughout the entire video.
            if (!firstNavDone && step.type === 'navigate') {
                firstNavDone = true;
                await dismissOverlaysWithLogging(browser, {
                    context: 'recording post-first-navigation dismiss',
                    onLog: callbacks.onLog,
                });
            }
            currentMousePos = result.newMousePos;
            stepsExecuted++;
            if (result.error) {
                callbacks.onStepResult?.(i, false, result.error);
                if (config.mode === 'clip') {
                    // Graceful degradation for clips: log and continue with remaining steps.
                    log(`[Recording] Step ${i + 1} failed (continuing): ${result.error}`, 'error', callbacks.onLog);
                }
                else {
                    // For videos, abort on the first failing step.
                    throw new Error(`Recording step ${i + 1} failed: ${result.error}`);
                }
            }
        }
        // Final pause so the last frame is visible
        throwIfAborted(config.abortSignal, 'Video recording cancelled.');
        await browser.currentPage.waitForTimeout(800);
        // Capture the Video reference before closing anything: the required order is
        // capture ref → close context (finalizes the WebM) → saveAs().
        const videoRef = browser.currentPage.video() ?? null;
        // Step 1: close only the context while the browser process stays alive.
        await browser.closeContext();
        // Step 2: saveAs() can now copy the finalized file.
        if (videoRef) {
            await videoRef.saveAs(outputPath);
            videoPath = outputPath;
            log('Recording saved successfully', 'success', callbacks.onLog);
        }
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        // A thrown value is not guaranteed to be an Error instance.
        const reason = err instanceof Error ? err.message : String(err);
        log(`Recording error: ${reason}`, 'error', callbacks.onLog);
    }
    finally {
        detachAbort();
        // Step 3: shut down the browser process
        if (browser) {
            await browser.close().catch(() => {
                // Browser may already be closed by cancellation.
            });
        }
        // No video was saved, so the caller never learns about tempDir: remove it here.
        if (videoPath === null) {
            try {
                fs.rmSync(tempDir, { recursive: true, force: true });
            }
            catch {
                // Best-effort cleanup; never mask the original failure or abort.
            }
        }
    }
    const durationMs = Date.now() - startTime;
    return { videoPath, durationMs, stepsExecuted, setupDurationSec, usage };
}
4559
- // ── Exported phase functions (for multi-viewport orchestration) ───────
4560
/**
 * Returns shallow copies of `steps` whose `id` is `idPrefix` followed by the
 * 1-based position of the step. Any existing `id` is overwritten; the input
 * array and its step objects are left untouched.
 */
function withStepIdPrefix(steps, idPrefix) {
    const assignSequentialId = (step, position) => {
        const id = `${idPrefix}${position + 1}`;
        return { ...step, id };
    };
    return steps.map(assignSequentialId);
}
4566
/**
 * Returns shallow copies of `steps`, each tagged with the given
 * `recordingIntent` (overwriting any existing value). The input is not mutated.
 */
function withRecordingIntent(steps, recordingIntent) {
    return steps.map((step) => {
        const tagged = { ...step };
        tagged.recordingIntent = recordingIntent;
        return tagged;
    });
}
4572
/**
 * Builds one plan from an optional variant-prefix plan followed by the base plan.
 *
 * Prefix steps get ids `prefix-step-N` and intent `prepare_only`; base steps get
 * ids `base-step-N` and intent `visible`. The title comes from the base plan, the
 * estimated durations are summed, and the prefix plan's `startUrl` wins when set.
 *
 * @param {object} basePlan - Plan providing `title`, `steps`, `estimatedDurationSec`, `startUrl`.
 * @param {object} [variantPrefixPlan] - Optional plan whose steps run first.
 * @returns {{title: string, estimatedDurationSec: number, startUrl: *, steps: Array}}
 */
export function composeHybridPlan(basePlan, variantPrefixPlan) {
    const prepareSteps = withRecordingIntent(
        withStepIdPrefix(variantPrefixPlan?.steps ?? [], 'prefix-step-'),
        'prepare_only',
    );
    const visibleSteps = withRecordingIntent(
        withStepIdPrefix(basePlan.steps, 'base-step-'),
        'visible',
    );
    const prefixDurationSec = variantPrefixPlan?.estimatedDurationSec ?? 0;
    return {
        title: `${basePlan.title}`,
        estimatedDurationSec: basePlan.estimatedDurationSec + prefixDurationSec,
        startUrl: variantPrefixPlan?.startUrl ?? basePlan.startUrl,
        steps: prepareSteps.concat(visibleSteps),
    };
}
4583
/**
 * Gathers the page observation handed to the planner.
 *
 * When a prepared summary exists and planner vision is off, the prepared
 * summary/snapshot pair is returned without opening a browser. Otherwise
 * `observePlanningContext` is called; if it fails with a non-abort error, the
 * failure is logged and the prepared data (or an empty object) is returned.
 *
 * @param {object} config - Reads url, viewport, lang, theme, outputScale, abortSignal,
 *   enablePlannerVision and the prepared* fields.
 * @returns {Promise<{summary?: *, snapshot?: *, screenshot?: *}>}
 * @throws Re-throws errors for which `isAbortError` returns true.
 */
async function collectPlannerObservation(config) {
    const visionEnabled = config.enablePlannerVision ?? false;
    const preparedObservation = () => ({
        summary: config.preparedObservationSummary,
        snapshot: config.preparedObservationSnapshot,
    });
    if (config.preparedObservationSummary && !visionEnabled) {
        return preparedObservation();
    }
    try {
        const observed = await observePlanningContext({
            url: config.url,
            startUrl: config.preparedStartUrl,
            viewport: config.viewport,
            outputScale: normalizeOutputScale(config.outputScale),
            lang: config.lang,
            theme: config.theme,
            storageState: config.preparedStorageState,
            sessionStorage: config.preparedSessionStorage,
            abortSignal: config.abortSignal,
            captureScreenshot: visionEnabled,
            pageIdentity: config.preparedObservationSnapshot?.pageIdentity ?? null,
        });
        // Fresh observation wins; prepared values only fill the gaps.
        return {
            summary: observed.summary ?? config.preparedObservationSummary,
            screenshot: observed.screenshot,
            snapshot: observed.snapshot ?? config.preparedObservationSnapshot,
        };
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        logger.info(`Planning observation skipped: ${err.message}`);
        if (config.preparedObservationSummary) {
            return preparedObservation();
        }
        return {};
    }
}
4623
/**
 * Generates the base plan for a script.
 *
 * Collects a planner observation, then calls `planFromScript` starting from
 * `config.preparedStartUrl` (falling back to `config.url`) in `clip` mode when
 * `config.mode === 'clip'`, otherwise in `base` mode.
 *
 * @param {object} config - Run configuration.
 * @returns {Promise<*>} Whatever `planFromScript` resolves to.
 */
export async function createBasePlan(config) {
    const observation = await collectPlannerObservation(config);
    const startUrl = config.preparedStartUrl ?? config.url;
    const retries = config.maxPlanRetries ?? 2;
    const plannerOptions = {
        mode: config.mode === 'clip' ? 'clip' : 'base',
        credentials: config.credentials,
        variant: {
            lang: config.lang,
            theme: config.theme,
            langInstructions: config.langInstructions,
            themeInstructions: config.themeInstructions,
        },
        observationSummary: observation.summary,
        observationSnapshot: observation.snapshot,
        screenshot: observation.screenshot,
    };
    return await planFromScript(config.script, startUrl, config.model, config.apiKey, retries, plannerOptions, config.abortSignal);
}
4640
/**
 * Generates the variant-prefix plan.
 *
 * Collects a planner observation and calls `planFromScript` in
 * `variant_prefix` mode, always starting from `config.url`.
 *
 * @param {object} config - Run configuration.
 * @returns {Promise<*>} Whatever `planFromScript` resolves to.
 */
export async function createVariantPrefixPlan(config) {
    const { lang, theme, langInstructions, themeInstructions } = config;
    const observation = await collectPlannerObservation(config);
    const plannerOptions = {
        mode: 'variant_prefix',
        credentials: config.credentials,
        variant: { lang, theme, langInstructions, themeInstructions },
        observationSummary: observation.summary,
        observationSnapshot: observation.snapshot,
        screenshot: observation.screenshot,
    };
    return await planFromScript(
        config.script,
        config.url,
        config.model,
        config.apiKey,
        config.maxPlanRetries ?? 2,
        plannerOptions,
        config.abortSignal,
    );
}
4657
/**
 * Phase 2: runs the verification dry-run on a copy of `plan`.
 *
 * Emits the `dry_run` phase on both phase callbacks, then `failed` if the
 * dry-run did not succeed.
 *
 * @param {object} plan - Plan to verify; the dry-run receives a copy with its own `steps` array.
 * @param {object} config - Run configuration (`apiKey` and `script` are read here).
 * @param {object} [callbacks] - Optional hooks: `onVariantPhase`, `onPhaseChange`, `onLog`.
 * @returns {Promise<object>} On success `{ success: true, plan, usage, memoryUpdates }`;
 *   otherwise `{ success: false, error, usage, memoryUpdates, failedStepIndex, failedSubphase }`.
 */
export async function verifyAndPatchPlan(plan, config, callbacks = {}) {
    const client = createClient(config.apiKey);
    callbacks.onVariantPhase?.('dry_run');
    callbacks.onPhaseChange?.('dry_run');
    log('Phase 2: Running verification dry-run...', 'info', callbacks.onLog);
    const planCopy = { ...plan, steps: [...plan.steps] };
    const outcome = await runDryRun(planCopy, config, client, callbacks, config.script);
    if (outcome.success) {
        log('Dry-run completed successfully', 'success', callbacks.onLog);
        return {
            success: true,
            plan: outcome.patchedPlan,
            usage: outcome.verificationUsage,
            memoryUpdates: outcome.memoryUpdates,
        };
    }
    log(`Dry-run aborted: ${outcome.abortReason}`, 'error', callbacks.onLog);
    callbacks.onPhaseChange?.('failed');
    callbacks.onVariantPhase?.('failed');
    return {
        success: false,
        error: `Dry-run failed: ${outcome.abortReason}`,
        usage: outcome.verificationUsage,
        memoryUpdates: outcome.memoryUpdates,
        failedStepIndex: outcome.failure?.stepIndex,
        failedSubphase: outcome.failure?.subphase,
    };
}
4684
/**
 * Backward-compatible helper: full planning + verify in one call.
 *
 * Phase 1 builds the plan with `planFromScript` (mode `clip` or `full`), then
 * the plan is handed to `verifyAndPatchPlan`. Planning failures are reported
 * in the result rather than thrown, except abort errors, which propagate.
 *
 * @param {object} config - Run configuration.
 * @param {object} [callbacks] - Optional hooks: `onVariantPhase`, `onPhaseChange`, `onLog`.
 * @returns {Promise<object>} `{ success: true, plan, usage, memoryUpdates }` or
 *   `{ success: false, error, usage, memoryUpdates }`.
 */
export async function planAndVerify(config, callbacks = {}) {
    const allUsage = [];
    callbacks.onVariantPhase?.('planning');
    callbacks.onPhaseChange?.('planning');
    log('Phase 1: Analyzing script and generating execution plan...', 'ai', callbacks.onLog);
    let plan;
    try {
        const observation = await collectPlannerObservation(config);
        const plannerOptions = {
            mode: config.mode === 'clip' ? 'clip' : 'full',
            credentials: config.credentials,
            observationSummary: observation.summary,
            screenshot: observation.screenshot,
        };
        const planned = await planFromScript(config.script, config.url, config.model, config.apiKey, config.maxPlanRetries ?? 2, plannerOptions, config.abortSignal);
        plan = planned.plan;
        allUsage.push(planned.usage);
        log(`Plan ready: "${plan.title}" — ${plan.steps.length} steps (~${plan.estimatedDurationSec}s)`, 'success', callbacks.onLog);
    }
    catch (err) {
        if (isAbortError(err)) {
            throw err;
        }
        const reason = err.message;
        log(`Planning failed: ${reason}`, 'error', callbacks.onLog);
        callbacks.onPhaseChange?.('failed');
        callbacks.onVariantPhase?.('failed');
        return { success: false, error: `Planning failed: ${reason}`, usage: allUsage, memoryUpdates: [] };
    }
    const verified = await verifyAndPatchPlan(plan, config, callbacks);
    allUsage.push(...verified.usage);
    const shared = { usage: allUsage, memoryUpdates: verified.memoryUpdates };
    return verified.success
        ? { success: true, plan: verified.plan, ...shared }
        : { success: false, error: verified.error, ...shared };
}
4732
/**
 * Run phase 3 (recording only) using a pre-verified plan.
 * Call after `planAndVerify()` to record across multiple lang/theme combinations.
 *
 * Emits the `recording` phase on `onPhaseChange` and then `onVariantPhase`
 * before delegating to `runRecording`.
 *
 * @param {object} plan - Verified plan to record.
 * @param {object} config - Run configuration.
 * @param {object} [callbacks] - Optional hooks.
 * @returns {Promise<*>} The result of `runRecording`.
 */
export async function recordPlan(plan, config, callbacks = {}) {
    // Index access keeps `callbacks` as the receiver, exactly like a direct method call.
    for (const hook of ['onPhaseChange', 'onVariantPhase']) {
        callbacks[hook]?.('recording');
    }
    log('Phase 3: Recording video with animated mouse...', 'info', callbacks.onLog);
    return runRecording(plan, config, callbacks);
}
4742
- // ── Main entry point ──────────────────────────────────────────────────
4743
/**
 * Run the full 3-phase video capture pipeline:
 * 1. Planning: script → structured plan (LLM)
 * 2. Dry-run: verify all steps work
 * 3. Recording: execute with Bezier mouse + cursor overlay
 *
 * The final phase callbacks and log line reflect the real outcome: `done`
 * only when a video file was produced, `failed` when the recording returned
 * no `videoPath`. The returned `success` flag is unchanged.
 *
 * @param {object} config - Run configuration forwarded to `planAndVerify` and `recordPlan`.
 * @param {object} [callbacks] - Optional hooks: `onPhaseChange`, `onVariantPhase`, `onLog`.
 * @returns {Promise<{success: boolean, plan: (object|null), videoPath: (string|null),
 *   thumbnailBuffer: null, durationMs: number, stepsExecuted: number, assessment: *, usage: Array}>}
 */
export async function runVideoAgent(config, callbacks = {}) {
    const allUsage = [];
    const planResult = await planAndVerify(config, callbacks);
    allUsage.push(...planResult.usage);
    if (!planResult.success) {
        callbacks.onPhaseChange?.('failed');
        return {
            success: false,
            plan: null,
            videoPath: null,
            thumbnailBuffer: null,
            durationMs: 0,
            stepsExecuted: 0,
            assessment: planResult.error,
            usage: allUsage,
        };
    }
    const plan = planResult.plan;
    const recordingResult = await recordPlan(plan, config, callbacks);
    allUsage.push(...recordingResult.usage);
    const recorded = recordingResult.videoPath !== null;
    if (recorded) {
        callbacks.onPhaseChange?.('done');
        callbacks.onVariantPhase?.('done');
        log('Video capture complete!', 'success', callbacks.onLog);
    }
    else {
        // The recording phase reports non-abort failures through a null videoPath;
        // surface that as a failure instead of announcing completion.
        callbacks.onPhaseChange?.('failed');
        callbacks.onVariantPhase?.('failed');
        log('Video capture failed: no recording was produced.', 'error', callbacks.onLog);
    }
    return {
        success: recorded,
        plan,
        videoPath: recordingResult.videoPath,
        thumbnailBuffer: null, // Thumbnail generation is optional (V1: skip)
        durationMs: recordingResult.durationMs,
        stepsExecuted: recordingResult.stepsExecuted,
        assessment: `Recorded ${recordingResult.stepsExecuted}/${plan.steps.length} steps in ${Math.round(recordingResult.durationMs / 1000)}s`,
        usage: allUsage,
    };
}
4783
- //# sourceMappingURL=video-agent.js.map