autokap 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +266 -305
  7. package/package.json +26 -19
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
@@ -1,950 +0,0 @@
1
- import { mkdtemp } from 'node:fs/promises';
2
- import os from 'node:os';
3
- import path from 'node:path';
4
- import { logger } from './logger.js';
5
- import { createBasePlan, createVariantPrefixPlan, verifyAndPatchPlan, recordPlan, runVariantPreflight, } from './video-agent.js';
6
- import { navigateWithAgent, } from './hybrid-navigator.js';
7
- import { postProcessClipRecording } from './clip-postprocess.js';
8
- import { createAbortError, getAbortMessage } from './abort.js';
9
- // ── Helpers ──────────────────────────────────────────────────────────
10
/**
 * Merge two abort signals into one that fires when either fires.
 *
 * The merged signal carries the reason of `a` when `a` is aborted, otherwise
 * the reason of `b`.
 *
 * @param {AbortSignal} a - First source signal; its reason wins when both are aborted.
 * @param {AbortSignal} b - Second source signal.
 * @returns {AbortSignal} Signal that aborts as soon as either source aborts.
 */
function mergeAbortSignals(a, b) {
    // Prefer the platform combinator when the runtime provides it, so no
    // hand-written listener stays attached to a source that never aborts.
    if (typeof AbortSignal.any === 'function') {
        return AbortSignal.any([a, b]);
    }
    const controller = new AbortController();
    if (a.aborted || b.aborted) {
        controller.abort(a.aborted ? a.reason : b.reason);
        return controller.signal;
    }
    const onAbort = () => {
        // Detach from BOTH sources: without this, the listener registered on
        // the signal that did not fire would stay attached indefinitely.
        a.removeEventListener('abort', onAbort);
        b.removeEventListener('abort', onAbort);
        controller.abort(a.aborted ? a.reason : b.reason);
    };
    a.addEventListener('abort', onAbort, { once: true });
    b.addEventListener('abort', onAbort, { once: true });
    return controller.signal;
}
25
/**
 * Convert technical failure messages into user-friendly descriptions.
 *
 * The raw text is matched against a fixed list of known failure shapes, in
 * order; the first match decides the message. Unrecognised failures fall back
 * to the raw text with pipeline-internal prefixes stripped.
 *
 * @param {string} raw - Raw failure text produced by the pipeline.
 * @param {string} [phase] - Pipeline phase that failed (compared against 'preflight' and 'recording').
 * @param {string} [subphase] - Finer-grained stage (compared against 'prepared_variant').
 * @returns {string} Message intended for display to the user.
 */
function humanizeClipFailure(raw, phase, subphase) {
    // Language/theme variant failures
    if (/variant.*not active|lang=\w+.*detected=\w+/i.test(raw)) {
        // Capture region-qualified tags (pt-BR, zh-Hant) as a whole; a bare
        // \w+ stops at the hyphen and the specific message would be lost.
        const langMatch = raw.match(/lang=([\w-]+),\s*detected=([\w-]+)/);
        if (langMatch) {
            return `Could not switch the app to "${langMatch[1]}" — the page stayed in "${langMatch[2]}". Check that the app has a language selector accessible from the current page, or add language switch instructions in your preset settings.`;
        }
        return `Could not activate the requested language/theme. Check that the app has accessible language/theme controls, or add switch instructions in your preset settings.`;
    }
    // Timeout failures
    if (/timed?\s*out|timeout/i.test(raw)) {
        if (subphase === 'prepared_variant') {
            return 'The language/theme switch took too long. The app may not have a language selector on this page, or the AI model failed to interact with it. Try adding explicit language switch instructions.';
        }
        if (phase === 'preflight') {
            return 'Setting up the correct language/theme took too long. Try adding explicit switch instructions in your preset settings.';
        }
        if (phase === 'recording') {
            return 'Recording timed out — the interaction may be too complex or the page was loading too slowly.';
        }
        return `The operation timed out during ${phase ?? 'execution'}. The page may be slow or unresponsive.`;
    }
    // Navigation failures
    if (/navigation failed/i.test(raw)) {
        if (/login|auth|sign.?in/i.test(raw)) {
            return 'Could not log into the app. Check that the stored credentials are correct.';
        }
        return 'Could not navigate to the target page. Check the URL and navigation instructions.';
    }
    // Selector/element not found
    if (/absent from.*observation|selector.*not found|cannot resolve/i.test(raw)) {
        const selectorMatch = raw.match(/(?:target|selector)\s+"([^"]+)"/);
        return selectorMatch
            ? `The element "${selectorMatch[1]}" was not found on the page. It may have a different label or not be visible at this point. Check the recording instructions.`
            : 'An expected UI element was not found on the page. Check the recording instructions — the element may have a different name or not be visible.';
    }
    // Planning failures
    if (/planning failed/i.test(raw)) {
        return 'The AI could not plan the interaction steps. Try simplifying the recording instructions or breaking them into smaller clips.';
    }
    // Post-processing
    if (/post-?processing failed/i.test(raw)) {
        return 'The clip was recorded but post-processing (GIF/MP4 conversion) failed. Try again.';
    }
    // No video produced
    if (/no video file/i.test(raw)) {
        return 'The recording produced no video. The interaction may have been too fast or the page did not react visibly.';
    }
    // Fallback: return raw but clean up technical noise
    return raw
        .replace(/Dry-run failed:\s*/gi, '')
        .replace(/Clip variant \S+ /gi, '')
        .replace(/\bafter \d+ms\b/gi, '')
        .trim();
}
83
/**
 * Compose the identifier of one capture variant.
 *
 * @param {string} targetId - Identifier of the capture target.
 * @param {string} lang - Requested language.
 * @param {string} theme - Requested theme.
 * @returns {string} The three parts joined as `target:lang:theme`.
 */
function buildVariantId(targetId, lang, theme) {
    const variantId = `${targetId}:${lang}:${theme}`;
    return variantId;
}
86
/**
 * Compose the key under which a prepared navigation state is cached.
 *
 * The theme is deliberately part of the key: many apps require an in-app
 * theme switch, so a light-mode navigation state cannot be safely reused for
 * a dark-mode variant. Only an identical (clip, target, lang, theme) variant
 * may reuse a cached entry.
 *
 * @param {string} clipId - Identifier of the clip.
 * @param {string} targetId - Identifier of the capture target.
 * @param {string} lang - Requested language.
 * @param {string} theme - Requested theme.
 * @returns {string} The four parts joined as `clip:target:lang:theme`.
 */
function buildNavCacheKey(clipId, targetId, lang, theme) {
    const cacheKey = `${clipId}:${targetId}:${lang}:${theme}`;
    return cacheKey;
}
94
const CLIP_PHASE_MAX_ATTEMPTS = 2;
/**
 * Decide whether a failed phase attempt is worth retrying.
 *
 * Dry-run semantic failures (wrong selector, wrong page) are never retried;
 * a dry-run failure is retryable only when it happened while preparing the
 * variant and the reason looks like an environment problem. Every other phase
 * is retryable when the reason matches a list of transient-failure markers.
 *
 * @param {string} phase - Phase that failed.
 * @param {string} reason - Failure text to classify.
 * @param {string} [failedSubphase] - Sub-stage reported by the failing phase.
 * @returns {boolean} True when another attempt may succeed.
 */
function isRetryableClipVariantFailure(phase, reason, failedSubphase) {
    if (phase !== 'dry_run') {
        return /\b(timeout|timed out|stale verification snapshot|temporary|network|net::|socket|navigation failed|page closed|context closed)\b/i.test(reason);
    }
    if (failedSubphase !== 'prepared_variant') {
        return false;
    }
    return /\b(timeout|timed out|network|socket|context closed)\b/i.test(reason);
}
104
/**
 * Compute the per-attempt timeout budget for a clip pipeline phase.
 *
 * Each phase has a fixed floor plus a budget that scales with the clip's
 * `maxDurationSec` (default 8, minimum 1); the larger of the two is used.
 *
 * @param {string} phase - One of 'preflight', 'planning', 'dry_run', 'recording', 'postprocess'.
 * @param {{ maxDurationSec?: number }} [clipOptions] - Clip options; only `maxDurationSec` is read.
 * @returns {number} Timeout in milliseconds; always a finite positive number.
 */
function computeClipPhaseTimeoutMs(phase, clipOptions) {
    const DEFAULT_MAX_DURATION_SEC = 8;
    const requestedSec = Number(clipOptions?.maxDurationSec ?? DEFAULT_MAX_DURATION_SEC);
    // A NaN/Infinity duration would propagate into the timer delay, which
    // Node coerces to 1 ms, so fall back to the default instead.
    const usableSec = Number.isFinite(requestedSec) ? requestedSec : DEFAULT_MAX_DURATION_SEC;
    const maxDurationSec = Math.max(1, Math.round(usableSec));
    const budget = (floorMs, perSecondMs) => Math.max(floorMs, Math.round(maxDurationSec * perSecondMs));
    switch (phase) {
        case 'preflight':
            // Preflight may need to: detect variant state, try 4 deterministic fallbacks
            // (controls, storage, locale URL, rebase), then spawn an agent repair lane
            // that navigates to settings and switches language/theme. 30s was far too
            // aggressive — the agent repair lane alone needs 30-60s.
            return budget(120_000, 10_000);
        case 'planning':
            return budget(90_000, 8_000);
        case 'dry_run':
            return budget(180_000, 18_000);
        case 'recording':
            return budget(90_000, 12_000);
        case 'postprocess':
            return budget(45_000, 4_000);
        default:
            // An unrecognised phase used to fall through and return undefined,
            // which a timer treats as "fire immediately". Use the planning
            // budget so the caller still gets a usable timeout.
            return budget(90_000, 8_000);
    }
}
123
/**
 * Create the abort signal for a single phase attempt.
 *
 * The returned signal aborts when the parent signal aborts (forwarding the
 * parent's reason, or a cancellation error when the parent has none) or when
 * `timeoutMs` elapses, whichever happens first.
 *
 * @param {AbortSignal | undefined} parentSignal - Optional outer signal to follow.
 * @param {number} timeoutMs - Attempt budget in milliseconds.
 * @param {string} variantId - Variant identifier, used in abort messages.
 * @param {string} phase - Phase name, used in the timeout message.
 * @returns {{ signal: AbortSignal, cleanup: () => void }} The attempt signal and a
 *   function that clears the timer and detaches the parent listener.
 */
function createPhaseAttemptSignal(parentSignal, timeoutMs, variantId, phase) {
    const attemptController = new AbortController();
    const forwardParentAbort = () => {
        const reason = parentSignal?.reason ?? createAbortError(`Clip variant ${variantId} cancelled.`);
        attemptController.abort(reason);
    };
    if (parentSignal?.aborted) {
        // Parent was cancelled before the attempt began: start out aborted.
        forwardParentAbort();
    }
    else if (parentSignal) {
        parentSignal.addEventListener('abort', forwardParentAbort, { once: true });
    }
    const timeoutHandle = setTimeout(() => {
        attemptController.abort(createAbortError(`Clip variant ${variantId} ${phase} timed out after ${timeoutMs}ms.`));
    }, timeoutMs);
    const cleanup = () => {
        clearTimeout(timeoutHandle);
        parentSignal?.removeEventListener('abort', forwardParentAbort);
    };
    return { signal: attemptController.signal, cleanup };
}
147
/**
 * Run one pipeline phase with a per-attempt timeout and a bounded retry.
 *
 * `params.run` is invoked with a fresh attempt signal, the 1-based attempt
 * number and `{ timeoutMs, startedAt }`. A result with `ok: true` is returned
 * as-is. A failed or throwing attempt is retried only while the failure is
 * classified as transient, attempts remain, and the parent signal has not
 * been aborted.
 *
 * @param {object} params
 * @param {string} params.phase - Phase name; selects the timeout budget and retry rules.
 * @param {string} params.variantId - Variant identifier used in log and failure text.
 * @param {AbortSignal} [params.parentSignal] - Outer cancellation signal.
 * @param {object} [params.clipOptions] - Clip options forwarded to the timeout computation.
 * @param {Function} params.run - Async phase body: `(signal, attempt, { timeoutMs, startedAt })`.
 * @returns {Promise<object>} The successful result of `run`, or
 *   `{ ok: false, failure: { failedPhase, reason, retryable, ... } }`.
 */
async function executeClipPhase(params) {
    const timeoutMs = computeClipPhaseTimeoutMs(params.phase, params.clipOptions);
    for (let attempt = 1; attempt <= CLIP_PHASE_MAX_ATTEMPTS; attempt += 1) {
        const { signal, cleanup } = createPhaseAttemptSignal(params.parentSignal, timeoutMs, params.variantId, params.phase);
        const startedAt = Date.now();
        try {
            const result = await params.run(signal, attempt, { timeoutMs, startedAt });
            if (result.ok) {
                return result;
            }
            const retryable = isRetryableClipVariantFailure(params.phase, result.reason, result.failedSubphase);
            // Once the parent is cancelled a new attempt would start with an
            // already-aborted signal, so retrying can only waste work.
            const noMoreAttempts = attempt === CLIP_PHASE_MAX_ATTEMPTS || params.parentSignal?.aborted === true;
            if (!retryable || noMoreAttempts) {
                return {
                    ok: false,
                    failure: {
                        failedPhase: params.phase,
                        reason: result.reason,
                        retryable,
                        failedStepIndex: result.failedStepIndex,
                        failedSubphase: result.failedSubphase,
                    },
                };
            }
            logger.info(`Retrying clip variant ${params.variantId} ${params.phase} after transient failure: ${result.reason}`);
        }
        catch (err) {
            // Only an attempt-local abort (the timeout) is reported with the
            // abort message; a parent cancellation surfaces the thrown error.
            // Non-Error throws are stringified instead of yielding `undefined`.
            const reason = signal.aborted && !params.parentSignal?.aborted
                ? getAbortMessage(signal, `Clip variant ${params.variantId} ${params.phase} timed out after ${timeoutMs}ms.`)
                : err?.message ?? String(err);
            const retryable = isRetryableClipVariantFailure(params.phase, reason);
            const noMoreAttempts = attempt === CLIP_PHASE_MAX_ATTEMPTS || params.parentSignal?.aborted === true;
            if (!retryable || noMoreAttempts) {
                return {
                    ok: false,
                    failure: {
                        failedPhase: params.phase,
                        reason,
                        retryable,
                    },
                };
            }
            logger.info(`Retrying clip variant ${params.variantId} ${params.phase} after transient exception: ${reason}`);
        }
        finally {
            // Always release the attempt timer and the parent listener.
            cleanup();
        }
    }
    // Defensive: the loop returns on its final attempt, so this is only
    // reached if the attempt limit is configured below 1.
    return {
        ok: false,
        failure: {
            failedPhase: params.phase,
            reason: `Clip variant ${params.variantId} ${params.phase} failed without a recorded assessment.`,
            retryable: false,
        },
    };
}
202
/**
 * Tell whether a URL points at a generic landing route rather than a
 * specific entity page.
 *
 * Trailing slashes are ignored; the comparison is case-sensitive.
 *
 * @param {string} rawUrl - Absolute URL to inspect.
 * @returns {boolean} True for `/`, `/home`, `/dashboard` and `/app`; false for
 *   anything else, including input that is not a parseable URL.
 */
function isGenericDashboardRoute(rawUrl) {
    const genericPaths = new Set(['/', '/home', '/dashboard', '/app']);
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        return false;
    }
    const withoutTrailingSlash = parsed.pathname.replace(/\/+$/, '');
    return genericPaths.has(withoutTrailingSlash || '/');
}
215
/**
 * Heuristically decide whether the instructions ask for a specific entity
 * (project, workspace, document, ...) to be opened before recording.
 *
 * The navigation instructions and the script are scanned together; the answer
 * is true only when the text contains both an entity word and an action word.
 *
 * @param {{ navigationInstructions?: string, script?: string }} config - Source of the instruction text.
 * @returns {boolean} True when both an entity marker and an action marker are present.
 */
function requiresSpecificEntityPreparation(config) {
    const instructions = [config.navigationInstructions ?? '', config.script ?? '']
        .join('\n')
        .toLowerCase();
    if (instructions.trim() === '') {
        return false;
    }
    const mentionsEntity = /\b(project|workspace|account|organization|organisation|team|preset|document|collection|site|app)\b/i.test(instructions);
    const mentionsAction = /\b(open|click|select|choose|switch|go to|navigate|enter)\b/i.test(instructions);
    return mentionsEntity && mentionsAction;
}
223
/**
 * Decide whether the navigation result can be handed to the recorder as-is,
 * without running variant preflight.
 *
 * The handoff is accepted only when the start state was verified, the page is
 * not a generic landing route while a specific entity was asked for, the
 * requested language is anchored in the URL, and the requested language and
 * theme are both reported active.
 *
 * @param {object} navResult - Navigation outcome (`exactStartStateVerified`, `finalUrl`, `variantState`).
 * @param {object} config - Variant config, inspected for entity-specific instructions.
 * @param {string} [requestedLang] - Language the variant must show.
 * @param {string} [requestedTheme] - Theme the variant must show.
 * @returns {boolean} True when preflight can be skipped.
 */
function hasExactVariantHandoff(navResult, config, requestedLang, requestedTheme) {
    const startStateVerified = navResult.exactStartStateVerified === true;
    if (!startStateVerified) {
        return false;
    }
    const strandedOnGenericRoute = requiresSpecificEntityPreparation(config) && isGenericDashboardRoute(navResult.finalUrl);
    if (strandedOnGenericRoute) {
        return false;
    }
    // Language detected in the navigation browser frequently does not survive
    // the session transfer to a new browser context: the Accept-Language
    // header, navigator.language override and similar browser-level signals
    // are per-context and are NOT persisted in cookies or storage, and
    // sessionStorage entries rarely hold an explicit language preference.
    //
    // So a requested language is trusted only when it is anchored in the URL
    // (a locale prefix such as /en/...), which is portable. Otherwise preflight
    // must verify the variant in the actual recording browser; that is fast
    // when the language is already correct (detect + immediate return).
    if (requestedLang && !hasLocaleInUrl(navResult.finalUrl, requestedLang)) {
        logger.info(`[variant-handoff] Lang ${requestedLang} not anchored in URL — requiring preflight verification.`);
        return false;
    }
    const langReady = requestedLang ? navResult.variantState?.lang.active === true : true;
    const themeReady = requestedTheme ? navResult.variantState?.theme.active === true : true;
    return langReady && themeReady;
}
250
/**
 * Tell whether the first path segment of a URL is a locale prefix for the
 * requested language.
 *
 * The segment matches when it equals either the full requested tag
 * (`/pt-BR/...` for `pt-BR`) or its base language (`/pt/...` for `pt-BR`).
 * Comparison is case-insensitive and treats `_` and `-` as the same separator.
 *
 * @param {string} url - Absolute URL to inspect.
 * @param {string} lang - Requested language tag, e.g. `en` or `pt-BR`.
 * @returns {boolean} True when the URL is anchored to the language; false on
 *   no match, an empty path, an unparseable URL or a non-string `lang`.
 */
function hasLocaleInUrl(url, lang) {
    try {
        const [firstSegment] = new URL(url).pathname.split('/').filter(Boolean);
        if (!firstSegment) {
            return false;
        }
        const requestedTag = lang.toLowerCase().replace(/_/g, '-');
        const baseLanguage = requestedTag.split('-')[0];
        const urlTag = firstSegment.toLowerCase().replace(/_/g, '-');
        // Comparing against the base language alone rejects a URL that
        // carries the full locale ("/pt-br/"), so accept the full tag too.
        return urlTag === requestedTag || urlTag === baseLanguage;
    }
    catch {
        // Unparseable URL or a `lang` that is not a string: not anchored.
        return false;
    }
}
261
/**
 * Describe why a navigation result does not match the requested variant.
 *
 * Reports, in this order: landing on a generic route while a specific entity
 * was requested, an unconfirmed language, and an unconfirmed theme.
 *
 * @param {object} navResult - Navigation outcome (`finalUrl`, `variantState`).
 * @param {object} config - Variant config, inspected for entity-specific instructions.
 * @param {string} [requestedLang] - Language the variant must show.
 * @param {string} [requestedTheme] - Theme the variant must show.
 * @returns {string} Comma-separated problem list; empty when none of the three checks fails.
 */
function summarizeVariantHandoffMismatch(navResult, config, requestedLang, requestedTheme) {
    const onGenericRoute = requiresSpecificEntityPreparation(config) && isGenericDashboardRoute(navResult.finalUrl);
    const langUnconfirmed = Boolean(requestedLang) && navResult.variantState?.lang.active !== true;
    const themeUnconfirmed = Boolean(requestedTheme) && navResult.variantState?.theme.active !== true;
    const findings = [
        onGenericRoute ? `generic_route=${navResult.finalUrl}` : null,
        langUnconfirmed ? `lang=${requestedLang} not confirmed` : null,
        themeUnconfirmed ? `theme=${requestedTheme} not confirmed` : null,
    ];
    return findings.filter((finding) => finding !== null).join(', ');
}
274
/**
 * Build the video-agent configuration for one clip variant.
 *
 * Orchestrator-wide settings are combined with the clip, target, language and
 * theme. When a navigation result is supplied, its prepared session (URL,
 * storage, observation, recorded actions) is attached under `prepared*` keys.
 *
 * @param {object} orchestratorConfig - Run-wide configuration.
 * @param {object} clip - Clip definition; `url` (optional) and `script` are read.
 * @param {object} target - Capture target; `id` and `viewport` are read.
 * @param {string} lang - Requested language.
 * @param {string} theme - Requested theme.
 * @param {object} [navResult] - Navigation outcome to inject, if any.
 * @returns {object} Configuration object for the video agent, with `mode: 'clip'`.
 */
function buildVideoAgentConfig(orchestratorConfig, clip, target, lang, theme, navResult) {
    const variantId = buildVariantId(target.id, lang, theme);
    const cursorTheme = orchestratorConfig.clipOptions?.cursorTheme ?? 'minimal';
    const baseConfig = {
        url: clip.url ?? orchestratorConfig.url,
        script: clip.script,
        viewport: target.viewport,
        outputScale: orchestratorConfig.outputScale,
        model: orchestratorConfig.model,
        apiKey: orchestratorConfig.apiKey,
        mode: 'clip',
        runId: orchestratorConfig.runId,
        variantId,
        projectId: orchestratorConfig.projectId,
        presetId: orchestratorConfig.presetId,
        lang,
        theme,
        langInstructions: orchestratorConfig.langInstructions,
        themeInstructions: orchestratorConfig.themeInstructions,
        navigationInstructions: orchestratorConfig.navigationInstructions,
        credentials: orchestratorConfig.credentials,
        abortSignal: orchestratorConfig.abortSignal,
        videoOptions: { cursorTheme },
        fallbackModel: orchestratorConfig.fallbackModel,
    };
    if (!navResult) {
        return baseConfig;
    }
    // Inject navigation session for Phase 2
    return {
        ...baseConfig,
        preparedStartUrl: navResult.finalUrl,
        preparedStorageState: navResult.storageState,
        preparedSessionStorage: navResult.sessionStorage,
        preparedObservationSummary: navResult.observationSummary,
        preparedObservationSnapshot: navResult.observationSnapshot,
        preparedActions: navResult.actions,
        preparedReplayActions: navResult.replayableActions,
        preparedCoherenceKey: navResult.coherenceKey,
    };
}
311
/**
 * Check a recording plan against the observation snapshot captured before
 * any interaction.
 *
 * Only steps that can be judged from the pre-interaction snapshot are checked:
 * assertion/navigation/wait steps are skipped, and checking stops after the
 * first DOM-mutating step, because later targets (dropdown items, modal
 * content) may only exist once that interaction has happened.
 *
 * @param {object} config - Variant config; `preparedObservationSnapshot` is read.
 * @param {{ steps: object[] }} plan - Plan whose steps are validated.
 * @returns {Promise<{ valid: boolean, reason?: string }>} `valid: false` only when the
 *   very first step targets something absent from a populated snapshot.
 */
async function prevalidateClipPlan(config, plan) {
    const snapshot = config.preparedObservationSnapshot;
    if (!snapshot) {
        return { valid: true };
    }
    const domMutatingStepTypes = new Set(['click', 'type', 'select_option', 'key', 'drag']);
    const uncheckableStepTypes = new Set(['navigate', 'wait', 'dismiss_overlays', 'assert_url', 'assert_text', 'assert_element', 'assert_page']);
    for (const [position, step] of plan.steps.entries()) {
        if (uncheckableStepTypes.has(step.type)) {
            continue;
        }
        const check = validateStepAgainstSnapshot(step, snapshot);
        if (check.valid) {
            if (domMutatingStepTypes.has(step.type)) {
                // The page changes from here on; the snapshot is no longer a fair reference.
                break;
            }
            continue;
        }
        // A populated snapshot that lacks the target is likely a real mismatch.
        // An empty snapshot, or a target that an earlier step could have
        // revealed (e.g. by opening a dropdown), is only a soft warning.
        const snapshotHasElements = snapshot.interactiveElements.length > 0;
        const ordinal = position + 1;
        if (snapshotHasElements && position === 0) {
            // First step target absent from a populated snapshot — hard fail.
            return {
                valid: false,
                reason: `Snapshot pre-validation failed at step ${ordinal}: ${check.reason}`,
            };
        }
        logger.info(`Snapshot pre-validation warning at step ${ordinal}: ${check.reason} — allowing because the target may appear after a prior interaction.`);
        break;
    }
    return { valid: true };
}
349
/**
 * Normalise text for comparison: missing values become an empty string, and
 * the text is trimmed and lower-cased.
 *
 * @param {string | null | undefined} value - Text to normalise.
 * @returns {string} Trimmed, lower-cased text ('' for null/undefined).
 */
function normalizeSnapshotText(value) {
    const text = value ?? '';
    return text.trim().toLowerCase();
}
352
/**
 * Compare an element label with the expected label after normalisation.
 *
 * Both modes currently accept a substring match: the non-'contains' mode
 * accepts equality or containment, which reduces to containment.
 *
 * @param {string | null | undefined} actual - Label found on the element.
 * @param {string | null | undefined} expected - Label the plan is looking for.
 * @param {string} [mode] - Match mode; 'contains' requests a substring match.
 * @returns {boolean} False when either side is empty after normalisation;
 *   otherwise whether `actual` contains `expected`.
 */
function matchesTargetLabel(actual, expected, mode) {
    const haystack = normalizeSnapshotText(actual);
    const needle = normalizeSnapshotText(expected);
    if (!haystack || !needle) {
        return false;
    }
    const containsNeedle = haystack.includes(needle);
    return mode === 'contains'
        ? containsNeedle
        : haystack === needle || containsNeedle;
}
363
/**
 * Pull comparable hints out of a single CSS-like selector.
 *
 * @param {string} selector - One selector (no comma-separated alternatives).
 * @returns {{ textTokens: string[], hrefTokens: string[], roleTokens: string[],
 *   tagToken: (string | undefined), exactSelector: (string | undefined) }}
 *   `textTokens` holds `:has-text(...)` values followed by `aria-label` values;
 *   `tagToken` is the leading tag name, lower-cased; `exactSelector` is the
 *   trimmed selector when it contains `:`, `[`, `#` or `.`, otherwise undefined.
 */
function extractSelectorHints(selector) {
    // Every pattern below keeps the value of interest in capture group 2.
    const collectSecondGroup = (pattern) => [...selector.matchAll(pattern)].map((match) => match[2] ?? '');
    const hasTextValues = collectSecondGroup(/:has-text\((["'])(.*?)\1\)/g);
    const ariaLabelValues = collectSecondGroup(/\[aria-label[*^$|~]?=(["'])(.*?)\1\]/g);
    const hrefValues = collectSecondGroup(/\[href[*^$|~]?=(["'])(.*?)\1\]/g);
    const roleValues = collectSecondGroup(/\[role=(["']?)([a-z0-9_-]+)\1\]/gi);
    const leadingTag = selector.match(/^\s*([a-z][a-z0-9-]*)\b/i);
    const looksStructured = [':', '[', '#', '.'].some((marker) => selector.includes(marker));
    return {
        textTokens: hasTextValues.concat(ariaLabelValues).filter(Boolean),
        hrefTokens: hrefValues.filter(Boolean),
        roleTokens: roleValues.filter(Boolean),
        tagToken: leadingTag?.[1]?.toLowerCase(),
        exactSelector: looksStructured ? selector.trim() : undefined,
    };
}
383
/**
 * Check whether a selector (possibly a comma-separated list of alternatives)
 * plausibly refers to an element recorded in the snapshot.
 *
 * @param {string} selector - Selector from the plan.
 * @param {{ interactiveElements: object[] }} snapshot - Prepared observation snapshot.
 * @returns {{ matched: boolean, decisive: boolean }} `matched` is true when any
 *   alternative fits some element. `decisive` is true when the verdict can be
 *   trusted: on any match, or on a miss where at least one alternative carried
 *   text/href/role/tag hints.
 */
function matchSelectorAgainstSnapshot(selector, snapshot) {
    const alternatives = selector.split(',').map((piece) => piece.trim()).filter(Boolean);
    if (alternatives.length === 0) {
        return { matched: false, decisive: false };
    }
    let sawDecisiveAlternative = false;
    for (const alternative of alternatives) {
        const hints = extractSelectorHints(alternative);
        const carriesHints = hints.textTokens.length > 0
            || hints.hrefTokens.length > 0
            || hints.roleTokens.length > 0
            || Boolean(hints.tagToken);
        sawDecisiveAlternative = sawDecisiveAlternative || carriesHints;
        const elementFitsHints = (element) => {
            const labels = [element.text, element.ariaLabel, element.title].filter(Boolean);
            // Every text token must appear in at least one of the element's labels.
            const allTextTokensSeen = hints.textTokens.every((token) => labels.some((label) => matchesTargetLabel(label, token, 'contains')));
            if (!allTextTokensSeen) {
                return false;
            }
            if (hints.hrefTokens.length > 0) {
                const href = normalizeSnapshotText(element.href);
                if (!href || !hints.hrefTokens.every((token) => href.includes(normalizeSnapshotText(token)))) {
                    return false;
                }
            }
            if (hints.roleTokens.length > 0) {
                const role = normalizeSnapshotText(element.role);
                if (!role || !hints.roleTokens.every((token) => role === normalizeSnapshotText(token))) {
                    return false;
                }
            }
            if (hints.tagToken) {
                const tag = normalizeSnapshotText(element.tag);
                if (!tag || tag !== hints.tagToken) {
                    return false;
                }
            }
            if (hints.exactSelector && element.selector) {
                const recorded = element.selector.trim();
                const overlaps = recorded === hints.exactSelector
                    || recorded.includes(hints.exactSelector)
                    || hints.exactSelector.includes(recorded);
                if (overlaps) {
                    return true;
                }
            }
            // No hint ruled the element out: accept only if there were hints to check.
            return carriesHints;
        };
        if (snapshot.interactiveElements.some(elementFitsHints)) {
            return { matched: true, decisive: true };
        }
    }
    return { matched: false, decisive: sawDecisiveAlternative };
}
432
/**
 * Check whether a structured step target refers to an element recorded in the
 * snapshot.
 *
 * An element matches when it satisfies every anchor the target provides:
 * label, href (substring), role, tag, index, and selector. The index is only
 * enforced when target and snapshot share the same coherence key.
 *
 * @param {object} target - Step target (`label`, `labelMatchMode`, `href`, `role`,
 *   `tag`, `index`, `selector`, `coherenceKey`).
 * @param {{ interactiveElements: object[], coherenceKey?: string }} snapshot - Prepared observation snapshot.
 * @returns {{ matched: boolean, decisive: boolean }} A target without any anchor
 *   yields `{ matched: false, decisive: false }`; otherwise the verdict is decisive.
 */
function matchTargetAgainstSnapshot(target, snapshot) {
    const indexIsScoped = typeof target.index === 'number'
        && target.coherenceKey
        && snapshot.coherenceKey
        && target.coherenceKey === snapshot.coherenceKey;
    const hasAnyAnchor = Boolean(target.label || target.href || target.role || target.tag || target.selector || indexIsScoped);
    if (!hasAnyAnchor) {
        return { matched: false, decisive: false };
    }
    const elementFitsTarget = (element) => {
        if (target.label) {
            const labels = [element.text, element.ariaLabel, element.title].filter(Boolean);
            if (!labels.some((label) => matchesTargetLabel(label, target.label, target.labelMatchMode))) {
                return false;
            }
        }
        if (target.href) {
            const href = normalizeSnapshotText(element.href);
            if (!href || !href.includes(normalizeSnapshotText(target.href))) {
                return false;
            }
        }
        if (target.role) {
            const role = normalizeSnapshotText(element.role);
            if (!role || role !== normalizeSnapshotText(target.role)) {
                return false;
            }
        }
        if (target.tag) {
            const tag = normalizeSnapshotText(element.tag);
            if (!tag || tag !== normalizeSnapshotText(target.tag)) {
                return false;
            }
        }
        if (typeof target.index === 'number' && indexIsScoped && element.index !== target.index) {
            return false;
        }
        if (target.selector) {
            // Re-run the selector check against this element alone.
            const verdict = matchSelectorAgainstSnapshot(target.selector, {
                ...snapshot,
                interactiveElements: [element],
            });
            if (!verdict.matched && verdict.decisive) {
                return false;
            }
        }
        return true;
    };
    return {
        matched: snapshot.interactiveElements.some(elementFitsTarget),
        decisive: true,
    };
}
477
/**
 * Validate one plan step against the prepared observation snapshot.
 *
 * The structured target is checked first, then the raw selector. A check only
 * fails the step when its miss is decisive; inconclusive misses pass.
 *
 * @param {object} step - Plan step (`target`, `selector`, `description`).
 * @param {object} snapshot - Prepared observation snapshot.
 * @returns {{ valid: boolean, reason?: string }} `valid: false` with a reason when
 *   the target or selector is decisively absent.
 */
function validateStepAgainstSnapshot(step, snapshot) {
    const isDecisiveMiss = (verdict) => !verdict.matched && verdict.decisive;
    if (step.target && isDecisiveMiss(matchTargetAgainstSnapshot(step.target, snapshot))) {
        const reason = step.target.label
            ? `target "${step.target.label}" is absent from the prepared observation`
            : `target anchors for "${step.description}" are absent from the prepared observation`;
        return { valid: false, reason };
    }
    if (step.selector && isDecisiveMiss(matchSelectorAgainstSnapshot(step.selector, snapshot))) {
        return {
            valid: false,
            reason: `selector "${step.selector}" is absent from the prepared observation`,
        };
    }
    return { valid: true };
}
500
/**
 * Adapt orchestrator-level callbacks to the per-variant callback shape,
 * tagging every event with the clip and variant it belongs to.
 *
 * @param {string} clipId - Clip the events belong to.
 * @param {string} variantId - Variant the events belong to.
 * @param {object} callbacks - Orchestrator callbacks; `onClipPhase`, `onClipStep`
 *   and `onClipStepResult` are optional, `onLog` and `onScreenshot` pass through.
 * @returns {object} Callbacks `onPhaseChange`, `onStep`, `onStepResult`, `onLog`, `onScreenshot`.
 */
function wrapCallbacks(clipId, variantId, callbacks) {
    const origin = { clipId, variantId };
    const onPhaseChange = (phase) => callbacks.onClipPhase?.({ ...origin, phase });
    const onStep = (stepIndex, total, description, phase) => callbacks.onClipStep?.({ ...origin, stepIndex, total, description, phase });
    const onStepResult = (stepIndex, ok, reason) => callbacks.onClipStepResult?.({ ...origin, stepIndex, ok, reason });
    return {
        onPhaseChange,
        onStep,
        onStepResult,
        onLog: callbacks.onLog,
        onScreenshot: callbacks.onScreenshot,
    };
}
509
- // ── Main orchestrator ───────────────────────────────────────────────
510
- /**
511
- * Run the clip capture pipeline for all clips × targets × langs × themes.
512
- *
513
- * Hybrid pipeline:
514
- * 1. For each (clip, target, lang, theme) — navigate once with the screenshot agent
515
- * 2. Reuse a cached navigation session only for the exact same variant
516
- * 3. For each variant: plan → validate selectors → record → post-process
517
- *
518
- * Navigation caching: the cache key includes the theme, so a navigation
519
- * session (cookies, localStorage, URL) is never shared between light and dark variants.
520
- */
521
- export async function runClipOrchestrator(config, callbacks = {}) {
522
- const results = [];
523
- const totalVariants = config.clips.length * config.targets.length * config.langs.length * config.themes.length;
524
- logger.info(`Starting clip orchestration (hybrid): ${config.clips.length} clips × ${config.targets.length} targets × ${config.langs.length} langs × ${config.themes.length} themes = ${totalVariants} variants`);
525
- // Navigation cache: shared only for the exact same (clip, target, lang, theme) variant.
526
- const navCache = new Map();
527
- for (let clipIndex = 0; clipIndex < config.clips.length; clipIndex++) {
528
- const clip = config.clips[clipIndex];
529
- logger.info(`Processing clip ${clipIndex + 1}/${config.clips.length}: "${clip.name}"`);
530
- for (const target of config.targets) {
531
- for (const lang of config.langs) {
532
- for (const theme of config.themes) {
533
- const navKey = buildNavCacheKey(clip.id, target.id, lang, theme);
534
- let navResult = navCache.get(navKey);
535
- if (!navResult) {
536
- logger.info(`[hybrid] Navigating for clip="${clip.name}" target=${target.id} lang=${lang} theme=${theme}`);
537
- const NAV_MAX_ATTEMPTS = 2;
538
- let navAttempt = 0;
539
- let navError = null;
540
- let navFailed = false;
541
- while (navAttempt < NAV_MAX_ATTEMPTS && !navResult) {
542
- navAttempt += 1;
543
- try {
544
- // When navigationScript is provided, use it for navigation and keep
545
- // script exclusively for the recording planner. Otherwise, fall back
546
- // to using script for both (legacy behavior).
547
- navResult = await navigateWithAgent({
548
- url: clip.url ?? config.url,
549
- baseUrl: config.url,
550
- navigationPrompt: clip.navigationScript ?? clip.script,
551
- recordingScript: clip.navigationScript ? clip.script : undefined,
552
- viewport: target.viewport,
553
- outputScale: config.outputScale,
554
- lang,
555
- theme: theme,
556
- credentials: config.credentials,
557
- langInstructions: config.langInstructions,
558
- themeInstructions: config.themeInstructions,
559
- navigationInstructions: config.navigationInstructions,
560
- model: config.model,
561
- apiKey: config.apiKey,
562
- maxIterations: 30,
563
- abortSignal: config.abortSignal,
564
- onLog: callbacks.onLog,
565
- onScreenshot: callbacks.onScreenshot,
566
- });
567
- if (!navResult.success) {
568
- const errMsg = navResult.error ?? 'unknown';
569
- const isTransient = /\b(timeout|timed out|network|socket|context closed)\b/i.test(errMsg);
570
- if (isTransient && navAttempt < NAV_MAX_ATTEMPTS) {
571
- logger.info(`Navigation attempt ${navAttempt} failed (transient: ${errMsg}), retrying...`);
572
- navResult = undefined;
573
- continue;
574
- }
575
- navFailed = true;
576
- }
577
- }
578
- catch (err) {
579
- navError = err;
580
- const isTransient = /\b(timeout|timed out|network|socket|context closed)\b/i.test(navError.message);
581
- if (isTransient && navAttempt < NAV_MAX_ATTEMPTS) {
582
- logger.info(`Navigation attempt ${navAttempt} threw (transient: ${navError.message}), retrying...`);
583
- continue;
584
- }
585
- break;
586
- }
587
- }
588
- if (navError && !navResult) {
589
- logger.error(`Navigation failed for clip="${clip.name}" ${navKey}: ${navError.message}`);
590
- results.push({
591
- clipId: clip.id,
592
- clipName: clip.name,
593
- clipScript: clip.script,
594
- targetId: target.id,
595
- targetLabel: target.label,
596
- viewportWidth: target.viewport.width,
597
- viewportHeight: target.viewport.height,
598
- lang,
599
- theme,
600
- success: false,
601
- durationMs: 0,
602
- stepsExecuted: 0,
603
- assessment: `Navigation failed: ${navError.message}`,
604
- usage: [],
605
- });
606
- continue;
607
- }
608
- if (navFailed || !navResult) {
609
- results.push({
610
- clipId: clip.id,
611
- clipName: clip.name,
612
- clipScript: clip.script,
613
- targetId: target.id,
614
- targetLabel: target.label,
615
- viewportWidth: target.viewport.width,
616
- viewportHeight: target.viewport.height,
617
- lang,
618
- theme,
619
- success: false,
620
- durationMs: 0,
621
- stepsExecuted: 0,
622
- assessment: `Navigation failed: ${navResult?.error ?? 'unknown'}`,
623
- usage: navResult?.usage ?? [],
624
- });
625
- continue;
626
- }
627
- navCache.set(navKey, navResult);
628
- }
629
- const variantId = buildVariantId(target.id, lang, theme);
630
- const variantConfig = buildVideoAgentConfig(config, clip, target, lang, theme, navResult);
631
- const variantCallbacks = wrapCallbacks(clip.id, variantId, callbacks);
632
- callbacks.onClipStart?.({
633
- clipId: clip.id,
634
- clipName: clip.name,
635
- variantId,
636
- target: target.viewport,
637
- lang,
638
- theme,
639
- clipIndex,
640
- totalClips: config.clips.length,
641
- });
642
- // Per-clip timeout to prevent a single slow clip from blocking the entire pipeline.
643
- // 300s allows for: navigation (~45s) + preflight (~60s) + planning (~30s) + dry-run (~90s) + recording (~60s).
644
- const clipTimeoutMs = config.perClipTimeoutMs ?? 300_000;
645
- const clipAbortController = new AbortController();
646
- const clipTimer = setTimeout(() => {
647
- clipAbortController.abort(createAbortError(`Clip variant ${variantId} exceeded per-clip timeout of ${clipTimeoutMs}ms.`));
648
- }, clipTimeoutMs);
649
- // Override the abort signal for this variant
650
- variantConfig.abortSignal = config.abortSignal
651
- ? mergeAbortSignals(config.abortSignal, clipAbortController.signal)
652
- : clipAbortController.signal;
653
- let variantResult;
654
- try {
655
- variantResult = await runClipVariant(clip, target, lang, theme, navResult, variantConfig, variantCallbacks, callbacks, config.clipOptions);
656
- }
657
- catch (err) {
658
- clearTimeout(clipTimer);
659
- variantResult = {
660
- clipId: clip.id,
661
- clipName: clip.name,
662
- clipScript: clip.script,
663
- targetId: target.id,
664
- targetLabel: target.label,
665
- viewportWidth: target.viewport.width,
666
- viewportHeight: target.viewport.height,
667
- lang,
668
- theme,
669
- success: false,
670
- durationMs: 0,
671
- stepsExecuted: 0,
672
- assessment: `Clip variant timed out: ${err.message}`,
673
- usage: [],
674
- };
675
- }
676
- finally {
677
- clearTimeout(clipTimer);
678
- }
679
- if (!variantResult.success) {
680
- logger.error(`Clip variant ${variantId} failed${variantResult.failedPhase ? ` during ${variantResult.failedPhase}` : ''}: ${variantResult.assessment}`);
681
- }
682
- results.push(variantResult);
683
- }
684
- }
685
- }
686
- }
687
- const successCount = results.filter(r => r.success).length;
688
- callbacks.onClipsAllDone?.({ totalClips: results.length, successCount });
689
- logger.info(`Clip orchestration complete: ${successCount}/${results.length} successful`);
690
- return { results, totalClips: results.length, successCount };
691
- }
692
/**
 * Produces one clip variant (target x language x theme) from a completed navigation result.
 *
 * Starts a fresh usage list seeded with the entries navigation already recorded, then hands
 * everything to runClipVariantAttempt, which appends to that list as it goes.
 *
 * @param clip                  Clip definition, forwarded unchanged.
 * @param target                Capture target, forwarded unchanged.
 * @param lang                  Requested UI language, forwarded unchanged.
 * @param theme                 Requested UI theme, forwarded unchanged.
 * @param navResult             Navigation result; only its iterable `usage` is read here.
 * @param config                Per-variant configuration, forwarded unchanged.
 * @param videoCallbacks        Per-variant callbacks, forwarded unchanged.
 * @param orchestratorCallbacks Orchestrator-level callbacks, forwarded unchanged.
 * @param clipOptions           Clip options, forwarded unchanged.
 * @returns The promise returned by runClipVariantAttempt.
 */
async function runClipVariant(clip, target, lang, theme, navResult, config, videoCallbacks, orchestratorCallbacks, clipOptions) {
    const { usage: navigationUsage } = navResult;
    // Shallow copy: entries appended during the attempt must not end up in navResult.usage itself.
    const usageLedger = [...navigationUsage];
    return runClipVariantAttempt(
        clip,
        target,
        lang,
        theme,
        navResult,
        config,
        usageLedger,
        videoCallbacks,
        orchestratorCallbacks,
        clipOptions,
    );
}
696
/**
 * Runs a single clip variant through its phases and returns one result record.
 *
 * Phase order (each phase is run through executeClipPhase):
 *   1. variant preflight, skipped when hasExactVariantHandoff() reports an exact hand-off;
 *      it is preceded by a prefix-planning step unless the targeted-preflight shortcut applies
 *   2. base planning followed by prevalidateClipPlan
 *   3. dry run (verifyAndPatchPlan), which may replace the plan
 *   4. recording (recordPlan)
 *   5. post-processing (postProcessClipRecording) into a fresh temporary directory
 *
 * The first phase that reports `ok: false` ends the attempt with a failure record built by
 * makeFailResult; later phases are not started.
 *
 * @param clip        Reads `id`, `name`, `script` and optional `holdLastFrameSec`.
 * @param target      Reads `id`, `label`, `viewport.width` and `viewport.height`.
 * @param lang        Requested UI language; copied into the result and passed to helpers.
 * @param theme       Requested UI theme; copied into the result and passed to helpers.
 * @param navResult   Reads `exactStartStateVerified`, `finalUrl` and `observationSummary`.
 * @param config      Variant configuration; reads `abortSignal`, `outputScale`, `url` and the
 *                    `prepared*` fields. It is spread into per-phase configs, never mutated here.
 * @param allUsage    Usage list that is MUTATED in place: every phase that reports usage appends
 *                    to it, and the same array instance is returned as `usage` in the result.
 * @param videoCallbacks        Receives `onPhaseChange('planning')`; also handed to the
 *                              preflight, dry-run and recording helpers.
 * @param orchestratorCallbacks Optional `onClipPostprocessStart`, `onClipPostprocessDone` and
 *                              `onClipRecordingDone` hooks.
 * @param clipOptions Options forwarded to executeClipPhase and spread into the post-processing
 *                    options; may be undefined.
 * @returns A result record with `success: true` plus output paths, or `success: false` plus
 *          `assessment`, `failedPhase` and, when provided, `failedStepIndex` / `failedSubphase`.
 *
 * NOTE(review): the preflight, dry-run and recording `run` callbacks have no try/catch of their
 * own, so anything they throw reaches executeClipPhase — confirm how it reports such errors.
 */
async function runClipVariantAttempt(clip, target, lang, theme, navResult, config, allUsage, videoCallbacks, orchestratorCallbacks, clipOptions) {
    const variantId = buildVariantId(target.id, lang, theme);
    // A catch binding can hold any thrown value, not just an Error. Keep `err.message` whenever it
    // is defined (unchanged output for real errors) and fall back to String(err) otherwise, so a
    // thrown string is not reported as "undefined" and a thrown null does not crash the handler.
    const describeThrown = (err) => (err != null && err.message !== undefined ? err.message : String(err));
    // Builds the failure record; `failure` optionally carries step/subphase details.
    const makeFailResult = (assessment, failedPhase, failure) => ({
        clipId: clip.id,
        clipName: clip.name,
        clipScript: clip.script,
        targetId: target.id,
        targetLabel: target.label,
        viewportWidth: target.viewport.width,
        viewportHeight: target.viewport.height,
        lang,
        theme,
        success: false,
        durationMs: 0,
        stepsExecuted: 0,
        assessment: humanizeClipFailure(assessment, failedPhase, failure?.failedSubphase),
        failedPhase,
        failedStepIndex: failure?.failedStepIndex,
        failedSubphase: failure?.failedSubphase,
        usage: allUsage,
    });
    videoCallbacks.onPhaseChange?.('planning');
    // Decide how much variant preparation is needed before the base plan can be created.
    const shouldSkipPreflight = hasExactVariantHandoff(navResult, config, lang, theme);
    const canUseTargetedVariantPreflight = navResult.exactStartStateVerified === true
        && !shouldSkipPreflight
        && !(requiresSpecificEntityPreparation(config) && isGenericDashboardRoute(navResult.finalUrl));
    const shouldPlanVariantPrefix = !shouldSkipPreflight && !canUseTargetedVariantPreflight;
    let preparedConfig = {
        ...config,
        preparedObservationSummary: config.preparedObservationSummary ?? navResult.observationSummary,
    };
    if (shouldSkipPreflight) {
        logger.info(`Clip variant ${variantId} reusing exact start-state handoff from hybrid navigation; skipping variant preflight.`);
    }
    else {
        // Stays null when the targeted preflight runs without a prefix plan.
        let variantPrefixPlan = null;
        if (shouldPlanVariantPrefix) {
            const prefixPhase = await executeClipPhase({
                phase: 'planning',
                variantId,
                clipOptions,
                parentSignal: config.abortSignal,
                run: async (signal) => {
                    try {
                        const prefixResult = await createVariantPrefixPlan({
                            ...config,
                            abortSignal: signal,
                        });
                        allUsage.push(prefixResult.usage);
                        return { ok: true, value: prefixResult.plan };
                    }
                    catch (err) {
                        return { ok: false, reason: `Variant preflight planning failed: ${describeThrown(err)}` };
                    }
                },
            });
            if (!prefixPhase.ok) {
                return makeFailResult(prefixPhase.failure.reason, prefixPhase.failure.failedPhase);
            }
            variantPrefixPlan = prefixPhase.value;
        }
        else if (canUseTargetedVariantPreflight) {
            const mismatchSummary = summarizeVariantHandoffMismatch(navResult, config, lang, theme);
            logger.info(`Clip variant ${variantId} reached the exact start state, but the requested UI variant is not confirmed${mismatchSummary ? ` (${mismatchSummary})` : ''}. Running targeted variant preflight without prefix planning.`);
        }
        const preflightPhase = await executeClipPhase({
            phase: 'preflight',
            variantId,
            clipOptions,
            parentSignal: config.abortSignal,
            run: async (signal, _attempt, phaseContext) => {
                const preflightResult = await runVariantPreflight({
                    ...config,
                    abortSignal: signal,
                    internalPhaseTimeoutMs: phaseContext.timeoutMs,
                    internalPhaseStartedAt: phaseContext.startedAt,
                }, variantPrefixPlan, videoCallbacks);
                if (!preflightResult.ok) {
                    return { ok: false, reason: `Variant preflight failed: ${preflightResult.reason ?? 'unknown reason'}` };
                }
                return { ok: true, value: preflightResult };
            },
        });
        if (!preflightPhase.ok) {
            return makeFailResult(preflightPhase.failure.reason, preflightPhase.failure.failedPhase);
        }
        const preflightResult = preflightPhase.value;
        if (preflightResult.usage?.length) {
            allUsage.push(...preflightResult.usage);
        }
        // Later phases start from whatever state the preflight left behind, falling back to the
        // values already present on the incoming config when the preflight did not report one.
        preparedConfig = {
            ...preparedConfig,
            preparedStartUrl: preflightResult.finalUrl ?? config.preparedStartUrl ?? config.url,
            preparedStorageState: preflightResult.storageState ?? config.preparedStorageState,
            preparedSessionStorage: preflightResult.sessionStorage ?? config.preparedSessionStorage,
            preparedObservationSummary: preflightResult.observationSummary ?? preparedConfig.preparedObservationSummary,
            preparedObservationSnapshot: preflightResult.observationSnapshot ?? preparedConfig.preparedObservationSnapshot,
        };
    }
    let plan;
    const planningPhase = await executeClipPhase({
        phase: 'planning',
        variantId,
        clipOptions,
        parentSignal: config.abortSignal,
        run: async (signal, _attempt, phaseContext) => {
            try {
                const planResult = await createBasePlan({
                    ...preparedConfig,
                    abortSignal: signal,
                    internalPhaseTimeoutMs: phaseContext.timeoutMs,
                    internalPhaseStartedAt: phaseContext.startedAt,
                });
                allUsage.push(planResult.usage);
                logger.info(`Clip "${clip.name}" plan: "${planResult.plan.title}" — ${planResult.plan.steps.length} steps`);
                const prevalidation = await prevalidateClipPlan(preparedConfig, planResult.plan);
                if (!prevalidation.valid) {
                    return { ok: false, reason: prevalidation.reason };
                }
                return { ok: true, value: planResult.plan };
            }
            catch (err) {
                return { ok: false, reason: `Planning failed: ${describeThrown(err)}` };
            }
        },
    });
    if (!planningPhase.ok) {
        return makeFailResult(planningPhase.failure.reason, planningPhase.failure.failedPhase, planningPhase.failure);
    }
    plan = planningPhase.value;
    const dryRunPhase = await executeClipPhase({
        phase: 'dry_run',
        variantId,
        clipOptions,
        parentSignal: config.abortSignal,
        run: async (signal, _attempt, phaseContext) => {
            // Hand the verifier a copy of the plan with its own `steps` array.
            const verifyResult = await verifyAndPatchPlan({
                ...plan,
                steps: [...plan.steps],
            }, {
                ...preparedConfig,
                abortSignal: signal,
                internalPhaseTimeoutMs: phaseContext.timeoutMs,
                internalPhaseStartedAt: phaseContext.startedAt,
            }, videoCallbacks);
            allUsage.push(...verifyResult.usage);
            if (!verifyResult.success) {
                logger.error(`Dry-run failed for clip "${clip.name}" variant ${variantId}`);
                return {
                    ok: false,
                    reason: `Dry-run failed: ${verifyResult.error}`,
                    failedStepIndex: verifyResult.failedStepIndex,
                    failedSubphase: verifyResult.failedSubphase,
                };
            }
            return { ok: true, value: verifyResult.plan };
        },
    });
    if (!dryRunPhase.ok) {
        return makeFailResult(dryRunPhase.failure.reason, dryRunPhase.failure.failedPhase, dryRunPhase.failure);
    }
    // From here on the plan returned by the dry run is the one that gets recorded and reported.
    plan = dryRunPhase.value;
    const recordingPhase = await executeClipPhase({
        phase: 'recording',
        variantId,
        clipOptions,
        parentSignal: config.abortSignal,
        run: async (signal, _attempt, phaseContext) => {
            const recordResult = await recordPlan(plan, {
                ...preparedConfig,
                abortSignal: signal,
                internalPhaseTimeoutMs: phaseContext.timeoutMs,
                internalPhaseStartedAt: phaseContext.startedAt,
                dryRunVariantConfirmed: true,
            }, videoCallbacks);
            if (!recordResult.videoPath) {
                return { ok: false, reason: 'Recording produced no video file' };
            }
            return {
                ok: true,
                value: {
                    ...recordResult,
                    videoPath: recordResult.videoPath,
                },
            };
        },
    });
    if (!recordingPhase.ok) {
        return makeFailResult(recordingPhase.failure.reason, recordingPhase.failure.failedPhase);
    }
    const recordResult = recordingPhase.value;
    orchestratorCallbacks.onClipPostprocessStart?.({ clipId: clip.id, variantId });
    const postprocessPhase = await executeClipPhase({
        phase: 'postprocess',
        variantId,
        clipOptions,
        parentSignal: config.abortSignal,
        run: async (_signal, _attempt, _phaseContext) => {
            try {
                const outputDir = await mkdtemp(path.join(os.tmpdir(), `clip-${clip.id}-`));
                const outputScale = config.outputScale ?? 1;
                // Output base name is the clip id plus the variant id with ':' replaced by '_'.
                const postResult = await postProcessClipRecording(recordResult.videoPath, outputDir, `${clip.id}_${variantId.replace(/:/g, '_')}`, {
                    ...clipOptions,
                    gifMaxWidth: clipOptions?.gifMaxWidth ?? Math.round(target.viewport.width * outputScale),
                    holdLastFrameSec: clip.holdLastFrameSec ?? clipOptions?.holdLastFrameSec ?? 0,
                    trimStartSec: recordResult.setupDurationSec,
                });
                return { ok: true, value: postResult };
            }
            catch (err) {
                const detail = describeThrown(err);
                logger.error(`Post-processing failed for clip "${clip.name}" variant ${variantId}: ${detail}`);
                return { ok: false, reason: `Post-processing failed: ${detail}` };
            }
        },
    });
    if (!postprocessPhase.ok) {
        orchestratorCallbacks.onClipPostprocessDone?.({ clipId: clip.id, variantId, success: false });
        return makeFailResult(postprocessPhase.failure.reason, postprocessPhase.failure.failedPhase);
    }
    const postResult = postprocessPhase.value;
    orchestratorCallbacks.onClipPostprocessDone?.({
        clipId: clip.id, variantId, success: true,
        gifPath: postResult.gifPath, mp4Path: postResult.mp4Path,
    });
    const result = {
        clipId: clip.id,
        clipName: clip.name,
        clipScript: clip.script,
        targetId: target.id,
        targetLabel: target.label,
        viewportWidth: target.viewport.width,
        viewportHeight: target.viewport.height,
        lang,
        theme,
        success: true,
        gifPath: postResult.gifPath,
        mp4Path: postResult.mp4Path,
        thumbnailPath: postResult.thumbnailPath,
        durationMs: postResult.durationMs,
        fileSizeBytes: postResult.fileSizeBytes,
        plan,
        stepsExecuted: recordResult.stepsExecuted,
        assessment: `Recorded ${recordResult.stepsExecuted} steps, ${Math.round(postResult.durationMs / 1000)}s clip`,
        usage: allUsage,
    };
    orchestratorCallbacks.onClipRecordingDone?.({
        clipId: clip.id, variantId, success: true,
        gifPath: postResult.gifPath, mp4Path: postResult.mp4Path,
        thumbnailPath: postResult.thumbnailPath,
        durationMs: postResult.durationMs, fileSizeBytes: postResult.fileSizeBytes,
        lang, theme, targetId: target.id, targetLabel: target.label,
    });
    return result;
}
950
- //# sourceMappingURL=clip-orchestrator.js.map