autokap 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +266 -305
  7. package/package.json +23 -16
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
@@ -1,138 +0,0 @@
1
- /**
2
- * Hybrid Navigator — Uses the screenshot agent for navigation (Phase 1),
3
- * then exports the session for video recording (Phase 2).
4
- *
5
- * This wrapper around `runAgent()` provides:
6
- * - Agent-driven navigation with full self-healing
7
- * - Session export (storageState + sessionStorage + scroll position)
8
- * - Post-transfer auth detection (silent re-auth wait)
9
- * - Lightweight selector pre-validation for clip plans
10
- * - Fresh observation capture on recording browser
11
- */
12
- import { Browser } from './browser.js';
13
- import { type VariantStateDetection } from './video-variant-state.js';
14
- import type { BrowserStorageState, BrowserSessionStorageState, CapturePageIdentity, ExecutedAction, InteractiveElement, LoginCredentials, StepUsage, VerificationResult, VideoPlan, VideoObservationSnapshot, VideoPageSignals, VideoStep } from './types.js';
15
/**
 * Outcome of a `navigateWithAgent()` run: whether the agent reached the target
 * state, plus the session export and page observations captured afterwards.
 * On failure only `success`, `finalUrl`, `scrollPosition`,
 * `exactStartStateVerified`, `error` and `usage` are populated.
 */
export interface NavigationResult {
    /** Whether the navigation agent reported success. */
    success: boolean;
    /** URL of the page when the run ended. */
    finalUrl: string;
    /** Rounded window scroll offset at the end of the run; `{ x: 0, y: 0 }` on failure or when it cannot be read. */
    scrollPosition: {
        x: number;
        y: number;
    };
    /** Exported browser storage state; left undefined when the export fails. */
    storageState?: BrowserStorageState;
    /** Exported sessionStorage contents; left undefined when the export fails. */
    sessionStorage?: BrowserSessionStorageState;
    /** Text summary of the final page built from the captured observation data. */
    observationSummary?: string;
    /** Structured snapshot of the final page (interactive elements and page signals). */
    observationSnapshot?: VideoObservationSnapshot;
    /** Coherence key taken from the verification bundle, when one was captured. */
    coherenceKey?: string;
    /** Interactive elements from the verification bundle, falling back to the page state. */
    interactiveElements?: InteractiveElement[];
    /** Page identity; `navigateWithAgent` currently always reports `null`. */
    pageIdentity?: CapturePageIdentity | null;
    /** Every action the agent executed. */
    actions?: ExecutedAction[];
    /** Subset of actions judged replayable; empty when the replay analysis reports a skip reason. */
    replayableActions?: ExecutedAction[];
    /** Verification result reported by the agent, or `null` when it reported none. */
    verification?: VerificationResult | null;
    /** Mode of `verification`, when present. */
    verificationMode?: VerificationResult['mode'];
    /** True only when verification succeeded in `vision` or `text_fallback` mode. */
    exactStartStateVerified: boolean;
    /** Language detected on the final page, or `null` when detection produced nothing. */
    detectedLang?: string | null;
    /** Theme detected on the final page, or `null` when detection produced nothing. */
    detectedTheme?: 'light' | 'dark' | null;
    /** Page signals taken from the variant-state detection. */
    pageSignals?: VideoPageSignals;
    /** Full language/theme detection result. */
    variantState?: VariantStateDetection;
    /** The agent's assessment text when the run failed. */
    error?: string;
    /** Per-step LLM usage reported by the agent. */
    usage: StepUsage[];
}
41
/** Input for `navigateWithAgent()` and `buildNavigationPrompt()`. */
export interface NavigateWithAgentConfig {
    /** Target URL — can be relative (e.g. "/docs") if baseUrl is provided. */
    url: string;
    /** Base URL of the project (e.g. "https://example.com"). Used to resolve relative urls. */
    baseUrl?: string;
    /**
     * Where to navigate. Without `recordingScript` (legacy mode) this is the
     * full clip description, used for both navigation and recording.
     */
    navigationPrompt: string;
    /**
     * When provided (two-part mode), this describes the RECORDED interaction separately
     * from the navigation. The navigation agent only needs to know what will be recorded
     * to stop at the right pre-recording state — it must NOT perform the recorded actions.
     */
    recordingScript?: string;
    /** Viewport size used for the pooled browser and passed to the agent. */
    viewport: {
        width: number;
        height: number;
    };
    /** Passed to the pooled browser as its device scale factor. */
    outputScale?: number;
    /** Browser language; also stated in the prompt as a required UI language. The agent falls back to `en` when omitted. */
    lang?: string;
    /** Browser colour scheme; also stated in the prompt as a required theme. */
    theme?: 'light' | 'dark';
    /** Login credentials forwarded to the agent. */
    credentials?: LoginCredentials;
    /** Free-form language-switching hints, forwarded to the agent and appended to the prompt. */
    langInstructions?: string;
    /** Free-form theme-switching hints, forwarded to the agent and appended to the prompt. */
    themeInstructions?: string;
    /** Extra navigation hints appended to the prompt. */
    navigationInstructions?: string;
    /** Model identifier forwarded to the agent. */
    model: string;
    /** API key forwarded to the agent. */
    apiKey: string;
    /** Upper bound on agent iterations; defaults to 30. */
    maxIterations?: number;
    /** Checked before the run starts and forwarded to the agent. */
    abortSignal?: AbortSignal;
    /** Receives navigator and agent log entries. */
    onLog?: (entry: {
        level: 'info' | 'action' | 'success' | 'error' | 'ai';
        message: string;
        timestamp: number;
    }) => void;
    /** Receives live screenshots as base64 strings. */
    onScreenshot?: (base64: string) => void;
}
75
/** Result of `validatePlanSelectors()`. */
export interface SelectorValidationResult {
    /** True when no checked selector was missing. */
    valid: boolean;
    /** One entry per checked step whose selector matched nothing on the current page. */
    missingSelectors: Array<{
        /** Index of the step within `plan.steps`. */
        stepIndex: number;
        /** The plan step itself. */
        step: VideoStep;
        /** The step's selector string, as written in the plan. */
        selector: string;
    }>;
}
83
/** Input for `captureFreshObservationFromSession()`. */
export interface FreshObservationConfig {
    /** URL the temporary browser navigates to. */
    url: string;
    /** Viewport size of the temporary browser. */
    viewport: {
        width: number;
        height: number;
    };
    /** Passed to the temporary browser as its device scale factor. */
    outputScale?: number;
    /** Browser language. */
    lang?: string;
    /** Browser colour scheme. */
    theme?: 'light' | 'dark';
    /** Storage state the temporary browser is created with. */
    storageState?: BrowserStorageState;
    /** sessionStorage contents applied before navigation; ignored when empty. */
    sessionStorage?: BrowserSessionStorageState;
    /** Scroll offset restored after navigation; skipped when absent or `(0, 0)`. */
    scrollPosition?: {
        x: number;
        y: number;
    };
    /** Checked once before the browser is opened. */
    abortSignal?: AbortSignal;
}
100
/**
 * Phase 1: Use the screenshot agent to navigate to a target state.
 * Exports the full session (cookies, localStorage, sessionStorage, scroll)
 * for reuse in the video recording phase.
 */
export declare function navigateWithAgent(config: NavigateWithAgentConfig): Promise<NavigationResult>;
/**
 * Capture a fresh observation from a browser that already has the session loaded.
 * Use this on Browser B (recording browser) AFTER navigation, not the stale
 * observation from Browser A (agent browser).
 */
export declare function captureFreshObservation(browser: Browser): Promise<string>;
/**
 * Capture a fresh observation by opening a temporary pool browser with the
 * exported session. Closes the browser automatically.
 */
export declare function captureFreshObservationFromSession(config: FreshObservationConfig): Promise<string>;
/**
 * After transferring session to a new browser context, detect if the page
 * ended up in a logged-out state (e.g., due to IndexedDB/in-memory JWT loss).
 * If detected, waits for silent re-auth to complete.
 *
 * Resolves to `{ ok: true }` when no login state is detected or the re-auth
 * completes, and to `{ ok: false, reason }` when the page stays on a login
 * URL or still shows a login prompt. `timeoutMs` (default 5000) bounds the
 * wait for leaving a login URL.
 */
export declare function waitForAuthStabilization(browser: Browser, options?: {
    timeoutMs?: number;
}): Promise<{
    ok: boolean;
    reason?: string;
}>;
/**
 * Validate selectors in a plan that can be checked on the current page.
 *
 * Only validates selectors up to and including the first page-mutating step
 * (click, type, select_option, key, drag). After such a step the DOM changes
 * and later selectors cannot be verified without actually executing the plan.
 * `hover` is not treated as mutating, so selectors after a hover are still
 * checked.
 */
export declare function validatePlanSelectors(browser: Browser, plan: VideoPlan): Promise<SelectorValidationResult>;
/**
 * Build the prompt given to the navigation agent for `resolvedUrl`, combining
 * the navigation goal, the optional recording preview, and any language,
 * theme, credential and extra instructions present in `config`.
 */
export declare function buildNavigationPrompt(resolvedUrl: string, config: NavigateWithAgentConfig): string;
@@ -1,468 +0,0 @@
1
- /**
2
- * Hybrid Navigator — Uses the screenshot agent for navigation (Phase 1),
3
- * then exports the session for video recording (Phase 2).
4
- *
5
- * This wrapper around `runAgent()` provides:
6
- * - Agent-driven navigation with full self-healing
7
- * - Session export (storageState + sessionStorage + scroll position)
8
- * - Post-transfer auth detection (silent re-auth wait)
9
- * - Lightweight selector pre-validation for clip plans
10
- * - Fresh observation capture on recording browser
11
- */
12
- import { Browser } from './browser.js';
13
- import { analyzeReplayCandidate, runAgent } from './agent.js';
14
- import { buildVideoObservationSnapshot, buildVideoObservationSummary, captureVideoObservationSummary, } from './video-observation.js';
15
- import { detectVariantStateDeterministic } from './video-variant-state.js';
16
- import { logger, runWithLoggerCallbacks } from './logger.js';
17
- import { throwIfAborted } from './abort.js';
18
- import { dismissOverlaysWithLogging } from './overlay-utils.js';
19
- // ── Navigation ───────────────────────────────────────────────────────
20
/**
 * Phase 1 of the hybrid flow: drive the screenshot agent to the requested
 * pre-recording state on a pooled headless browser, then export everything
 * the recording phase needs (storage state, sessionStorage, scroll offset,
 * page observation, variant detection and replay analysis).
 *
 * The pooled browser is always closed before the promise settles.
 *
 * @param config Navigation target, prompts, viewport, variant and agent settings.
 * @returns The navigation result; `success` is false when the agent gave up,
 *          in which case `error` carries the agent's assessment.
 */
export async function navigateWithAgent(config) {
    throwIfAborted(config.abortSignal, 'Hybrid navigation cancelled.');
    // Relative targets such as "/docs" are resolved against the project base URL.
    const targetUrl = resolveUrl(config.url, config.baseUrl);
    // Mirror every message to the process logger and to the caller's log sink.
    const report = (message, level = 'info') => {
        if (level !== 'error') {
            logger.info(message);
        }
        else {
            logger.error(message);
        }
        config.onLog?.({ level, message, timestamp: Date.now() });
    };
    report(`[hybrid-nav] Starting agent navigation to: ${targetUrl}`);
    const browser = await Browser.fromPool({
        headed: false,
        viewport: config.viewport,
        deviceScaleFactor: config.outputScale,
        lang: config.lang,
        colorScheme: config.theme,
    });
    try {
        // The agent expects to start on the page itself (not about:blank),
        // exactly like the capture pipeline, so load it up front.
        await browser.navigateTo(targetUrl);
        await dismissOverlaysWithLogging(browser, {
            context: 'hybrid navigation bootstrap',
            onLog: config.onLog,
        });
        // Agent logs and live screenshots are forwarded through these callbacks.
        const loggerCallbacks = {
            onLog: config.onLog ? (entry) => config.onLog(entry) : undefined,
            onScreenshot: config.onScreenshot ?? undefined,
        };
        const startAgent = () => runAgent(browser, {
            url: targetUrl,
            prompt: buildNavigationPrompt(targetUrl, config),
            dark: config.theme === 'dark',
            langs: config.lang ? [config.lang] : ['en'],
            outputDir: '',
            headed: false,
            viewport: config.viewport,
            maxIterations: config.maxIterations ?? 30,
            model: config.model,
            credentials: config.credentials,
            langInstructions: config.langInstructions,
            themeInstructions: config.themeInstructions,
            currentLang: config.lang,
            currentTheme: config.theme,
            runMode: 'video_navigation_preflight',
            abortSignal: config.abortSignal,
        }, config.apiKey);
        const agentResult = await runWithLoggerCallbacks(loggerCallbacks, startAgent);
        if (!agentResult.success) {
            report(`[hybrid-nav] Agent navigation failed: ${agentResult.assessment}`, 'error');
            return {
                success: false,
                finalUrl: browser.currentPage.url(),
                scrollPosition: { x: 0, y: 0 },
                exactStartStateVerified: false,
                error: agentResult.assessment,
                usage: agentResult.usage,
            };
        }
        const verification = agentResult.verification ?? null;
        const verificationMode = verification?.mode;
        // Only a positive vision or text-fallback verification counts as exact.
        const exactStartStateVerified = verification?.verified === true
            && ['vision', 'text_fallback'].includes(verificationMode);
        // The bundle capture is optional on the browser and best-effort when present.
        let bundleCapture;
        if (typeof browser.captureVideoVerificationBundle === 'function') {
            bundleCapture = browser.captureVideoVerificationBundle({ maxAttempts: 1 }).catch(() => null);
        }
        else {
            bundleCapture = Promise.resolve(null);
        }
        // Every export below is best-effort: a failure degrades to a default value.
        const exported = await Promise.all([
            browser.exportStorageState().catch(() => undefined),
            browser.exportSessionStorage().catch(() => undefined),
            browser.currentPage.evaluate(() => ({
                x: Math.round(window.scrollX),
                y: Math.round(window.scrollY),
            })).catch(() => ({ x: 0, y: 0 })),
            detectVariantStateDeterministic(browser, config.lang, config.theme).catch(() => undefined),
            browser.getPageState().catch(() => undefined),
            browser.currentPage.title().catch(() => ''),
            bundleCapture,
        ]);
        const [storageState, sessionStorage, scrollPosition, variantState, pageState, pageTitle, bundle] = exported;
        const finalUrl = browser.currentPage.url();
        const summaryLimits = {
            maxAccessibilityChars: 3500,
            maxElements: 18,
            maxVisibleTextChars: 500,
        };
        // Prefer the verification bundle, then page state + variant detection,
        // and only as a last resort capture a new summary from the browser.
        let observationSummary;
        let observationSnapshot;
        if (bundle) {
            observationSummary = buildVideoObservationSummary({
                url: bundle.url,
                title: bundle.title,
                accessibilityTree: bundle.accessibilityTree,
                interactiveElements: bundle.interactiveElements,
                pageSignals: bundle.pageSignals,
                ...summaryLimits,
            });
            observationSnapshot = buildVideoObservationSnapshot({
                coherenceKey: bundle.coherenceKey,
                interactiveElements: bundle.interactiveElements,
                pageSignals: bundle.pageSignals,
                pageIdentity: null,
            });
        }
        else if (pageState && variantState) {
            observationSummary = buildVideoObservationSummary({
                url: finalUrl,
                title: pageTitle,
                accessibilityTree: pageState.accessibilityTree,
                interactiveElements: pageState.interactiveElements,
                pageSignals: variantState.pageSignals,
                ...summaryLimits,
            });
            observationSnapshot = buildVideoObservationSnapshot({
                interactiveElements: pageState.interactiveElements,
                pageSignals: variantState.pageSignals,
                pageIdentity: null,
            });
        }
        else {
            observationSummary = await captureVideoObservationSummary(browser).catch(() => undefined);
            observationSnapshot = undefined;
        }
        const replayAnalysis = analyzeReplayCandidate(agentResult.actions, {
            currentUrl: finalUrl,
            targetUrl: finalUrl,
            currentViewport: browser.currentPage.viewportSize(),
            isAuthenticated: !!config.credentials,
            currentDialogCount: bundle?.observation.dialogCount ?? null,
            pageIdentity: null,
        });
        report(`[hybrid-nav] Navigation complete. Final URL: ${finalUrl} (runMode=video_navigation_preflight, verificationMode=${verificationMode ?? 'unknown'})`);
        return {
            success: true,
            finalUrl,
            scrollPosition,
            storageState,
            sessionStorage,
            observationSummary,
            observationSnapshot,
            coherenceKey: bundle?.coherenceKey,
            interactiveElements: bundle?.interactiveElements ?? pageState?.interactiveElements,
            pageIdentity: null,
            actions: agentResult.actions,
            // A skip reason means the action list must not be replayed at all.
            replayableActions: replayAnalysis.skipReason ? [] : replayAnalysis.replayableActions,
            verification,
            verificationMode,
            exactStartStateVerified,
            detectedLang: variantState?.lang.detected ?? null,
            detectedTheme: variantState?.theme.detected ?? null,
            pageSignals: variantState?.pageSignals,
            variantState,
            usage: agentResult.usage,
        };
    }
    finally {
        await browser.close().catch(() => { });
    }
}
180
- // ── Fresh Observation on Recording Browser ───────────────────────────
181
- /**
182
- * Capture a fresh observation from a browser that already has the session loaded.
183
- * Use this on Browser B (recording browser) AFTER navigation, not the stale
184
- * observation from Browser A (agent browser).
185
- */
186
- export async function captureFreshObservation(browser) {
187
- return captureVideoObservationSummary(browser, {
188
- maxAccessibilityChars: 3500,
189
- maxElements: 18,
190
- maxVisibleTextChars: 500,
191
- });
192
- }
193
/**
 * Open a temporary pooled browser seeded with an exported session, bring it
 * to the recorded URL and scroll offset, and return an observation summary.
 * The temporary browser is always closed, whether or not the capture succeeds.
 *
 * @param config URL, viewport, variant settings and exported session data.
 * @returns The observation summary of the restored page.
 */
export async function captureFreshObservationFromSession(config) {
    throwIfAborted(config.abortSignal, 'Observation capture cancelled.');
    const tempBrowser = await Browser.fromPool({
        headed: false,
        viewport: config.viewport,
        deviceScaleFactor: config.outputScale,
        lang: config.lang,
        colorScheme: config.theme,
        storageState: config.storageState,
    });
    try {
        // sessionStorage is applied before navigation; empty exports are skipped.
        const savedSession = config.sessionStorage;
        if (savedSession && Object.keys(savedSession).length > 0) {
            await tempBrowser.prepareSessionStorage(savedSession, { replace: true });
        }
        await tempBrowser.navigateTo(config.url);
        await dismissOverlaysWithLogging(tempBrowser, { context: 'fresh observation bootstrap' });
        // Only scroll when the saved offset differs from the page origin.
        const savedScroll = config.scrollPosition;
        const needsScroll = Boolean(savedScroll) && !(savedScroll.x === 0 && savedScroll.y === 0);
        if (needsScroll) {
            const page = tempBrowser.currentPage;
            await page
                .evaluate(({ x, y }) => window.scrollTo({ left: x, top: y, behavior: 'instant' }), savedScroll)
                .catch(() => { });
            // Brief pause before observing the scrolled page.
            await page.waitForTimeout(250);
        }
        const summary = await captureVideoObservationSummary(tempBrowser, {
            maxAccessibilityChars: 3500,
            maxElements: 18,
            maxVisibleTextChars: 500,
        });
        return summary;
    }
    finally {
        await tempBrowser.close().catch(() => { });
    }
}
228
- // ── Post-Transfer Auth Detection ─────────────────────────────────────
229
- /**
230
- * After transferring session to a new browser context, detect if the page
231
- * ended up in a logged-out state (e.g., due to IndexedDB/in-memory JWT loss).
232
- * If detected, waits for silent re-auth to complete.
233
- *
234
- * Returns true if auth is confirmed or no auth issues detected.
235
- */
236
- export async function waitForAuthStabilization(browser, options = {}) {
237
- const timeout = options.timeoutMs ?? 5000;
238
- const page = browser.currentPage;
239
- // Check 1: Did the page redirect to a login URL?
240
- const url = page.url().toLowerCase();
241
- const loginPatterns = [
242
- '/login',
243
- '/signin',
244
- '/sign-in',
245
- '/auth',
246
- '/sso',
247
- '/oauth',
248
- '/accounts/login',
249
- '/connexion',
250
- '/anmeldung',
251
- '/acceso',
252
- '/accedi',
253
- ];
254
- const isOnLoginPage = loginPatterns.some(p => url.includes(p));
255
- if (isOnLoginPage) {
256
- logger.info('[hybrid-nav] Detected redirect to login page after session transfer. Waiting for silent re-auth...');
257
- // Wait for navigation away from login page (silent OAuth/OIDC redirect)
258
- try {
259
- await page.waitForURL((u) => !loginPatterns.some(p => u.toString().toLowerCase().includes(p)), { timeout });
260
- logger.info('[hybrid-nav] Silent re-auth completed successfully.');
261
- return { ok: true };
262
- }
263
- catch {
264
- return { ok: false, reason: 'Session transfer failed: stuck on login page after re-auth timeout' };
265
- }
266
- }
267
- // Check 2: Does the page contain visible login/sign-in buttons suggesting logged-out state?
268
- const hasLoginPrompt = await page.evaluate(() => {
269
- const buttons = Array.from(document.querySelectorAll('button, a, [role="button"]'));
270
- const loginLabels = [
271
- 'log in',
272
- 'login',
273
- 'sign in',
274
- 'signin',
275
- 'sign up',
276
- 'get started',
277
- 'se connecter',
278
- 'connexion',
279
- 'anmelden',
280
- 'iniciar sesion',
281
- 'accedi',
282
- ];
283
- return buttons.some(el => {
284
- const text = (el.textContent || '').trim().toLowerCase();
285
- return loginLabels.some(label => text === label) && el.checkVisibility?.();
286
- });
287
- }).catch(() => false);
288
- if (hasLoginPrompt) {
289
- logger.info('[hybrid-nav] Detected login prompt on page. Waiting briefly for potential silent re-auth...');
290
- // Brief wait — some SPAs show login state briefly before silent auth completes
291
- await page.waitForTimeout(2000);
292
- // Re-check
293
- const stillHasLogin = await page.evaluate(() => {
294
- const buttons = Array.from(document.querySelectorAll('button, a, [role="button"]'));
295
- const loginLabels = ['log in', 'login', 'sign in', 'signin', 'se connecter', 'connexion', 'anmelden', 'iniciar sesion', 'accedi'];
296
- return buttons.some(el => {
297
- const text = (el.textContent || '').trim().toLowerCase();
298
- return loginLabels.some(label => text === label) && el.checkVisibility?.();
299
- });
300
- }).catch(() => false);
301
- if (stillHasLogin) {
302
- return { ok: false, reason: 'Session transfer may have failed: login prompt still visible' };
303
- }
304
- }
305
- return { ok: true };
306
- }
307
- // ── Lightweight Selector Pre-Validation ──────────────────────────────
308
- /**
309
- * Validate selectors in a plan that can be checked on the current page.
310
- *
311
- * Only validates selectors up to the first page-mutating step (click, hover,
312
- * type, select_option, key). After such a step the DOM changes and later
313
- * selectors cannot be verified without actually executing the plan.
314
- */
315
- export async function validatePlanSelectors(browser, plan) {
316
- const page = browser.currentPage;
317
- const missing = [];
318
- // Types that mutate the page state — hover is intentionally excluded so
319
- // we can keep validating selectors that come after a non-committal reveal.
320
- const mutatingTypes = new Set(['click', 'type', 'select_option', 'key', 'drag']);
321
- const skipTypes = new Set(['navigate', 'wait', 'dismiss_overlays', 'assert_url', 'assert_text', 'assert_element', 'assert_page']);
322
- for (let i = 0; i < plan.steps.length; i++) {
323
- const step = plan.steps[i];
324
- if (skipTypes.has(step.type))
325
- continue;
326
- if (!step.selector) {
327
- // First mutating step without a selector — stop here
328
- if (mutatingTypes.has(step.type))
329
- break;
330
- continue;
331
- }
332
- // Validate this selector
333
- try {
334
- const selectorParts = step.selector.split(',').map(s => s.trim()).filter(Boolean);
335
- let found = false;
336
- for (const part of selectorParts) {
337
- const count = await page.locator(part).count().catch(() => 0);
338
- if (count > 0) {
339
- found = true;
340
- break;
341
- }
342
- }
343
- if (!found) {
344
- missing.push({ stepIndex: i, step, selector: step.selector });
345
- }
346
- }
347
- catch {
348
- missing.push({ stepIndex: i, step, selector: step.selector });
349
- }
350
- // After validating this step, if it mutates the page, stop
351
- if (mutatingTypes.has(step.type))
352
- break;
353
- }
354
- return {
355
- valid: missing.length === 0,
356
- missingSelectors: missing,
357
- };
358
- }
359
- // ── URL Resolution ───────────────────────────────────────────────────
360
/**
 * Turn a possibly relative URL into an absolute one using the project base URL.
 *
 * - `http(s)://` URLs are returned untouched.
 * - Without a base URL the input is returned as-is.
 * - Otherwise the URL is resolved against the base; if that fails (for
 *   example because the base has no scheme), base and path are simply joined
 *   with a single slash.
 */
function resolveUrl(url, baseUrl) {
    const alreadyAbsolute = /^https?:\/\//i.test(url);
    if (alreadyAbsolute || !baseUrl) {
        return url;
    }
    let resolved;
    try {
        resolved = new URL(url, baseUrl).href;
    }
    catch {
        // The URL constructor rejected the pair: fall back to concatenation.
        const trimmedBase = baseUrl.replace(/\/+$/, '');
        resolved = url.startsWith('/') ? trimmedBase + url : trimmedBase + '/' + url;
    }
    return resolved;
}
382
- // ── Prompt Builder ───────────────────────────────────────────────────
383
/**
 * Assemble the prompt handed to the navigation agent.
 *
 * Two-part mode (`config.recordingScript` set): `navigationPrompt` says WHERE
 * to go and `recordingScript` is shown only as a preview of what will be
 * recorded afterwards. Legacy mode: `navigationPrompt` is the whole clip
 * description. With neither, the agent is told to just load and clean the page.
 *
 * Sections are separated by blank lines and appended in a fixed order:
 * tasks, goal, pre-capture checklist, credentials, language, theme, then the
 * free-form navigation/language/theme instructions.
 *
 * @param resolvedUrl Absolute URL the agent starts from.
 * @param config Navigation configuration (prompts, variant and instruction fields).
 * @returns The prompt as a single newline-joined string.
 */
export function buildNavigationPrompt(resolvedUrl, config) {
    const { navigationPrompt, recordingScript, lang, theme } = config;
    const lines = [
        `Navigate to ${resolvedUrl} and prepare the exact pre-recording start state for a clip recording.`,
        '',
        'Your tasks:',
        '1. Load the page at the URL above',
        '2. Handle login if credentials are provided',
        '3. Dismiss ALL cookie banners, consent walls, analytics prompts, overlays, and popups — do this BEFORE calling ready_to_capture',
        '4. Navigate to the specific page/state described below',
        '5. Once the page is clean (no overlays, no spinners, correct page), call ready_to_capture',
    ];
    // Every section is introduced by one blank line.
    const addSection = (...sectionLines) => {
        lines.push('', ...sectionLines);
    };
    if (recordingScript) {
        // Two-part mode: the navigation goal is kept apart from what will be recorded.
        addSection('<navigation_goal>', navigationPrompt, '</navigation_goal>');
        addSection('<recording_preview>', `After you call ready_to_capture, the recording system will perform: ${recordingScript}`, 'You must NOT perform these recording actions yourself. Your job is ONLY to reach the state where the recording can begin.', '</recording_preview>');
        addSection([
            'Follow the <navigation_goal> instructions exactly. Navigate to the described page/state. ',
            'Dismiss all overlays and popups along the way. ',
            'Once you are on the correct page with no obstructions, call ready_to_capture.',
        ].join(''));
    }
    else if (navigationPrompt) {
        // Legacy mode: one script describes both the navigation and the recording.
        addSection(`Clip description: "${navigationPrompt}"`);
        addSection([
            'Read the clip description above carefully. If it mentions navigation steps to perform BEFORE the main action ',
            '(e.g. "go to Settings", "open the API tab", "navigate to the pricing page"), you MUST perform those navigation steps now. ',
            'Opening the correct project, workspace, section, tab, or source dialog counts as preparation and MUST be done now when requested. ',
            'However, do NOT perform the final interaction that should be RECORDED (e.g. "click Regenerate", "click New", "click New preset", "scroll down", "toggle dark mode"). ',
            'The recording system will handle the recorded interaction. Your job is to stop on the exact pre-recording start state for that interaction, not on a generic dashboard that only roughly matches the site.',
        ].join(''));
    }
    else {
        addSection('No specific navigation required. Just load the page, dismiss overlays, and call ready_to_capture once the exact pre-recording start state is visible.');
    }
    addSection('IMPORTANT — Before calling ready_to_capture:', '- Dismiss ALL overlays: cookie banners, analytics prompts, consent walls, feedback widgets, onboarding tooltips. Use dismiss_overlays or click dismiss/close buttons.', '- If the URL is /home, /dashboard, /app, or / you are on a GENERIC dashboard, NOT inside a specific project/workspace. '
        + 'Do NOT call ready_to_capture from a generic dashboard unless explicitly targeting the homepage.', '- Check the URL path: it must match the specific entity/page required.', '- If you changed language or theme via settings, navigate BACK to the target page before calling ready_to_capture.');
    if (config.credentials) {
        addSection('If the page requires authentication, use the provided credentials to log in.');
    }
    if (lang) {
        addSection(`<variant_requirement type="language" value="${lang}">`, `The app's fixed UI chrome (menus, buttons, labels, navigation) MUST be in "${lang}" when you call ready_to_capture.`, 'If the fixed chrome is in a different language, you MUST switch it before calling ready_to_capture.', 'Ignore user-generated content (project names, preset names, imported data) that may remain in another language.', '</variant_requirement>');
    }
    if (theme) {
        addSection(`<variant_requirement type="theme" value="${theme}">`, `The app MUST be in "${theme}" theme when you call ready_to_capture.`, `If the app has an in-app theme toggle/selector, use it to switch to "${theme}" mode.`, '</variant_requirement>');
    }
    // Optional free-form instructions, in a fixed order.
    const extraInstructions = [
        ['Additional navigation instructions', config.navigationInstructions],
        ['Language instructions', config.langInstructions],
        ['Theme instructions', config.themeInstructions],
    ];
    for (const [label, text] of extraInstructions) {
        if (text) {
            addSection(`${label}: ${text}`);
        }
    }
    return lines.join('\n');
}
468
- //# sourceMappingURL=hybrid-navigator.js.map
package/dist/index.d.ts DELETED
@@ -1,15 +0,0 @@
1
- export { Browser } from './browser.js';
2
- export { runAgent } from './agent.js';
3
- export { captureIsolatedElement } from './element-capture.js';
4
- export { dismissCookiesAndWidgets } from './cookie-dismiss.js';
5
- export { logger, setOnLog, setOnScreenshot, emitScreenshot } from './logger.js';
6
- export type { LogEntry, LogLevel, OnLogCallback, OnScreenshotCallback } from './logger.js';
7
- export { applyDeviceFrame, getDeviceFrames, getDeviceFrame, invalidateDeviceConfigCache, resolveDeviceFrameDescriptor, rasterizeDeviceFrame, } from './mockup.js';
8
- export type { DeviceFrameId, DeviceFrameDefinition, DeviceCategory, MockupOptions, MockupOrientation, ResolvedDeviceFrameDescriptor, } from './mockup.js';
9
- export { generateStatusBarHtml } from './status-bar.js';
10
- export { generateBrowserBarHtml } from './browser-bar.js';
11
- export type { BrowserBarConfig, BrowserBarRenderOptions } from './browser-bar.js';
12
- export { convertToGif, convertToMp4, ensureFfmpegAvailable, extractThumbnail, getMediaDurationMs, } from './clip-postprocess.js';
13
- export { evaluateActionSecurity, describeSecurityTarget } from './security.js';
14
- export type { StatusBarConfig, StatusBarLayout, StatusBarDeviceType, StatusBarRenderOptions } from './status-bar.js';
15
- export type * from './types.js';
package/dist/index.js DELETED
@@ -1,11 +0,0 @@
1
- export { Browser } from './browser.js';
2
- export { runAgent } from './agent.js';
3
- export { captureIsolatedElement } from './element-capture.js';
4
- export { dismissCookiesAndWidgets } from './cookie-dismiss.js';
5
- export { logger, setOnLog, setOnScreenshot, emitScreenshot } from './logger.js';
6
- export { applyDeviceFrame, getDeviceFrames, getDeviceFrame, invalidateDeviceConfigCache, resolveDeviceFrameDescriptor, rasterizeDeviceFrame, } from './mockup.js';
7
- export { generateStatusBarHtml } from './status-bar.js';
8
- export { generateBrowserBarHtml } from './browser-bar.js';
9
- export { convertToGif, convertToMp4, ensureFfmpegAvailable, extractThumbnail, getMediaDurationMs, } from './clip-postprocess.js';
10
- export { evaluateActionSecurity, describeSecurityTarget } from './security.js';
11
- //# sourceMappingURL=index.js.map
@@ -1,17 +0,0 @@
1
- import type { ChatCompletion } from 'openai/resources/chat/completions';
2
- import type { StepUsage } from './types.js';
3
- export type LlmUsageSnapshot = Pick<StepUsage, 'generationId' | 'modelUsed' | 'promptTokens' | 'completionTokens' | 'totalTokens' | 'cacheReadTokens' | 'cacheWriteTokens' | 'reasoningTokens'>;
4
- export declare function extractLlmUsageSnapshot(response: ChatCompletion): LlmUsageSnapshot;
5
- export declare function buildStepUsageFromSnapshot(snapshot: LlmUsageSnapshot, params: {
6
- stepNumber: number;
7
- stepType: StepUsage['stepType'];
8
- modelRequested: string;
9
- imagesInPrompt: number;
10
- }): StepUsage;
11
- export declare function normalizeLlmUsageSnapshot(snapshot: Partial<LlmUsageSnapshot>): LlmUsageSnapshot;
12
- export declare function extractStepUsage(response: ChatCompletion, params: {
13
- stepNumber: number;
14
- stepType: StepUsage['stepType'];
15
- modelRequested: string;
16
- imagesInPrompt: number;
17
- }): StepUsage;