autokap 1.0.1 → 1.0.3

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (83)
  1. package/dist/cli-config.d.ts +13 -0
  2. package/dist/cli-config.js +42 -0
  3. package/dist/cli-utils.d.ts +0 -19
  4. package/dist/cli-utils.js +2 -65
  5. package/dist/cli.d.ts +0 -1
  6. package/dist/cli.js +268 -306
  7. package/package.json +24 -16
  8. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  9. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  10. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  11. package/assets/devices/ipad-pro-11-m4.json +0 -52
  12. package/assets/devices/iphone-16-pro.json +0 -53
  13. package/assets/devices/macbook-air-13.json +0 -45
  14. package/assets/frames/MacBook Air 13.svg +0 -242
  15. package/assets/frames/Status bar - iPhone.png +0 -0
  16. package/assets/frames/Status bar and Menu bar- iPad.png +0 -0
  17. package/assets/frames/iPad Pro M4 11_.png +0 -0
  18. package/assets/frames/iPhone 16 Pro.png +0 -0
  19. package/assets/icons/Cellular Connection.svg +0 -3
  20. package/assets/icons/Union.svg +0 -6
  21. package/assets/icons/Wifi.svg +0 -3
  22. package/assets/icons/battery.svg +0 -5
  23. package/assets/icons/battery_charging.svg +0 -8
  24. package/dist/abort.d.ts +0 -5
  25. package/dist/abort.js +0 -44
  26. package/dist/agent.d.ts +0 -142
  27. package/dist/agent.js +0 -4504
  28. package/dist/browser-bar.d.ts +0 -40
  29. package/dist/browser-bar.js +0 -147
  30. package/dist/clip-orchestrator.d.ts +0 -148
  31. package/dist/clip-orchestrator.js +0 -950
  32. package/dist/clip-postprocess.d.ts +0 -42
  33. package/dist/clip-postprocess.js +0 -192
  34. package/dist/credential-templates.d.ts +0 -5
  35. package/dist/credential-templates.js +0 -60
  36. package/dist/element-capture.d.ts +0 -53
  37. package/dist/element-capture.js +0 -766
  38. package/dist/hybrid-navigator.d.ts +0 -138
  39. package/dist/hybrid-navigator.js +0 -468
  40. package/dist/index.d.ts +0 -15
  41. package/dist/index.js +0 -11
  42. package/dist/llm-usage.d.ts +0 -17
  43. package/dist/llm-usage.js +0 -45
  44. package/dist/mockup-html.d.ts +0 -119
  45. package/dist/mockup-html.js +0 -253
  46. package/dist/mockup.d.ts +0 -94
  47. package/dist/mockup.js +0 -604
  48. package/dist/mouse-animation.d.ts +0 -46
  49. package/dist/mouse-animation.js +0 -100
  50. package/dist/overlay-utils.d.ts +0 -14
  51. package/dist/overlay-utils.js +0 -13
  52. package/dist/posthog.d.ts +0 -4
  53. package/dist/posthog.js +0 -26
  54. package/dist/prompt-cache.d.ts +0 -10
  55. package/dist/prompt-cache.js +0 -24
  56. package/dist/prompts.d.ts +0 -167
  57. package/dist/prompts.js +0 -1165
  58. package/dist/security.d.ts +0 -20
  59. package/dist/security.js +0 -569
  60. package/dist/session-profile.d.ts +0 -86
  61. package/dist/session-profile.js +0 -1471
  62. package/dist/sf-pro-fonts.d.ts +0 -4
  63. package/dist/sf-pro-fonts.js +0 -7
  64. package/dist/status-bar-l10n.d.ts +0 -14
  65. package/dist/status-bar-l10n.js +0 -177
  66. package/dist/status-bar.d.ts +0 -44
  67. package/dist/status-bar.js +0 -336
  68. package/dist/tools.d.ts +0 -4
  69. package/dist/tools.js +0 -578
  70. package/dist/video-agent.d.ts +0 -143
  71. package/dist/video-agent.js +0 -4783
  72. package/dist/video-observation.d.ts +0 -36
  73. package/dist/video-observation.js +0 -192
  74. package/dist/video-planner.d.ts +0 -12
  75. package/dist/video-planner.js +0 -500
  76. package/dist/video-prompts.d.ts +0 -37
  77. package/dist/video-prompts.js +0 -554
  78. package/dist/video-tools.d.ts +0 -3
  79. package/dist/video-tools.js +0 -59
  80. package/dist/video-variant-state.d.ts +0 -29
  81. package/dist/video-variant-state.js +0 -80
  82. package/dist/vision-model.d.ts +0 -17
  83. package/dist/vision-model.js +0 -74
package/dist/cli.js CHANGED
@@ -1,52 +1,135 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from 'commander';
3
- import { config as loadEnv } from 'dotenv';
4
3
  import { createRequire } from 'node:module';
5
4
  import path from 'node:path';
6
5
  import fs from 'node:fs/promises';
7
- import readline from 'node:readline';
6
+ import WebSocket from 'ws';
8
7
  const require = createRequire(import.meta.url);
9
8
  const { version } = require('../package.json');
10
9
  import { Browser } from './browser.js';
11
- import { runAgent } from './agent.js';
12
- import { captureIsolatedElement } from './element-capture.js';
13
- import { dismissCookiesAndWidgets } from './cookie-dismiss.js';
14
10
  import { logger } from './logger.js';
15
- import { getPostHog, shutdownPostHog, DISTINCT_ID } from './posthog.js';
16
- import { parseViewport, parseViewports, parseLanguages, buildThemeList, buildCredentials, parseElements, stripBuffersFromManifest, replaceSkillPlaceholders, buildRetryPrompt, } from './cli-utils.js';
17
- function askUser(question) {
18
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
19
- return new Promise(resolve => {
20
- rl.question(question, answer => {
21
- rl.close();
22
- resolve(answer.trim());
23
- });
24
- });
25
- }
11
+ import { writeConfig, requireConfig, DEFAULT_API_BASE_URL, DEFAULT_WS_URL, } from './cli-config.js';
12
+ import { replaceSkillPlaceholders } from './cli-utils.js';
13
+ // ── Program definition ──────────────────────────────────────────────
26
14
  export const program = new Command();
27
15
  program
28
16
  .name('autokap')
29
17
  .version(version)
30
- .description('AI-powered screenshot capture tool')
31
- .option('--url <url>', 'Target URL')
32
- .option('--prompt <prompt>', 'What to capture (natural language)')
33
- .option('--dark', 'Also capture dark mode variant', false)
34
- .option('--langs <codes>', 'Comma-separated language codes (e.g., en,fr,de)', 'en')
35
- .option('--output <dir>', 'Output directory', './output')
36
- .option('--headed', 'Show browser window (debug mode)', false)
37
- .option('--viewport <size>', 'Viewport size WxH (e.g., 1440x900)', '1440x900')
38
- .option('--viewports <sizes>', 'Comma-separated viewport sizes for multi-viewport capture (e.g., "1440x900,768x1024,375x812")')
39
- .option('--max-iter <n>', 'Max agent iterations', '30')
40
- .option('--model <id>', 'OpenRouter model ID', 'x-ai/grok-4.1-fast')
41
- .option('--reasoning-effort <level>', 'Reasoning effort for compatible models: low, medium, high, off', 'medium')
42
- .option('--login-url <url>', 'Login page URL (if different from --url)')
43
- .option('--email <email>', 'Login email/username')
44
- .option('--password <password>', 'Login password')
45
- .option('--cookies <file>', 'Path to a JSON file with cookies to inject (for OAuth/session auth)')
46
- .option('--lang-instructions <text>', 'How to switch language on the site (e.g., "Click the language selector in the footer and choose the target language")')
47
- .option('--theme-instructions <text>', 'How to switch theme on the site (e.g., "Click the sun/moon icon in the top-right corner")')
48
- .option('--element <specs...>', 'Isolated elements to capture (format: "name:description")');
49
- // ── Skill subcommand ──────────────────────────────────────────────────
18
+ .description('AI-powered screenshot capture — local Playwright proxy');
19
+ // ── login command ───────────────────────────────────────────────────
20
+ program
21
+ .command('login <key>')
22
+ .description('Authenticate with the AutoKap API')
23
+ .option('--api-base-url <url>', 'API base URL', DEFAULT_API_BASE_URL)
24
+ .option('--ws-url <url>', 'WebSocket server URL', DEFAULT_WS_URL)
25
+ .action(async (key, opts) => {
26
+ try {
27
+ const res = await fetch(`${opts.apiBaseUrl}/api/v1/presets`, {
28
+ headers: { Authorization: `Bearer ${key}` },
29
+ });
30
+ if (!res.ok) {
31
+ logger.error('Invalid API key. Generate one in the AutoKap dashboard.');
32
+ process.exit(1);
33
+ }
34
+ }
35
+ catch (err) {
36
+ logger.error(`Cannot reach API: ${err.message}`);
37
+ process.exit(1);
38
+ }
39
+ await writeConfig({
40
+ apiKey: key,
41
+ apiBaseUrl: opts.apiBaseUrl,
42
+ wsUrl: opts.wsUrl,
43
+ });
44
+ logger.success('Authenticated. Key stored in ~/.autokap/config.json');
45
+ process.exit(0);
46
+ });
47
+ // ── run command ─────────────────────────────────────────────────────
48
+ program
49
+ .command('run <preset-id>')
50
+ .description('Run a preset capture using local Playwright')
51
+ .option('--headed', 'Show browser window for debugging', false)
52
+ .action(async (presetId, opts) => {
53
+ const config = await requireConfig();
54
+ logger.info(`Connecting to ${config.wsUrl}...`);
55
+ const browser = new Browser({
56
+ headed: opts.headed,
57
+ viewport: { width: 1440, height: 900 },
58
+ });
59
+ await browser.launch();
60
+ logger.success('Browser launched');
61
+ const wsUrl = `${config.wsUrl}?key=${encodeURIComponent(config.apiKey)}&preset_id=${encodeURIComponent(presetId)}`;
62
+ const ws = new WebSocket(wsUrl);
63
+ const cleanup = async () => {
64
+ try {
65
+ await browser.close();
66
+ }
67
+ catch { /* ignore */ }
68
+ try {
69
+ ws.close();
70
+ }
71
+ catch { /* ignore */ }
72
+ };
73
+ ws.on('error', async (err) => {
74
+ logger.error(`WebSocket error: ${err.message}`);
75
+ await cleanup();
76
+ process.exit(1);
77
+ });
78
+ ws.on('close', async () => {
79
+ await cleanup();
80
+ });
81
+ ws.on('open', () => {
82
+ logger.success('Connected to AutoKap server');
83
+ });
84
+ ws.on('message', async (data) => {
85
+ const raw = typeof data === 'string' ? data : data.toString('utf-8');
86
+ let msg;
87
+ try {
88
+ msg = JSON.parse(raw);
89
+ }
90
+ catch {
91
+ return;
92
+ }
93
+ // Handle server events (progress, done, error)
94
+ if ('type' in msg) {
95
+ const event = msg;
96
+ switch (event.type) {
97
+ case 'progress':
98
+ logger.info(event.message ?? '');
99
+ break;
100
+ case 'done':
101
+ if (event.summary) {
102
+ logger.success(`Done: ${event.summary.successes}/${event.summary.total} captures succeeded`);
103
+ }
104
+ await cleanup();
105
+ process.exit(0);
106
+ break;
107
+ case 'error':
108
+ logger.error(event.message ?? 'Unknown error');
109
+ await cleanup();
110
+ process.exit(1);
111
+ break;
112
+ }
113
+ return;
114
+ }
115
+ // Handle browser commands from server
116
+ const cmd = msg;
117
+ try {
118
+ const result = await executeBrowserCommand(browser, cmd.method, cmd.params);
119
+ ws.send(JSON.stringify({ id: cmd.id, result }));
120
+ }
121
+ catch (err) {
122
+ ws.send(JSON.stringify({ id: cmd.id, error: err.message }));
123
+ }
124
+ });
125
+ // Handle SIGINT gracefully
126
+ process.on('SIGINT', async () => {
127
+ logger.info('Interrupted. Cleaning up...');
128
+ await cleanup();
129
+ process.exit(130);
130
+ });
131
+ });
132
+ // ── skill command ───────────────────────────────────────────────────
50
133
  program
51
134
  .command('skill')
52
135
  .description('Output or install the AutoKap preset creation skill for AI coding agents')
@@ -82,282 +165,161 @@ program
82
165
  }
83
166
  process.exit(0);
84
167
  });
85
- export async function main() {
86
- loadEnv();
87
- const opts = program.opts();
88
- if (!opts.url) {
89
- logger.error("Missing required option: --url <url>");
90
- process.exit(1);
91
- }
92
- if (!opts.prompt) {
93
- logger.error("Missing required option: --prompt <prompt>");
94
- process.exit(1);
95
- }
96
- const apiKey = process.env.OPENROUTER_API_KEY;
97
- if (!apiKey) {
98
- logger.error('OPENROUTER_API_KEY not set. Create a .env file or set the environment variable.');
99
- process.exit(1);
100
- }
101
- // Parse viewport
102
- const { width: vw, height: vh } = parseViewport(opts.viewport);
103
- // Parse multi-viewports (if provided)
104
- const viewports = opts.viewports ? parseViewports(opts.viewports) : undefined;
105
- // Parse languages
106
- const langs = parseLanguages(opts.langs);
107
- // Build theme list
108
- const themes = buildThemeList(opts.dark);
109
- // Build credentials (if provided)
110
- const credentials = buildCredentials({
111
- email: opts.email,
112
- password: opts.password,
113
- loginUrl: opts.loginUrl,
114
- });
115
- // Parse isolated elements
116
- const elements = opts.element ? parseElements(opts.element) : [];
117
- // Ensure output directory exists
118
- const outputDir = path.resolve(opts.output);
119
- await fs.mkdir(outputDir, { recursive: true });
120
- logger.info(`URL: ${opts.url}`);
121
- logger.info(`Prompt: "${opts.prompt}"`);
122
- logger.info(`Model: ${opts.model}${opts.reasoningEffort !== 'off' ? ` (reasoning: ${opts.reasoningEffort})` : ''}`);
123
- logger.info(`Viewport: ${vw}x${vh}${viewports ? ` + multi: ${viewports.map(v => `${v.width}x${v.height}`).join(', ')}` : ''}`);
124
- logger.info(`Themes: ${themes.join(', ')}`);
125
- logger.info(`Languages: ${langs.join(', ')}`);
126
- logger.info(`Max iterations: ${opts.maxIter}`);
127
- if (credentials)
128
- logger.info(`Login: ${credentials.email || '(no email)'}${credentials.loginUrl ? ` via ${credentials.loginUrl}` : ''}`);
129
- if (opts.cookies)
130
- logger.info(`Cookies: ${opts.cookies}`);
131
- if (opts.langInstructions)
132
- logger.info(`Lang instructions: "${opts.langInstructions}"`);
133
- if (opts.themeInstructions)
134
- logger.info(`Theme instructions: "${opts.themeInstructions}"`);
135
- if (elements.length > 0)
136
- logger.info(`Isolated elements: ${elements.map(e => e.name).join(', ')}`);
137
- logger.info(`Output: ${outputDir}`);
138
- // Track capture start
139
- getPostHog().capture({
140
- distinctId: DISTINCT_ID,
141
- event: 'capture_started',
142
- properties: {
143
- url: opts.url,
144
- prompt: opts.prompt,
145
- model: opts.model,
146
- themes,
147
- langs,
148
- viewport: `${vw}x${vh}`,
149
- multi_viewports: viewports ? viewports.map(v => `${v.width}x${v.height}`) : null,
150
- max_iterations: parseInt(opts.maxIter, 10),
151
- has_credentials: !!credentials,
152
- has_cookies: !!opts.cookies,
153
- element_count: elements.length,
154
- },
155
- });
156
- // Launch browser
157
- const browser = new Browser({
158
- headed: opts.headed,
159
- viewport: { width: vw, height: vh },
160
- });
161
- await browser.launch();
162
- logger.success('Browser launched');
163
- // Inject cookies if provided
164
- if (opts.cookies) {
165
- try {
166
- const cookieFile = await fs.readFile(path.resolve(opts.cookies), 'utf-8');
167
- const cookies = JSON.parse(cookieFile);
168
- if (!Array.isArray(cookies))
169
- throw new Error('Cookies file must contain a JSON array');
170
- await browser.addCookies(cookies);
171
- logger.success(`Injected ${cookies.length} cookies from ${opts.cookies}`);
168
+ // ── Browser command executor ────────────────────────────────────────
169
+ async function executeBrowserCommand(browser, method, params) {
170
+ switch (method) {
171
+ // Lifecycle
172
+ case 'launch':
173
+ await browser.launch();
174
+ return null;
175
+ case 'close':
176
+ await browser.close();
177
+ return null;
178
+ case 'closeContext':
179
+ await browser.closeContext();
180
+ return null;
181
+ // Navigation
182
+ case 'navigateTo':
183
+ await browser.navigateTo(params.url);
184
+ return null;
185
+ case 'addCookies':
186
+ await browser.addCookies(params.cookies);
187
+ return null;
188
+ // Screenshots — serialize buffers to base64
189
+ case 'takeScreenshot': {
190
+ const buf = await browser.takeScreenshot();
191
+ return { screenshot: buf.toString('base64') };
172
192
  }
173
- catch (err) {
174
- logger.error(`Failed to load cookies: ${err.message}`);
175
- process.exit(1);
193
+ case 'takeScreenshotForAI': {
194
+ const buf = await browser.takeScreenshotForAI(params);
195
+ return { screenshot: buf.toString('base64') };
176
196
  }
177
- }
178
- const manifest = {
179
- url: opts.url,
180
- prompt: opts.prompt,
181
- timestamp: new Date().toISOString(),
182
- captures: [],
183
- };
184
- try {
185
- for (const lang of langs) {
186
- for (const theme of themes) {
187
- logger.info(`--- Capturing: theme=${theme}, lang=${lang} ---`);
188
- // Set language and navigate
189
- await browser.setLanguage(lang);
190
- await browser.navigateTo(opts.url);
191
- logger.info('Page loaded');
192
- // Pre-cleanup: dismiss cookies and widgets
193
- const cookieResult = await dismissCookiesAndWidgets(browser.currentPage);
194
- if (cookieResult.dismissed) {
195
- logger.success(`Cookies dismissed: ${cookieResult.method}`);
196
- }
197
- // Set color scheme
198
- await browser.setColorScheme(theme);
199
- // Wait for re-renders
200
- await browser.wait(500);
201
- // Run AI agent with retry on give_up
202
- let currentPrompt = opts.prompt;
203
- let retryCount = 0;
204
- const maxRetries = 3;
205
- let result;
206
- while (true) {
207
- const config = {
208
- url: opts.url,
209
- prompt: currentPrompt,
210
- dark: theme === 'dark',
211
- langs: [lang],
212
- outputDir,
213
- headed: opts.headed,
214
- viewport: { width: vw, height: vh },
215
- maxIterations: parseInt(opts.maxIter, 10),
216
- model: opts.model,
217
- reasoningEffort: opts.reasoningEffort,
218
- credentials,
219
- langInstructions: opts.langInstructions,
220
- themeInstructions: opts.themeInstructions,
221
- currentLang: lang,
222
- currentTheme: theme,
223
- viewports,
224
- };
225
- result = await runAgent(browser, config, apiKey);
226
- // If succeeded or max retries reached, stop
227
- if (result.success || retryCount >= maxRetries)
228
- break;
229
- // Save diagnostic state on failure
230
- if (result.diagnostic) {
231
- const diagFilename = `diagnostic_${lang}_${theme}_attempt${retryCount}.png`;
232
- const diagPath = path.join(outputDir, diagFilename);
233
- await fs.writeFile(diagPath, result.diagnostic.screenshot);
234
- logger.info(`Diagnostic screenshot saved: ${diagPath}`);
235
- const diagStateFilename = `diagnostic_${lang}_${theme}_attempt${retryCount}.json`;
236
- const diagStatePath = path.join(outputDir, diagStateFilename);
237
- await fs.writeFile(diagStatePath, JSON.stringify({
238
- reason: result.diagnostic.giveUpReason,
239
- url: result.diagnostic.url,
240
- interactiveElements: result.diagnostic.interactiveElements.slice(0, 50),
241
- accessibilityTreeSnippet: result.diagnostic.accessibilityTreeSnippet,
242
- actions: result.actions,
243
- }, null, 2));
244
- logger.info(`Diagnostic state saved: ${diagStatePath}`);
245
- }
246
- // Ask user for clarification
247
- logger.error(`Agent gave up: ${result.assessment}`);
248
- logger.info('You can provide additional instructions to help the agent retry, or press Enter to skip.');
249
- const clarification = await askUser('\nAdditional instructions (or Enter to skip): ');
250
- if (!clarification)
251
- break; // User chose to skip
252
- // Append clarification to the prompt and retry
253
- retryCount++;
254
- currentPrompt = buildRetryPrompt(opts.prompt, result.assessment, clarification, retryCount + 1);
255
- logger.info(`Retrying with clarification (attempt ${retryCount + 1}/${maxRetries + 1})...`);
256
- }
257
- // Save final screenshot
258
- const filename = `screenshot_${lang}_${theme}.png`;
259
- const filepath = path.join(outputDir, filename);
260
- const finalScreenshot = await browser.takeScreenshot();
261
- await fs.writeFile(filepath, finalScreenshot);
262
- result.screenshotPath = filepath;
263
- if (result.success) {
264
- logger.success(`Final screenshot saved: ${filepath}`);
265
- }
266
- else {
267
- logger.error(`Failed (diagnostic saved): ${filepath}`);
268
- }
269
- // Save workflow screenshots (intermediate captures)
270
- const screenshotEntries = [];
271
- for (const ws of result.screenshots) {
272
- const wsFilename = `screenshot_${lang}_${theme}_step${ws.index}.png`;
273
- const wsPath = path.join(outputDir, wsFilename);
274
- await fs.writeFile(wsPath, ws.buffer);
275
- ws.path = wsPath;
276
- screenshotEntries.push({
277
- index: ws.index,
278
- iteration: ws.iteration,
279
- label: ws.label,
280
- filename: wsFilename,
281
- });
282
- logger.success(`Workflow screenshot saved: ${wsPath} ("${ws.label}")`);
283
- }
284
- manifest.captures.push({
285
- theme,
286
- lang,
287
- filename,
288
- screenshots: screenshotEntries,
289
- result,
290
- });
291
- // Capture isolated elements (if page is ready)
292
- if (elements.length > 0 && result.success) {
293
- logger.info(`--- Capturing ${elements.length} isolated element(s): lang=${lang}, theme=${theme} ---`);
294
- await browser.forceLoadLazyImages({ timeout: 8000 });
295
- for (const element of elements) {
296
- const elementResult = await captureIsolatedElement(browser, element, apiKey, opts.model);
297
- if (elementResult.success) {
298
- const elFilename = `${element.name}__${lang}_${theme}.png`;
299
- const elPath = path.join(outputDir, elFilename);
300
- await fs.writeFile(elPath, elementResult.buffer);
301
- logger.success(`Element "${element.name}" saved: ${elPath}`);
302
- }
303
- else {
304
- logger.error(`Element "${element.name}" failed: ${elementResult.assessment}`);
305
- }
306
- }
307
- }
308
- }
197
+ case 'screenshotElement': {
198
+ const buf = await browser.screenshotElement(params.index, params.padding);
199
+ return { screenshot: buf.toString('base64') };
309
200
  }
201
+ case 'screenshotRegion': {
202
+ const buf = await browser.screenshotRegion(params.x, params.y, params.width, params.height, params.padding);
203
+ return { screenshot: buf.toString('base64') };
204
+ }
205
+ case 'screenshotBySelector': {
206
+ const r = await browser.screenshotBySelector(params.selector, params.outscale);
207
+ return { buffer: r.buffer.toString('base64'), validation: r.validation };
208
+ }
209
+ case 'screenshotByRegion': {
210
+ const buf = await browser.screenshotByRegion(params.region, params.outscale);
211
+ return { screenshot: buf.toString('base64') };
212
+ }
213
+ // Page state — serialize screenshots inside PageState
214
+ case 'getPageState': {
215
+ const state = await browser.getPageState(params);
216
+ return {
217
+ ...state,
218
+ cleanScreenshot: state.cleanScreenshot.toString('base64'),
219
+ screenshot: state.screenshot.toString('base64'),
220
+ };
221
+ }
222
+ case 'getPageStateLite':
223
+ return await browser.getPageStateLite();
224
+ case 'getAccessibilityTree':
225
+ return await browser.getAccessibilityTree(params);
226
+ case 'getInteractiveElements':
227
+ return await browser.getInteractiveElements(params);
228
+ case 'getSimplifiedDOM':
229
+ return await browser.getSimplifiedDOM();
230
+ case 'captureObservation':
231
+ return await browser.captureObservation();
232
+ case 'capturePageSignals':
233
+ return await browser.capturePageSignals(params);
234
+ case 'captureVerificationBundle': {
235
+ const bundle = await browser.captureVerificationBundle(params);
236
+ return { ...bundle, screenshot: bundle.screenshot.toString('base64') };
237
+ }
238
+ case 'captureVideoVerificationBundle': {
239
+ const bundle = await browser.captureVideoVerificationBundle(params);
240
+ return { ...bundle, screenshot: bundle.screenshot.toString('base64') };
241
+ }
242
+ // Interactions
243
+ case 'clickByIndex':
244
+ await browser.clickByIndex(params.index);
245
+ return null;
246
+ case 'clickBySelector':
247
+ await browser.clickBySelector(params.selector, params);
248
+ return null;
249
+ case 'clickByCoordinates':
250
+ await browser.clickByCoordinates(params.x, params.y);
251
+ return null;
252
+ case 'hoverByIndex':
253
+ await browser.hoverByIndex(params.index);
254
+ return null;
255
+ case 'hoverBySelector':
256
+ await browser.hoverBySelector(params.selector);
257
+ return null;
258
+ case 'hoverByCoordinates':
259
+ await browser.hoverByCoordinates(params.x, params.y);
260
+ return null;
261
+ case 'typeText':
262
+ await browser.typeText(params.text, params);
263
+ return null;
264
+ case 'selectOption':
265
+ await browser.selectOption(params);
266
+ return null;
267
+ case 'scroll':
268
+ await browser.scroll(params.direction, params.amount, params.selector);
269
+ return null;
270
+ case 'scrollElementIntoView':
271
+ await browser.scrollElementIntoView(params.index, params);
272
+ return null;
273
+ case 'safeExpand':
274
+ await browser.safeExpand(params);
275
+ return null;
276
+ case 'pressKey':
277
+ await browser.pressKey(params.key);
278
+ return null;
279
+ case 'searchText': return await browser.searchText(params.query);
280
+ // Reactions
281
+ case 'waitForPageReaction':
282
+ return await browser.waitForPageReaction(params.before, params);
283
+ // UI manipulation
284
+ case 'wait':
285
+ await browser.wait(params.ms);
286
+ return null;
287
+ case 'dismissOverlays': return await browser.dismissOverlays();
288
+ case 'setColorScheme':
289
+ await browser.setColorScheme(params.scheme);
290
+ return null;
291
+ case 'setLanguage':
292
+ await browser.setLanguage(params.lang);
293
+ return null;
294
+ case 'resizeViewport':
295
+ await browser.resizeViewport(params.width, params.height);
296
+ return null;
297
+ case 'forceLoadLazyImages':
298
+ await browser.forceLoadLazyImages(params);
299
+ return null;
300
+ // Storage
301
+ case 'exportStorageState': return await browser.exportStorageState();
302
+ case 'exportSessionStorage': return await browser.exportSessionStorage();
303
+ case 'prepareSessionStorage':
304
+ await browser.prepareSessionStorage(params.bundle, params);
305
+ return null;
306
+ // Network
307
+ case 'observeNetworkRequests': return await browser.observeNetworkRequests(params.url, params.waitMs);
308
+ case 'setupRouteInterception':
309
+ await browser.setupRouteInterception(params.mocks);
310
+ return null;
311
+ case 'clearRouteInterception':
312
+ await browser.clearRouteInterception();
313
+ return null;
314
+ default:
315
+ throw new Error(`Unknown browser command: ${method}`);
310
316
  }
311
- finally {
312
- await browser.close();
313
- logger.info('Browser closed');
314
- }
315
- // Track capture completion
316
- const successes = manifest.captures.filter(c => c.result.success).length;
317
- const total = manifest.captures.length;
318
- const totalScreenshots = manifest.captures.reduce((sum, c) => sum + c.screenshots.length, 0);
319
- const totalTokens = manifest.captures.reduce((sum, c) => sum + c.result.usage.reduce((s, u) => s + (u.totalTokens ?? 0), 0), 0);
320
- getPostHog().capture({
321
- distinctId: DISTINCT_ID,
322
- event: 'capture_completed',
323
- properties: {
324
- url: opts.url,
325
- model: opts.model,
326
- successes,
327
- total,
328
- total_workflow_screenshots: totalScreenshots,
329
- total_tokens: totalTokens,
330
- all_succeeded: successes === total,
331
- },
332
- });
333
- // Write manifest (strip Buffer fields from screenshots before serializing)
334
- const manifestForJson = stripBuffersFromManifest(manifest);
335
- const manifestPath = path.join(outputDir, 'manifest.json');
336
- await fs.writeFile(manifestPath, JSON.stringify(manifestForJson, null, 2));
337
- logger.info(`Manifest saved: ${manifestPath}`);
338
- // Summary
339
- if (successes === total) {
340
- logger.success(`Done: ${successes}/${total} captures succeeded (${totalScreenshots} workflow screenshots)`);
341
- }
342
- else {
343
- logger.error(`Done: ${successes}/${total} captures succeeded (${totalScreenshots} workflow screenshots)`);
344
- }
345
- await shutdownPostHog();
346
317
  }
347
- // Only run when executed directly (not imported by tests)
348
- const isDirectExecution = process.argv[1] && (process.argv[1].endsWith('/cli.js') ||
349
- process.argv[1].endsWith('/cli.ts'));
318
+ // ── Entry point ─────────────────────────────────────────────────────
319
+ const resolvedArgv = process.argv[1] && fs.realpath(process.argv[1]).catch(() => process.argv[1]);
320
+ const isDirectExecution = resolvedArgv && await resolvedArgv.then(p => p.endsWith('/cli.js') || p.endsWith('/cli.ts'));
350
321
  if (isDirectExecution) {
351
- // Use parseAsync so that subcommand action handlers (e.g. `skill`) complete
352
- // before any fallback code runs. Without this, the sync `parse()` would
353
- // trigger `main()` before the async skill handler can call process.exit().
354
- program.parseAsync().then(async () => {
355
- // If a subcommand was invoked, its action handler calls process.exit().
356
- // The code below only runs for the default capture command.
357
- await main();
358
- }).catch(async (err) => {
359
- getPostHog().captureException(err, DISTINCT_ID);
360
- await shutdownPostHog();
322
+ program.parseAsync().catch(async (err) => {
361
323
  logger.error(err.message);
362
324
  process.exit(1);
363
325
  });