autokap 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/assets/chrome/ios-statusbar-comparison-reference.jpg +0 -0
  2. package/assets/chrome/ios-statusbar-dark-reference.jpg +0 -0
  3. package/assets/chrome/ios-statusbar-light-reference.jpg +0 -0
  4. package/assets/devices/ipad-pro-11-m4.json +52 -0
  5. package/assets/devices/iphone-16-pro.json +53 -0
  6. package/assets/devices/macbook-air-13.json +45 -0
  7. package/assets/frames/MacBook Air 13.svg +242 -0
  8. package/assets/frames/Status bar - iPhone.png +0 -0
  9. Menu bar- iPad.png +0 -0
  10. package/assets/frames/iPad Pro M4 11_.png +0 -0
  11. package/assets/frames/iPhone 16 Pro.png +0 -0
  12. package/assets/icons/Cellular Connection.svg +3 -0
  13. package/assets/icons/Union.svg +6 -0
  14. package/assets/icons/Wifi.svg +3 -0
  15. package/assets/icons/battery.svg +5 -0
  16. package/assets/icons/battery_charging.svg +8 -0
  17. package/assets/skill/SKILL.md +575 -0
  18. package/dist/abort.d.ts +5 -0
  19. package/dist/abort.js +44 -0
  20. package/dist/agent.d.ts +142 -0
  21. package/dist/agent.js +4504 -0
  22. package/dist/browser-bar.d.ts +40 -0
  23. package/dist/browser-bar.js +147 -0
  24. package/dist/browser-pool.d.ts +34 -0
  25. package/dist/browser-pool.js +122 -0
  26. package/dist/browser.d.ts +279 -0
  27. package/dist/browser.js +2902 -0
  28. package/dist/cli-utils.d.ts +25 -0
  29. package/dist/cli-utils.js +80 -0
  30. package/dist/cli.d.ts +4 -0
  31. package/dist/cli.js +365 -0
  32. package/dist/clip-orchestrator.d.ts +148 -0
  33. package/dist/clip-orchestrator.js +950 -0
  34. package/dist/clip-postprocess.d.ts +42 -0
  35. package/dist/clip-postprocess.js +192 -0
  36. package/dist/cookie-dismiss.d.ts +5 -0
  37. package/dist/cookie-dismiss.js +172 -0
  38. package/dist/credential-templates.d.ts +5 -0
  39. package/dist/credential-templates.js +60 -0
  40. package/dist/element-capture.d.ts +53 -0
  41. package/dist/element-capture.js +766 -0
  42. package/dist/hybrid-navigator.d.ts +138 -0
  43. package/dist/hybrid-navigator.js +468 -0
  44. package/dist/index.d.ts +15 -0
  45. package/dist/index.js +11 -0
  46. package/dist/llm-usage.d.ts +17 -0
  47. package/dist/llm-usage.js +45 -0
  48. package/dist/logger.d.ts +46 -0
  49. package/dist/logger.js +79 -0
  50. package/dist/mockup-html.d.ts +119 -0
  51. package/dist/mockup-html.js +253 -0
  52. package/dist/mockup.d.ts +94 -0
  53. package/dist/mockup.js +604 -0
  54. package/dist/mouse-animation.d.ts +46 -0
  55. package/dist/mouse-animation.js +100 -0
  56. package/dist/overlay-utils.d.ts +14 -0
  57. package/dist/overlay-utils.js +13 -0
  58. package/dist/posthog.d.ts +4 -0
  59. package/dist/posthog.js +26 -0
  60. package/dist/prompt-cache.d.ts +10 -0
  61. package/dist/prompt-cache.js +24 -0
  62. package/dist/prompts.d.ts +167 -0
  63. package/dist/prompts.js +1165 -0
  64. package/dist/security.d.ts +20 -0
  65. package/dist/security.js +569 -0
  66. package/dist/session-profile.d.ts +86 -0
  67. package/dist/session-profile.js +1471 -0
  68. package/dist/sf-pro-fonts.d.ts +4 -0
  69. package/dist/sf-pro-fonts.js +7 -0
  70. package/dist/status-bar-l10n.d.ts +14 -0
  71. package/dist/status-bar-l10n.js +177 -0
  72. package/dist/status-bar.d.ts +44 -0
  73. package/dist/status-bar.js +336 -0
  74. package/dist/tools.d.ts +4 -0
  75. package/dist/tools.js +578 -0
  76. package/dist/types.d.ts +796 -0
  77. package/dist/types.js +2 -0
  78. package/dist/video-agent.d.ts +143 -0
  79. package/dist/video-agent.js +4783 -0
  80. package/dist/video-observation.d.ts +36 -0
  81. package/dist/video-observation.js +192 -0
  82. package/dist/video-planner.d.ts +12 -0
  83. package/dist/video-planner.js +500 -0
  84. package/dist/video-prompts.d.ts +37 -0
  85. package/dist/video-prompts.js +554 -0
  86. package/dist/video-tools.d.ts +3 -0
  87. package/dist/video-tools.js +59 -0
  88. package/dist/video-variant-state.d.ts +29 -0
  89. package/dist/video-variant-state.js +80 -0
  90. package/dist/vision-model.d.ts +17 -0
  91. package/dist/vision-model.js +74 -0
  92. package/package.json +165 -0
  93. package/readme.md +61 -0
@@ -0,0 +1,25 @@
1
+ import type { CaptureManifest, IsolatedElement, LoginCredentials } from './types.js';
2
/** Parses one `WxH` viewport string (e.g. `1440x900`); throws an Error when the input is not a valid `WxH` pair. */
+ export declare function parseViewport(input: string): {
3
+ width: number;
4
+ height: number;
5
+ };
6
/** Parses a comma-separated list of `WxH` viewport strings; throws an Error on the first invalid entry. */
+ export declare function parseViewports(input: string): Array<{
7
+ width: number;
8
+ height: number;
9
+ }>;
10
/** Splits a comma-separated list of language codes, trimming each entry and dropping empty ones. */
+ export declare function parseLanguages(input: string): string[];
11
/** Returns `['light']`, followed by `'dark'` when `dark` is truthy. */
+ export declare function buildThemeList(dark: boolean): Array<'light' | 'dark'>;
12
/** Returns `undefined` when neither `email` nor `password` is set; otherwise returns the three fields as given. */
+ export declare function buildCredentials(opts: {
13
+ email?: string;
14
+ password?: string;
15
+ loginUrl?: string;
16
+ }): LoginCredentials | undefined;
17
/** Parses `name:description` specs, splitting on the first colon; throws an Error when a spec has no colon. */
+ export declare function parseElements(specs: string[]): IsolatedElement[];
18
/** Returns a copy of the manifest in which each capture's `result.screenshots` is set to `undefined`, for JSON serialization. */
+ export declare function stripBuffersFromManifest(manifest: CaptureManifest): object;
19
/**
 * Substitutes `[AUTOKAP_PROJECT_URL]`, `[AUTOKAP_PROJECT_ID]`, `YOUR_API_KEY` and the
 * `https://app.autokap.com` base URL in `content`. Placeholders whose option is not
 * provided are left untouched.
 */
+ export declare function replaceSkillPlaceholders(content: string, opts: {
20
+ projectUrl?: string;
21
+ projectId?: string;
22
+ apiKey?: string;
23
+ apiBaseUrl?: string;
24
+ }): string;
25
/** Appends the previous failure assessment and the user's clarification to the original prompt, labelled with the attempt number. */
+ export declare function buildRetryPrompt(originalPrompt: string, assessment: string, clarification: string, attemptNumber: number): string;
@@ -0,0 +1,80 @@
1
/**
 * Parse a single viewport specification of the form `WxH` (e.g. `1440x900`).
 *
 * Both dimensions must be positive integers; surrounding whitespace around each
 * number is tolerated.
 *
 * @param {string} input - Viewport string such as `"1440x900"`.
 * @returns {{ width: number, height: number }} The parsed dimensions.
 * @throws {Error} When the input is not exactly two positive integers separated by `x`.
 */
export function parseViewport(input) {
    const dims = input.split('x').map(part => {
        const text = part.trim();
        // Number('') is 0, so an empty segment ("x900", "1440x") must be rejected explicitly.
        return text === '' ? NaN : Number(text);
    });
    // Number.isInteger also rejects NaN, Infinity and fractional sizes.
    if (dims.length !== 2 || !dims.every(n => Number.isInteger(n) && n > 0)) {
        throw new Error(`Invalid viewport format: "${input}". Use WxH (e.g., 1440x900)`);
    }
    return { width: dims[0], height: dims[1] };
}
8
/**
 * Parse a comma-separated list of `WxH` viewport specifications
 * (e.g. `"1440x900,768x1024,375x812"`).
 *
 * Every entry must consist of two positive integers separated by `x`.
 *
 * @param {string} input - Comma-separated viewport list.
 * @returns {Array<{ width: number, height: number }>} Parsed viewports, in input order.
 * @throws {Error} On the first entry that is not a valid `WxH` pair (including empty entries).
 */
export function parseViewports(input) {
    return input.split(',').map(entry => {
        const trimmed = entry.trim();
        const dims = trimmed.split('x').map(part => {
            const text = part.trim();
            // Number('') is 0, so an empty segment must be rejected explicitly.
            return text === '' ? NaN : Number(text);
        });
        // Number.isInteger also rejects NaN, Infinity and fractional sizes.
        if (dims.length !== 2 || !dims.every(n => Number.isInteger(n) && n > 0)) {
            throw new Error(`Invalid viewport in --viewports: "${trimmed}". Use WxH (e.g., 1440x900)`);
        }
        return { width: dims[0], height: dims[1] };
    });
}
18
/**
 * Split a comma-separated list of language codes.
 *
 * @param {string} input - e.g. `"en, fr,de"`.
 * @returns {string[]} The trimmed codes in input order, with empty entries removed.
 */
export function parseLanguages(input) {
    const codes = [];
    for (const piece of input.split(',')) {
        const code = piece.trim();
        if (code) {
            codes.push(code);
        }
    }
    return codes;
}
21
/**
 * Build the list of colour themes to capture.
 *
 * @param {boolean} dark - Whether the dark variant is requested.
 * @returns {Array<'light' | 'dark'>} `['light']`, with `'dark'` appended when `dark` is truthy.
 */
export function buildThemeList(dark) {
    return dark ? ['light', 'dark'] : ['light'];
}
27
/**
 * Assemble login credentials from CLI options.
 *
 * @param {{ email?: string, password?: string, loginUrl?: string }} opts
 * @returns {{ loginUrl?: string, email?: string, password?: string } | undefined}
 *   `undefined` when neither an email nor a password was supplied; otherwise the
 *   three fields exactly as given (a login URL alone does not count as credentials).
 */
export function buildCredentials(opts) {
    const { loginUrl, email, password } = opts;
    if (email || password) {
        return { loginUrl, email, password };
    }
    return undefined;
}
36
/**
 * Parse `--element` specifications of the form `name:description`.
 *
 * The spec is split on the FIRST colon, so the description may itself contain
 * colons. Both parts are trimmed and must be non-empty: the name is later used
 * as a filename prefix and the description is what identifies the element.
 *
 * @param {string[]} specs - Raw `name:description` strings.
 * @returns {Array<{ name: string, description: string }>} Parsed elements, in input order.
 * @throws {Error} When a spec has no colon, or an empty name or description.
 */
export function parseElements(specs) {
    return specs.map(spec => {
        const colonIdx = spec.indexOf(':');
        const name = colonIdx === -1 ? '' : spec.slice(0, colonIdx).trim();
        const description = colonIdx === -1 ? '' : spec.slice(colonIdx + 1).trim();
        if (!name || !description) {
            throw new Error(`Invalid --element format: "${spec}". Use "name:description"`);
        }
        return { name, description };
    });
}
50
+ export function stripBuffersFromManifest(manifest) {
51
+ return {
52
+ ...manifest,
53
+ captures: manifest.captures.map(c => ({
54
+ ...c,
55
+ result: {
56
+ ...c.result,
57
+ screenshots: undefined,
58
+ },
59
+ })),
60
+ };
61
+ }
62
/**
 * Fill in the placeholders of the bundled skill document.
 *
 * - `[AUTOKAP_PROJECT_URL]` is replaced by `opts.projectUrl` (when set)
 * - `[AUTOKAP_PROJECT_ID]` is replaced by `opts.projectId` (when set)
 * - `YOUR_API_KEY` is replaced by `opts.apiKey` (when set)
 * - `https://app.autokap.com` is replaced by `opts.apiBaseUrl` (defaults to itself)
 *
 * Values are inserted verbatim. A replacer function is used because a plain
 * replacement string would expand `$&`, `$1`, `$$` and similar patterns, which
 * corrupts keys or URLs that contain `$`.
 *
 * @param {string} content - Raw skill file content.
 * @param {{ projectUrl?: string, projectId?: string, apiKey?: string, apiBaseUrl?: string }} opts
 * @returns {string} The content with every applicable placeholder substituted.
 */
export function replaceSkillPlaceholders(content, opts) {
    const substitute = (text, pattern, value) => text.replace(pattern, () => value);
    let result = content;
    if (opts.projectUrl) {
        result = substitute(result, /\[AUTOKAP_PROJECT_URL\]/g, opts.projectUrl);
    }
    if (opts.projectId) {
        result = substitute(result, /\[AUTOKAP_PROJECT_ID\]/g, opts.projectId);
    }
    if (opts.apiKey) {
        result = substitute(result, /YOUR_API_KEY/g, opts.apiKey);
    }
    // The base URL runs last, so it also rewrites occurrences introduced by the values above.
    const baseUrl = opts.apiBaseUrl ?? 'https://app.autokap.com';
    result = substitute(result, /https:\/\/app\.autokap\.com/g, baseUrl);
    return result;
}
77
/**
 * Compose the prompt used when retrying after the agent gave up.
 *
 * @param {string} originalPrompt - The prompt the user started with.
 * @param {string} assessment - The agent's explanation of why the previous attempt failed.
 * @param {string} clarification - Extra instructions typed by the user.
 * @param {number} attemptNumber - Number shown in the clarification header.
 * @returns {string} The original prompt followed by a blank line and the clarification section.
 */
export function buildRetryPrompt(originalPrompt, assessment, clarification, attemptNumber) {
    const sections = [
        `${originalPrompt}`,
        '',
        `IMPORTANT CLARIFICATION FROM USER (attempt #${attemptNumber}):`,
        `The previous attempt failed: "${assessment}"`,
        `User says: ${clarification}`,
    ];
    return sections.join('\n');
}
80
+ //# sourceMappingURL=cli-utils.js.map
package/dist/cli.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
/** The Commander program that defines the `autokap` command-line options and the `skill` subcommand. */
+ export declare const program: Command;
4
/** Runs the default capture command using the options currently held by `program`. */
+ export declare function main(): Promise<void>;
package/dist/cli.js ADDED
@@ -0,0 +1,365 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { config as loadEnv } from 'dotenv';
4
+ import { createRequire } from 'node:module';
5
+ import path from 'node:path';
6
+ import fs from 'node:fs/promises';
7
+ import readline from 'node:readline';
8
+ const require = createRequire(import.meta.url);
9
+ const { version } = require('../package.json');
10
+ import { Browser } from './browser.js';
11
+ import { runAgent } from './agent.js';
12
+ import { captureIsolatedElement } from './element-capture.js';
13
+ import { dismissCookiesAndWidgets } from './cookie-dismiss.js';
14
+ import { logger } from './logger.js';
15
+ import { getPostHog, shutdownPostHog, DISTINCT_ID } from './posthog.js';
16
+ import { parseViewport, parseViewports, parseLanguages, buildThemeList, buildCredentials, parseElements, stripBuffersFromManifest, replaceSkillPlaceholders, buildRetryPrompt, } from './cli-utils.js';
17
/**
 * Ask a question on the terminal and resolve with the trimmed answer.
 *
 * Resolves with an empty string when stdin is closed before an answer is
 * given (piped input, CI, Ctrl+D). Without that, readline closes without
 * invoking the question callback and the returned promise never settles.
 *
 * @param {string} question - Text written to stdout before reading a line.
 * @returns {Promise<string>} The trimmed answer, or `''` if input ended first.
 */
function askUser(question) {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise(resolve => {
        // Fires on end of input; it is a no-op once the answer below has resolved the promise.
        rl.once('close', () => resolve(''));
        rl.question(question, answer => {
            resolve(answer.trim());
            rl.close();
        });
    });
}
26
/** Root Commander program for the `autokap` CLI; options are registered in help-output order. */
export const program = new Command();
// Identity shown by --help and --version.
program.name('autokap');
program.version(version);
program.description('AI-powered screenshot capture tool');
// What to capture and which variants to produce.
program
    .option('--url <url>', 'Target URL')
    .option('--prompt <prompt>', 'What to capture (natural language)')
    .option('--dark', 'Also capture dark mode variant', false)
    .option('--langs <codes>', 'Comma-separated language codes (e.g., en,fr,de)', 'en')
    .option('--output <dir>', 'Output directory', './output');
// Browser and viewport settings.
program
    .option('--headed', 'Show browser window (debug mode)', false)
    .option('--viewport <size>', 'Viewport size WxH (e.g., 1440x900)', '1440x900')
    .option('--viewports <sizes>', 'Comma-separated viewport sizes for multi-viewport capture (e.g., "1440x900,768x1024,375x812")');
// Agent and model settings (numeric values stay strings here and are parsed by main()).
program
    .option('--max-iter <n>', 'Max agent iterations', '30')
    .option('--model <id>', 'OpenRouter model ID', 'x-ai/grok-4.1-fast')
    .option('--reasoning-effort <level>', 'Reasoning effort for compatible models: low, medium, high, off', 'medium');
// Authentication.
program
    .option('--login-url <url>', 'Login page URL (if different from --url)')
    .option('--email <email>', 'Login email/username')
    .option('--password <password>', 'Login password')
    .option('--cookies <file>', 'Path to a JSON file with cookies to inject (for OAuth/session auth)');
// Site-specific hints and isolated element capture.
program
    .option('--lang-instructions <text>', 'How to switch language on the site (e.g., "Click the language selector in the footer and choose the target language")')
    .option('--theme-instructions <text>', 'How to switch theme on the site (e.g., "Click the sun/moon icon in the top-right corner")')
    .option('--element <specs...>', 'Isolated elements to capture (format: "name:description")');
49
+ // ── Skill subcommand ──────────────────────────────────────────────────
50
// By default Commander lets the root program consume any option it recognises,
// even after the subcommand name. The root already defines `--output <dir>`, so
// `autokap skill --output file.md` would set the root option and leave the
// subcommand's `--output` unset. Positional options confine root options to the
// part of the command line before the subcommand.
program.enablePositionalOptions();
program
    .command('skill')
    .description('Output or install the AutoKap preset creation skill for AI coding agents')
    .option('--output <path>', 'Write skill file to this path instead of stdout')
    .option('--project-url <url>', 'Replace the project URL placeholder in the skill')
    .option('--project-id <id>', 'Replace the project ID placeholder in the skill')
    .option('--api-key <key>', 'Replace the API key placeholder in the skill')
    .option('--api-base-url <url>', 'Replace the API base URL placeholder (default: https://app.autokap.com)')
    /**
     * Reads the bundled SKILL.md, substitutes the placeholders, then writes the
     * result to `--output` or to stdout. Always terminates the process:
     * exit code 1 when SKILL.md cannot be read, 0 otherwise.
     */
    .action(async (opts) => {
        // URL#pathname keeps percent-escapes (e.g. %20 for spaces) and yields "/C:/..." on
        // Windows; fileURLToPath converts the module URL into a real filesystem path.
        const { fileURLToPath } = await import('node:url');
        const moduleDir = path.dirname(fileURLToPath(import.meta.url));
        const skillPath = path.resolve(moduleDir, '..', 'assets', 'skill', 'SKILL.md');
        let rawContent;
        try {
            rawContent = await fs.readFile(skillPath, 'utf-8');
        }
        catch {
            console.error('Error: Could not find SKILL.md. Make sure the autokap package is installed correctly.');
            process.exit(1);
        }
        const content = replaceSkillPlaceholders(rawContent, opts);
        if (opts.output) {
            const outDir = path.dirname(opts.output);
            await fs.mkdir(outDir, { recursive: true });
            await fs.writeFile(opts.output, content, 'utf-8');
            console.log(`Skill file written to: ${opts.output}`);
            console.log('');
            console.log('Next steps:');
            console.log('  Claude Code: Place the file in .claude/commands/autokap-preset.md');
            console.log('  Cursor: Place the file in .cursor/rules/autokap-preset.md');
            console.log('  Other agents: Point your agent to the file when asking it to create presets');
        }
        else {
            // Wait for the write to be flushed: process.exit() discards pending
            // asynchronous stdout writes, which can truncate piped output.
            await new Promise(resolve => process.stdout.write(content, () => resolve()));
        }
        process.exit(0);
    });
85
/**
 * Default capture command.
 *
 * Reads the parsed options from `program`, validates them, launches a browser
 * and, for every language × theme combination, navigates to the target URL,
 * runs the AI agent (with up to three user-assisted retries), and writes the
 * resulting screenshots, optional isolated-element captures and a
 * `manifest.json` into the output directory. Telemetry events are sent at
 * start and completion.
 *
 * Terminates the process with exit code 1 when a required option or the
 * `OPENROUTER_API_KEY` environment variable is missing, when `--max-iter` or
 * `--langs` is invalid, or when the cookie file cannot be loaded.
 *
 * @returns {Promise<void>}
 * @throws {Error} Propagates viewport/element parsing errors and any browser, agent or filesystem failure.
 */
export async function main() {
    loadEnv();
    const opts = program.opts();
    if (!opts.url) {
        logger.error("Missing required option: --url <url>");
        process.exit(1);
    }
    if (!opts.prompt) {
        logger.error("Missing required option: --prompt <prompt>");
        process.exit(1);
    }
    const apiKey = process.env.OPENROUTER_API_KEY;
    if (!apiKey) {
        logger.error('OPENROUTER_API_KEY not set. Create a .env file or set the environment variable.');
        process.exit(1);
    }
    // Parse the iteration budget once and reject garbage up front; an unchecked
    // parseInt() would hand NaN to the agent and to telemetry.
    const maxIterations = Number.parseInt(opts.maxIter, 10);
    if (!Number.isInteger(maxIterations) || maxIterations < 1) {
        logger.error(`Invalid --max-iter value: "${opts.maxIter}". Use a positive integer.`);
        process.exit(1);
    }
    // Parse viewport
    const { width: vw, height: vh } = parseViewport(opts.viewport);
    // Parse multi-viewports (if provided)
    const viewports = opts.viewports ? parseViewports(opts.viewports) : undefined;
    // Parse languages
    const langs = parseLanguages(opts.langs);
    if (langs.length === 0) {
        // With no language the capture loop would not run and 0/0 would be reported as success.
        logger.error(`Invalid --langs value: "${opts.langs}". Provide at least one language code.`);
        process.exit(1);
    }
    // Build theme list
    const themes = buildThemeList(opts.dark);
    // Build credentials (if provided)
    const credentials = buildCredentials({
        email: opts.email,
        password: opts.password,
        loginUrl: opts.loginUrl,
    });
    // Parse isolated elements
    const elements = opts.element ? parseElements(opts.element) : [];
    // Ensure output directory exists
    const outputDir = path.resolve(opts.output);
    await fs.mkdir(outputDir, { recursive: true });
    logger.info(`URL: ${opts.url}`);
    logger.info(`Prompt: "${opts.prompt}"`);
    logger.info(`Model: ${opts.model}${opts.reasoningEffort !== 'off' ? ` (reasoning: ${opts.reasoningEffort})` : ''}`);
    logger.info(`Viewport: ${vw}x${vh}${viewports ? ` + multi: ${viewports.map(v => `${v.width}x${v.height}`).join(', ')}` : ''}`);
    logger.info(`Themes: ${themes.join(', ')}`);
    logger.info(`Languages: ${langs.join(', ')}`);
    logger.info(`Max iterations: ${opts.maxIter}`);
    if (credentials)
        logger.info(`Login: ${credentials.email || '(no email)'}${credentials.loginUrl ? ` via ${credentials.loginUrl}` : ''}`);
    if (opts.cookies)
        logger.info(`Cookies: ${opts.cookies}`);
    if (opts.langInstructions)
        logger.info(`Lang instructions: "${opts.langInstructions}"`);
    if (opts.themeInstructions)
        logger.info(`Theme instructions: "${opts.themeInstructions}"`);
    if (elements.length > 0)
        logger.info(`Isolated elements: ${elements.map(e => e.name).join(', ')}`);
    logger.info(`Output: ${outputDir}`);
    // Track capture start
    getPostHog().capture({
        distinctId: DISTINCT_ID,
        event: 'capture_started',
        properties: {
            url: opts.url,
            prompt: opts.prompt,
            model: opts.model,
            themes,
            langs,
            viewport: `${vw}x${vh}`,
            multi_viewports: viewports ? viewports.map(v => `${v.width}x${v.height}`) : null,
            max_iterations: maxIterations,
            has_credentials: !!credentials,
            has_cookies: !!opts.cookies,
            element_count: elements.length,
        },
    });
    // Launch browser
    const browser = new Browser({
        headed: opts.headed,
        viewport: { width: vw, height: vh },
    });
    await browser.launch();
    logger.success('Browser launched');
    // Inject cookies if provided
    if (opts.cookies) {
        try {
            const cookieFile = await fs.readFile(path.resolve(opts.cookies), 'utf-8');
            const cookies = JSON.parse(cookieFile);
            if (!Array.isArray(cookies))
                throw new Error('Cookies file must contain a JSON array');
            await browser.addCookies(cookies);
            logger.success(`Injected ${cookies.length} cookies from ${opts.cookies}`);
        }
        catch (err) {
            logger.error(`Failed to load cookies: ${err instanceof Error ? err.message : String(err)}`);
            // process.exit() runs no further cleanup, so release the already
            // launched browser explicitly before terminating.
            try {
                await browser.close();
            }
            catch {
                // Best effort: the process is exiting with an error anyway.
            }
            process.exit(1);
        }
    }
    const manifest = {
        url: opts.url,
        prompt: opts.prompt,
        timestamp: new Date().toISOString(),
        captures: [],
    };
    try {
        for (const lang of langs) {
            for (const theme of themes) {
                logger.info(`--- Capturing: theme=${theme}, lang=${lang} ---`);
                // Set language and navigate
                await browser.setLanguage(lang);
                await browser.navigateTo(opts.url);
                logger.info('Page loaded');
                // Pre-cleanup: dismiss cookies and widgets
                const cookieResult = await dismissCookiesAndWidgets(browser.currentPage);
                if (cookieResult.dismissed) {
                    logger.success(`Cookies dismissed: ${cookieResult.method}`);
                }
                // Set color scheme
                await browser.setColorScheme(theme);
                // Wait for re-renders
                await browser.wait(500);
                // Run AI agent with retry on give_up
                let currentPrompt = opts.prompt;
                let retryCount = 0;
                const maxRetries = 3;
                let result;
                while (true) {
                    const config = {
                        url: opts.url,
                        prompt: currentPrompt,
                        dark: theme === 'dark',
                        langs: [lang],
                        outputDir,
                        headed: opts.headed,
                        viewport: { width: vw, height: vh },
                        maxIterations,
                        model: opts.model,
                        reasoningEffort: opts.reasoningEffort,
                        credentials,
                        langInstructions: opts.langInstructions,
                        themeInstructions: opts.themeInstructions,
                        currentLang: lang,
                        currentTheme: theme,
                        viewports,
                    };
                    result = await runAgent(browser, config, apiKey);
                    // If succeeded or max retries reached, stop
                    if (result.success || retryCount >= maxRetries)
                        break;
                    // Save diagnostic state on failure
                    if (result.diagnostic) {
                        const diagFilename = `diagnostic_${lang}_${theme}_attempt${retryCount}.png`;
                        const diagPath = path.join(outputDir, diagFilename);
                        await fs.writeFile(diagPath, result.diagnostic.screenshot);
                        logger.info(`Diagnostic screenshot saved: ${diagPath}`);
                        const diagStateFilename = `diagnostic_${lang}_${theme}_attempt${retryCount}.json`;
                        const diagStatePath = path.join(outputDir, diagStateFilename);
                        await fs.writeFile(diagStatePath, JSON.stringify({
                            reason: result.diagnostic.giveUpReason,
                            url: result.diagnostic.url,
                            interactiveElements: result.diagnostic.interactiveElements.slice(0, 50),
                            accessibilityTreeSnippet: result.diagnostic.accessibilityTreeSnippet,
                            actions: result.actions,
                        }, null, 2));
                        logger.info(`Diagnostic state saved: ${diagStatePath}`);
                    }
                    // Ask user for clarification
                    logger.error(`Agent gave up: ${result.assessment}`);
                    logger.info('You can provide additional instructions to help the agent retry, or press Enter to skip.');
                    const clarification = await askUser('\nAdditional instructions (or Enter to skip): ');
                    if (!clarification)
                        break; // User chose to skip
                    // Append clarification to the prompt and retry. The clarification is always
                    // appended to the ORIGINAL prompt, so earlier clarifications are not carried over.
                    retryCount++;
                    currentPrompt = buildRetryPrompt(opts.prompt, result.assessment, clarification, retryCount + 1);
                    logger.info(`Retrying with clarification (attempt ${retryCount + 1}/${maxRetries + 1})...`);
                }
                // Save final screenshot (taken whether or not the agent succeeded)
                const filename = `screenshot_${lang}_${theme}.png`;
                const filepath = path.join(outputDir, filename);
                const finalScreenshot = await browser.takeScreenshot();
                await fs.writeFile(filepath, finalScreenshot);
                result.screenshotPath = filepath;
                if (result.success) {
                    logger.success(`Final screenshot saved: ${filepath}`);
                }
                else {
                    logger.error(`Failed (diagnostic saved): ${filepath}`);
                }
                // Save workflow screenshots (intermediate captures)
                const screenshotEntries = [];
                for (const ws of result.screenshots) {
                    const wsFilename = `screenshot_${lang}_${theme}_step${ws.index}.png`;
                    const wsPath = path.join(outputDir, wsFilename);
                    await fs.writeFile(wsPath, ws.buffer);
                    ws.path = wsPath;
                    screenshotEntries.push({
                        index: ws.index,
                        iteration: ws.iteration,
                        label: ws.label,
                        filename: wsFilename,
                    });
                    logger.success(`Workflow screenshot saved: ${wsPath} ("${ws.label}")`);
                }
                manifest.captures.push({
                    theme,
                    lang,
                    filename,
                    screenshots: screenshotEntries,
                    result,
                });
                // Capture isolated elements (if page is ready)
                if (elements.length > 0 && result.success) {
                    logger.info(`--- Capturing ${elements.length} isolated element(s): lang=${lang}, theme=${theme} ---`);
                    await browser.forceLoadLazyImages({ timeout: 8000 });
                    for (const element of elements) {
                        const elementResult = await captureIsolatedElement(browser, element, apiKey, opts.model);
                        if (elementResult.success) {
                            const elFilename = `${element.name}__${lang}_${theme}.png`;
                            const elPath = path.join(outputDir, elFilename);
                            await fs.writeFile(elPath, elementResult.buffer);
                            logger.success(`Element "${element.name}" saved: ${elPath}`);
                        }
                        else {
                            logger.error(`Element "${element.name}" failed: ${elementResult.assessment}`);
                        }
                    }
                }
            }
        }
    }
    finally {
        await browser.close();
        logger.info('Browser closed');
    }
    // Track capture completion
    const successes = manifest.captures.filter(c => c.result.success).length;
    const total = manifest.captures.length;
    const totalScreenshots = manifest.captures.reduce((sum, c) => sum + c.screenshots.length, 0);
    const totalTokens = manifest.captures.reduce((sum, c) => sum + c.result.usage.reduce((s, u) => s + (u.totalTokens ?? 0), 0), 0);
    getPostHog().capture({
        distinctId: DISTINCT_ID,
        event: 'capture_completed',
        properties: {
            url: opts.url,
            model: opts.model,
            successes,
            total,
            total_workflow_screenshots: totalScreenshots,
            total_tokens: totalTokens,
            all_succeeded: successes === total,
        },
    });
    // Write manifest (strip Buffer fields from screenshots before serializing)
    const manifestForJson = stripBuffersFromManifest(manifest);
    const manifestPath = path.join(outputDir, 'manifest.json');
    await fs.writeFile(manifestPath, JSON.stringify(manifestForJson, null, 2));
    logger.info(`Manifest saved: ${manifestPath}`);
    // Summary
    if (successes === total) {
        logger.success(`Done: ${successes}/${total} captures succeeded (${totalScreenshots} workflow screenshots)`);
    }
    else {
        logger.error(`Done: ${successes}/${total} captures succeeded (${totalScreenshots} workflow screenshots)`);
    }
    await shutdownPostHog();
}
347
+ // Only run when executed directly (not imported by tests)
348
/**
 * Report whether this module is the script Node was started with.
 *
 * Both paths are resolved with realpath before comparison, so the check holds
 * when the CLI is started through a bin symlink and on Windows, where
 * `process.argv[1]` uses backslashes. A suffix test on `process.argv[1]`
 * covers neither case.
 *
 * @returns {Promise<boolean>} `true` only when `process.argv[1]` resolves to this file.
 */
async function detectDirectExecution() {
    const entry = process.argv[1];
    if (!entry)
        return false;
    try {
        const { fileURLToPath } = await import('node:url');
        const [entryPath, modulePath] = await Promise.all([
            fs.realpath(entry),
            fs.realpath(fileURLToPath(import.meta.url)),
        ]);
        return entryPath === modulePath;
    }
    catch {
        // An unresolvable entry path means this file was not the one executed.
        return false;
    }
}
detectDirectExecution().then(async (direct) => {
    if (!direct)
        return;
    // Register main() as the root action instead of chaining it after parseAsync():
    // Commander prints help and exits when a program that has subcommands but no
    // action handler is invoked without a subcommand, so the default capture
    // command would never be reached. Registering it here rather than at module
    // level keeps importers such as tests free of the handler.
    program.action(async () => {
        await main();
    });
    // parseAsync waits for async action handlers; the `skill` subcommand ends the
    // process itself from inside its handler.
    await program.parseAsync();
}).catch(async (err) => {
    getPostHog().captureException(err, DISTINCT_ID);
    await shutdownPostHog();
    logger.error(err instanceof Error ? err.message : String(err));
    process.exit(1);
});
365
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1,148 @@
1
+ import type { ClipDefinition, ClipOptions, LoginCredentials, VideoPlan, StepUsage, VideoPhase } from './types.js';
2
/** Element type of `ClipOrchestratorConfig.targets`: an identifier, a label and a viewport size. */
+ export interface ClipTarget {
3
+ id: string;
4
+ label: string;
5
+ viewport: {
6
+ width: number;
7
+ height: number;
8
+ };
9
+ }
10
/**
 * Configuration object taken as the first argument of `runClipOrchestrator`.
 * `url`, `clips`, `targets`, `langs`, `themes`, `model`, `apiKey` and `runId`
 * are required; every other field is optional.
 */
+ export interface ClipOrchestratorConfig {
11
+ url: string;
12
+ clips: ClipDefinition[];
13
+ targets: ClipTarget[];
14
+ outputScale?: number;
15
+ langs: string[];
16
+ themes: Array<'light' | 'dark'>;
17
+ clipOptions?: ClipOptions;
18
+ credentials?: LoginCredentials;
19
+ model: string;
20
+ apiKey: string;
21
+ runId: string;
22
+ presetId?: string;
23
+ projectId?: string;
24
+ langInstructions?: string;
25
+ themeInstructions?: string;
26
+ navigationInstructions?: string;
// NOTE(review): presumably used to cancel an in-flight run; the cancellation behavior is not visible in this declaration file.
27
+ abortSignal?: AbortSignal;
28
+ /** Fallback model used when the primary model fails to produce tool calls (e.g. in repair lane). */
29
+ fallbackModel?: string;
30
+ /** Per-clip timeout in milliseconds. Default: 180_000 (3 minutes). */
31
+ perClipTimeoutMs?: number;
32
+ }
33
/**
 * Element type of `ClipOrchestratorResult.results`.
 * NOTE(review): presumably one entry per clip × target × lang × theme variant,
 * going by the `runClipOrchestrator` description; confirm against the implementation.
 */
+ export interface ClipVariantResult {
34
+ clipId: string;
35
+ clipName: string;
36
+ clipScript: string;
37
+ targetId: string;
38
+ targetLabel: string;
39
+ viewportWidth: number;
40
+ viewportHeight: number;
41
+ lang: string;
42
+ theme: string;
43
+ success: boolean;
44
+ gifPath?: string;
45
+ mp4Path?: string;
46
+ thumbnailPath?: string;
47
+ durationMs: number;
48
+ fileSizeBytes?: number;
49
+ plan?: VideoPlan;
50
+ stepsExecuted: number;
51
+ assessment: string;
// The failed* fields are optional; NOTE(review): presumably populated only when `success` is false (confirm).
52
+ failedPhase?: 'preflight' | 'planning' | 'dry_run' | 'recording' | 'postprocess';
53
+ failedStepIndex?: number;
54
+ failedSubphase?: string;
55
+ usage: StepUsage[];
56
+ }
57
/** Value the promise returned by `runClipOrchestrator` resolves to: the per-variant results plus two counters. */
+ export interface ClipOrchestratorResult {
58
+ results: ClipVariantResult[];
59
+ totalClips: number;
60
+ successCount: number;
61
+ }
62
/**
 * Optional hooks taken as the second argument of `runClipOrchestrator`.
 * Every hook is optional and returns `void`. Most payloads carry the
 * `clipId` and `variantId` they refer to.
 * NOTE(review): the order and timing in which the hooks fire is not visible in
 * this declaration file; the per-hook remarks below restate the payload types only.
 */
+ export interface ClipOrchestratorCallbacks {
63
+ onClipStart?: (data: {
64
+ clipId: string;
65
+ clipName: string;
66
+ variantId: string;
67
+ target: {
68
+ width: number;
69
+ height: number;
70
+ };
71
+ lang: string;
72
+ theme: string;
73
+ clipIndex: number;
74
+ totalClips: number;
75
+ }) => void;
76
+ onClipPhase?: (data: {
77
+ clipId: string;
78
+ variantId: string;
79
+ phase: VideoPhase;
80
+ }) => void;
// `phase` here is limited to 'dry_run' | 'recording', unlike the wider VideoPhase used by onClipPhase.
81
+ onClipStep?: (data: {
82
+ clipId: string;
83
+ variantId: string;
84
+ stepIndex: number;
85
+ total: number;
86
+ description: string;
87
+ phase: 'dry_run' | 'recording';
88
+ }) => void;
89
+ onClipStepResult?: (data: {
90
+ clipId: string;
91
+ variantId: string;
92
+ stepIndex: number;
93
+ ok: boolean;
94
+ reason?: string;
95
+ }) => void;
// The payload shares its output-path and failed* fields with ClipVariantResult.
96
+ onClipRecordingDone?: (data: {
97
+ clipId: string;
98
+ variantId: string;
99
+ success: boolean;
100
+ gifPath?: string;
101
+ mp4Path?: string;
102
+ thumbnailPath?: string;
103
+ durationMs: number;
104
+ fileSizeBytes?: number;
105
+ lang: string;
106
+ theme: string;
107
+ targetId: string;
108
+ targetLabel: string;
109
+ assessment?: string;
110
+ failedPhase?: 'preflight' | 'planning' | 'dry_run' | 'recording' | 'postprocess';
111
+ failedStepIndex?: number;
112
+ failedSubphase?: string;
113
+ }) => void;
114
+ onClipPostprocessStart?: (data: {
115
+ clipId: string;
116
+ variantId: string;
117
+ }) => void;
118
+ onClipPostprocessDone?: (data: {
119
+ clipId: string;
120
+ variantId: string;
121
+ success: boolean;
122
+ gifPath?: string;
123
+ mp4Path?: string;
124
+ }) => void;
// The payload carries the same two counters as ClipOrchestratorResult.
125
+ onClipsAllDone?: (data: {
126
+ totalClips: number;
127
+ successCount: number;
128
+ }) => void;
// NOTE(review): `timestamp` is a number; presumably epoch milliseconds (confirm).
129
+ onLog?: (entry: {
130
+ level: string;
131
+ message: string;
132
+ timestamp: number;
133
+ }) => void;
134
+ /** Emit a live Playwright screenshot (base64 JPEG) for real-time preview in the UI. */
135
+ onScreenshot?: (base64: string) => void;
136
+ }
137
+ /**
138
+ * Run the clip capture pipeline for all clips × targets × langs × themes.
139
+ *
140
+ * Hybrid pipeline:
141
+ * 1. For each (clip, target, lang) — navigate once with the screenshot agent
142
+ * 2. Share the navigation session across theme variants (light/dark)
143
+ * 3. For each variant: plan → validate selectors → record → post-process
144
+ *
145
+ * Navigation sharing: themes only affect `colorScheme` (a Playwright setting),
146
+ * so the same navigation session (cookies, localStorage, URL) is reused.
+ *
+ * @param config - Pipeline configuration (see `ClipOrchestratorConfig`).
+ * @param callbacks - Optional progress hooks (see `ClipOrchestratorCallbacks`).
+ * @returns A promise resolving to the per-variant results together with `totalClips` and `successCount`.
147
+ */
148
+ export declare function runClipOrchestrator(config: ClipOrchestratorConfig, callbacks?: ClipOrchestratorCallbacks): Promise<ClipOrchestratorResult>;