autokap 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/skill/OPCODE-REFERENCE.md +18 -1
- package/assets/skill/SKILL.md +54 -11
- package/dist/browser.js +23 -1
- package/dist/capture-strategy.d.ts +14 -0
- package/dist/capture-strategy.js +28 -0
- package/dist/cli-contract.d.ts +61 -0
- package/dist/cli-runner.d.ts +10 -1
- package/dist/cli-runner.js +415 -20
- package/dist/cli.js +80 -0
- package/dist/clip-capture-loop.js +11 -2
- package/dist/cookie-dismiss.d.ts +1 -0
- package/dist/cookie-dismiss.js +13 -1
- package/dist/execution-schema.d.ts +303 -2
- package/dist/execution-schema.js +77 -4
- package/dist/execution-types.d.ts +114 -5
- package/dist/execution-types.js +2 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +2 -0
- package/dist/mouse-animation.d.ts +12 -2
- package/dist/mouse-animation.js +36 -6
- package/dist/opcode-actions.d.ts +2 -0
- package/dist/opcode-actions.js +39 -5
- package/dist/opcode-runner.d.ts +2 -0
- package/dist/opcode-runner.js +139 -17
- package/dist/openrouter-tts.d.ts +74 -0
- package/dist/openrouter-tts.js +218 -0
- package/dist/postcondition.js +36 -26
- package/dist/program-signing.d.ts +67 -0
- package/dist/recovery-chain.js +26 -12
- package/dist/server-credit-usage.d.ts +1 -1
- package/dist/video-narration-schema.d.ts +1165 -0
- package/dist/video-narration-schema.js +137 -0
- package/dist/web-playwright-local.d.ts +16 -0
- package/dist/web-playwright-local.js +204 -18
- package/package.json +9 -1
package/dist/cli-runner.js
CHANGED
|
@@ -21,17 +21,86 @@ import { executeProgram } from './opcode-runner.js';
|
|
|
21
21
|
import { RecoveryChainImpl } from './recovery-chain.js';
|
|
22
22
|
import { parseProgram } from './execution-schema.js';
|
|
23
23
|
import { buildCursorOverlayScript } from './cursor-overlay-script.js';
|
|
24
|
-
import { CLI_VERSION_HEADER } from './cli-contract.js';
|
|
24
|
+
import { CLI_VERSION_HEADER, } from './cli-contract.js';
|
|
25
25
|
import { logger } from './logger.js';
|
|
26
26
|
import { callLLM } from './llm-provider.js';
|
|
27
27
|
import { APP_VERSION } from './version.js';
|
|
28
28
|
import { normalizeAllowedOrigins, normalizeHttpOrigin, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
|
|
29
29
|
const MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR = 1;
|
|
30
|
+
const DEFAULT_VIDEO_DELIVERY_RESOLUTION = { width: 1920, height: 1080 };
|
|
31
|
+
const DEFAULT_VIDEO_CAPTURE_RESOLUTION = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
|
|
30
32
|
const FETCH_PROGRAM_MAX_ATTEMPTS = 4;
|
|
31
33
|
const FETCH_PROGRAM_RETRY_DELAYS_MS = [1000, 3000, 5000];
|
|
32
34
|
const DEFAULT_SCREENSHOT_ARTIFACT_UPLOAD_CONCURRENCY = 4;
|
|
33
35
|
const DEFAULT_MEDIA_ARTIFACT_UPLOAD_CONCURRENCY = 2;
|
|
34
36
|
const MAX_ARTIFACT_UPLOAD_CONCURRENCY = 8;
|
|
37
|
+
/**
 * Works out the browser viewport and device scale factor for a recordable
 * variant.
 *
 * The requested scale comes from the variant, then the program's
 * `outputScale`, then a fallback of 2, and is sanitized by
 * `normalizeNumericScale`.
 *
 * - For `mediaMode === 'video'` the viewport is forced to the default video
 *   delivery resolution and the runtime scale is pinned to 1.
 * - For every other media mode the variant's own viewport is kept and the
 *   runtime scale is the requested scale capped at
 *   `MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR`.
 *
 * @param program Execution program; `mediaMode` and `outputScale` are read.
 * @param variant Variant spec; `viewport` and `deviceScaleFactor` are read.
 * @returns `{ viewport, requestedDeviceScaleFactor, runtimeDeviceScaleFactor }`.
 */
export function resolveRecordableBrowserSettings(program, variant) {
    const rawScale = variant.deviceScaleFactor ?? program.outputScale ?? 2;
    const requestedDeviceScaleFactor = normalizeNumericScale(rawScale);
    if (program.mediaMode === 'video') {
        const { width, height } = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
        return {
            viewport: { width, height },
            requestedDeviceScaleFactor,
            runtimeDeviceScaleFactor: 1,
        };
    }
    const cappedScale = Math.min(requestedDeviceScaleFactor, MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR);
    return {
        viewport: variant.viewport,
        requestedDeviceScaleFactor,
        runtimeDeviceScaleFactor: cappedScale,
    };
}
|
|
55
|
+
/**
 * Forces a video program onto the default delivery resolution at 1× scale.
 *
 * Non-video programs are returned untouched. A video program that already
 * matches the target (every variant viewport, the capture and delivery
 * resolutions, and the output scale) is also returned as the same object.
 * Otherwise a new program object is returned; the input is never mutated.
 *
 * Variants are normalized too so any code path that reads `variant.viewport`
 * or `variant.deviceScaleFactor` directly (not just
 * resolveRecordableBrowserSettings) sees consistent values. Legacy presets
 * carrying viewport=2560×1440 or DPR=1.3333 would otherwise leak through.
 *
 * @param program Parsed execution program.
 * @returns The same program when nothing needs changing, else a normalized copy.
 */
export function normalizeVideoCaptureProgram(program) {
    if (program.mediaMode !== 'video')
        return program;
    const format = program.artifactPlan.format ?? {};
    const { width, height } = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
    // A missing captureResolution counts as "already at the default".
    const captureResolution = format.captureResolution ?? DEFAULT_VIDEO_CAPTURE_RESOLUTION;
    const matchesTarget = (size) => size?.width === width && size?.height === height;
    const variantsAlreadyNormalized = program.variants.every((v) => matchesTarget(v.viewport) &&
        (v.deviceScaleFactor === undefined || v.deviceScaleFactor === 1));
    // Unlike captureResolution, a missing deliveryResolution is NOT treated as
    // normalized: the output must carry it explicitly.
    const formatAlreadyNormalized = matchesTarget(captureResolution) && matchesTarget(format.deliveryResolution);
    const outputScaleAlreadyNormalized = program.outputScale === undefined || program.outputScale === 1;
    if (variantsAlreadyNormalized && formatAlreadyNormalized && outputScaleAlreadyNormalized) {
        return program;
    }
    return {
        ...program,
        outputScale: 1,
        variants: program.variants.map((v) => ({
            ...v,
            viewport: { width, height },
            deviceScaleFactor: 1,
        })),
        artifactPlan: {
            ...program.artifactPlan,
            format: {
                ...format,
                // Fresh objects on purpose: handing out the module-level default by
                // reference would let a downstream mutation change it for every
                // later program.
                captureResolution: { width, height },
                deliveryResolution: { width, height },
            },
        },
    };
}
|
|
98
|
+
/**
 * Sanitizes a scale factor.
 *
 * @param value Candidate scale of any type.
 * @returns `value` when it is a finite number greater than zero, otherwise 1.
 */
function normalizeNumericScale(value) {
    const isUsableScale = typeof value === 'number' && Number.isFinite(value) && value > 0;
    return isUsableScale ? value : 1;
}
|
|
35
104
|
const HEALER_SYSTEM_PROMPT = 'You repair failed deterministic browser opcodes. Respond only with JSON.';
|
|
36
105
|
// ── Main entry point ────────────────────────────────────────────────
|
|
37
106
|
export async function runCapture(options) {
|
|
@@ -41,7 +110,7 @@ export async function runCapture(options) {
|
|
|
41
110
|
if (options.program) {
|
|
42
111
|
let parsedProgram;
|
|
43
112
|
try {
|
|
44
|
-
parsedProgram = parseProgram(options.program);
|
|
113
|
+
parsedProgram = normalizeVideoCaptureProgram(parseProgram(options.program));
|
|
45
114
|
}
|
|
46
115
|
catch (err) {
|
|
47
116
|
return { success: false, error: `program validation failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
@@ -64,7 +133,7 @@ export async function runCapture(options) {
|
|
|
64
133
|
// Step 2: Validate the program fetched from the server.
|
|
65
134
|
resolvedProgram = {
|
|
66
135
|
...resolvedProgram,
|
|
67
|
-
program: parseProgram(resolvedProgram.program),
|
|
136
|
+
program: normalizeVideoCaptureProgram(parseProgram(resolvedProgram.program)),
|
|
68
137
|
};
|
|
69
138
|
}
|
|
70
139
|
catch (err) {
|
|
@@ -80,12 +149,30 @@ export async function runCapture(options) {
|
|
|
80
149
|
presetId: options.presetId,
|
|
81
150
|
};
|
|
82
151
|
}
|
|
152
|
+
const runId = randomUUID();
|
|
153
|
+
let videoAudioAssets;
|
|
154
|
+
let videoAudioAssetsByLocale;
|
|
83
155
|
try {
|
|
84
156
|
assertProgramNavigationScope(program, resolvedProgram.security);
|
|
85
157
|
}
|
|
86
158
|
catch (error) {
|
|
87
159
|
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
|
88
160
|
}
|
|
161
|
+
if (!options.program && program.mediaMode === 'video') {
|
|
162
|
+
const prepareResult = await prepareVideoSpeechForRun(config, options.presetId, runId);
|
|
163
|
+
if (!prepareResult.success) {
|
|
164
|
+
return { success: false, error: prepareResult.error };
|
|
165
|
+
}
|
|
166
|
+
program = applyVideoSpeechDurations(program, prepareResult.durationsByStepId);
|
|
167
|
+
videoAudioAssets = prepareResult.audioAssets;
|
|
168
|
+
videoAudioAssetsByLocale = prepareResult.audioAssetsByLocale;
|
|
169
|
+
try {
|
|
170
|
+
program = normalizeVideoCaptureProgram(parseProgram(program));
|
|
171
|
+
}
|
|
172
|
+
catch (err) {
|
|
173
|
+
return { success: false, error: `prepared video program validation failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
174
|
+
}
|
|
175
|
+
}
|
|
89
176
|
logger.info(`[capture] Running preset "${options.presetId}" — ${program.steps.length} opcodes, ${program.variants.length} variant(s)`);
|
|
90
177
|
logger.info(`[capture] Resolved API origin ${resolvedProgram.security.expectedApiOrigin}; navigation scope: ${resolvedProgram.security.allowedNavigationOrigins.join(', ')}`);
|
|
91
178
|
const llmConfig = resolveCliLLMConfig(resolvedProgram.security);
|
|
@@ -97,9 +184,11 @@ export async function runCapture(options) {
|
|
|
97
184
|
credentials: program.preconditions.credentials,
|
|
98
185
|
});
|
|
99
186
|
// Step 4: Execute the program
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
187
|
+
// 'clip' and 'video' both go through the frame-capture pipeline and need to
|
|
188
|
+
// serialize variants (CPU/RAM bound on the encoder side). Only 'screenshot'
|
|
189
|
+
// honors the requested concurrency.
|
|
190
|
+
const isRecordable = program.mediaMode === 'clip' || program.mediaMode === 'video';
|
|
191
|
+
const maxParallelVariants = isRecordable ? 1 : program.maxParallelCaptures;
|
|
103
192
|
const runOptions = {
|
|
104
193
|
recoveryChain,
|
|
105
194
|
abortSignal: options.abortSignal,
|
|
@@ -116,20 +205,21 @@ export async function runCapture(options) {
|
|
|
116
205
|
const captureStart = Date.now();
|
|
117
206
|
if (maxParallelVariants) {
|
|
118
207
|
logger.info(`[capture] Concurrency cap resolved to ${maxParallelVariants} parallel variant(s)`);
|
|
119
|
-
if (
|
|
120
|
-
logger.info(`[capture]
|
|
208
|
+
if (isRecordable && program.maxParallelCaptures && program.maxParallelCaptures > 1) {
|
|
209
|
+
logger.info(`[capture] ${program.mediaMode} capture concurrency capped at 1 ` +
|
|
121
210
|
`(requested ${program.maxParallelCaptures}) to avoid CI CPU contention`);
|
|
122
211
|
}
|
|
123
212
|
}
|
|
124
213
|
const createAdapter = async (variant) => {
|
|
125
|
-
const recordable =
|
|
126
|
-
const
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
214
|
+
const recordable = isRecordable;
|
|
215
|
+
const recordableSettings = recordable
|
|
216
|
+
? resolveRecordableBrowserSettings(program, variant)
|
|
217
|
+
: null;
|
|
218
|
+
const requestedDeviceScaleFactor = recordableSettings?.requestedDeviceScaleFactor ?? (variant.deviceScaleFactor ?? program.outputScale ?? 2);
|
|
219
|
+
const runtimeDeviceScaleFactor = recordableSettings?.runtimeDeviceScaleFactor ?? requestedDeviceScaleFactor;
|
|
130
220
|
const browserOptions = {
|
|
131
221
|
headed: options.headed ?? false,
|
|
132
|
-
viewport: variant.viewport,
|
|
222
|
+
viewport: recordableSettings?.viewport ?? variant.viewport,
|
|
133
223
|
deviceScaleFactor: runtimeDeviceScaleFactor,
|
|
134
224
|
lang: variant.locale,
|
|
135
225
|
colorScheme: variant.theme,
|
|
@@ -138,7 +228,14 @@ export async function runCapture(options) {
|
|
|
138
228
|
let recordingDir;
|
|
139
229
|
let browser;
|
|
140
230
|
logger.info(`[capture] Launching browser${browserOptions.headed ? ' (headed)' : ''}…`);
|
|
141
|
-
if (recordable
|
|
231
|
+
if (recordable) {
|
|
232
|
+
logger.info(`[capture] Recordable browser settings: mediaMode=${program.mediaMode}, ` +
|
|
233
|
+
`variant.viewport=${variant.viewport.width}x${variant.viewport.height}, ` +
|
|
234
|
+
`variant.deviceScaleFactor=${variant.deviceScaleFactor ?? 'unset'}, ` +
|
|
235
|
+
`program.outputScale=${program.outputScale ?? 'unset'} ` +
|
|
236
|
+
`→ runtime viewport=${browserOptions.viewport.width}x${browserOptions.viewport.height} @DPR=${browserOptions.deviceScaleFactor}`);
|
|
237
|
+
}
|
|
238
|
+
if (recordable && program.mediaMode === 'clip' && runtimeDeviceScaleFactor !== requestedDeviceScaleFactor) {
|
|
142
239
|
logger.info(`[capture] Clip capture scale capped at ${runtimeDeviceScaleFactor} ` +
|
|
143
240
|
`(requested ${requestedDeviceScaleFactor}) to preserve recording FPS`);
|
|
144
241
|
}
|
|
@@ -167,7 +264,10 @@ export async function runCapture(options) {
|
|
|
167
264
|
}
|
|
168
265
|
try {
|
|
169
266
|
logger.info('[capture] Saving captures, might take a few seconds...');
|
|
170
|
-
await uploadResults(config, program, runResult);
|
|
267
|
+
const uploadOutcome = await uploadResults(config, program, runResult, runId);
|
|
268
|
+
if (program.mediaMode === 'video' && runResult.success) {
|
|
269
|
+
await signalVideoComplete(config, program, runResult, uploadOutcome.runId, videoAudioAssets, videoAudioAssetsByLocale);
|
|
270
|
+
}
|
|
171
271
|
const totalDurationSec = ((Date.now() - captureStart) / 1000).toFixed(1);
|
|
172
272
|
logger.info(`[capture] Captures saved successfully — total ${totalDurationSec}s`);
|
|
173
273
|
}
|
|
@@ -244,12 +344,220 @@ async function fetchProgram(config, presetId, environmentName) {
|
|
|
244
344
|
expectedApiOrigin: safeOrigin(config.apiBaseUrl),
|
|
245
345
|
});
|
|
246
346
|
if (envelope.meta?.stale) {
|
|
247
|
-
logger.warn('[capture] Program
|
|
347
|
+
logger.warn('[capture] Program needs regeneration — preset settings require missing or outdated opcodes. Regenerate before relying on this capture.');
|
|
248
348
|
}
|
|
249
349
|
return { success: true, program: envelope.program, security: envelope.security };
|
|
250
350
|
}
|
|
251
351
|
return { success: false, error: 'failed to fetch program: retry attempts exhausted' };
|
|
252
352
|
}
|
|
353
|
+
/**
 * Requests speech generation for a video run and consumes the server's
 * newline-delimited JSON progress stream.
 *
 * POSTs `{ videoId, runId }` to `/api/cli/video-prepare`, then reads the
 * response body line by line. Each non-empty line is parsed as JSON:
 * - `type: 'tts_progress'` is logged via `logTtsProgress`;
 * - `type: 'error'` aborts the stream with the server's message;
 * - `type: 'done'` carries the step durations and audio assets;
 * - any other value, including non-object JSON such as `null`, is ignored.
 *
 * Request failures, non-2xx responses, malformed lines and read errors in the
 * middle of the stream are all reported through the return value, so the
 * caller does not need its own try/catch for them.
 *
 * @param config CLI config; `apiBaseUrl` and `apiKey` are read.
 * @param videoId Video identifier sent to the server.
 * @param runId Run identifier sent to the server.
 * @returns `{ success: true, durationsByStepId, audioAssets, audioAssetsByLocale? }`
 *          or `{ success: false, error }`.
 */
async function prepareVideoSpeechForRun(config, videoId, runId) {
    logger.info('[capture] Generating speech, may take a few seconds...');
    const url = `${config.apiBaseUrl}/api/cli/video-prepare`;
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    let response;
    try {
        response = await fetch(url, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${config.apiKey}`,
                'Content-Type': 'application/json',
                [CLI_VERSION_HEADER]: APP_VERSION,
            },
            body: JSON.stringify({ videoId, runId }),
        });
    }
    catch (err) {
        return {
            success: false,
            error: `failed to generate speech: ${describeError(err)}`,
        };
    }
    if (!response.ok) {
        return {
            success: false,
            error: `failed to generate speech: ${await formatServerError(response, url)}`,
        };
    }
    if (!response.body) {
        return { success: false, error: 'failed to generate speech: server returned an empty stream' };
    }
    const decoder = new TextDecoder();
    const reader = response.body.getReader();
    let buffered = '';
    let donePayload = null;
    let streamError = null;
    // Handles one complete line of the stream; records failures in `streamError`.
    const consumeLine = (line) => {
        const trimmed = line.trim();
        if (!trimmed)
            return;
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch (err) {
            streamError = `failed to parse speech progress: ${describeError(err)}`;
            return;
        }
        // JSON.parse also accepts `null` and bare primitives; reading `.type`
        // from `null` would throw, so anything that is not an object is skipped
        // just like an unknown event type.
        if (event === null || typeof event !== 'object')
            return;
        if (event.type === 'tts_progress') {
            logTtsProgress(event);
            return;
        }
        if (event.type === 'error') {
            streamError = event.error ?? 'speech generation failed';
            return;
        }
        if (event.type === 'done') {
            const durationsByStepId = parseDurationsByStepId(event.durationsByStepId);
            const audioAssets = parseVideoAudioAssets(event.audioAssets);
            const audioAssetsByLocale = parseVideoAudioAssetsByLocale(event.audioAssetsByLocale);
            const hasLocaleAssets = Object.keys(audioAssetsByLocale).length > 0;
            if (Object.keys(durationsByStepId).length === 0 || audioAssets.length === 0) {
                streamError = 'speech generation completed without audio assets or SLEEP durations';
                return;
            }
            donePayload = {
                success: true,
                durationsByStepId,
                audioAssets,
                ...(hasLocaleAssets ? { audioAssetsByLocale } : {}),
            };
        }
    };
    try {
        while (true) {
            const { value, done } = await reader.read();
            if (done)
                break;
            buffered += decoder.decode(value, { stream: true });
            const lines = buffered.split(/\r?\n/);
            // The last piece may be an incomplete line; keep it for the next chunk.
            buffered = lines.pop() ?? '';
            for (const line of lines) {
                consumeLine(line);
                if (streamError)
                    break;
            }
            if (streamError) {
                await reader.cancel().catch(() => undefined);
                break;
            }
        }
    }
    catch (err) {
        // A dropped connection rejects reader.read(); report it like every other
        // failure instead of letting it escape to the caller.
        await reader.cancel().catch(() => undefined);
        return {
            success: false,
            error: `failed to generate speech: ${describeError(err)}`,
        };
    }
    // Flush any bytes the decoder is still holding, then handle a final line
    // that was not newline-terminated.
    buffered += decoder.decode();
    if (!streamError && buffered.trim()) {
        consumeLine(buffered);
    }
    if (streamError) {
        return { success: false, error: streamError };
    }
    if (!donePayload) {
        return { success: false, error: 'speech generation stream ended before completion' };
    }
    logger.info('[capture] TTS DONE');
    return donePayload;
}
|
|
455
|
+
/**
 * Logs one `tts_progress` event at info level.
 *
 * Only the stages `synthesizing`, `uploading` and `done` produce output; any
 * other stage is silently ignored. Missing counters are shown as `?` and a
 * missing step id as `unknown-step`.
 *
 * @param event Progress event; `index`, `total`, `stepId`, `stage`,
 *              `textPreview` and `durationMs` are read.
 */
function logTtsProgress(event) {
    const position = typeof event.index === 'number' ? event.index : '?';
    const count = typeof event.total === 'number' ? event.total : '?';
    const step = event.stepId ?? 'unknown-step';
    switch (event.stage) {
        case 'synthesizing': {
            const quoted = event.textPreview ? ` — "${event.textPreview}"` : '';
            logger.info(`[capture] TTS ${position}/${count}: generating ${step}${quoted}`);
            break;
        }
        case 'uploading':
            logger.info(`[capture] TTS ${position}/${count}: uploading ${step}`);
            break;
        case 'done': {
            const elapsed = typeof event.durationMs === 'number' ? ` (${event.durationMs}ms)` : '';
            logger.info(`[capture] TTS ${position}/${count}: done ${step}${elapsed}`);
            break;
        }
        default:
            break;
    }
}
|
|
471
|
+
/**
 * Extracts a `stepId → duration (ms)` map from untrusted server data.
 *
 * Anything that is not a plain (non-array) object yields `{}`. Entries with an
 * empty key, or with a value that is not a finite number greater than zero,
 * are dropped. Kept values are rounded and clamped to the range 1–60000.
 *
 * @param raw Value taken from the `done` event.
 * @returns A new object containing only the valid entries.
 */
function parseDurationsByStepId(raw) {
    const durations = {};
    const isRecord = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
    if (!isRecord)
        return durations;
    Object.entries(raw).forEach(([stepId, ms]) => {
        const keyOk = typeof stepId === 'string' && stepId !== '';
        const valueOk = typeof ms === 'number' && Number.isFinite(ms) && ms > 0;
        if (keyOk && valueOk) {
            durations[stepId] = Math.max(1, Math.min(60_000, Math.round(ms)));
        }
    });
    return durations;
}
|
|
484
|
+
/**
 * Filters and normalizes a list of audio asset descriptors from untrusted
 * server data.
 *
 * A non-array input yields `[]`. An entry is kept only when it is an object
 * with a non-empty string `stepId`, a non-empty string `url`, and a finite
 * `duration_ms` greater than zero. Kept entries get their duration rounded;
 * `word_timings` is included only when `parseVideoWordTimings` returns at
 * least one timing.
 *
 * @param raw Value taken from the `done` event.
 * @returns Array of `{ stepId, url, duration_ms, word_timings? }`.
 */
function parseVideoAudioAssets(raw) {
    if (!Array.isArray(raw))
        return [];
    const isNonEmptyString = (v) => typeof v === 'string' && v !== '';
    const isPositiveFinite = (v) => typeof v === 'number' && Number.isFinite(v) && v > 0;
    const accepted = [];
    for (const candidate of raw) {
        if (!candidate || typeof candidate !== 'object')
            continue;
        const valid = isNonEmptyString(candidate.stepId)
            && isNonEmptyString(candidate.url)
            && isPositiveFinite(candidate.duration_ms);
        if (!valid)
            continue;
        const normalized = {
            stepId: candidate.stepId,
            url: candidate.url,
            duration_ms: Math.round(candidate.duration_ms),
        };
        const timings = parseVideoWordTimings(candidate.word_timings);
        if (timings.length > 0) {
            normalized.word_timings = timings;
        }
        accepted.push(normalized);
    }
    return accepted;
}
|
|
509
|
+
/**
 * Extracts a `locale → audio assets` map from untrusted server data.
 *
 * Anything that is not a plain (non-array) object yields `{}`. Each value is
 * run through `parseVideoAudioAssets`; locales with an empty name or with no
 * valid assets are left out.
 *
 * @param raw Value taken from the `done` event.
 * @returns A new object containing only locales that have valid assets.
 */
function parseVideoAudioAssetsByLocale(raw) {
    const byLocale = {};
    if (raw === null || typeof raw !== 'object' || Array.isArray(raw))
        return byLocale;
    for (const locale of Object.keys(raw)) {
        if (locale === '')
            continue;
        const parsed = parseVideoAudioAssets(raw[locale]);
        if (parsed.length === 0)
            continue;
        byLocale[locale] = parsed;
    }
    return byLocale;
}
|
|
522
|
+
/**
 * Filters and normalizes per-word timing entries from untrusted server data.
 *
 * A non-array input yields `[]`. An entry is kept only when it is an object
 * with a string `word` and finite numeric `start_ms` / `end_ms`. Kept timings
 * are rounded to whole milliseconds and floored at 0.
 *
 * Non-finite timings are rejected: `typeof NaN === 'number'`, and
 * `JSON.parse('1e999')` produces `Infinity`, so a plain `typeof` check would
 * let values through that `JSON.stringify` later serializes as `null`.
 *
 * @param raw Value of an asset's `word_timings` field.
 * @returns Array of `{ word, start_ms, end_ms }`.
 */
function parseVideoWordTimings(raw) {
    if (!Array.isArray(raw))
        return [];
    return raw.flatMap((entry) => {
        if (!entry || typeof entry !== 'object')
            return [];
        const word = entry;
        // Number.isFinite does not coerce, so it also rejects non-number values.
        if (typeof word.word !== 'string'
            || !Number.isFinite(word.start_ms)
            || !Number.isFinite(word.end_ms)) {
            return [];
        }
        return [{
                word: word.word,
                start_ms: Math.max(0, Math.round(word.start_ms)),
                end_ms: Math.max(0, Math.round(word.end_ms)),
            }];
    });
}
|
|
541
|
+
/**
 * Returns a copy of `program` in which each SLEEP step that has a matching
 * entry in `durationsByStepId` gets that entry as its `durationMs`.
 *
 * A step is rewritten only when its `kind` is `'SLEEP'`, its `stepId` is a
 * non-empty string, and the looked-up duration is truthy. Every other step is
 * carried over as the same object. The input program is not mutated.
 *
 * @param program Program whose `steps` array is read.
 * @param durationsByStepId Map of step id to duration in milliseconds.
 * @returns A new program object with a new `steps` array.
 */
function applyVideoSpeechDurations(program, durationsByStepId) {
    const retimedSteps = [];
    for (const step of program.steps) {
        const isNamedSleep = step.kind === 'SLEEP'
            && typeof step.stepId === 'string'
            && step.stepId !== '';
        const durationMs = isNamedSleep ? durationsByStepId[step.stepId] : undefined;
        retimedSteps.push(durationMs ? { ...step, durationMs } : step);
    }
    return { ...program, steps: retimedSteps };
}
|
|
253
561
|
function shouldRetryProgramFetch(status, error) {
|
|
254
562
|
if ([408, 429, 500, 502, 503, 504].includes(status)) {
|
|
255
563
|
return true;
|
|
@@ -272,8 +580,7 @@ function getProgramFetchRetryDelayMs(attempt) {
|
|
|
272
580
|
function sleep(ms) {
|
|
273
581
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
274
582
|
}
|
|
275
|
-
async function uploadResults(config, program, result) {
|
|
276
|
-
const runId = randomUUID();
|
|
583
|
+
async function uploadResults(config, program, result, runId = randomUUID()) {
|
|
277
584
|
const artifactJobs = result.variantResults.flatMap((variant) => {
|
|
278
585
|
const variantSpec = program.variants.find((entry) => entry.id === variant.variantId);
|
|
279
586
|
return variant.artifacts.map((artifact) => ({
|
|
@@ -343,6 +650,93 @@ async function uploadResults(config, program, result) {
|
|
|
343
650
|
if (!telemetryResponse.ok) {
|
|
344
651
|
throw new Error(`telemetry upload failed: ${await formatServerError(telemetryResponse, `${config.apiBaseUrl}/api/cli/telemetry`)}`);
|
|
345
652
|
}
|
|
653
|
+
return { runId };
|
|
654
|
+
}
|
|
655
|
+
/**
 * AUT-57 PR #5 — tells the server that a video run has finished by POSTing to
 * `/api/cli/video-complete`.
 *
 * The per-clip metadata (including each clip's `mp4StoragePath`) comes from
 * `buildVideoClipMetadata`, which derives it from the run result's artifacts
 * and opcode timings. The storage path is a convention: with a run id it is
 * `raw/{videoId}/{runId}/{variantId}/{clipId}.mp4`. Nothing is uploaded here;
 * the caller invokes this after `uploadResults`.
 *
 * `audioAssets` and `audioAssetsByLocale` are added to the payload only when
 * they are non-empty.
 *
 * @param config CLI config; `apiBaseUrl` and `apiKey` are read.
 * @param program Executed program; its `presetId` is used as the video id.
 * @param result Run result passed through to `buildVideoClipMetadata`.
 * @param runId Run identifier included in the payload and storage paths.
 * @param audioAssets Optional narration assets.
 * @param audioAssetsByLocale Optional narration assets keyed by locale.
 * @returns The parsed JSON body of the server response.
 * @throws Error when no clip metadata could be built, or on a non-2xx response.
 */
async function signalVideoComplete(config, program, result, runId, audioAssets, audioAssetsByLocale) {
    // The CLI passes the video id in the preset id slot.
    const videoId = program.presetId;
    const clips = buildVideoClipMetadata(videoId, result, program, runId);
    if (clips.length === 0) {
        throw new Error('video-complete: no MP4 clips were produced — refusing to queue compositor job');
    }
    const payload = { videoId, runId, clips };
    if (audioAssets && audioAssets.length > 0) {
        payload.audioAssets = audioAssets;
    }
    if (audioAssetsByLocale && Object.keys(audioAssetsByLocale).length > 0) {
        payload.audioAssetsByLocale = audioAssetsByLocale;
    }
    const url = `${config.apiBaseUrl}/api/cli/video-complete`;
    const requestInit = {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${config.apiKey}`,
            'Content-Type': 'application/json',
            [CLI_VERSION_HEADER]: APP_VERSION,
        },
        body: JSON.stringify(payload),
    };
    const response = await fetch(url, requestInit);
    if (!response.ok) {
        throw new Error(`video-complete failed: ${await formatServerError(response, url)}`);
    }
    return (await response.json());
}
|
|
695
|
+
/**
 * Builds one metadata record per recorded clip for the `video-complete` call.
 *
 * Only artifacts whose `mediaMode` is `'video'` or `'clip'` and that carry a
 * `clipId` are included. Records are keyed by `variantId:clipId`, so a later
 * artifact with the same key replaces an earlier one.
 *
 * Locale and theme come from the matching variant in `program` when there is
 * one, otherwise they are inferred from the variant id. A result without
 * `opcodeTimings`, or a program without `variants`, is tolerated: clips then
 * get an empty `opcodeTimings` list and inferred locale/theme.
 *
 * @param videoId Video identifier used as the first storage-path segment.
 * @param result Run result; `variantResults` and `opcodeTimings` are read.
 * @param program Optional program used to look up variant locale and theme.
 * @param runId Optional run id; when truthy it is inserted into the path.
 * @returns Array of `{ variantId, lang, theme, clipId, mp4StoragePath,
 *          durationMs, opcodeTimings }`.
 */
export function buildVideoClipMetadata(videoId, result, program, runId) {
    const allTimings = Array.isArray(result.opcodeTimings) ? result.opcodeTimings : [];
    const clipsByKey = new Map();
    for (const variant of result.variantResults) {
        const variantId = variant.variantId;
        const variantSpec = program?.variants?.find((entry) => entry.id === variantId);
        for (const artifact of variant.artifacts) {
            if (artifact.mediaMode !== 'video' && artifact.mediaMode !== 'clip')
                continue;
            if (!artifact.clipId)
                continue;
            // Conventional storage path; the upload route enforces the same convention.
            const storagePath = runId
                ? `raw/${videoId}/${runId}/${variantId}/${artifact.clipId}.mp4`
                : `raw/${videoId}/${variantId}/${artifact.clipId}.mp4`;
            const matchingTimings = allTimings
                .filter((t) => t.variantId === variantId && t.clipId === artifact.clipId)
                .map((t) => ({
                stepIndex: t.stepIndex,
                stepId: t.stepId,
                opcodeKind: t.opcodeKind,
                timecodeStartMs: t.timecodeStartMs,
                timecodeEndMs: t.timecodeEndMs,
                bbox: t.bbox ?? null,
            }));
            clipsByKey.set(`${variantId}:${artifact.clipId}`, {
                variantId,
                lang: variantSpec?.locale ?? inferVariantLocale(variantId),
                theme: variantSpec?.theme ?? inferVariantTheme(variantId),
                clipId: artifact.clipId,
                mp4StoragePath: storagePath,
                durationMs: artifact.durationMs ?? 0,
                opcodeTimings: matchingTimings,
            });
        }
    }
    return [...clipsByKey.values()];
}
/**
 * Guesses a locale from a dash-separated variant id: the second-to-last
 * non-empty segment, or `'en'` when there are fewer than two segments.
 */
function inferVariantLocale(variantId) {
    const parts = variantId.split('-').filter(Boolean);
    if (parts.length >= 2)
        return parts[parts.length - 2];
    return 'en';
}
/**
 * Guesses a theme from a variant id: `'dark'` when it ends in `-dark`,
 * otherwise `'light'`.
 */
function inferVariantTheme(variantId) {
    return variantId.endsWith('-dark') ? 'dark' : 'light';
}
|
|
347
741
|
async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumber, job) {
|
|
348
742
|
const { artifact, variant, variantSpec } = job;
|
|
@@ -368,7 +762,8 @@ async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumb
|
|
|
368
762
|
formData.append('deviceFrame', variantSpec.deviceFrame);
|
|
369
763
|
}
|
|
370
764
|
const requestedDeviceScaleFactor = variantSpec?.deviceScaleFactor ?? program.outputScale ?? 2;
|
|
371
|
-
const
|
|
765
|
+
const isFrameCapture = artifact.mediaMode === 'clip' || artifact.mediaMode === 'video';
|
|
766
|
+
const deviceScaleFactor = isFrameCapture && Number.isFinite(requestedDeviceScaleFactor)
|
|
372
767
|
? Math.min(Number(requestedDeviceScaleFactor), MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR)
|
|
373
768
|
: requestedDeviceScaleFactor;
|
|
374
769
|
if (Number.isFinite(deviceScaleFactor)) {
|
package/dist/cli.js
CHANGED
|
@@ -240,12 +240,17 @@ program
|
|
|
240
240
|
.option('--local', `Use the local AutoKap dev server (${LOCAL_API_BASE_URL})`, false)
|
|
241
241
|
.option('--allow-upload-failure', 'Keep a successful capture exit code even if artifact upload fails', false)
|
|
242
242
|
.option('--debug', 'Verbose logging: per-substep timing, opcode dumps, recovery strategy traces', false)
|
|
243
|
+
.option('--cloud', 'Cloud runner mode: signals 4+ vCPU available, unblocks the conservative Linux FPS default (8 → 30)', false)
|
|
243
244
|
.action(async (opts) => {
|
|
244
245
|
if (opts.debug) {
|
|
245
246
|
const { setDebugEnabled } = await import('./logger.js');
|
|
246
247
|
setDebugEnabled(true);
|
|
247
248
|
logger.info('[capture] Debug mode enabled — verbose logging on');
|
|
248
249
|
}
|
|
250
|
+
if (opts.cloud) {
|
|
251
|
+
process.env.AUTOKAP_CLOUD_RUNNER = '1';
|
|
252
|
+
logger.info('[capture] Cloud runner mode — Linux FPS cap lifted (clips target 30 fps)');
|
|
253
|
+
}
|
|
249
254
|
if (opts.local) {
|
|
250
255
|
process.env[API_BASE_URL_ENV_VAR] = LOCAL_API_BASE_URL;
|
|
251
256
|
process.env[WS_URL_ENV_VAR] = LOCAL_WS_URL;
|
|
@@ -606,6 +611,81 @@ presetCmd
|
|
|
606
611
|
console.log(JSON.stringify(info, null, 2));
|
|
607
612
|
process.exit(0);
|
|
608
613
|
});
|
|
614
|
+
// ── video commands ─────────────────────────────────────────────────
//
// Mirrors the preset commands but targets `/api/video-projects` (CLI-keyed
// route used by the IDE skill flow). The payload file contains the full
// body expected by the API (projectId, title, user_script, legacy
// narration_voice/narration_locale aliases, narration_by_app_locale,
// app_locale/app_locales, app_theme/app_themes, cursor_theme,
// credentials_account_id, mockDataInjection, program). On `update`, the same
// payload shape is sent via PATCH. TTS is generated later by `autokap run`.
//
// Both commands print only the video id on stdout and exit 0 on success; any
// failure is logged and exits with code 1.
const videoCmd = program
    .command('video')
    .description('Manage demo videos');
videoCmd
    .command('create')
    .description('Create a new demo video from a JSON payload file')
    .requiredOption('--payload <file>', 'Path to payload JSON file (use "-" for stdin)')
    .action(async (opts) => {
    const cfg = await requireConfig();
    let payload;
    try {
        payload = await readJsonInput(opts.payload);
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error instance.
        logger.error(`Failed to read payload: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
    const res = await fetch(`${cfg.apiBaseUrl}/api/video-projects`, {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${cfg.apiKey}`,
            'Content-Type': 'application/json',
        },
        body: JSON.stringify(payload),
    });
    if (!res.ok) {
        const body = await res.json().catch(() => ({ error: res.statusText }));
        logger.error(`Failed to create video: ${body.error || res.statusText}`);
        process.exit(1);
    }
    // A 2xx response with an unparsable or unexpected body must not crash with
    // a TypeError or print "undefined" into a shell pipeline.
    const data = await res.json().catch(() => null);
    const createdId = data?.video?.id;
    if (!createdId) {
        logger.error('Failed to create video: server response did not include a video id');
        process.exit(1);
    }
    // Output only the video ID so callers can chain: autokap run $(autokap video create ...)
    console.log(createdId);
    process.exit(0);
});
videoCmd
    .command('update <video-id>')
    .description('Update an existing demo video from a JSON payload file')
    .requiredOption('--payload <file>', 'Path to payload JSON file (use "-" for stdin)')
    .action(async (videoId, opts) => {
    const cfg = await requireConfig();
    let payload;
    try {
        payload = await readJsonInput(opts.payload);
    }
    catch (err) {
        logger.error(`Failed to read payload: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
    // The id comes straight from the command line; encode it so characters such
    // as `/`, `?` or `#` cannot change which route is hit.
    const res = await fetch(`${cfg.apiBaseUrl}/api/video-projects/${encodeURIComponent(videoId)}`, {
        method: 'PATCH',
        headers: {
            Authorization: `Bearer ${cfg.apiKey}`,
            'Content-Type': 'application/json',
        },
        body: JSON.stringify(payload),
    });
    if (!res.ok) {
        const body = await res.json().catch(() => ({ error: res.statusText }));
        logger.error(`Failed to update video: ${body.error || res.statusText}`);
        process.exit(1);
    }
    const data = await res.json().catch(() => null);
    const updatedId = data?.video?.id;
    if (!updatedId) {
        logger.error('Failed to update video: server response did not include a video id');
        process.exit(1);
    }
    console.log(updatedId);
    process.exit(0);
});
|
|
609
689
|
// ── auth commands ──────────────────────────────────────────────────
|
|
610
690
|
const authCmd = program
|
|
611
691
|
.command('auth')
|
|
@@ -34,10 +34,19 @@ export class ClipCaptureLoop {
|
|
|
34
34
|
this.page = opts.page;
|
|
35
35
|
this.framesDir = opts.framesDir;
|
|
36
36
|
this.jpegQuality = opts.jpegQuality ?? 80;
|
|
37
|
-
|
|
37
|
+
// Linux default is 8 fps to stay safe on 2 vCPU CI runners. Cloud runners
|
|
38
|
+
// (AUTOKAP_CLOUD_RUNNER=1, set by the Fly.io image and the `--cloud` CLI
|
|
39
|
+
// flag) get the same 15 fps default as macOS/Windows since they have
|
|
40
|
+
// ≥ 4 vCPU. Callers can still override via opts.targetFps.
|
|
41
|
+
const isCloudRunner = process.env.AUTOKAP_CLOUD_RUNNER === '1';
|
|
42
|
+
const linuxDefault = isCloudRunner ? 15 : 8;
|
|
43
|
+
const platformDefault = process.platform === 'linux' ? linuxDefault : 15;
|
|
44
|
+
const targetFps = Math.max(1, Math.min(30, opts.targetFps ?? platformDefault));
|
|
38
45
|
this.targetFps = targetFps;
|
|
39
46
|
this.targetFrameIntervalMs = 1000 / targetFps;
|
|
40
|
-
|
|
47
|
+
const linuxMinRest = isCloudRunner ? 16 : 50;
|
|
48
|
+
const platformMinRest = process.platform === 'linux' ? linuxMinRest : 16;
|
|
49
|
+
this.minRestMs = Math.max(0, Math.min(250, opts.minRestMs ?? platformMinRest));
|
|
41
50
|
}
|
|
42
51
|
async start() {
|
|
43
52
|
this.cdp = await this.page.context().newCDPSession(this.page);
|
package/dist/cookie-dismiss.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { Page } from 'playwright';
|
|
2
|
+
export declare const CAPTURE_HIDE_STYLE_ID = "autokap-capture-hide-style";
|
|
2
3
|
export declare function getCaptureHideCSS(): string;
|
|
3
4
|
export declare function ensureCaptureHideStyles(page: Page): Promise<void>;
|
|
4
5
|
export declare function dismissCookiesAndWidgets(page: Page): Promise<{
|