autokap 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,17 +21,86 @@ import { executeProgram } from './opcode-runner.js';
21
21
  import { RecoveryChainImpl } from './recovery-chain.js';
22
22
  import { parseProgram } from './execution-schema.js';
23
23
  import { buildCursorOverlayScript } from './cursor-overlay-script.js';
24
- import { CLI_VERSION_HEADER } from './cli-contract.js';
24
+ import { CLI_VERSION_HEADER, } from './cli-contract.js';
25
25
  import { logger } from './logger.js';
26
26
  import { callLLM } from './llm-provider.js';
27
27
  import { APP_VERSION } from './version.js';
28
28
  import { normalizeAllowedOrigins, normalizeHttpOrigin, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
29
29
  // Upper bound applied to the runtime device scale factor of clip captures
  // (see resolveRecordableBrowserSettings and uploadArtifact).
  const MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR = 1;
30
  // Fixed viewport / delivery size used for every 'video' mode program.
+ const DEFAULT_VIDEO_DELIVERY_RESOLUTION = { width: 1920, height: 1080 };
31
  // NOTE: this is an alias of the delivery resolution object, not a copy —
  // both names refer to the same object instance.
+ const DEFAULT_VIDEO_CAPTURE_RESOLUTION = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
30
32
  // NOTE(review): presumably the retry budget and back-off delays of
  // fetchProgram — the retry loop is outside this view; confirm there.
  const FETCH_PROGRAM_MAX_ATTEMPTS = 4;
31
33
  const FETCH_PROGRAM_RETRY_DELAYS_MS = [1000, 3000, 5000];
32
34
  // NOTE(review): upload concurrency defaults/cap; their consumers are not
  // visible in this view.
  const DEFAULT_SCREENSHOT_ARTIFACT_UPLOAD_CONCURRENCY = 4;
33
35
  const DEFAULT_MEDIA_ARTIFACT_UPLOAD_CONCURRENCY = 2;
34
36
  const MAX_ARTIFACT_UPLOAD_CONCURRENCY = 8;
37
/**
 * Computes the browser viewport and device scale factors for a recordable
 * ('clip' or 'video') capture of one variant.
 *
 * - 'video' programs always run at the default video delivery resolution with
 *   a runtime scale factor of 1.
 * - Any other media mode keeps the variant's own viewport and caps the runtime
 *   scale factor at MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR.
 *
 * @param program Program being executed; `mediaMode` and `outputScale` are read.
 * @param variant Variant being launched; `viewport` and `deviceScaleFactor` are read.
 * @returns `{ viewport, requestedDeviceScaleFactor, runtimeDeviceScaleFactor }`.
 */
export function resolveRecordableBrowserSettings(program, variant) {
    // Variant setting wins over the program-wide output scale; 2 is the fallback.
    const rawScale = variant.deviceScaleFactor ?? program.outputScale ?? 2;
    const requestedDeviceScaleFactor = normalizeNumericScale(rawScale);
    if (program.mediaMode === 'video') {
        const { width, height } = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
        return {
            viewport: { width, height },
            requestedDeviceScaleFactor,
            runtimeDeviceScaleFactor: 1,
        };
    }
    const runtimeDeviceScaleFactor = Math.min(requestedDeviceScaleFactor, MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR);
    return {
        viewport: variant.viewport,
        requestedDeviceScaleFactor,
        runtimeDeviceScaleFactor,
    };
}
55
/**
 * Forces a 'video' mode program onto the fixed video capture profile
 * (default delivery resolution, device scale factor 1, output scale 1).
 * Programs with any other media mode are returned untouched.
 *
 * When the program already matches the profile the same object is returned;
 * otherwise a shallow-copied program is returned and the input is not mutated.
 *
 * @param program Parsed program; `mediaMode`, `variants`, `outputScale` and
 *                `artifactPlan.format` are read.
 * @returns The original program, or a normalized copy of it.
 */
export function normalizeVideoCaptureProgram(program) {
    if (program.mediaMode !== 'video')
        return program;
    const format = program.artifactPlan.format ?? {};
    const { width, height } = DEFAULT_VIDEO_DELIVERY_RESOLUTION;
    // A missing captureResolution counts as "already at the default".
    const captureResolution = format.captureResolution ?? DEFAULT_VIDEO_CAPTURE_RESOLUTION;
    // Variants are normalized too so any code path that reads `variant.viewport`
    // or `variant.deviceScaleFactor` directly (not just resolveRecordableBrowserSettings)
    // sees consistent 1920×1080 @1× values. Legacy presets carrying
    // viewport=2560×1440 or DPR=1.3333 would otherwise leak through.
    const variantsAlreadyNormalized = program.variants.every((v) => v.viewport.width === width &&
        v.viewport.height === height &&
        (v.deviceScaleFactor === undefined || v.deviceScaleFactor === 1));
    const formatAlreadyNormalized = captureResolution.width === width &&
        captureResolution.height === height &&
        format.deliveryResolution?.width === width &&
        format.deliveryResolution.height === height;
    const outputScaleAlreadyNormalized = program.outputScale === undefined || program.outputScale === 1;
    if (variantsAlreadyNormalized && formatAlreadyNormalized && outputScaleAlreadyNormalized) {
        return program;
    }
    return {
        ...program,
        outputScale: 1,
        variants: program.variants.map((v) => ({
            ...v,
            viewport: { width, height },
            deviceScaleFactor: 1,
        })),
        artifactPlan: {
            ...program.artifactPlan,
            format: {
                ...format,
                captureResolution: { width, height },
                // Fresh object on purpose: handing out the module-level default
                // itself would let any later mutation of the program rewrite the
                // default resolution for every subsequent run.
                deliveryResolution: { width, height },
            },
        },
    };
}
98
/**
 * Sanitizes a device scale factor.
 *
 * @param value Candidate scale of any type.
 * @returns `value` when it is a finite number greater than zero, otherwise 1.
 */
function normalizeNumericScale(value) {
    const isUsableScale = typeof value === 'number' && Number.isFinite(value) && value > 0;
    return isUsableScale ? value : 1;
}
35
104
  const HEALER_SYSTEM_PROMPT = 'You repair failed deterministic browser opcodes. Respond only with JSON.';
36
105
  // ── Main entry point ────────────────────────────────────────────────
37
106
  export async function runCapture(options) {
@@ -41,7 +110,7 @@ export async function runCapture(options) {
41
110
  if (options.program) {
42
111
  let parsedProgram;
43
112
  try {
44
- parsedProgram = parseProgram(options.program);
113
+ parsedProgram = normalizeVideoCaptureProgram(parseProgram(options.program));
45
114
  }
46
115
  catch (err) {
47
116
  return { success: false, error: `program validation failed: ${err instanceof Error ? err.message : String(err)}` };
@@ -64,7 +133,7 @@ export async function runCapture(options) {
64
133
  // Step 2: Validate the program fetched from the server.
65
134
  resolvedProgram = {
66
135
  ...resolvedProgram,
67
- program: parseProgram(resolvedProgram.program),
136
+ program: normalizeVideoCaptureProgram(parseProgram(resolvedProgram.program)),
68
137
  };
69
138
  }
70
139
  catch (err) {
@@ -80,12 +149,30 @@ export async function runCapture(options) {
80
149
  presetId: options.presetId,
81
150
  };
82
151
  }
152
+ const runId = randomUUID();
153
+ let videoAudioAssets;
154
+ let videoAudioAssetsByLocale;
83
155
  try {
84
156
  assertProgramNavigationScope(program, resolvedProgram.security);
85
157
  }
86
158
  catch (error) {
87
159
  return { success: false, error: error instanceof Error ? error.message : String(error) };
88
160
  }
161
+ if (!options.program && program.mediaMode === 'video') {
162
+ const prepareResult = await prepareVideoSpeechForRun(config, options.presetId, runId);
163
+ if (!prepareResult.success) {
164
+ return { success: false, error: prepareResult.error };
165
+ }
166
+ program = applyVideoSpeechDurations(program, prepareResult.durationsByStepId);
167
+ videoAudioAssets = prepareResult.audioAssets;
168
+ videoAudioAssetsByLocale = prepareResult.audioAssetsByLocale;
169
+ try {
170
+ program = normalizeVideoCaptureProgram(parseProgram(program));
171
+ }
172
+ catch (err) {
173
+ return { success: false, error: `prepared video program validation failed: ${err instanceof Error ? err.message : String(err)}` };
174
+ }
175
+ }
89
176
  logger.info(`[capture] Running preset "${options.presetId}" — ${program.steps.length} opcodes, ${program.variants.length} variant(s)`);
90
177
  logger.info(`[capture] Resolved API origin ${resolvedProgram.security.expectedApiOrigin}; navigation scope: ${resolvedProgram.security.allowedNavigationOrigins.join(', ')}`);
91
178
  const llmConfig = resolveCliLLMConfig(resolvedProgram.security);
@@ -97,9 +184,11 @@ export async function runCapture(options) {
97
184
  credentials: program.preconditions.credentials,
98
185
  });
99
186
  // Step 4: Execute the program
100
- const maxParallelVariants = program.mediaMode === 'clip'
101
- ? 1
102
- : program.maxParallelCaptures;
187
+ // 'clip' and 'video' both go through the frame-capture pipeline and need to
188
+ // serialize variants (CPU/RAM bound on the encoder side). Only 'screenshot'
189
+ // honors the requested concurrency.
190
+ const isRecordable = program.mediaMode === 'clip' || program.mediaMode === 'video';
191
+ const maxParallelVariants = isRecordable ? 1 : program.maxParallelCaptures;
103
192
  const runOptions = {
104
193
  recoveryChain,
105
194
  abortSignal: options.abortSignal,
@@ -116,20 +205,21 @@ export async function runCapture(options) {
116
205
  const captureStart = Date.now();
117
206
  if (maxParallelVariants) {
118
207
  logger.info(`[capture] Concurrency cap resolved to ${maxParallelVariants} parallel variant(s)`);
119
- if (program.mediaMode === 'clip' && program.maxParallelCaptures && program.maxParallelCaptures > 1) {
120
- logger.info(`[capture] Clip capture concurrency capped at 1 ` +
208
+ if (isRecordable && program.maxParallelCaptures && program.maxParallelCaptures > 1) {
209
+ logger.info(`[capture] ${program.mediaMode} capture concurrency capped at 1 ` +
121
210
  `(requested ${program.maxParallelCaptures}) to avoid CI CPU contention`);
122
211
  }
123
212
  }
124
213
  const createAdapter = async (variant) => {
125
- const recordable = program.mediaMode === 'clip';
126
- const requestedDeviceScaleFactor = variant.deviceScaleFactor ?? program.outputScale ?? 2;
127
- const runtimeDeviceScaleFactor = recordable && Number.isFinite(requestedDeviceScaleFactor)
128
- ? Math.min(Number(requestedDeviceScaleFactor), MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR)
129
- : requestedDeviceScaleFactor;
214
+ const recordable = isRecordable;
215
+ const recordableSettings = recordable
216
+ ? resolveRecordableBrowserSettings(program, variant)
217
+ : null;
218
+ const requestedDeviceScaleFactor = recordableSettings?.requestedDeviceScaleFactor ?? (variant.deviceScaleFactor ?? program.outputScale ?? 2);
219
+ const runtimeDeviceScaleFactor = recordableSettings?.runtimeDeviceScaleFactor ?? requestedDeviceScaleFactor;
130
220
  const browserOptions = {
131
221
  headed: options.headed ?? false,
132
- viewport: variant.viewport,
222
+ viewport: recordableSettings?.viewport ?? variant.viewport,
133
223
  deviceScaleFactor: runtimeDeviceScaleFactor,
134
224
  lang: variant.locale,
135
225
  colorScheme: variant.theme,
@@ -138,7 +228,14 @@ export async function runCapture(options) {
138
228
  let recordingDir;
139
229
  let browser;
140
230
  logger.info(`[capture] Launching browser${browserOptions.headed ? ' (headed)' : ''}…`);
141
- if (recordable && runtimeDeviceScaleFactor !== requestedDeviceScaleFactor) {
231
+ if (recordable) {
232
+ logger.info(`[capture] Recordable browser settings: mediaMode=${program.mediaMode}, ` +
233
+ `variant.viewport=${variant.viewport.width}x${variant.viewport.height}, ` +
234
+ `variant.deviceScaleFactor=${variant.deviceScaleFactor ?? 'unset'}, ` +
235
+ `program.outputScale=${program.outputScale ?? 'unset'} ` +
236
+ `→ runtime viewport=${browserOptions.viewport.width}x${browserOptions.viewport.height} @DPR=${browserOptions.deviceScaleFactor}`);
237
+ }
238
+ if (recordable && program.mediaMode === 'clip' && runtimeDeviceScaleFactor !== requestedDeviceScaleFactor) {
142
239
  logger.info(`[capture] Clip capture scale capped at ${runtimeDeviceScaleFactor} ` +
143
240
  `(requested ${requestedDeviceScaleFactor}) to preserve recording FPS`);
144
241
  }
@@ -167,7 +264,10 @@ export async function runCapture(options) {
167
264
  }
168
265
  try {
169
266
  logger.info('[capture] Saving captures, might take a few seconds...');
170
- await uploadResults(config, program, runResult);
267
+ const uploadOutcome = await uploadResults(config, program, runResult, runId);
268
+ if (program.mediaMode === 'video' && runResult.success) {
269
+ await signalVideoComplete(config, program, runResult, uploadOutcome.runId, videoAudioAssets, videoAudioAssetsByLocale);
270
+ }
171
271
  const totalDurationSec = ((Date.now() - captureStart) / 1000).toFixed(1);
172
272
  logger.info(`[capture] Captures saved successfully — total ${totalDurationSec}s`);
173
273
  }
@@ -244,12 +344,220 @@ async function fetchProgram(config, presetId, environmentName) {
244
344
  expectedApiOrigin: safeOrigin(config.apiBaseUrl),
245
345
  });
246
346
  if (envelope.meta?.stale) {
247
- logger.warn('[capture] Program is stalelangs/themes changed since last compilation. Recompile to apply changes.');
347
+ logger.warn('[capture] Program needs regenerationpreset settings require missing or outdated opcodes. Regenerate before relying on this capture.');
248
348
  }
249
349
  return { success: true, program: envelope.program, security: envelope.security };
250
350
  }
251
351
  return { success: false, error: 'failed to fetch program: retry attempts exhausted' };
252
352
  }
353
/**
 * Asks the server to generate speech for a video run and consumes the
 * newline-delimited JSON event stream it answers with.
 *
 * Recognized events: `tts_progress` (logged), `error` (aborts) and `done`
 * (carries the final durations and audio assets). Other events are ignored.
 * Request, stream and payload failures are reported through the return value
 * rather than thrown, so the caller can surface them like any other failure.
 *
 * @param config  CLI config; `apiBaseUrl` and `apiKey` are read.
 * @param videoId Sent as `videoId` in the request body.
 * @param runId   Sent as `runId` in the request body.
 * @returns `{ success: true, durationsByStepId, audioAssets, audioAssetsByLocale? }`
 *          on completion, otherwise `{ success: false, error }`.
 */
async function prepareVideoSpeechForRun(config, videoId, runId) {
    logger.info('[capture] Generating speech, may take a few seconds...');
    const url = `${config.apiBaseUrl}/api/cli/video-prepare`;
    let response;
    try {
        response = await fetch(url, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${config.apiKey}`,
                'Content-Type': 'application/json',
                [CLI_VERSION_HEADER]: APP_VERSION,
            },
            body: JSON.stringify({ videoId, runId }),
        });
    }
    catch (err) {
        return {
            success: false,
            error: `failed to generate speech: ${err instanceof Error ? err.message : String(err)}`,
        };
    }
    if (!response.ok) {
        return {
            success: false,
            error: `failed to generate speech: ${await formatServerError(response, url)}`,
        };
    }
    if (!response.body) {
        return { success: false, error: 'failed to generate speech: server returned an empty stream' };
    }
    const decoder = new TextDecoder();
    const reader = response.body.getReader();
    let buffered = '';
    let donePayload = null;
    // Invariant: when set, streamError is always a non-empty string, so plain
    // truthiness checks below are reliable.
    let streamError = null;
    const consumeLine = (line) => {
        const trimmed = line.trim();
        if (!trimmed)
            return;
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch (err) {
            streamError = `failed to parse speech progress: ${err instanceof Error ? err.message : String(err)}`;
            return;
        }
        // JSON.parse can yield null or a primitive; those carry no event type and
        // are skipped like any other unrecognized event (null used to crash here).
        if (event === null || typeof event !== 'object')
            return;
        if (event.type === 'tts_progress') {
            logTtsProgress(event);
            return;
        }
        if (event.type === 'error') {
            // An empty or non-string message must still abort the run.
            streamError = typeof event.error === 'string' && event.error
                ? event.error
                : 'speech generation failed';
            return;
        }
        if (event.type === 'done') {
            const durationsByStepId = parseDurationsByStepId(event.durationsByStepId);
            const audioAssets = parseVideoAudioAssets(event.audioAssets);
            const audioAssetsByLocale = parseVideoAudioAssetsByLocale(event.audioAssetsByLocale);
            const hasLocaleAssets = Object.keys(audioAssetsByLocale).length > 0;
            if (Object.keys(durationsByStepId).length === 0 || audioAssets.length === 0) {
                streamError = 'speech generation completed without audio assets or SLEEP durations';
                return;
            }
            donePayload = {
                success: true,
                durationsByStepId,
                audioAssets,
                ...(hasLocaleAssets ? { audioAssetsByLocale } : {}),
            };
        }
    };
    try {
        while (!streamError) {
            const { value, done } = await reader.read();
            if (done)
                break;
            buffered += decoder.decode(value, { stream: true });
            const lines = buffered.split(/\r?\n/);
            // The last piece may be an incomplete line; keep it for the next chunk.
            buffered = lines.pop() ?? '';
            for (const line of lines) {
                consumeLine(line);
                if (streamError)
                    break;
            }
        }
    }
    catch (err) {
        // A connection dropped mid-stream rejects reader.read(); report it the
        // same way as a failed initial request instead of throwing.
        streamError = `failed to generate speech: ${err instanceof Error ? err.message : String(err)}`;
    }
    if (streamError) {
        // Best effort: stop the download, ignore failures of the cancel itself.
        await reader.cancel().catch(() => undefined);
        return { success: false, error: streamError };
    }
    // Flush the decoder and handle a final line that had no trailing newline.
    buffered += decoder.decode();
    if (buffered.trim()) {
        consumeLine(buffered);
    }
    if (streamError) {
        return { success: false, error: streamError };
    }
    if (!donePayload) {
        return { success: false, error: 'speech generation stream ended before completion' };
    }
    logger.info('[capture] TTS DONE');
    return donePayload;
}
455
/**
 * Logs one `tts_progress` stream event as a human-readable line.
 * Stages other than 'synthesizing', 'uploading' and 'done' are not logged.
 *
 * @param event Progress event; `index`, `total`, `stepId`, `stage`,
 *              `textPreview` and `durationMs` are read.
 */
function logTtsProgress(event) {
    // Missing or non-numeric counters are shown as '?'.
    const index = typeof event.index === 'number' ? event.index : '?';
    const total = typeof event.total === 'number' ? event.total : '?';
    const stepId = event.stepId ?? 'unknown-step';
    switch (event.stage) {
        case 'synthesizing': {
            const preview = event.textPreview ? ` — "${event.textPreview}"` : '';
            logger.info(`[capture] TTS ${index}/${total}: generating ${stepId}${preview}`);
            break;
        }
        case 'uploading':
            logger.info(`[capture] TTS ${index}/${total}: uploading ${stepId}`);
            break;
        case 'done': {
            const duration = typeof event.durationMs === 'number' ? ` (${event.durationMs}ms)` : '';
            logger.info(`[capture] TTS ${index}/${total}: done ${stepId}${duration}`);
            break;
        }
        default:
            break;
    }
}
471
/**
 * Extracts a `stepId → duration (ms)` map from an untrusted value.
 *
 * Entries with an empty key or a value that is not a finite number above zero
 * are dropped; kept values are rounded and clamped to 1..60000 ms.
 *
 * @param raw Candidate map, expected to be a plain object.
 * @returns A new object; empty when `raw` is not a non-array object.
 */
function parseDurationsByStepId(raw) {
    const durations = {};
    const isPlainContainer = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
    if (!isPlainContainer) {
        return durations;
    }
    Object.keys(raw).forEach((stepId) => {
        const candidate = raw[stepId];
        const isPositiveNumber = typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0;
        if (stepId === '' || !isPositiveNumber) {
            return;
        }
        const rounded = Math.round(candidate);
        durations[stepId] = Math.max(1, Math.min(60_000, rounded));
    });
    return durations;
}
484
/**
 * Validates a list of audio asset descriptors coming from the server.
 *
 * An entry is kept only when `stepId` and `url` are non-empty strings and
 * `duration_ms` is a finite number above zero. `duration_ms` is rounded, and
 * `word_timings` is attached only when at least one timing survives parsing.
 *
 * @param raw Candidate list.
 * @returns A new array of sanitized assets; empty when `raw` is not an array.
 */
function parseVideoAudioAssets(raw) {
    const assets = [];
    if (!Array.isArray(raw)) {
        return assets;
    }
    const isNonEmptyString = (candidate) => typeof candidate === 'string' && candidate.length > 0;
    for (const entry of raw) {
        if (!entry || typeof entry !== 'object') {
            continue;
        }
        const durationMs = entry.duration_ms;
        const isValid = isNonEmptyString(entry.stepId)
            && isNonEmptyString(entry.url)
            && typeof durationMs === 'number'
            && Number.isFinite(durationMs)
            && durationMs > 0;
        if (!isValid) {
            continue;
        }
        const wordTimings = parseVideoWordTimings(entry.word_timings);
        const asset = {
            stepId: entry.stepId,
            url: entry.url,
            duration_ms: Math.round(durationMs),
        };
        if (wordTimings.length > 0) {
            asset.word_timings = wordTimings;
        }
        assets.push(asset);
    }
    return assets;
}
509
/**
 * Validates a `locale → audio assets` map coming from the server.
 *
 * Each value is run through parseVideoAudioAssets; locales with an empty name
 * or without any valid asset are left out.
 *
 * @param raw Candidate map, expected to be a plain object.
 * @returns A new object; empty when `raw` is not a non-array object.
 */
function parseVideoAudioAssetsByLocale(raw) {
    const byLocale = {};
    const isPlainContainer = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
    if (!isPlainContainer) {
        return byLocale;
    }
    Object.keys(raw).forEach((locale) => {
        if (locale === '') {
            return;
        }
        const localeAssets = parseVideoAudioAssets(raw[locale]);
        if (localeAssets.length === 0) {
            return;
        }
        byLocale[locale] = localeAssets;
    });
    return byLocale;
}
522
/**
 * Validates a list of per-word timing entries.
 *
 * An entry is kept only when `word` is a string and both `start_ms` and
 * `end_ms` are finite numbers; NaN and ±Infinity are rejected because they
 * would survive rounding and serialize as `null`. Kept timings are rounded and
 * floored at 0.
 *
 * @param raw Candidate list.
 * @returns A new array of `{ word, start_ms, end_ms }`; empty when `raw` is
 *          not an array.
 */
function parseVideoWordTimings(raw) {
    if (!Array.isArray(raw))
        return [];
    const isFiniteNumber = (candidate) => typeof candidate === 'number' && Number.isFinite(candidate);
    return raw.flatMap((entry) => {
        if (!entry || typeof entry !== 'object')
            return [];
        const word = entry;
        if (typeof word.word !== 'string'
            || !isFiniteNumber(word.start_ms)
            || !isFiniteNumber(word.end_ms)) {
            return [];
        }
        return [{
                word: word.word,
                start_ms: Math.max(0, Math.round(word.start_ms)),
                end_ms: Math.max(0, Math.round(word.end_ms)),
            }];
    });
}
541
/**
 * Returns a copy of `program` whose SLEEP steps use the generated speech
 * durations.
 *
 * A step is rewritten only when it is a SLEEP step with a non-empty string
 * `stepId` and `durationsByStepId` has its OWN entry for that id holding a
 * finite number above zero. Everything else is passed through unchanged, and
 * neither the input program nor its steps are mutated.
 *
 * @param program           Program whose `steps` are mapped.
 * @param durationsByStepId `stepId → duration (ms)` map.
 * @returns A shallow copy of `program` with a new `steps` array.
 */
function applyVideoSpeechDurations(program, durationsByStepId) {
    const durations = durationsByStepId ?? {};
    const hasOwn = Object.prototype.hasOwnProperty;
    const steps = program.steps.map((step) => {
        if (step.kind !== 'SLEEP')
            return step;
        const stepId = typeof step.stepId === 'string' ? step.stepId : null;
        if (!stepId)
            return step;
        // Own-property check: a step id such as "toString" or "constructor" must
        // not resolve to a member inherited from Object.prototype.
        if (!hasOwn.call(durations, stepId))
            return step;
        const durationMs = durations[stepId];
        if (typeof durationMs !== 'number' || !Number.isFinite(durationMs) || durationMs <= 0)
            return step;
        return {
            ...step,
            durationMs,
        };
    });
    return {
        ...program,
        steps,
    };
}
253
561
  function shouldRetryProgramFetch(status, error) {
254
562
  if ([408, 429, 500, 502, 503, 504].includes(status)) {
255
563
  return true;
@@ -272,8 +580,7 @@ function getProgramFetchRetryDelayMs(attempt) {
272
580
  function sleep(ms) {
273
581
  return new Promise((resolve) => setTimeout(resolve, ms));
274
582
  }
275
- async function uploadResults(config, program, result) {
276
- const runId = randomUUID();
583
+ async function uploadResults(config, program, result, runId = randomUUID()) {
277
584
  const artifactJobs = result.variantResults.flatMap((variant) => {
278
585
  const variantSpec = program.variants.find((entry) => entry.id === variant.variantId);
279
586
  return variant.artifacts.map((artifact) => ({
@@ -343,6 +650,93 @@ async function uploadResults(config, program, result) {
343
650
  if (!telemetryResponse.ok) {
344
651
  throw new Error(`telemetry upload failed: ${await formatServerError(telemetryResponse, `${config.apiBaseUrl}/api/cli/telemetry`)}`);
345
652
  }
653
+ return { runId };
654
+ }
655
/**
 * AUT-57 PR #5 — notifies `/api/cli/video-complete` once the run's artifacts
 * have been uploaded so the compositor job can be queued.
 *
 * The request body carries the video id, the run id, one metadata record per
 * recorded clip (built by buildVideoClipMetadata, including each clip's
 * conventional `mp4StoragePath`) and, when present, the generated audio assets.
 *
 * @param config              CLI config; `apiBaseUrl` and `apiKey` are read.
 * @param program             Executed program; its `presetId` is used as the video id.
 * @param result              Run result the clip metadata is derived from.
 * @param runId               Identifier of this run.
 * @param audioAssets         Optional audio assets; omitted from the body when empty.
 * @param audioAssetsByLocale Optional per-locale audio assets; omitted when empty.
 * @returns The parsed JSON body of the server response.
 * @throws Error when no clip was produced or the server answers with a non-2xx status.
 */
async function signalVideoComplete(config, program, result, runId, audioAssets, audioAssetsByLocale) {
    const videoId = program.presetId; // The CLI passes the video id in the preset id slot.
    const clips = buildVideoClipMetadata(videoId, result, program, runId);
    if (clips.length === 0) {
        throw new Error('video-complete: no MP4 clips were produced — refusing to queue compositor job');
    }
    const payload = { videoId, runId, clips };
    const hasAudioAssets = Boolean(audioAssets) && audioAssets.length > 0;
    if (hasAudioAssets) {
        payload.audioAssets = audioAssets;
    }
    const hasLocaleAssets = Boolean(audioAssetsByLocale) && Object.keys(audioAssetsByLocale).length > 0;
    if (hasLocaleAssets) {
        payload.audioAssetsByLocale = audioAssetsByLocale;
    }
    const url = `${config.apiBaseUrl}/api/cli/video-complete`;
    const headers = {
        'Authorization': `Bearer ${config.apiKey}`,
        'Content-Type': 'application/json',
        [CLI_VERSION_HEADER]: APP_VERSION,
    };
    const response = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload),
    });
    if (response.ok) {
        return (await response.json());
    }
    throw new Error(`video-complete failed: ${await formatServerError(response, url)}`);
}
695
/**
 * Builds one metadata record per recorded clip of a run.
 *
 * Only artifacts whose `mediaMode` is 'video' or 'clip' and that carry a
 * `clipId` are considered. Records are keyed by `variantId:clipId`, so a later
 * artifact with the same key replaces the earlier record. Locale and theme
 * come from the matching program variant when available and are otherwise
 * inferred from the variant id.
 *
 * @param videoId Video id used as the first segment of the storage path.
 * @param result  Run result; `variantResults` and `opcodeTimings` are read.
 * @param program Optional program used to look up variant locale/theme.
 * @param runId   Optional run id; when set it is added to the storage path.
 * @returns Array of clip metadata records.
 */
export function buildVideoClipMetadata(videoId, result, program, runId) {
    const clipsByKey = new Map();
    const isRecordedClip = (artifact) => (artifact.mediaMode === 'video' || artifact.mediaMode === 'clip') && Boolean(artifact.clipId);
    result.variantResults.forEach((variantResult) => {
        const variantId = variantResult.variantId;
        const variantSpec = program?.variants.find((candidate) => candidate.id === variantId);
        variantResult.artifacts.filter(isRecordedClip).forEach((artifact) => {
            const clipId = artifact.clipId;
            // Conventional storage path; the upload route enforces the same convention.
            const mp4StoragePath = runId
                ? `raw/${videoId}/${runId}/${variantId}/${clipId}.mp4`
                : `raw/${videoId}/${variantId}/${clipId}.mp4`;
            // Timings recorded for this exact variant + clip pair.
            const opcodeTimings = [];
            for (const timing of result.opcodeTimings) {
                if (timing.variantId !== variantId || timing.clipId !== clipId) {
                    continue;
                }
                opcodeTimings.push({
                    stepIndex: timing.stepIndex,
                    stepId: timing.stepId,
                    opcodeKind: timing.opcodeKind,
                    timecodeStartMs: timing.timecodeStartMs,
                    timecodeEndMs: timing.timecodeEndMs,
                    bbox: timing.bbox ?? null,
                });
            }
            clipsByKey.set(`${variantId}:${clipId}`, {
                variantId,
                lang: variantSpec?.locale ?? inferVariantLocale(variantId),
                theme: variantSpec?.theme ?? inferVariantTheme(variantId),
                clipId,
                mp4StoragePath,
                durationMs: artifact.durationMs ?? 0,
                opcodeTimings,
            });
        });
    });
    return Array.from(clipsByKey.values());
}
732
/**
 * Guesses a variant's locale from its id: the second-to-last non-empty
 * dash-separated segment, or 'en' when the id has fewer than two segments.
 *
 * @param variantId Variant identifier string.
 * @returns The inferred locale segment.
 */
function inferVariantLocale(variantId) {
    const segments = variantId.split('-').filter((segment) => segment.length > 0);
    return segments.length < 2 ? 'en' : segments[segments.length - 2];
}
738
/**
 * Guesses a variant's theme from its id.
 *
 * @param variantId Variant identifier string.
 * @returns 'dark' when the id ends with '-dark', otherwise 'light'.
 */
function inferVariantTheme(variantId) {
    if (variantId.endsWith('-dark')) {
        return 'dark';
    }
    return 'light';
}
347
741
  async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumber, job) {
348
742
  const { artifact, variant, variantSpec } = job;
@@ -368,7 +762,8 @@ async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumb
368
762
  formData.append('deviceFrame', variantSpec.deviceFrame);
369
763
  }
370
764
  const requestedDeviceScaleFactor = variantSpec?.deviceScaleFactor ?? program.outputScale ?? 2;
371
- const deviceScaleFactor = artifact.mediaMode === 'clip' && Number.isFinite(requestedDeviceScaleFactor)
765
+ const isFrameCapture = artifact.mediaMode === 'clip' || artifact.mediaMode === 'video';
766
+ const deviceScaleFactor = isFrameCapture && Number.isFinite(requestedDeviceScaleFactor)
372
767
  ? Math.min(Number(requestedDeviceScaleFactor), MAX_CLIP_CAPTURE_DEVICE_SCALE_FACTOR)
373
768
  : requestedDeviceScaleFactor;
374
769
  if (Number.isFinite(deviceScaleFactor)) {
package/dist/cli.js CHANGED
@@ -240,12 +240,17 @@ program
240
240
  .option('--local', `Use the local AutoKap dev server (${LOCAL_API_BASE_URL})`, false)
241
241
  .option('--allow-upload-failure', 'Keep a successful capture exit code even if artifact upload fails', false)
242
242
  .option('--debug', 'Verbose logging: per-substep timing, opcode dumps, recovery strategy traces', false)
243
+ .option('--cloud', 'Cloud runner mode: signals 4+ vCPU available, unblocks the conservative Linux FPS default (8 → 30)', false)
243
244
  .action(async (opts) => {
244
245
  if (opts.debug) {
245
246
  const { setDebugEnabled } = await import('./logger.js');
246
247
  setDebugEnabled(true);
247
248
  logger.info('[capture] Debug mode enabled — verbose logging on');
248
249
  }
250
+ if (opts.cloud) {
251
+ process.env.AUTOKAP_CLOUD_RUNNER = '1';
252
+ logger.info('[capture] Cloud runner mode — Linux FPS cap lifted (clips target 30 fps)');
253
+ }
249
254
  if (opts.local) {
250
255
  process.env[API_BASE_URL_ENV_VAR] = LOCAL_API_BASE_URL;
251
256
  process.env[WS_URL_ENV_VAR] = LOCAL_WS_URL;
@@ -606,6 +611,81 @@ presetCmd
606
611
  console.log(JSON.stringify(info, null, 2));
607
612
  process.exit(0);
608
613
  });
614
// ── video commands ─────────────────────────────────────────────────
//
// Mirrors the preset commands but targets `/api/video-projects` (CLI-keyed
// route used by the IDE skill flow). The payload file contains the full
// body expected by the API (projectId, title, user_script, legacy
// narration_voice/narration_locale aliases, narration_by_app_locale,
// app_locale/app_locales, app_theme/app_themes, cursor_theme,
// credentials_account_id, mockDataInjection, program). On `update`, the same
// payload shape is sent via PATCH. TTS is generated later by `autokap run`.
/**
 * Reads a JSON payload, sends it to a video-projects route and prints the id
 * of the resulting video on stdout. Always terminates the process: exit code 0
 * on success, 1 after logging the reason on any failure.
 *
 * @param request.method        HTTP method to use ('POST' or 'PATCH').
 * @param request.path          Route path appended to the configured API base URL.
 * @param request.payloadSource Payload file path, or "-" for stdin.
 * @param request.verb          Verb used in error messages ('create' / 'update').
 */
async function submitVideoProjectPayload({ method, path, payloadSource, verb }) {
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const cfg = await requireConfig();
    let payload;
    try {
        payload = await readJsonInput(payloadSource);
    }
    catch (err) {
        logger.error(`Failed to read payload: ${describe(err)}`);
        process.exit(1);
    }
    let res;
    try {
        res = await fetch(`${cfg.apiBaseUrl}${path}`, {
            method,
            headers: {
                Authorization: `Bearer ${cfg.apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(payload),
        });
    }
    catch (err) {
        // Network-level failure (DNS, refused connection, …): report it like any
        // other failure instead of dying on an unhandled rejection.
        logger.error(`Failed to ${verb} video: ${describe(err)}`);
        process.exit(1);
    }
    if (!res.ok) {
        const body = await res.json().catch(() => ({ error: res.statusText }));
        logger.error(`Failed to ${verb} video: ${body?.error || res.statusText}`);
        process.exit(1);
    }
    const data = await res.json().catch(() => null);
    const videoId = data?.video?.id;
    if (videoId === undefined || videoId === null || videoId === '') {
        logger.error(`Failed to ${verb} video: server response did not include a video id`);
        process.exit(1);
    }
    // Output only the video ID so callers can chain: autokap run $(autokap video create ...)
    console.log(videoId);
    process.exit(0);
}
const videoCmd = program
    .command('video')
    .description('Manage demo videos');
videoCmd
    .command('create')
    .description('Create a new demo video from a JSON payload file')
    .requiredOption('--payload <file>', 'Path to payload JSON file (use "-" for stdin)')
    .action(async (opts) => {
    await submitVideoProjectPayload({
        method: 'POST',
        path: '/api/video-projects',
        payloadSource: opts.payload,
        verb: 'create',
    });
});
videoCmd
    .command('update <video-id>')
    .description('Update an existing demo video from a JSON payload file')
    .requiredOption('--payload <file>', 'Path to payload JSON file (use "-" for stdin)')
    .action(async (videoId, opts) => {
    await submitVideoProjectPayload({
        method: 'PATCH',
        // The id comes from the command line; encode it so it cannot alter the route.
        path: `/api/video-projects/${encodeURIComponent(videoId)}`,
        payloadSource: opts.payload,
        verb: 'update',
    });
});
609
689
  // ── auth commands ──────────────────────────────────────────────────
610
690
  const authCmd = program
611
691
  .command('auth')
@@ -34,10 +34,19 @@ export class ClipCaptureLoop {
34
34
  this.page = opts.page;
35
35
  this.framesDir = opts.framesDir;
36
36
  this.jpegQuality = opts.jpegQuality ?? 80;
37
- const targetFps = Math.max(1, Math.min(30, opts.targetFps ?? (process.platform === 'linux' ? 8 : 15)));
37
+ // Linux default is 8 fps to stay safe on 2 vCPU CI runners. Cloud runners
38
+ // (AUTOKAP_CLOUD_RUNNER=1, set by the Fly.io image and the `--cloud` CLI
39
+ // flag) get the same 15 fps default as macOS/Windows since they have
40
+ // ≥ 4 vCPU. Callers can still override via opts.targetFps.
41
+ const isCloudRunner = process.env.AUTOKAP_CLOUD_RUNNER === '1';
42
+ const linuxDefault = isCloudRunner ? 15 : 8;
43
+ const platformDefault = process.platform === 'linux' ? linuxDefault : 15;
44
+ const targetFps = Math.max(1, Math.min(30, opts.targetFps ?? platformDefault));
38
45
  this.targetFps = targetFps;
39
46
  this.targetFrameIntervalMs = 1000 / targetFps;
40
- this.minRestMs = Math.max(0, Math.min(250, opts.minRestMs ?? (process.platform === 'linux' ? 50 : 16)));
47
+ const linuxMinRest = isCloudRunner ? 16 : 50;
48
+ const platformMinRest = process.platform === 'linux' ? linuxMinRest : 16;
49
+ this.minRestMs = Math.max(0, Math.min(250, opts.minRestMs ?? platformMinRest));
41
50
  }
42
51
  async start() {
43
52
  this.cdp = await this.page.context().newCDPSession(this.page);
@@ -1,4 +1,5 @@
1
1
  import type { Page } from 'playwright';
2
+ export declare const CAPTURE_HIDE_STYLE_ID = "autokap-capture-hide-style";
2
3
  export declare function getCaptureHideCSS(): string;
3
4
  export declare function ensureCaptureHideStyles(page: Page): Promise<void>;
4
5
  export declare function dismissCookiesAndWidgets(page: Page): Promise<{