demo-dev 0.0.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +174 -0
  2. package/bin/demo-cli.js +26 -0
  3. package/bin/demo-dev.js +26 -0
  4. package/demo.dev.config.example.json +20 -0
  5. package/dist/index.d.ts +392 -0
  6. package/dist/index.js +2116 -0
  7. package/package.json +76 -0
  8. package/skills/demo-dev/SKILL.md +153 -0
  9. package/skills/demo-dev/references/configuration.md +102 -0
  10. package/skills/demo-dev/references/recipes.md +83 -0
  11. package/src/ai/provider.ts +254 -0
  12. package/src/auth/bootstrap.ts +72 -0
  13. package/src/browser/session.ts +43 -0
  14. package/src/capture/continuous-capture.ts +739 -0
  15. package/src/cli.ts +337 -0
  16. package/src/config/project.ts +183 -0
  17. package/src/github/comment.ts +134 -0
  18. package/src/index.ts +10 -0
  19. package/src/lib/data-uri.ts +21 -0
  20. package/src/lib/fs.ts +7 -0
  21. package/src/lib/git.ts +59 -0
  22. package/src/lib/media.ts +23 -0
  23. package/src/orchestrate.ts +166 -0
  24. package/src/planner/heuristic.ts +180 -0
  25. package/src/planner/index.ts +26 -0
  26. package/src/planner/llm.ts +85 -0
  27. package/src/planner/openai.ts +77 -0
  28. package/src/planner/prompt.ts +331 -0
  29. package/src/planner/refine.ts +155 -0
  30. package/src/planner/schema.ts +62 -0
  31. package/src/presentation/polish.ts +84 -0
  32. package/src/probe/page-probe.ts +225 -0
  33. package/src/render/browser-frame.ts +176 -0
  34. package/src/render/ffmpeg-compose.ts +779 -0
  35. package/src/render/visual-plan.ts +422 -0
  36. package/src/setup/doctor.ts +158 -0
  37. package/src/setup/init.ts +90 -0
  38. package/src/types.ts +105 -0
  39. package/src/voice/script.ts +42 -0
  40. package/src/voice/tts.ts +286 -0
  41. package/tsconfig.json +16 -0
@@ -0,0 +1,779 @@
1
+ /**
2
+ * FFmpeg-based video composition pipeline.
3
+ *
4
+ * Takes the raw continuous recording + visual plan and produces the final
5
+ * polished mp4 with:
6
+ * - Smooth zoom/pan following interaction targets
7
+ * - Variable playback speed (compress loading, normal for interactions)
8
+ * - Narration audio overlay with proper sync
9
+ * - Background music with ducking
10
+ * - Intro/outro title cards
11
+ */
12
+
13
+ import { execFile } from "node:child_process";
14
+ import { access, writeFile } from "node:fs/promises";
15
+ import { join, dirname, resolve } from "node:path";
16
+ import { mkdir } from "node:fs/promises";
17
+ import { promisify } from "node:util";
18
+ import type { CaptureInteraction, ContinuousCaptureResult, SceneMarker } from "../capture/continuous-capture.js";
19
+ import type {
20
+ VisualPlanResult,
21
+ ZoomKeyframe,
22
+ SpeedSegment,
23
+ } from "./visual-plan.js";
24
+ import type { VoiceLine } from "../types.js";
25
+ import { applyBrowserFrame, type BrowserFrameOptions } from "./browser-frame.js";
26
+
27
+ const execFileAsync = promisify(execFile);
28
+
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** Output quality tier; selects an x264 CRF / preset / frame-rate trade-off. */
export type VideoQuality = "draft" | "standard" | "high";

// Lower CRF = higher quality (larger files); slower presets compress better.
// The fps here is only a default — ComposeOptions.fps overrides it.
const QUALITY_PRESETS: Record<VideoQuality, { crf: number; preset: string; fps: number }> = {
  draft: { crf: 28, preset: "ultrafast", fps: 24 },
  standard: { crf: 18, preset: "fast", fps: 30 },
  high: { crf: 12, preset: "slow", fps: 60 },
};
40
+
/** Inputs controlling the final composition pass (see composeVideo). */
export interface ComposeOptions {
  /** Path to the continuous WebM recording. */
  videoPath: string;
  /** Output mp4 path. */
  outputPath: string;
  /** Visual plan with zoom keyframes and speed segments. */
  visualPlan: VisualPlanResult;
  /** Capture result for viewport info and scene markers. */
  capture: ContinuousCaptureResult;
  /** Voice lines (narration audio files). */
  voiceLines?: VoiceLine[];
  /** Background music. */
  bgm?: {
    path: string;
    /** Linear gain applied to the music; mixAudio defaults to 0.16. */
    volume?: number;
    /** NOTE(review): accepted but never read by mixAudio — confirm whether ducking was meant to be implemented. */
    ducking?: number;
  };
  /** Video title for intro card (card duration is fixed at 2.0s in composeVideo). */
  title?: string;
  /** Output resolution; defaults to the capture viewport. */
  width?: number;
  height?: number;
  /** Output frame rate; defaults to the quality preset's fps. */
  fps?: number;
  /** Video quality: "draft" (fast, low), "standard" (default), "high" (slow, best). */
  quality?: VideoQuality;
  /** Wrap video in a Screen Studio–style browser frame with gradient background. */
  frame?: BrowserFrameOptions | boolean;
}
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // FFmpeg filter expression builders
72
+ // ---------------------------------------------------------------------------
73
+
74
+ /**
75
+ * Build a zoompan filter expression from zoom keyframes.
76
+ *
77
+ * The zoompan filter evaluates `z`, `x`, `y` expressions per frame.
78
+ * We encode keyframes as piecewise-linear interpolation in the expression.
79
+ */
80
+ const buildZoompanExpression = (
81
+ keyframes: ZoomKeyframe[],
82
+ fps: number,
83
+ totalDurationMs: number,
84
+ inputWidth: number,
85
+ inputHeight: number,
86
+ outputWidth: number,
87
+ outputHeight: number,
88
+ ): string => {
89
+ if (keyframes.length === 0) {
90
+ return `zoompan=z=1:x=0:y=0:d=1:s=${outputWidth}x${outputHeight}:fps=${fps}`;
91
+ }
92
+
93
+ // Convert keyframes to frame numbers
94
+ const frameKeyframes = keyframes.map((kf) => ({
95
+ frame: Math.round((kf.atMs / 1000) * fps),
96
+ scale: kf.scale,
97
+ centerX: kf.centerX,
98
+ centerY: kf.centerY,
99
+ transitionFrames: Math.round((kf.transitionMs / 1000) * fps),
100
+ }));
101
+
102
+ // Build piecewise zoom expression
103
+ // FFmpeg expressions: if(cond,val_true,val_false), between(val,min,max), etc.
104
+ const zParts: string[] = [];
105
+ const xParts: string[] = [];
106
+ const yParts: string[] = [];
107
+
108
+ for (let i = 0; i < frameKeyframes.length; i++) {
109
+ const kf = frameKeyframes[i];
110
+ const next = frameKeyframes[i + 1];
111
+
112
+ if (!next) {
113
+ // Last keyframe: hold this zoom
114
+ zParts.push(`if(gte(on,${kf.frame}),${kf.scale.toFixed(3)}`);
115
+ xParts.push(
116
+ `if(gte(on,${kf.frame}),${buildPanX(kf.centerX, kf.scale, inputWidth, outputWidth)}`,
117
+ );
118
+ yParts.push(
119
+ `if(gte(on,${kf.frame}),${buildPanY(kf.centerY, kf.scale, inputHeight, outputHeight)}`,
120
+ );
121
+ } else {
122
+ const transStart = next.frame - next.transitionFrames;
123
+ const transEnd = next.frame;
124
+
125
+ // Hold at current zoom until transition starts
126
+ zParts.push(
127
+ `if(between(on,${kf.frame},${transStart}),${kf.scale.toFixed(3)}`,
128
+ );
129
+ xParts.push(
130
+ `if(between(on,${kf.frame},${transStart}),${buildPanX(kf.centerX, kf.scale, inputWidth, outputWidth)}`,
131
+ );
132
+ yParts.push(
133
+ `if(between(on,${kf.frame},${transStart}),${buildPanY(kf.centerY, kf.scale, inputHeight, outputHeight)}`,
134
+ );
135
+
136
+ // Interpolate during transition
137
+ if (next.transitionFrames > 0) {
138
+ const progress = `((on-${transStart})/${next.transitionFrames})`;
139
+ const zInterp = `${kf.scale.toFixed(3)}+${progress}*(${next.scale.toFixed(3)}-${kf.scale.toFixed(3)})`;
140
+ zParts.push(
141
+ `if(between(on,${transStart},${transEnd}),${zInterp}`,
142
+ );
143
+
144
+ // Interpolate pan position
145
+ const fromX = buildPanX(kf.centerX, kf.scale, inputWidth, outputWidth);
146
+ const toX = buildPanX(next.centerX, next.scale, inputWidth, outputWidth);
147
+ xParts.push(
148
+ `if(between(on,${transStart},${transEnd}),${fromX}+${progress}*(${toX}-(${fromX}))`,
149
+ );
150
+
151
+ const fromY = buildPanY(kf.centerY, kf.scale, inputHeight, outputHeight);
152
+ const toY = buildPanY(next.centerY, next.scale, inputHeight, outputHeight);
153
+ yParts.push(
154
+ `if(between(on,${transStart},${transEnd}),${fromY}+${progress}*(${toY}-(${fromY}))`,
155
+ );
156
+ }
157
+ }
158
+ }
159
+
160
+ // Close all if() expressions — default to scale 1 center
161
+ const defaultX = buildPanX(0.5, 1, inputWidth, outputWidth);
162
+ const defaultY = buildPanY(0.5, 1, inputHeight, outputHeight);
163
+ const closers = ")".repeat(zParts.length);
164
+ const zExpr = zParts.join(",") + `,1${closers}`;
165
+ const xExpr = xParts.join(",") + `,${defaultX}${closers}`;
166
+ const yExpr = yParts.join(",") + `,${defaultY}${closers}`;
167
+
168
+ return `zoompan=z='${zExpr}':x='${xExpr}':y='${yExpr}':d=1:s=${outputWidth}x${outputHeight}:fps=${fps}`;
169
+ };
170
+
171
+ const buildPanX = (
172
+ centerX: number,
173
+ scale: number,
174
+ inputWidth: number,
175
+ outputWidth: number,
176
+ ): string => {
177
+ // Pan so that centerX is at the center of the output
178
+ const cropWidth = inputWidth / scale;
179
+ const panX = centerX * inputWidth - cropWidth / 2;
180
+ return Math.max(0, Math.min(inputWidth - cropWidth, panX)).toFixed(1);
181
+ };
182
+
183
+ const buildPanY = (
184
+ centerY: number,
185
+ scale: number,
186
+ inputHeight: number,
187
+ outputHeight: number,
188
+ ): string => {
189
+ const cropHeight = inputHeight / scale;
190
+ const panY = centerY * inputHeight - cropHeight / 2;
191
+ return Math.max(0, Math.min(inputHeight - cropHeight, panY)).toFixed(1);
192
+ };
193
+
194
+ // ---------------------------------------------------------------------------
195
+ // Title card generation via FFmpeg
196
+ // ---------------------------------------------------------------------------
197
+
198
+ const hasDrawtext = async (): Promise<boolean> => {
199
+ try {
200
+ const { stdout } = await execFileAsync("ffmpeg", ["-filters"], { maxBuffer: 1024 * 1024 });
201
+ return stdout.includes("drawtext");
202
+ } catch {
203
+ return false;
204
+ }
205
+ };
206
+
207
+ const generateTitleCard = async (
208
+ title: string,
209
+ outputPath: string,
210
+ width: number,
211
+ height: number,
212
+ durationSec: number,
213
+ fps: number,
214
+ ): Promise<void> => {
215
+ await mkdir(dirname(outputPath), { recursive: true });
216
+
217
+ const canDrawText = await hasDrawtext();
218
+
219
+ if (canDrawText) {
220
+ await execFileAsync("ffmpeg", [
221
+ "-f", "lavfi",
222
+ "-i", `color=c=0x171410:s=${width}x${height}:d=${durationSec}:r=${fps}`,
223
+ "-vf",
224
+ `drawtext=text='${escapeFFmpegText(title)}':` +
225
+ `fontsize=48:fontcolor=white:` +
226
+ `x=(w-text_w)/2:y=(h-text_h)/2`,
227
+ "-c:v", "libx264",
228
+ "-pix_fmt", "yuv420p",
229
+ "-y",
230
+ outputPath,
231
+ ]);
232
+ } else {
233
+ // Fallback: plain dark card (no text overlay)
234
+ await execFileAsync("ffmpeg", [
235
+ "-f", "lavfi",
236
+ "-i", `color=c=0x171410:s=${width}x${height}:d=${durationSec}:r=${fps}`,
237
+ "-c:v", "libx264",
238
+ "-pix_fmt", "yuv420p",
239
+ "-y",
240
+ outputPath,
241
+ ]);
242
+ }
243
+ };
244
+
245
+ const escapeFFmpegText = (text: string): string =>
246
+ text.replace(/'/g, "'\\''").replace(/:/g, "\\:").replace(/\\/g, "\\\\");
247
+
248
+ // ---------------------------------------------------------------------------
249
+ // Audio mixing
250
+ // ---------------------------------------------------------------------------
251
+
252
+ interface AudioTrack {
253
+ path: string;
254
+ /** Start time in the output video timeline (seconds). */
255
+ startSec: number;
256
+ }
257
+
258
+ const buildNarrationTracks = (
259
+ voiceLines: VoiceLine[],
260
+ sceneMarkers: SceneMarker[],
261
+ speedSegments: SpeedSegment[],
262
+ interactions: CaptureInteraction[],
263
+ ): AudioTrack[] => {
264
+ const tracks: AudioTrack[] = [];
265
+ const markerBySceneId = new Map(sceneMarkers.map((m) => [m.sceneId, m]));
266
+
267
+ for (const line of voiceLines) {
268
+ if (!line.audioPath) continue;
269
+ const marker = markerBySceneId.get(line.sceneId);
270
+ if (!marker) continue;
271
+
272
+ // Map scene start in raw recording → output timeline (after speed ramps)
273
+ const outputTimeSec = rawTimeToOutputTime(marker.startMs, speedSegments) / 1000;
274
+
275
+ // Find the first interactive event in this scene to anchor narration
276
+ // after content has loaded rather than at the raw scene start
277
+ const sceneInteractions = interactions.filter(
278
+ (i) => i.sceneId === marker.sceneId && i.type !== "scene-start" && i.type !== "scene-end",
279
+ );
280
+ const firstInteraction = sceneInteractions[0];
281
+ let anchorSec = outputTimeSec;
282
+ if (firstInteraction) {
283
+ // Start narration 0.3s after the first meaningful interaction in the scene
284
+ const interactionOutputSec = rawTimeToOutputTime(firstInteraction.atMs, speedSegments) / 1000;
285
+ // But don't start later than 3s into the scene
286
+ anchorSec = Math.min(interactionOutputSec + 0.3, outputTimeSec + 3.0);
287
+ // And never before the scene start
288
+ anchorSec = Math.max(anchorSec, outputTimeSec);
289
+ }
290
+
291
+ // Ensure this track doesn't overlap with the previous one
292
+ const prev = tracks[tracks.length - 1];
293
+ if (prev) {
294
+ const prevVoice = voiceLines.find((v) => v.audioPath === prev.path);
295
+ const prevEndSec = prev.startSec + (prevVoice?.audioDurationMs ?? 0) / 1000;
296
+ if (anchorSec < prevEndSec) {
297
+ // Push this track to start 0.3s after the previous one ends
298
+ anchorSec = prevEndSec + 0.3;
299
+ }
300
+ }
301
+
302
+ tracks.push({
303
+ path: line.audioPath,
304
+ startSec: anchorSec,
305
+ });
306
+ }
307
+
308
+ return tracks;
309
+ };
310
+
311
+ /**
312
+ * Map a timestamp in the raw recording to the output timeline,
313
+ * accounting for speed ramps.
314
+ */
315
+ const rawTimeToOutputTime = (
316
+ rawMs: number,
317
+ segments: SpeedSegment[],
318
+ ): number => {
319
+ let outputMs = 0;
320
+
321
+ for (const seg of segments) {
322
+ if (rawMs <= seg.startMs) break;
323
+
324
+ const segStart = seg.startMs;
325
+ const segEnd = Math.min(seg.endMs, rawMs);
326
+ const segDuration = segEnd - segStart;
327
+
328
+ outputMs += segDuration / seg.speed;
329
+
330
+ if (rawMs <= seg.endMs) break;
331
+ }
332
+
333
+ return outputMs;
334
+ };
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // Main composition
338
+ // ---------------------------------------------------------------------------
339
+
340
+ export const composeVideo = async (options: ComposeOptions): Promise<string> => {
341
+ const q = QUALITY_PRESETS[options.quality ?? "standard"];
342
+ const width = options.width ?? options.capture.viewport.width;
343
+ const height = options.height ?? options.capture.viewport.height;
344
+ const fps = options.fps ?? q.fps;
345
+
346
+ await mkdir(dirname(options.outputPath), { recursive: true });
347
+
348
+ const tempDir = join(dirname(options.outputPath), ".ffmpeg-temp");
349
+ await mkdir(tempDir, { recursive: true });
350
+
351
+ // Step 1: Apply speed ramps to the base video
352
+ const speedAdjustedPath = join(tempDir, "speed-adjusted.mp4");
353
+ await applySpeedRamps(
354
+ options.videoPath,
355
+ speedAdjustedPath,
356
+ options.visualPlan.speedSegments,
357
+ fps,
358
+ width,
359
+ height,
360
+ q.crf,
361
+ q.preset,
362
+ );
363
+
364
+ // Step 2: Zoom is now handled at capture time via CSS transforms (Screen Studio style)
365
+ // No FFmpeg zoom step needed — the browser does smooth 60fps zoom during recording.
366
+
367
+ // Step 3: Optional browser frame (Screen Studio style)
368
+ let mainVideoPath = speedAdjustedPath;
369
+ let finalWidth = width;
370
+ let finalHeight = height;
371
+
372
+ if (options.frame) {
373
+ const frameOpts: BrowserFrameOptions = options.frame === true ? {} : options.frame;
374
+ // Infer display URL from the first scene marker
375
+ if (!frameOpts.displayUrl && options.capture.sceneMarkers[0]) {
376
+ try {
377
+ frameOpts.displayUrl = new URL(options.capture.sceneMarkers[0].url).host;
378
+ } catch { /* keep default */ }
379
+ }
380
+ const framedPath = join(tempDir, "framed.mp4");
381
+ const { outputWidth, outputHeight } = await applyBrowserFrame(
382
+ speedAdjustedPath, framedPath, width, height, frameOpts, q.crf, q.preset,
383
+ );
384
+ mainVideoPath = framedPath;
385
+ finalWidth = outputWidth;
386
+ finalHeight = outputHeight;
387
+ }
388
+
389
+ // Step 3: Generate intro if title provided
390
+ let introPath: string | undefined;
391
+ if (options.title) {
392
+ introPath = join(tempDir, "intro.mp4");
393
+ await generateTitleCard(options.title, introPath, finalWidth, finalHeight, 2.0, fps);
394
+ }
395
+
396
+ // Step 4: Concatenate intro + main
397
+ const concatPath = introPath
398
+ ? join(tempDir, "concatenated.mp4")
399
+ : mainVideoPath;
400
+
401
+ if (introPath) {
402
+ await concatVideos([introPath, mainVideoPath], concatPath);
403
+ }
404
+
405
+ // Step 5: Mix audio (narration + BGM)
406
+ if (options.voiceLines?.length || options.bgm) {
407
+ await mixAudio(
408
+ concatPath,
409
+ options.outputPath,
410
+ options.voiceLines ?? [],
411
+ options.capture.sceneMarkers,
412
+ options.visualPlan.speedSegments,
413
+ options.capture.interactions,
414
+ options.bgm,
415
+ introPath ? 2.0 : 0,
416
+ );
417
+ } else {
418
+ // No audio to mix — just copy
419
+ await execFileAsync("ffmpeg", [
420
+ "-i", concatPath,
421
+ "-c", "copy",
422
+ "-y",
423
+ options.outputPath,
424
+ ]);
425
+ }
426
+
427
+ return options.outputPath;
428
+ };
429
+
430
+ // ---------------------------------------------------------------------------
431
+ // Pipeline steps
432
+ // ---------------------------------------------------------------------------
433
+
434
+ const applySpeedRamps = async (
435
+ inputPath: string,
436
+ outputPath: string,
437
+ segments: SpeedSegment[],
438
+ fps: number,
439
+ width: number,
440
+ height: number,
441
+ crf: number,
442
+ preset: string,
443
+ ): Promise<void> => {
444
+ const allNormal = segments.every((s) => s.speed === 1.0);
445
+
446
+ if (allNormal || segments.length <= 1) {
447
+ await execFileAsync("ffmpeg", [
448
+ "-i", inputPath,
449
+ "-vf", `scale=${width}:${height}:flags=lanczos,fps=${fps}`,
450
+ "-c:v", "libx264",
451
+ "-preset", preset,
452
+ "-crf", String(crf),
453
+ "-pix_fmt", "yuv420p",
454
+ "-an",
455
+ "-y",
456
+ outputPath,
457
+ ]);
458
+ return;
459
+ }
460
+
461
+ const tempDir = dirname(outputPath);
462
+ const segmentPaths: string[] = [];
463
+
464
+ for (let i = 0; i < segments.length; i++) {
465
+ const seg = segments[i];
466
+ const segPath = join(tempDir, `seg-${i}.mp4`);
467
+ const startSec = (seg.startMs / 1000).toFixed(3);
468
+ const durationSec = ((seg.endMs - seg.startMs) / 1000).toFixed(3);
469
+ const ptsExpr = seg.speed === 1.0 ? "PTS-STARTPTS" : `(PTS-STARTPTS)/${seg.speed.toFixed(2)}`;
470
+
471
+ await execFileAsync("ffmpeg", [
472
+ "-ss", startSec,
473
+ "-t", durationSec,
474
+ "-i", inputPath,
475
+ "-vf", `scale=${width}:${height}:flags=lanczos,fps=${fps},setpts=${ptsExpr}`,
476
+ "-c:v", "libx264",
477
+ "-preset", preset,
478
+ "-crf", String(crf),
479
+ "-pix_fmt", "yuv420p",
480
+ "-an",
481
+ "-y",
482
+ segPath,
483
+ ]);
484
+ segmentPaths.push(segPath);
485
+ }
486
+
487
+ // Concat all segments (use absolute paths for FFmpeg -safe 0)
488
+ const listPath = join(tempDir, "speed-concat.txt");
489
+ const listContent = segmentPaths.map((p) => `file '${resolve(p)}'`).join("\n");
490
+ await writeFile(listPath, listContent, "utf-8");
491
+
492
+ await execFileAsync("ffmpeg", [
493
+ "-f", "concat",
494
+ "-safe", "0",
495
+ "-i", listPath,
496
+ "-c", "copy",
497
+ "-y",
498
+ outputPath,
499
+ ]);
500
+ };
501
+
502
+ /**
503
+ * Apply subtle visual polish to the speed-adjusted video.
504
+ * For continuous screen recordings, the natural cursor/scroll motion
505
+ * provides enough visual interest — we just ensure clean output.
506
+ */
507
+ /**
508
+ * Apply smooth zoom keyframes using FFmpeg crop filter with time-based
509
+ * expressions. Uses cosine easing for smooth transitions (no hard cuts).
510
+ *
511
+ * Each frame evaluates a piecewise crop expression that smoothly
512
+ * interpolates scale, centerX, centerY between keyframes.
513
+ */
514
+ const applyVideoZoom = async (
515
+ inputPath: string,
516
+ outputPath: string,
517
+ keyframes: ZoomKeyframe[],
518
+ speedSegments: SpeedSegment[],
519
+ fps: number,
520
+ width: number,
521
+ height: number,
522
+ crf: number,
523
+ preset: string,
524
+ ): Promise<void> => {
525
+ // Adjust keyframe timings for speed ramps
526
+ const adjusted = keyframes
527
+ .map((kf) => ({
528
+ scale: kf.scale,
529
+ centerX: kf.centerX,
530
+ centerY: kf.centerY,
531
+ tSec: rawTimeToOutputTime(kf.atMs, speedSegments) / 1000,
532
+ transSec: kf.transitionMs / 1000,
533
+ }))
534
+ .sort((a, b) => a.tSec - b.tSec);
535
+
536
+ if (adjusted.length === 0 || adjusted.every((k) => k.scale <= 1.01)) {
537
+ await execFileAsync("ffmpeg", ["-i", inputPath, "-c", "copy", "-y", outputPath]);
538
+ return;
539
+ }
540
+
541
+ // Build a piecewise expression for scale, centerX, centerY using t (time)
542
+ // Smooth easing: 0.5-0.5*cos(progress*PI) gives cosine ease-in-out
543
+ const buildExpr = (getter: (kf: typeof adjusted[0]) => number): string => {
544
+ const parts: string[] = [];
545
+
546
+ for (let i = 0; i < adjusted.length; i++) {
547
+ const kf = adjusted[i];
548
+ const next = adjusted[i + 1];
549
+ const val = getter(kf).toFixed(4);
550
+
551
+ if (!next) {
552
+ // Last keyframe: hold
553
+ parts.push(`if(gte(t,${kf.tSec.toFixed(3)}),${val}`);
554
+ } else {
555
+ const holdEnd = next.tSec - next.transSec;
556
+ const transStart = Math.max(kf.tSec, holdEnd);
557
+ const transEnd = next.tSec;
558
+
559
+ // Hold at current value
560
+ parts.push(`if(between(t,${kf.tSec.toFixed(3)},${transStart.toFixed(3)}),${val}`);
561
+
562
+ // Smooth transition to next value
563
+ if (transEnd > transStart) {
564
+ const nextVal = getter(next).toFixed(4);
565
+ // p = smoothstep: 3*p^2 - 2*p^3 where p = (t-start)/(end-start)
566
+ const dur = (transEnd - transStart).toFixed(3);
567
+ const raw = `clip((t-${transStart.toFixed(3)})/${dur},0,1)`;
568
+ // smoothstep(p) = p*p*(3-2*p)
569
+ const p = `(${raw})*(${raw})*(3-2*(${raw}))`;
570
+ parts.push(
571
+ `if(between(t,${transStart.toFixed(3)},${transEnd.toFixed(3)}),${val}+${p}*(${nextVal}-${val})`,
572
+ );
573
+ }
574
+ }
575
+ }
576
+
577
+ const closers = ")".repeat(parts.length);
578
+ const defaultVal = getter(adjusted[0]).toFixed(4);
579
+ return parts.join(",") + `,${defaultVal}${closers}`;
580
+ };
581
+
582
+ const scaleExpr = buildExpr((k) => k.scale);
583
+ const cxExpr = buildExpr((k) => k.centerX);
584
+ const cyExpr = buildExpr((k) => k.centerY);
585
+
586
+ // crop filter: crop to (w/scale, h/scale) centered at (cx*w, cy*h), then scale back
587
+ // Using intermediate variables via sendcmd is too complex; instead embed directly
588
+ const cropW = `${width}/(${scaleExpr})`;
589
+ const cropH = `${height}/(${scaleExpr})`;
590
+ const cropX = `clip((${cxExpr})*${width}-${width}/(${scaleExpr})/2,0,${width}-${width}/(${scaleExpr}))`;
591
+ const cropY = `clip((${cyExpr})*${height}-${height}/(${scaleExpr})/2,0,${height}-${height}/(${scaleExpr}))`;
592
+
593
+ const filter = `crop=w='${cropW}':h='${cropH}':x='${cropX}':y='${cropY}':exact=1,scale=${width}:${height}:flags=lanczos`;
594
+
595
+ await execFileAsync("ffmpeg", [
596
+ "-i", inputPath,
597
+ "-vf", filter,
598
+ "-c:v", "libx264",
599
+ "-preset", preset,
600
+ "-crf", String(crf),
601
+ "-pix_fmt", "yuv420p",
602
+ "-an",
603
+ "-y",
604
+ outputPath,
605
+ ]);
606
+ };
607
+
608
+ const applyVisualPolish = async (
609
+ inputPath: string,
610
+ outputPath: string,
611
+ fps: number,
612
+ width: number,
613
+ height: number,
614
+ ): Promise<void> => {
615
+ // Ensure consistent output format; no zoompan (it's designed for images, not video)
616
+ await execFileAsync("ffmpeg", [
617
+ "-i", inputPath,
618
+ "-vf", `scale=${width}:${height}:flags=lanczos,fps=${fps}`,
619
+ "-c:v", "libx264",
620
+ "-preset", "fast",
621
+ "-crf", "18",
622
+ "-pix_fmt", "yuv420p",
623
+ "-an",
624
+ "-y",
625
+ outputPath,
626
+ ]);
627
+ };
628
+
629
+ const applyZoomPan = async (
630
+ inputPath: string,
631
+ outputPath: string,
632
+ keyframes: ZoomKeyframe[],
633
+ speedSegments: SpeedSegment[],
634
+ fps: number,
635
+ inputWidth: number,
636
+ inputHeight: number,
637
+ outputWidth: number,
638
+ outputHeight: number,
639
+ totalDurationMs: number,
640
+ ): Promise<void> => {
641
+ // Adjust keyframe timings for speed ramps
642
+ const adjustedKeyframes = keyframes.map((kf) => ({
643
+ ...kf,
644
+ atMs: rawTimeToOutputTime(kf.atMs, speedSegments),
645
+ transitionMs: kf.transitionMs,
646
+ }));
647
+
648
+ // For zoom to work well, we upscale the input first then crop
649
+ const upscaleWidth = outputWidth * 2;
650
+ const upscaleHeight = outputHeight * 2;
651
+
652
+ const zoompanFilter = buildZoompanExpression(
653
+ adjustedKeyframes,
654
+ fps,
655
+ totalDurationMs,
656
+ upscaleWidth,
657
+ upscaleHeight,
658
+ outputWidth,
659
+ outputHeight,
660
+ );
661
+
662
+ await execFileAsync("ffmpeg", [
663
+ "-i", inputPath,
664
+ "-vf",
665
+ `scale=${upscaleWidth}:${upscaleHeight}:flags=lanczos,${zoompanFilter}`,
666
+ "-c:v", "libx264",
667
+ "-preset", "fast",
668
+ "-crf", "18",
669
+ "-pix_fmt", "yuv420p",
670
+ "-an",
671
+ "-y",
672
+ outputPath,
673
+ ]);
674
+ };
675
+
676
+ const concatVideos = async (
677
+ inputs: string[],
678
+ outputPath: string,
679
+ ): Promise<void> => {
680
+ if (inputs.length === 1) {
681
+ await execFileAsync("ffmpeg", ["-i", inputs[0], "-c", "copy", "-y", outputPath]);
682
+ return;
683
+ }
684
+
685
+ // Use filter_complex concat for reliable merging (handles different codecs/sizes)
686
+ const inputArgs = inputs.flatMap((p) => ["-i", resolve(p)]);
687
+ const filterParts = inputs.map((_, i) => `[${i}:v]`).join("");
688
+ const filter = `${filterParts}concat=n=${inputs.length}:v=1:a=0[outv]`;
689
+
690
+ await execFileAsync("ffmpeg", [
691
+ ...inputArgs,
692
+ "-filter_complex", filter,
693
+ "-map", "[outv]",
694
+ "-c:v", "libx264",
695
+ "-preset", "fast",
696
+ "-crf", "18",
697
+ "-pix_fmt", "yuv420p",
698
+ "-y",
699
+ outputPath,
700
+ ]);
701
+ };
702
+
/**
 * Mux narration and background music onto the composed video.
 *
 * Builds one ffmpeg filter graph: an infinite silent stereo base, each
 * narration clip delayed to its output-timeline position, optional looped
 * BGM at reduced volume, all combined with amix and trimmed to the video
 * length via `-shortest`. The video stream is stream-copied.
 *
 * NOTE(review): `bgm.ducking` (declared on ComposeOptions) is never read
 * here — BGM volume is constant; confirm whether sidechain ducking was
 * intended.
 *
 * @param introOffsetSec Seconds the intro card shifted the main content by.
 */
const mixAudio = async (
  videoPath: string,
  outputPath: string,
  voiceLines: VoiceLine[],
  sceneMarkers: SceneMarker[],
  speedSegments: SpeedSegment[],
  interactions: CaptureInteraction[],
  bgm: ComposeOptions["bgm"],
  introOffsetSec: number,
): Promise<void> => {
  const narrationTracks = buildNarrationTracks(voiceLines, sceneMarkers, speedSegments, interactions);

  // Offset narration by intro duration
  const adjustedTracks = narrationTracks.map((t) => ({
    ...t,
    startSec: t.startSec + introOffsetSec,
  }));

  // Nothing to mix at all: remux the video unchanged.
  if (adjustedTracks.length === 0 && !bgm) {
    await execFileAsync("ffmpeg", ["-i", videoPath, "-c", "copy", "-y", outputPath]);
    return;
  }

  // Strategy: use a silent audio base matching the video duration, then overlay
  // each narration track at the correct time. This avoids amix quirks.
  const inputs: string[] = ["-i", videoPath];
  const filterParts: string[] = [];
  // ffmpeg input index of the next audio file we add (0 is the video).
  let streamIdx = 1;

  // Generate a silent base audio from the video duration
  // (anullsrc generates silence; we trim it to video length via -shortest)
  filterParts.push(`anullsrc=r=44100:cl=stereo[silence]`);

  // Add narration audio inputs with precise adelay
  const overlayLabels: string[] = ["[silence]"];
  for (const track of adjustedTracks) {
    inputs.push("-i", track.path);
    const delayMs = Math.round(track.startSec * 1000);
    // adelay: delay in ms for left|right channels; pad to fill with silence after
    filterParts.push(
      `[${streamIdx}]aresample=44100,adelay=${delayMs}|${delayMs}:all=1,apad[narr${streamIdx}]`,
    );
    overlayLabels.push(`[narr${streamIdx}]`);
    streamIdx++;
  }

  // Add BGM: resampled, attenuated, looped indefinitely, then padded.
  if (bgm) {
    inputs.push("-i", bgm.path);
    const vol = bgm.volume ?? 0.16;
    filterParts.push(
      `[${streamIdx}]aresample=44100,volume=${vol.toFixed(2)},aloop=loop=-1:size=2e+09,apad[bgm]`,
    );
    overlayLabels.push("[bgm]");
    streamIdx++;
  }

  // Mix: amix with normalize=0 prevents volume dropping with fewer inputs
  filterParts.push(
    `${overlayLabels.join("")}amix=inputs=${overlayLabels.length}:duration=first:normalize=0[aout]`,
  );

  const filterComplex = filterParts.join(";");

  await execFileAsync("ffmpeg", [
    ...inputs,
    "-filter_complex", filterComplex,
    "-map", "0:v",
    "-map", "[aout]",
    "-c:v", "copy",
    "-c:a", "aac",
    "-b:a", "192k",
    "-shortest",
    "-y",
    outputPath,
  ]);
};