voxflow 1.17.2 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -413,6 +413,23 @@ async function cardRender(opts) {
413
413
  const clipPaths = [];
414
414
  let totalQuota = 0;
415
415
 
416
+ // Timeline tracking: each clip we render has a known duration; we accumulate
417
+ // them so the final timeline.json maps absolute output-mp4 offsets to cards.
418
+ // Downstream tools (e.g. `voxflow card subtitle`) use this to allocate
419
+ // per-sentence captions without re-running silencedetect.
420
+ let cumulativeMs = 0;
421
+ const timeline = {
422
+ title,
423
+ ratio,
424
+ language: deck.meta?.language || null,
425
+ voice: noAudio ? null : voice,
426
+ speed: noAudio ? null : speed,
427
+ intro: null,
428
+ cards: [],
429
+ outro: null,
430
+ totalDurationMs: 0,
431
+ };
432
+
416
433
  try {
417
434
  // ── Intro card (only if drawtext available — otherwise it's just a blank dark frame) ──
418
435
  if (!noIntro && title && hasDrawtext) {
@@ -425,6 +442,9 @@ async function cardRender(opts) {
425
442
  isFirst: true, isLast: false, hasDrawtext, cjkFontPath,
426
443
  });
427
444
  clipPaths.push(introPath);
445
+ const introMs = Math.round(introDuration * 1000);
446
+ timeline.intro = { start: cumulativeMs, end: cumulativeMs + introMs };
447
+ cumulativeMs += introMs;
428
448
  }
429
449
 
430
450
  // ── Per-card clips ──────────────────────────────────────────────────────
@@ -467,6 +487,16 @@ async function cardRender(opts) {
467
487
  subtitle: noSubtitle ? null : (card.narration || card.title || null),
468
488
  });
469
489
  clipPaths.push(clipOut);
490
+ const cardStart = cumulativeMs;
491
+ const cardEnd = cumulativeMs + durationMs;
492
+ timeline.cards.push({
493
+ file: card.file,
494
+ title: card.title || null,
495
+ narration: card.narration || null,
496
+ start: cardStart,
497
+ end: cardEnd,
498
+ });
499
+ cumulativeMs = cardEnd;
470
500
  }
471
501
 
472
502
  // ── Outro card (only if drawtext available) ───────────────────────────
@@ -482,7 +512,11 @@ async function cardRender(opts) {
482
512
  isFirst: false, isLast: true, hasDrawtext, cjkFontPath,
483
513
  });
484
514
  clipPaths.push(outroPath);
515
+ const outroMs = Math.round(outroDuration * 1000);
516
+ timeline.outro = { start: cumulativeMs, end: cumulativeMs + outroMs };
517
+ cumulativeMs += outroMs;
485
518
  }
519
+ timeline.totalDurationMs = cumulativeMs;
486
520
 
487
521
  // ── Concat ──────────────────────────────────────────────────────────────
488
522
  const slug = (title
@@ -511,10 +545,17 @@ async function cardRender(opts) {
511
545
  console.log(`Output: ${outputPath}`);
512
546
  if (totalQuota > 0) console.log(`Quota used: ${totalQuota}`);
513
547
 
548
+ // Write timeline.json next to the deck so downstream tools (e.g.
549
+ // `voxflow card subtitle`) can map output offsets back to per-card
550
+ // narration windows without re-running silencedetect.
551
+ const timelinePath = path.join(dir, 'timeline.json');
552
+ timeline.output = path.basename(outputPath);
553
+ fs.writeFileSync(timelinePath, JSON.stringify(timeline, null, 2) + '\n');
554
+
514
555
  // Success — clean up work directory
515
556
  fs.rmSync(workDir, { recursive: true, force: true });
516
557
 
517
- return { outputPath, cardCount: cards.length, quotaUsed: totalQuota };
558
+ return { outputPath, cardCount: cards.length, quotaUsed: totalQuota, timelinePath, timeline };
518
559
  } catch (err) {
519
560
  // Failure — keep work directory for debugging
520
561
  if (fs.existsSync(workDir)) {
@@ -0,0 +1,497 @@
1
+ /**
2
+ * VoxFlow CLI — card subtitle subcommand
3
+ *
4
+ * Burns per-sentence synchronised subtitles into a previously rendered
5
+ * `voxflow card render` MP4. Replaces the original mp4 atomically; original
6
+ * is preserved as `<name>-no-subs.mp4` so iteration is non-destructive.
7
+ *
8
+ * Sources of timing truth (in priority order):
9
+ * 1. <dir>/timeline.json (emitted by `card render` since 1.18.0)
10
+ * 2. ffmpeg silencedetect fallback (~2.5s pauses ≈ card boundaries)
11
+ *
12
+ * Per-card flow:
13
+ * 1. Window = [card.start, card.end] from timeline (or detected silences).
14
+ * 2. Split narration on CJK + ASCII sentence enders: 。!?.…— (the characters , ; : 、 do not end a sentence; they are only preferred line-wrap points).
15
+ * 3. Allocate each sentence a slice proportional to char count.
16
+ * 4. Manual line-wrap every ≤ MAX_LINE_CHARS (CJK has no auto-wrap in
17
+ * ffmpeg's `subtitles=` filter — must inject `\n` ourselves).
18
+ * 5. Write SRT, then ffmpeg subtitles= filter with ASS force_style.
19
+ *
20
+ * ASS coordinate system note: PlayResY defaults to 288, NOT 1920. So the
21
+ * "small" font/margin numbers in DEFAULT_STYLE are correct — they are
22
+ * scaled to whatever vertical resolution the source video uses.
23
+ */
24
+
25
+ 'use strict';
26
+
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const { execFile } = require('child_process');
30
+
31
+ const { parseFlag } = require('../core/args');
32
+ const { runCommand, checkFfmpeg } = require('../core/ffmpeg');
33
+
34
+ // ── ffmpeg binary capability probe ────────────────────────────────────────────
35
+
36
/**
 * Locate an ffmpeg executable whose `-filters` listing contains the
 * `subtitles` filter (which needs a libass-enabled build). Builds without
 * libass fail to parse `subtitles=` with a misleading
 * "Error parsing filterchain" message, so we probe up front.
 *
 * Candidates, in order:
 *   1. `ffmpeg` as resolved through PATH (reported as source 'system').
 *   2. The binary path exported by the optional `ffmpeg-static` package.
 *
 * @returns {Promise<{binary: string, source: 'system'|'ffmpeg-static'}>}
 *   Rejects with an explanatory Error when neither candidate qualifies.
 */
async function probeSubtitlesCapableFfmpeg() {
  // True only when `<binary> -filters` exits cleanly and lists `subtitles`.
  const listsSubtitlesFilter = (binary) => new Promise((done) => {
    execFile(binary, ['-hide_banner', '-filters'], (err, stdout) => {
      done(!err && /\bsubtitles\b/.test(stdout || ''));
    });
  });

  // 1. ffmpeg from PATH
  if (await listsSubtitlesFilter('ffmpeg')) {
    return { binary: 'ffmpeg', source: 'system' };
  }

  // 2. Bundled ffmpeg-static (optional dependency)
  let staticPath = null;
  try {
    staticPath = require('ffmpeg-static');
  } catch {
    /* not installed */
  }
  if (!staticPath || !fs.existsSync(staticPath)) {
    throw new Error(
      'No ffmpeg with libass / `subtitles` filter found.\n' +
      ' System ffmpeg lacks libass (e.g. Homebrew default formula).\n' +
      ' Install ffmpeg-static: `npm install ffmpeg-static` (any project),\n' +
      ' or rebuild ffmpeg with --enable-libass.',
    );
  }

  if (await listsSubtitlesFilter(staticPath)) {
    return { binary: staticPath, source: 'ffmpeg-static' };
  }
  throw new Error(
    `ffmpeg-static at ${staticPath} also lacks libass. Reinstall ffmpeg-static.`,
  );
}
84
+
85
/**
 * Run one specific ffmpeg binary directly instead of going through the
 * core/ffmpeg helper. Used by the subtitle burn step when the binary that
 * passed the libass probe has to be forced.
 *
 * ffmpeg writes its progress and warnings to stderr; execFile's default
 * `maxBuffer` of 1 MiB would terminate the child (and fail the encode) once
 * that much output accumulates, so a larger cap is set explicitly.
 *
 * @param {string} binary - Path or name of the executable to run.
 * @param {string[]} args - Arguments passed verbatim (no shell involved).
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output.
 *   Rejects with execFile's error, augmented with `stdout` and `stderr`.
 */
function runSpecificFfmpeg(binary, args) {
  const options = {
    timeout: 600_000, // 10 minutes
    maxBuffer: 64 * 1024 * 1024, // room for long, chatty encodes
  };
  return new Promise((resolve, reject) => {
    execFile(binary, args, options, (err, stdout, stderr) => {
      if (err) {
        err.stderr = stderr;
        err.stdout = stdout;
        return reject(err);
      }
      resolve({ stdout, stderr });
    });
  });
}
102
+
103
+ // ── Constants ─────────────────────────────────────────────────────────────────
104
+
105
// Layout limits used when wrapping a sentence into visual lines.
const MAX_LINE_CHARS = 16; // CJK characters per visual line
const PUNCT_BIAS = 4; // within the last N chars of a line, prefer breaking after punctuation

// A sentence is a run of non-terminators plus at most one trailing terminator.
const SENTENCE_SPLIT_RE = /[^。!?!?.…—]+[。!?!?.…—]?/g;
// Characters after which a line break is preferred.
const PUNCT_BREAK_RE = /[,,、;;::—]/;

// Lower bound applied to a sentence's time slice.
const MIN_SENTENCE_DURATION_MS = 600;

// ASS PlayResY=288 default — these "small" numbers map to ~14% of frame height.
// FontSize=8, MarginV=14 gives a clean bottom-third caption strip on 1080×1920.
const DEFAULT_STYLE = [
  'FontName=PingFang SC',
  'FontSize=8',
  'PrimaryColour=&HFFFFFF&',
  'OutlineColour=&H000000&',
  'BackColour=&HC0000000&',
  'BorderStyle=3',
  'Outline=2',
  'Shadow=0',
  'MarginV=14',
  'MarginL=24',
  'MarginR=24',
  'Alignment=2',
  'Bold=1',
].join(',');
119
+
120
+ // ── Sentence splitting & wrapping ────────────────────────────────────────────
121
+
122
/**
 * Break narration text into sentences.
 *
 * All whitespace is removed first, then the text is cut with
 * SENTENCE_SPLIT_RE. Text without any match (e.g. an empty string) comes
 * back as a single-element array holding the whitespace-stripped text.
 *
 * @param {string} text - Raw narration.
 * @returns {string[]} One or more sentences, terminator included when present.
 */
function splitSentences(text) {
  const compact = text.replace(/\s+/g, '');
  // Every match is non-empty and whitespace-free, so no further cleanup is needed.
  return compact.match(SENTENCE_SPLIT_RE) ?? [compact];
}
132
+
133
/**
 * Wrap a sentence into lines of at most `maxChars` characters, joined by
 * `\n`. When a punctuation mark matching PUNCT_BREAK_RE lands within the last
 * PUNCT_BIAS characters of a line, the line is broken right after it so that
 * phrasing stays intact.
 *
 * Characters are counted as Unicode code points rather than UTF-16 code
 * units, so a break can never fall in the middle of a surrogate pair
 * (emoji, rare CJK ideographs). Output for BMP-only text is unchanged.
 *
 * @param {string} s - Sentence to wrap.
 * @param {number} [maxChars=MAX_LINE_CHARS] - Maximum characters per line.
 * @returns {string} `s` unchanged when it already fits, else the wrapped text.
 */
function wrapText(s, maxChars = MAX_LINE_CHARS) {
  const chars = Array.from(s); // code points, not UTF-16 units
  if (chars.length <= maxChars) return s;

  const lines = [];
  let cur = '';
  let curLen = 0;
  for (const ch of chars) {
    cur += ch;
    curLen += 1;
    const lineFull = curLen >= maxChars;
    const earlyPunctBreak = curLen >= maxChars - PUNCT_BIAS && PUNCT_BREAK_RE.test(ch);
    if (lineFull || earlyPunctBreak) {
      lines.push(cur);
      cur = '';
      curLen = 0;
    }
  }
  if (cur) lines.push(cur);
  return lines.join('\n');
}
157
+
158
+ // ── SRT formatting ────────────────────────────────────────────────────────────
159
+
160
/** Left-pad a number with zeros to 2 digits. */
function pad(n) { return String(n).padStart(2, '0'); }
/** Left-pad a number with zeros to 3 digits. */
function pad3(n) { return String(n).padStart(3, '0'); }

/**
 * Format a time offset as an SRT timestamp `HH:MM:SS,mmm`.
 *
 * The value is converted to whole milliseconds once, with rounding, and all
 * fields are derived from that integer. Taking the fractional part with a
 * float subtraction and flooring it loses a millisecond for many inputs
 * (e.g. 4.1 s would print as `,099`).
 *
 * @param {number} seconds - Offset in seconds; negative values clamp to 0.
 * @returns {string} SRT timestamp.
 */
function fmtTime(seconds) {
  const totalMs = Math.round(Math.max(0, seconds) * 1000);
  const ms = totalMs % 1000;
  const totalSec = Math.floor(totalMs / 1000);
  const s = totalSec % 60;
  const m = Math.floor(totalSec / 60) % 60;
  const h = Math.floor(totalSec / 3600);
  return `${pad(h)}:${pad(m)}:${pad(s)},${pad3(ms)}`;
}
171
+
172
/**
 * Split a card's narration into sentences and give each one a slice of the
 * card's time window proportional to its character count.
 *
 * Guarantees:
 *   - Cues are contiguous, start at `startMs` and never extend past the
 *     window end, so they cannot overlap the next card's cues.
 *   - When the window can hold MIN_SENTENCE_DURATION_MS for every sentence,
 *     each cue lasts at least that long (time is reserved for the sentences
 *     still to come). When it cannot, the floor is dropped and allocation is
 *     purely proportional.
 *   - The last sentence always ends exactly at the window end.
 *
 * @param {object} p
 * @param {string} p.narration - Card narration text.
 * @param {number} p.startMs - Window start (ms).
 * @param {number} p.endMs - Window end (ms).
 * @param {number} p.startIndex - SRT index assigned to the first cue.
 * @returns {Array<{index: number, start: number, end: number, text: string}>}
 *   Cues with `start`/`end` in seconds and manually wrapped `text`.
 */
function buildCuesForCard({ narration, startMs, endMs, startIndex }) {
  const sentences = splitSentences(narration);
  const count = sentences.length;
  const totalChars = sentences.reduce((sum, s) => sum + s.length, 0) || 1;
  const winDur = Math.max(0, endMs - startMs);
  const windowEnd = startMs + winDur; // equals endMs unless the window is inverted

  // Only enforce the minimum when every sentence can actually receive it.
  const floorMs = winDur >= count * MIN_SENTENCE_DURATION_MS ? MIN_SENTENCE_DURATION_MS : 0;

  let cursor = startMs;
  return sentences.map((s, j) => {
    const remaining = count - 1 - j;
    let sEnd = windowEnd;
    if (remaining > 0) {
      const share = Math.round((s.length / totalChars) * winDur);
      // Leave room for the floor of every sentence that still follows.
      const latestEnd = windowEnd - remaining * floorMs;
      sEnd = Math.min(cursor + Math.max(floorMs, share), latestEnd);
    }
    const cue = {
      index: startIndex + j,
      start: cursor / 1000,
      end: sEnd / 1000,
      text: wrapText(s, MAX_LINE_CHARS),
    };
    cursor = sEnd;
    return cue;
  });
}
203
+
204
/**
 * Serialize cues into SRT text: for each cue an index line, a
 * `start --> end` timing line and the cue text, with a blank line between
 * consecutive cues.
 *
 * @param {Array<{index: number, start: number, end: number, text: string}>} cues
 *   Cues with `start`/`end` in seconds.
 * @returns {string} SRT document (empty string for no cues).
 */
function formatSrt(cues) {
  const blocks = [];
  for (const { index, start, end, text } of cues) {
    const timing = `${fmtTime(start)} --> ${fmtTime(end)}`;
    blocks.push(`${index}\n${timing}\n${text}\n`);
  }
  return blocks.join('\n');
}
209
+
210
+ // ── Timeline + silence detection ─────────────────────────────────────────────
211
+
212
/**
 * Ask ffprobe (through runCommand) for a media file's container duration.
 *
 * @param {string} mp4 - Path of the file to probe.
 * @returns {Promise<number>} Duration in seconds.
 * @throws {Error} When the probe output does not parse to a finite number.
 */
async function probeDurationSec(mp4) {
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'csv=p=0',
    mp4,
  ];
  const probe = await runCommand('ffprobe', probeArgs);
  const seconds = Number.parseFloat(probe.stdout.trim());
  if (Number.isFinite(seconds)) return seconds;
  throw new Error(`Could not probe duration of ${mp4}`);
}
220
+
221
/**
 * Run ffmpeg's `silencedetect` audio filter over a file and collect the
 * silent intervals it reports.
 *
 * silencedetect logs to stderr, so the intervals are parsed from the
 * `stderr` property of whatever runCommand yields. If runCommand rejects,
 * the rejection value is used in place of the result (best effort); with no
 * usable stderr the function returns an empty list.
 *
 * @param {string} mp4 - Path of the file to analyse.
 * @param {object} [options]
 * @param {number} [options.dB=-30] - Noise threshold in dB.
 * @param {number} [options.minSec=2.0] - Minimum silence length in seconds.
 * @returns {Promise<Array<{start: number, end: number}>>} Intervals in
 *   seconds, in the order they were logged.
 */
async function detectSilences(mp4, { dB = -30, minSec = 2.0 } = {}) {
  const ffmpegArgs = [
    '-i', mp4,
    '-af', `silencedetect=n=${dB}dB:d=${minSec}`,
    '-f', 'null', '-',
  ];
  const outcome = await runCommand('ffmpeg', ffmpegArgs).catch((err) => err);
  const log = (outcome && outcome.stderr) || '';

  const silences = [];
  let pending = null; // interval that has a start but no end yet
  for (const line of log.split('\n')) {
    const startMatch = line.match(/silence_start:\s*([\d.]+)/);
    if (startMatch) {
      pending = { start: parseFloat(startMatch[1]) };
      continue;
    }
    if (!pending) continue; // an end without a preceding start is ignored
    const endMatch = line.match(/silence_end:\s*([\d.]+)/);
    if (!endMatch) continue;
    pending.end = parseFloat(endMatch[1]);
    silences.push(pending);
    pending = null;
  }
  return silences;
}
243
+
244
/**
 * Produce one narration window per deck card, in milliseconds.
 *
 * If `timelinePath` points at an existing file, its `cards` entries are
 * used verbatim (start, end, narration) after checking that the count
 * matches the deck. Otherwise the windows are estimated from the silences
 * detected in `mp4Path`: card i runs from the end of silence i-1 (or 0) to
 * the start of silence i (or the end of the file).
 *
 * @param {object} p
 * @param {{cards: Array<{narration?: string}>}} p.deck - Parsed deck.json.
 * @param {string} [p.timelinePath] - Candidate timeline.json path.
 * @param {string} p.mp4Path - Rendered video, used only by the fallback.
 * @returns {Promise<{source: 'timeline'|'silencedetect',
 *   windows: Array<{start: number, end: number, narration: *}>}>}
 * @throws {Error} On a card-count mismatch or too few detected silences.
 */
async function buildWindows({ deck, timelinePath, mp4Path }) {
  const haveTimeline = Boolean(timelinePath) && fs.existsSync(timelinePath);

  if (haveTimeline) {
    const parsed = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
    const timelineCards = parsed.cards || [];
    if (timelineCards.length !== deck.cards.length) {
      throw new Error(
        `timeline.json has ${timelineCards.length} cards but deck.json has ${deck.cards.length}. ` +
        `Re-run \`voxflow card render\` to regenerate.`,
      );
    }
    const windows = timelineCards.map(({ start, end, narration }) => ({ start, end, narration }));
    return { source: 'timeline', windows };
  }

  // Fallback: silencedetect heuristic
  const totalSec = await probeDurationSec(mp4Path);
  const silences = await detectSilences(mp4Path);
  const neededPauses = deck.cards.length - 1;
  if (silences.length < neededPauses) {
    throw new Error(
      `silencedetect found ${silences.length} pauses but expected ≥ ${neededPauses}. ` +
      `Re-render with \`voxflow card render\` to emit timeline.json instead.`,
    );
  }

  const toMs = (sec) => Math.round(sec * 1000);
  const windows = [];
  deck.cards.forEach((card, i) => {
    const fromSec = i === 0 ? 0 : silences[i - 1].end;
    const untilSec = i < silences.length ? silences[i].start : totalSec;
    windows.push({
      start: toMs(fromSec),
      end: toMs(untilSec),
      narration: card.narration || '',
    });
  });
  return { source: 'silencedetect', windows };
}
284
+
285
+ // ── Main pipeline ─────────────────────────────────────────────────────────────
286
+
287
/**
 * Load and parse `<dir>/deck.json`.
 *
 * @param {string} dir - Card output directory.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When deck.json is missing (JSON.parse errors propagate).
 */
function readDeck(dir) {
  const deckPath = path.join(dir, 'deck.json');
  if (fs.existsSync(deckPath)) {
    const raw = fs.readFileSync(deckPath, 'utf8');
    return JSON.parse(raw);
  }
  throw new Error(`No deck.json found in ${dir}.`);
}
294
+
295
/**
 * Pick the mp4 that subtitles should be burned into.
 *
 * With `override` the given file is used (it must exist). Otherwise `dir`
 * must contain exactly one candidate `.mp4`, where these are not candidates:
 *   - `*-no-subs.mp4` backups written by a previous in-place run;
 *   - `<name>-subbed.mp4` when `<name>.mp4` is also present — that is the
 *     intermediate file of a burn that failed part-way, and counting it
 *     would make every later run fail with "Multiple .mp4".
 *
 * @param {string} dir - Card output directory.
 * @param {string} [override] - Explicit source path (`--input`).
 * @returns {string} Path of the source mp4 (absolute when overridden).
 * @throws {Error} When the override is missing, or zero / several
 *   candidates are found.
 */
function findSourceMp4(dir, override) {
  if (override) {
    if (!fs.existsSync(override)) throw new Error(`--input mp4 not found: ${override}`);
    return path.resolve(override);
  }

  const mp4s = fs
    .readdirSync(dir)
    .filter((f) => f.endsWith('.mp4') && !f.endsWith('-no-subs.mp4'));
  const present = new Set(mp4s);
  const isStaleIntermediate = (f) =>
    f.endsWith('-subbed.mp4') && present.has(f.replace(/-subbed\.mp4$/, '.mp4'));
  const candidates = mp4s.filter((f) => !isStaleIntermediate(f));

  if (candidates.length === 0) {
    throw new Error(
      `No source .mp4 in ${dir}. Run \`voxflow card render <dir>\` first.`,
    );
  }
  if (candidates.length > 1) {
    throw new Error(
      `Multiple .mp4 in ${dir}: ${candidates.join(', ')}. Pass --input <file> to disambiguate.`,
    );
  }
  return path.join(dir, candidates[0]);
}
316
+
317
/**
 * Escape a path for use as the VALUE of `subtitles='...'` inside an ffmpeg
 * -vf argument (the caller supplies the surrounding single quotes).
 *
 * The value is parsed twice by ffmpeg: once as a filtergraph token (where
 * the single quotes apply) and once as a filter option value.
 *   - Backslashes become `/` so no escape characters remain in the path.
 *   - `:` becomes `\:` so the option parser does not treat it as a separator.
 *   - `'` becomes `\'\''`. Inside a single-quoted ffmpeg string a quote
 *     cannot be escaped, so the quoted section is closed, an escaped quote
 *     is emitted, and the quoted section is reopened; the leading backslash
 *     stays inside the quotes and escapes the quote for the option parser.
 *
 * @param {string} p - File system path.
 * @returns {string} Escaped path, ready to be wrapped in single quotes.
 */
function escapeSubsPath(p) {
  return p
    .replace(/\\/g, '/')
    .replace(/'/g, "\\'\\''")
    .replace(/:/g, '\\:');
}
325
+
326
/**
 * Build an SRT from the deck narration and burn it into the rendered mp4.
 *
 * Steps: check ffmpeg, read deck.json, locate the source mp4, derive one
 * timing window per card (timeline.json or silencedetect), write
 * `<dir>/subs.srt`, then (unless `dryRun`) re-encode the video with the
 * `subtitles` filter using an ffmpeg binary that has libass.
 *
 * When no `output` is given the source is replaced in place and the
 * original is kept once as `<name>-no-subs.mp4`. The encode goes to
 * `<name>-subbed.mp4` first; that intermediate is deleted if ffmpeg fails so
 * it cannot be mistaken for a second source on the next run, and it is
 * moved over the destination with a single rename.
 *
 * @param {object} opts
 * @param {string} opts.dir - Card output directory.
 * @param {string} [opts.input] - Explicit source mp4.
 * @param {string} [opts.output] - Destination mp4 (default: replace source).
 * @param {string} [opts.style] - ASS force_style override.
 * @param {boolean} [opts.dryRun=false] - Stop after writing subs.srt.
 * @returns {Promise<{cues: number, srtPath: string, source: string, output?: string}>}
 * @throws {Error} When ffmpeg is unavailable, no narration exists, or any
 *   step (window building, probing, encoding) fails.
 */
async function cardSubtitle(opts) {
  const { dir, input, output, style: styleOverride, dryRun = false } = opts;

  const ffmpegInfo = await checkFfmpeg();
  if (!ffmpegInfo.available) {
    throw new Error(
      'ffmpeg not found. Install: brew install ffmpeg (macOS) / sudo apt install ffmpeg (Linux)',
    );
  }
  if (ffmpegInfo.source === 'ffmpeg-static') {
    let pkgVersion = '';
    try { pkgVersion = require('ffmpeg-static/package.json').version; } catch { /* unknown */ }
    console.log(` (using ffmpeg-static${pkgVersion ? ` v${pkgVersion}` : ''} — ffmpeg ${ffmpegInfo.version})`);
  }

  const deck = readDeck(dir);
  const mp4Path = findSourceMp4(dir, input);
  const timelinePath = path.join(dir, 'timeline.json');

  console.log(`\n=== VoxFlow Card Subtitle ===`);
  console.log(`Source : ${path.relative(process.cwd(), mp4Path)}`);
  console.log(`Cards : ${deck.cards.length}`);

  const { source, windows } = await buildWindows({ deck, timelinePath, mp4Path });
  console.log(`Timing : ${source}${source === 'timeline' ? ` (${path.basename(timelinePath)})` : ' (silencedetect fallback)'}`);

  // Build cues; cards without narration contribute nothing.
  const cues = [];
  let cueIndex = 1;
  for (let i = 0; i < deck.cards.length; i++) {
    const win = windows[i];
    if (!win.narration) continue;
    const cardCues = buildCuesForCard({
      narration: win.narration,
      startMs: win.start,
      endMs: win.end,
      startIndex: cueIndex,
    });
    cueIndex += cardCues.length;
    cues.push(...cardCues);
  }

  if (cues.length === 0) {
    throw new Error('No narration text to subtitle. All cards have empty `narration`.');
  }

  const srtPath = path.join(dir, 'subs.srt');
  fs.writeFileSync(srtPath, formatSrt(cues));
  console.log(`SRT : ${cues.length} cues → ${path.relative(process.cwd(), srtPath)}`);

  if (dryRun) {
    console.log(`\n--dry-run: stopped before ffmpeg burn-in. Edit ${path.basename(srtPath)} and re-run without --dry-run to bake.`);
    return { cues: cues.length, srtPath, source };
  }

  // ── Burn into mp4 ──────────────────────────────────────────────────────────
  const subbedTmp = mp4Path.replace(/\.mp4$/i, '-subbed.mp4');
  const noSubsBackup = mp4Path.replace(/\.mp4$/i, '-no-subs.mp4');
  const finalOut = output ? path.resolve(output) : mp4Path;

  const ratioWidth = 1080; // PlayResY hint — most card decks are 1080×1920
  const ratioHeight = 1920;
  const style = styleOverride || DEFAULT_STYLE;
  const escSrt = escapeSubsPath(srtPath);

  console.log(`\n Burning subs (FontSize / MarginV via ASS PlayResY=288 default)...`);
  const vfArg = `subtitles='${escSrt}':original_size=${ratioWidth}x${ratioHeight}:force_style='${style}'`;
  if (process.env.VOXFLOW_DEBUG_SUBS) {
    console.error('[DEBUG vf]', JSON.stringify(vfArg));
  }

  // The `subtitles=` filter requires libass; pick a binary that has it.
  const { binary: ffmpegBin, source: ffmpegSource } = await probeSubtitlesCapableFfmpeg();
  if (ffmpegSource === 'ffmpeg-static') {
    console.log(` (system ffmpeg lacks libass — using bundled ffmpeg-static)`);
  }

  try {
    await runSpecificFfmpeg(ffmpegBin, [
      '-i', mp4Path,
      '-vf', vfArg,
      '-c:a', 'copy',
      '-c:v', 'libx264', '-preset', 'fast', '-crf', '20',
      '-movflags', '+faststart',
      '-y', subbedTmp,
    ]);
  } catch (err) {
    // Do not leave a partial encode behind: it would count as a second
    // source mp4 on the next run.
    fs.rmSync(subbedTmp, { force: true });
    throw err;
  }

  if (finalOut === mp4Path) {
    // In-place replace: keep the first original as the -no-subs backup. A
    // backup from an earlier run is left untouched.
    if (!fs.existsSync(noSubsBackup)) {
      fs.renameSync(mp4Path, noSubsBackup);
    }
    // rename replaces an existing destination, so no separate unlink (and
    // no moment without a file at finalOut) is needed.
    fs.renameSync(subbedTmp, finalOut);
    console.log(`\n=== Done ===`);
    console.log(`Output : ${path.relative(process.cwd(), finalOut)} (original kept as ${path.basename(noSubsBackup)})`);
  } else {
    fs.renameSync(subbedTmp, finalOut);
    console.log(`\n=== Done ===`);
    console.log(`Output : ${path.relative(process.cwd(), finalOut)}`);
  }

  return { cues: cues.length, srtPath, source, output: finalOut };
}
435
+
436
+ // ── CLI handler ───────────────────────────────────────────────────────────────
437
+
438
/**
 * CLI entry point for `voxflow card subtitle`.
 *
 * Prints usage when called without arguments or with `--help` / `-h`.
 * Otherwise the first argument that is neither a flag nor the value of a
 * value-taking flag is the card directory; the remaining options are read
 * with parseFlag and forwarded to cardSubtitle. Exits the process with
 * status 1 when no directory is given.
 *
 * @param {string[]} args - Arguments following the `subtitle` subcommand.
 * @returns {Promise<void>}
 */
async function handle(args) {
  const wantsHelp = args.length === 0 || args[0] === '--help' || args[0] === '-h';
  if (wantsHelp) {
    console.log(`Usage: voxflow card subtitle <dir> [options]

Burn per-sentence synced subtitles into a card-render MP4. Reads
<dir>/timeline.json (preferred) or falls back to ffmpeg silencedetect.
The original mp4 is preserved as <name>-no-subs.mp4.

Options:
<dir> Card output directory (must contain deck.json + .mp4)

--input <path> Override source mp4 (default: the only .mp4 in <dir>
that doesn't end with -no-subs.mp4).
-o, --output <path> Write output here instead of replacing the source.
--style <ass> Override ASS force_style string (advanced).
--dry-run Write subs.srt but skip ffmpeg burn-in.

Examples:
voxflow card subtitle cards/why-no-savings/
voxflow card subtitle cards/why-no-savings/ --dry-run # inspect SRT first
voxflow card subtitle cards/why-no-savings/ -o subbed.mp4`);
    return;
  }

  // Locate the positional <dir>, stepping over flags and their values.
  const valuedFlags = new Set(['--input', '--output', '-o', '--style']);
  let dir;
  let pos = 0;
  while (pos < args.length && dir === undefined) {
    const token = args[pos];
    if (!token.startsWith('-')) {
      dir = token;
    } else {
      pos += valuedFlags.has(token) ? 2 : 1;
    }
  }
  if (!dir) { console.error('Error: provide the card output directory'); process.exit(1); }

  const input = parseFlag(args, '--input');
  const output = parseFlag(args, '--output', '-o');
  const style = parseFlag(args, '--style');
  const dryRun = args.includes('--dry-run');

  await cardSubtitle({
    dir: path.resolve(dir),
    input: input ? path.resolve(input) : undefined,
    output: output || undefined,
    style: style || undefined,
    dryRun,
  });
}
483
+
484
// Public surface of the card-subtitle module: the pipeline entry point
// (cardSubtitle), the CLI handler (handle), and the individual helpers and
// constants it is built from.
module.exports = {
  cardSubtitle,
  splitSentences,
  wrapText,
  fmtTime,
  buildCuesForCard,
  formatSrt,
  buildWindows,
  escapeSubsPath,
  probeSubtitlesCapableFfmpeg,
  DEFAULT_STYLE,
  MAX_LINE_CHARS,
  handle,
};
@@ -1,55 +1,76 @@
1
1
  /**
2
2
  * VoxFlow CLI — card command
3
3
  *
4
- * Dispatches `voxflow card render <dir>` to card-render.js.
4
+ * Dispatches:
5
+ * - `voxflow card render <dir>` → card-render.js (deck → narrated MP4)
6
+ * - `voxflow card subtitle <dir>` → card-subtitle.js (MP4 → +synced subs)
7
+ *
5
8
  * The card *generation* workflow lives in cli/skills/card/SKILL.md —
6
- * Claude Code calls render-cards.mjs directly. This command handles the
7
- * post-generation video render step which needs API auth (TTS quota).
9
+ * Claude Code calls render-cards.mjs directly. These commands handle the
10
+ * post-generation video render + subtitle steps which need API auth /
11
+ * ffmpeg-static fallback / timeline.json plumbing.
8
12
  */
9
13
 
10
14
  'use strict';
11
15
 
12
16
  async function handle(args) {
13
- if (args.length > 0 && args[0] === 'render') {
14
- const cardRender = require('./card-render');
15
- return cardRender.handle(args.slice(1));
17
+ if (args.length > 0) {
18
+ if (args[0] === 'render') {
19
+ const cardRender = require('./card-render');
20
+ return cardRender.handle(args.slice(1));
21
+ }
22
+ if (args[0] === 'subtitle' || args[0] === 'subtitles' || args[0] === 'subs') {
23
+ const cardSubtitle = require('./card-subtitle');
24
+ return cardSubtitle.handle(args.slice(1));
25
+ }
16
26
  }
17
27
 
18
28
  // Default: usage
19
29
  console.log(`Usage:
20
- voxflow card render <dir> Render card directory → narrated MP4 video
30
+ voxflow card render <dir> Render card directory → narrated MP4 video
31
+ voxflow card subtitle <dir> Burn per-sentence synced subs into the rendered MP4
21
32
 
22
33
  Subcommands:
23
34
  render Synthesize TTS narration + render Ken Burns video from card PNGs
35
+ (emits timeline.json next to deck.json — used by \`subtitle\`)
36
+ subtitle Read deck.json + timeline.json, allocate per-sentence cues by
37
+ char count, and burn subtitles into the source mp4 in-place.
38
+ Original kept as <name>-no-subs.mp4. Aliases: subs / subtitles.
24
39
 
25
- See: voxflow card render --help`);
40
+ See: voxflow card render --help
41
+ voxflow card subtitle --help`);
26
42
  }
27
43
 
28
44
  const meta = {
29
45
  card: {
30
- usage: 'render <dir> [options]',
31
- description: 'Card video export: narrated MP4 with subtitles, intro/outro, and BGM from a card-skill output directory',
46
+ usage: '<subcommand> <dir> [options]',
47
+ description: 'Card video pipeline: deck.json + PNGs narrated MP4 (`render`) synced sentence-level subtitles (`subtitle`).',
32
48
  options: [
33
49
  'render <dir> Render deck.json + PNGs → narrated MP4 video',
34
- '--voice <id> TTS voice ID (default: v-female-R2s4N9qJ)',
35
- '--speed <n> TTS speed, 0.5-2.0 (default: 1.0)',
36
- '--no-audio Silent video skip TTS synthesis',
37
- '--pause <sec> Silence after narration (reading time, default: 2.5)',
38
- '--hold <sec> Card duration in --no-audio mode (default: 5)',
39
- '--no-intro Skip intro title card',
40
- '--no-outro Skip outro branding card',
41
- '--intro-dur <sec> Intro duration (default: 2.5)',
42
- '--outro-dur <sec> Outro duration (default: 2)',
43
- '--no-subtitle Disable subtitle overlay',
44
- '--bgm <path> Background music (loops at low volume)',
45
- '--bgm-volume <n> BGM volume, 0-1 (default: 0.08)',
46
- '-o, --output <path> Output MP4 path (default: <dir>/<title>.mp4)',
50
+ 'subtitle <dir> Burn per-sentence synced subs into the rendered MP4',
51
+ '--voice <id> [render] TTS voice ID (default: v-female-R2s4N9qJ)',
52
+ '--speed <n> [render] TTS speed, 0.5-2.0 (default: 1.0)',
53
+ '--no-audio [render] Silent video skip TTS synthesis',
54
+ '--pause <sec> [render] Silence after narration (default: 2.5)',
55
+ '--hold <sec> [render] Card duration in --no-audio mode (default: 5)',
56
+ '--no-intro [render] Skip intro title card',
57
+ '--no-outro [render] Skip outro branding card',
58
+ '--intro-dur <sec> [render] Intro duration (default: 2.5)',
59
+ '--outro-dur <sec> [render] Outro duration (default: 2)',
60
+ '--no-subtitle [render] Disable in-render subtitle bar',
61
+ '--bgm <path> [render] Background music (loops at low volume)',
62
+ '--bgm-volume <n> [render] BGM volume, 0-1 (default: 0.08)',
63
+ '--input <path> [subtitle] Override source mp4',
64
+ '--style <ass> [subtitle] ASS force_style override (advanced)',
65
+ '--dry-run [subtitle] Write subs.srt but skip ffmpeg burn-in',
66
+ '-o, --output <path> Output MP4 path (default: <dir>/<title>.mp4 or in-place)',
47
67
  ],
48
68
  examples: [
49
69
  'voxflow card render cards/fermentation/',
50
- 'voxflow card render cards/fermentation/ --voice v-female-R2s4N9qJ -o out.mp4',
70
+ 'voxflow card render cards/fermentation/ --no-intro --no-outro --no-subtitle',
71
+ 'voxflow card subtitle cards/fermentation/',
72
+ 'voxflow card subtitle cards/fermentation/ --dry-run',
51
73
  'voxflow card render cards/fermentation/ --bgm ~/music/ambient.mp3',
52
- 'voxflow card render cards/fermentation/ --no-audio --no-subtitle',
53
74
  ],
54
75
  },
55
76
  };