voxflow 1.17.1 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/lib/commands/asr/index.js +1 -1
- package/lib/commands/card-render.js +44 -3
- package/lib/commands/card-subtitle.js +497 -0
- package/lib/commands/card.js +46 -25
- package/lib/commands/dub.js +1 -1
- package/lib/commands/explain.js +3 -3
- package/lib/commands/narrate.js +1 -1
- package/lib/commands/picstory.js +3 -3
- package/lib/commands/podcast/index.js +1 -1
- package/lib/commands/present.js +1 -1
- package/lib/commands/publish.js +1 -1
- package/lib/commands/slides/index.js +1 -1
- package/lib/commands/story.js +1 -1
- package/lib/commands/summarize.js +3 -3
- package/lib/commands/translate.js +1 -1
- package/lib/commands/video-translate.js +1 -1
- package/lib/commands/voices.js +2 -2
- package/package.json +1 -1
- package/skills/.claude-plugin/plugin.json +1 -1
- package/skills/card/SKILL.md +24 -1
|
@@ -211,7 +211,7 @@ const meta = {
|
|
|
211
211
|
`--mode <type> auto (default) | sentence | flash | file (cloud only)`,
|
|
212
212
|
`--lang <model> Language. Tencent: 16k_zh (default), 16k_en, ... | Azure: ja-JP, en-US, zh-CN, ...`,
|
|
213
213
|
`--format <fmt> Output format: srt (default), txt, json`,
|
|
214
|
-
|
|
214
|
+
`-o, --output <path> Output file path (default: <input>.<format>)`,
|
|
215
215
|
`--speakers Enable speaker diarization (alias of --diarize)`,
|
|
216
216
|
`--diarize Enable speaker diarization (azure)`,
|
|
217
217
|
`--speaker-number <n> Expected number of speakers (with --speakers / --diarize)`,
|
|
@@ -342,7 +342,7 @@ async function cardRender(opts) {
|
|
|
342
342
|
introDuration = 2.5,
|
|
343
343
|
outroDuration = 2,
|
|
344
344
|
} = opts;
|
|
345
|
-
const voice = opts.voice || 'female-
|
|
345
|
+
const voice = opts.voice || 'v-female-R2s4N9qJ';
|
|
346
346
|
const speed = Number(opts.speed) || 1.0;
|
|
347
347
|
|
|
348
348
|
const deck = readDeckJson(dir);
|
|
@@ -413,6 +413,23 @@ async function cardRender(opts) {
|
|
|
413
413
|
const clipPaths = [];
|
|
414
414
|
let totalQuota = 0;
|
|
415
415
|
|
|
416
|
+
// Timeline tracking: each clip we render has a known duration; we accumulate
|
|
417
|
+
// them so the final timeline.json maps absolute output-mp4 offsets to cards.
|
|
418
|
+
// Downstream tools (e.g. `voxflow card subtitle`) use this to allocate
|
|
419
|
+
// per-sentence captions without re-running silencedetect.
|
|
420
|
+
let cumulativeMs = 0;
|
|
421
|
+
const timeline = {
|
|
422
|
+
title,
|
|
423
|
+
ratio,
|
|
424
|
+
language: deck.meta?.language || null,
|
|
425
|
+
voice: noAudio ? null : voice,
|
|
426
|
+
speed: noAudio ? null : speed,
|
|
427
|
+
intro: null,
|
|
428
|
+
cards: [],
|
|
429
|
+
outro: null,
|
|
430
|
+
totalDurationMs: 0,
|
|
431
|
+
};
|
|
432
|
+
|
|
416
433
|
try {
|
|
417
434
|
// ── Intro card (only if drawtext available — otherwise it's just a blank dark frame) ──
|
|
418
435
|
if (!noIntro && title && hasDrawtext) {
|
|
@@ -425,6 +442,9 @@ async function cardRender(opts) {
|
|
|
425
442
|
isFirst: true, isLast: false, hasDrawtext, cjkFontPath,
|
|
426
443
|
});
|
|
427
444
|
clipPaths.push(introPath);
|
|
445
|
+
const introMs = Math.round(introDuration * 1000);
|
|
446
|
+
timeline.intro = { start: cumulativeMs, end: cumulativeMs + introMs };
|
|
447
|
+
cumulativeMs += introMs;
|
|
428
448
|
}
|
|
429
449
|
|
|
430
450
|
// ── Per-card clips ──────────────────────────────────────────────────────
|
|
@@ -467,6 +487,16 @@ async function cardRender(opts) {
|
|
|
467
487
|
subtitle: noSubtitle ? null : (card.narration || card.title || null),
|
|
468
488
|
});
|
|
469
489
|
clipPaths.push(clipOut);
|
|
490
|
+
const cardStart = cumulativeMs;
|
|
491
|
+
const cardEnd = cumulativeMs + durationMs;
|
|
492
|
+
timeline.cards.push({
|
|
493
|
+
file: card.file,
|
|
494
|
+
title: card.title || null,
|
|
495
|
+
narration: card.narration || null,
|
|
496
|
+
start: cardStart,
|
|
497
|
+
end: cardEnd,
|
|
498
|
+
});
|
|
499
|
+
cumulativeMs = cardEnd;
|
|
470
500
|
}
|
|
471
501
|
|
|
472
502
|
// ── Outro card (only if drawtext available) ───────────────────────────
|
|
@@ -482,7 +512,11 @@ async function cardRender(opts) {
|
|
|
482
512
|
isFirst: false, isLast: true, hasDrawtext, cjkFontPath,
|
|
483
513
|
});
|
|
484
514
|
clipPaths.push(outroPath);
|
|
515
|
+
const outroMs = Math.round(outroDuration * 1000);
|
|
516
|
+
timeline.outro = { start: cumulativeMs, end: cumulativeMs + outroMs };
|
|
517
|
+
cumulativeMs += outroMs;
|
|
485
518
|
}
|
|
519
|
+
timeline.totalDurationMs = cumulativeMs;
|
|
486
520
|
|
|
487
521
|
// ── Concat ──────────────────────────────────────────────────────────────
|
|
488
522
|
const slug = (title
|
|
@@ -511,10 +545,17 @@ async function cardRender(opts) {
|
|
|
511
545
|
console.log(`Output: ${outputPath}`);
|
|
512
546
|
if (totalQuota > 0) console.log(`Quota used: ${totalQuota}`);
|
|
513
547
|
|
|
548
|
+
// Write timeline.json next to the deck so downstream tools (e.g.
|
|
549
|
+
// `voxflow card subtitle`) can map output offsets back to per-card
|
|
550
|
+
// narration windows without re-running silencedetect.
|
|
551
|
+
const timelinePath = path.join(dir, 'timeline.json');
|
|
552
|
+
timeline.output = path.basename(outputPath);
|
|
553
|
+
fs.writeFileSync(timelinePath, JSON.stringify(timeline, null, 2) + '\n');
|
|
554
|
+
|
|
514
555
|
// Success — clean up work directory
|
|
515
556
|
fs.rmSync(workDir, { recursive: true, force: true });
|
|
516
557
|
|
|
517
|
-
return { outputPath, cardCount: cards.length, quotaUsed: totalQuota };
|
|
558
|
+
return { outputPath, cardCount: cards.length, quotaUsed: totalQuota, timelinePath, timeline };
|
|
518
559
|
} catch (err) {
|
|
519
560
|
// Failure — keep work directory for debugging
|
|
520
561
|
if (fs.existsSync(workDir)) {
|
|
@@ -534,7 +575,7 @@ Options:
|
|
|
534
575
|
<dir> Card output directory (must contain deck.json + exports/*.png)
|
|
535
576
|
|
|
536
577
|
Audio:
|
|
537
|
-
--voice <id> TTS voice ID (default: female-
|
|
578
|
+
--voice <id> TTS voice ID (default: v-female-R2s4N9qJ)
|
|
538
579
|
--speed <n> TTS speed, 0.5-2.0 (default: 1.0)
|
|
539
580
|
--no-audio Silent video — skip TTS synthesis
|
|
540
581
|
|
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoxFlow CLI — card subtitle subcommand
|
|
3
|
+
*
|
|
4
|
+
* Burns per-sentence synchronised subtitles into a previously rendered
|
|
5
|
+
* `voxflow card render` MP4. Replaces the original mp4 atomically; original
|
|
6
|
+
* is preserved as `<name>-no-subs.mp4` so iteration is non-destructive.
|
|
7
|
+
*
|
|
8
|
+
* Sources of timing truth (in priority order):
|
|
9
|
+
* 1. <dir>/timeline.json (emitted by `card render` since 1.18.0)
|
|
10
|
+
* 2. ffmpeg silencedetect fallback (~2.5s pauses ≈ card boundaries)
|
|
11
|
+
*
|
|
12
|
+
* Per-card flow:
|
|
13
|
+
* 1. Window = [card.start, card.end] from timeline (or detected silences).
|
|
14
|
+
* 2. Split narration on CJK + ASCII sentence enders: 。！？!?.…— (the softer marks , ; 、 are not sentence splits; they only guide line wrapping).
|
|
15
|
+
* 3. Allocate each sentence a slice proportional to char count.
|
|
16
|
+
* 4. Manual line-wrap every ≤ MAX_LINE_CHARS (CJK has no auto-wrap in
|
|
17
|
+
* ffmpeg's `subtitles=` filter — must inject `\n` ourselves).
|
|
18
|
+
* 5. Write SRT, then ffmpeg subtitles= filter with ASS force_style.
|
|
19
|
+
*
|
|
20
|
+
* ASS coordinate system note: PlayResY defaults to 288, NOT 1920. So the
|
|
21
|
+
* "small" font/margin numbers in DEFAULT_STYLE are correct — they are
|
|
22
|
+
* scaled to whatever vertical resolution the source video uses.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
'use strict';
|
|
26
|
+
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
const path = require('path');
|
|
29
|
+
const { execFile } = require('child_process');
|
|
30
|
+
|
|
31
|
+
const { parseFlag } = require('../core/args');
|
|
32
|
+
const { runCommand, checkFfmpeg } = require('../core/ffmpeg');
|
|
33
|
+
|
|
34
|
+
// ── ffmpeg binary capability probe ────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
/**
 * Locate an ffmpeg executable whose `-filters` listing mentions the
 * `subtitles` filter (the filter is only present in libass-enabled builds).
 *
 * Candidates are tried in this order:
 *   1. `ffmpeg` as resolved through PATH (reported as source 'system').
 *   2. The binary path exported by the optional `ffmpeg-static` package
 *      (reported as source 'ffmpeg-static').
 *
 * A candidate is rejected when it cannot be executed or when its filter
 * listing does not contain the word `subtitles`.
 *
 * @returns {Promise<{ binary: string, source: 'system'|'ffmpeg-static' }>}
 * @throws {Error} (as a rejection) when neither candidate qualifies.
 */
async function probeSubtitlesCapableFfmpeg() {
  // Settles to true only when `candidate` runs and lists the subtitles filter.
  const listsSubtitlesFilter = (candidate) => new Promise((settle) => {
    execFile(candidate, ['-hide_banner', '-filters'], (err, stdout) => {
      settle(!err && /\bsubtitles\b/.test(stdout || ''));
    });
  });

  // 1. ffmpeg from PATH
  if (await listsSubtitlesFilter('ffmpeg')) {
    return { binary: 'ffmpeg', source: 'system' };
  }

  // 2. Bundled ffmpeg-static (optional dependency — absence is not an error by itself)
  let bundled = null;
  try {
    bundled = require('ffmpeg-static');
  } catch {
    /* not installed */
  }
  const bundledPresent = Boolean(bundled) && fs.existsSync(bundled);
  if (!bundledPresent) {
    throw new Error(
      'No ffmpeg with libass / `subtitles` filter found.\n' +
      ' System ffmpeg lacks libass (e.g. Homebrew default formula).\n' +
      ' Install ffmpeg-static: `npm install ffmpeg-static` (any project),\n' +
      ' or rebuild ffmpeg with --enable-libass.',
    );
  }

  if (await listsSubtitlesFilter(bundled)) {
    return { binary: bundled, source: 'ffmpeg-static' };
  }
  throw new Error(
    `ffmpeg-static at ${bundled} also lacks libass. Reinstall ffmpeg-static.`,
  );
}
|
|
84
|
+
|
|
85
|
+
/**
 * Run one specific ffmpeg binary directly instead of going through the core
 * ffmpeg helper. Used by the subtitle burn step, which may have to force a
 * different binary than the one the rest of the CLI uses.
 *
 * @param {string} binary - Executable name or absolute path.
 * @param {string[]} args - Arguments passed verbatim (no shell involved).
 * @returns {Promise<{ stdout: string, stderr: string }>} Captured output.
 * @throws {Error} (as a rejection) the execFile error, with `stdout` and
 *   `stderr` attached, when the process fails, times out after 10 minutes,
 *   or exceeds the output cap.
 */
function runSpecificFfmpeg(binary, args) {
  const execOptions = {
    timeout: 600_000,
    // ffmpeg writes progress and warnings to stderr. execFile's default cap is
    // 1 MiB, after which the child is killed — a long or warning-heavy encode
    // would fail even though ffmpeg itself was healthy.
    maxBuffer: 64 * 1024 * 1024,
  };
  return new Promise((resolve, reject) => {
    execFile(binary, args, execOptions, (err, stdout, stderr) => {
      if (err) {
        // Attach the captured streams so callers can show ffmpeg's own message.
        err.stderr = stderr;
        err.stdout = stdout;
        return reject(err);
      }
      resolve({ stdout, stderr });
    });
  });
}
|
|
102
|
+
|
|
103
|
+
// ── Constants ─────────────────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
const MAX_LINE_CHARS = 16; // CJK characters per visual line
const PUNCT_BIAS = 4; // last N chars: prefer to break after a punct

// Non-ASCII marks are written as \u escapes so the character classes survive
// any re-encoding of this file:
//   \u3002 ideographic full stop   \uFF01 full-width !   \uFF1F full-width ?
//   \u2026 horizontal ellipsis     \u2014 em dash
//   \uFF0C full-width comma        \u3001 ideographic comma
//   \uFF1B full-width ;            \uFF1A full-width :
//
// A sentence is a run of non-terminators followed by the WHOLE run of
// terminators after it, so doubled marks ("?!", "\u2026\u2026", "\u2014\u2014")
// stay attached instead of being dropped from the caption.
const SENTENCE_SPLIT_RE = /[^\u3002\uFF01\uFF1F!?.\u2026\u2014]+[\u3002\uFF01\uFF1F!?.\u2026\u2014]*/g;
// Soft punctuation after which a caption line may be broken early.
const PUNCT_BREAK_RE = /[,\uFF0C\u3001;\uFF1B:\uFF1A\u2014]/;
// Floor applied to a sentence's on-screen time when slicing a card window.
const MIN_SENTENCE_DURATION_MS = 600;

// ASS PlayResY=288 default — FontSize / MarginV below are expressed in that
// script resolution, not in output pixels, which is why they look "small".
const DEFAULT_STYLE = [
  'FontName=PingFang SC',
  'FontSize=8',
  'PrimaryColour=&HFFFFFF&',
  'OutlineColour=&H000000&',
  'BackColour=&HC0000000&',
  'BorderStyle=3',
  'Outline=2',
  'Shadow=0',
  'MarginV=14',
  'MarginL=24',
  'MarginR=24',
  'Alignment=2',
  'Bold=1',
].join(',');
|
|
119
|
+
|
|
120
|
+
// ── Sentence splitting & wrapping ────────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
/**
 * Split narration into sentences using SENTENCE_SPLIT_RE.
 *
 * Whitespace is normalised first: runs collapse to one space, and a space
 * that touches a CJK character (Han, Kana, CJK punctuation, full-width forms)
 * is removed, because CJK text carries no word spaces. Spaces between Latin
 * words are kept so English narration is not fused into one token.
 *
 * @param {string|null|undefined} text - Raw narration; nullish counts as ''.
 * @returns {string[]} At least one element. When nothing matches (no text, or
 *   only terminators) the single element is the whole cleaned text.
 */
function splitSentences(text) {
  const cleaned = String(text ?? '')
    .replace(/\s+/g, ' ')
    .trim()
    .replace(/([\u2E80-\u9FFF\uF900-\uFAFF\uFF00-\uFFEF]) /g, '$1')
    .replace(/ ([\u2E80-\u9FFF\uF900-\uFAFF\uFF00-\uFFEF])/g, '$1');
  const matches = cleaned.match(SENTENCE_SPLIT_RE);
  // trim() drops the space that follows a Latin sentence terminator.
  const sentences = matches ? matches.map((s) => s.trim()).filter(Boolean) : [];
  return sentences.length > 0 ? sentences : [cleaned];
}
|
|
132
|
+
|
|
133
|
+
/**
 * Hard-wrap a sentence into lines of at most `maxChars` characters, joined
 * with '\n'. A line is closed early when a soft punctuation mark
 * (PUNCT_BREAK_RE) lands within the last PUNCT_BIAS positions of the line, so
 * phrases tend to stay together.
 *
 * Characters are counted as Unicode code points, so an emoji or a
 * supplementary-plane ideograph is one unit and is never cut in half.
 *
 * @param {string} s - Sentence to wrap.
 * @param {number} [maxChars=MAX_LINE_CHARS] - Maximum characters per line.
 * @returns {string} `s` unchanged when it already fits, else the wrapped text.
 */
function wrapText(s, maxChars = MAX_LINE_CHARS) {
  const chars = Array.from(s);
  if (chars.length <= maxChars) return s;

  const lines = [];
  let current = [];
  for (const ch of chars) {
    current.push(ch);
    const lineFull = current.length >= maxChars;
    const softBreak = current.length >= maxChars - PUNCT_BIAS && PUNCT_BREAK_RE.test(ch);
    if (lineFull || softBreak) {
      lines.push(current.join(''));
      current = [];
    }
  }
  if (current.length > 0) lines.push(current.join(''));
  return lines.join('\n');
}
|
|
157
|
+
|
|
158
|
+
// ── SRT formatting ────────────────────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
/** Left-pad a number with zeros to at least two digits. */
function pad(n) { return String(n).padStart(2, '0'); }
/** Left-pad a number with zeros to at least three digits. */
function pad3(n) { return String(n).padStart(3, '0'); }

/**
 * Format a time offset as an SRT timestamp `HH:MM:SS,mmm`.
 *
 * The value is converted to whole milliseconds once, with rounding, and every
 * field is derived from that integer. Taking the fractional part of the float
 * and flooring it loses a millisecond for values such as 4.35.
 *
 * @param {number} seconds - Offset in seconds; negative values clamp to 0.
 * @returns {string} SRT timestamp.
 */
function fmtTime(seconds) {
  const totalMs = Math.round(Math.max(0, seconds) * 1000);
  const ms = totalMs % 1000;
  const totalSec = (totalMs - ms) / 1000;
  const s = totalSec % 60;
  const m = Math.floor(totalSec / 60) % 60;
  const h = Math.floor(totalSec / 3600);
  return `${pad(h)}:${pad(m)}:${pad(s)},${pad3(ms)}`;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Turn one card's narration into SRT cue objects.
 *
 * The narration is split into sentences; every sentence except the last gets
 * a share of the card window proportional to its length, but never less than
 * MIN_SENTENCE_DURATION_MS, and the last sentence runs to the end of the
 * window. A cue end is capped at `endMs`; if that would leave the cue empty
 * (end not after start) it is extended to start + MIN_SENTENCE_DURATION_MS,
 * which can reach past the window.
 *
 * @param {object} params
 * @param {string} params.narration - Card narration text.
 * @param {number} params.startMs - Window start, milliseconds.
 * @param {number} params.endMs - Window end, milliseconds.
 * @param {number} params.startIndex - SRT index assigned to the first cue.
 * @returns {{ index: number, start: number, end: number, text: string }[]}
 *   Cues with `start` / `end` in seconds and `text` already line-wrapped.
 */
function buildCuesForCard({ narration, startMs, endMs, startIndex }) {
  const parts = splitSentences(narration);
  const charTotal = parts.reduce((acc, part) => acc + part.length, 0) || 1;
  const windowMs = Math.max(0, endMs - startMs);
  const lastPos = parts.length - 1;

  const cues = [];
  let clock = startMs;
  parts.forEach((part, pos) => {
    const from = clock;

    let to;
    if (pos === lastPos) {
      to = endMs;
    } else {
      const share = Math.round((part.length / charTotal) * windowMs);
      to = from + Math.max(MIN_SENTENCE_DURATION_MS, share);
    }
    if (to > endMs) to = endMs;
    if (to <= from) to = from + MIN_SENTENCE_DURATION_MS;

    clock = to;
    cues.push({
      index: startIndex + pos,
      start: from / 1000,
      end: to / 1000,
      text: wrapText(part, MAX_LINE_CHARS),
    });
  });
  return cues;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Serialise cue objects into SRT text: index line, `start --> end` timing
 * line, caption text, with a blank line between consecutive cues.
 *
 * @param {{ index: number, start: number, end: number, text: string }[]} cues
 *   Cues with `start` / `end` in seconds.
 * @returns {string} SRT document ('' for an empty list).
 */
function formatSrt(cues) {
  const blocks = cues.map(({ index, start, end, text }) => {
    const timing = `${fmtTime(start)} --> ${fmtTime(end)}`;
    // The trailing '' yields the newline that terminates the caption text.
    return [`${index}`, timing, `${text}`, ''].join('\n');
  });
  return blocks.join('\n');
}
|
|
209
|
+
|
|
210
|
+
// ── Timeline + silence detection ─────────────────────────────────────────────
|
|
211
|
+
|
|
212
|
+
/**
 * Ask ffprobe for the container duration of `mp4`.
 *
 * @param {string} mp4 - Path of the media file.
 * @returns {Promise<number>} Duration in seconds.
 * @throws {Error} (as a rejection) when ffprobe's output is not a finite number.
 */
async function probeDurationSec(mp4) {
  const ffprobeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'csv=p=0',
    mp4,
  ];
  const probe = await runCommand('ffprobe', ffprobeArgs);
  const seconds = parseFloat(probe.stdout.trim());
  if (Number.isFinite(seconds)) return seconds;
  throw new Error(`Could not probe duration of ${mp4}`);
}
|
|
220
|
+
|
|
221
|
+
/**
 * Run ffmpeg's `silencedetect` audio filter over `mp4` and collect the silent
 * intervals it reports.
 *
 * @param {string} mp4 - Path of the media file.
 * @param {object} [options]
 * @param {number} [options.dB=-30] - Noise floor, in dB.
 * @param {number} [options.minSec=2.0] - Minimum silence length, in seconds.
 * @returns {Promise<{ start: number, end: number }[]>} Intervals in seconds,
 *   in the order reported. A `silence_end` without a preceding
 *   `silence_start` is ignored.
 */
async function detectSilences(mp4, { dB = -30, minSec = 2.0 } = {}) {
  // silencedetect reports on stderr. A rejection is folded into the result so
  // whatever stderr the error object carries can still be parsed.
  // NOTE(review): assumes runCommand resolves/rejects with an object exposing
  // `stderr` — confirm against core/ffmpeg.js.
  const result = await runCommand('ffmpeg', [
    '-i', mp4, '-af', `silencedetect=n=${dB}dB:d=${minSec}`, '-f', 'null', '-',
  ]).catch((err) => err);
  const stderr = (result && result.stderr) || '';

  const silences = [];
  let open = null;
  for (const line of stderr.split('\n')) {
    // Timestamps can be slightly negative at the start of a file and may be
    // printed in exponent form; a digits-and-dots pattern would skip the
    // former and truncate the latter.
    const startMatch = line.match(/silence_start:\s*(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/);
    if (startMatch) {
      open = { start: parseFloat(startMatch[1]) };
      continue;
    }
    const endMatch = line.match(/silence_end:\s*(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/);
    if (endMatch && open) {
      open.end = parseFloat(endMatch[1]);
      silences.push(open);
      open = null;
    }
  }
  return silences;
}

/**
 * Build per-card narration windows (in ms).
 *
 * Preferred source is timeline.json: each entry's `start`, `end` and
 * `narration` are used as-is. Without it, the windows are reconstructed from
 * silencedetect: consecutive pauses are taken as card boundaries. A silence
 * that begins at the very start of the file (for example a silent intro) is
 * not a boundary between two cards; it is skipped and its end becomes the
 * start of the first card.
 *
 * @param {object} params
 * @param {{ cards: object[] }} params.deck - Parsed deck.json.
 * @param {string} [params.timelinePath] - Path of timeline.json, if any.
 * @param {string} params.mp4Path - Rendered video, used by the fallback.
 * @returns {Promise<{ source: 'timeline'|'silencedetect',
 *   windows: { start: number, end: number, narration: string }[] }>}
 * @throws {Error} (as a rejection) when timeline.json and deck.json disagree
 *   on the card count, or too few pauses are detected.
 */
async function buildWindows({ deck, timelinePath, mp4Path }) {
  if (timelinePath && fs.existsSync(timelinePath)) {
    const timeline = JSON.parse(fs.readFileSync(timelinePath, 'utf8'));
    const cards = timeline.cards || [];
    if (cards.length !== deck.cards.length) {
      throw new Error(
        `timeline.json has ${cards.length} cards but deck.json has ${deck.cards.length}. ` +
        `Re-run \`voxflow card render\` to regenerate.`,
      );
    }
    return {
      source: 'timeline',
      windows: cards.map((c) => ({ start: c.start, end: c.end, narration: c.narration })),
    };
  }

  // Fallback: silencedetect heuristic
  const totalSec = await probeDurationSec(mp4Path);
  const detected = await detectSilences(mp4Path);

  // Lead-in silence: starts at (or within 50 ms of) the beginning of the file.
  const LEAD_IN_TOLERANCE_SEC = 0.05;
  const hasLeadIn = detected.length > 0 && detected[0].start <= LEAD_IN_TOLERANCE_SEC;
  const firstStart = hasLeadIn ? detected[0].end : 0;
  const silences = hasLeadIn ? detected.slice(1) : detected;

  if (silences.length < deck.cards.length - 1) {
    throw new Error(
      `silencedetect found ${silences.length} pauses but expected ≥ ${deck.cards.length - 1}. ` +
      `Re-render with \`voxflow card render\` to emit timeline.json instead.`,
    );
  }
  const windows = deck.cards.map((card, i) => {
    const start = i === 0 ? firstStart : silences[i - 1].end;
    // The last card runs to the next pause if one exists, else to end of file.
    const end = i < silences.length ? silences[i].start : totalSec;
    return {
      start: Math.round(start * 1000),
      end: Math.round(end * 1000),
      narration: card.narration || '',
    };
  });
  return { source: 'silencedetect', windows };
}
|
|
284
|
+
|
|
285
|
+
// ── Main pipeline ─────────────────────────────────────────────────────────────
|
|
286
|
+
|
|
287
|
+
/**
 * Load and validate `<dir>/deck.json`.
 *
 * @param {string} dir - Card output directory.
 * @returns {{ cards: object[] }} The parsed deck.
 * @throws {Error} When the file is missing, is not valid JSON, or has no
 *   `cards` array (callers index `deck.cards` directly).
 */
function readDeck(dir) {
  const deckPath = path.join(dir, 'deck.json');
  if (!fs.existsSync(deckPath)) {
    throw new Error(`No deck.json found in ${dir}.`);
  }

  let deck;
  try {
    deck = JSON.parse(fs.readFileSync(deckPath, 'utf8'));
  } catch (err) {
    // Name the file: a bare SyntaxError gives the user nothing to act on.
    throw new Error(`Could not parse ${deckPath}: ${err.message}`, { cause: err });
  }
  if (!deck || !Array.isArray(deck.cards)) {
    throw new Error(`${deckPath} has no "cards" array.`);
  }
  return deck;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Pick the mp4 that subtitles should be burned into.
 *
 * With `override`, that file is used (it must exist). Otherwise `dir` must
 * contain exactly one candidate `.mp4`. Files produced by this command are
 * not candidates: `*-no-subs.mp4` backups, and `*-subbed.mp4` temporaries
 * that a failed burn may have left behind.
 *
 * @param {string} dir - Card output directory.
 * @param {string} [override] - Explicit source path (`--input`).
 * @returns {string} Path of the source mp4 (absolute when `override` is used).
 * @throws {Error} When the override is missing, or `dir` holds zero or
 *   several candidates.
 */
function findSourceMp4(dir, override) {
  if (override) {
    if (!fs.existsSync(override)) throw new Error(`--input mp4 not found: ${override}`);
    return path.resolve(override);
  }

  // Case-insensitive, matching the /\.mp4$/i used when deriving output names.
  const isMp4 = (name) => /\.mp4$/i.test(name);
  const isOwnArtifact = (name) => /-(?:no-subs|subbed)\.mp4$/i.test(name);
  const candidates = fs.readdirSync(dir).filter((name) => isMp4(name) && !isOwnArtifact(name));

  if (candidates.length === 0) {
    throw new Error(
      `No source .mp4 in ${dir}. Run \`voxflow card render <dir>\` first.`,
    );
  }
  if (candidates.length > 1) {
    throw new Error(
      `Multiple .mp4 in ${dir}: ${candidates.join(', ')}. Pass --input <file> to disambiguate.`,
    );
  }
  return path.join(dir, candidates[0]);
}
|
|
316
|
+
|
|
317
|
+
/**
 * Escape a path for use as the VALUE of `subtitles='...'` in an ffmpeg -vf
 * argument (the caller supplies the surrounding single quotes).
 *
 * The value is parsed twice: by the filtergraph parser, which removes the
 * quoting, then by the filter's option parser, which splits on ':' and
 * honours backslash escapes.
 *   - Backslashes become '/' so Windows paths carry no stray escapes.
 *   - ':' becomes '\:'; inside the quotes the backslash survives the first
 *     pass and protects the colon in the second.
 *   - A single quote cannot be escaped inside a quoted section, so it is
 *     written as: close quote, `\\` and `\'` unquoted, reopen quote. That
 *     leaves `\'` for the option parser, which reduces it to `'`.
 *
 * @param {string} p - File system path.
 * @returns {string} Escaped text to place between single quotes.
 */
function escapeSubsPath(p) {
  return p
    .replace(/\\/g, '/')
    .replace(/'/g, "'\\\\\\''")
    .replace(/:/g, '\\:');
}
|
|
325
|
+
|
|
326
|
+
/**
 * Move a file, falling back to copy + delete when the destination is on a
 * different filesystem (rename fails with EXDEV there).
 */
function moveFileSync(from, to) {
  try {
    fs.renameSync(from, to);
  } catch (err) {
    if (err.code !== 'EXDEV') throw err;
    fs.copyFileSync(from, to);
    fs.unlinkSync(from);
  }
}

/**
 * Generate `<dir>/subs.srt` from the deck narration and burn it into the
 * rendered mp4.
 *
 * Steps: verify ffmpeg, load deck.json, pick the source mp4, build per-card
 * windows (timeline.json or silencedetect), write the SRT, then re-encode the
 * video with the `subtitles` filter. Without `output` the source is replaced
 * and the first original is kept as `<name>-no-subs.mp4`.
 *
 * @param {object} opts
 * @param {string} opts.dir - Card output directory.
 * @param {string} [opts.input] - Explicit source mp4.
 * @param {string} [opts.output] - Write here instead of replacing the source.
 * @param {string} [opts.style] - ASS force_style override (DEFAULT_STYLE otherwise).
 * @param {boolean} [opts.dryRun=false] - Stop after writing the SRT.
 * @returns {Promise<{ cues: number, srtPath: string, source: string, output?: string }>}
 *   `output` is absent for a dry run.
 * @throws {Error} (as a rejection) when ffmpeg is unavailable, inputs are
 *   missing or inconsistent, no card has narration, or the burn fails.
 */
async function cardSubtitle(opts) {
  const { dir, input, output, style: styleOverride, dryRun = false } = opts;

  const ffmpegInfo = await checkFfmpeg();
  if (!ffmpegInfo.available) {
    throw new Error(
      'ffmpeg not found. Install: brew install ffmpeg (macOS) / sudo apt install ffmpeg (Linux)',
    );
  }
  if (ffmpegInfo.source === 'ffmpeg-static') {
    let pkgVersion = '';
    try { pkgVersion = require('ffmpeg-static/package.json').version; } catch { /* unknown */ }
    console.log(` (using ffmpeg-static${pkgVersion ? ` v${pkgVersion}` : ''} — ffmpeg ${ffmpegInfo.version})`);
  }

  const deck = readDeck(dir);
  const mp4Path = findSourceMp4(dir, input);
  const timelinePath = path.join(dir, 'timeline.json');

  console.log(`\n=== VoxFlow Card Subtitle ===`);
  console.log(`Source : ${path.relative(process.cwd(), mp4Path)}`);
  console.log(`Cards : ${deck.cards.length}`);

  const { source, windows } = await buildWindows({ deck, timelinePath, mp4Path });
  console.log(`Timing : ${source}${source === 'timeline' ? ` (${path.basename(timelinePath)})` : ' (silencedetect fallback)'}`);

  // Build cues — cards without narration contribute none.
  const cues = [];
  let cueIndex = 1;
  for (let i = 0; i < deck.cards.length; i++) {
    const win = windows[i];
    if (!win.narration) continue;
    const cardCues = buildCuesForCard({
      narration: win.narration,
      startMs: win.start,
      endMs: win.end,
      startIndex: cueIndex,
    });
    cueIndex += cardCues.length;
    cues.push(...cardCues);
  }

  if (cues.length === 0) {
    throw new Error('No narration text to subtitle. All cards have empty `narration`.');
  }

  const srtPath = path.join(dir, 'subs.srt');
  fs.writeFileSync(srtPath, formatSrt(cues));
  console.log(`SRT : ${cues.length} cues → ${path.relative(process.cwd(), srtPath)}`);

  if (dryRun) {
    console.log(`\n--dry-run: stopped before ffmpeg burn-in. Edit ${path.basename(srtPath)} and re-run without --dry-run to bake.`);
    return { cues: cues.length, srtPath, source };
  }

  // ── Burn into mp4 ──────────────────────────────────────────────────────────
  const subbedTmp = mp4Path.replace(/\.mp4$/i, '-subbed.mp4');
  const noSubsBackup = mp4Path.replace(/\.mp4$/i, '-no-subs.mp4');
  const finalOut = output ? path.resolve(output) : mp4Path;
  const inPlace = !output || finalOut === path.resolve(mp4Path);

  const style = styleOverride || DEFAULT_STYLE;
  const escSrt = escapeSubsPath(srtPath);

  console.log(`\n Burning subs (FontSize / MarginV via ASS PlayResY=288 default)...`);
  // No `original_size`: the cues are composed for this very video, so the
  // filter's default (the input frame size) is the correct reference. A fixed
  // 1080x1920 would skew glyph aspect on decks rendered at another ratio.
  const vfArg = `subtitles='${escSrt}':force_style='${style}'`;
  if (process.env.VOXFLOW_DEBUG_SUBS) {
    console.error('[DEBUG vf]', JSON.stringify(vfArg));
  }

  // The `subtitles=` filter requires libass; pick a binary that has it.
  const { binary: ffmpegBin, source: ffmpegSource } = await probeSubtitlesCapableFfmpeg();
  if (ffmpegSource === 'ffmpeg-static') {
    console.log(` (system ffmpeg lacks libass — using bundled ffmpeg-static)`);
  }

  try {
    await runSpecificFfmpeg(ffmpegBin, [
      '-i', mp4Path,
      '-vf', vfArg,
      '-c:a', 'copy',
      '-c:v', 'libx264', '-preset', 'fast', '-crf', '20',
      '-movflags', '+faststart',
      '-y', subbedTmp,
    ]);
  } catch (err) {
    // Do not leave a partial temp behind: it is an extra .mp4 in <dir>.
    fs.rmSync(subbedTmp, { force: true });
    throw err;
  }

  if (inPlace) {
    // In-place replace: keep the original as -no-subs backup (first run only).
    // NOTE(review): when a backup already exists the source of this burn was
    // the current head mp4, which may already carry burned-in subtitles from
    // an earlier run — confirm whether re-runs should burn from the backup.
    if (!fs.existsSync(noSubsBackup)) {
      fs.renameSync(mp4Path, noSubsBackup);
    }
    // rename replaces an existing destination, so the head is never missing.
    moveFileSync(subbedTmp, finalOut);
    console.log(`\n=== Done ===`);
    console.log(`Output : ${path.relative(process.cwd(), finalOut)} (original kept as ${path.basename(noSubsBackup)})`);
  } else {
    moveFileSync(subbedTmp, finalOut);
    console.log(`\n=== Done ===`);
    console.log(`Output : ${path.relative(process.cwd(), finalOut)}`);
  }

  return { cues: cues.length, srtPath, source, output: finalOut };
}
|
|
435
|
+
|
|
436
|
+
// ── CLI handler ───────────────────────────────────────────────────────────────
|
|
437
|
+
|
|
438
|
+
/**
 * CLI entry point for `voxflow card subtitle`.
 *
 * Prints usage when called without arguments or when `--help` / `-h` appears
 * anywhere on the command line. Otherwise the first argument that is neither
 * a flag nor a flag's value is taken as the card directory, and the parsed
 * options are forwarded to cardSubtitle().
 *
 * @param {string[]} args - Arguments following `card subtitle`.
 * @returns {Promise<void>}
 */
async function handle(args) {
  const wantsHelp = args.length === 0 || args.includes('--help') || args.includes('-h');
  if (wantsHelp) {
    console.log(`Usage: voxflow card subtitle <dir> [options]

Burn per-sentence synced subtitles into a card-render MP4. Reads
<dir>/timeline.json (preferred) or falls back to ffmpeg silencedetect.
The original mp4 is preserved as <name>-no-subs.mp4.

Options:
<dir> Card output directory (must contain deck.json + .mp4)

--input <path> Override source mp4 (default: the only .mp4 in <dir>
that doesn't end with -no-subs.mp4).
-o, --output <path> Write output here instead of replacing the source.
--style <ass> Override ASS force_style string (advanced).
--dry-run Write subs.srt but skip ffmpeg burn-in.

Examples:
voxflow card subtitle cards/why-no-savings/
voxflow card subtitle cards/why-no-savings/ --dry-run # inspect SRT first
voxflow card subtitle cards/why-no-savings/ -o subbed.mp4`);
    return;
  }

  // Flags that consume the following argument; their values must not be
  // mistaken for the directory.
  const valuedFlags = new Set(['--input', '--output', '-o', '--style']);
  let dir;
  for (let i = 0; i < args.length; i++) {
    if (args[i].startsWith('-')) {
      if (valuedFlags.has(args[i])) i++;
      continue;
    }
    dir = args[i];
    break;
  }
  if (!dir) {
    console.error('Error: provide the card output directory');
    process.exit(1);
  }

  const input = parseFlag(args, '--input');
  const output = parseFlag(args, '--output', '-o');
  const style = parseFlag(args, '--style');
  const dryRun = args.includes('--dry-run');

  await cardSubtitle({
    dir: path.resolve(dir),
    input: input ? path.resolve(input) : undefined,
    output: output || undefined,
    style: style || undefined,
    dryRun,
  });
}
|
|
483
|
+
|
|
484
|
+
module.exports = {
|
|
485
|
+
cardSubtitle,
|
|
486
|
+
splitSentences,
|
|
487
|
+
wrapText,
|
|
488
|
+
fmtTime,
|
|
489
|
+
buildCuesForCard,
|
|
490
|
+
formatSrt,
|
|
491
|
+
buildWindows,
|
|
492
|
+
escapeSubsPath,
|
|
493
|
+
probeSubtitlesCapableFfmpeg,
|
|
494
|
+
DEFAULT_STYLE,
|
|
495
|
+
MAX_LINE_CHARS,
|
|
496
|
+
handle,
|
|
497
|
+
};
|