voxflow 1.17.1 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/lib/commands/asr/index.js +1 -1
- package/lib/commands/card-render.js +44 -3
- package/lib/commands/card-subtitle.js +497 -0
- package/lib/commands/card.js +46 -25
- package/lib/commands/dub.js +1 -1
- package/lib/commands/explain.js +3 -3
- package/lib/commands/narrate.js +1 -1
- package/lib/commands/picstory.js +3 -3
- package/lib/commands/podcast/index.js +1 -1
- package/lib/commands/present.js +1 -1
- package/lib/commands/publish.js +1 -1
- package/lib/commands/slides/index.js +1 -1
- package/lib/commands/story.js +1 -1
- package/lib/commands/summarize.js +3 -3
- package/lib/commands/translate.js +1 -1
- package/lib/commands/video-translate.js +1 -1
- package/lib/commands/voices.js +2 -2
- package/package.json +1 -1
- package/skills/.claude-plugin/plugin.json +1 -1
- package/skills/card/SKILL.md +24 -1
package/lib/commands/card.js
CHANGED
|
@@ -1,55 +1,76 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* VoxFlow CLI — card command
|
|
3
3
|
*
|
|
4
|
-
* Dispatches
|
|
4
|
+
* Dispatches:
|
|
5
|
+
* - `voxflow card render <dir>` → card-render.js (deck → narrated MP4)
|
|
6
|
+
* - `voxflow card subtitle <dir>` → card-subtitle.js (MP4 → +synced subs)
|
|
7
|
+
*
|
|
5
8
|
* The card *generation* workflow lives in cli/skills/card/SKILL.md —
|
|
6
|
-
* Claude Code calls render-cards.mjs directly.
|
|
7
|
-
* post-generation video render
|
|
9
|
+
* Claude Code calls render-cards.mjs directly. These commands handle the
|
|
10
|
+
* post-generation video render + subtitle steps which need API auth /
|
|
11
|
+
* ffmpeg-static fallback / timeline.json plumbing.
|
|
8
12
|
*/
|
|
9
13
|
|
|
10
14
|
'use strict';
|
|
11
15
|
|
|
12
16
|
async function handle(args) {
|
|
13
|
-
if (args.length > 0
|
|
14
|
-
|
|
15
|
-
|
|
17
|
+
if (args.length > 0) {
|
|
18
|
+
if (args[0] === 'render') {
|
|
19
|
+
const cardRender = require('./card-render');
|
|
20
|
+
return cardRender.handle(args.slice(1));
|
|
21
|
+
}
|
|
22
|
+
if (args[0] === 'subtitle' || args[0] === 'subtitles' || args[0] === 'subs') {
|
|
23
|
+
const cardSubtitle = require('./card-subtitle');
|
|
24
|
+
return cardSubtitle.handle(args.slice(1));
|
|
25
|
+
}
|
|
16
26
|
}
|
|
17
27
|
|
|
18
28
|
// Default: usage
|
|
19
29
|
console.log(`Usage:
|
|
20
|
-
voxflow card render <dir>
|
|
30
|
+
voxflow card render <dir> Render card directory → narrated MP4 video
|
|
31
|
+
voxflow card subtitle <dir> Burn per-sentence synced subs into the rendered MP4
|
|
21
32
|
|
|
22
33
|
Subcommands:
|
|
23
34
|
render Synthesize TTS narration + render Ken Burns video from card PNGs
|
|
35
|
+
(emits timeline.json next to deck.json — used by \`subtitle\`)
|
|
36
|
+
subtitle Read deck.json + timeline.json, allocate per-sentence cues by
|
|
37
|
+
char count, and burn subtitles into the source mp4 in-place.
|
|
38
|
+
Original kept as <name>-no-subs.mp4. Aliases: subs / subtitles.
|
|
24
39
|
|
|
25
|
-
See: voxflow card render --help
|
|
40
|
+
See: voxflow card render --help
|
|
41
|
+
voxflow card subtitle --help`);
|
|
26
42
|
}
|
|
27
43
|
|
|
28
44
|
const meta = {
|
|
29
45
|
card: {
|
|
30
|
-
usage: '
|
|
31
|
-
description: 'Card video
|
|
46
|
+
usage: '<subcommand> <dir> [options]',
|
|
47
|
+
description: 'Card video pipeline: deck.json + PNGs → narrated MP4 (`render`) → synced sentence-level subtitles (`subtitle`).',
|
|
32
48
|
options: [
|
|
33
49
|
'render <dir> Render deck.json + PNGs → narrated MP4 video',
|
|
34
|
-
'
|
|
35
|
-
'--
|
|
36
|
-
'--
|
|
37
|
-
'--
|
|
38
|
-
'--
|
|
39
|
-
'--no-
|
|
40
|
-
'--no-
|
|
41
|
-
'--
|
|
42
|
-
'--
|
|
43
|
-
'--
|
|
44
|
-
'--
|
|
45
|
-
'--bgm
|
|
46
|
-
'-
|
|
50
|
+
'subtitle <dir> Burn per-sentence synced subs into the rendered MP4',
|
|
51
|
+
'--voice <id> [render] TTS voice ID (default: v-female-R2s4N9qJ)',
|
|
52
|
+
'--speed <n> [render] TTS speed, 0.5-2.0 (default: 1.0)',
|
|
53
|
+
'--no-audio [render] Silent video — skip TTS synthesis',
|
|
54
|
+
'--pause <sec> [render] Silence after narration (default: 2.5)',
|
|
55
|
+
'--hold <sec> [render] Card duration in --no-audio mode (default: 5)',
|
|
56
|
+
'--no-intro [render] Skip intro title card',
|
|
57
|
+
'--no-outro [render] Skip outro branding card',
|
|
58
|
+
'--intro-dur <sec> [render] Intro duration (default: 2.5)',
|
|
59
|
+
'--outro-dur <sec> [render] Outro duration (default: 2)',
|
|
60
|
+
'--no-subtitle [render] Disable in-render subtitle bar',
|
|
61
|
+
'--bgm <path> [render] Background music (loops at low volume)',
|
|
62
|
+
'--bgm-volume <n> [render] BGM volume, 0-1 (default: 0.08)',
|
|
63
|
+
'--input <path> [subtitle] Override source mp4',
|
|
64
|
+
'--style <ass> [subtitle] ASS force_style override (advanced)',
|
|
65
|
+
'--dry-run [subtitle] Write subs.srt but skip ffmpeg burn-in',
|
|
66
|
+
'-o, --output <path> Output MP4 path (default: <dir>/<title>.mp4 or in-place)',
|
|
47
67
|
],
|
|
48
68
|
examples: [
|
|
49
69
|
'voxflow card render cards/fermentation/',
|
|
50
|
-
'voxflow card render cards/fermentation/ --
|
|
70
|
+
'voxflow card render cards/fermentation/ --no-intro --no-outro --no-subtitle',
|
|
71
|
+
'voxflow card subtitle cards/fermentation/',
|
|
72
|
+
'voxflow card subtitle cards/fermentation/ --dry-run',
|
|
51
73
|
'voxflow card render cards/fermentation/ --bgm ~/music/ambient.mp3',
|
|
52
|
-
'voxflow card render cards/fermentation/ --no-audio --no-subtitle',
|
|
53
74
|
],
|
|
54
75
|
},
|
|
55
76
|
};
|
package/lib/commands/dub.js
CHANGED
|
@@ -527,7 +527,7 @@ const meta = {
|
|
|
527
527
|
`--bgm <file> Background music file to mix in`,
|
|
528
528
|
`--ducking <n> BGM volume ducking 0-1.0 (default: ${DUB_DEFAULTS.ducking})`,
|
|
529
529
|
`--patch <id> Re-synthesize a single caption by ID (patch mode)`,
|
|
530
|
-
|
|
530
|
+
`-o, --output <path> Output file path (default: ./dub-<timestamp>.wav)`,
|
|
531
531
|
],
|
|
532
532
|
examples: [
|
|
533
533
|
'voxflow dub --srt subtitles.srt',
|
package/lib/commands/explain.js
CHANGED
|
@@ -476,7 +476,7 @@ async function handle(args) {
|
|
|
476
476
|
topic: parseFlag(args, '--topic') || undefined,
|
|
477
477
|
voice: parseFlag(args, '--voice') || undefined,
|
|
478
478
|
style: style || undefined,
|
|
479
|
-
language: parseFlag(args, '--language') || undefined,
|
|
479
|
+
language: parseFlag(args, '--language', '--lang') || undefined,
|
|
480
480
|
output, speed, scenes,
|
|
481
481
|
audioOnly: parseBoolFlag(args, '--audio-only'),
|
|
482
482
|
cloud: parseBoolFlag(args, '--cloud'),
|
|
@@ -492,13 +492,13 @@ const meta = {
|
|
|
492
492
|
options: [
|
|
493
493
|
`--topic <text> Topic to explain (use "demo" for built-in demo)`,
|
|
494
494
|
`--style <style> Visual style: modern (default), playful, corporate, chalkboard`,
|
|
495
|
-
`--language <code> Script language: en (default), zh, ja, ko, etc
|
|
495
|
+
`--language <code> Script language: en (default), zh, ja, ko, etc. (alias: --lang)`,
|
|
496
496
|
`--voice <id> TTS voice ID (default: ${EXPLAIN_DEFAULTS.voice})`,
|
|
497
497
|
`--speed <n> TTS speed 0.5-2.0 (default: ${EXPLAIN_DEFAULTS.speed})`,
|
|
498
498
|
`--scenes <n> Number of scenes, 3-12 (default: ${EXPLAIN_DEFAULTS.sceneCount})`,
|
|
499
499
|
`--audio-only Skip video render, output WAV narration only`,
|
|
500
500
|
`--cloud Render on cloud instead of local Remotion`,
|
|
501
|
-
|
|
501
|
+
`-o, --output <path> Output file path (default: ./explain-<timestamp>.mp4)`,
|
|
502
502
|
],
|
|
503
503
|
examples: [
|
|
504
504
|
'voxflow explain --topic "What is React?"',
|
package/lib/commands/narrate.js
CHANGED
|
@@ -612,7 +612,7 @@ const meta = {
|
|
|
612
612
|
`--format <fmt> Output format: pcm, wav, mp3 (default: pcm → WAV)`,
|
|
613
613
|
`--speed <n> TTS speed 0.5-2.0 (default: ${NARRATE_DEFAULTS.speed})`,
|
|
614
614
|
`--silence <sec> Silence between segments, 0-5.0 (default: ${NARRATE_DEFAULTS.silence})`,
|
|
615
|
-
|
|
615
|
+
`-o, --output <path> Output file path (default: matches input basename, e.g. design.md → design.wav)`,
|
|
616
616
|
],
|
|
617
617
|
examples: [
|
|
618
618
|
'voxflow narrate --input article.txt --voice v-female-R2s4N9qJ',
|
package/lib/commands/picstory.js
CHANGED
|
@@ -487,7 +487,7 @@ async function handle(args) {
|
|
|
487
487
|
outputDir: outputDir || undefined,
|
|
488
488
|
style: style || undefined,
|
|
489
489
|
ratio: ratio || undefined,
|
|
490
|
-
language: parseFlag(args, '--language') || undefined,
|
|
490
|
+
language: parseFlag(args, '--language', '--lang') || undefined,
|
|
491
491
|
sceneCount: scenes,
|
|
492
492
|
quality: quality || undefined,
|
|
493
493
|
fadeSeconds: fadeSeconds !== undefined ? fadeSeconds : undefined,
|
|
@@ -511,7 +511,7 @@ const meta = {
|
|
|
511
511
|
`--text <content> Input text content to visualize`,
|
|
512
512
|
`--style <name> Visual style: sketchnote (default), neon_noir, minimal_3d, chalkboard, photo, manga_panel, vintage_newspaper`,
|
|
513
513
|
`--ratio <name> Aspect ratio: portrait (default, 9:16), landscape (16:9), square (1:1)`,
|
|
514
|
-
`--language <code> Script language: zh (default), en, ja, etc
|
|
514
|
+
`--language <code> Script language: zh (default), en, ja, etc. (alias: --lang)`,
|
|
515
515
|
`--scenes <n> Number of scenes, 2-10 (default: ${PICSTORY_DEFAULTS.sceneCount})`,
|
|
516
516
|
`--quality <tier> Image quality: fast (default), hd, ultra (gpt-5.4-image-2, best quality, ~16× cost), hd-aiberm / fast-aiberm (Aiberm Gemini — strongest Chinese text rendering)`,
|
|
517
517
|
`--voice <id> TTS voice ID`,
|
|
@@ -522,7 +522,7 @@ const meta = {
|
|
|
522
522
|
`--fade <n> Scene fade-in/out duration in seconds (default: ${PICSTORY_DEFAULTS.fadeSeconds}, set 0 to disable)`,
|
|
523
523
|
`--image-only Save images+audio without rendering video`,
|
|
524
524
|
`--output-dir <dir> Directory for all output files (auto-created if needed)`,
|
|
525
|
-
|
|
525
|
+
`-o, --output <path> Output file path (overrides --output-dir)`,
|
|
526
526
|
],
|
|
527
527
|
examples: [
|
|
528
528
|
'voxflow picstory --topic "AI Agent 入门" --style sketchnote',
|
|
package/lib/commands/podcast/index.js
CHANGED
|
@@ -534,7 +534,7 @@ const meta = {
|
|
|
534
534
|
`--voice <id> Override TTS voice for all speakers`,
|
|
535
535
|
`--bgm <file> Background music file to mix in`,
|
|
536
536
|
`--ducking <n> BGM volume ducking 0-1.0 (default: ${PODCAST_DEFAULTS.ducking})`,
|
|
537
|
-
|
|
537
|
+
`-o, --output <path> Output WAV path (default: ./podcast-<timestamp>.wav)`,
|
|
538
538
|
`--speed <n> TTS speed 0.5-2.0 (default: ${PODCAST_DEFAULTS.speed})`,
|
|
539
539
|
`--silence <sec> Uniform silence override between segments, 0-5.0 (legacy)`,
|
|
540
540
|
`--pace <preset> Pacing preset: tight | natural | relaxed (default: natural).`,
|
package/lib/commands/present.js
CHANGED
|
@@ -495,7 +495,7 @@ const meta = {
|
|
|
495
495
|
`--speed <n> TTS speed 0.5-2.0 (default: ${PRESENT_DEFAULTS.speed})`,
|
|
496
496
|
`--no-audio Skip TTS, render silent video only`,
|
|
497
497
|
`--web-search Search the web for up-to-date info on the topic`,
|
|
498
|
-
|
|
498
|
+
`-o, --output <path> Output file path (default: ./present-<timestamp>.mp4)`,
|
|
499
499
|
],
|
|
500
500
|
examples: [
|
|
501
501
|
'voxflow present --text "Claude Code 是一个 AI 编程工具" --style aurora',
|
package/lib/commands/publish.js
CHANGED
|
@@ -395,7 +395,7 @@ const meta = {
|
|
|
395
395
|
'--audio <file> Mode C: merge existing audio into video',
|
|
396
396
|
'--voice <id> TTS voice for Mode A/B',
|
|
397
397
|
'--voices <file> Multi-speaker voice map for Mode A/B',
|
|
398
|
-
'--output <path>
|
|
398
|
+
'-o, --output <path> Final MP4 output path',
|
|
399
399
|
'--publish <target> local (default) | webhook | none',
|
|
400
400
|
'--publish-dir <dir> Local publish directory (for --publish local)',
|
|
401
401
|
'--publish-webhook <url> Webhook URL (for --publish webhook)',
|
|
package/lib/commands/slides/index.js
CHANGED
|
@@ -331,7 +331,7 @@ const meta = {
|
|
|
331
331
|
`--template <name> Template: product, report, tutorial, pitch, free (default: ${SLIDES_DEFAULTS.template})`,
|
|
332
332
|
`--model <id> Model: swift, balanced, pro, creative (default: ${SLIDES_DEFAULTS.model})`,
|
|
333
333
|
`--no-audio Skip TTS synthesis, generate slides only`,
|
|
334
|
-
|
|
334
|
+
`-o, --output <path> Output HTML file (default: ./slides-<timestamp>.html)`,
|
|
335
335
|
],
|
|
336
336
|
examples: [
|
|
337
337
|
'voxflow slides "AI in Healthcare"',
|
package/lib/commands/story.js
CHANGED
|
@@ -285,7 +285,7 @@ const meta = {
|
|
|
285
285
|
options: [
|
|
286
286
|
`--topic <text> Story topic (default: children's story)`,
|
|
287
287
|
`--voice <id> TTS voice ID (default: ${STORY_DEFAULTS.voice})`,
|
|
288
|
-
|
|
288
|
+
`-o, --output <path> Output WAV path (default: ./story-<timestamp>.wav)`,
|
|
289
289
|
`--paragraphs <n> Paragraph count, 1-20 (default: ${STORY_DEFAULTS.paragraphs})`,
|
|
290
290
|
`--speed <n> TTS speed 0.5-2.0 (default: ${STORY_DEFAULTS.speed})`,
|
|
291
291
|
`--silence <sec> Silence between paragraphs, 0-5.0 (default: ${STORY_DEFAULTS.silence})`,
|
|
package/lib/commands/summarize.js
CHANGED
|
@@ -462,7 +462,7 @@ async function handle(args) {
|
|
|
462
462
|
const voice = parseFlag(args, '--voice') || SUM_DEFS.voice;
|
|
463
463
|
const speed = parseFloatFlag(args, '--speed') ?? SUM_DEFS.speed;
|
|
464
464
|
const slideCount = parseIntFlag(args, '--slides') ?? SUM_DEFS.slides;
|
|
465
|
-
const language = parseFlag(args, '--lang') || SUM_DEFS.language;
|
|
465
|
+
const language = parseFlag(args, '--lang', '--language') || SUM_DEFS.language;
|
|
466
466
|
const engine = parseFlag(args, '--engine') || SUM_DEFS.engine;
|
|
467
467
|
const model = parseFlag(args, '--model');
|
|
468
468
|
const tts = parseBoolFlag(args, '--tts');
|
|
@@ -509,7 +509,7 @@ const meta = {
|
|
|
509
509
|
`--input <file> Input video/audio file → ASR + summarize`,
|
|
510
510
|
`--text <text> Direct text input (skip ASR)`,
|
|
511
511
|
`--slides <n> Number of slides, 4-12 (default: ${SUMMARIZE_DEFAULTS.slides})`,
|
|
512
|
-
`--lang <code> Output language: en, zh, ja, etc. (default: ${SUMMARIZE_DEFAULTS.language})`,
|
|
512
|
+
`--lang <code> Output language: en, zh, ja, etc. (default: ${SUMMARIZE_DEFAULTS.language}) (alias: --language)`,
|
|
513
513
|
`--engine <engine> ASR engine: auto, local, cloud (default: ${SUMMARIZE_DEFAULTS.engine})`,
|
|
514
514
|
`--model <model> Whisper model for local ASR: tiny, base, small, medium, large`,
|
|
515
515
|
`--tts Generate TTS narration audio for each slide`,
|
|
@@ -517,7 +517,7 @@ const meta = {
|
|
|
517
517
|
`--scheme <name> Video visual scheme: noir, neon, editorial, aurora (default), brutalist`,
|
|
518
518
|
`--voice <id> TTS voice ID (default: ${SUMMARIZE_DEFAULTS.voice})`,
|
|
519
519
|
`--speed <n> TTS speed 0.5-2.0 (default: ${SUMMARIZE_DEFAULTS.speed})`,
|
|
520
|
-
|
|
520
|
+
`-o, --output <path> Output PPTX path (default: <input>-summary.pptx)`,
|
|
521
521
|
],
|
|
522
522
|
examples: [
|
|
523
523
|
'voxflow summarize --input lecture.mp4',
|
|
package/lib/commands/translate.js
CHANGED
|
@@ -566,7 +566,7 @@ const meta = {
|
|
|
566
566
|
`--input <file> Text file (.txt, .md) to translate`,
|
|
567
567
|
`--from <lang> Source language code (default: auto-detect)`,
|
|
568
568
|
`--to <lang> Target language code (required)`,
|
|
569
|
-
|
|
569
|
+
`-o, --output <path> Output file path (default: <input>-<lang>.<ext>)`,
|
|
570
570
|
`--realign Adjust subtitle timing for target language length`,
|
|
571
571
|
`--batch-size <n> Captions per LLM call, 1-20 (default: ${TRANSLATE_DEFAULTS.batchSize})`,
|
|
572
572
|
],
|
|
package/lib/commands/video-translate.js
CHANGED
|
@@ -559,7 +559,7 @@ const meta = {
|
|
|
559
559
|
`--speed <n> TTS speed 0.5-2.0 (default: ${require('../core/config').VIDEO_TRANSLATE_DEFAULTS.speed})`,
|
|
560
560
|
`--batch-size <n> Translation batch size, 1-20 (default: ${require('../core/config').VIDEO_TRANSLATE_DEFAULTS.batchSize})`,
|
|
561
561
|
`--keep-intermediates Keep intermediate files (SRT, audio) for debugging`,
|
|
562
|
-
|
|
562
|
+
`-o, --output <path> Output MP4 path (default: <input>-<lang>.mp4)`,
|
|
563
563
|
`--asr-mode <mode> Override ASR mode: auto, sentence, flash, file`,
|
|
564
564
|
`--asr-lang <engine> Override ASR engine: 16k_zh, 16k_en, 16k_ja, 16k_ko, etc.`,
|
|
565
565
|
`--engine <engine> ASR engine: auto, local, cloud (default: auto)`,
|
package/lib/commands/voices.js
CHANGED
|
@@ -250,7 +250,7 @@ async function handle(args) {
|
|
|
250
250
|
api,
|
|
251
251
|
search: parseFlag(args, '--search'),
|
|
252
252
|
gender: parseFlag(args, '--gender'),
|
|
253
|
-
language: parseFlag(args, '--language'),
|
|
253
|
+
language: parseFlag(args, '--language', '--lang'),
|
|
254
254
|
useCase: parseFlag(args, '--use-case'),
|
|
255
255
|
json: parseBoolFlag(args, '--json'),
|
|
256
256
|
extended: parseBoolFlag(args, '--extended'),
|
|
@@ -275,7 +275,7 @@ const meta = {
|
|
|
275
275
|
`--mine List your cloned voices (requires login)`,
|
|
276
276
|
`--search <query> Search by name, tone, style, description`,
|
|
277
277
|
`--gender <m|f> Filter by gender: male/m or female/f`,
|
|
278
|
-
`--language <code> Filter by language: zh, en, etc
|
|
278
|
+
`--language <code> Filter by language: zh, en, etc. (alias: --lang)`,
|
|
279
279
|
`--use-case <tag> Filter by editorial-curated use case (e.g. podcast)`,
|
|
280
280
|
`--extended Include extended voice library (380+ voices)`,
|
|
281
281
|
`--json Output raw JSON instead of table`,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voxflow",
|
|
3
|
-
"version": "1.17.1",
|
|
3
|
+
"version": "1.18.0",
|
|
4
4
|
"description": "AI voice CLI bundled as 6 skills (hub, podcast, transcribe, video, slice, card). Synthesize speech in 200+ voices across 40+ languages, generate multi-speaker AI podcasts, transcribe audio/video with word-level timestamps, dub videos from SRT subtitles, run end-to-end video translation, turn long articles into vertical card video reels via Remotion, and turn text into polished shareable card images or narrated card videos. Backed by a hosted TTS/ASR/LLM/render service with per-user quota (free tier 10K/mo).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "VoxFlow",
|
package/skills/card/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: card
|
|
3
|
-
description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
|
|
3
|
+
description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix), and burns per-sentence synced captions for short-form video via `voxflow card subtitle` (timeline.json-driven, char-ratio time allocation, manual CJK line-wrapping). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# VoxFlow Skill — Card
|
|
@@ -409,6 +409,7 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
|
|
|
409
409
|
├── card-01.html … card-N.html (source HTML)
|
|
410
410
|
├── deck.json (narration + metadata)
|
|
411
411
|
├── exports/card-01.png … (PNG exports)
|
|
412
|
+
├── timeline.json (per-card start/end ms — used by `card subtitle`)
|
|
412
413
|
├── sources.md (attribution)
|
|
413
414
|
└── my-topic.mp4 (final video — slug derived from deck.meta.title)
|
|
414
415
|
```
|
|
@@ -439,6 +440,28 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
|
|
|
439
440
|
- **Quota**: ~50 per card narrated (`tts-synthesize`); zero with `--no-audio`. A 5-card deck costs ~250 quota total.
|
|
440
441
|
- For article-to-card VIDEO with Slice themes (paper-slide, editorial-mag, etc.), prefer `voxflow:slice` instead.
|
|
441
442
|
|
|
443
|
+
11. Burn per-sentence synced subtitles (optional — when the user wants a captioned reel for 小红书 / 抖音 / TikTok).
|
|
444
|
+
|
|
445
|
+
The `--no-subtitle` baseline is recommended for cards-as-cover short videos because the in-render subtitle bar shows the entire narration of a card for the full clip — fine for desktop preview, ineffective for short-form video. The dedicated `card subtitle` subcommand instead splits each card's narration into sentences and gives each its own time slice:
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
# 1. Render without the in-render subtitle bar and without intro/outro chrome
|
|
449
|
+
voxflow card render <output-dir>/ --no-intro --no-outro --no-subtitle
|
|
450
|
+
|
|
451
|
+
# 2. Burn synced sentence-level captions
|
|
452
|
+
voxflow card subtitle <output-dir>/
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
- The `render` step emits `timeline.json` next to `deck.json` with each card's exact `[start, end]` ms in the output mp4. `subtitle` reads it directly — no `silencedetect` guesswork.
|
|
456
|
+
- Sentences split on `[。!?!?.…—]`. Each sentence gets a time slice proportional to its character count; the last sentence absorbs any remainder so cues land exactly on the card boundary.
|
|
457
|
+
- CJK text is manually wrapped at ≤ 16 chars per visual line (with a soft preference for breaking after `[,,、;;::—]` when within the last 4 chars of a line). ffmpeg's `subtitles=` filter does not auto-wrap CJK, so this manual wrap is required.
|
|
458
|
+
- The original mp4 is preserved as `<name>-no-subs.mp4` so iteration is non-destructive.
|
|
459
|
+
- `--dry-run` writes `subs.srt` but skips the ffmpeg burn-in. Use it to inspect and hand-edit cues before committing.
|
|
460
|
+
- `--input <path>` / `-o, --output <path>` — operate on / write to a different mp4 (otherwise: replace in place).
|
|
461
|
+
- **Quota**: 0 — pure FFmpeg pipeline.
|
|
462
|
+
|
|
463
|
+
Note: `card subtitle` also has a `silencedetect` fallback for old mp4s that pre-date the `timeline.json` emission (introduced in CLI 1.18). Prefer the timeline path; it is exact rather than heuristic.
|
|
464
|
+
|
|
442
465
|
## Asset and Source Discipline
|
|
443
466
|
|
|
444
467
|
- Keep generated files contained in the requested output folder.
|