voxflow 1.17.2 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/lib/commands/card-render.js +42 -1
- package/lib/commands/card-subtitle.js +501 -0
- package/lib/commands/card.js +46 -25
- package/lib/core/ffmpeg.js +44 -7
- package/package.json +1 -1
- package/skills/.claude-plugin/plugin.json +1 -1
- package/skills/card/SKILL.md +24 -1
package/lib/core/ffmpeg.js
CHANGED
|
@@ -14,6 +14,43 @@ const fs = require('fs');
|
|
|
14
14
|
|
|
15
15
|
let _resolvedFfmpegPath = null;
|
|
16
16
|
|
|
17
|
+
/**
|
|
18
|
+
* Resolve the bundled `ffmpeg-static` binary path.
|
|
19
|
+
*
|
|
20
|
+
* Naively, `require('ffmpeg-static')` returns the path of its sibling `ffmpeg`
|
|
21
|
+
* binary. But ffmpeg-static's index.js uses `path.join(__dirname, ...)` to
|
|
22
|
+
* compute that path — and when we ncc-bundle this CLI into a single
|
|
23
|
+
* `dist/index.js`, `__dirname` inside the inlined ffmpeg-static module
|
|
24
|
+
* collapses to the *bundle*'s directory (`<install>/voxflow/dist/`), not its
|
|
25
|
+
* real `<install>/voxflow/node_modules/ffmpeg-static/` location. The returned
|
|
26
|
+
* string then points at a file that doesn't exist.
|
|
27
|
+
*
|
|
28
|
+
* Recovery: `require.resolve('ffmpeg-static/package.json')` honors Node's
|
|
29
|
+
* runtime module resolution (not the `__dirname` baked in at bundle time), so
|
|
30
|
+
* it finds the real package directory. The binary lives next to that
|
|
31
|
+
* package.json. This works in both source-mode (npm test) and ncc-bundled
|
|
32
|
+
* mode (the published CLI).
|
|
33
|
+
*
|
|
34
|
+
* Returns: a path string that has already been verified to exist via fs.existsSync.
|
|
35
|
+
* Or null when ffmpeg-static is not installed, or when its binary is missing on disk.
|
|
36
|
+
*/
|
|
37
|
+
function resolveFfmpegStaticBin() {
|
|
38
|
+
// 1. Naive path — works in source mode and any non-ncc context.
|
|
39
|
+
let direct = null;
|
|
40
|
+
try { direct = require('ffmpeg-static'); } catch { /* not installed */ }
|
|
41
|
+
if (direct && fs.existsSync(direct)) return direct;
|
|
42
|
+
|
|
43
|
+
// 2. ncc-safe recovery via package.json resolution.
|
|
44
|
+
try {
|
|
45
|
+
const pkgJson = require.resolve('ffmpeg-static/package.json');
|
|
46
|
+
const exe = process.platform === 'win32' ? 'ffmpeg.exe' : 'ffmpeg';
|
|
47
|
+
const recovered = path.join(path.dirname(pkgJson), exe);
|
|
48
|
+
if (fs.existsSync(recovered)) return recovered;
|
|
49
|
+
} catch { /* not installed */ }
|
|
50
|
+
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
|
|
17
54
|
/**
|
|
18
55
|
* Resolve the ffmpeg binary path. Priority:
|
|
19
56
|
* 1. System `ffmpeg` on PATH
|
|
@@ -30,13 +67,11 @@ function resolveFfmpegBin() {
|
|
|
30
67
|
} catch { /* not on PATH */ }
|
|
31
68
|
|
|
32
69
|
// Fall back to ffmpeg-static
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
}
|
|
39
|
-
} catch { /* not installed */ }
|
|
70
|
+
const staticPath = resolveFfmpegStaticBin();
|
|
71
|
+
if (staticPath) {
|
|
72
|
+
_resolvedFfmpegPath = staticPath;
|
|
73
|
+
return _resolvedFfmpegPath;
|
|
74
|
+
}
|
|
40
75
|
|
|
41
76
|
// Nothing found — return 'ffmpeg' and let it fail with a helpful error
|
|
42
77
|
_resolvedFfmpegPath = 'ffmpeg';
|
|
@@ -536,4 +571,6 @@ module.exports = {
|
|
|
536
571
|
concatVideos,
|
|
537
572
|
normalizeVideo,
|
|
538
573
|
detectCjkFont,
|
|
574
|
+
// ffmpeg-static path resolution (used by card-subtitle's libass fallback)
|
|
575
|
+
resolveFfmpegStaticBin,
|
|
539
576
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voxflow",
|
|
3
|
-
"version": "1.17.2",
|
|
3
|
+
"version": "1.18.1",
|
|
4
4
|
"description": "AI voice CLI bundled as 6 skills (hub, podcast, transcribe, video, slice, card). Synthesize speech in 200+ voices across 40+ languages, generate multi-speaker AI podcasts, transcribe audio/video with word-level timestamps, dub videos from SRT subtitles, run end-to-end video translation, turn long articles into vertical card video reels via Remotion, and turn text into polished shareable card images or narrated card videos. Backed by a hosted TTS/ASR/LLM/render service with per-user quota (free tier 10K/mo).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "VoxFlow",
|
package/skills/card/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: card
|
|
3
|
-
description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
|
|
3
|
+
description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix), and burns per-sentence synced captions for short-form video via `voxflow card subtitle` (timeline.json-driven, char-ratio time allocation, manual CJK line-wrapping). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# VoxFlow Skill — Card
|
|
@@ -409,6 +409,7 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
|
|
|
409
409
|
├── card-01.html … card-N.html (source HTML)
|
|
410
410
|
├── deck.json (narration + metadata)
|
|
411
411
|
├── exports/card-01.png … (PNG exports)
|
|
412
|
+
├── timeline.json (per-card start/end ms — used by `card subtitle`)
|
|
412
413
|
├── sources.md (attribution)
|
|
413
414
|
└── my-topic.mp4 (final video — slug derived from deck.meta.title)
|
|
414
415
|
```
|
|
@@ -439,6 +440,28 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
|
|
|
439
440
|
- **Quota**: ~50 per card narrated (`tts-synthesize`); zero with `--no-audio`. A 5-card deck costs ~250 quota total.
|
|
440
441
|
- For article-to-card VIDEO with Slice themes (paper-slide, editorial-mag, etc.), prefer `voxflow:slice` instead.
|
|
441
442
|
|
|
443
|
+
11. Burn per-sentence synced subtitles (optional — when the user wants a captioned reel for 小红书 / 抖音 / TikTok).
|
|
444
|
+
|
|
445
|
+
The `--no-subtitle` baseline is recommended for cards-as-cover short videos because the in-render subtitle bar shows the entire narration of a card for the full clip — fine for desktop preview, ineffective for short-form video. The dedicated `card subtitle` subcommand instead splits each card's narration into sentences and gives each its own time slice:
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
# 1. Render without the in-render subtitle bar and without intro/outro chrome
|
|
449
|
+
voxflow card render <output-dir>/ --no-intro --no-outro --no-subtitle
|
|
450
|
+
|
|
451
|
+
# 2. Burn synced sentence-level captions
|
|
452
|
+
voxflow card subtitle <output-dir>/
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
- The `render` step emits `timeline.json` next to `deck.json` with each card's exact `[start, end]` ms in the output mp4. `subtitle` reads it directly — no `silencedetect` guesswork.
|
|
456
|
+
- Sentences split on `[。！？!?.…—]`. Each sentence gets a time slice proportional to its character count; the last sentence absorbs any remainder so cues land exactly on the card boundary.
|
|
457
|
+
- CJK text is manually wrapped at ≤ 16 chars per visual line (with a soft preference for breaking after `[，,、；;：:—]` when within the last 4 chars of a line). ffmpeg's `subtitles=` filter does not auto-wrap CJK, so this manual wrap is required.
|
|
458
|
+
- The original mp4 is preserved as `<name>-no-subs.mp4` so iteration is non-destructive.
|
|
459
|
+
- `--dry-run` writes `subs.srt` but skips the ffmpeg burn-in. Use it to inspect and hand-edit cues before committing.
|
|
460
|
+
- `--input <path>` / `-o, --output <path>` — operate on / write to a different mp4 (otherwise: replace in place).
|
|
461
|
+
- **Quota**: 0 — pure FFmpeg pipeline.
|
|
462
|
+
|
|
463
|
+
Note: `card subtitle` also has a `silencedetect` fallback for old mp4s that pre-date the `timeline.json` emission (introduced in CLI 1.18). Prefer the timeline path; it is exact rather than heuristic.
|
|
464
|
+
|
|
442
465
|
## Asset and Source Discipline
|
|
443
466
|
|
|
444
467
|
- Keep generated files contained in the requested output folder.
|