voxflow 1.17.0 → 1.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,12 +118,99 @@ function escapeDrawtext(text) {
118
118
  .replace(/\n/g, ' ');
119
119
  }
120
120
 
121
+ /**
122
+ * Detect whether a string contains CJK characters that need a CJK fontfile.
123
+ * Covers Han (CJK Unified), Hiragana, Katakana, Hangul, and full-width punctuation.
124
+ */
125
+ function containsCjk(text) {
126
+ if (!text) return false;
127
+ // U+3001–303F CJK symbols & punctuation (skip U+3000 IDEOGRAPHIC SPACE — eslint flags it)
128
+ // U+3040–30FF Hiragana + Katakana
129
+ // U+3400–9FFF CJK Ext A + CJK Unified
130
+ // U+AC00–D7AF Hangul; U+FF00–FFEF Halfwidth/Fullwidth forms
131
+ return /[、-ヿ㐀-鿿가-힯＀-￯]/.test(text);
132
+ }
133
+
134
+ /**
135
+ * Locate a fontfile that supports CJK glyphs on the host platform.
136
+ *
137
+ * ffmpeg's `drawtext` filter, when no `fontfile=` is given, falls back to a
138
+ * built-in default that ships only Latin-1. CJK content rendered without an
139
+ * explicit CJK fontfile shows as `□` tofu boxes (issue #3592).
140
+ *
141
+ * Returns an absolute path to a known CJK-capable font, or null if none of
142
+ * the platform-specific candidates exist. Cached for the process lifetime.
143
+ * Override the search via `VOXFLOW_CJK_FONT=/path/to/font.ttc`.
144
+ *
145
+ * @returns {string|null}
146
+ */
147
+ let _cjkFontPathCache; // undefined = unknown, null = absent, string = found
148
+ function findCjkFontFile() {
149
+ if (_cjkFontPathCache !== undefined) return _cjkFontPathCache;
150
+
151
+ // User override wins over platform autodetect
152
+ if (process.env.VOXFLOW_CJK_FONT && fs.existsSync(process.env.VOXFLOW_CJK_FONT)) {
153
+ _cjkFontPathCache = process.env.VOXFLOW_CJK_FONT;
154
+ return _cjkFontPathCache;
155
+ }
156
+
157
+ const candidates = [];
158
+ if (process.platform === 'darwin') {
159
+ candidates.push(
160
+ '/System/Library/Fonts/PingFang.ttc',
161
+ '/System/Library/Fonts/Hiragino Sans GB.ttc',
162
+ '/System/Library/Fonts/STHeiti Medium.ttc',
163
+ '/System/Library/Fonts/STHeiti Light.ttc',
164
+ '/Library/Fonts/Songti.ttc',
165
+ );
166
+ } else if (process.platform === 'linux') {
167
+ candidates.push(
168
+ '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',
169
+ '/usr/share/fonts/opentype/noto/NotoSansCJK.ttc',
170
+ '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
171
+ '/usr/share/fonts/wqy-microhei/wqy-microhei.ttc',
172
+ '/usr/share/fonts/wqy-zenhei/wqy-zenhei.ttc',
173
+ '/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc',
174
+ );
175
+ } else if (process.platform === 'win32') {
176
+ candidates.push(
177
+ 'C:/Windows/Fonts/msyh.ttc',
178
+ 'C:/Windows/Fonts/msyhbd.ttc',
179
+ 'C:/Windows/Fonts/simsun.ttc',
180
+ 'C:/Windows/Fonts/yugothic.ttf',
181
+ );
182
+ }
183
+
184
+ for (const p of candidates) {
185
+ if (fs.existsSync(p)) {
186
+ _cjkFontPathCache = p;
187
+ return _cjkFontPathCache;
188
+ }
189
+ }
190
+ _cjkFontPathCache = null;
191
+ return null;
192
+ }
193
+
194
+ /**
195
+ * Build the drawtext fontfile= clause when the text contains CJK and a
196
+ * suitable font is available on the host. Returns either ":fontfile=…" (with
197
+ * leading colon, ready to splice into a drawtext arg list) or "" when no
198
+ * font override is needed (ASCII-only text or no CJK font on host).
199
+ */
200
+ function drawtextFontfileClause(text, cjkFontPath) {
201
+ if (!text || !cjkFontPath) return '';
202
+ if (!containsCjk(text)) return '';
203
+ // ffmpeg fontfile= path needs `:` and `\` escaped inside a filter arg.
204
+ const escaped = cjkFontPath.replace(/\\/g, '/').replace(/:/g, '\\:');
205
+ return `:fontfile='${escaped}'`;
206
+ }
207
+
121
208
  // ── Render functions ──────────────────────────────────────────────────────────
122
209
 
123
210
  /**
124
211
  * Render a single card: PNG + optional WAV → MP4 clip with subtitle overlay.
125
212
  */
126
- async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, subtitle, hasDrawtext = false }) {
213
+ async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, subtitle, hasDrawtext = false, cjkFontPath = null }) {
127
214
  const { w, h } = RATIO_DIMS[ratio] || RATIO_DIMS['9:16'];
128
215
  const durationSec = Math.max(3, durationMs / 1000);
129
216
 
@@ -139,8 +226,9 @@ async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, su
139
226
  const escaped = escapeDrawtext(subtitle);
140
227
  const fontSize = Math.round(SUB_FONT_SIZE * (w / 1080));
141
228
  const boxY = h - SUB_MARGIN_BOTTOM - fontSize - SUB_PADDING * 2;
229
+ const fontfile = drawtextFontfileClause(subtitle, cjkFontPath);
142
230
  vfParts.push(
143
- `drawtext=text='${escaped}':fontsize=${fontSize}:fontcolor=white:` +
231
+ `drawtext=text='${escaped}'${fontfile}:fontsize=${fontSize}:fontcolor=white:` +
144
232
  `x=(w-text_w)/2:y=${boxY + SUB_PADDING}:` +
145
233
  `box=1:boxcolor=black@${SUB_BOX_OPACITY}:boxborderw=${SUB_PADDING}`,
146
234
  );
@@ -177,7 +265,7 @@ async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, su
177
265
  * Generate an intro or outro card via FFmpeg color source (simple solid + no text).
178
266
  * Text overlay requires drawtext (libfreetype); if unavailable, renders a plain color card.
179
267
  */
180
- async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec = 3, bgColor = '1a1520', textColor = 'f4efe6', fadeSeconds = 0.4, isFirst = false, isLast = false, hasDrawtext = false }) {
268
+ async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec = 3, bgColor = '1a1520', textColor = 'f4efe6', fadeSeconds = 0.4, isFirst = false, isLast = false, hasDrawtext = false, cjkFontPath = null }) {
181
269
  const { w, h } = RATIO_DIMS[ratio] || RATIO_DIMS['9:16'];
182
270
  const fd = fadeSeconds;
183
271
 
@@ -185,19 +273,21 @@ async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec =
185
273
  const subSize = Math.round(32 * (w / 1080));
186
274
  const escapedTitle = escapeDrawtext(title || '');
187
275
  const escapedSub = escapeDrawtext(subtitle || '');
276
+ const titleFontfile = drawtextFontfileClause(title, cjkFontPath);
277
+ const subFontfile = drawtextFontfileClause(subtitle, cjkFontPath);
188
278
 
189
279
  const vfParts = [`color=c=0x${bgColor}:s=${w}x${h}:d=${durationSec}:r=30`];
190
280
 
191
281
  if (hasDrawtext) {
192
282
  if (escapedTitle) {
193
283
  vfParts.push(
194
- `drawtext=text='${escapedTitle}':fontsize=${titleSize}:fontcolor=0x${textColor}:` +
284
+ `drawtext=text='${escapedTitle}'${titleFontfile}:fontsize=${titleSize}:fontcolor=0x${textColor}:` +
195
285
  `x=(w-text_w)/2:y=(h-text_h)/2-${Math.round(subSize * 1.5)}`,
196
286
  );
197
287
  }
198
288
  if (escapedSub) {
199
289
  vfParts.push(
200
- `drawtext=text='${escapedSub}':fontsize=${subSize}:fontcolor=0x${textColor}@0.6:` +
290
+ `drawtext=text='${escapedSub}'${subFontfile}:fontsize=${subSize}:fontcolor=0x${textColor}@0.6:` +
201
291
  `x=(w-text_w)/2:y=(h-text_h)/2+${Math.round(titleSize * 0.8)}`,
202
292
  );
203
293
  }
@@ -283,6 +373,7 @@ async function cardRender(opts) {
283
373
 
284
374
  // Check drawtext filter availability (needs libfreetype)
285
375
  let hasDrawtext = false;
376
+ let cjkFontPath = null;
286
377
  if (!noSubtitle) {
287
378
  try {
288
379
  const { stdout } = await runCommand('ffmpeg', ['-hide_banner', '-filters']);
@@ -290,6 +381,24 @@ async function cardRender(opts) {
290
381
  } catch { /* unavailable */ }
291
382
  if (!hasDrawtext) {
292
383
  console.log(` (drawtext unavailable — subtitles disabled)`);
384
+ } else {
385
+ // Detect CJK content in titles/narrations and locate a CJK fontfile if needed.
386
+ // ffmpeg's default drawtext font is Latin-1 only; without an explicit fontfile
387
+ // CJK characters render as `□` tofu boxes (#3592).
388
+ const allText = [
389
+ deck.meta?.title || '',
390
+ ...cards.flatMap((c) => [c.title || '', c.narration || '']),
391
+ ].join('\n');
392
+ if (containsCjk(allText)) {
393
+ cjkFontPath = findCjkFontFile();
394
+ if (cjkFontPath) {
395
+ console.log(` (CJK detected — using ${path.basename(cjkFontPath)} for overlay text)`);
396
+ } else {
397
+ console.log(` (CJK detected but no CJK font found — overlay text will show as □.`);
398
+ console.log(` Install Noto Sans CJK or set VOXFLOW_CJK_FONT=/path/to/font.ttc,`);
399
+ console.log(` or rerun with --no-subtitle --no-intro --no-outro.)`);
400
+ }
401
+ }
293
402
  }
294
403
  }
295
404
 
@@ -313,7 +422,7 @@ async function cardRender(opts) {
313
422
  outPath: introPath, ratio, title,
314
423
  subtitle: deck.meta?.language === 'zh' ? '知识卡片' : 'Card Series',
315
424
  durationSec: introDuration, fadeSeconds: 0,
316
- isFirst: true, isLast: false, hasDrawtext,
425
+ isFirst: true, isLast: false, hasDrawtext, cjkFontPath,
317
426
  });
318
427
  clipPaths.push(introPath);
319
428
  }
@@ -354,7 +463,7 @@ async function cardRender(opts) {
354
463
  console.log(` Rendering card ${i + 1}/${cards.length}...`);
355
464
  await renderCardClip({
356
465
  pngPath, wavPath, outPath: clipOut,
357
- durationMs, ratio, hasDrawtext,
466
+ durationMs, ratio, hasDrawtext, cjkFontPath,
358
467
  subtitle: noSubtitle ? null : (card.narration || card.title || null),
359
468
  });
360
469
  clipPaths.push(clipOut);
@@ -370,7 +479,7 @@ async function cardRender(opts) {
370
479
  subtitle: 'voxflow.studio',
371
480
  durationSec: outroDuration, fadeSeconds: 0,
372
481
  bgColor: '0d0b14',
373
- isFirst: false, isLast: true, hasDrawtext,
482
+ isFirst: false, isLast: true, hasDrawtext, cjkFontPath,
374
483
  });
375
484
  clipPaths.push(outroPath);
376
485
  }
@@ -516,5 +625,8 @@ module.exports = {
516
625
  renderTitleCard,
517
626
  escapeDrawtext,
518
627
  writePcmAsWav,
628
+ containsCjk,
629
+ findCjkFontFile,
630
+ drawtextFontfileClause,
519
631
  handle,
520
632
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voxflow",
3
- "version": "1.17.0",
3
+ "version": "1.17.1",
4
4
  "description": "AI audio content creation CLI — stories, podcasts, narration, dubbing, transcription, translation, and video translation with TTS",
5
5
  "bin": {
6
6
  "voxflow": "./dist/index.js"
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voxflow",
3
- "version": "1.17.0",
3
+ "version": "1.17.1",
4
4
  "description": "AI voice CLI bundled as 6 skills (hub, podcast, transcribe, video, slice, card). Synthesize speech in 200+ voices across 40+ languages, generate multi-speaker AI podcasts, transcribe audio/video with word-level timestamps, dub videos from SRT subtitles, run end-to-end video translation, turn long articles into vertical card video reels via Remotion, and turn text into polished shareable card images or narrated card videos. Backed by a hosted TTS/ASR/LLM/render service with per-user quota (free tier 10K/mo).",
5
5
  "author": {
6
6
  "name": "VoxFlow",
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: card
3
- description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces narrated MP4 video from those cards via `voxflow card render` (Ken Burns + TTS). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
3
+ description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
4
4
  ---
5
5
 
6
6
  # VoxFlow Skill — Card
@@ -374,7 +374,7 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
374
374
  "meta": {
375
375
  "title": "<Series title>",
376
376
  "ratio": "<ratio used: 9:16 | 1:1 | 3:4>",
377
- "language": "<zh | en>"
377
+ "language": "<zh | en | ja | ...>"
378
378
  },
379
379
  "cards": [
380
380
  { "file": "card-01.html", "title": "...", "narration": "1-3 sentence spoken caption." },
@@ -383,6 +383,11 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
383
383
  }
384
384
  ```
385
385
 
386
+ - Field semantics:
387
+ - `meta.title` — drives the intro card text and the default output filename (slugified: `[^a-z0-9一-鿿]` → `-`, lowercased; CJK is preserved).
388
+ - `meta.language` — only `"zh"` switches the intro subtitle to "知识卡片"; any other value (including `"ja"`, `"en"`, `"mixed"`) falls back to "Card Series".
389
+ - `card.title` — used as the on-screen subtitle bar fallback when `card.narration` is empty.
390
+ - `card.narration` — the spoken caption fed to TTS and (by default) also rendered as the subtitle bar text.
386
391
  - Narration rules:
387
392
  - Write narration in the same language as the card copy.
388
393
  - 1-3 sentences per card. Natural spoken rhythm — avoid lists, avoid bullet-speak.
@@ -405,22 +410,33 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
405
410
  ├── deck.json (narration + metadata)
406
411
  ├── exports/card-01.png … (PNG exports)
407
412
  ├── sources.md (attribution)
408
- └── my-topic.mp4 (final video — default output here)
413
+ └── my-topic.mp4 (final video — slug derived from deck.meta.title)
409
414
  ```
410
415
 
411
- - **Key parameters** (pick based on user preference):
416
+ - **Audio / TTS**:
412
417
  - `--voice <id>` — TTS voice. Suggest `voxflow voices` to browse.
413
418
  - `--speed <n>` — narration speed 0.5-2.0 (default: 1.0)
414
- - `--pause <sec>` — silence after each card's narration for reading time (default: 2.5)
419
+ - `--no-audio` — skip TTS, produce a silent video (zero quota)
420
+ - **Timing**:
421
+ - `--pause <sec>` — silence after each card's narration for reading time (default: 2.5). Baked into the WAV so it always shows in the final clip.
415
422
  - `--hold <sec>` — card duration in `--no-audio` mode (default: 5)
416
- - `--bgm <path>` — background music file (loops at low volume)
417
- - `--no-audio` — skip TTS, produce silent video
418
- - `--no-intro` / `--no-outro` — skip title/branding cards
419
- - `-o <path>` — custom output path
420
-
421
- - Default output: `<dir>/<deck title>.mp4` (next to the cards).
422
- - No external dependencies beyond FFmpeg (auto-detected; `ffmpeg-static` as fallback).
423
+ - **Structure**:
424
+ - `--no-intro` / `--no-outro` — skip title / branding cards
425
+ - `--intro-dur <sec>` — intro card duration (default: 2.5)
426
+ - `--outro-dur <sec>` — outro card duration (default: 2)
427
+ - **Overlay & mix**:
428
+ - `--no-subtitle` — disable the bottom subtitle bar (subtitles need FFmpeg with `libfreetype`; auto-detected and skipped if missing)
429
+ - `--bgm <path>` — background music, looped at low volume
430
+ - `--bgm-volume <n>` — BGM volume 0-1 (default: 0.08)
431
+ - **Output**:
432
+ - `-o <path>` / `--output <path>` — custom output path (parents auto-created)
433
+
434
+ - **CJK content** (since CLI 1.17.1): subtitles, intro, and outro overlays auto-detect CJK text in `meta.title` / `card.title` / `card.narration` and inject a CJK-capable system fontfile (PingFang / Hiragino / Heiti on macOS; Noto CJK / WQY on Linux; msyh / SimSun on Windows). If your platform has no CJK font installed, set `VOXFLOW_CJK_FONT=/path/to/font.ttc` to point at one explicitly. When neither autodetect nor override finds a font, the command logs a warning and you should fall back to `--no-subtitle --no-intro --no-outro` to avoid `□` tofu boxes.
435
+
436
+ - Default output: `<dir>/<slugified deck.meta.title>.mp4` (next to the cards). If `meta.title` is empty, falls back to `cards.mp4`.
437
+ - No external dependencies beyond FFmpeg (auto-detected; falls back to `ffmpeg-static` npm package when system ffmpeg is missing).
423
438
  - Intermediate files (WAVs, clips) stored in `<dir>/.card-render-work/` — auto-cleaned on success, preserved on failure for debugging.
439
+ - **Quota**: ~50 per card narrated (`tts-synthesize`); zero with `--no-audio`. A 5-card deck costs ~250 quota total.
424
440
  - For article-to-card VIDEO with Slice themes (paper-slide, editorial-mag, etc.), prefer `voxflow:slice` instead.
425
441
 
426
442
  ## Asset and Source Discipline
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: hub
3
- description: Use when the user wants to read text aloud (TTS), search VoxFlow voices, sample AI stories, or set up VoxFlow install/auth/quota — the entry-point voice toolkit. For podcasts use voxflow:podcast; for short videos / AI clips use voxflow:video; for article-to-card reels (Slice) use voxflow:slice; for transcription / dubbing / subtitle translation use voxflow:transcribe.
3
+ description: Use when the user wants to read text aloud (TTS), search VoxFlow voices, sample AI stories, or set up VoxFlow install/auth/quota — the entry-point voice toolkit. For podcasts use voxflow:podcast; for short videos / AI clips use voxflow:video; for article-to-card reels (Slice) use voxflow:slice; for shareable card images or narrated card videos use voxflow:card; for transcription / dubbing / subtitle translation use voxflow:transcribe.
4
4
  ---
5
5
 
6
6
  # VoxFlow Skill — Hub
@@ -19,7 +19,8 @@ For specialized tasks, switch to:
19
19
 
20
20
  - **Podcasts** (multi-speaker dialogue) → `voxflow:podcast`
21
21
  - **Short videos / AI clips / knowledge cards** (`picstory`, `present`, `slides`, `explain`) → `voxflow:video`
22
- - **Article → vertical card video (Slice)** — 6 themes (paper / editorial / poster / Notion / brutalist / glass), web app + Remotion → `voxflow:slice`
22
+ - **Article → vertical card video (Slice)** — 13 themes (paper-slide / editorial-mag / bold-poster / notion-card / brutalist / glass-dark / editorial-stencil / broadsheet / blueprint / daisy-pastel / showa-catalog / photo-feature / atmospheric), web app + Remotion → `voxflow:slice`
23
+ - **Shareable card images & narrated card videos** (HTML/CSS + Playwright export, optional `voxflow card render` for narrated MP4) → `voxflow:card`
23
24
  - **Transcription, subtitle translation, dubbing, summarize, publish** (`asr`, `asr-jobs`, `translate`, `dub`, `video-translate`, `summarize`, `publish`) → `voxflow:transcribe`
24
25
 
25
26
  ## Install & login
@@ -1,13 +1,13 @@
1
1
  ---
2
2
  name: video
3
- description: Use when the user wants AI-generated short-form video — knowledge cards (picstory / 小红书 / TikTok / Reels), narrated explainers, presentations, AI clips, or slides — covering picstory, present, slides, explain, and image generation. For article-to-card reels (Slice — 6 themes including paper-slide), use voxflow:slice.
3
+ description: Use when the user wants AI-generated short-form video — knowledge cards (picstory / 小红书 / TikTok / Reels), narrated explainers, presentations, AI clips, or slides — covering picstory, present, slides, explain, and image generation. For article-to-card reels (Slice — 13 themes including paper-slide), use voxflow:slice. For shareable HTML/CSS card images or narrated card MP4 videos (`voxflow card render`) use voxflow:card.
4
4
  ---
5
5
 
6
6
  # VoxFlow Video Skill
7
7
 
8
8
  Generate short-form videos with AI: LLM writes the script, AI draws cards or scenes, TTS narrates, FFmpeg / Remotion renders the final MP4.
9
9
 
10
- For article-to-card reels (Slice — 6 themes: paper / editorial / poster / Notion / brutalist / glass), switch to `voxflow:slice`.
10
+ For article-to-card reels (Slice — 13 themes: paper-slide / editorial-mag / bold-poster / notion-card / brutalist / glass-dark / editorial-stencil / broadsheet / blueprint / daisy-pastel / showa-catalog / photo-feature / atmospheric), switch to `voxflow:slice`. For shareable HTML/CSS card image sets or narrated card-to-MP4 export, switch to `voxflow:card`.
11
11
 
12
12
  Five entry points — pick by what the user wants:
13
13