npm - voxflow - Versions diffs - 1.17.0 → 1.17.2 - Mend

voxflow 1.17.0 → 1.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/index.js +1 -1
package/lib/commands/asr/index.js +1 -1
package/lib/commands/card-render.js +122 -10
package/lib/commands/card.js +1 -1
package/lib/commands/dub.js +1 -1
package/lib/commands/explain.js +3 -3
package/lib/commands/narrate.js +1 -1
package/lib/commands/picstory.js +3 -3
package/lib/commands/podcast/index.js +1 -1
package/lib/commands/present.js +1 -1
package/lib/commands/publish.js +1 -1
package/lib/commands/slides/index.js +1 -1
package/lib/commands/story.js +1 -1
package/lib/commands/summarize.js +3 -3
package/lib/commands/translate.js +1 -1
package/lib/commands/video-translate.js +1 -1
package/lib/commands/voices.js +2 -2
package/package.json +1 -1
package/skills/.claude-plugin/plugin.json +1 -1
package/skills/card/SKILL.md +28 -12
package/skills/hub/SKILL.md +3 -2
package/skills/video/SKILL.md +2 -2

package/lib/commands/asr/index.js CHANGED Viewed

@@ -211,7 +211,7 @@ const meta = {
       `--mode <type>        auto (default) | sentence | flash | file (cloud only)`,
       `--lang <model>       Language. Tencent: 16k_zh (default), 16k_en, ... | Azure: ja-JP, en-US, zh-CN, ...`,
       `--format <fmt>       Output format: srt (default), txt, json`,
-      `--output <path>      Output file path (default: <input>.<format>)`,
+      `-o, --output <path>  Output file path (default: <input>.<format>)`,
       `--speakers           Enable speaker diarization (alias of --diarize)`,
       `--diarize            Enable speaker diarization (azure)`,
       `--speaker-number <n> Expected number of speakers (with --speakers / --diarize)`,

package/lib/commands/card-render.js CHANGED Viewed

@@ -118,12 +118,99 @@ function escapeDrawtext(text) {
     .replace(/\n/g, ' ');
 }
+/**
+ * Detect whether a string contains East Asian (CJK) characters that need an
+ * explicit CJK-capable fontfile to render.
+ *
+ * Matched ranges:
+ *   - U+1100–11FF    Hangul Jamo
+ *   - U+3001–9FFF    CJK symbols & punctuation, Hiragana, Katakana, Bopomofo,
+ *                    Hangul Compatibility Jamo, enclosed / compatibility forms,
+ *                    CJK Ext A and CJK Unified Ideographs
+ *   - U+AC00–D7AF    Hangul syllables
+ *   - U+F900–FAFF    CJK Compatibility Ideographs
+ *   - U+FF00–FFEF    Halfwidth / Fullwidth forms
+ *   - U+20000–323AF  supplementary-plane ideographs (Ext B and later)
+ *
+ * U+3000 IDEOGRAPHIC SPACE is deliberately left out, so a string holding only
+ * that character never triggers a font override.
+ *
+ * @param {string|null|undefined} text - Text to inspect.
+ * @returns {boolean} true when at least one character falls in the ranges above.
+ */
+function containsCjk(text) {
+  if (!text) return false;
+  // Escapes instead of raw glyphs keep the ranges readable and immune to
+  // encoding damage; the `u` flag is required for the \u{…} astral range.
+  return /[\u1100-\u11FF\u3001-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF\uFF00-\uFFEF\u{20000}-\u{323AF}]/u.test(text);
+}
+let _cjkFontPathCache; // undefined = unknown, null = absent, string = found
+/**
+ * Locate a fontfile that supports CJK glyphs on the host platform.
+ *
+ * ffmpeg's `drawtext` filter, when no `fontfile=` is given, falls back to a
+ * built-in default that ships only Latin-1. CJK content rendered without an
+ * explicit CJK fontfile shows as `□` tofu boxes (issue #3592).
+ *
+ * Resolution order:
+ *   1. `VOXFLOW_CJK_FONT=/path/to/font.ttc`, when it names an existing regular
+ *      file (resolved to an absolute path).
+ *   2. The first existing entry of a fixed, platform-specific candidate list
+ *      (darwin / linux / win32; other platforms have no candidates).
+ *
+ * The outcome, including "nothing found", is cached for the process lifetime,
+ * so later changes to the environment variable are not picked up.
+ *
+ * @returns {string|null} Absolute path to a CJK-capable font, or null if none
+ *   of the candidates exist.
+ */
+function findCjkFontFile() {
+  if (_cjkFontPathCache !== undefined) return _cjkFontPathCache;
+  // Only a regular file is usable as a fontfile; a directory or an unreadable
+  // path would otherwise be handed to ffmpeg and fail at render time.
+  const isFile = (candidate) => {
+    try {
+      return fs.statSync(candidate).isFile();
+    } catch {
+      return false; // missing or inaccessible — treat as not available
+    }
+  };
+  // User override wins over platform autodetect
+  const override = process.env.VOXFLOW_CJK_FONT;
+  if (override && isFile(override)) {
+    // Resolve so the documented "absolute path" contract also holds for a
+    // relative override.
+    _cjkFontPathCache = path.resolve(override);
+    return _cjkFontPathCache;
+  }
+  const candidatesByPlatform = {
+    darwin: [
+      '/System/Library/Fonts/PingFang.ttc',
+      '/System/Library/Fonts/Hiragino Sans GB.ttc',
+      '/System/Library/Fonts/STHeiti Medium.ttc',
+      '/System/Library/Fonts/STHeiti Light.ttc',
+      '/Library/Fonts/Songti.ttc',
+    ],
+    linux: [
+      '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',
+      '/usr/share/fonts/opentype/noto/NotoSansCJK.ttc',
+      '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
+      '/usr/share/fonts/wqy-microhei/wqy-microhei.ttc',
+      '/usr/share/fonts/wqy-zenhei/wqy-zenhei.ttc',
+      '/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc',
+    ],
+    win32: [
+      'C:/Windows/Fonts/msyh.ttc',
+      'C:/Windows/Fonts/msyhbd.ttc',
+      'C:/Windows/Fonts/simsun.ttc',
+      'C:/Windows/Fonts/yugothic.ttf',
+    ],
+  };
+  const candidates = candidatesByPlatform[process.platform] || [];
+  // Candidates are listed in preference order; the first one present wins.
+  _cjkFontPathCache = candidates.find((candidate) => isFile(candidate)) ?? null;
+  return _cjkFontPathCache;
+}
+/**
+ * Build the drawtext `fontfile=` clause for overlay text that contains CJK
+ * characters.
+ *
+ * @param {string|null|undefined} text - Overlay text about to be drawn.
+ * @param {string|null|undefined} cjkFontPath - Path to a CJK-capable font, if
+ *   one is available on the host.
+ * @returns {string} Either ":fontfile='…'" (with leading colon, ready to
+ *   splice into a drawtext arg list) or "" when no font override is needed
+ *   (empty text, text without CJK characters, or no font path).
+ */
+function drawtextFontfileClause(text, cjkFontPath) {
+  if (!text || !cjkFontPath) return '';
+  if (!containsCjk(text)) return '';
+  const escaped = cjkFontPath
+    // `\` is an escape character inside a filter arg; use `/` separators instead.
+    .replace(/\\/g, '/')
+    // `:` separates drawtext options, so e.g. a drive-letter colon must be escaped.
+    .replace(/:/g, '\\:')
+    // A quote cannot appear inside a quoted span: emit an option-level `\'` by
+    // closing the quote, escaping the quote character, and reopening the quote.
+    .replace(/'/g, "\\'\\''");
+  return `:fontfile='${escaped}'`;
+}
 // ── Render functions ──────────────────────────────────────────────────────────
 /**
  * Render a single card: PNG + optional WAV → MP4 clip with subtitle overlay.
  */
-async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, subtitle, hasDrawtext = false }) {
+async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, subtitle, hasDrawtext = false, cjkFontPath = null }) {
   const { w, h } = RATIO_DIMS[ratio] || RATIO_DIMS['9:16'];
   const durationSec = Math.max(3, durationMs / 1000);
@@ -139,8 +226,9 @@ async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, su
     const escaped = escapeDrawtext(subtitle);
     const fontSize = Math.round(SUB_FONT_SIZE * (w / 1080));
     const boxY = h - SUB_MARGIN_BOTTOM - fontSize - SUB_PADDING * 2;
+    const fontfile = drawtextFontfileClause(subtitle, cjkFontPath);
     vfParts.push(
-      `drawtext=text='${escaped}':fontsize=${fontSize}:fontcolor=white:` +
+      `drawtext=text='${escaped}'${fontfile}:fontsize=${fontSize}:fontcolor=white:` +
       `x=(w-text_w)/2:y=${boxY + SUB_PADDING}:` +
       `box=1:boxcolor=black@${SUB_BOX_OPACITY}:boxborderw=${SUB_PADDING}`,
     );
@@ -177,7 +265,7 @@ async function renderCardClip({ pngPath, wavPath, outPath, durationMs, ratio, su
  * Generate an intro or outro card via FFmpeg color source (simple solid + no text).
  * Text overlay requires drawtext (libfreetype); if unavailable, renders a plain color card.
  */
-async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec = 3, bgColor = '1a1520', textColor = 'f4efe6', fadeSeconds = 0.4, isFirst = false, isLast = false, hasDrawtext = false }) {
+async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec = 3, bgColor = '1a1520', textColor = 'f4efe6', fadeSeconds = 0.4, isFirst = false, isLast = false, hasDrawtext = false, cjkFontPath = null }) {
   const { w, h } = RATIO_DIMS[ratio] || RATIO_DIMS['9:16'];
   const fd = fadeSeconds;
@@ -185,19 +273,21 @@ async function renderTitleCard({ outPath, ratio, title, subtitle, durationSec =
   const subSize = Math.round(32 * (w / 1080));
   const escapedTitle = escapeDrawtext(title || '');
   const escapedSub = escapeDrawtext(subtitle || '');
+  const titleFontfile = drawtextFontfileClause(title, cjkFontPath);
+  const subFontfile = drawtextFontfileClause(subtitle, cjkFontPath);
   const vfParts = [`color=c=0x${bgColor}:s=${w}x${h}:d=${durationSec}:r=30`];
   if (hasDrawtext) {
     if (escapedTitle) {
       vfParts.push(
-        `drawtext=text='${escapedTitle}':fontsize=${titleSize}:fontcolor=0x${textColor}:` +
+        `drawtext=text='${escapedTitle}'${titleFontfile}:fontsize=${titleSize}:fontcolor=0x${textColor}:` +
         `x=(w-text_w)/2:y=(h-text_h)/2-${Math.round(subSize * 1.5)}`,
       );
     }
     if (escapedSub) {
       vfParts.push(
-        `drawtext=text='${escapedSub}':fontsize=${subSize}:fontcolor=0x${textColor}@0.6:` +
+        `drawtext=text='${escapedSub}'${subFontfile}:fontsize=${subSize}:fontcolor=0x${textColor}@0.6:` +
         `x=(w-text_w)/2:y=(h-text_h)/2+${Math.round(titleSize * 0.8)}`,
       );
     }
@@ -252,7 +342,7 @@ async function cardRender(opts) {
     introDuration = 2.5,
     outroDuration = 2,
   } = opts;
-  const voice = opts.voice || 'female-kefu-xiaoyue';
+  const voice = opts.voice || 'v-female-R2s4N9qJ';
   const speed = Number(opts.speed) || 1.0;
   const deck = readDeckJson(dir);
@@ -283,6 +373,7 @@ async function cardRender(opts) {
   // Check drawtext filter availability (needs libfreetype)
   let hasDrawtext = false;
+  let cjkFontPath = null;
   if (!noSubtitle) {
     try {
       const { stdout } = await runCommand('ffmpeg', ['-hide_banner', '-filters']);
@@ -290,6 +381,24 @@ async function cardRender(opts) {
     } catch { /* unavailable */ }
     if (!hasDrawtext) {
       console.log(`  (drawtext unavailable — subtitles disabled)`);
+    } else {
+      // Detect CJK content in titles/narrations and locate a CJK fontfile if needed.
+      // ffmpeg's default drawtext font is Latin-1 only; without an explicit fontfile
+      // CJK characters render as `□` tofu boxes (#3592).
+      const allText = [
+        deck.meta?.title || '',
+        ...cards.flatMap((c) => [c.title || '', c.narration || '']),
+      ].join('\n');
+      if (containsCjk(allText)) {
+        cjkFontPath = findCjkFontFile();
+        if (cjkFontPath) {
+          console.log(`  (CJK detected — using ${path.basename(cjkFontPath)} for overlay text)`);
+        } else {
+          console.log(`  (CJK detected but no CJK font found — overlay text will show as □.`);
+          console.log(`   Install Noto Sans CJK or set VOXFLOW_CJK_FONT=/path/to/font.ttc,`);
+          console.log(`   or rerun with --no-subtitle --no-intro --no-outro.)`);
+        }
+      }
     }
   }
@@ -313,7 +422,7 @@ async function cardRender(opts) {
         outPath: introPath, ratio, title,
         subtitle: deck.meta?.language === 'zh' ? '知识卡片' : 'Card Series',
         durationSec: introDuration, fadeSeconds: 0,
-        isFirst: true, isLast: false, hasDrawtext,
+        isFirst: true, isLast: false, hasDrawtext, cjkFontPath,
       });
       clipPaths.push(introPath);
     }
@@ -354,7 +463,7 @@ async function cardRender(opts) {
       console.log(`  Rendering card ${i + 1}/${cards.length}...`);
       await renderCardClip({
         pngPath, wavPath, outPath: clipOut,
-        durationMs, ratio, hasDrawtext,
+        durationMs, ratio, hasDrawtext, cjkFontPath,
         subtitle: noSubtitle ? null : (card.narration || card.title || null),
       });
       clipPaths.push(clipOut);
@@ -370,7 +479,7 @@ async function cardRender(opts) {
         subtitle: 'voxflow.studio',
         durationSec: outroDuration, fadeSeconds: 0,
         bgColor: '0d0b14',
-        isFirst: false, isLast: true, hasDrawtext,
+        isFirst: false, isLast: true, hasDrawtext, cjkFontPath,
       });
       clipPaths.push(outroPath);
     }
@@ -425,7 +534,7 @@ Options:
   <dir>               Card output directory (must contain deck.json + exports/*.png)
   Audio:
-  --voice <id>        TTS voice ID (default: female-kefu-xiaoyue)
+  --voice <id>        TTS voice ID (default: v-female-R2s4N9qJ)
   --speed <n>         TTS speed, 0.5-2.0 (default: 1.0)
   --no-audio          Silent video — skip TTS synthesis
@@ -516,5 +625,8 @@ module.exports = {
   renderTitleCard,
   escapeDrawtext,
   writePcmAsWav,
+  containsCjk,
+  findCjkFontFile,
+  drawtextFontfileClause,
   handle,
 };

package/lib/commands/card.js CHANGED Viewed

@@ -31,7 +31,7 @@ const meta = {
     description: 'Card video export: narrated MP4 with subtitles, intro/outro, and BGM from a card-skill output directory',
     options: [
       'render <dir>           Render deck.json + PNGs → narrated MP4 video',
-      '--voice <id>           TTS voice ID (default: female-kefu-xiaoyue)',
+      '--voice <id>           TTS voice ID (default: v-female-R2s4N9qJ)',
       '--speed <n>            TTS speed, 0.5-2.0 (default: 1.0)',
       '--no-audio             Silent video — skip TTS synthesis',
       '--pause <sec>          Silence after narration (reading time, default: 2.5)',

package/lib/commands/dub.js CHANGED Viewed

@@ -527,7 +527,7 @@ const meta = {
       `--bgm <file>         Background music file to mix in`,
       `--ducking <n>        BGM volume ducking 0-1.0 (default: ${DUB_DEFAULTS.ducking})`,
       `--patch <id>         Re-synthesize a single caption by ID (patch mode)`,
-      `--output <path>      Output file path (default: ./dub-<timestamp>.wav)`,
+      `-o, --output <path>  Output file path (default: ./dub-<timestamp>.wav)`,
     ],
     examples: [
       'voxflow dub --srt subtitles.srt',

package/lib/commands/explain.js CHANGED Viewed

@@ -476,7 +476,7 @@ async function handle(args) {
     topic: parseFlag(args, '--topic') || undefined,
     voice: parseFlag(args, '--voice') || undefined,
     style: style || undefined,
-    language: parseFlag(args, '--language') || undefined,
+    language: parseFlag(args, '--language', '--lang') || undefined,
     output, speed, scenes,
     audioOnly: parseBoolFlag(args, '--audio-only'),
     cloud: parseBoolFlag(args, '--cloud'),
@@ -492,13 +492,13 @@ const meta = {
     options: [
       `--topic <text>       Topic to explain (use "demo" for built-in demo)`,
       `--style <style>      Visual style: modern (default), playful, corporate, chalkboard`,
-      `--language <code>    Script language: en (default), zh, ja, ko, etc.`,
+      `--language <code>    Script language: en (default), zh, ja, ko, etc. (alias: --lang)`,
       `--voice <id>         TTS voice ID (default: ${EXPLAIN_DEFAULTS.voice})`,
       `--speed <n>          TTS speed 0.5-2.0 (default: ${EXPLAIN_DEFAULTS.speed})`,
       `--scenes <n>         Number of scenes, 3-12 (default: ${EXPLAIN_DEFAULTS.sceneCount})`,
       `--audio-only         Skip video render, output WAV narration only`,
       `--cloud              Render on cloud instead of local Remotion`,
-      `--output <path>      Output file path (default: ./explain-<timestamp>.mp4)`,
+      `-o, --output <path>  Output file path (default: ./explain-<timestamp>.mp4)`,
     ],
     examples: [
       'voxflow explain --topic "What is React?"',

package/lib/commands/narrate.js CHANGED Viewed

@@ -612,7 +612,7 @@ const meta = {
       `--format <fmt>       Output format: pcm, wav, mp3 (default: pcm → WAV)`,
       `--speed <n>          TTS speed 0.5-2.0 (default: ${NARRATE_DEFAULTS.speed})`,
       `--silence <sec>      Silence between segments, 0-5.0 (default: ${NARRATE_DEFAULTS.silence})`,
-      `--output <path>      Output file path (default: matches input basename, e.g. design.md → design.wav)`,
+      `-o, --output <path>  Output file path (default: matches input basename, e.g. design.md → design.wav)`,
     ],
     examples: [
       'voxflow narrate --input article.txt --voice v-female-R2s4N9qJ',

package/lib/commands/picstory.js CHANGED Viewed

@@ -487,7 +487,7 @@ async function handle(args) {
     outputDir: outputDir || undefined,
     style: style || undefined,
     ratio: ratio || undefined,
-    language: parseFlag(args, '--language') || undefined,
+    language: parseFlag(args, '--language', '--lang') || undefined,
     sceneCount: scenes,
     quality: quality || undefined,
     fadeSeconds: fadeSeconds !== undefined ? fadeSeconds : undefined,
@@ -511,7 +511,7 @@ const meta = {
       `--text <content>     Input text content to visualize`,
       `--style <name>       Visual style: sketchnote (default), neon_noir, minimal_3d, chalkboard, photo, manga_panel, vintage_newspaper`,
       `--ratio <name>       Aspect ratio: portrait (default, 9:16), landscape (16:9), square (1:1)`,
-      `--language <code>    Script language: zh (default), en, ja, etc.`,
+      `--language <code>    Script language: zh (default), en, ja, etc. (alias: --lang)`,
       `--scenes <n>         Number of scenes, 2-10 (default: ${PICSTORY_DEFAULTS.sceneCount})`,
       `--quality <tier>     Image quality: fast (default), hd, ultra (gpt-5.4-image-2, best quality, ~16× cost), hd-aiberm / fast-aiberm (Aiberm Gemini — strongest Chinese text rendering)`,
       `--voice <id>         TTS voice ID`,
@@ -522,7 +522,7 @@ const meta = {
       `--fade <n>           Scene fade-in/out duration in seconds (default: ${PICSTORY_DEFAULTS.fadeSeconds}, set 0 to disable)`,
       `--image-only         Save images+audio without rendering video`,
       `--output-dir <dir>   Directory for all output files (auto-created if needed)`,
-      `--output <path>      Output file path (overrides --output-dir)`,
+      `-o, --output <path>  Output file path (overrides --output-dir)`,
     ],
     examples: [
       'voxflow picstory --topic "AI Agent 入门" --style sketchnote',

package/lib/commands/podcast/index.js CHANGED Viewed

@@ -534,7 +534,7 @@ const meta = {
       `--voice <id>         Override TTS voice for all speakers`,
       `--bgm <file>         Background music file to mix in`,
       `--ducking <n>        BGM volume ducking 0-1.0 (default: ${PODCAST_DEFAULTS.ducking})`,
-      `--output <path>      Output WAV path (default: ./podcast-<timestamp>.wav)`,
+      `-o, --output <path>  Output WAV path (default: ./podcast-<timestamp>.wav)`,
       `--speed <n>          TTS speed 0.5-2.0 (default: ${PODCAST_DEFAULTS.speed})`,
       `--silence <sec>      Uniform silence override between segments, 0-5.0 (legacy)`,
       `--pace <preset>      Pacing preset: tight | natural | relaxed (default: natural).`,

package/lib/commands/present.js CHANGED Viewed

@@ -495,7 +495,7 @@ const meta = {
       `--speed <n>          TTS speed 0.5-2.0 (default: ${PRESENT_DEFAULTS.speed})`,
       `--no-audio           Skip TTS, render silent video only`,
       `--web-search         Search the web for up-to-date info on the topic`,
-      `--output <path>      Output file path (default: ./present-<timestamp>.mp4)`,
+      `-o, --output <path>  Output file path (default: ./present-<timestamp>.mp4)`,
     ],
     examples: [
       'voxflow present --text "Claude Code 是一个 AI 编程工具" --style aurora',

package/lib/commands/publish.js CHANGED Viewed

@@ -395,7 +395,7 @@ const meta = {
       '--audio <file>       Mode C: merge existing audio into video',
       '--voice <id>         TTS voice for Mode A/B',
       '--voices <file>      Multi-speaker voice map for Mode A/B',
-      '--output <path>      Final MP4 output path',
+      '-o, --output <path>  Final MP4 output path',
       '--publish <target>   local (default) | webhook | none',
       '--publish-dir <dir>  Local publish directory (for --publish local)',
       '--publish-webhook <url> Webhook URL (for --publish webhook)',

package/lib/commands/slides/index.js CHANGED Viewed

@@ -331,7 +331,7 @@ const meta = {
       `--template <name>    Template: product, report, tutorial, pitch, free (default: ${SLIDES_DEFAULTS.template})`,
       `--model <id>         Model: swift, balanced, pro, creative (default: ${SLIDES_DEFAULTS.model})`,
       `--no-audio           Skip TTS synthesis, generate slides only`,
-      `--output <path>      Output HTML file (default: ./slides-<timestamp>.html)`,
+      `-o, --output <path>  Output HTML file (default: ./slides-<timestamp>.html)`,
     ],
     examples: [
       'voxflow slides "AI in Healthcare"',

package/lib/commands/story.js CHANGED Viewed

@@ -285,7 +285,7 @@ const meta = {
     options: [
       `--topic <text>       Story topic (default: children's story)`,
       `--voice <id>         TTS voice ID (default: ${STORY_DEFAULTS.voice})`,
-      `--output <path>      Output WAV path (default: ./story-<timestamp>.wav)`,
+      `-o, --output <path>  Output WAV path (default: ./story-<timestamp>.wav)`,
       `--paragraphs <n>     Paragraph count, 1-20 (default: ${STORY_DEFAULTS.paragraphs})`,
       `--speed <n>          TTS speed 0.5-2.0 (default: ${STORY_DEFAULTS.speed})`,
       `--silence <sec>      Silence between paragraphs, 0-5.0 (default: ${STORY_DEFAULTS.silence})`,

package/lib/commands/summarize.js CHANGED Viewed

@@ -462,7 +462,7 @@ async function handle(args) {
   const voice = parseFlag(args, '--voice') || SUM_DEFS.voice;
   const speed = parseFloatFlag(args, '--speed') ?? SUM_DEFS.speed;
   const slideCount = parseIntFlag(args, '--slides') ?? SUM_DEFS.slides;
-  const language = parseFlag(args, '--lang') || SUM_DEFS.language;
+  const language = parseFlag(args, '--lang', '--language') || SUM_DEFS.language;
   const engine = parseFlag(args, '--engine') || SUM_DEFS.engine;
   const model = parseFlag(args, '--model');
   const tts = parseBoolFlag(args, '--tts');
@@ -509,7 +509,7 @@ const meta = {
       `--input <file>       Input video/audio file → ASR + summarize`,
       `--text <text>        Direct text input (skip ASR)`,
       `--slides <n>         Number of slides, 4-12 (default: ${SUMMARIZE_DEFAULTS.slides})`,
-      `--lang <code>        Output language: en, zh, ja, etc. (default: ${SUMMARIZE_DEFAULTS.language})`,
+      `--lang <code>        Output language: en, zh, ja, etc. (default: ${SUMMARIZE_DEFAULTS.language}) (alias: --language)`,
       `--engine <engine>    ASR engine: auto, local, cloud (default: ${SUMMARIZE_DEFAULTS.engine})`,
       `--model <model>      Whisper model for local ASR: tiny, base, small, medium, large`,
       `--tts                Generate TTS narration audio for each slide`,
@@ -517,7 +517,7 @@ const meta = {
       `--scheme <name>      Video visual scheme: noir, neon, editorial, aurora (default), brutalist`,
       `--voice <id>         TTS voice ID (default: ${SUMMARIZE_DEFAULTS.voice})`,
       `--speed <n>          TTS speed 0.5-2.0 (default: ${SUMMARIZE_DEFAULTS.speed})`,
-      `--output <path>      Output PPTX path (default: <input>-summary.pptx)`,
+      `-o, --output <path>  Output PPTX path (default: <input>-summary.pptx)`,
     ],
     examples: [
       'voxflow summarize --input lecture.mp4',

package/lib/commands/translate.js CHANGED Viewed

@@ -566,7 +566,7 @@ const meta = {
       `--input <file>       Text file (.txt, .md) to translate`,
       `--from <lang>        Source language code (default: auto-detect)`,
       `--to <lang>          Target language code (required)`,
-      `--output <path>      Output file path (default: <input>-<lang>.<ext>)`,
+      `-o, --output <path>  Output file path (default: <input>-<lang>.<ext>)`,
       `--realign            Adjust subtitle timing for target language length`,
       `--batch-size <n>     Captions per LLM call, 1-20 (default: ${TRANSLATE_DEFAULTS.batchSize})`,
     ],

package/lib/commands/video-translate.js CHANGED Viewed

@@ -559,7 +559,7 @@ const meta = {
       `--speed <n>          TTS speed 0.5-2.0 (default: ${require('../core/config').VIDEO_TRANSLATE_DEFAULTS.speed})`,
       `--batch-size <n>     Translation batch size, 1-20 (default: ${require('../core/config').VIDEO_TRANSLATE_DEFAULTS.batchSize})`,
       `--keep-intermediates Keep intermediate files (SRT, audio) for debugging`,
-      `--output <path>      Output MP4 path (default: <input>-<lang>.mp4)`,
+      `-o, --output <path>  Output MP4 path (default: <input>-<lang>.mp4)`,
       `--asr-mode <mode>    Override ASR mode: auto, sentence, flash, file`,
       `--asr-lang <engine>  Override ASR engine: 16k_zh, 16k_en, 16k_ja, 16k_ko, etc.`,
       `--engine <engine>    ASR engine: auto, local, cloud (default: auto)`,

package/lib/commands/voices.js CHANGED Viewed

@@ -250,7 +250,7 @@ async function handle(args) {
     api,
     search: parseFlag(args, '--search'),
     gender: parseFlag(args, '--gender'),
-    language: parseFlag(args, '--language'),
+    language: parseFlag(args, '--language', '--lang'),
     useCase: parseFlag(args, '--use-case'),
     json: parseBoolFlag(args, '--json'),
     extended: parseBoolFlag(args, '--extended'),
@@ -275,7 +275,7 @@ const meta = {
       `--mine               List your cloned voices (requires login)`,
       `--search <query>     Search by name, tone, style, description`,
       `--gender <m|f>       Filter by gender: male/m or female/f`,
-      `--language <code>    Filter by language: zh, en, etc.`,
+      `--language <code>    Filter by language: zh, en, etc. (alias: --lang)`,
       `--use-case <tag>     Filter by editorial-curated use case (e.g. podcast)`,
       `--extended           Include extended voice library (380+ voices)`,
       `--json               Output raw JSON instead of table`,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "voxflow",
-  "version": "1.17.0",
+  "version": "1.17.2",
   "description": "AI audio content creation CLI — stories, podcasts, narration, dubbing, transcription, translation, and video translation with TTS",
   "bin": {
     "voxflow": "./dist/index.js"

package/skills/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "voxflow",
-  "version": "1.17.0",
+  "version": "1.17.2",
   "description": "AI voice CLI bundled as 6 skills (hub, podcast, transcribe, video, slice, card). Synthesize speech in 200+ voices across 40+ languages, generate multi-speaker AI podcasts, transcribe audio/video with word-level timestamps, dub videos from SRT subtitles, run end-to-end video translation, turn long articles into vertical card video reels via Remotion, and turn text into polished shareable card images or narrated card videos. Backed by a hosted TTS/ASR/LLM/render service with per-user quota (free tier 10K/mo).",
   "author": {
     "name": "VoxFlow",

package/skills/card/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: card
-description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces narrated MP4 video from those cards via `voxflow card render` (Ken Burns + TTS). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
+description: "Use when the user wants to turn text content into a set of polished, shareable visual CARD IMAGES or narrated card VIDEOS — knowledge cards, quote cards, 小红书图文, carousel cards, poster cards — rendered as HTML/CSS and exported via Playwright at ratios like 1:1 / 3:4 / 9:16; optionally produces a narrated MP4 video from those cards via `voxflow card render` (per-card TTS + FFmpeg static-image clips with optional subtitle bar / intro+outro cards / BGM mix). Triggers: card / 卡片 / 知识卡 / 文字卡片 / 金句卡 / 图文卡片 / 卡片生成 / make cards / card video / 卡片视频. For article → Slice-themed card VIDEO use voxflow:slice; for short videos / AI clips use voxflow:video; for podcasts use voxflow:podcast."
 ---
 # VoxFlow Skill — Card
@@ -374,7 +374,7 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
      "meta": {
        "title": "<Series title>",
        "ratio": "<ratio used: 9:16 | 1:1 | 3:4>",
-       "language": "<zh | en>"
+       "language": "<zh | en | ja | ...>"
      },
      "cards": [
        { "file": "card-01.html", "title": "...", "narration": "1-3 sentence spoken caption." },
@@ -383,6 +383,11 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
    }
    ```
+   - Field semantics:
+     - `meta.title` — drives the intro card text and the default output filename (slugified: `[^a-z0-9一-鿿]` → `-`, lowercased; CJK is preserved).
+     - `meta.language` — only `"zh"` switches the intro subtitle to "知识卡片"; any other value (including `"ja"`, `"en"`, `"mixed"`) falls back to "Card Series".
+     - `card.title` — used as the on-screen subtitle bar fallback when `card.narration` is empty.
+     - `card.narration` — the spoken caption fed to TTS and (by default) also rendered as the subtitle bar text.
    - Narration rules:
      - Write narration in the same language as the card copy.
      - 1-3 sentences per card. Natural spoken rhythm — avoid lists, avoid bullet-speak.
@@ -405,22 +410,33 @@ Use `references/design-languages.md` to define the card set's visual grammar ind
       ├── deck.json                     (narration + metadata)
       ├── exports/card-01.png …         (PNG exports)
       ├── sources.md                    (attribution)
-      └── my-topic.mp4                  (final video — default output here)
+      └── my-topic.mp4                  (final video — slug derived from deck.meta.title)
       ```
-    - **Key parameters** (pick based on user preference):
+    - **Audio / TTS**:
       - `--voice <id>` — TTS voice. Suggest `voxflow voices` to browse.
       - `--speed <n>` — narration speed 0.5-2.0 (default: 1.0)
-      - `--pause <sec>` — silence after each card's narration for reading time (default: 2.5)
+      - `--no-audio` — skip TTS, produce a silent video (zero quota)
+    - **Timing**:
+      - `--pause <sec>` — silence after each card's narration for reading time (default: 2.5). Baked into the WAV so it always shows in the final clip.
       - `--hold <sec>` — card duration in `--no-audio` mode (default: 5)
-      - `--bgm <path>` — background music file (loops at low volume)
-      - `--no-audio` — skip TTS, produce silent video
-      - `--no-intro` / `--no-outro` — skip title/branding cards
-      - `-o <path>` — custom output path
-    - Default output: `<dir>/<deck title>.mp4` (next to the cards).
-    - No external dependencies beyond FFmpeg (auto-detected; `ffmpeg-static` as fallback).
+    - **Structure**:
+      - `--no-intro` / `--no-outro` — skip title / branding cards
+      - `--intro-dur <sec>` — intro card duration (default: 2.5)
+      - `--outro-dur <sec>` — outro card duration (default: 2)
+    - **Overlay & mix**:
+      - `--no-subtitle` — disable the bottom subtitle bar (subtitles need FFmpeg with `libfreetype`; auto-detected and skipped if missing)
+      - `--bgm <path>` — background music, looped at low volume
+      - `--bgm-volume <n>` — BGM volume 0-1 (default: 0.08)
+    - **Output**:
+      - `-o <path>` / `--output <path>` — custom output path (parents auto-created)
+    - **CJK content** (since CLI 1.17.1): subtitles, intro, and outro overlays auto-detect CJK text in `meta.title` / `card.title` / `card.narration` and inject a CJK-capable system fontfile (PingFang / Hiragino / Heiti on macOS; Noto CJK / WQY on Linux; msyh / SimSun on Windows). If your platform has no CJK font installed, set `VOXFLOW_CJK_FONT=/path/to/font.ttc` to point at one explicitly. When neither autodetect nor override finds a font, the command logs a warning and you should fall back to `--no-subtitle --no-intro --no-outro` to avoid `□` tofu boxes.
+    - Default output: `<dir>/<slugified deck.meta.title>.mp4` (next to the cards). If `meta.title` is empty, falls back to `cards.mp4`.
+    - No external dependencies beyond FFmpeg (auto-detected; falls back to `ffmpeg-static` npm package when system ffmpeg is missing).
     - Intermediate files (WAVs, clips) stored in `<dir>/.card-render-work/` — auto-cleaned on success, preserved on failure for debugging.
+    - **Quota**: ~50 per card narrated (`tts-synthesize`); zero with `--no-audio`. A 5-card deck costs ~250 quota total.
     - For article-to-card VIDEO with Slice themes (paper-slide, editorial-mag, etc.), prefer `voxflow:slice` instead.
 ## Asset and Source Discipline

package/skills/hub/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: hub
-description: Use when the user wants to read text aloud (TTS), search VoxFlow voices, sample AI stories, or set up VoxFlow install/auth/quota — the entry-point voice toolkit. For podcasts use voxflow:podcast; for short videos / AI clips use voxflow:video; for article-to-card reels (Slice) use voxflow:slice; for transcription / dubbing / subtitle translation use voxflow:transcribe.
+description: Use when the user wants to read text aloud (TTS), search VoxFlow voices, sample AI stories, or set up VoxFlow install/auth/quota — the entry-point voice toolkit. For podcasts use voxflow:podcast; for short videos / AI clips use voxflow:video; for article-to-card reels (Slice) use voxflow:slice; for shareable card images or narrated card videos use voxflow:card; for transcription / dubbing / subtitle translation use voxflow:transcribe.
 ---
 # VoxFlow Skill — Hub
@@ -19,7 +19,8 @@ For specialized tasks, switch to:
 - **Podcasts** (multi-speaker dialogue) → `voxflow:podcast`
 - **Short videos / AI clips / knowledge cards** (`picstory`, `present`, `slides`, `explain`) → `voxflow:video`
-- **Article → vertical card video (Slice)** — 6 themes (paper / editorial / poster / Notion / brutalist / glass), web app + Remotion → `voxflow:slice`
+- **Article → vertical card video (Slice)** — 13 themes (paper-slide / editorial-mag / bold-poster / notion-card / brutalist / glass-dark / editorial-stencil / broadsheet / blueprint / daisy-pastel / showa-catalog / photo-feature / atmospheric), web app + Remotion → `voxflow:slice`
+- **Shareable card images & narrated card videos** (HTML/CSS + Playwright export, optional `voxflow card render` for narrated MP4) → `voxflow:card`
 - **Transcription, subtitle translation, dubbing, summarize, publish** (`asr`, `asr-jobs`, `translate`, `dub`, `video-translate`, `summarize`, `publish`) → `voxflow:transcribe`
 ## Install & login

package/skills/video/SKILL.md CHANGED Viewed

@@ -1,13 +1,13 @@
 ---
 name: video
-description: Use when the user wants AI-generated short-form video — knowledge cards (picstory / 小红书 / TikTok / Reels), narrated explainers, presentations, AI clips, or slides — covering picstory, present, slides, explain, and image generation. For article-to-card reels (Slice — 6 themes including paper-slide), use voxflow:slice.
+description: Use when the user wants AI-generated short-form video — knowledge cards (picstory / 小红书 / TikTok / Reels), narrated explainers, presentations, AI clips, or slides — covering picstory, present, slides, explain, and image generation. For article-to-card reels (Slice — 13 themes including paper-slide), use voxflow:slice. For shareable HTML/CSS card images or narrated card MP4 videos (`voxflow card render`) use voxflow:card.
 ---
 # VoxFlow Video Skill
 Generate short-form videos with AI: LLM writes the script, AI draws cards or scenes, TTS narrates, FFmpeg / Remotion renders the final MP4.
-For article-to-card reels (Slice — 6 themes: paper / editorial / poster / Notion / brutalist / glass), switch to `voxflow:slice`.
+For article-to-card reels (Slice — 13 themes: paper-slide / editorial-mag / bold-poster / notion-card / brutalist / glass-dark / editorial-stencil / broadsheet / blueprint / daisy-pastel / showa-catalog / photo-feature / atmospheric), switch to `voxflow:slice`. For shareable HTML/CSS card image sets or narrated card-to-MP4 export, switch to `voxflow:card`.
 Five entry points — pick by what the user wants: