npm - voxflow - Versions diffs - 1.15.3 → 1.15.4 - Mend

voxflow 1.15.3 → 1.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/index.js +1 -1
package/lib/commands/slice-render.js +71 -7
package/lib/commands/slice-stage.js +34 -0
package/lib/internal/deck-validator.js +47 -0
package/lib/stage-core/local-render.js +57 -1
package/lib/stage-core/server.js +57 -2
package/lib/stage-core/tts-audition.js +0 -0
package/lib/stage-core/voiceover-mux.js +183 -0
package/lib/stage-ui/slice/template.js +171 -0
package/package.json +1 -1
package/skills/voxflow-slice/SKILL.md +75 -2

package/lib/commands/slice-render.js CHANGED Viewed

@@ -38,9 +38,14 @@ const DEFAULT_CARD_SEC = 4;
  * Phase 0 is silent — every card gets DEFAULT_CARD_SEC. Phase 1 will
  * splice per-card TTS in and replace this with audio-driven durations.
  */
-function buildInputProps(deck) {
+function buildInputProps(deck, opts = {}) {
+    // Map of cardIdx → audio URL produced by prepareVoiceovers (or empty when
+    // the renderer runs silent). Threads into PaperSlideDeckProps.cards[].slide
+    // .voiceoverSrc so the composition's <Audio> element fetches it during
+    // Remotion's headless render.
+    const voiceoverByIdx = opts.voiceoverByIdx || {};
     const numberBadge = null;
-    const cards = deck.cards.map((card) => {
+    const cards = deck.cards.map((card, i) => {
         const slide = {
             kind: card.kind,
             header: deck.header,
@@ -49,7 +54,7 @@ function buildInputProps(deck) {
             figureKeyword: card.figureKeyword ?? null,
             seriesTitle: deck.seriesTitle,
             seriesTagline: deck.seriesTagline,
-            voiceoverSrc: null,
+            voiceoverSrc: voiceoverByIdx[i] || null,
             numberBadge,
             imageUrl: card.imageUrl,
         };
@@ -181,8 +186,53 @@ async function render(opts) {
     const outputDir = path.dirname(outputPath);
     if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
+    // ─── Voiceover prep (Phase 1) ────────────────────────────────────────
+    // Synthesize per-card TTS up front so renderMedia's headless Chromium
+    // can fetch each clip as the composition plays. Reuses the audition
+    // cache so a card the user previewed in stage doesn't pay quota again.
+    // Skip the whole pass on --no-audio (back-compat with Phase 0 silent).
+    const includeAudio = opts.noAudio !== true;
+    let voiceoverByIdx = {};
+    let voiceoverServer = null;
+    let voiceoverSkipped = [];
+    if (includeAudio) {
+        const { createTtsAuditionClient } = require('../stage-core/tts-audition');
+        const { startVoiceoverServer, prepareVoiceovers } = require('../stage-core/voiceover-mux');
+        const audClient = createTtsAuditionClient();
+        voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
+        let synthCount = 0;
+        let cacheCount = 0;
+        const prep = await prepareVoiceovers({
+            deck,
+            auditionClient: audClient,
+            baseUrl: voiceoverServer.url,
+            onProgress: (p) => {
+                if (p.fromCache) cacheCount += 1; else synthCount += 1;
+                process.stdout.write(
+                    `\r[slice render] voiceover ${p.cardIdx + 1}/${p.total} ` +
+                    `(${p.fromCache ? 'cache' : 'synth'})           `
+                );
+            },
+        });
+        voiceoverByIdx = prep.byIdx;
+        voiceoverSkipped = prep.skipped;
+        if (synthCount > 0 || cacheCount > 0) process.stdout.write('\n');
+        if (Object.keys(voiceoverByIdx).length === 0) {
+            const fatal = voiceoverSkipped.find(
+                (s) => s.reason === 'not_logged_in' || s.reason === 'quota_exceeded'
+            );
+            if (fatal) {
+                console.warn(
+                    `[slice render] ⚠ audio skipped — ${fatal.reason}` +
+                    (fatal.message ? `: ${fatal.message}` : '') +
+                    ' (rendering silent video; pass --no-audio to suppress this notice)'
+                );
+            }
+        }
+    }
     const serveUrl = resolveServeUrl();
-    const inputProps = buildInputProps(deck);
+    const inputProps = buildInputProps(deck, { voiceoverByIdx });
     // Lazy require so users who never run `slice render` don't pay the
     // remotion install cost at CLI startup (renderer pulls in puppeteer-
@@ -246,24 +296,38 @@ async function render(opts) {
     const totalMs = Date.now() - t0;
     process.stdout.write('\n');
+    // Tear down the localhost audio file server only after the render is fully
+    // committed to disk, so no in-flight Chromium audio fetch gets cut off.
+    if (voiceoverServer) {
+        try { await voiceoverServer.close(); } catch { /* best-effort */ }
+    }
     const stat = fs.statSync(outputPath);
     console.log(`[slice render] done in ${fmtSec(totalMs)} — ${humanSize(stat.size)}`);
     console.log(`[slice render] saved to ${outputPath}`);
-    return { outputPath, totalMs, frames: lastFrame, size: stat.size };
+    return {
+        outputPath,
+        totalMs,
+        frames: lastFrame,
+        size: stat.size,
+        voiceoverCount: Object.keys(voiceoverByIdx).length,
+        voiceoverSkipped,
+    };
 }
 async function handle(args) {
     const { parseFlag } = require('../core/args');
     const output = parseFlag(args, '--output', '-o');
+    const noAudio = args.includes('--no-audio');
     const positional = args.find(
         (a) => !a.startsWith('-') && !a.startsWith('--')
     );
     if (!positional) {
-        console.error('Usage: voxflow slice render <deck.json> [--output out.mp4]');
+        console.error('Usage: voxflow slice render <deck.json> [--output out.mp4] [--no-audio]');
         process.exit(1);
     }
     try {
-        await render({ deckPath: positional, output });
+        await render({ deckPath: positional, output, noAudio });
     } catch (err) {
         console.error(`\nslice render failed: ${err.message}`);
         if (process.env.VOXFLOW_DEBUG) console.error(err.stack);

package/lib/commands/slice-stage.js CHANGED Viewed

@@ -20,10 +20,12 @@ const { createEventBus } = require('../stage-core/event-bus');
 const { createSnapshotStore } = require('../stage-core/snapshot-store');
 const { createCloudRenderClient } = require('../stage-core/cloud-render');
 const { startLocalRender, getJobStatus } = require('../stage-core/local-render');
+const { createTtsAuditionClient } = require('../stage-core/tts-audition');
 const { validatePaperSlideDeck, isV2LayoutTreeDeck } = require('../internal/deck-validator');
 const { renderSliceStageHtml } = require('../stage-ui/slice/template');
 const { emit: emitTelemetry } = require('../core/telemetry');
 const { readCachedToken } = require('../core/auth');
+const { SYNTHESIZE_DEFAULTS } = require('../core/config');
 // Sourced from the canonical registry at repo root. Previously this list
 // silently fell out of sync (lagged at 6 themes while the rest of the repo
@@ -188,6 +190,37 @@ async function startSliceStage(opts) {
     },
   };
+  // ─── TTS audition bridge (per-card ▶ on stage UI) ──────────────────────
+  // Resolves `card.voiceover` / `card.voiceId` / `card.narration` against the
+  // live deck snapshot at request time so editing the deck → ▶ replays the
+  // new content immediately. Audio is cached by content hash so iterative
+  // re-listens cost zero quota after the first call.
+  const auditionBridge = opts.audition || (() => {
+    const tts = opts.ttsClient || createTtsAuditionClient(opts.ttsClientOpts || {});
+    return {
+      async play({ cardIndex, voiceOverride }) {
+        if (!snapshot.deck) return { code: 'no_deck', message: 'no deck loaded' };
+        const cards = Array.isArray(snapshot.deck.cards) ? snapshot.deck.cards : [];
+        const card = cards[cardIndex];
+        if (!card) return { code: 'card_not_found', message: `no card at index ${cardIndex}` };
+        const vo = card.voiceover || {};
+        if (vo.enabled === false) {
+          return { code: 'voiceover_disabled', message: 'card voiceover.enabled = false' };
+        }
+        const text = (typeof vo.text === 'string' && vo.text.trim()) ? vo.text : card.narration;
+        if (typeof text !== 'string' || !text.trim()) {
+          return { code: 'invalid_text', message: 'no text to synthesize (card.voiceover.text or card.narration)' };
+        }
+        const voiceId = (voiceOverride && String(voiceOverride).trim())
+          || vo.voiceId
+          || card.voiceId
+          || SYNTHESIZE_DEFAULTS.voice;
+        const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;
+        return tts.audition({ voiceId, text, speed, format: 'mp3' });
+      },
+    };
+  })();
   // Boot-time auth probe so the UI can emphasise local vs cloud render.
   // We treat any cached, non-expired token as "logged in"; the actual
   // request flow still revalidates on /api/quota-balance.
@@ -204,6 +237,7 @@ async function startSliceStage(opts) {
     cloudRender,
     localRender: localRenderBridge,
     deckSaver: deckSaverBridge,
+    audition: auditionBridge,
     publishEvent: bus.publish,
     tokenAvailable,
     preferredPort,

package/lib/internal/deck-validator.js CHANGED Viewed

@@ -128,6 +128,49 @@ function validateListPayload(list, i) {
     });
 }
+// Optional per-card voiceover override. Extends the legacy `card.voiceId`
+// (V1-only) with a nested object that carries audio behavior toggles — silent
+// card, custom TTS text override, speech rate — so stage's audition endpoint
+// and the local-render mux pass resolve a single source of truth per card.
+// All fields are optional inside an optional object: omitting `voiceover`
+// entirely keeps existing decks unchanged. Render-time resolution (highest
+// precedence first):
+//   voiceId = voiceover.voiceId ?? card.voiceId ?? job-level default
+//   text    = voiceover.text (when a non-blank string) else card.narration
+//   enabled = voiceover.enabled ?? true
+//   rate    = voiceover.rate    ?? 1.0
+const VOICEOVER_TEXT_MAX = 500;
+/**
+ * Shape-check the optional `voiceover` object of a single card.
+ *
+ * A null/undefined value is accepted as "no override". Otherwise the value
+ * must be a non-array object, and each field that is present (non-null) is
+ * checked in this order: enabled, voiceId, text, rate. The first violation
+ * throws; nothing is returned on success.
+ *
+ * @param {*} vo            Candidate `card.voiceover` value.
+ * @param {number} cardIdx  Card index, used only to build error messages.
+ * @throws {Error} Describing the first field that fails its check.
+ */
+function validateVoiceoverShape(vo, cardIdx) {
+    // Absent voiceover → nothing to validate.
+    if (vo === null || vo === undefined) return;
+
+    const isObjectLiteral = typeof vo === 'object' && !Array.isArray(vo);
+    if (!isObjectLiteral) {
+        throw new Error(`cards[${cardIdx}].voiceover must be an object`);
+    }
+
+    // enabled: boolean when present.
+    const enabled = vo.enabled;
+    const enabledOk = enabled == null || typeof enabled === 'boolean';
+    if (!enabledOk) {
+        throw new Error(`cards[${cardIdx}].voiceover.enabled must be boolean`);
+    }
+
+    // voiceId: non-blank string of at most 128 characters when present.
+    const voiceId = vo.voiceId;
+    if (voiceId != null) {
+        const isNonBlankString = typeof voiceId === 'string' && voiceId.trim() !== '';
+        if (!isNonBlankString) {
+            throw new Error(`cards[${cardIdx}].voiceover.voiceId must be non-empty string when present`);
+        }
+        if (voiceId.length > 128) {
+            throw new Error(`cards[${cardIdx}].voiceover.voiceId too long (${voiceId.length} > 128)`);
+        }
+    }
+
+    // text: string no longer than VOICEOVER_TEXT_MAX when present
+    // (an empty string is accepted here).
+    const text = vo.text;
+    if (text != null) {
+        if (typeof text !== 'string') {
+            throw new Error(`cards[${cardIdx}].voiceover.text must be string`);
+        }
+        if (text.length > VOICEOVER_TEXT_MAX) {
+            throw new Error(`cards[${cardIdx}].voiceover.text too long (${text.length} > ${VOICEOVER_TEXT_MAX})`);
+        }
+    }
+
+    // rate: finite number inside [0.5, 2.0] when present (NaN/Infinity rejected).
+    const rate = vo.rate;
+    if (rate != null) {
+        const rateOk = typeof rate === 'number'
+            && Number.isFinite(rate)
+            && rate >= 0.5
+            && rate <= 2.0;
+        if (!rateOk) {
+            throw new Error(`cards[${cardIdx}].voiceover.rate must be number in [0.5, 2.0]`);
+        }
+    }
+}
 function validatePaperSlideDeck(deck) {
     if (!deck || typeof deck !== 'object') throw new Error('deck missing');
     for (const f of ['header', 'seriesTitle', 'seriesTagline']) {
@@ -235,6 +278,7 @@ function validatePaperSlideDeck(deck) {
                 throw new Error(`cards[${i}].voiceId too long (${card.voiceId.length} > 128)`);
             }
         }
+        validateVoiceoverShape(card.voiceover, i);
         // Optional per-card image URL — photo-feature / atmospheric themes
         // composite it as a full-bleed background; other themes ignore it.
         // Shape-check only (string, length cap, http(s) prefix); reachability
@@ -446,6 +490,7 @@ function validatePaperSlideDeckV2(deck) {
             if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
             richCounts.list += 1;
         }
+        validateVoiceoverShape(card.voiceover, i);
     });
     // Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
     for (const k of Object.keys(richCounts)) {
@@ -477,6 +522,7 @@ module.exports = {
     validateQuotePayload,
     validateDataPayload,
     validateListPayload,
+    validateVoiceoverShape,
     QUOTE_TEXT_MAX,
     QUOTE_ATTRIBUTION_MAX,
     DATA_VALUE_MAX,
@@ -485,4 +531,5 @@ module.exports = {
     LIST_ITEM_MAX_LEN,
     LIST_ITEM_MIN_COUNT,
     LIST_ITEM_MAX_COUNT,
+    VOICEOVER_TEXT_MAX,
 };

package/lib/stage-core/local-render.js CHANGED Viewed

@@ -45,6 +45,8 @@ const {
   THEME_TO_DECK_ID,
   DEFAULT_THEME,
 } = require('../commands/slice-render');
+const { createTtsAuditionClient } = require('./tts-audition');
+const { startVoiceoverServer, prepareVoiceovers } = require('./voiceover-mux');
 // In-memory job table. We never persist jobs — a stage restart wipes history,
 // which is fine because the produced mp4 lives on disk under the user's deck
@@ -158,10 +160,58 @@ function startLocalRender(opts) {
 async function runRender({ job, deck, onProgress, onDone, onError }) {
   const { jobId, outputPath, deckId } = job;
+  let voiceoverServer = null;
   try {
     job.state = 'preparing';
+    // ─── Voiceover prep (Phase 1) ──────────────────────────────────────
+    // Stage's Render button defaults to including audio — users in stage
+    // are iterating and expect a richer preview. The audition cache makes
+    // re-renders effectively free for cards they already previewed.
+    // Falls back to silent video on not_logged_in / quota_exceeded
+    // (recorded in job.voiceoverSkipped so the UI can surface the reason).
+    let voiceoverByIdx = {};
+    let voiceoverSkipped = [];
+    try {
+      const audClient = createTtsAuditionClient();
+      voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
+      const prep = await prepareVoiceovers({
+        deck,
+        auditionClient: audClient,
+        baseUrl: voiceoverServer.url,
+        onProgress: (p) => {
+          if (typeof onProgress === 'function') {
+            try {
+              onProgress({
+                jobId,
+                progress: 0,
+                framesRendered: 0,
+                framesTotal: 0,
+                phase: 'voiceover',
+                voiceoverIndex: p.cardIdx + 1,
+                voiceoverTotal: p.total,
+                voiceoverFromCache: p.fromCache,
+              });
+            } catch { /* swallow */ }
+          }
+        },
+      });
+      voiceoverByIdx = prep.byIdx;
+      voiceoverSkipped = prep.skipped;
+    } catch (err) {
+      // Voiceover prep failure is non-fatal — fall back to silent render
+      // so a TTS outage / first-run-without-login still produces an mp4.
+      voiceoverSkipped = [{ cardIdx: -1, reason: 'voiceover_prep_failed', message: err.message }];
+      if (voiceoverServer) {
+        try { await voiceoverServer.close(); } catch { /* */ }
+        voiceoverServer = null;
+      }
+    }
+    job.voiceoverCount = Object.keys(voiceoverByIdx).length;
+    job.voiceoverSkipped = voiceoverSkipped;
     const serveUrl = resolveServeUrl();
-    const inputProps = buildInputProps(deck);
+    const inputProps = buildInputProps(deck, { voiceoverByIdx });
     const renderer = loadRenderer();
     job.coldStart = !chromeBinaryExists();
@@ -234,6 +284,12 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
     if (typeof onError === 'function') {
       try { onError({ jobId, message: job.error }); } catch { /* swallow */ }
     }
+  } finally {
+    // Always tear down the audio file server, including on render failure,
+    // so a stale localhost listener doesn't leak across jobs.
+    if (voiceoverServer) {
+      try { await voiceoverServer.close(); } catch { /* best-effort */ }
+    }
   }
 }

package/lib/stage-core/server.js CHANGED Viewed

@@ -65,6 +65,7 @@ async function startStageServer(opts) {
     cloudRender = null,
     localRender = null,
     deckSaver = null,
+    audition = null,
     publishEvent = null,
     tokenAvailable = false,
     preferredPort = 5180,
@@ -243,8 +244,19 @@ async function startStageServer(opts) {
     function statusForCode(code) {
       if (code === 'not_logged_in') return 401;
       if (code === 'quota_exceeded') return 402;
-      if (code === 'invalid_deck' || code === 'invalid_id') return 400;
-      if (code === 'job_not_found') return 404;
+      if (
+        code === 'invalid_deck' ||
+        code === 'invalid_id' ||
+        code === 'invalid_card_index' ||
+        code === 'invalid_voice' ||
+        code === 'invalid_text'
+      ) return 400;
+      if (
+        code === 'job_not_found' ||
+        code === 'no_deck' ||
+        code === 'card_not_found'
+      ) return 404;
+      if (code === 'voiceover_disabled') return 409;
       if (code === 'success') return 200;
       return 502; // upstream error
     }
@@ -403,6 +415,49 @@ async function startStageServer(opts) {
       return;
     }
+    // ─── TTS audition (per-card ▶) ──────────────────────────────────────────
+    // GET /api/audition?card=<int>[&voice=<id>]
+    //   Resolves card.voiceover/voiceId/narration → calls /api/tts/synthesize
+    //   via the audition bridge → streams audio bytes (default mp3). Content
+    //   hash caches identical (voice, text, speed, format) so iteration is
+    //   free after the first call. The page never sees the JWT.
+    if (audition && req.method === 'GET' && req.url.startsWith('/api/audition')) {
+      let parsed;
+      try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
+      catch {
+        return sendJson(400, { code: 'bad_request', message: 'invalid /api/audition url' });
+      }
+      const cardIndexStr = parsed.searchParams.get('card');
+      const cardIndex = Number.parseInt(cardIndexStr, 10);
+      if (!Number.isInteger(cardIndex) || cardIndex < 0) {
+        return sendJson(400, {
+          code: 'invalid_card_index',
+          message: '?card= must be a non-negative integer',
+        });
+      }
+      const voiceOverride = parsed.searchParams.get('voice') || undefined;
+      (async () => {
+        let result;
+        try {
+          result = await audition.play({ cardIndex, voiceOverride });
+        } catch (err) {
+          return sendJson(502, { code: 'upstream_error', message: err.message });
+        }
+        if (result.code !== 'success') {
+          return sendJson(statusForCode(result.code), result);
+        }
+        res.writeHead(200, {
+          'Content-Type': result.contentType || 'audio/mpeg',
+          'Content-Length': result.buf.length,
+          'Cache-Control': 'no-store',
+          'X-Audition-Cache': result.fromCache ? 'HIT' : 'MISS',
+          'X-Audition-Key': result.cacheKey || '',
+        });
+        res.end(result.buf);
+      })();
+      return;
+    }
     // ─── Inline deck save (Task B) ──────────────────────────────────────────
     // POST /api/deck  body: full deck JSON  → validates + writes to disk.
     // The file watcher picks up the write and broadcasts the deck event, so

package/lib/stage-core/tts-audition.js ADDED Viewed

Binary file

package/lib/stage-core/voiceover-mux.js ADDED Viewed

@@ -0,0 +1,183 @@
+'use strict';
+/**
+ * Per-card voiceover audio prep for local Remotion render.
+ *
+ * Reuses the audition cache (~/.config/voxflow/stage-tts-cache/) so a card
+ * the user just listened to via stage's ▶ button doesn't get re-synthesized
+ * at render time. Spins up a tiny localhost HTTP server (auto-picked port)
+ * that serves audio files to the headless Chromium Remotion launches; the
+ * Remotion composition fetches voiceoverSrc URLs from this server while
+ * rendering. Tear the server down after renderMedia() resolves.
+ *
+ *   const aud = createTtsAuditionClient();
+ *   const server = await startVoiceoverServer({ cacheDir: aud.cacheDir });
+ *   const { byIdx, skipped } = await prepareVoiceovers({
+ *     deck, auditionClient: aud, baseUrl: server.url, onProgress,
+ *   });
+ *   // buildInputProps reads byIdx and threads URLs into card.slide.voiceoverSrc
+ *   await renderMedia({ inputProps: buildInputProps(deck, { voiceoverByIdx: byIdx }), ... });
+ *   await server.close();
+ *
+ * When auth is unavailable (no token in CLI cache), prepareVoiceovers
+ * returns an empty map quietly — the resulting mp4 is the Phase 0 silent
+ * video. Callers branch on the empty map to surface a hint to the user.
+ */
+const fs = require('fs');
+const http = require('http');
+const path = require('path');
+const { contentTypeFor } = require('./tts-audition');
+const { SYNTHESIZE_DEFAULTS } = require('../core/config');
+/**
+ * Tiny localhost HTTP server serving the audition cache directory.
+ * Only responds to GET /audio/<filename>; everything else is 404. Path
+ * traversal (.. or nested directories) is rejected with 400 up front since
+ * the cache layout is intentionally flat. The listener binds to 127.0.0.1.
+ *
+ * @param {object} opts
+ * @param {string} opts.cacheDir         Directory containing <hash>.mp3 files.
+ * @param {number} [opts.preferredPort=0]  0 lets the OS pick a free port.
+ * @returns {Promise<{server, port, url, close}>}
+ *   `url` is `http://127.0.0.1:<port>` (no trailing slash). `close()` resolves
+ *   once the listener has shut down and never rejects.
+ * @throws {Error} When cacheDir is not a non-empty string; the returned
+ *   promise also rejects if the port cannot be bound.
+ */
+async function startVoiceoverServer({ cacheDir, preferredPort = 0 }) {
+  if (typeof cacheDir !== 'string' || !cacheDir) {
+    throw new Error('startVoiceoverServer: cacheDir required');
+  }
+  const server = http.createServer((req, res) => {
+    if (req.method !== 'GET' || !req.url.startsWith('/audio/')) {
+      res.writeHead(404, { 'Content-Type': 'text/plain' });
+      res.end('not found');
+      return;
+    }
+    // Drop any query string; the remainder is the bare cache filename.
+    const fname = req.url.slice('/audio/'.length).split('?')[0];
+    // Defense in depth — reject path traversal even on a localhost-only
+    // server. The audition cache is a flat dir of <sha256>.<ext> filenames.
+    if (fname === '' || fname.includes('/') || fname.includes('\\') || fname.includes('..')) {
+      res.writeHead(400, { 'Content-Type': 'text/plain' });
+      res.end('bad filename');
+      return;
+    }
+    const filePath = path.join(cacheDir, fname);
+    fs.stat(filePath, (statErr, st) => {
+      if (statErr || !st.isFile()) {
+        res.writeHead(404, { 'Content-Type': 'text/plain' });
+        res.end('not found');
+        return;
+      }
+      const ext = path.extname(fname).slice(1);
+      const ctype = contentTypeFor(ext);
+      res.writeHead(200, {
+        'Content-Type': ctype,
+        'Content-Length': st.size,
+        'Cache-Control': 'no-store',
+      });
+      const fileStream = fs.createReadStream(filePath);
+      // A read failure after the stat (file removed from the cache, EACCES,
+      // …) would otherwise be an unhandled 'error' event and take down the
+      // whole process. Headers are already sent, so abort the response
+      // instead of trying to write an error status.
+      fileStream.on('error', () => res.destroy());
+      // pipe() does not tear down the source when the destination goes away;
+      // release the file descriptor if the client disconnects early.
+      res.on('close', () => fileStream.destroy());
+      fileStream.pipe(res);
+    });
+  });
+  await new Promise((resolve, reject) => {
+    server.once('error', reject);
+    server.listen(preferredPort, '127.0.0.1', () => {
+      // Listening succeeded — the bind-failure handler is no longer needed.
+      server.removeListener('error', reject);
+      resolve();
+    });
+  });
+  const port = server.address().port;
+  return {
+    server,
+    port,
+    url: `http://127.0.0.1:${port}`,
+    async close() {
+      await new Promise((resolve) => {
+        // The callback's error argument (e.g. server already closed) is
+        // deliberately ignored: teardown is best-effort.
+        server.close(() => resolve());
+        // server.close() only stops accepting new connections; on Node
+        // versions before 19 idle keep-alive sockets keep it pending until
+        // they time out. Drop idle sockets right away where the runtime
+        // supports it; requests still in flight are left to finish.
+        if (typeof server.closeIdleConnections === 'function') {
+          server.closeIdleConnections();
+        }
+      });
+    },
+  };
+}
+/**
+ * Resolve + synthesize (or cache-hit) one mp3 per card, return a map of
+ * { cardIdx: audio URL } that buildInputProps threads into voiceoverSrc.
+ *
+ * Cards are processed sequentially. Per card: text is `voiceover.text` when
+ * it is a non-blank string, else `card.narration`; voice is
+ * `voiceover.voiceId`, else `card.voiceId`, else SYNTHESIZE_DEFAULTS.voice;
+ * speed is `voiceover.rate` when numeric, else 1.0.
+ *
+ * @param {object} opts
+ * @param {object} opts.deck                       Validator-shaped deck.
+ * @param {{audition: Function}} opts.auditionClient
+ *   Same client stage's /api/audition uses. Shares the on-disk cache so a
+ *   card the user previewed in the browser doesn't burn quota again at
+ *   render time.
+ * @param {string} opts.baseUrl                    e.g. http://127.0.0.1:54321
+ * @param {(p:object) => void} [opts.onProgress]
+ *   Called once per successfully resolved card (never for skipped cards):
+ *   { cardIdx, total, fromCache, voiceId, textLen }. Exceptions thrown by the
+ *   callback are swallowed.
+ * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
+ *   skipped reasons: missing-card | voiceover-disabled | no-text |
+ *   not_logged_in | quota_exceeded | tts_failed | network_error | invalid_voice
+ *   (any other non-success code returned by the client is passed through).
+ * @throws {Error} When the deck has cards but auditionClient.audition or
+ *   baseUrl is missing.
+ */
+async function prepareVoiceovers({ deck, auditionClient, baseUrl, onProgress }) {
+  const byIdx = {};
+  const skipped = [];
+  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
+  if (!auditionClient || typeof auditionClient.audition !== 'function') {
+    throw new Error('prepareVoiceovers: auditionClient.audition is required');
+  }
+  if (typeof baseUrl !== 'string' || !baseUrl) {
+    throw new Error('prepareVoiceovers: baseUrl is required');
+  }
+  const cards = deck.cards;
+  // Loop-invariant: strip one trailing slash so URLs never contain "//audio".
+  const audioBase = `${baseUrl.replace(/\/$/, '')}/audio`;
+  const format = 'mp3';
+  for (let i = 0; i < cards.length; i++) {
+    const card = cards[i];
+    if (!card) { skipped.push({ cardIdx: i, reason: 'missing-card' }); continue; }
+    const vo = card.voiceover || {};
+    if (vo.enabled === false) {
+      skipped.push({ cardIdx: i, reason: 'voiceover-disabled' });
+      continue;
+    }
+    const text = (typeof vo.text === 'string' && vo.text.trim())
+      ? vo.text
+      : card.narration;
+    if (typeof text !== 'string' || !text.trim()) {
+      skipped.push({ cardIdx: i, reason: 'no-text' });
+      continue;
+    }
+    const voiceId = vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;
+    const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;
+    let r;
+    try {
+      r = await auditionClient.audition({ voiceId, text, speed, format });
+    } catch (err) {
+      skipped.push({ cardIdx: i, reason: 'network_error', message: err.message || String(err) });
+      continue;
+    }
+    // A client that resolves with nothing must not abort the whole pass
+    // with a TypeError on `r.code`; record it as a per-card failure.
+    if (!r) {
+      skipped.push({ cardIdx: i, reason: 'tts_failed', message: 'audition returned no result' });
+      continue;
+    }
+    if (r.code !== 'success') {
+      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
+      // not_logged_in / quota_exceeded → bail early so the user sees one
+      // clear message rather than N copies of the same root cause.
+      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
+      continue;
+    }
+    // Without a cache key there is no file to serve; emitting
+    // ".../audio/undefined.mp3" would hand the renderer a URL that 404s.
+    if (typeof r.cacheKey !== 'string' || !r.cacheKey) {
+      skipped.push({ cardIdx: i, reason: 'tts_failed', message: 'audition result has no cacheKey' });
+      continue;
+    }
+    byIdx[i] = `${audioBase}/${r.cacheKey}.${format}`;
+    if (typeof onProgress === 'function') {
+      try {
+        onProgress({
+          cardIdx: i,
+          total: cards.length,
+          fromCache: !!r.fromCache,
+          voiceId,
+          textLen: text.length,
+        });
+      } catch { /* swallow consumer errors */ }
+    }
+  }
+  return { byIdx, skipped };
+}
+module.exports = {
+  startVoiceoverServer,
+  prepareVoiceovers,
+};