voxflow 1.15.3 → 1.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,9 +38,14 @@ const DEFAULT_CARD_SEC = 4;
38
38
  * Phase 0 is silent — every card gets DEFAULT_CARD_SEC. Phase 1 will
39
39
  * splice per-card TTS in and replace this with audio-driven durations.
40
40
  */
41
- function buildInputProps(deck) {
41
+ function buildInputProps(deck, opts = {}) {
42
+ // Map of cardIdx → audio URL produced by prepareVoiceovers (or empty when
43
+ // the renderer runs silent). Threads into PaperSlideDeckProps.cards[].slide
44
+ // .voiceoverSrc so the composition's <Audio> element fetches it during
45
+ // Remotion's headless render.
46
+ const voiceoverByIdx = opts.voiceoverByIdx || {};
42
47
  const numberBadge = null;
43
- const cards = deck.cards.map((card) => {
48
+ const cards = deck.cards.map((card, i) => {
44
49
  const slide = {
45
50
  kind: card.kind,
46
51
  header: deck.header,
@@ -49,7 +54,7 @@ function buildInputProps(deck) {
49
54
  figureKeyword: card.figureKeyword ?? null,
50
55
  seriesTitle: deck.seriesTitle,
51
56
  seriesTagline: deck.seriesTagline,
52
- voiceoverSrc: null,
57
+ voiceoverSrc: voiceoverByIdx[i] || null,
53
58
  numberBadge,
54
59
  imageUrl: card.imageUrl,
55
60
  };
@@ -181,8 +186,53 @@ async function render(opts) {
181
186
  const outputDir = path.dirname(outputPath);
182
187
  if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
183
188
 
189
+ // ─── Voiceover prep (Phase 1) ────────────────────────────────────────
190
+ // Synthesize per-card TTS up front so renderMedia's headless Chromium
191
+ // can fetch each clip as the composition plays. Reuses the audition
192
+ // cache so a card the user previewed in stage doesn't pay quota again.
193
+ // Skip the whole pass on --no-audio (back-compat with Phase 0 silent).
194
+ const includeAudio = opts.noAudio !== true;
195
+ let voiceoverByIdx = {};
196
+ let voiceoverServer = null;
197
+ let voiceoverSkipped = [];
198
+ if (includeAudio) {
199
+ const { createTtsAuditionClient } = require('../stage-core/tts-audition');
200
+ const { startVoiceoverServer, prepareVoiceovers } = require('../stage-core/voiceover-mux');
201
+ const audClient = createTtsAuditionClient();
202
+ voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
203
+ let synthCount = 0;
204
+ let cacheCount = 0;
205
+ const prep = await prepareVoiceovers({
206
+ deck,
207
+ auditionClient: audClient,
208
+ baseUrl: voiceoverServer.url,
209
+ onProgress: (p) => {
210
+ if (p.fromCache) cacheCount += 1; else synthCount += 1;
211
+ process.stdout.write(
212
+ `\r[slice render] voiceover ${p.cardIdx + 1}/${p.total} ` +
213
+ `(${p.fromCache ? 'cache' : 'synth'}) `
214
+ );
215
+ },
216
+ });
217
+ voiceoverByIdx = prep.byIdx;
218
+ voiceoverSkipped = prep.skipped;
219
+ if (synthCount > 0 || cacheCount > 0) process.stdout.write('\n');
220
+ if (Object.keys(voiceoverByIdx).length === 0) {
221
+ const fatal = voiceoverSkipped.find(
222
+ (s) => s.reason === 'not_logged_in' || s.reason === 'quota_exceeded'
223
+ );
224
+ if (fatal) {
225
+ console.warn(
226
+ `[slice render] ⚠ audio skipped — ${fatal.reason}` +
227
+ (fatal.message ? `: ${fatal.message}` : '') +
228
+ ' (rendering silent video; pass --no-audio to suppress this notice)'
229
+ );
230
+ }
231
+ }
232
+ }
233
+
184
234
  const serveUrl = resolveServeUrl();
185
- const inputProps = buildInputProps(deck);
235
+ const inputProps = buildInputProps(deck, { voiceoverByIdx });
186
236
 
187
237
  // Lazy require so users who never run `slice render` don't pay the
188
238
  // remotion install cost at CLI startup (renderer pulls in puppeteer-
@@ -246,24 +296,38 @@ async function render(opts) {
246
296
  const totalMs = Date.now() - t0;
247
297
  process.stdout.write('\n');
248
298
 
299
+ // Tear down the localhost audio file server after the render is fully
300
+ // committed to disk, so an in-flight Chromium fetch is never cut off.
301
+ if (voiceoverServer) {
302
+ try { await voiceoverServer.close(); } catch { /* best-effort */ }
303
+ }
304
+
249
305
  const stat = fs.statSync(outputPath);
250
306
  console.log(`[slice render] done in ${fmtSec(totalMs)} — ${humanSize(stat.size)}`);
251
307
  console.log(`[slice render] saved to ${outputPath}`);
252
- return { outputPath, totalMs, frames: lastFrame, size: stat.size };
308
+ return {
309
+ outputPath,
310
+ totalMs,
311
+ frames: lastFrame,
312
+ size: stat.size,
313
+ voiceoverCount: Object.keys(voiceoverByIdx).length,
314
+ voiceoverSkipped,
315
+ };
253
316
  }
254
317
 
255
318
  async function handle(args) {
256
319
  const { parseFlag } = require('../core/args');
257
320
  const output = parseFlag(args, '--output', '-o');
321
+ const noAudio = args.includes('--no-audio');
258
322
  const positional = args.find(
259
323
  (a) => !a.startsWith('-') && !a.startsWith('--')
260
324
  );
261
325
  if (!positional) {
262
- console.error('Usage: voxflow slice render <deck.json> [--output out.mp4]');
326
+ console.error('Usage: voxflow slice render <deck.json> [--output out.mp4] [--no-audio]');
263
327
  process.exit(1);
264
328
  }
265
329
  try {
266
- await render({ deckPath: positional, output });
330
+ await render({ deckPath: positional, output, noAudio });
267
331
  } catch (err) {
268
332
  console.error(`\nslice render failed: ${err.message}`);
269
333
  if (process.env.VOXFLOW_DEBUG) console.error(err.stack);
@@ -20,10 +20,12 @@ const { createEventBus } = require('../stage-core/event-bus');
20
20
  const { createSnapshotStore } = require('../stage-core/snapshot-store');
21
21
  const { createCloudRenderClient } = require('../stage-core/cloud-render');
22
22
  const { startLocalRender, getJobStatus } = require('../stage-core/local-render');
23
+ const { createTtsAuditionClient } = require('../stage-core/tts-audition');
23
24
  const { validatePaperSlideDeck, isV2LayoutTreeDeck } = require('../internal/deck-validator');
24
25
  const { renderSliceStageHtml } = require('../stage-ui/slice/template');
25
26
  const { emit: emitTelemetry } = require('../core/telemetry');
26
27
  const { readCachedToken } = require('../core/auth');
28
+ const { SYNTHESIZE_DEFAULTS } = require('../core/config');
27
29
 
28
30
  // Sourced from the canonical registry at repo root. Previously this list
29
31
  // silently fell out of sync (lagged at 6 themes while the rest of the repo
@@ -188,6 +190,37 @@ async function startSliceStage(opts) {
188
190
  },
189
191
  };
190
192
 
193
+ // ─── TTS audition bridge (per-card ▶ on stage UI) ──────────────────────
194
+ // Resolves `card.voiceover` / `card.voiceId` / `card.narration` against the
195
+ // live deck snapshot at request time so editing the deck → ▶ replays the
196
+ // new content immediately. Audio is cached by content hash so iterative
197
+ // re-listens cost zero quota after the first call.
198
+ const auditionBridge = opts.audition || (() => {
199
+ const tts = opts.ttsClient || createTtsAuditionClient(opts.ttsClientOpts || {});
200
+ return {
201
+ async play({ cardIndex, voiceOverride }) {
202
+ if (!snapshot.deck) return { code: 'no_deck', message: 'no deck loaded' };
203
+ const cards = Array.isArray(snapshot.deck.cards) ? snapshot.deck.cards : [];
204
+ const card = cards[cardIndex];
205
+ if (!card) return { code: 'card_not_found', message: `no card at index ${cardIndex}` };
206
+ const vo = card.voiceover || {};
207
+ if (vo.enabled === false) {
208
+ return { code: 'voiceover_disabled', message: 'card voiceover.enabled = false' };
209
+ }
210
+ const text = (typeof vo.text === 'string' && vo.text.trim()) ? vo.text : card.narration;
211
+ if (typeof text !== 'string' || !text.trim()) {
212
+ return { code: 'invalid_text', message: 'no text to synthesize (card.voiceover.text or card.narration)' };
213
+ }
214
+ const voiceId = (voiceOverride && String(voiceOverride).trim())
215
+ || vo.voiceId
216
+ || card.voiceId
217
+ || SYNTHESIZE_DEFAULTS.voice;
218
+ const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;
219
+ return tts.audition({ voiceId, text, speed, format: 'mp3' });
220
+ },
221
+ };
222
+ })();
223
+
191
224
  // Boot-time auth probe so the UI can emphasise local vs cloud render.
192
225
  // We treat any cached, non-expired token as "logged in"; the actual
193
226
  // request flow still revalidates on /api/quota-balance.
@@ -204,6 +237,7 @@ async function startSliceStage(opts) {
204
237
  cloudRender,
205
238
  localRender: localRenderBridge,
206
239
  deckSaver: deckSaverBridge,
240
+ audition: auditionBridge,
207
241
  publishEvent: bus.publish,
208
242
  tokenAvailable,
209
243
  preferredPort,
@@ -128,6 +128,49 @@ function validateListPayload(list, i) {
128
128
  });
129
129
  }
130
130
 
131
+ // Optional per-card voiceover override. Extends the legacy `card.voiceId`
132
+ // (V1-only) with a nested object that carries audio behavior toggles — silent
133
+ // card, custom TTS text override, speech rate — so stage's audition endpoint
134
+ // and the local-render mux pass resolve a single source of truth per card.
135
+ // All fields are optional inside an optional object: omitting `voiceover`
136
+ // entirely keeps existing decks unchanged. Render-time resolution (highest
137
+ // precedence first):
138
+ // voiceId = voiceover.voiceId ?? card.voiceId ?? job-level default
139
+ // text = voiceover.text when non-blank, else card.narration
140
+ // enabled = voiceover.enabled ?? true
141
+ // rate = voiceover.rate ?? 1.0
142
const VOICEOVER_TEXT_MAX = 500;

/**
 * Shape-check the optional `card.voiceover` object.
 *
 * A nullish value is accepted (the field is optional). Otherwise the value
 * must be a non-array object, and each field that is present (non-nullish)
 * is checked in this order: `enabled`, `voiceId`, `text`, `rate`.
 *
 * @param {*} vo Candidate `card.voiceover` value.
 * @param {number} cardIdx Card position, used only to build error messages.
 * @returns {void}
 * @throws {Error} On the first field that fails its check.
 */
function validateVoiceoverShape(vo, cardIdx) {
  if (vo === null || vo === undefined) return;

  const isObjectShape = typeof vo === 'object' && !Array.isArray(vo);
  if (!isObjectShape) {
    throw new Error(`cards[${cardIdx}].voiceover must be an object`);
  }

  const isPresent = (value) => value !== null && value !== undefined;
  const { enabled, voiceId, text, rate } = vo;

  if (isPresent(enabled) && typeof enabled !== 'boolean') {
    throw new Error(`cards[${cardIdx}].voiceover.enabled must be boolean`);
  }

  if (isPresent(voiceId)) {
    const isBlank = typeof voiceId !== 'string' || voiceId.trim() === '';
    if (isBlank) {
      throw new Error(`cards[${cardIdx}].voiceover.voiceId must be non-empty string when present`);
    }
    if (voiceId.length > 128) {
      throw new Error(`cards[${cardIdx}].voiceover.voiceId too long (${voiceId.length} > 128)`);
    }
  }

  if (isPresent(text)) {
    if (typeof text !== 'string') {
      throw new Error(`cards[${cardIdx}].voiceover.text must be string`);
    }
    if (text.length > VOICEOVER_TEXT_MAX) {
      throw new Error(`cards[${cardIdx}].voiceover.text too long (${text.length} > ${VOICEOVER_TEXT_MAX})`);
    }
  }

  if (isPresent(rate)) {
    const isValidRate =
      typeof rate === 'number' && Number.isFinite(rate) && rate >= 0.5 && rate <= 2.0;
    if (!isValidRate) {
      throw new Error(`cards[${cardIdx}].voiceover.rate must be number in [0.5, 2.0]`);
    }
  }
}
173
+
131
174
  function validatePaperSlideDeck(deck) {
132
175
  if (!deck || typeof deck !== 'object') throw new Error('deck missing');
133
176
  for (const f of ['header', 'seriesTitle', 'seriesTagline']) {
@@ -235,6 +278,7 @@ function validatePaperSlideDeck(deck) {
235
278
  throw new Error(`cards[${i}].voiceId too long (${card.voiceId.length} > 128)`);
236
279
  }
237
280
  }
281
+ validateVoiceoverShape(card.voiceover, i);
238
282
  // Optional per-card image URL — photo-feature / atmospheric themes
239
283
  // composite it as a full-bleed background; other themes ignore it.
240
284
  // Shape-check only (string, length cap, http(s) prefix); reachability
@@ -446,6 +490,7 @@ function validatePaperSlideDeckV2(deck) {
446
490
  if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
447
491
  richCounts.list += 1;
448
492
  }
493
+ validateVoiceoverShape(card.voiceover, i);
449
494
  });
450
495
  // Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
451
496
  for (const k of Object.keys(richCounts)) {
@@ -477,6 +522,7 @@ module.exports = {
477
522
  validateQuotePayload,
478
523
  validateDataPayload,
479
524
  validateListPayload,
525
+ validateVoiceoverShape,
480
526
  QUOTE_TEXT_MAX,
481
527
  QUOTE_ATTRIBUTION_MAX,
482
528
  DATA_VALUE_MAX,
@@ -485,4 +531,5 @@ module.exports = {
485
531
  LIST_ITEM_MAX_LEN,
486
532
  LIST_ITEM_MIN_COUNT,
487
533
  LIST_ITEM_MAX_COUNT,
534
+ VOICEOVER_TEXT_MAX,
488
535
  };
@@ -45,6 +45,8 @@ const {
45
45
  THEME_TO_DECK_ID,
46
46
  DEFAULT_THEME,
47
47
  } = require('../commands/slice-render');
48
+ const { createTtsAuditionClient } = require('./tts-audition');
49
+ const { startVoiceoverServer, prepareVoiceovers } = require('./voiceover-mux');
48
50
 
49
51
  // In-memory job table. We never persist jobs — a stage restart wipes history,
50
52
  // which is fine because the produced mp4 lives on disk under the user's deck
@@ -158,10 +160,58 @@ function startLocalRender(opts) {
158
160
 
159
161
  async function runRender({ job, deck, onProgress, onDone, onError }) {
160
162
  const { jobId, outputPath, deckId } = job;
163
+ let voiceoverServer = null;
161
164
  try {
162
165
  job.state = 'preparing';
166
+
167
+ // ─── Voiceover prep (Phase 1) ──────────────────────────────────────
168
+ // Stage's Render button defaults to including audio — users in stage
169
+ // are iterating and expect a richer preview. The audition cache makes
170
+ // re-renders effectively free for cards they already previewed.
171
+ // Falls back to silent video on not_logged_in / quota_exceeded
172
+ // (recorded in job.voiceoverSkipped so the UI can surface the reason).
173
+ let voiceoverByIdx = {};
174
+ let voiceoverSkipped = [];
175
+ try {
176
+ const audClient = createTtsAuditionClient();
177
+ voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
178
+ const prep = await prepareVoiceovers({
179
+ deck,
180
+ auditionClient: audClient,
181
+ baseUrl: voiceoverServer.url,
182
+ onProgress: (p) => {
183
+ if (typeof onProgress === 'function') {
184
+ try {
185
+ onProgress({
186
+ jobId,
187
+ progress: 0,
188
+ framesRendered: 0,
189
+ framesTotal: 0,
190
+ phase: 'voiceover',
191
+ voiceoverIndex: p.cardIdx + 1,
192
+ voiceoverTotal: p.total,
193
+ voiceoverFromCache: p.fromCache,
194
+ });
195
+ } catch { /* swallow */ }
196
+ }
197
+ },
198
+ });
199
+ voiceoverByIdx = prep.byIdx;
200
+ voiceoverSkipped = prep.skipped;
201
+ } catch (err) {
202
+ // Voiceover prep failure is non-fatal — fall back to silent render
203
+ // so a TTS outage / first-run-without-login still produces an mp4.
204
+ voiceoverSkipped = [{ cardIdx: -1, reason: 'voiceover_prep_failed', message: err.message }];
205
+ if (voiceoverServer) {
206
+ try { await voiceoverServer.close(); } catch { /* */ }
207
+ voiceoverServer = null;
208
+ }
209
+ }
210
+ job.voiceoverCount = Object.keys(voiceoverByIdx).length;
211
+ job.voiceoverSkipped = voiceoverSkipped;
212
+
163
213
  const serveUrl = resolveServeUrl();
164
- const inputProps = buildInputProps(deck);
214
+ const inputProps = buildInputProps(deck, { voiceoverByIdx });
165
215
 
166
216
  const renderer = loadRenderer();
167
217
  job.coldStart = !chromeBinaryExists();
@@ -234,6 +284,12 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
234
284
  if (typeof onError === 'function') {
235
285
  try { onError({ jobId, message: job.error }); } catch { /* swallow */ }
236
286
  }
287
+ } finally {
288
+ // Always tear down the audio file server, including on render failure,
289
+ // so a stale localhost listener doesn't leak across jobs.
290
+ if (voiceoverServer) {
291
+ try { await voiceoverServer.close(); } catch { /* best-effort */ }
292
+ }
237
293
  }
238
294
  }
239
295
 
@@ -65,6 +65,7 @@ async function startStageServer(opts) {
65
65
  cloudRender = null,
66
66
  localRender = null,
67
67
  deckSaver = null,
68
+ audition = null,
68
69
  publishEvent = null,
69
70
  tokenAvailable = false,
70
71
  preferredPort = 5180,
@@ -243,8 +244,19 @@ async function startStageServer(opts) {
243
244
  function statusForCode(code) {
244
245
  if (code === 'not_logged_in') return 401;
245
246
  if (code === 'quota_exceeded') return 402;
246
- if (code === 'invalid_deck' || code === 'invalid_id') return 400;
247
- if (code === 'job_not_found') return 404;
247
+ if (
248
+ code === 'invalid_deck' ||
249
+ code === 'invalid_id' ||
250
+ code === 'invalid_card_index' ||
251
+ code === 'invalid_voice' ||
252
+ code === 'invalid_text'
253
+ ) return 400;
254
+ if (
255
+ code === 'job_not_found' ||
256
+ code === 'no_deck' ||
257
+ code === 'card_not_found'
258
+ ) return 404;
259
+ if (code === 'voiceover_disabled') return 409;
248
260
  if (code === 'success') return 200;
249
261
  return 502; // upstream error
250
262
  }
@@ -403,6 +415,49 @@ async function startStageServer(opts) {
403
415
  return;
404
416
  }
405
417
 
418
+ // ─── TTS audition (per-card ▶) ──────────────────────────────────────────
419
+ // GET /api/audition?card=<int>[&voice=<id>]
420
+ // Resolves card.voiceover/voiceId/narration → calls /api/tts/synthesize
421
+ // via the audition bridge → streams audio bytes (default mp3). Content
422
+ // hash caches identical (voice, text, speed, format) so iteration is
423
+ // free after the first call. The page never sees the JWT.
424
+ if (audition && req.method === 'GET' && req.url.startsWith('/api/audition')) {
425
+ let parsed;
426
+ try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
427
+ catch {
428
+ return sendJson(400, { code: 'bad_request', message: 'invalid /api/audition url' });
429
+ }
430
+ const cardIndexStr = parsed.searchParams.get('card');
431
+ const cardIndex = Number.parseInt(cardIndexStr, 10);
432
+ if (!Number.isInteger(cardIndex) || cardIndex < 0) {
433
+ return sendJson(400, {
434
+ code: 'invalid_card_index',
435
+ message: '?card= must be a non-negative integer',
436
+ });
437
+ }
438
+ const voiceOverride = parsed.searchParams.get('voice') || undefined;
439
+ (async () => {
440
+ let result;
441
+ try {
442
+ result = await audition.play({ cardIndex, voiceOverride });
443
+ } catch (err) {
444
+ return sendJson(502, { code: 'upstream_error', message: err.message });
445
+ }
446
+ if (result.code !== 'success') {
447
+ return sendJson(statusForCode(result.code), result);
448
+ }
449
+ res.writeHead(200, {
450
+ 'Content-Type': result.contentType || 'audio/mpeg',
451
+ 'Content-Length': result.buf.length,
452
+ 'Cache-Control': 'no-store',
453
+ 'X-Audition-Cache': result.fromCache ? 'HIT' : 'MISS',
454
+ 'X-Audition-Key': result.cacheKey || '',
455
+ });
456
+ res.end(result.buf);
457
+ })();
458
+ return;
459
+ }
460
+
406
461
  // ─── Inline deck save (Task B) ──────────────────────────────────────────
407
462
  // POST /api/deck body: full deck JSON → validates + writes to disk.
408
463
  // The file watcher picks up the write and broadcasts the deck event, so
Binary file
@@ -0,0 +1,183 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Per-card voiceover audio prep for local Remotion render.
5
+ *
6
+ * Reuses the audition cache (~/.config/voxflow/stage-tts-cache/) so a card
7
+ * the user just listened to via stage's ▶ button doesn't get re-synthesized
8
+ * at render time. Spins up a tiny localhost HTTP server (auto-picked port)
9
+ * that serves audio files to the headless Chromium Remotion launches; the
10
+ * Remotion composition fetches voiceoverSrc URLs from this server while
11
+ * rendering. Tear the server down after renderMedia() resolves.
12
+ *
13
+ * const aud = createTtsAuditionClient();
14
+ * const server = await startVoiceoverServer({ cacheDir: aud.cacheDir });
15
+ * const { byIdx, skipped } = await prepareVoiceovers({
16
+ * deck, auditionClient: aud, baseUrl: server.url, onProgress,
17
+ * });
18
+ * // buildInputProps reads byIdx and threads URLs into card.slide.voiceoverSrc
19
+ * await renderMedia({ inputProps: buildInputProps(deck, { voiceoverByIdx: byIdx }), ... });
20
+ * await server.close();
21
+ *
22
+ * When auth is unavailable (no token in CLI cache), prepareVoiceovers
23
+ * returns an empty map quietly — the resulting mp4 is the Phase 0 silent
24
+ * video. Callers branch on the empty map to surface a hint to the user.
25
+ */
26
+
27
+ const fs = require('fs');
28
+ const http = require('http');
29
+ const path = require('path');
30
+
31
+ const { contentTypeFor } = require('./tts-audition');
32
+ const { SYNTHESIZE_DEFAULTS } = require('../core/config');
33
+
34
/**
 * Start a tiny HTTP server, bound to 127.0.0.1, that serves files from the
 * audition cache directory.
 *
 * Only `GET /audio/<filename>` is answered; every other request gets 404.
 * File names containing `/`, `\` or `..` are rejected with 400, because the
 * cache directory is a flat list of `<hash>.<ext>` files.
 *
 * @param {object} opts
 * @param {string} opts.cacheDir Directory containing the cached audio files.
 * @param {number} [opts.preferredPort=0] 0 lets the OS pick a free port.
 * @returns {Promise<{server: import('http').Server, port: number, url: string, close: () => Promise<void>}>}
 * @throws {Error} When `cacheDir` is missing or not a non-empty string; the
 *   returned promise also rejects if the server fails to start listening.
 */
async function startVoiceoverServer({ cacheDir, preferredPort = 0 }) {
  if (typeof cacheDir !== 'string' || !cacheDir) {
    throw new Error('startVoiceoverServer: cacheDir required');
  }

  const sendText = (res, status, body) => {
    res.writeHead(status, { 'Content-Type': 'text/plain' });
    res.end(body);
  };

  const server = http.createServer((req, res) => {
    if (req.method !== 'GET' || !req.url.startsWith('/audio/')) {
      sendText(res, 404, 'not found');
      return;
    }
    const fname = req.url.slice('/audio/'.length).split('?')[0];
    // Defense in depth — reject path traversal even on a localhost-only
    // server.
    if (fname === '' || fname.includes('/') || fname.includes('\\') || fname.includes('..')) {
      sendText(res, 400, 'bad filename');
      return;
    }
    const filePath = path.join(cacheDir, fname);
    fs.stat(filePath, (statErr, st) => {
      if (statErr || !st.isFile()) {
        sendText(res, 404, 'not found');
        return;
      }
      const ext = path.extname(fname).slice(1);
      const ctype = contentTypeFor(ext);
      const stream = fs.createReadStream(filePath);

      // Without an 'error' listener a file that disappears between stat()
      // and open() would raise an uncaught exception and kill the process.
      stream.once('error', () => {
        if (res.headersSent) {
          res.destroy();
        } else {
          sendText(res, 404, 'not found');
        }
      });
      // Headers are sent only once the file is actually open, so an open
      // failure can still be reported as a clean 404.
      stream.once('open', () => {
        res.writeHead(200, {
          'Content-Type': ctype,
          'Content-Length': st.size,
          'Cache-Control': 'no-store',
        });
        stream.pipe(res);
      });
      // Release the file descriptor if the client goes away mid-transfer.
      res.once('close', () => stream.destroy());
    });
  });

  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(preferredPort, '127.0.0.1', () => {
      server.removeListener('error', reject);
      resolve();
    });
  });

  const port = server.address().port;
  return {
    server,
    port,
    url: `http://127.0.0.1:${port}`,
    async close() {
      await new Promise((resolve) => {
        server.close(() => resolve());
        // server.close() only stops accepting new connections; on Node
        // versions where it does not also drop idle keep-alive sockets,
        // the callback would wait for them to time out.
        if (typeof server.closeIdleConnections === 'function') {
          server.closeIdleConnections();
        }
      });
    },
  };
}
97
+
98
/**
 * Resolve + synthesize (or cache-hit) one mp3 per card and return a map of
 * { cardIdx: audio URL } that buildInputProps threads into voiceoverSrc.
 *
 * Cards are processed sequentially. Per card: `voiceover.enabled === false`
 * skips it; the text is `voiceover.text` when non-blank, else
 * `card.narration`; the voice is `voiceover.voiceId`, then `card.voiceId`,
 * then `SYNTHESIZE_DEFAULTS.voice`; the speed is `voiceover.rate` when it is
 * a number, else 1.0.
 *
 * @param {object} opts
 * @param {object} opts.deck Deck whose `cards` array is walked. A missing
 *   deck or non-array `cards` yields empty results.
 * @param {{audition: Function}} opts.auditionClient
 *   Called as `audition({ voiceId, text, speed, format })`; expected to
 *   resolve to `{ code, cacheKey, fromCache, message? }`.
 * @param {string} opts.baseUrl e.g. http://127.0.0.1:54321
 * @param {(p:object) => void} [opts.onProgress]
 *   Called once per successfully resolved card with
 *   { cardIdx, total, fromCache, voiceId, textLen }. Errors it throws are
 *   swallowed.
 * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
 *   skipped reasons: missing-card | voiceover-disabled | no-text |
 *   network_error | tts_failed | any non-success `code` reported by the
 *   client (e.g. not_logged_in, quota_exceeded, invalid_voice).
 * @throws {Error} When `auditionClient.audition` or `baseUrl` is missing.
 */
async function prepareVoiceovers({ deck, auditionClient, baseUrl, onProgress }) {
  const byIdx = {};
  const skipped = [];
  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
  if (!auditionClient || typeof auditionClient.audition !== 'function') {
    throw new Error('prepareVoiceovers: auditionClient.audition is required');
  }
  if (typeof baseUrl !== 'string' || !baseUrl) {
    throw new Error('prepareVoiceovers: baseUrl is required');
  }

  const cards = deck.cards;
  const format = 'mp3';
  // Loop-invariant: normalise the base once instead of once per card.
  const audioRoot = `${baseUrl.replace(/\/$/, '')}/audio`;

  for (let i = 0; i < cards.length; i++) {
    const card = cards[i];
    if (!card) { skipped.push({ cardIdx: i, reason: 'missing-card' }); continue; }
    const vo = card.voiceover || {};
    if (vo.enabled === false) {
      skipped.push({ cardIdx: i, reason: 'voiceover-disabled' });
      continue;
    }
    const text = (typeof vo.text === 'string' && vo.text.trim())
      ? vo.text
      : card.narration;
    if (typeof text !== 'string' || !text.trim()) {
      skipped.push({ cardIdx: i, reason: 'no-text' });
      continue;
    }
    const voiceId = vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;
    const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;

    let r;
    try {
      r = await auditionClient.audition({ voiceId, text, speed, format });
    } catch (err) {
      skipped.push({
        cardIdx: i,
        reason: 'network_error',
        message: (err && err.message) || String(err),
      });
      continue;
    }
    // A client that resolves with nothing must cost one card, not the whole
    // pass: dereferencing it would throw and abort every remaining card.
    if (r == null || typeof r !== 'object') {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition client returned no result',
      });
      continue;
    }
    if (r.code !== 'success') {
      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
      // not_logged_in / quota_exceeded → bail early so the user sees one
      // clear message rather than N copies of the same root cause.
      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
      continue;
    }
    // Without a cache key the URL would be ".../audio/undefined.mp3".
    if (!r.cacheKey) {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition result is missing cacheKey',
      });
      continue;
    }

    byIdx[i] = `${audioRoot}/${r.cacheKey}.${format}`;
    if (typeof onProgress === 'function') {
      try {
        onProgress({
          cardIdx: i,
          total: cards.length,
          fromCache: !!r.fromCache,
          voiceId,
          textLen: text.length,
        });
      } catch { /* swallow consumer errors */ }
    }
  }

  return { byIdx, skipped };
}
179
+
180
+ module.exports = {
181
+ startVoiceoverServer,
182
+ prepareVoiceovers,
183
+ };