voxflow 1.15.3 → 1.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,6 +128,101 @@ function validateListPayload(list, i) {
128
128
  });
129
129
  }
130
130
 
131
+ // Optional per-card voiceover override. Extends the legacy `card.voiceId`
132
+ // (V1-only) with a nested object that carries audio behavior toggles — silent
133
+ // card, custom TTS text override, speech rate — so stage's audition endpoint
134
+ // and the local-render mux pass resolve a single source of truth per card.
135
+ // All fields are optional inside an optional object: omitting `voiceover`
136
+ // entirely keeps existing decks unchanged. Render-time resolution (highest
137
+ // precedence first):
138
+ // voiceId = voiceover.voiceId ?? card.voiceId ?? job-level default
139
+ // text = voiceover.text ?? card.narration
140
+ // enabled = voiceover.enabled ?? true
141
+ // rate = voiceover.rate ?? 1.0
142
+ // Optional per-card image registry. Each entry declares an AI-generation
143
+ // recipe (prompt + aspect + quality) addressable by a stable `id` so the
144
+ // content.html in V2 LayoutTree decks (and future themes that read by id)
145
+ // can reference the resolved URL. Generation lives in stage's /api/imagine
146
+ // endpoint, which content-hashes (prompt, aspect, quality) and caches
147
+ // upstream-backed `data:image/...` payloads.
148
// Limits for a card's `images` registry. The *_MAX values are inclusive upper
// bounds; the two sets enumerate the only accepted `aspect` / `quality` values.
const IMAGE_PROMPT_MAX = 1000;
const IMAGE_ID_MAX = 64;
const IMAGES_PER_CARD_MAX = 8;
const IMAGE_VALID_ASPECTS = new Set(['portrait', 'landscape', 'square']);
const IMAGE_VALID_QUALITIES = new Set(['fast', 'hd']);

/**
 * Shape-check the optional `images` array of one card.
 *
 * `null` / `undefined` is accepted and means "no images". Otherwise the value
 * must be an array of at most IMAGES_PER_CARD_MAX plain objects, each with a
 * unique `id` (1..IMAGE_ID_MAX chars of [a-zA-Z0-9_-]), a non-blank `prompt`
 * (at most IMAGE_PROMPT_MAX chars) and optional `aspect` / `quality` values
 * drawn from the sets above.
 *
 * @param {unknown} images  Value of `card.images`.
 * @param {number} cardIdx  Index of the card, used only in error messages.
 * @returns {void}
 * @throws {Error} On the first violation, with a `cards[i].images...` path.
 */
function validateCardImagesShape(images, cardIdx) {
  if (images === null || images === undefined) return;

  // Error text is assembled lazily so nothing is formatted on the happy path.
  const fail = (detail) => {
    throw new Error(`cards[${cardIdx}].images${detail}`);
  };
  const idPattern = /^[a-zA-Z0-9_-]+$/;

  if (!Array.isArray(images)) fail(' must be an array (or null)');

  const total = images.length;
  if (total > IMAGES_PER_CARD_MAX) {
    fail(` too many (${total} > ${IMAGES_PER_CARD_MAX})`);
  }

  const usedIds = new Set();

  const checkEntry = (entry, j) => {
    const isRecord = Boolean(entry) && typeof entry === 'object' && !Array.isArray(entry);
    if (!isRecord) fail(`[${j}] must be an object`);

    const { id, prompt, aspect, quality } = entry;

    // --- id: present, bounded, restricted alphabet, unique within the card.
    if (typeof id !== 'string' || id.trim() === '') {
      fail(`[${j}].id required (non-empty string)`);
    }
    if (id.length > IMAGE_ID_MAX) {
      fail(`[${j}].id too long (${id.length} > ${IMAGE_ID_MAX})`);
    }
    if (!idPattern.test(id)) {
      fail(`[${j}].id must match [a-zA-Z0-9_-]+`);
    }
    if (usedIds.has(id)) {
      fail(`[${j}].id duplicate: ${id}`);
    }
    usedIds.add(id);

    // --- prompt: present and bounded.
    if (typeof prompt !== 'string' || prompt.trim() === '') {
      fail(`[${j}].prompt required (non-empty string)`);
    }
    if (prompt.length > IMAGE_PROMPT_MAX) {
      fail(`[${j}].prompt too long (${prompt.length} > ${IMAGE_PROMPT_MAX})`);
    }

    // --- optional enums: only checked when supplied.
    const aspectGiven = aspect !== null && aspect !== undefined;
    if (aspectGiven && !IMAGE_VALID_ASPECTS.has(aspect)) {
      fail(`[${j}].aspect must be one of: ${Array.from(IMAGE_VALID_ASPECTS).join(', ')}`);
    }
    const qualityGiven = quality !== null && quality !== undefined;
    if (qualityGiven && !IMAGE_VALID_QUALITIES.has(quality)) {
      fail(`[${j}].quality must be one of: ${Array.from(IMAGE_VALID_QUALITIES).join(', ')}`);
    }
  };

  images.forEach(checkEntry);
}
193
+
194
// Upper bound (inclusive) on the length of a per-card `voiceover.text` override.
const VOICEOVER_TEXT_MAX = 500;

/**
 * Shape-check the optional `voiceover` object of one card.
 *
 * `null` / `undefined` is accepted. Otherwise the value must be a non-array
 * object whose fields, when present (non-nullish), satisfy:
 *   - `enabled`: boolean
 *   - `voiceId`: non-blank string of at most 128 chars
 *   - `text`:    string (may be empty) of at most VOICEOVER_TEXT_MAX chars
 *   - `rate`:    finite number within [0.5, 2.0]
 *
 * @param {unknown} vo      Value of `card.voiceover`.
 * @param {number} cardIdx  Index of the card, used only in error messages.
 * @returns {void}
 * @throws {Error} On the first violation, with a `cards[i].voiceover...` path.
 */
function validateVoiceoverShape(vo, cardIdx) {
  if (vo === null || vo === undefined) return;

  // Message prefix is built only when a check actually fails.
  const fail = (detail) => {
    throw new Error(`cards[${cardIdx}].voiceover${detail}`);
  };
  const given = (value) => value !== null && value !== undefined;

  const isRecord = typeof vo === 'object' && !Array.isArray(vo);
  if (!isRecord) fail(' must be an object');

  const { enabled, voiceId, text, rate } = vo;

  if (given(enabled) && typeof enabled !== 'boolean') {
    fail('.enabled must be boolean');
  }

  if (given(voiceId)) {
    const usable = typeof voiceId === 'string' && voiceId.trim() !== '';
    if (!usable) fail('.voiceId must be non-empty string when present');
    if (voiceId.length > 128) {
      fail(`.voiceId too long (${voiceId.length} > 128)`);
    }
  }

  if (given(text)) {
    if (typeof text !== 'string') fail('.text must be string');
    if (text.length > VOICEOVER_TEXT_MAX) {
      fail(`.text too long (${text.length} > ${VOICEOVER_TEXT_MAX})`);
    }
  }

  if (given(rate)) {
    const inRange = typeof rate === 'number' && Number.isFinite(rate) && rate >= 0.5 && rate <= 2.0;
    if (!inRange) fail('.rate must be number in [0.5, 2.0]');
  }
}
225
+
131
226
  function validatePaperSlideDeck(deck) {
132
227
  if (!deck || typeof deck !== 'object') throw new Error('deck missing');
133
228
  for (const f of ['header', 'seriesTitle', 'seriesTagline']) {
@@ -235,6 +330,8 @@ function validatePaperSlideDeck(deck) {
235
330
  throw new Error(`cards[${i}].voiceId too long (${card.voiceId.length} > 128)`);
236
331
  }
237
332
  }
333
+ validateVoiceoverShape(card.voiceover, i);
334
+ validateCardImagesShape(card.images, i);
238
335
  // Optional per-card image URL — photo-feature / atmospheric themes
239
336
  // composite it as a full-bleed background; other themes ignore it.
240
337
  // Shape-check only (string, length cap, http(s) prefix); reachability
@@ -290,10 +387,16 @@ function validatePaperSlideDeck(deck) {
290
387
  // / quote / stat / steps). Anything else is rejected so a renderer that
291
388
  // silently no-ops on unknown elements doesn't ship a broken video.
292
389
 
293
- const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps']);
390
+ const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps', 'raw-html']);
294
391
  const V2_HEADING_MAX = 22;
295
392
  const V2_BODY_TEXT_MAX = 30;
296
393
  const V2_STAT_LABEL_MAX = 30;
394
+ // raw-html is the open-ended escape hatch for themes / cards that want
395
+ // arbitrary markup. The cap is generous enough for a styled inline figure
396
+ // (a few paragraphs of HTML, maybe one inline data URI), but small enough
397
+ // to keep deck.json human-editable and prevent an LLM run-on from blowing
398
+ // past the watcher debounce.
399
+ const V2_RAW_HTML_MAX = 4096;
297
400
 
298
401
  function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
299
402
  if (!Array.isArray(children) || children.length === 0) {
@@ -375,6 +478,26 @@ function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
375
478
  );
376
479
  }
377
480
  });
481
+ } else if (el.el === 'raw-html') {
482
+ // Schema-only support: validator accepts the element and the
483
+ // V2 normalizer (video-present/src/compositions/PaperSlide/
484
+ // layout-tree.ts) maps it to a `rawHtml` field on the V1
485
+ // normalized output. PaperSlide composition rendering of
486
+ // arbitrary HTML lands in a follow-up PR — for now the
487
+ // composition silently skips this element, so a deck with
488
+ // raw-html validates + saves + edits cleanly but renders blank
489
+ // until the JSX side is updated.
490
+ if (typeof el.html !== 'string') {
491
+ throw new Error(`cards[${cardIdx}].children[${j}].html must be string`);
492
+ }
493
+ if (!el.html.trim()) {
494
+ throw new Error(`cards[${cardIdx}].children[${j}].html empty`);
495
+ }
496
+ if (el.html.length > V2_RAW_HTML_MAX) {
497
+ throw new Error(
498
+ `cards[${cardIdx}].children[${j}].html too long (${el.html.length} > ${V2_RAW_HTML_MAX})`
499
+ );
500
+ }
378
501
  }
379
502
  });
380
503
  return elements;
@@ -417,12 +540,22 @@ function validatePaperSlideDeckV2(deck) {
417
540
  if (!nonEmptyString(card.narration)) throw new Error(`cards[${i}].narration empty`);
418
541
 
419
542
  if (card.kind === 'title') {
420
- const els = validateV2Children(card.children, i, new Set(['heading']), { maxChildren: 3 });
421
- if (els.length === 0) throw new Error(`cards[${i}] title must have at least one heading`);
543
+ const els = validateV2Children(card.children, i, new Set(['heading', 'raw-html']), { maxChildren: 3 });
544
+ if (els.filter((e) => e.el === 'heading').length === 0) {
545
+ throw new Error(`cards[${i}] title must have at least one heading`);
546
+ }
422
547
  } else if (card.kind === 'body') {
423
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure']), { maxChildren: 4 });
548
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure', 'raw-html']), { maxChildren: 4 });
424
549
  const figs = els.filter((e) => e.el === 'paper-figure');
425
- if (figs.length !== 1) throw new Error(`cards[${i}] body must contain exactly one paper-figure (got ${figs.length})`);
550
+ const rawEls = els.filter((e) => e.el === 'raw-html');
551
+ // A body card needs either a figure OR a raw-html block — raw
552
+ // HTML can stand in as the entire visual when the theme wants
553
+ // a custom panel instead of the canned figure renderer.
554
+ if (figs.length + rawEls.length !== 1) {
555
+ throw new Error(
556
+ `cards[${i}] body must contain exactly one paper-figure OR one raw-html (got ${figs.length} figure + ${rawEls.length} raw-html)`
557
+ );
558
+ }
426
559
  const heads = els.filter((e) => e.el === 'heading');
427
560
  if (heads.length === 0) throw new Error(`cards[${i}] body must contain at least one heading`);
428
561
  } else if (card.kind === 'quote') {
@@ -431,21 +564,23 @@ function validatePaperSlideDeckV2(deck) {
431
564
  // stat / steps directly and ignores any non-discriminator elements
432
565
  // on these kinds). Discriminator element must still be present
433
566
  // exactly once. Cap at 3 children to keep output bounded.
434
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote']), { maxChildren: 3 });
567
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote', 'raw-html']), { maxChildren: 3 });
435
568
  const quoteEls = els.filter((e) => e.el === 'quote');
436
569
  if (quoteEls.length !== 1) throw new Error(`cards[${i}] quote card must contain exactly one quote element (got ${quoteEls.length})`);
437
570
  richCounts.quote += 1;
438
571
  } else if (card.kind === 'data') {
439
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat']), { maxChildren: 3 });
572
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat', 'raw-html']), { maxChildren: 3 });
440
573
  const statEls = els.filter((e) => e.el === 'stat');
441
574
  if (statEls.length !== 1) throw new Error(`cards[${i}] data card must contain exactly one stat element (got ${statEls.length})`);
442
575
  richCounts.data += 1;
443
576
  } else if (card.kind === 'list') {
444
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps']), { maxChildren: 3 });
577
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps', 'raw-html']), { maxChildren: 3 });
445
578
  const stepsEls = els.filter((e) => e.el === 'steps');
446
579
  if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
447
580
  richCounts.list += 1;
448
581
  }
582
+ validateVoiceoverShape(card.voiceover, i);
583
+ validateCardImagesShape(card.images, i);
449
584
  });
450
585
  // Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
451
586
  for (const k of Object.keys(richCounts)) {
@@ -477,6 +612,8 @@ module.exports = {
477
612
  validateQuotePayload,
478
613
  validateDataPayload,
479
614
  validateListPayload,
615
+ validateVoiceoverShape,
616
+ validateCardImagesShape,
480
617
  QUOTE_TEXT_MAX,
481
618
  QUOTE_ATTRIBUTION_MAX,
482
619
  DATA_VALUE_MAX,
@@ -485,4 +622,9 @@ module.exports = {
485
622
  LIST_ITEM_MAX_LEN,
486
623
  LIST_ITEM_MIN_COUNT,
487
624
  LIST_ITEM_MAX_COUNT,
625
+ VOICEOVER_TEXT_MAX,
626
+ IMAGE_PROMPT_MAX,
627
+ IMAGE_ID_MAX,
628
+ IMAGES_PER_CARD_MAX,
629
+ V2_RAW_HTML_MAX,
488
630
  };
@@ -0,0 +1,233 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Stage → backend bridge for AI image generation (hunyuan-image).
5
+ *
6
+ * Mirror of tts-audition.js for the visual side: stage's per-card 🎨 button
7
+ * resolves `card.images[i] = { id, prompt, aspect?, quality? }`, hashes the
8
+ * generation recipe, and either returns a cached PNG/JPG or asks the backend
9
+ * to produce one. JWT lives on the server (same posture as cloud-render /
10
+ * audition).
11
+ *
12
+ * Public surface:
13
+ * createImageGenClient({ apiBase?, tokenLoader?, cacheDir?, fetchImpl? })
14
+ * → { imagine({ prompt, aspect, quality }), cacheDir }
15
+ *
16
+ * Cache layout: <cacheDir>/<sha256(prompt|aspect|quality)>.<ext>
17
+ * Default cacheDir: ~/.config/voxflow/stage-image-cache/ — global so a
18
+ * prompt repeated across decks resolves to one file on disk.
19
+ *
20
+ * Backend contract: POST /api/image/generate { prompt, ratio, quality }
21
+ * → { code: 'success', image: 'data:image/png;base64,...', quota }
22
+ * We parse the data URL, persist the raw bytes, and serve them with the
23
+ * declared MIME so <img src> works in the browser without a base64 hop.
24
+ */
25
+
26
+ const crypto = require('crypto');
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const os = require('os');
30
+ const http = require('http');
31
+ const https = require('https');
32
+
33
+ const { API_BASE } = require('../core/config');
34
+ const { readCachedToken } = require('../core/auth');
35
+
36
+ const DEFAULT_TIMEOUT_MS = 180_000; // backend hunyuan-image takes 30-90s p95
37
+ const DEFAULT_ASPECT = 'portrait'; // 9:16 matches Slice frame
38
+ const DEFAULT_QUALITY = 'fast'; // 200-quota tier; 'hd' is 500
39
+ // Catalog references — surfaced so the stage UI can show "🎨 costs N quota"
40
+ // without an extra round-trip. Authoritative deduction is server-side.
41
+ const IMAGE_GEN_COST_FAST = 200;
42
+ const IMAGE_GEN_COST_HD = 500;
43
+
44
/**
 * Location of the image cache used when the caller does not supply one:
 * `<home>/.config/voxflow/stage-image-cache`.
 *
 * @returns {string} Absolute path under the current user's home directory.
 */
function defaultCacheDir() {
  const segments = ['.config', 'voxflow', 'stage-image-cache'];
  const home = os.homedir();
  return path.join(home, ...segments);
}
47
+
48
/**
 * Derive the cache file stem for an image recipe.
 *
 * The three fields are fed to SHA-256 in the order prompt, aspect, quality,
 * separated by a single space, so identical recipes always map to the same
 * hex digest.
 *
 * @param {{prompt: string, aspect: string, quality: string}} recipe
 * @returns {string} 64-character lowercase hex SHA-256 digest.
 */
function cacheKey({ prompt, aspect, quality }) {
  const SEPARATOR = ' ';
  return crypto
    .createHash('sha256')
    .update(prompt)
    .update(SEPARATOR)
    .update(aspect)
    .update(SEPARATOR)
    .update(quality)
    .digest('hex');
}
59
+
60
/**
 * Map an image MIME type to the file extension used in the cache.
 *
 * Matching is exact (case-sensitive, no parameters); anything unrecognised
 * falls back to `bin`.
 *
 * @param {string} mime  MIME type, e.g. `image/png`.
 * @returns {'png'|'jpg'|'webp'|'bin'}
 */
function extensionFor(mime) {
  switch (mime) {
    case 'image/png':
      return 'png';
    case 'image/jpeg':
    case 'image/jpg': // non-standard alias some providers emit
      return 'jpg';
    case 'image/webp':
      return 'webp';
    default:
      return 'bin';
  }
}
68
+
69
/**
 * Build the on-disk path of a cache entry: `<dir>/<key>.<ext>`.
 *
 * @param {string} dir  Cache directory.
 * @param {string} key  File stem (the recipe hash).
 * @param {string} ext  Extension without the leading dot.
 * @returns {string}
 */
function cachePath(dir, key, ext) {
  const fileName = `${key}.${ext}`;
  return path.join(dir, fileName);
}
72
+
73
/**
 * Decode a base64 `data:` URL into its MIME type and raw bytes.
 *
 * Accepts the RFC 2397 base64 form `data:<mime>[;param=value...];base64,<data>`.
 * The scheme and the `base64` token are matched case-insensitively, extra
 * media-type parameters (e.g. `;charset=utf-8`) are skipped, and the payload
 * may contain line breaks (the decoder ignores whitespace).
 *
 * @param {unknown} dataUrl
 * @returns {{mime: string, buf: Buffer}|null} Lower-cased MIME type and the
 *   decoded bytes, or `null` when the input is not a string, does not match
 *   the base64 data-URL form, or decodes to zero bytes.
 */
function parseDataUrl(dataUrl) {
  if (typeof dataUrl !== 'string') return null;

  // Group 1: media type. Optional `;param` segments are skipped. Group 2: the
  // payload, matched with [\s\S] so wrapped (multi-line) base64 is accepted.
  const match = /^data:([^;,]+)(?:;[^;,]*)*;base64,([\s\S]+)$/i.exec(dataUrl);
  if (!match) return null;

  // Buffer.from(string, 'base64') never throws; malformed input just decodes
  // to fewer (possibly zero) bytes, so the emptiness check is the only guard.
  const buf = Buffer.from(match[2], 'base64');
  if (buf.length === 0) return null;

  return { mime: match[1].toLowerCase(), buf };
}
84
+
85
/**
 * Issue one HTTP(S) request and resolve with the status and JSON-decoded body.
 *
 * @param {string} targetUrl  Absolute http: or https: URL.
 * @param {{method?: string, headers?: object}} [opts]  Method defaults to GET.
 * @param {*} [body]  When not null/undefined it is JSON-serialised and sent
 *   with `Content-Type: application/json` and a matching `Content-Length`.
 * @param {number} [timeoutMs]  Socket idle timeout; on expiry the request is
 *   destroyed and the promise rejects with an "upstream timeout" error.
 * @returns {Promise<{status: number, data: any}>} `data` is `null` when the
 *   response body is not valid JSON.
 *   Rejects on request errors, on timeout, and when the response stream fails
 *   or the connection closes before the full response has been received.
 */
function requestJson(targetUrl, opts = {}, body = null, timeoutMs = DEFAULT_TIMEOUT_MS) {
  return new Promise((resolve, reject) => {
    const u = new URL(targetUrl);
    const isHttps = u.protocol === 'https:';
    const mod = isHttps ? https : http;
    // Copy so the caller's headers object is never mutated.
    const headers = { ...(opts.headers || {}) };
    let payload = null;
    if (body !== null && body !== undefined) {
      payload = Buffer.from(JSON.stringify(body), 'utf8');
      headers['Content-Type'] = 'application/json';
      headers['Content-Length'] = payload.length;
    }
    const req = mod.request({
      hostname: u.hostname,
      port: u.port || (isHttps ? 443 : 80),
      path: u.pathname + u.search,
      method: opts.method || 'GET',
      headers,
    }, (res) => {
      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      // Without these two handlers a connection dropped after the headers
      // arrive never fires 'end', leaving the promise pending forever.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) {
          reject(new Error('upstream connection closed before response completed'));
        }
      });
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString('utf8');
        let parsed = null;
        try { parsed = JSON.parse(raw); } catch { /* non-JSON body: keep null */ }
        resolve({ status: res.statusCode, data: parsed });
      });
    });
    req.on('error', reject);
    req.setTimeout(timeoutMs, () => { req.destroy(new Error(`upstream timeout after ${timeoutMs}ms`)); });
    if (payload) req.write(payload);
    req.end();
  });
}
118
+
119
/**
 * Create an image-generation client backed by an on-disk cache.
 *
 * @param {object} [opts]
 * @param {string} [opts.apiBase=API_BASE]  Backend origin; one trailing slash is stripped.
 * @param {() => ({access_token:string}|null)} [opts.tokenLoader]
 *   Source of the bearer token; defaults to `readCachedToken`.
 * @param {string} [opts.cacheDir]  Defaults to `defaultCacheDir()`. Created
 *   (recursively, best-effort) when the client is constructed.
 * @param {(args:object) => Promise<{status:number, data:object|null}>} [opts.fetchImpl]
 *   Test injection point — receives `{ prompt, ratio, quality, headers }` and
 *   replaces the real HTTP call.
 * @returns {{ imagine: (params:object) => Promise<object>, cacheDir: string }}
 *   `imagine({ prompt, aspect, quality })` never rejects for expected failures;
 *   it resolves to `{ code: 'success', buf, contentType, fromCache, cacheKey, ext }`
 *   or to `{ code, message }` where `code` is one of `invalid_prompt`,
 *   `not_logged_in`, `network_error`, `quota_exceeded`, `imagine_failed`, or a
 *   code supplied by the backend.
 */
function createImageGenClient(opts = {}) {
  const apiBase = (opts.apiBase || API_BASE).replace(/\/$/, '');
  const tokenLoader = typeof opts.tokenLoader === 'function' ? opts.tokenLoader : readCachedToken;
  const cacheDir = opts.cacheDir || defaultCacheDir();
  const fetchImpl = typeof opts.fetchImpl === 'function' ? opts.fetchImpl : null;

  // Extensions probed on cache lookup (in this order) and the Content-Type
  // reported for each.
  const CACHED_TYPES = [
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['webp', 'image/webp'],
    ['bin', 'application/octet-stream'],
  ];

  try { fs.mkdirSync(cacheDir, { recursive: true }); } catch { /* best-effort */ }

  // Bearer header for the backend call, or null when no token is available.
  function authHeaders() {
    const cached = tokenLoader();
    if (!cached || !cached.access_token) return null;
    return { Authorization: 'Bearer ' + cached.access_token };
  }

  // Return a success result from the first readable, non-empty cache file for
  // `key`, or null on a miss.
  function readCached(key) {
    for (const [ext, contentType] of CACHED_TYPES) {
      let buf;
      try {
        buf = fs.readFileSync(cachePath(cacheDir, key, ext));
      } catch {
        continue; // missing / unreadable: try the next extension
      }
      // A zero-byte file can only be a damaged entry; treat it as a miss so
      // the image is regenerated instead of serving an empty body forever.
      if (buf.length === 0) continue;
      return { code: 'success', buf, contentType, fromCache: true, cacheKey: key, ext };
    }
    return null;
  }

  // Persist bytes via write-to-temp + rename so readers never observe a
  // partially written file. Failure is non-fatal: the caller still gets bytes.
  function writeCached(key, ext, buf) {
    const filePath = cachePath(cacheDir, key, ext);
    const tmp = `${filePath}.tmp-${process.pid}-${Date.now()}`;
    try {
      fs.writeFileSync(tmp, buf);
      fs.renameSync(tmp, filePath);
    } catch {
      try { fs.unlinkSync(tmp); } catch { /* nothing to clean up */ }
    }
  }

  async function imagine({ prompt, aspect, quality } = {}) {
    if (typeof prompt !== 'string' || !prompt.trim()) {
      return { code: 'invalid_prompt', message: 'prompt required (non-empty string)' };
    }
    // The backend only ever sees the trimmed prompt, so the cache key must be
    // derived from the same text; otherwise "cat" and "cat " would each miss
    // the cache and spend quota for an identical request.
    const cleanPrompt = prompt.trim();
    const asp = (aspect === 'portrait' || aspect === 'landscape' || aspect === 'square') ? aspect : DEFAULT_ASPECT;
    const qual = (quality === 'fast' || quality === 'hd') ? quality : DEFAULT_QUALITY;
    const key = cacheKey({ prompt: cleanPrompt, aspect: asp, quality: qual });

    // Cache first: a hit needs neither a token nor the network.
    const hit = readCached(key);
    if (hit) return hit;

    const headers = authHeaders();
    if (!headers) {
      return { code: 'not_logged_in', message: 'Run `voxflow login` first.' };
    }

    let r;
    try {
      if (fetchImpl) {
        r = await fetchImpl({ prompt: cleanPrompt, ratio: asp, quality: qual, headers });
      } else {
        r = await requestJson(`${apiBase}/api/image/generate`, { method: 'POST', headers }, {
          prompt: cleanPrompt,
          ratio: asp,
          quality: qual,
        });
      }
    } catch (err) {
      return { code: 'network_error', message: err.message || String(err) };
    }

    // An injected fetchImpl may resolve to nothing; report it instead of
    // throwing a TypeError on `r.status`.
    if (!r || typeof r !== 'object') {
      return { code: 'imagine_failed', message: 'empty response from image backend' };
    }

    if (r.status === 401) return { code: 'not_logged_in', message: 'Token expired — run `voxflow login`.' };
    if (r.status === 402 || r.status === 429) return { code: 'quota_exceeded', message: r.data?.message || 'Insufficient quota' };
    if (r.status >= 400 || !r.data || r.data.code !== 'success') {
      return {
        code: r.data?.code || 'imagine_failed',
        message: r.data?.message || `HTTP ${r.status}`,
      };
    }

    const parsed = parseDataUrl(r.data.image);
    if (!parsed) {
      return { code: 'imagine_failed', message: 'response missing or malformed image data URL' };
    }

    const ext = extensionFor(parsed.mime);
    writeCached(key, ext, parsed.buf);

    return {
      code: 'success',
      buf: parsed.buf,
      contentType: parsed.mime,
      fromCache: false,
      cacheKey: key,
      ext,
    };
  }

  return { imagine, cacheDir };
}
221
+
222
+ module.exports = {
223
+ createImageGenClient,
224
+ cacheKey,
225
+ cachePath,
226
+ defaultCacheDir,
227
+ parseDataUrl,
228
+ extensionFor,
229
+ IMAGE_GEN_COST_FAST,
230
+ IMAGE_GEN_COST_HD,
231
+ DEFAULT_ASPECT,
232
+ DEFAULT_QUALITY,
233
+ };
@@ -45,6 +45,9 @@ const {
45
45
  THEME_TO_DECK_ID,
46
46
  DEFAULT_THEME,
47
47
  } = require('../commands/slice-render');
48
+ const { createTtsAuditionClient } = require('./tts-audition');
49
+ const { createImageGenClient } = require('./image-gen');
50
+ const { startVoiceoverServer, prepareVoiceovers, prepareImages } = require('./voiceover-mux');
48
51
 
49
52
  // In-memory job table. We never persist jobs — a stage restart wipes history,
50
53
  // which is fine because the produced mp4 lives on disk under the user's deck
@@ -158,10 +161,92 @@ function startLocalRender(opts) {
158
161
 
159
162
  async function runRender({ job, deck, onProgress, onDone, onError }) {
160
163
  const { jobId, outputPath, deckId } = job;
164
+ let voiceoverServer = null;
161
165
  try {
162
166
  job.state = 'preparing';
167
+
168
+ // ─── Voiceover + Image prep (Phase 1 + Phase B) ─────────────────────
169
+ // Stage's Render button defaults to BOTH audio + AI image generation.
170
+ // Users in stage are iterating and expect a fully-rendered preview;
171
+ // hitting the cache makes re-renders effectively free for content they
172
+ // already auditioned / regenerated. Either pass can fail to silent
173
+ // fallback without aborting the render — failures land in
174
+ // job.voiceoverSkipped / job.imageSkipped so the UI can surface the
175
+ // reason post hoc.
176
+ let voiceoverByIdx = {};
177
+ let imageByIdx = {};
178
+ let voiceoverSkipped = [];
179
+ let imageSkipped = [];
180
+ try {
181
+ const audClient = createTtsAuditionClient();
182
+ const imgClient = createImageGenClient();
183
+ voiceoverServer = await startVoiceoverServer({
184
+ cacheDir: audClient.cacheDir,
185
+ imageCacheDir: imgClient.cacheDir,
186
+ });
187
+ const voPrep = await prepareVoiceovers({
188
+ deck,
189
+ auditionClient: audClient,
190
+ baseUrl: voiceoverServer.url,
191
+ onProgress: (p) => {
192
+ if (typeof onProgress === 'function') {
193
+ try {
194
+ onProgress({
195
+ jobId,
196
+ progress: 0,
197
+ framesRendered: 0,
198
+ framesTotal: 0,
199
+ phase: 'voiceover',
200
+ voiceoverIndex: p.cardIdx + 1,
201
+ voiceoverTotal: p.total,
202
+ voiceoverFromCache: p.fromCache,
203
+ });
204
+ } catch { /* swallow */ }
205
+ }
206
+ },
207
+ });
208
+ voiceoverByIdx = voPrep.byIdx;
209
+ voiceoverSkipped = voPrep.skipped;
210
+
211
+ const imgPrep = await prepareImages({
212
+ deck,
213
+ imgClient,
214
+ baseUrl: voiceoverServer.url,
215
+ onProgress: (p) => {
216
+ if (typeof onProgress === 'function') {
217
+ try {
218
+ onProgress({
219
+ jobId,
220
+ progress: 0,
221
+ framesRendered: 0,
222
+ framesTotal: 0,
223
+ phase: 'image',
224
+ imageIndex: p.cardIdx + 1,
225
+ imageTotal: p.total,
226
+ imageFromCache: p.fromCache,
227
+ });
228
+ } catch { /* swallow */ }
229
+ }
230
+ },
231
+ });
232
+ imageByIdx = imgPrep.byIdx;
233
+ imageSkipped = imgPrep.skipped;
234
+ } catch (err) {
235
+ // Media prep failure is non-fatal — fall back to silent + no-AI
236
+ // images so a TTS / image outage still produces an mp4.
237
+ voiceoverSkipped = [{ cardIdx: -1, reason: 'media_prep_failed', message: err.message }];
238
+ if (voiceoverServer) {
239
+ try { await voiceoverServer.close(); } catch { /* */ }
240
+ voiceoverServer = null;
241
+ }
242
+ }
243
+ job.voiceoverCount = Object.keys(voiceoverByIdx).length;
244
+ job.voiceoverSkipped = voiceoverSkipped;
245
+ job.imageCount = Object.keys(imageByIdx).length;
246
+ job.imageSkipped = imageSkipped;
247
+
163
248
  const serveUrl = resolveServeUrl();
164
- const inputProps = buildInputProps(deck);
249
+ const inputProps = buildInputProps(deck, { voiceoverByIdx, imageByIdx });
165
250
 
166
251
  const renderer = loadRenderer();
167
252
  job.coldStart = !chromeBinaryExists();
@@ -234,6 +319,12 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
234
319
  if (typeof onError === 'function') {
235
320
  try { onError({ jobId, message: job.error }); } catch { /* swallow */ }
236
321
  }
322
+ } finally {
323
+ // Always tear down the audio file server, including on render failure,
324
+ // so a stale localhost listener doesn't leak across jobs.
325
+ if (voiceoverServer) {
326
+ try { await voiceoverServer.close(); } catch { /* best-effort */ }
327
+ }
237
328
  }
238
329
  }
239
330