voxflow 1.15.4 → 1.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -139,6 +139,58 @@ function validateListPayload(list, i) {
139
139
  // text = voiceover.text ?? card.narration
140
140
  // enabled = voiceover.enabled ?? true
141
141
  // rate = voiceover.rate ?? 1.0
142
+ // Optional per-card image registry. Each entry declares an AI-generation
143
+ // recipe (prompt + aspect + quality) addressable by a stable `id` so the
144
+ // content.html in V2 LayoutTree decks (and future themes that read by id)
145
+ // can reference the resolved URL. Generation lives in stage's /api/imagine
146
+ // endpoint, which content-hashes (prompt, aspect, quality) and caches
147
+ // upstream-backed `data:image/...` payloads.
148
// Limits and enumerations for the optional per-card `images` registry.
const IMAGE_PROMPT_MAX = 1000;
const IMAGE_ID_MAX = 64;
const IMAGES_PER_CARD_MAX = 8;
const IMAGE_VALID_ASPECTS = new Set(['portrait', 'landscape', 'square']);
const IMAGE_VALID_QUALITIES = new Set(['fast', 'hd']);

/**
 * Shape-check a card's optional `images` array.
 *
 * `null` / `undefined` are accepted (the field is optional). Otherwise the
 * value must be an array of at most IMAGES_PER_CARD_MAX plain objects, each
 * with a unique `id` matching [a-zA-Z0-9_-]+ (<= IMAGE_ID_MAX chars), a
 * non-blank `prompt` (<= IMAGE_PROMPT_MAX chars), and optional `aspect` /
 * `quality` values drawn from the sets above.
 *
 * @param {unknown} images   Value of `card.images`.
 * @param {number} cardIdx   Card index, used only to build error messages.
 * @returns {void}
 * @throws {Error} On the first violated rule; the message names the path.
 */
function validateCardImagesShape(images, cardIdx) {
  if (images === null || images === undefined) {
    return;
  }
  if (!Array.isArray(images)) {
    throw new Error(`cards[${cardIdx}].images must be an array (or null)`);
  }
  if (images.length > IMAGES_PER_CARD_MAX) {
    throw new Error(`cards[${cardIdx}].images too many (${images.length} > ${IMAGES_PER_CARD_MAX})`);
  }

  const idPattern = /^[a-zA-Z0-9_-]+$/;
  const usedIds = new Set();

  // Validates one registry entry; checks run in a fixed order so the first
  // failing rule determines the error that surfaces.
  const checkEntry = (img, j) => {
    const isPlainObject = Boolean(img) && typeof img === 'object' && !Array.isArray(img);
    if (!isPlainObject) {
      throw new Error(`cards[${cardIdx}].images[${j}] must be an object`);
    }

    const idIsUsable = typeof img.id === 'string' && img.id.trim() !== '';
    if (!idIsUsable) {
      throw new Error(`cards[${cardIdx}].images[${j}].id required (non-empty string)`);
    }
    if (img.id.length > IMAGE_ID_MAX) {
      throw new Error(`cards[${cardIdx}].images[${j}].id too long (${img.id.length} > ${IMAGE_ID_MAX})`);
    }
    if (!idPattern.test(img.id)) {
      throw new Error(`cards[${cardIdx}].images[${j}].id must match [a-zA-Z0-9_-]+`);
    }
    if (usedIds.has(img.id)) {
      throw new Error(`cards[${cardIdx}].images[${j}].id duplicate: ${img.id}`);
    }
    usedIds.add(img.id);

    const promptIsUsable = typeof img.prompt === 'string' && img.prompt.trim() !== '';
    if (!promptIsUsable) {
      throw new Error(`cards[${cardIdx}].images[${j}].prompt required (non-empty string)`);
    }
    if (img.prompt.length > IMAGE_PROMPT_MAX) {
      throw new Error(`cards[${cardIdx}].images[${j}].prompt too long (${img.prompt.length} > ${IMAGE_PROMPT_MAX})`);
    }

    const aspectGiven = img.aspect !== null && img.aspect !== undefined;
    if (aspectGiven && !IMAGE_VALID_ASPECTS.has(img.aspect)) {
      throw new Error(`cards[${cardIdx}].images[${j}].aspect must be one of: ${[...IMAGE_VALID_ASPECTS].join(', ')}`);
    }
    const qualityGiven = img.quality !== null && img.quality !== undefined;
    if (qualityGiven && !IMAGE_VALID_QUALITIES.has(img.quality)) {
      throw new Error(`cards[${cardIdx}].images[${j}].quality must be one of: ${[...IMAGE_VALID_QUALITIES].join(', ')}`);
    }
  };

  // forEach (not an index loop) so sparse-array holes are skipped.
  images.forEach((img, j) => checkEntry(img, j));
}
193
+
142
194
  const VOICEOVER_TEXT_MAX = 500;
143
195
  function validateVoiceoverShape(vo, cardIdx) {
144
196
  if (vo == null) return;
@@ -279,6 +331,7 @@ function validatePaperSlideDeck(deck) {
279
331
  }
280
332
  }
281
333
  validateVoiceoverShape(card.voiceover, i);
334
+ validateCardImagesShape(card.images, i);
282
335
  // Optional per-card image URL — photo-feature / atmospheric themes
283
336
  // composite it as a full-bleed background; other themes ignore it.
284
337
  // Shape-check only (string, length cap, http(s) prefix); reachability
@@ -334,10 +387,16 @@ function validatePaperSlideDeck(deck) {
334
387
  // / quote / stat / steps). Anything else is rejected so a renderer that
335
388
  // silently no-ops on unknown elements doesn't ship a broken video.
336
389
 
337
- const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps']);
390
+ const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps', 'raw-html']);
338
391
  const V2_HEADING_MAX = 22;
339
392
  const V2_BODY_TEXT_MAX = 30;
340
393
  const V2_STAT_LABEL_MAX = 30;
394
+ // raw-html is the open-ended escape hatch for themes / cards that want
395
+ // arbitrary markup. The cap is generous enough for a styled inline figure
396
+ // (a few paragraphs of HTML, maybe one inline data URI), but small enough
397
+ // to keep deck.json human-editable and prevent an LLM run-on from blowing
398
+ // past the watcher debounce.
399
+ const V2_RAW_HTML_MAX = 4096;
341
400
 
342
401
  function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
343
402
  if (!Array.isArray(children) || children.length === 0) {
@@ -419,6 +478,26 @@ function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
419
478
  );
420
479
  }
421
480
  });
481
+ } else if (el.el === 'raw-html') {
482
+ // Schema-only support: validator accepts the element and the
483
+ // V2 normalizer (video-present/src/compositions/PaperSlide/
484
+ // layout-tree.ts) maps it to a `rawHtml` field on the V1
485
+ // normalized output. PaperSlide composition rendering of
486
+ // arbitrary HTML lands in a follow-up PR — for now the
487
+ // composition silently skips this element, so a deck with
488
+ // raw-html validates + saves + edits cleanly but renders blank
489
+ // until the JSX side is updated.
490
+ if (typeof el.html !== 'string') {
491
+ throw new Error(`cards[${cardIdx}].children[${j}].html must be string`);
492
+ }
493
+ if (!el.html.trim()) {
494
+ throw new Error(`cards[${cardIdx}].children[${j}].html empty`);
495
+ }
496
+ if (el.html.length > V2_RAW_HTML_MAX) {
497
+ throw new Error(
498
+ `cards[${cardIdx}].children[${j}].html too long (${el.html.length} > ${V2_RAW_HTML_MAX})`
499
+ );
500
+ }
422
501
  }
423
502
  });
424
503
  return elements;
@@ -461,12 +540,22 @@ function validatePaperSlideDeckV2(deck) {
461
540
  if (!nonEmptyString(card.narration)) throw new Error(`cards[${i}].narration empty`);
462
541
 
463
542
  if (card.kind === 'title') {
464
- const els = validateV2Children(card.children, i, new Set(['heading']), { maxChildren: 3 });
465
- if (els.length === 0) throw new Error(`cards[${i}] title must have at least one heading`);
543
+ const els = validateV2Children(card.children, i, new Set(['heading', 'raw-html']), { maxChildren: 3 });
544
+ if (els.filter((e) => e.el === 'heading').length === 0) {
545
+ throw new Error(`cards[${i}] title must have at least one heading`);
546
+ }
466
547
  } else if (card.kind === 'body') {
467
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure']), { maxChildren: 4 });
548
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure', 'raw-html']), { maxChildren: 4 });
468
549
  const figs = els.filter((e) => e.el === 'paper-figure');
469
- if (figs.length !== 1) throw new Error(`cards[${i}] body must contain exactly one paper-figure (got ${figs.length})`);
550
+ const rawEls = els.filter((e) => e.el === 'raw-html');
551
+ // A body card needs either a figure OR a raw-html block — raw
552
+ // HTML can stand in as the entire visual when the theme wants
553
+ // a custom panel instead of the canned figure renderer.
554
+ if (figs.length + rawEls.length !== 1) {
555
+ throw new Error(
556
+ `cards[${i}] body must contain exactly one paper-figure OR one raw-html (got ${figs.length} figure + ${rawEls.length} raw-html)`
557
+ );
558
+ }
470
559
  const heads = els.filter((e) => e.el === 'heading');
471
560
  if (heads.length === 0) throw new Error(`cards[${i}] body must contain at least one heading`);
472
561
  } else if (card.kind === 'quote') {
@@ -475,22 +564,23 @@ function validatePaperSlideDeckV2(deck) {
475
564
  // stat / steps directly and ignores any non-discriminator elements
476
565
  // on these kinds). Discriminator element must still be present
477
566
  // exactly once. Cap at 3 children to keep output bounded.
478
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote']), { maxChildren: 3 });
567
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote', 'raw-html']), { maxChildren: 3 });
479
568
  const quoteEls = els.filter((e) => e.el === 'quote');
480
569
  if (quoteEls.length !== 1) throw new Error(`cards[${i}] quote card must contain exactly one quote element (got ${quoteEls.length})`);
481
570
  richCounts.quote += 1;
482
571
  } else if (card.kind === 'data') {
483
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat']), { maxChildren: 3 });
572
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat', 'raw-html']), { maxChildren: 3 });
484
573
  const statEls = els.filter((e) => e.el === 'stat');
485
574
  if (statEls.length !== 1) throw new Error(`cards[${i}] data card must contain exactly one stat element (got ${statEls.length})`);
486
575
  richCounts.data += 1;
487
576
  } else if (card.kind === 'list') {
488
- const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps']), { maxChildren: 3 });
577
+ const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps', 'raw-html']), { maxChildren: 3 });
489
578
  const stepsEls = els.filter((e) => e.el === 'steps');
490
579
  if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
491
580
  richCounts.list += 1;
492
581
  }
493
582
  validateVoiceoverShape(card.voiceover, i);
583
+ validateCardImagesShape(card.images, i);
494
584
  });
495
585
  // Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
496
586
  for (const k of Object.keys(richCounts)) {
@@ -523,6 +613,7 @@ module.exports = {
523
613
  validateDataPayload,
524
614
  validateListPayload,
525
615
  validateVoiceoverShape,
616
+ validateCardImagesShape,
526
617
  QUOTE_TEXT_MAX,
527
618
  QUOTE_ATTRIBUTION_MAX,
528
619
  DATA_VALUE_MAX,
@@ -532,4 +623,8 @@ module.exports = {
532
623
  LIST_ITEM_MIN_COUNT,
533
624
  LIST_ITEM_MAX_COUNT,
534
625
  VOICEOVER_TEXT_MAX,
626
+ IMAGE_PROMPT_MAX,
627
+ IMAGE_ID_MAX,
628
+ IMAGES_PER_CARD_MAX,
629
+ V2_RAW_HTML_MAX,
535
630
  };
@@ -0,0 +1,233 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Stage → backend bridge for AI image generation (hunyuan-image).
5
+ *
6
+ * Mirror of tts-audition.js for the visual side: stage's per-card 🎨 button
7
+ * resolves `card.images[i] = { id, prompt, aspect?, quality? }`, hashes the
8
+ * generation recipe, and either returns a cached PNG/JPG or asks the backend
9
+ * to produce one. JWT lives on the server (same posture as cloud-render /
10
+ * audition).
11
+ *
12
+ * Public surface:
13
+ * createImageGenClient({ apiBase?, tokenLoader?, cacheDir?, fetchImpl? })
14
+ * → { imagine({ prompt, aspect, quality }), cacheDir }
15
+ *
16
+ * Cache layout: <cacheDir>/<sha256(prompt|aspect|quality)>.<ext>
17
+ * Default cacheDir: ~/.config/voxflow/stage-image-cache/ — global so a
18
+ * prompt repeated across decks resolves to one file on disk.
19
+ *
20
+ * Backend contract: POST /api/image/generate { prompt, ratio, quality }
21
+ * → { code: 'success', image: 'data:image/png;base64,...', quota }
22
+ * We parse the data URL, persist the raw bytes, and serve them with the
23
+ * declared MIME so <img src> works in the browser without a base64 hop.
24
+ */
25
+
26
+ const crypto = require('crypto');
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+ const os = require('os');
30
+ const http = require('http');
31
+ const https = require('https');
32
+
33
+ const { API_BASE } = require('../core/config');
34
+ const { readCachedToken } = require('../core/auth');
35
+
36
+ const DEFAULT_TIMEOUT_MS = 180_000; // backend hunyuan-image takes 30-90s p95
37
+ const DEFAULT_ASPECT = 'portrait'; // 9:16 matches Slice frame
38
+ const DEFAULT_QUALITY = 'fast'; // 200-quota tier; 'hd' is 500
39
+ // Catalog references — surfaced so the stage UI can show "🎨 costs N quota"
40
+ // without an extra round-trip. Authoritative deduction is server-side.
41
+ const IMAGE_GEN_COST_FAST = 200;
42
+ const IMAGE_GEN_COST_HD = 500;
43
+
44
/**
 * Location of the image cache used when the caller does not supply one.
 *
 * @returns {string} `<home>/.config/voxflow/stage-image-cache`
 */
function defaultCacheDir() {
  const segments = ['.config', 'voxflow', 'stage-image-cache'];
  const home = os.homedir();
  return path.join(home, ...segments);
}
47
+
48
/**
 * Content hash identifying one generation recipe.
 *
 * The digest covers prompt, aspect and quality (space-separated, in that
 * order), so identical recipes always map to the same cache file name.
 *
 * @param {{prompt: string, aspect: string, quality: string}} recipe
 * @returns {string} Lower-case hex SHA-256 digest.
 */
function cacheKey({ prompt, aspect, quality }) {
  const separator = ' ';
  return crypto
    .createHash('sha256')
    .update(prompt)
    .update(separator)
    .update(aspect)
    .update(separator)
    .update(quality)
    .digest('hex');
}
59
+
60
/**
 * Map an image MIME type to the file extension used in the cache.
 *
 * PNG, JPEG (both `image/jpeg` and `image/jpg`) and WebP are recognised;
 * anything else falls back to the generic `bin` extension.
 *
 * @param {string} mime
 * @returns {'png'|'jpg'|'webp'|'bin'}
 */
function extensionFor(mime) {
  switch (mime) {
    case 'image/png':
      return 'png';
    case 'image/jpeg':
    case 'image/jpg':
      return 'jpg';
    case 'image/webp':
      return 'webp';
    default:
      return 'bin';
  }
}
68
+
69
/**
 * Build the on-disk path of a cache entry.
 *
 * @param {string} dir  Cache directory.
 * @param {string} key  Content hash of the recipe.
 * @param {string} ext  File extension without the leading dot.
 * @returns {string} `<dir>/<key>.<ext>`
 */
function cachePath(dir, key, ext) {
  const fileName = `${key}.${ext}`;
  return path.join(dir, fileName);
}
72
+
73
/**
 * Decode a base64 `data:` URL into its media type and raw bytes.
 *
 * Accepts the RFC 2397 base64 form, including optional media-type
 * parameters before the `;base64` marker and payloads that are wrapped
 * across lines (Node's base64 decoder ignores embedded whitespace).
 *
 * @param {unknown} dataUrl
 * @returns {{mime: string, buf: Buffer} | null} The lower-cased media type
 *   and decoded bytes, or `null` when the input is not a string, is not a
 *   base64 data URL, or decodes to zero bytes.
 */
function parseDataUrl(dataUrl) {
  if (typeof dataUrl !== 'string') return null;
  // data:<mime>[;param=value ...];base64,<bytes>
  // `[\s\S]+` rather than `.+` so a line-wrapped payload still matches.
  const m = /^data:([^;,]+)(?:;[^;,]+)*?;base64,([\s\S]+)$/i.exec(dataUrl.trim());
  if (!m) return null;
  // Buffer.from(string, 'base64') does not throw on malformed input; it
  // decodes what it can, so the emptiness check below is the real guard.
  const buf = Buffer.from(m[2], 'base64');
  if (!buf.length) return null;
  return { mime: m[1].toLowerCase(), buf };
}
84
+
85
/**
 * Issue an HTTP(S) request and parse the response body as JSON.
 *
 * @param {string} targetUrl  Absolute http:// or https:// URL.
 * @param {{method?: string, headers?: object}} [opts]  Method defaults to GET.
 * @param {*} [body]  When not null/undefined, sent as a JSON request body
 *   with matching Content-Type / Content-Length headers.
 * @param {number} [timeoutMs]  Socket idle timeout before the request is
 *   destroyed.
 * @returns {Promise<{status: number, data: *}>} Resolves with the HTTP
 *   status and the parsed body; `data` is `null` when the body is not valid
 *   JSON. Rejects on request errors, timeout, an invalid URL, or a response
 *   stream that fails before completing.
 */
function requestJson(targetUrl, opts = {}, body = null, timeoutMs = DEFAULT_TIMEOUT_MS) {
  return new Promise((resolve, reject) => {
    const u = new URL(targetUrl);
    const mod = u.protocol === 'https:' ? https : http;
    const headers = Object.assign({}, opts.headers || {});
    let payload = null;
    if (body !== null && body !== undefined) {
      payload = Buffer.from(JSON.stringify(body), 'utf8');
      headers['Content-Type'] = 'application/json';
      headers['Content-Length'] = payload.length;
    }
    const req = mod.request({
      hostname: u.hostname,
      port: u.port || (u.protocol === 'https:' ? 443 : 80),
      path: u.pathname + u.search,
      method: opts.method || 'GET',
      headers,
    }, (res) => {
      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      // A connection dropped mid-body never fires 'end', and Node only
      // reports the failure on the response stream when a listener is
      // attached. Without this the promise would never settle.
      res.on('error', reject);
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString('utf8');
        let parsed = null;
        try { parsed = JSON.parse(raw); } catch { /* non-JSON body: keep null */ }
        resolve({ status: res.statusCode, data: parsed });
      });
    });
    req.on('error', reject);
    req.setTimeout(timeoutMs, () => { req.destroy(new Error(`upstream timeout after ${timeoutMs}ms`)); });
    if (payload) req.write(payload);
    req.end();
  });
}
118
+
119
/**
 * Create an image-generation client backed by an on-disk content cache.
 *
 * `imagine()` never throws for expected failures; it resolves with a
 * `{ code, message }` object instead (`invalid_prompt`, `not_logged_in`,
 * `quota_exceeded`, `network_error`, `imagine_failed`, or a backend-supplied
 * code). On success it resolves with
 * `{ code: 'success', buf, contentType, fromCache, cacheKey, ext }`.
 *
 * @param {object} [opts]
 * @param {string} [opts.apiBase=API_BASE]
 * @param {() => ({access_token:string}|null)} [opts.tokenLoader]
 * @param {string} [opts.cacheDir]
 * @param {(args:object) => Promise<{status:number, data:object|null}>} [opts.fetchImpl]
 *   Test injection point — receives `{ prompt, ratio, quality, headers }`.
 * @returns {{ imagine: (params:object) => Promise<object>, cacheDir: string }}
 */
function createImageGenClient(opts = {}) {
  const apiBase = (opts.apiBase || API_BASE).replace(/\/$/, '');
  const tokenLoader = typeof opts.tokenLoader === 'function' ? opts.tokenLoader : readCachedToken;
  const cacheDir = opts.cacheDir || defaultCacheDir();
  const fetchImpl = typeof opts.fetchImpl === 'function' ? opts.fetchImpl : null;

  // Extension → MIME for cache hits; order is the probe order on lookup.
  const CACHED_MIME_BY_EXT = {
    png: 'image/png',
    jpg: 'image/jpeg',
    webp: 'image/webp',
    bin: 'application/octet-stream',
  };

  try { fs.mkdirSync(cacheDir, { recursive: true }); } catch { /* best-effort */ }

  function authHeaders() {
    const cached = tokenLoader();
    if (!cached || !cached.access_token) return null;
    return { Authorization: 'Bearer ' + cached.access_token };
  }

  async function imagine({ prompt, aspect, quality } = {}) {
    if (typeof prompt !== 'string' || !prompt.trim()) {
      return { code: 'invalid_prompt', message: 'prompt required (non-empty string)' };
    }
    // Normalize once. The backend only ever receives the trimmed prompt, so
    // the cache key must be derived from the same text; otherwise "cat" and
    // "cat " would miss each other's cache entry and spend quota twice for
    // an identical upstream request.
    const cleanPrompt = prompt.trim();
    const asp = (aspect === 'portrait' || aspect === 'landscape' || aspect === 'square') ? aspect : DEFAULT_ASPECT;
    const qual = (quality === 'fast' || quality === 'hd') ? quality : DEFAULT_QUALITY;
    const key = cacheKey({ prompt: cleanPrompt, aspect: asp, quality: qual });

    // Cache check — same recipe = same bytes. The stored extension depends
    // on the MIME the backend returned, so probe each known extension.
    for (const ext of Object.keys(CACHED_MIME_BY_EXT)) {
      const fp = cachePath(cacheDir, key, ext);
      try {
        const buf = fs.readFileSync(fp);
        return {
          code: 'success',
          buf,
          contentType: CACHED_MIME_BY_EXT[ext],
          fromCache: true,
          cacheKey: key,
          ext,
        };
      } catch { /* not cached under this extension — try the next one */ }
    }

    const headers = authHeaders();
    if (!headers) {
      return { code: 'not_logged_in', message: 'Run `voxflow login` first.' };
    }

    let r;
    try {
      if (fetchImpl) {
        r = await fetchImpl({ prompt: cleanPrompt, ratio: asp, quality: qual, headers });
      } else {
        r = await requestJson(`${apiBase}/api/image/generate`, { method: 'POST', headers }, {
          prompt: cleanPrompt,
          ratio: asp,
          quality: qual,
        });
      }
    } catch (err) {
      return { code: 'network_error', message: err.message || String(err) };
    }

    if (r.status === 401) return { code: 'not_logged_in', message: 'Token expired — run `voxflow login`.' };
    if (r.status === 402 || r.status === 429) return { code: 'quota_exceeded', message: r.data?.message || 'Insufficient quota' };
    if (r.status >= 400 || !r.data || r.data.code !== 'success') {
      return {
        code: r.data?.code || 'imagine_failed',
        message: r.data?.message || `HTTP ${r.status}`,
      };
    }
    const parsed = parseDataUrl(r.data.image);
    if (!parsed) {
      return { code: 'imagine_failed', message: 'response missing or malformed image data URL' };
    }
    const ext = extensionFor(parsed.mime);
    const filePath = cachePath(cacheDir, key, ext);
    // Write to a unique temp name, then rename, so a concurrent reader never
    // observes a partially written cache file.
    const tmp = `${filePath}.tmp-${process.pid}-${Date.now()}`;
    try {
      fs.writeFileSync(tmp, parsed.buf);
      fs.renameSync(tmp, filePath);
    } catch {
      // Caching is best-effort: the freshly generated bytes are still
      // returned below even when they could not be persisted.
      try { fs.unlinkSync(tmp); } catch { /* temp file may not exist */ }
    }
    return {
      code: 'success',
      buf: parsed.buf,
      contentType: parsed.mime,
      fromCache: false,
      cacheKey: key,
      ext,
    };
  }

  return { imagine, cacheDir };
}
221
+
222
+ module.exports = {
223
+ createImageGenClient,
224
+ cacheKey,
225
+ cachePath,
226
+ defaultCacheDir,
227
+ parseDataUrl,
228
+ extensionFor,
229
+ IMAGE_GEN_COST_FAST,
230
+ IMAGE_GEN_COST_HD,
231
+ DEFAULT_ASPECT,
232
+ DEFAULT_QUALITY,
233
+ };
@@ -46,7 +46,8 @@ const {
46
46
  DEFAULT_THEME,
47
47
  } = require('../commands/slice-render');
48
48
  const { createTtsAuditionClient } = require('./tts-audition');
49
- const { startVoiceoverServer, prepareVoiceovers } = require('./voiceover-mux');
49
+ const { createImageGenClient } = require('./image-gen');
50
+ const { startVoiceoverServer, prepareVoiceovers, prepareImages } = require('./voiceover-mux');
50
51
 
51
52
  // In-memory job table. We never persist jobs — a stage restart wipes history,
52
53
  // which is fine because the produced mp4 lives on disk under the user's deck
@@ -164,18 +165,26 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
164
165
  try {
165
166
  job.state = 'preparing';
166
167
 
167
- // ─── Voiceover prep (Phase 1) ──────────────────────────────────────
168
- // Stage's Render button defaults to including audio users in stage
169
- // are iterating and expect a richer preview. The audition cache makes
170
- // re-renders effectively free for cards they already previewed.
171
- // Falls back to silent video on not_logged_in / quota_exceeded
172
- // (recorded in job.voiceoverSkipped so the UI can surface the reason).
168
+ // ─── Voiceover + Image prep (Phase 1 + Phase B) ─────────────────────
169
+ // Stage's Render button defaults to BOTH audio + AI image generation.
170
+ // Users in stage are iterating and expect a fully-rendered preview;
171
+ // hitting the cache makes re-renders effectively free for content they
172
+ // already auditioned / regenerated. Either pass can fail to silent
173
+ // fallback without aborting the render — failures land in
174
+ // job.voiceoverSkipped / job.imageSkipped so the UI can surface the
175
+ // reason post hoc.
173
176
  let voiceoverByIdx = {};
177
+ let imageByIdx = {};
174
178
  let voiceoverSkipped = [];
179
+ let imageSkipped = [];
175
180
  try {
176
181
  const audClient = createTtsAuditionClient();
177
- voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
178
- const prep = await prepareVoiceovers({
182
+ const imgClient = createImageGenClient();
183
+ voiceoverServer = await startVoiceoverServer({
184
+ cacheDir: audClient.cacheDir,
185
+ imageCacheDir: imgClient.cacheDir,
186
+ });
187
+ const voPrep = await prepareVoiceovers({
179
188
  deck,
180
189
  auditionClient: audClient,
181
190
  baseUrl: voiceoverServer.url,
@@ -196,12 +205,36 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
196
205
  }
197
206
  },
198
207
  });
199
- voiceoverByIdx = prep.byIdx;
200
- voiceoverSkipped = prep.skipped;
208
+ voiceoverByIdx = voPrep.byIdx;
209
+ voiceoverSkipped = voPrep.skipped;
210
+
211
+ const imgPrep = await prepareImages({
212
+ deck,
213
+ imgClient,
214
+ baseUrl: voiceoverServer.url,
215
+ onProgress: (p) => {
216
+ if (typeof onProgress === 'function') {
217
+ try {
218
+ onProgress({
219
+ jobId,
220
+ progress: 0,
221
+ framesRendered: 0,
222
+ framesTotal: 0,
223
+ phase: 'image',
224
+ imageIndex: p.cardIdx + 1,
225
+ imageTotal: p.total,
226
+ imageFromCache: p.fromCache,
227
+ });
228
+ } catch { /* swallow */ }
229
+ }
230
+ },
231
+ });
232
+ imageByIdx = imgPrep.byIdx;
233
+ imageSkipped = imgPrep.skipped;
201
234
  } catch (err) {
202
- // Voiceover prep failure is non-fatal — fall back to silent render
203
- // so a TTS outage / first-run-without-login still produces an mp4.
204
- voiceoverSkipped = [{ cardIdx: -1, reason: 'voiceover_prep_failed', message: err.message }];
235
+ // Media prep failure is non-fatal — fall back to silent + no-AI
236
+ // images so a TTS / image outage still produces an mp4.
237
+ voiceoverSkipped = [{ cardIdx: -1, reason: 'media_prep_failed', message: err.message }];
205
238
  if (voiceoverServer) {
206
239
  try { await voiceoverServer.close(); } catch { /* */ }
207
240
  voiceoverServer = null;
@@ -209,9 +242,11 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
209
242
  }
210
243
  job.voiceoverCount = Object.keys(voiceoverByIdx).length;
211
244
  job.voiceoverSkipped = voiceoverSkipped;
245
+ job.imageCount = Object.keys(imageByIdx).length;
246
+ job.imageSkipped = imageSkipped;
212
247
 
213
248
  const serveUrl = resolveServeUrl();
214
- const inputProps = buildInputProps(deck, { voiceoverByIdx });
249
+ const inputProps = buildInputProps(deck, { voiceoverByIdx, imageByIdx });
215
250
 
216
251
  const renderer = loadRenderer();
217
252
  job.coldStart = !chromeBinaryExists();
@@ -66,6 +66,7 @@ async function startStageServer(opts) {
66
66
  localRender = null,
67
67
  deckSaver = null,
68
68
  audition = null,
69
+ imagine = null,
69
70
  publishEvent = null,
70
71
  tokenAvailable = false,
71
72
  preferredPort = 5180,
@@ -249,12 +250,15 @@ async function startStageServer(opts) {
249
250
  code === 'invalid_id' ||
250
251
  code === 'invalid_card_index' ||
251
252
  code === 'invalid_voice' ||
252
- code === 'invalid_text'
253
+ code === 'invalid_text' ||
254
+ code === 'invalid_prompt' ||
255
+ code === 'invalid_image_id'
253
256
  ) return 400;
254
257
  if (
255
258
  code === 'job_not_found' ||
256
259
  code === 'no_deck' ||
257
- code === 'card_not_found'
260
+ code === 'card_not_found' ||
261
+ code === 'image_not_found'
258
262
  ) return 404;
259
263
  if (code === 'voiceover_disabled') return 409;
260
264
  if (code === 'success') return 200;
@@ -458,6 +462,55 @@ async function startStageServer(opts) {
458
462
  return;
459
463
  }
460
464
 
465
+ // ─── Image generation (per-card 🎨) ─────────────────────────────────────
466
+ // GET /api/imagine?card=<int>&img=<id>
467
+ // Resolves card.images[?] → calls /api/image/generate via the imagine
468
+ // bridge → streams PNG/JPG bytes. Content-hash caches identical
469
+ // (prompt, aspect, quality) so iterating on visuals is free after the
470
+ // first call. Symmetric with /api/audition.
471
+ if (imagine && req.method === 'GET' && req.url.startsWith('/api/imagine')) {
472
+ let parsed;
473
+ try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
474
+ catch {
475
+ return sendJson(400, { code: 'bad_request', message: 'invalid /api/imagine url' });
476
+ }
477
+ const cardIndexStr = parsed.searchParams.get('card');
478
+ const cardIndex = Number.parseInt(cardIndexStr, 10);
479
+ if (!Number.isInteger(cardIndex) || cardIndex < 0) {
480
+ return sendJson(400, {
481
+ code: 'invalid_card_index',
482
+ message: '?card= must be a non-negative integer',
483
+ });
484
+ }
485
+ const imageId = parsed.searchParams.get('img') || '';
486
+ if (typeof imageId !== 'string' || !imageId.trim()) {
487
+ return sendJson(400, {
488
+ code: 'invalid_image_id',
489
+ message: '?img= must be a non-empty image id',
490
+ });
491
+ }
492
+ (async () => {
493
+ let result;
494
+ try {
495
+ result = await imagine.play({ cardIndex, imageId });
496
+ } catch (err) {
497
+ return sendJson(502, { code: 'upstream_error', message: err.message });
498
+ }
499
+ if (result.code !== 'success') {
500
+ return sendJson(statusForCode(result.code), result);
501
+ }
502
+ res.writeHead(200, {
503
+ 'Content-Type': result.contentType || 'image/png',
504
+ 'Content-Length': result.buf.length,
505
+ 'Cache-Control': 'no-store',
506
+ 'X-Imagine-Cache': result.fromCache ? 'HIT' : 'MISS',
507
+ 'X-Imagine-Key': result.cacheKey || '',
508
+ });
509
+ res.end(result.buf);
510
+ })();
511
+ return;
512
+ }
513
+
461
514
  // ─── Inline deck save (Task B) ──────────────────────────────────────────
462
515
  // POST /api/deck body: full deck JSON → validates + writes to disk.
463
516
  // The file watcher picks up the write and broadcasts the deck event, so