voxflow 1.15.3 → 1.15.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/dist/remotion-bundle/bundle.js +3 -0
- package/dist/remotion-bundle/bundle.js.map +1 -1
- package/dist/templates/data-finding/deck.json +40 -0
- package/dist/templates/founder-lesson/deck.json +37 -0
- package/dist/templates/incident-review/deck.json +37 -0
- package/dist/templates/manifest.json +45 -0
- package/dist/templates/product-launch/deck.json +37 -0
- package/dist/templates/quiet-essay/deck.json +37 -0
- package/lib/commands/slice-fork.js +151 -0
- package/lib/commands/slice-render.js +115 -8
- package/lib/commands/slice-stage.js +65 -0
- package/lib/commands/slice.js +9 -0
- package/lib/internal/deck-validator.js +150 -8
- package/lib/stage-core/image-gen.js +233 -0
- package/lib/stage-core/local-render.js +92 -1
- package/lib/stage-core/server.js +110 -2
- package/lib/stage-core/tts-audition.js +0 -0
- package/lib/stage-core/voiceover-mux.js +290 -0
- package/lib/stage-ui/slice/template.js +333 -0
- package/package.json +1 -1
- package/skills/voxflow-slice/SKILL.md +146 -2
- package/skills/voxflow-slice/templates/data-finding/deck.json +40 -0
- package/skills/voxflow-slice/templates/founder-lesson/deck.json +37 -0
- package/skills/voxflow-slice/templates/incident-review/deck.json +37 -0
- package/skills/voxflow-slice/templates/manifest.json +45 -0
- package/skills/voxflow-slice/templates/product-launch/deck.json +37 -0
- package/skills/voxflow-slice/templates/quiet-essay/deck.json +37 -0
|
@@ -128,6 +128,101 @@ function validateListPayload(list, i) {
|
|
|
128
128
|
});
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
// Optional per-card voiceover override (shape-checked by validateVoiceoverShape further below). Extends the legacy `card.voiceId`
|
|
132
|
+
// (V1-only) with a nested object that carries audio behavior toggles — silent
|
|
133
|
+
// card, custom TTS text override, speech rate — so stage's audition endpoint
|
|
134
|
+
// and the local-render mux pass resolve a single source of truth per card.
|
|
135
|
+
// All fields are optional inside an optional object: omitting `voiceover`
|
|
136
|
+
// entirely keeps existing decks unchanged. Render-time resolution (highest
|
|
137
|
+
// precedence first):
|
|
138
|
+
// voiceId = voiceover.voiceId ?? card.voiceId ?? job-level default
|
|
139
|
+
// text = voiceover.text ?? card.narration
|
|
140
|
+
// enabled = voiceover.enabled ?? true
|
|
141
|
+
// rate = voiceover.rate ?? 1.0
|
|
142
|
+
// Optional per-card image registry. Each entry declares an AI-generation
|
|
143
|
+
// recipe (prompt + aspect + quality) addressable by a stable `id` so the
|
|
144
|
+
// content.html in V2 LayoutTree decks (and future themes that read by id)
|
|
145
|
+
// can reference the resolved URL. Generation lives in stage's /api/imagine
|
|
146
|
+
// endpoint, which content-hashes (prompt, aspect, quality) and caches
|
|
147
|
+
// upstream-backed `data:image/...` payloads.
|
|
148
|
+
// Limits and enumerations applied to each card's optional `images` registry.
const IMAGE_PROMPT_MAX = 1000;
const IMAGE_ID_MAX = 64;
const IMAGES_PER_CARD_MAX = 8;
const IMAGE_VALID_ASPECTS = new Set(['portrait', 'landscape', 'square']);
const IMAGE_VALID_QUALITIES = new Set(['fast', 'hd']);

/**
 * Shape-check a card's optional `images` array.
 *
 * `null` / `undefined` is accepted as "no images". Otherwise the value must be
 * an array of at most IMAGES_PER_CARD_MAX plain objects, each carrying a
 * unique `id` (non-empty, at most IMAGE_ID_MAX chars, `[a-zA-Z0-9_-]+`), a
 * non-empty `prompt` (at most IMAGE_PROMPT_MAX chars) and, optionally, an
 * `aspect` / `quality` drawn from the sets above.
 *
 * @param {*} images - value of `card.images`
 * @param {number} cardIdx - index of the card, used only in error messages
 * @returns {void}
 * @throws {Error} on the first violation found
 */
function validateCardImagesShape(images, cardIdx) {
  if (images === null || images === undefined) return;

  const where = `cards[${cardIdx}].images`;
  if (!Array.isArray(images)) {
    throw new Error(`${where} must be an array (or null)`);
  }
  const total = images.length;
  if (total > IMAGES_PER_CARD_MAX) {
    throw new Error(`${where} too many (${total} > ${IMAGES_PER_CARD_MAX})`);
  }

  // Ids already claimed by earlier entries of this card.
  const usedIds = new Set();

  const checkEntry = (entry, j) => {
    const at = `${where}[${j}]`;
    const isPlainObject = Boolean(entry) && typeof entry === 'object' && !Array.isArray(entry);
    if (!isPlainObject) {
      throw new Error(`${at} must be an object`);
    }

    const { id, prompt, aspect, quality } = entry;

    if (typeof id !== 'string' || id.trim() === '') {
      throw new Error(`${at}.id required (non-empty string)`);
    }
    if (id.length > IMAGE_ID_MAX) {
      throw new Error(`${at}.id too long (${id.length} > ${IMAGE_ID_MAX})`);
    }
    if (!/^[a-zA-Z0-9_-]+$/.test(id)) {
      throw new Error(`${at}.id must match [a-zA-Z0-9_-]+`);
    }
    if (usedIds.has(id)) {
      throw new Error(`${at}.id duplicate: ${id}`);
    }
    usedIds.add(id);

    if (typeof prompt !== 'string' || prompt.trim() === '') {
      throw new Error(`${at}.prompt required (non-empty string)`);
    }
    if (prompt.length > IMAGE_PROMPT_MAX) {
      throw new Error(`${at}.prompt too long (${prompt.length} > ${IMAGE_PROMPT_MAX})`);
    }

    if (aspect != null && !IMAGE_VALID_ASPECTS.has(aspect)) {
      throw new Error(`${at}.aspect must be one of: ${Array.from(IMAGE_VALID_ASPECTS).join(', ')}`);
    }
    if (quality != null && !IMAGE_VALID_QUALITIES.has(quality)) {
      throw new Error(`${at}.quality must be one of: ${Array.from(IMAGE_VALID_QUALITIES).join(', ')}`);
    }
  };

  images.forEach(checkEntry);
}
|
|
193
|
+
|
|
194
|
+
// Upper bound on the length of a per-card `voiceover.text` override.
const VOICEOVER_TEXT_MAX = 500;

/**
 * Shape-check a card's optional `voiceover` object.
 *
 * `null` / `undefined` is accepted as "no override". Every field inside the
 * object is itself optional; a field that is present must satisfy:
 *   - enabled: boolean
 *   - voiceId: non-empty string, at most 128 chars
 *   - text:    string, at most VOICEOVER_TEXT_MAX chars (empty is allowed)
 *   - rate:    finite number within [0.5, 2.0]
 *
 * @param {*} vo - value of `card.voiceover`
 * @param {number} cardIdx - index of the card, used only in error messages
 * @returns {void}
 * @throws {Error} on the first violation found
 */
function validateVoiceoverShape(vo, cardIdx) {
  if (vo === null || vo === undefined) return;

  const where = `cards[${cardIdx}].voiceover`;
  if (Array.isArray(vo) || typeof vo !== 'object') {
    throw new Error(`${where} must be an object`);
  }

  const { enabled, voiceId, text, rate } = vo;

  if (enabled != null && typeof enabled !== 'boolean') {
    throw new Error(`${where}.enabled must be boolean`);
  }

  if (voiceId != null) {
    const usable = typeof voiceId === 'string' && voiceId.trim() !== '';
    if (!usable) {
      throw new Error(`${where}.voiceId must be non-empty string when present`);
    }
    if (voiceId.length > 128) {
      throw new Error(`${where}.voiceId too long (${voiceId.length} > 128)`);
    }
  }

  if (text != null) {
    if (typeof text !== 'string') {
      throw new Error(`${where}.text must be string`);
    }
    if (text.length > VOICEOVER_TEXT_MAX) {
      throw new Error(`${where}.text too long (${text.length} > ${VOICEOVER_TEXT_MAX})`);
    }
  }

  if (rate != null) {
    const inRange = typeof rate === 'number' && Number.isFinite(rate) && rate >= 0.5 && rate <= 2.0;
    if (!inRange) {
      throw new Error(`${where}.rate must be number in [0.5, 2.0]`);
    }
  }
}
|
|
225
|
+
|
|
131
226
|
function validatePaperSlideDeck(deck) {
|
|
132
227
|
if (!deck || typeof deck !== 'object') throw new Error('deck missing');
|
|
133
228
|
for (const f of ['header', 'seriesTitle', 'seriesTagline']) {
|
|
@@ -235,6 +330,8 @@ function validatePaperSlideDeck(deck) {
|
|
|
235
330
|
throw new Error(`cards[${i}].voiceId too long (${card.voiceId.length} > 128)`);
|
|
236
331
|
}
|
|
237
332
|
}
|
|
333
|
+
validateVoiceoverShape(card.voiceover, i);
|
|
334
|
+
validateCardImagesShape(card.images, i);
|
|
238
335
|
// Optional per-card image URL — photo-feature / atmospheric themes
|
|
239
336
|
// composite it as a full-bleed background; other themes ignore it.
|
|
240
337
|
// Shape-check only (string, length cap, http(s) prefix); reachability
|
|
@@ -290,10 +387,16 @@ function validatePaperSlideDeck(deck) {
|
|
|
290
387
|
// / quote / stat / steps). Anything else is rejected so a renderer that
|
|
291
388
|
// silently no-ops on unknown elements doesn't ship a broken video.
|
|
292
389
|
|
|
293
|
-
const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps']);
|
|
390
|
+
const V2_ALLOWED_ELEMENTS = new Set(['heading', 'body', 'paper-figure', 'quote', 'stat', 'steps', 'raw-html']);
|
|
294
391
|
const V2_HEADING_MAX = 22;
|
|
295
392
|
const V2_BODY_TEXT_MAX = 30;
|
|
296
393
|
const V2_STAT_LABEL_MAX = 30;
|
|
394
|
+
// raw-html is the open-ended escape hatch for themes / cards that want
|
|
395
|
+
// arbitrary markup. The cap is generous enough for a styled inline figure
|
|
396
|
+
// (a few paragraphs of HTML, maybe one inline data URI), but small enough
|
|
397
|
+
// to keep deck.json human-editable and prevent an LLM run-on from blowing
|
|
398
|
+
// past the watcher debounce.
|
|
399
|
+
const V2_RAW_HTML_MAX = 4096;
|
|
297
400
|
|
|
298
401
|
function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
|
|
299
402
|
if (!Array.isArray(children) || children.length === 0) {
|
|
@@ -375,6 +478,26 @@ function validateV2Children(children, cardIdx, allowedElements, opts = {}) {
|
|
|
375
478
|
);
|
|
376
479
|
}
|
|
377
480
|
});
|
|
481
|
+
} else if (el.el === 'raw-html') {
|
|
482
|
+
// Schema-only support: validator accepts the element and the
|
|
483
|
+
// V2 normalizer (video-present/src/compositions/PaperSlide/
|
|
484
|
+
// layout-tree.ts) maps it to a `rawHtml` field on the V1
|
|
485
|
+
// normalized output. PaperSlide composition rendering of
|
|
486
|
+
// arbitrary HTML lands in a follow-up PR — for now the
|
|
487
|
+
// composition silently skips this element, so a deck with
|
|
488
|
+
// raw-html validates + saves + edits cleanly but renders blank
|
|
489
|
+
// until the JSX side is updated.
|
|
490
|
+
if (typeof el.html !== 'string') {
|
|
491
|
+
throw new Error(`cards[${cardIdx}].children[${j}].html must be string`);
|
|
492
|
+
}
|
|
493
|
+
if (!el.html.trim()) {
|
|
494
|
+
throw new Error(`cards[${cardIdx}].children[${j}].html empty`);
|
|
495
|
+
}
|
|
496
|
+
if (el.html.length > V2_RAW_HTML_MAX) {
|
|
497
|
+
throw new Error(
|
|
498
|
+
`cards[${cardIdx}].children[${j}].html too long (${el.html.length} > ${V2_RAW_HTML_MAX})`
|
|
499
|
+
);
|
|
500
|
+
}
|
|
378
501
|
}
|
|
379
502
|
});
|
|
380
503
|
return elements;
|
|
@@ -417,12 +540,22 @@ function validatePaperSlideDeckV2(deck) {
|
|
|
417
540
|
if (!nonEmptyString(card.narration)) throw new Error(`cards[${i}].narration empty`);
|
|
418
541
|
|
|
419
542
|
if (card.kind === 'title') {
|
|
420
|
-
const els = validateV2Children(card.children, i, new Set(['heading']), { maxChildren: 3 });
|
|
421
|
-
if (els.length === 0)
|
|
543
|
+
const els = validateV2Children(card.children, i, new Set(['heading', 'raw-html']), { maxChildren: 3 });
|
|
544
|
+
if (els.filter((e) => e.el === 'heading').length === 0) {
|
|
545
|
+
throw new Error(`cards[${i}] title must have at least one heading`);
|
|
546
|
+
}
|
|
422
547
|
} else if (card.kind === 'body') {
|
|
423
|
-
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure']), { maxChildren: 4 });
|
|
548
|
+
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'paper-figure', 'raw-html']), { maxChildren: 4 });
|
|
424
549
|
const figs = els.filter((e) => e.el === 'paper-figure');
|
|
425
|
-
|
|
550
|
+
const rawEls = els.filter((e) => e.el === 'raw-html');
|
|
551
|
+
// A body card needs either a figure OR a raw-html block — raw
|
|
552
|
+
// HTML can stand in as the entire visual when the theme wants
|
|
553
|
+
// a custom panel instead of the canned figure renderer.
|
|
554
|
+
if (figs.length + rawEls.length !== 1) {
|
|
555
|
+
throw new Error(
|
|
556
|
+
`cards[${i}] body must contain exactly one paper-figure OR one raw-html (got ${figs.length} figure + ${rawEls.length} raw-html)`
|
|
557
|
+
);
|
|
558
|
+
}
|
|
426
559
|
const heads = els.filter((e) => e.el === 'heading');
|
|
427
560
|
if (heads.length === 0) throw new Error(`cards[${i}] body must contain at least one heading`);
|
|
428
561
|
} else if (card.kind === 'quote') {
|
|
@@ -431,21 +564,23 @@ function validatePaperSlideDeckV2(deck) {
|
|
|
431
564
|
// stat / steps directly and ignores any non-discriminator elements
|
|
432
565
|
// on these kinds). Discriminator element must still be present
|
|
433
566
|
// exactly once. Cap at 3 children to keep output bounded.
|
|
434
|
-
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote']), { maxChildren: 3 });
|
|
567
|
+
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'quote', 'raw-html']), { maxChildren: 3 });
|
|
435
568
|
const quoteEls = els.filter((e) => e.el === 'quote');
|
|
436
569
|
if (quoteEls.length !== 1) throw new Error(`cards[${i}] quote card must contain exactly one quote element (got ${quoteEls.length})`);
|
|
437
570
|
richCounts.quote += 1;
|
|
438
571
|
} else if (card.kind === 'data') {
|
|
439
|
-
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat']), { maxChildren: 3 });
|
|
572
|
+
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'stat', 'raw-html']), { maxChildren: 3 });
|
|
440
573
|
const statEls = els.filter((e) => e.el === 'stat');
|
|
441
574
|
if (statEls.length !== 1) throw new Error(`cards[${i}] data card must contain exactly one stat element (got ${statEls.length})`);
|
|
442
575
|
richCounts.data += 1;
|
|
443
576
|
} else if (card.kind === 'list') {
|
|
444
|
-
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps']), { maxChildren: 3 });
|
|
577
|
+
const els = validateV2Children(card.children, i, new Set(['heading', 'body', 'steps', 'raw-html']), { maxChildren: 3 });
|
|
445
578
|
const stepsEls = els.filter((e) => e.el === 'steps');
|
|
446
579
|
if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
|
|
447
580
|
richCounts.list += 1;
|
|
448
581
|
}
|
|
582
|
+
validateVoiceoverShape(card.voiceover, i);
|
|
583
|
+
validateCardImagesShape(card.images, i);
|
|
449
584
|
});
|
|
450
585
|
// Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
|
|
451
586
|
for (const k of Object.keys(richCounts)) {
|
|
@@ -477,6 +612,8 @@ module.exports = {
|
|
|
477
612
|
validateQuotePayload,
|
|
478
613
|
validateDataPayload,
|
|
479
614
|
validateListPayload,
|
|
615
|
+
validateVoiceoverShape,
|
|
616
|
+
validateCardImagesShape,
|
|
480
617
|
QUOTE_TEXT_MAX,
|
|
481
618
|
QUOTE_ATTRIBUTION_MAX,
|
|
482
619
|
DATA_VALUE_MAX,
|
|
@@ -485,4 +622,9 @@ module.exports = {
|
|
|
485
622
|
LIST_ITEM_MAX_LEN,
|
|
486
623
|
LIST_ITEM_MIN_COUNT,
|
|
487
624
|
LIST_ITEM_MAX_COUNT,
|
|
625
|
+
VOICEOVER_TEXT_MAX,
|
|
626
|
+
IMAGE_PROMPT_MAX,
|
|
627
|
+
IMAGE_ID_MAX,
|
|
628
|
+
IMAGES_PER_CARD_MAX,
|
|
629
|
+
V2_RAW_HTML_MAX,
|
|
488
630
|
};
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Stage → backend bridge for AI image generation (hunyuan-image).
|
|
5
|
+
*
|
|
6
|
+
* Mirror of tts-audition.js for the visual side: stage's per-card 🎨 button
|
|
7
|
+
* resolves `card.images[i] = { id, prompt, aspect?, quality? }`, hashes the
|
|
8
|
+
* generation recipe, and either returns a cached PNG/JPG or asks the backend
|
|
9
|
+
* to produce one. JWT lives on the server (same posture as cloud-render /
|
|
10
|
+
* audition).
|
|
11
|
+
*
|
|
12
|
+
* Public surface:
|
|
13
|
+
* createImageGenClient({ apiBase?, tokenLoader?, cacheDir?, fetchImpl? })
|
|
14
|
+
* → { imagine({ prompt, aspect, quality }), cacheDir }
|
|
15
|
+
*
|
|
16
|
+
* Cache layout: <cacheDir>/<sha256(prompt|aspect|quality)>.<ext>
|
|
17
|
+
* Default cacheDir: ~/.config/voxflow/stage-image-cache/ — global so a
|
|
18
|
+
* prompt repeated across decks resolves to one file on disk.
|
|
19
|
+
*
|
|
20
|
+
* Backend contract: POST /api/image/generate { prompt, ratio, quality }
|
|
21
|
+
* → { code: 'success', image: 'data:image/png;base64,...', quota }
|
|
22
|
+
* We parse the data URL, persist the raw bytes, and serve them with the
|
|
23
|
+
* declared MIME so <img src> works in the browser without a base64 hop.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const crypto = require('crypto');
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
const path = require('path');
|
|
29
|
+
const os = require('os');
|
|
30
|
+
const http = require('http');
|
|
31
|
+
const https = require('https');
|
|
32
|
+
|
|
33
|
+
const { API_BASE } = require('../core/config');
|
|
34
|
+
const { readCachedToken } = require('../core/auth');
|
|
35
|
+
|
|
36
|
+
// Upstream request timeout: three minutes. The original note puts backend
// hunyuan-image latency at 30-90s p95.
const DEFAULT_TIMEOUT_MS = 3 * 60 * 1000;
// Defaults substituted when a caller passes no (or an unrecognised) value.
const DEFAULT_ASPECT = 'portrait'; // 9:16 matches Slice frame
const DEFAULT_QUALITY = 'fast'; // 200-quota tier; 'hd' is 500
// Catalog references, exported so the stage UI can show "🎨 costs N quota"
// without an extra round-trip. The authoritative deduction is server-side.
const IMAGE_GEN_COST_FAST = 200;
const IMAGE_GEN_COST_HD = 500;
|
|
43
|
+
|
|
44
|
+
/**
 * Default on-disk location of the image cache, under the current user's
 * home directory.
 *
 * @returns {string} `<home>/.config/voxflow/stage-image-cache`
 */
function defaultCacheDir() {
  const segments = ['.config', 'voxflow', 'stage-image-cache'];
  return path.join(os.homedir(), ...segments);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Content hash for one generation recipe.
 *
 * Feeds prompt, aspect and quality (in that order, each pair separated by a
 * single space) into SHA-256, so an identical recipe always yields the same
 * key and therefore the same cache file name.
 *
 * @param {{prompt: string, aspect: string, quality: string}} recipe
 * @returns {string} lowercase hex SHA-256 digest
 */
function cacheKey({ prompt, aspect, quality }) {
  const hasher = crypto.createHash('sha256');
  const pieces = [prompt, ' ', aspect, ' ', quality];
  for (const piece of pieces) {
    hasher.update(piece);
  }
  return hasher.digest('hex');
}
|
|
59
|
+
|
|
60
|
+
/**
 * Map an image MIME type to the file extension used in the cache.
 *
 * PNG, JPEG (also spelled `image/jpg`) and WebP are recognised; any other
 * value, including non-strings, falls back to `bin`. Matching is exact and
 * case-sensitive.
 *
 * @param {string} mime
 * @returns {'png'|'jpg'|'webp'|'bin'}
 */
function extensionFor(mime) {
  switch (mime) {
    case 'image/png':
      return 'png';
    case 'image/jpeg':
    case 'image/jpg':
      return 'jpg';
    case 'image/webp':
      return 'webp';
    default:
      return 'bin';
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Build the path of a cache entry: `<dir>/<key>.<ext>`.
 *
 * @param {string} dir - cache directory
 * @param {string} key - cache key (file stem)
 * @param {string} ext - extension without the leading dot
 * @returns {string}
 */
function cachePath(dir, key, ext) {
  const fileName = `${key}.${ext}`;
  return path.join(dir, fileName);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Parse a base64 `data:` URL into its MIME type and decoded bytes.
 *
 * Accepts the RFC 2397 base64 form `data:<mime>;base64,<payload>`. The payload
 * may contain line breaks or trailing whitespace (some encoders wrap base64
 * output); `Buffer.from(..., 'base64')` ignores whitespace while decoding, so
 * the pattern lets the payload span newlines instead of rejecting such URLs.
 *
 * @param {*} dataUrl - candidate data URL
 * @returns {{mime: string, buf: Buffer}|null} lower-cased MIME plus decoded
 *   bytes, or `null` when the input is not a string, does not match the
 *   expected form, or decodes to zero bytes
 */
function parseDataUrl(dataUrl) {
  if (typeof dataUrl !== 'string') return null;
  // `[\s\S]+` rather than `.+` so a newline inside or after the payload does
  // not defeat the match.
  const m = /^data:([^;,]+);base64,([\s\S]+)$/.exec(dataUrl);
  if (!m) return null;
  let buf;
  try {
    buf = Buffer.from(m[2], 'base64');
  } catch {
    // Kept so the function never throws, as before.
    return null;
  }
  if (!buf.length) return null;
  return { mime: m[1].toLowerCase(), buf };
}
|
|
84
|
+
|
|
85
|
+
/**
 * Issue one HTTP(S) request and resolve with the status code plus the
 * JSON-decoded response body.
 *
 * @param {string} targetUrl - absolute http: or https: URL
 * @param {{method?: string, headers?: object}} [opts] - method defaults to GET
 * @param {*} [body] - when not null/undefined, sent as a JSON payload with
 *   matching Content-Type / Content-Length headers
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - socket inactivity timeout
 * @returns {Promise<{status: number, data: *}>} `data` is `null` when the
 *   response body is not valid JSON
 * @throws rejects on request errors, on timeout, on an invalid URL, and when
 *   the connection closes before the full response body has arrived
 */
function requestJson(targetUrl, opts = {}, body = null, timeoutMs = DEFAULT_TIMEOUT_MS) {
  return new Promise((resolve, reject) => {
    const u = new URL(targetUrl);
    const isHttps = u.protocol === 'https:';
    const mod = isHttps ? https : http;
    const headers = Object.assign({}, opts.headers || {});
    let payload = null;
    if (body !== null && body !== undefined) {
      payload = Buffer.from(JSON.stringify(body), 'utf8');
      headers['Content-Type'] = 'application/json';
      // Byte length of the encoded payload, not the string length.
      headers['Content-Length'] = payload.length;
    }
    const req = mod.request({
      hostname: u.hostname,
      port: u.port || (isHttps ? 443 : 80),
      path: u.pathname + u.search,
      method: opts.method || 'GET',
      headers,
    }, (res) => {
      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      // Once headers have arrived, a premature socket close is reported on
      // `res`, not on `req`, and 'end' never fires. Without these two
      // listeners the promise would stay pending forever.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) {
          reject(new Error('upstream closed the connection before the response completed'));
        }
      });
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString('utf8');
        let parsed = null;
        try { parsed = JSON.parse(raw); } catch { /* non-JSON body: keep null */ }
        resolve({ status: res.statusCode, data: parsed });
      });
    });
    req.on('error', reject);
    req.setTimeout(timeoutMs, () => { req.destroy(new Error(`upstream timeout after ${timeoutMs}ms`)); });
    if (payload) req.write(payload);
    req.end();
  });
}
|
|
118
|
+
|
|
119
|
+
/**
 * Create an image-generation client bound to one cache directory.
 *
 * `imagine()` never throws for expected failures; it resolves with a result
 * object whose `code` is one of `success`, `invalid_prompt`, `not_logged_in`,
 * `quota_exceeded`, `network_error`, `imagine_failed`, or a code passed
 * through from the backend. A successful result carries
 * `{ buf, contentType, fromCache, cacheKey, ext }`.
 *
 * @param {object} [opts]
 * @param {string} [opts.apiBase=API_BASE] - backend origin; one trailing slash is stripped
 * @param {() => ({access_token:string}|null)} [opts.tokenLoader] - defaults to readCachedToken
 * @param {string} [opts.cacheDir] - defaults to defaultCacheDir()
 * @param {(args:object) => Promise<{status:number, data:object|null}>} [opts.fetchImpl]
 *   Test injection point — receives `{ prompt, ratio, quality, headers }`.
 * @returns {{ imagine: (params:object) => Promise<object>, cacheDir: string }}
 */
function createImageGenClient(opts = {}) {
  const apiBase = (opts.apiBase || API_BASE).replace(/\/$/, '');
  const tokenLoader = typeof opts.tokenLoader === 'function' ? opts.tokenLoader : readCachedToken;
  const cacheDir = opts.cacheDir || defaultCacheDir();
  const fetchImpl = typeof opts.fetchImpl === 'function' ? opts.fetchImpl : null;

  // Content type reported for a cache hit, keyed by the stored file's extension.
  const MIME_BY_EXT = {
    png: 'image/png',
    jpg: 'image/jpeg',
    webp: 'image/webp',
    bin: 'application/octet-stream',
  };

  // Best-effort: when the directory cannot be created, cache reads simply
  // miss and the cache write below fails quietly.
  try { fs.mkdirSync(cacheDir, { recursive: true }); } catch { /* best-effort */ }

  // Authorization header built from the cached token, or null when logged out.
  function authHeaders() {
    const cached = tokenLoader();
    if (!cached || !cached.access_token) return null;
    return { Authorization: 'Bearer ' + cached.access_token };
  }

  async function imagine({ prompt, aspect, quality } = {}) {
    if (typeof prompt !== 'string' || !prompt.trim()) {
      return { code: 'invalid_prompt', message: 'prompt required (non-empty string)' };
    }
    // The backend receives the trimmed prompt, so the cache key is derived
    // from the same trimmed text. Otherwise two prompts differing only in
    // surrounding whitespace would each trigger a paid generation.
    const text = prompt.trim();
    const asp = (aspect === 'portrait' || aspect === 'landscape' || aspect === 'square') ? aspect : DEFAULT_ASPECT;
    const qual = (quality === 'fast' || quality === 'hd') ? quality : DEFAULT_QUALITY;
    const key = cacheKey({ prompt: text, aspect: asp, quality: qual });

    // Cache check: the stored extension depends on the MIME the backend
    // returned at generation time, so probe every extension we may have written.
    for (const ext of Object.keys(MIME_BY_EXT)) {
      const fp = cachePath(cacheDir, key, ext);
      try {
        const buf = fs.readFileSync(fp);
        return {
          code: 'success',
          buf,
          contentType: MIME_BY_EXT[ext],
          fromCache: true,
          cacheKey: key,
          ext,
        };
      } catch { /* not cached under this extension — try the next */ }
    }

    const headers = authHeaders();
    if (!headers) {
      return { code: 'not_logged_in', message: 'Run `voxflow login` first.' };
    }

    let r;
    try {
      if (fetchImpl) {
        r = await fetchImpl({ prompt: text, ratio: asp, quality: qual, headers });
      } else {
        r = await requestJson(`${apiBase}/api/image/generate`, { method: 'POST', headers }, {
          prompt: text,
          ratio: asp,
          quality: qual,
        });
      }
    } catch (err) {
      return { code: 'network_error', message: err.message || String(err) };
    }

    // An injected fetchImpl may resolve with nothing; report that as a
    // failure result rather than throwing on `r.status`.
    if (!r) {
      return { code: 'imagine_failed', message: 'empty response from image backend' };
    }
    if (r.status === 401) return { code: 'not_logged_in', message: 'Token expired — run `voxflow login`.' };
    if (r.status === 402 || r.status === 429) return { code: 'quota_exceeded', message: r.data?.message || 'Insufficient quota' };
    if (r.status >= 400 || !r.data || r.data.code !== 'success') {
      return {
        code: r.data?.code || 'imagine_failed',
        message: r.data?.message || `HTTP ${r.status}`,
      };
    }
    const parsed = parseDataUrl(r.data.image);
    if (!parsed) {
      return { code: 'imagine_failed', message: 'response missing or malformed image data URL' };
    }
    const ext = extensionFor(parsed.mime);
    const filePath = cachePath(cacheDir, key, ext);
    // Write to a temp name, then rename into place, so a reader never sees a
    // half-written file.
    const tmp = `${filePath}.tmp-${process.pid}-${Date.now()}`;
    try {
      fs.writeFileSync(tmp, parsed.buf);
      fs.renameSync(tmp, filePath);
    } catch {
      // Caching is best-effort: the bytes are still returned below.
      try { fs.unlinkSync(tmp); } catch { /* temp file may not exist */ }
    }
    return {
      code: 'success',
      buf: parsed.buf,
      contentType: parsed.mime,
      fromCache: false,
      cacheKey: key,
      ext,
    };
  }

  return { imagine, cacheDir };
}
|
|
221
|
+
|
|
222
|
+
module.exports = {
|
|
223
|
+
createImageGenClient,
|
|
224
|
+
cacheKey,
|
|
225
|
+
cachePath,
|
|
226
|
+
defaultCacheDir,
|
|
227
|
+
parseDataUrl,
|
|
228
|
+
extensionFor,
|
|
229
|
+
IMAGE_GEN_COST_FAST,
|
|
230
|
+
IMAGE_GEN_COST_HD,
|
|
231
|
+
DEFAULT_ASPECT,
|
|
232
|
+
DEFAULT_QUALITY,
|
|
233
|
+
};
|
|
@@ -45,6 +45,9 @@ const {
|
|
|
45
45
|
THEME_TO_DECK_ID,
|
|
46
46
|
DEFAULT_THEME,
|
|
47
47
|
} = require('../commands/slice-render');
|
|
48
|
+
const { createTtsAuditionClient } = require('./tts-audition');
|
|
49
|
+
const { createImageGenClient } = require('./image-gen');
|
|
50
|
+
const { startVoiceoverServer, prepareVoiceovers, prepareImages } = require('./voiceover-mux');
|
|
48
51
|
|
|
49
52
|
// In-memory job table. We never persist jobs — a stage restart wipes history,
|
|
50
53
|
// which is fine because the produced mp4 lives on disk under the user's deck
|
|
@@ -158,10 +161,92 @@ function startLocalRender(opts) {
|
|
|
158
161
|
|
|
159
162
|
async function runRender({ job, deck, onProgress, onDone, onError }) {
|
|
160
163
|
const { jobId, outputPath, deckId } = job;
|
|
164
|
+
let voiceoverServer = null;
|
|
161
165
|
try {
|
|
162
166
|
job.state = 'preparing';
|
|
167
|
+
|
|
168
|
+
// ─── Voiceover + Image prep (Phase 1 + Phase B) ─────────────────────
|
|
169
|
+
// Stage's Render button defaults to BOTH audio + AI image generation.
|
|
170
|
+
// Users in stage are iterating and expect a fully-rendered preview;
|
|
171
|
+
// hitting the cache makes re-renders effectively free for content they
|
|
172
|
+
// already auditioned / regenerated. Either pass can fail to silent
|
|
173
|
+
// fallback without aborting the render — failures land in
|
|
174
|
+
// job.voiceoverSkipped / job.imageSkipped so the UI can surface the
|
|
175
|
+
// reason post hoc.
|
|
176
|
+
let voiceoverByIdx = {};
|
|
177
|
+
let imageByIdx = {};
|
|
178
|
+
let voiceoverSkipped = [];
|
|
179
|
+
let imageSkipped = [];
|
|
180
|
+
try {
|
|
181
|
+
const audClient = createTtsAuditionClient();
|
|
182
|
+
const imgClient = createImageGenClient();
|
|
183
|
+
voiceoverServer = await startVoiceoverServer({
|
|
184
|
+
cacheDir: audClient.cacheDir,
|
|
185
|
+
imageCacheDir: imgClient.cacheDir,
|
|
186
|
+
});
|
|
187
|
+
const voPrep = await prepareVoiceovers({
|
|
188
|
+
deck,
|
|
189
|
+
auditionClient: audClient,
|
|
190
|
+
baseUrl: voiceoverServer.url,
|
|
191
|
+
onProgress: (p) => {
|
|
192
|
+
if (typeof onProgress === 'function') {
|
|
193
|
+
try {
|
|
194
|
+
onProgress({
|
|
195
|
+
jobId,
|
|
196
|
+
progress: 0,
|
|
197
|
+
framesRendered: 0,
|
|
198
|
+
framesTotal: 0,
|
|
199
|
+
phase: 'voiceover',
|
|
200
|
+
voiceoverIndex: p.cardIdx + 1,
|
|
201
|
+
voiceoverTotal: p.total,
|
|
202
|
+
voiceoverFromCache: p.fromCache,
|
|
203
|
+
});
|
|
204
|
+
} catch { /* swallow */ }
|
|
205
|
+
}
|
|
206
|
+
},
|
|
207
|
+
});
|
|
208
|
+
voiceoverByIdx = voPrep.byIdx;
|
|
209
|
+
voiceoverSkipped = voPrep.skipped;
|
|
210
|
+
|
|
211
|
+
const imgPrep = await prepareImages({
|
|
212
|
+
deck,
|
|
213
|
+
imgClient,
|
|
214
|
+
baseUrl: voiceoverServer.url,
|
|
215
|
+
onProgress: (p) => {
|
|
216
|
+
if (typeof onProgress === 'function') {
|
|
217
|
+
try {
|
|
218
|
+
onProgress({
|
|
219
|
+
jobId,
|
|
220
|
+
progress: 0,
|
|
221
|
+
framesRendered: 0,
|
|
222
|
+
framesTotal: 0,
|
|
223
|
+
phase: 'image',
|
|
224
|
+
imageIndex: p.cardIdx + 1,
|
|
225
|
+
imageTotal: p.total,
|
|
226
|
+
imageFromCache: p.fromCache,
|
|
227
|
+
});
|
|
228
|
+
} catch { /* swallow */ }
|
|
229
|
+
}
|
|
230
|
+
},
|
|
231
|
+
});
|
|
232
|
+
imageByIdx = imgPrep.byIdx;
|
|
233
|
+
imageSkipped = imgPrep.skipped;
|
|
234
|
+
} catch (err) {
|
|
235
|
+
// Media prep failure is non-fatal — fall back to silent + no-AI
|
|
236
|
+
// images so a TTS / image outage still produces an mp4.
|
|
237
|
+
voiceoverSkipped = [{ cardIdx: -1, reason: 'media_prep_failed', message: err.message }];
|
|
238
|
+
if (voiceoverServer) {
|
|
239
|
+
try { await voiceoverServer.close(); } catch { /* */ }
|
|
240
|
+
voiceoverServer = null;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
job.voiceoverCount = Object.keys(voiceoverByIdx).length;
|
|
244
|
+
job.voiceoverSkipped = voiceoverSkipped;
|
|
245
|
+
job.imageCount = Object.keys(imageByIdx).length;
|
|
246
|
+
job.imageSkipped = imageSkipped;
|
|
247
|
+
|
|
163
248
|
const serveUrl = resolveServeUrl();
|
|
164
|
-
const inputProps = buildInputProps(deck);
|
|
249
|
+
const inputProps = buildInputProps(deck, { voiceoverByIdx, imageByIdx });
|
|
165
250
|
|
|
166
251
|
const renderer = loadRenderer();
|
|
167
252
|
job.coldStart = !chromeBinaryExists();
|
|
@@ -234,6 +319,12 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
|
|
|
234
319
|
if (typeof onError === 'function') {
|
|
235
320
|
try { onError({ jobId, message: job.error }); } catch { /* swallow */ }
|
|
236
321
|
}
|
|
322
|
+
} finally {
|
|
323
|
+
// Always tear down the audio file server, including on render failure,
|
|
324
|
+
// so a stale localhost listener doesn't leak across jobs.
|
|
325
|
+
if (voiceoverServer) {
|
|
326
|
+
try { await voiceoverServer.close(); } catch { /* best-effort */ }
|
|
327
|
+
}
|
|
237
328
|
}
|
|
238
329
|
}
|
|
239
330
|
|