voxflow 1.15.3 → 1.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,6 +65,8 @@ async function startStageServer(opts) {
65
65
  cloudRender = null,
66
66
  localRender = null,
67
67
  deckSaver = null,
68
+ audition = null,
69
+ imagine = null,
68
70
  publishEvent = null,
69
71
  tokenAvailable = false,
70
72
  preferredPort = 5180,
@@ -243,8 +245,22 @@ async function startStageServer(opts) {
243
245
  function statusForCode(code) {
244
246
  if (code === 'not_logged_in') return 401;
245
247
  if (code === 'quota_exceeded') return 402;
246
- if (code === 'invalid_deck' || code === 'invalid_id') return 400;
247
- if (code === 'job_not_found') return 404;
248
+ if (
249
+ code === 'invalid_deck' ||
250
+ code === 'invalid_id' ||
251
+ code === 'invalid_card_index' ||
252
+ code === 'invalid_voice' ||
253
+ code === 'invalid_text' ||
254
+ code === 'invalid_prompt' ||
255
+ code === 'invalid_image_id'
256
+ ) return 400;
257
+ if (
258
+ code === 'job_not_found' ||
259
+ code === 'no_deck' ||
260
+ code === 'card_not_found' ||
261
+ code === 'image_not_found'
262
+ ) return 404;
263
+ if (code === 'voiceover_disabled') return 409;
248
264
  if (code === 'success') return 200;
249
265
  return 502; // upstream error
250
266
  }
@@ -403,6 +419,98 @@ async function startStageServer(opts) {
403
419
  return;
404
420
  }
405
421
 
422
+ // ─── TTS audition (per-card ▶) ──────────────────────────────────────────
423
+ // GET /api/audition?card=<int>[&voice=<id>]
424
+ // Resolves card.voiceover/voiceId/narration → calls /api/tts/synthesize
425
+ // via the audition bridge → streams audio bytes (default mp3). Content
426
+ // hash caches identical (voice, text, speed, format) so iteration is
427
+ // free after the first call. The page never sees the JWT.
428
+ if (audition && req.method === 'GET' && req.url.startsWith('/api/audition')) {
429
+ let parsed;
430
+ try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
431
+ catch {
432
+ return sendJson(400, { code: 'bad_request', message: 'invalid /api/audition url' });
433
+ }
434
+ const cardIndexStr = parsed.searchParams.get('card');
435
+ const cardIndex = Number.parseInt(cardIndexStr, 10);
436
+ if (!Number.isInteger(cardIndex) || cardIndex < 0) {
437
+ return sendJson(400, {
438
+ code: 'invalid_card_index',
439
+ message: '?card= must be a non-negative integer',
440
+ });
441
+ }
442
+ const voiceOverride = parsed.searchParams.get('voice') || undefined;
443
+ (async () => {
444
+ let result;
445
+ try {
446
+ result = await audition.play({ cardIndex, voiceOverride });
447
+ } catch (err) {
448
+ return sendJson(502, { code: 'upstream_error', message: err.message });
449
+ }
450
+ if (result.code !== 'success') {
451
+ return sendJson(statusForCode(result.code), result);
452
+ }
453
+ res.writeHead(200, {
454
+ 'Content-Type': result.contentType || 'audio/mpeg',
455
+ 'Content-Length': result.buf.length,
456
+ 'Cache-Control': 'no-store',
457
+ 'X-Audition-Cache': result.fromCache ? 'HIT' : 'MISS',
458
+ 'X-Audition-Key': result.cacheKey || '',
459
+ });
460
+ res.end(result.buf);
461
+ })();
462
+ return;
463
+ }
464
+
465
+ // ─── Image generation (per-card 🎨) ─────────────────────────────────────
466
+ // GET /api/imagine?card=<int>&img=<id>
467
+ // Resolves card.images[?] → calls /api/image/generate via the imagine
468
+ // bridge → streams PNG/JPG bytes. Content-hash caches identical
469
+ // (prompt, aspect, quality) so iterating on visuals is free after the
470
+ // first call. Symmetric with /api/audition.
471
+ if (imagine && req.method === 'GET' && req.url.startsWith('/api/imagine')) {
472
+ let parsed;
473
+ try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
474
+ catch {
475
+ return sendJson(400, { code: 'bad_request', message: 'invalid /api/imagine url' });
476
+ }
477
+ const cardIndexStr = parsed.searchParams.get('card');
478
+ const cardIndex = Number.parseInt(cardIndexStr, 10);
479
+ if (!Number.isInteger(cardIndex) || cardIndex < 0) {
480
+ return sendJson(400, {
481
+ code: 'invalid_card_index',
482
+ message: '?card= must be a non-negative integer',
483
+ });
484
+ }
485
+ const imageId = parsed.searchParams.get('img') || '';
486
+ if (typeof imageId !== 'string' || !imageId.trim()) {
487
+ return sendJson(400, {
488
+ code: 'invalid_image_id',
489
+ message: '?img= must be a non-empty image id',
490
+ });
491
+ }
492
+ (async () => {
493
+ let result;
494
+ try {
495
+ result = await imagine.play({ cardIndex, imageId });
496
+ } catch (err) {
497
+ return sendJson(502, { code: 'upstream_error', message: err.message });
498
+ }
499
+ if (result.code !== 'success') {
500
+ return sendJson(statusForCode(result.code), result);
501
+ }
502
+ res.writeHead(200, {
503
+ 'Content-Type': result.contentType || 'image/png',
504
+ 'Content-Length': result.buf.length,
505
+ 'Cache-Control': 'no-store',
506
+ 'X-Imagine-Cache': result.fromCache ? 'HIT' : 'MISS',
507
+ 'X-Imagine-Key': result.cacheKey || '',
508
+ });
509
+ res.end(result.buf);
510
+ })();
511
+ return;
512
+ }
513
+
406
514
  // ─── Inline deck save (Task B) ──────────────────────────────────────────
407
515
  // POST /api/deck body: full deck JSON → validates + writes to disk.
408
516
  // The file watcher picks up the write and broadcasts the deck event, so
Binary file
@@ -0,0 +1,290 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Per-card voiceover audio prep for local Remotion render.
5
+ *
6
+ * Reuses the audition cache (~/.config/voxflow/stage-tts-cache/) so a card
7
+ * the user just listened to via stage's ▶ button doesn't get re-synthesized
8
+ * at render time. Spins up a tiny localhost HTTP server (auto-picked port)
9
+ * that serves audio files to the headless Chromium Remotion launches; the
10
+ * Remotion composition fetches voiceoverSrc URLs from this server while
11
+ * rendering. Tear the server down after renderMedia() resolves.
12
+ *
13
+ * const aud = createTtsAuditionClient();
14
+ * const server = await startVoiceoverServer({ cacheDir: aud.cacheDir });
15
+ * const { byIdx, skipped } = await prepareVoiceovers({
16
+ * deck, auditionClient: aud, baseUrl: server.url, onProgress,
17
+ * });
18
+ * // buildInputProps reads byIdx and threads URLs into card.slide.voiceoverSrc
19
+ * await renderMedia({ inputProps: buildInputProps(deck, { voiceoverByIdx: byIdx }), ... });
20
+ * await server.close();
21
+ *
22
+ * When auth is unavailable (no token in CLI cache), prepareVoiceovers
23
+ * returns an empty map quietly — the resulting mp4 is the Phase 0 silent
24
+ * video. Callers branch on the empty map to surface a hint to the user.
25
+ */
26
+
27
+ const fs = require('fs');
28
+ const http = require('http');
29
+ const path = require('path');
30
+
31
+ const { contentTypeFor } = require('./tts-audition');
32
+ const { SYNTHESIZE_DEFAULTS } = require('../core/config');
33
+
34
// Mime mapper covering both audio + image extensions so the local media
// server can serve cached audition mp3s and generated PNG/JPEG/WebP from a
// single process. Extensions that are not listed here are delegated to
// contentTypeFor() from ./tts-audition (presumably yielding a generic
// fallback type — confirm against that module).
const MIME_BY_EXT = {
  mp3: 'audio/mpeg',
  wav: 'audio/wav',
  pcm: 'audio/L16',
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  webp: 'image/webp',
};

/**
 * Map a bare file extension (no leading dot) to a Content-Type value.
 *
 * @param {string} ext Extension such as "mp3" or "PNG"; matched
 *   case-insensitively against MIME_BY_EXT.
 * @returns {string} The table entry when one exists,
 *   'application/octet-stream' for an empty or non-string extension,
 *   otherwise whatever contentTypeFor(ext) returns.
 */
function mimeFor(ext) {
  if (typeof ext !== 'string' || ext === '') return 'application/octet-stream';
  const key = ext.toLowerCase();
  // Own-property check: a plain-object lookup would also hit inherited
  // members, so "constructor" or "__proto__" would come back as a
  // function/object and end up in the Content-Type header.
  if (Object.prototype.hasOwnProperty.call(MIME_BY_EXT, key)) {
    return MIME_BY_EXT[key];
  }
  return contentTypeFor(ext);
}
51
+
52
/**
 * Stream a single flat file from `rootDir` to an HTTP response.
 *
 * @param {string} rootDir Directory the file must live directly inside.
 * @param {string} urlFname Request path remainder after the mount prefix;
 *   anything from the first "?" onwards is ignored.
 * @param {import('http').ServerResponse} res Response to write to.
 * @returns {void} Responds 400 for an empty name or one containing a path
 *   separator, "..", or a NUL byte; 404 when the path is missing, is not a
 *   regular file, or cannot be opened; otherwise 200 with the file bytes.
 */
function serveFileFrom(rootDir, urlFname, res) {
  const sendText = (status, body) => {
    res.writeHead(status, { 'Content-Type': 'text/plain' });
    res.end(body);
  };

  const fname = urlFname.split('?')[0];
  const unsafe = fname === ''
    || fname.includes('/')
    || fname.includes('\\')
    || fname.includes('..')
    // fs calls throw synchronously on NUL bytes, which would escape the
    // request handler instead of producing a response.
    || fname.includes('\0');
  if (unsafe) {
    sendText(400, 'bad filename');
    return;
  }

  const filePath = path.join(rootDir, fname);
  fs.stat(filePath, (statErr, st) => {
    if (statErr || !st.isFile()) {
      sendText(404, 'not found');
      return;
    }
    const ext = path.extname(fname).slice(1);
    const stream = fs.createReadStream(filePath);

    // Send headers only once the file is actually open, so a file that
    // vanished after stat() still gets a clean 404.
    stream.once('open', () => {
      res.writeHead(200, {
        'Content-Type': mimeFor(ext),
        'Content-Length': st.size,
        'Cache-Control': 'no-store',
      });
      stream.pipe(res);
    });

    // Without a listener a read/open failure is an uncaught 'error' event
    // that takes the whole process down.
    stream.once('error', () => {
      if (res.headersSent) {
        // Content-Length is already promised; abort rather than send a
        // short body that looks complete.
        res.destroy();
      } else {
        sendText(404, 'not found');
      }
    });

    // pipe() does not tear down the source when the client goes away.
    res.once('close', () => stream.destroy());
  });
}
75
+
76
/**
 * Tiny localhost HTTP server serving the audition + image-gen caches.
 * Responds to GET /audio/<filename> and (optionally) GET /image/<filename>;
 * everything else — including non-GET methods — is 404. File lookup and
 * filename validation are delegated to serveFileFrom().
 *
 * @param {object} opts
 * @param {string} opts.cacheDir Directory containing audio <hash>.<ext> files.
 * @param {string} [opts.imageCacheDir] Optional second root mounted at /image/.
 * @param {number} [opts.preferredPort=0] 0 lets the OS pick a free port.
 * @returns {Promise<{server, port, url, close}>} Resolves once the server is
 *   listening on 127.0.0.1. `close()` stops it, may be called repeatedly,
 *   and resolves when the server has stopped.
 * @throws {Error} When cacheDir is not a non-empty string; the returned
 *   promise also rejects if listen() fails (e.g. port already in use).
 */
async function startVoiceoverServer({ cacheDir, imageCacheDir = null, preferredPort = 0 }) {
  if (typeof cacheDir !== 'string' || !cacheDir) {
    throw new Error('startVoiceoverServer: cacheDir required');
  }

  const notFound = (res) => {
    res.writeHead(404, { 'Content-Type': 'text/plain' });
    res.end('not found');
  };

  const server = http.createServer((req, res) => {
    if (req.method !== 'GET') {
      notFound(res);
      return;
    }
    if (req.url.startsWith('/audio/')) {
      serveFileFrom(cacheDir, req.url.slice('/audio/'.length), res);
      return;
    }
    if (imageCacheDir && req.url.startsWith('/image/')) {
      serveFileFrom(imageCacheDir, req.url.slice('/image/'.length), res);
      return;
    }
    notFound(res);
  });

  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(preferredPort, '127.0.0.1', () => {
      // Listening succeeded; the startup rejection hook is no longer wanted.
      server.removeListener('error', reject);
      resolve();
    });
  });

  const port = server.address().port;
  let closing = null; // memoized so repeated close() calls share one shutdown

  return {
    server,
    port,
    url: `http://127.0.0.1:${port}`,
    async close() {
      if (!closing) {
        closing = new Promise((resolve) => {
          server.close(() => resolve());
          // server.close() only stops accepting; on runtimes where it does
          // not also reap idle keep-alive sockets the callback would wait
          // for the keep-alive timeout. Feature-detected because the method
          // is absent on older Node releases.
          if (typeof server.closeIdleConnections === 'function') {
            server.closeIdleConnections();
          }
        });
      }
      await closing;
    },
  };
}
124
+
125
/**
 * Resolve + synthesize (or cache-hit) one mp3 per card, return a map of
 * { cardIdx: audio URL } that buildInputProps threads into voiceoverSrc.
 * Cards are processed sequentially, in deck order.
 *
 * @param {object} opts
 * @param {object} opts.deck Validator-shaped deck. A missing deck or a
 *   non-array `deck.cards` yields empty results without touching the client.
 * @param {{audition: Function}} opts.auditionClient
 *   Same client stage's /api/audition uses. Shares the on-disk cache so a
 *   card the user previewed in the browser doesn't burn quota again at
 *   render time.
 * @param {string} opts.baseUrl e.g. http://127.0.0.1:54321 (one trailing
 *   slash is tolerated).
 * @param {(p:object) => void} [opts.onProgress]
 *   Called once per resolved card: { cardIdx, total, fromCache, voiceId, textLen }.
 *   Use this to print a one-line "voiceover N/M (cache hit)" log so the
 *   user knows TTS is happening before the renderer takes over. Exceptions
 *   thrown by the callback are ignored.
 * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
 *   skipped reasons: missing-card | voiceover-disabled | no-text |
 *   not_logged_in | quota_exceeded | tts_failed | network_error | invalid_voice.
 *   `tts_failed` is also recorded locally when the client returns no result
 *   object or reports success without a cacheKey.
 * @throws {Error} When auditionClient.audition is not a function or baseUrl
 *   is not a non-empty string.
 */
async function prepareVoiceovers({ deck, auditionClient, baseUrl, onProgress }) {
  const byIdx = {};
  const skipped = [];
  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
  if (!auditionClient || typeof auditionClient.audition !== 'function') {
    throw new Error('prepareVoiceovers: auditionClient.audition is required');
  }
  if (typeof baseUrl !== 'string' || !baseUrl) {
    throw new Error('prepareVoiceovers: baseUrl is required');
  }

  const cards = deck.cards;
  const format = 'mp3';
  // Loop-invariant: normalize the mount point once.
  const audioRoot = `${baseUrl.replace(/\/$/, '')}/audio`;

  for (let i = 0; i < cards.length; i++) {
    const card = cards[i];
    if (!card) {
      skipped.push({ cardIdx: i, reason: 'missing-card' });
      continue;
    }
    const vo = card.voiceover || {};
    // Only an explicit `false` opts out; an absent flag means "enabled".
    if (vo.enabled === false) {
      skipped.push({ cardIdx: i, reason: 'voiceover-disabled' });
      continue;
    }
    // A non-blank voiceover.text overrides the card narration.
    const text = (typeof vo.text === 'string' && vo.text.trim())
      ? vo.text
      : card.narration;
    if (typeof text !== 'string' || !text.trim()) {
      skipped.push({ cardIdx: i, reason: 'no-text' });
      continue;
    }
    const voiceId = vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;
    const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;

    let r;
    try {
      r = await auditionClient.audition({ voiceId, text, speed, format });
    } catch (err) {
      skipped.push({
        cardIdx: i,
        reason: 'network_error',
        message: (err && err.message) || String(err),
      });
      continue;
    }
    // A client that resolves with nothing must not abort the whole run.
    if (!r || typeof r !== 'object') {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition client returned no result',
      });
      continue;
    }
    if (r.code !== 'success') {
      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
      // not_logged_in / quota_exceeded → bail early so the user sees one
      // clear message rather than N copies of the same root cause.
      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
      continue;
    }
    // Without a cache key there is no file to point at; emitting
    // ".../audio/undefined.mp3" would only fail later inside the renderer.
    if (typeof r.cacheKey !== 'string' || !r.cacheKey) {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition succeeded without a cacheKey',
      });
      continue;
    }

    byIdx[i] = `${audioRoot}/${r.cacheKey}.${format}`;
    if (typeof onProgress === 'function') {
      try {
        onProgress({
          cardIdx: i,
          total: cards.length,
          fromCache: !!r.fromCache,
          voiceId,
          textLen: text.length,
        });
      } catch { /* swallow consumer errors */ }
    }
  }

  return { byIdx, skipped };
}
206
+
207
/**
 * Image counterpart of prepareVoiceovers — resolves the primary image
 * for each card. Precedence per card:
 *   (a) card.imageUrl present → use as-is, no API call (external asset)
 *   (b) card.images[0] present → call image-gen client, route through
 *       the local server's /image/ mount, return URL.
 * Returns { byIdx: { cardIdx → URL }, skipped: [{cardIdx, reason, message?}] }.
 * Cards are processed sequentially, in deck order.
 *
 * @param {object} opts
 * @param {object} opts.deck A missing deck or non-array `deck.cards` yields
 *   empty results without touching the client.
 * @param {{imagine: Function}} opts.imgClient Called with
 *   { prompt, aspect, quality } taken from card.images[0].
 * @param {string} opts.baseUrl e.g. http://127.0.0.1:54321 (one trailing
 *   slash is tolerated).
 * @param {(p:object) => void} [opts.onProgress]
 *   Called after each generated (not external) image with
 *   { cardIdx, total, fromCache, prompt, aspect }. Exceptions thrown by the
 *   callback are ignored.
 * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
 *   Locally produced skip reasons: missing-card | no-images | network_error |
 *   image_failed (client returned no result object, or success without a
 *   cacheKey/ext). Any other reason is the client's own result code.
 * @throws {Error} When imgClient.imagine is not a function or baseUrl is not
 *   a non-empty string.
 */
async function prepareImages({ deck, imgClient, baseUrl, onProgress }) {
  const byIdx = {};
  const skipped = [];
  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
  if (!imgClient || typeof imgClient.imagine !== 'function') {
    throw new Error('prepareImages: imgClient.imagine is required');
  }
  if (typeof baseUrl !== 'string' || !baseUrl) {
    throw new Error('prepareImages: baseUrl is required');
  }

  const cards = deck.cards;
  // Loop-invariant: normalize the mount point once.
  const imageRoot = `${baseUrl.replace(/\/$/, '')}/image`;
  const isNonEmptyString = (v) => typeof v === 'string' && v !== '';

  for (let i = 0; i < cards.length; i++) {
    const card = cards[i];
    if (!card) {
      skipped.push({ cardIdx: i, reason: 'missing-card' });
      continue;
    }

    // External imageUrl wins — themes that already consume it (photo-feature
    // / atmospheric) keep working unchanged.
    if (typeof card.imageUrl === 'string' && card.imageUrl.trim()) {
      byIdx[i] = card.imageUrl;
      continue;
    }

    const images = Array.isArray(card.images) ? card.images : [];
    const primary = images[0];
    if (!primary) {
      skipped.push({ cardIdx: i, reason: 'no-images' });
      continue;
    }

    let r;
    try {
      r = await imgClient.imagine({
        prompt: primary.prompt,
        aspect: primary.aspect,
        quality: primary.quality,
      });
    } catch (err) {
      skipped.push({
        cardIdx: i,
        reason: 'network_error',
        message: (err && err.message) || String(err),
      });
      continue;
    }
    // A client that resolves with nothing must not abort the whole run.
    if (!r || typeof r !== 'object') {
      skipped.push({
        cardIdx: i,
        reason: 'image_failed',
        message: 'image client returned no result',
      });
      continue;
    }
    if (r.code !== 'success') {
      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
      // Auth/quota failures repeat for every card; stop after the first.
      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
      continue;
    }
    // Both parts are needed to name the cached file; without them the URL
    // would be ".../image/undefined.undefined" and fail inside the renderer.
    if (!isNonEmptyString(r.cacheKey) || !isNonEmptyString(r.ext)) {
      skipped.push({
        cardIdx: i,
        reason: 'image_failed',
        message: 'image generation succeeded without a cacheKey/ext',
      });
      continue;
    }

    byIdx[i] = `${imageRoot}/${r.cacheKey}.${r.ext}`;
    if (typeof onProgress === 'function') {
      try {
        onProgress({
          cardIdx: i,
          total: cards.length,
          fromCache: !!r.fromCache,
          prompt: primary.prompt,
          aspect: primary.aspect,
        });
      } catch { /* swallow */ }
    }
  }

  return { byIdx, skipped };
}
284
+
285
+ module.exports = {
286
+ startVoiceoverServer,
287
+ prepareVoiceovers,
288
+ prepareImages,
289
+ mimeFor,
290
+ };