voxflow 1.15.2 → 1.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,8 +32,28 @@ const { findAvailablePort } = require('./port');
32
32
  * and GET /api/quota-balance — all proxies to the upstream API. Shape:
33
33
  * { submit(deck), status(jobId), quota() }. Lets users go from "deck I
34
34
  * like" → mp4 without leaving stage; the proxy keeps the JWT off the page.
35
+ * @param {object} [opts.localRender] Optional local-render handle.
36
+ * If provided, exposes POST /api/render-local and GET /api/render-local/:id/status.
37
+ * Shape: { start({onProgress, onDone, onError, output}), status(jobId) }.
38
+ * Used to render mp4 with `@remotion/renderer` *without* any cloud
39
+ * round-trip — required for offline-first authoring (skill-driven users
40
+ * may never have run `voxflow login`). Reuses the same SSE channel
41
+ * `subscribe` feeds for progress fan-out.
42
+ * @param {object} [opts.deckSaver] Optional inline-edit handle.
43
+ * If provided, exposes POST /api/deck → write new deck JSON to disk.
44
+ * Shape: { save(deck) → { ok:true } | { ok:false, errors:[...] } }.
45
+ * The file watcher picks up the resulting change and broadcasts deck SSE,
46
+ * so no extra fan-out is needed here.
47
+ * @param {(event:object) => void} [opts.publishEvent]
48
+ * Optional event-bus publisher — when provided, local-render progress
49
+ * events are fanned out through the same `subscribe` channel as deck
50
+ * events, so the page can listen on one EventSource for everything.
35
51
  * @param {number} [opts.preferredPort=5180]
36
52
  * @param {number} [opts.maxPortTries=10]
53
+ * @param {boolean} [opts.tokenAvailable]
54
+ * Optional flag baked into the boot snapshot returned by GET /api/auth-state.
55
+ * The page reads this once on load to decide whether to emphasise the
56
+ * "local" or "cloud" render button. Cheap; no JWT ever leaves the server.
37
57
  * @returns {Promise<{server:http.Server, port:number, url:string, close:() => Promise<void>}>}
38
58
  */
39
59
  async function startStageServer(opts) {
@@ -43,6 +63,11 @@ async function startStageServer(opts) {
43
63
  subscribe,
44
64
  snapshots = null,
45
65
  cloudRender = null,
66
+ localRender = null,
67
+ deckSaver = null,
68
+ audition = null,
69
+ publishEvent = null,
70
+ tokenAvailable = false,
46
71
  preferredPort = 5180,
47
72
  maxPortTries = 10,
48
73
  } = opts;
@@ -219,8 +244,19 @@ async function startStageServer(opts) {
219
244
  function statusForCode(code) {
220
245
  if (code === 'not_logged_in') return 401;
221
246
  if (code === 'quota_exceeded') return 402;
222
- if (code === 'invalid_deck' || code === 'invalid_id') return 400;
223
- if (code === 'job_not_found') return 404;
247
+ if (
248
+ code === 'invalid_deck' ||
249
+ code === 'invalid_id' ||
250
+ code === 'invalid_card_index' ||
251
+ code === 'invalid_voice' ||
252
+ code === 'invalid_text'
253
+ ) return 400;
254
+ if (
255
+ code === 'job_not_found' ||
256
+ code === 'no_deck' ||
257
+ code === 'card_not_found'
258
+ ) return 404;
259
+ if (code === 'voiceover_disabled') return 409;
224
260
  if (code === 'success') return 200;
225
261
  return 502; // upstream error
226
262
  }
@@ -273,6 +309,196 @@ async function startStageServer(opts) {
273
309
  return;
274
310
  }
275
311
 
312
+ // ─── Auth-state probe ───────────────────────────────────────────────────
313
+ // Single GET so the page can decide local-vs-cloud render emphasis on
314
+ // load. No JWT or refresh-token bytes ever leave the server.
315
+ if (req.method === 'GET' && req.url === '/api/auth-state') {
316
+ sendJson(200, { code: 'success', tokenAvailable: !!tokenAvailable });
317
+ return;
318
+ }
319
+
320
+ // ─── Local render (offline, @remotion/renderer) ─────────────────────────
321
+ // POST /api/render-local → start a job, return { jobId }
322
+ // GET /api/render-local/:id/status → poll status
323
+ //
324
+ // Progress + done events are pushed onto the deck SSE channel via
325
+ // publishEvent so the page listens on a single EventSource.
326
+ if (localRender && req.method === 'POST' && req.url === '/api/render-local') {
327
+ const chunks = [];
328
+ let total = 0;
329
+ const MAX = 64 * 1024;
330
+ req.on('data', (c) => {
331
+ total += c.length;
332
+ if (total > MAX) { req.destroy(); return; }
333
+ chunks.push(c);
334
+ });
335
+ req.on('end', () => {
336
+ let body = {};
337
+ const raw = Buffer.concat(chunks).toString('utf8');
338
+ if (raw) {
339
+ try { body = JSON.parse(raw); }
340
+ catch { return sendJson(400, { code: 'bad_json', message: 'render-local body must be JSON' }); }
341
+ }
342
+ const output = typeof body.output === 'string' && body.output.trim()
343
+ ? body.output.trim() : null;
344
+
345
+ let started;
346
+ try {
347
+ started = localRender.start({
348
+ output,
349
+ onProgress(p) {
350
+ if (typeof publishEvent === 'function') {
351
+ publishEvent({
352
+ type: 'render-local-progress',
353
+ jobId: p.jobId,
354
+ progress: p.progress,
355
+ framesRendered: p.framesRendered,
356
+ framesTotal: p.framesTotal,
357
+ ts: Date.now(),
358
+ });
359
+ }
360
+ },
361
+ onDone(d) {
362
+ if (typeof publishEvent === 'function') {
363
+ publishEvent({
364
+ type: 'render-local-done',
365
+ jobId: d.jobId,
366
+ outputPath: d.outputPath,
367
+ totalMs: d.totalMs,
368
+ sizeBytes: d.sizeBytes,
369
+ ts: Date.now(),
370
+ });
371
+ }
372
+ },
373
+ onError(e) {
374
+ if (typeof publishEvent === 'function') {
375
+ publishEvent({
376
+ type: 'render-local-error',
377
+ jobId: e.jobId,
378
+ message: e.message,
379
+ ts: Date.now(),
380
+ });
381
+ }
382
+ },
383
+ });
384
+ } catch (err) {
385
+ return sendJson(400, {
386
+ code: err.code || 'render_local_failed',
387
+ message: err.message || 'could not start local render',
388
+ });
389
+ }
390
+ return sendJson(200, {
391
+ code: 'success',
392
+ jobId: started.jobId,
393
+ outputPath: started.outputPath,
394
+ });
395
+ });
396
+ return;
397
+ }
398
+
399
+ if (localRender && req.method === 'GET' && req.url.startsWith('/api/render-local/')) {
400
+ // expected: /api/render-local/:id/status
401
+ const rest = req.url.slice('/api/render-local/'.length);
402
+ const slashIdx = rest.indexOf('/');
403
+ const id = slashIdx === -1 ? rest : rest.slice(0, slashIdx);
404
+ const tail = slashIdx === -1 ? '' : rest.slice(slashIdx + 1);
405
+ if (!id || tail !== 'status') {
406
+ sendJson(404, { code: 'not_found', message: `No route: ${req.method} ${req.url}` });
407
+ return;
408
+ }
409
+ const status = localRender.status(decodeURIComponent(id));
410
+ if (!status) {
411
+ sendJson(404, { code: 'job_not_found', message: `No local-render job: ${id}` });
412
+ return;
413
+ }
414
+ sendJson(200, { code: 'success', ...status });
415
+ return;
416
+ }
417
+
418
+ // ─── TTS audition (per-card ▶) ──────────────────────────────────────────
419
+ // GET /api/audition?card=<int>[&voice=<id>]
420
+ // Resolves card.voiceover/voiceId/narration → calls /api/tts/synthesize
421
+ // via the audition bridge → streams audio bytes (default mp3). Content
422
+ // hash caches identical (voice, text, speed, format) so iteration is
423
+ // free after the first call. The page never sees the JWT.
424
+ if (audition && req.method === 'GET' && req.url.startsWith('/api/audition')) {
425
+ let parsed;
426
+ try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
427
+ catch {
428
+ return sendJson(400, { code: 'bad_request', message: 'invalid /api/audition url' });
429
+ }
430
+ const cardIndexStr = parsed.searchParams.get('card');
431
+ const cardIndex = Number.parseInt(cardIndexStr, 10);
432
+ if (!Number.isInteger(cardIndex) || cardIndex < 0) {
433
+ return sendJson(400, {
434
+ code: 'invalid_card_index',
435
+ message: '?card= must be a non-negative integer',
436
+ });
437
+ }
438
+ const voiceOverride = parsed.searchParams.get('voice') || undefined;
439
+ (async () => {
440
+ let result;
441
+ try {
442
+ result = await audition.play({ cardIndex, voiceOverride });
443
+ } catch (err) {
444
+ return sendJson(502, { code: 'upstream_error', message: err.message });
445
+ }
446
+ if (result.code !== 'success') {
447
+ return sendJson(statusForCode(result.code), result);
448
+ }
449
+ res.writeHead(200, {
450
+ 'Content-Type': result.contentType || 'audio/mpeg',
451
+ 'Content-Length': result.buf.length,
452
+ 'Cache-Control': 'no-store',
453
+ 'X-Audition-Cache': result.fromCache ? 'HIT' : 'MISS',
454
+ 'X-Audition-Key': result.cacheKey || '',
455
+ });
456
+ res.end(result.buf);
457
+ })();
458
+ return;
459
+ }
460
+
461
+ // ─── Inline deck save (Task B) ──────────────────────────────────────────
462
+ // POST /api/deck body: full deck JSON → validates + writes to disk.
463
+ // The file watcher picks up the write and broadcasts the deck event, so
464
+ // the page hot-reloads via the same path as an external editor save.
465
+ if (deckSaver && req.method === 'POST' && req.url === '/api/deck') {
466
+ const chunks = [];
467
+ let total = 0;
468
+ // Decks are small (5-8 cards, ~5 KB). A 256 KB cap is plenty of headroom
469
+ // even with V2 layout-tree decks while still defending against pasted
470
+ // monsters.
471
+ const MAX = 256 * 1024;
472
+ req.on('data', (c) => {
473
+ total += c.length;
474
+ if (total > MAX) { req.destroy(); return; }
475
+ chunks.push(c);
476
+ });
477
+ req.on('end', () => {
478
+ let body;
479
+ try { body = JSON.parse(Buffer.concat(chunks).toString('utf8') || '{}'); }
480
+ catch { return sendJson(400, { code: 'bad_json', message: 'deck body must be JSON' }); }
481
+ const deck = body && body.deck;
482
+ if (!deck || typeof deck !== 'object') {
483
+ return sendJson(400, { code: 'invalid_deck', message: 'body.deck (object) required' });
484
+ }
485
+ let result;
486
+ try { result = deckSaver.save(deck); }
487
+ catch (err) {
488
+ return sendJson(500, { code: 'deck_save_failed', message: err.message || 'save error' });
489
+ }
490
+ if (!result || result.ok !== true) {
491
+ return sendJson(400, {
492
+ code: 'invalid_deck',
493
+ message: (result && result.message) || 'deck failed validation',
494
+ errors: (result && result.errors) || undefined,
495
+ });
496
+ }
497
+ return sendJson(200, { code: 'success' });
498
+ });
499
+ return;
500
+ }
501
+
276
502
  if (req.method === 'GET' && req.url === '/events') {
277
503
  res.writeHead(200, {
278
504
  'Content-Type': 'text/event-stream; charset=utf-8',
Binary file
@@ -0,0 +1,183 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Per-card voiceover audio prep for local Remotion render.
5
+ *
6
+ * Reuses the audition cache (~/.config/voxflow/stage-tts-cache/) so a card
7
+ * the user just listened to via stage's ▶ button doesn't get re-synthesized
8
+ * at render time. Spins up a tiny localhost HTTP server (auto-picked port)
9
+ * that serves audio files to the headless Chromium Remotion launches; the
10
+ * Remotion composition fetches voiceoverSrc URLs from this server while
11
+ * rendering. Tear the server down after renderMedia() resolves.
12
+ *
13
+ * const aud = createTtsAuditionClient();
14
+ * const server = await startVoiceoverServer({ cacheDir: aud.cacheDir });
15
+ * const { byIdx, skipped } = await prepareVoiceovers({
16
+ * deck, auditionClient: aud, baseUrl: server.url, onProgress,
17
+ * });
18
+ * // buildInputProps reads byIdx and threads URLs into card.slide.voiceoverSrc
19
+ * await renderMedia({ inputProps: buildInputProps(deck, { voiceoverByIdx: byIdx }), ... });
20
+ * await server.close();
21
+ *
22
+ * When auth is unavailable (no token in CLI cache), prepareVoiceovers
23
+ * returns an empty map quietly — the resulting mp4 is the Phase 0 silent
24
+ * video. Callers branch on the empty map to surface a hint to the user.
25
+ */
26
+
27
+ const fs = require('fs');
28
+ const http = require('http');
29
+ const path = require('path');
30
+
31
+ const { contentTypeFor } = require('./tts-audition');
32
+ const { SYNTHESIZE_DEFAULTS } = require('../core/config');
33
+
34
/**
 * Tiny localhost HTTP server serving the audition cache directory.
 * Only responds to GET /audio/<filename>; everything else is 404. Path
 * traversal (.. or nested directories) is rejected up front since the
 * cache layout is intentionally flat.
 *
 * The listener is bound to 127.0.0.1 only. File read failures that happen
 * after the stat (file removed or unreadable mid-request) are answered with
 * 404 — or by dropping the connection when headers were already sent —
 * instead of surfacing as an unhandled stream 'error' event.
 *
 * @param {object} opts
 * @param {string} opts.cacheDir         Directory containing <hash>.mp3 files.
 * @param {number} [opts.preferredPort=0] 0 lets the OS pick a free port.
 * @returns {Promise<{server, port, url, close}>}
 *   `url` is `http://127.0.0.1:<port>`; `close()` resolves once the server
 *   has stopped listening.
 * @throws {Error} If cacheDir is not a non-empty string. The returned promise
 *   also rejects with the listen error when the port cannot be bound.
 */
async function startVoiceoverServer({ cacheDir, preferredPort = 0 }) {
  if (typeof cacheDir !== 'string' || !cacheDir) {
    throw new Error('startVoiceoverServer: cacheDir required');
  }

  // Every non-200 answer is a short plain-text body.
  const sendText = (res, statusCode, body) => {
    res.writeHead(statusCode, { 'Content-Type': 'text/plain' });
    res.end(body);
  };

  const server = http.createServer((req, res) => {
    if (req.method !== 'GET' || !req.url.startsWith('/audio/')) {
      sendText(res, 404, 'not found');
      return;
    }
    const fname = req.url.slice('/audio/'.length).split('?')[0];
    // Defense in depth — reject path traversal even on a localhost-only
    // server. The audition cache is a flat dir of <sha256>.<ext> filenames.
    if (fname === '' || fname.includes('/') || fname.includes('\\') || fname.includes('..')) {
      sendText(res, 400, 'bad filename');
      return;
    }
    const filePath = path.join(cacheDir, fname);
    fs.stat(filePath, (statErr, st) => {
      if (statErr || !st.isFile()) {
        sendText(res, 404, 'not found');
        return;
      }
      const stream = fs.createReadStream(filePath);
      // Without an 'error' listener a file that vanishes between stat and
      // open/read would emit an unhandled 'error' and take the process down.
      stream.once('error', () => {
        if (res.headersSent) {
          // Too late for a status code; abort so the client sees a
          // truncated response rather than waiting for missing bytes.
          res.destroy();
          return;
        }
        sendText(res, 404, 'not found');
      });
      // Release the file descriptor if the client goes away mid-transfer.
      res.once('close', () => stream.destroy());
      // Only commit to a 200 once the file is actually open.
      stream.once('open', () => {
        if (stream.destroyed) return;
        const ext = path.extname(fname).slice(1);
        const ctype = contentTypeFor(ext);
        res.writeHead(200, {
          'Content-Type': ctype,
          'Content-Length': st.size,
          'Cache-Control': 'no-store',
        });
        stream.pipe(res);
      });
    });
  });

  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(preferredPort, '127.0.0.1', () => {
      server.removeListener('error', reject);
      resolve();
    });
  });

  const port = server.address().port;
  return {
    server,
    port,
    url: `http://127.0.0.1:${port}`,
    async close() {
      await new Promise((resolve) => {
        server.close(() => resolve());
        // On Node versions where server.close() does not itself drop idle
        // keep-alive sockets, do it explicitly so close() is not held up by
        // a client that kept its connection open.
        if (typeof server.closeIdleConnections === 'function') {
          server.closeIdleConnections();
        }
      });
    },
  };
}
97
+
98
/**
 * Resolve + synthesize (or cache-hit) one mp3 per card, return a map of
 * { cardIdx: audio URL } that buildInputProps threads into voiceoverSrc.
 *
 * Cards are processed sequentially. A card is skipped (and recorded in
 * `skipped`) rather than aborting the batch, except for `not_logged_in` and
 * `quota_exceeded`, which stop the loop so the caller sees one root cause.
 *
 * @param {object} opts
 * @param {object} opts.deck            Validator-shaped deck.
 * @param {{audition: Function}} opts.auditionClient
 *   Same client stage's /api/audition uses. Shares the on-disk cache so a
 *   card the user previewed in the browser doesn't burn quota again at
 *   render time. Called as audition({ voiceId, text, speed, format }) and
 *   expected to resolve to { code, cacheKey, fromCache?, message? }.
 * @param {string} opts.baseUrl         e.g. http://127.0.0.1:54321
 * @param {(p:object) => void} [opts.onProgress]
 *   Called once per resolved card: { cardIdx, total, fromCache, voiceId, textLen }.
 *   Use this to print a one-line "voiceover N/M (cache hit)" log so the
 *   user knows TTS is happening before the renderer takes over. Exceptions
 *   thrown by the callback are swallowed.
 * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
 *   skipped reasons: missing-card | voiceover-disabled | no-text |
 *   not_logged_in | quota_exceeded | tts_failed | network_error | invalid_voice
 * @throws {Error} If auditionClient.audition is not a function or baseUrl is
 *   not a non-empty string (only checked when deck.cards is an array).
 */
async function prepareVoiceovers({ deck, auditionClient, baseUrl, onProgress }) {
  const byIdx = {};
  const skipped = [];
  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
  if (!auditionClient || typeof auditionClient.audition !== 'function') {
    throw new Error('prepareVoiceovers: auditionClient.audition is required');
  }
  if (typeof baseUrl !== 'string' || !baseUrl) {
    throw new Error('prepareVoiceovers: baseUrl is required');
  }

  const cards = deck.cards;
  const format = 'mp3';
  // Loop-invariant: strip one trailing slash so URLs never contain "//audio".
  const audioRoot = `${baseUrl.replace(/\/$/, '')}/audio`;
  const notify = typeof onProgress === 'function' ? onProgress : null;

  for (let i = 0; i < cards.length; i++) {
    const card = cards[i];
    if (!card) {
      skipped.push({ cardIdx: i, reason: 'missing-card' });
      continue;
    }
    const vo = card.voiceover || {};
    if (vo.enabled === false) {
      skipped.push({ cardIdx: i, reason: 'voiceover-disabled' });
      continue;
    }
    // Explicit voiceover text wins; otherwise fall back to the narration.
    const text = (typeof vo.text === 'string' && vo.text.trim())
      ? vo.text
      : card.narration;
    if (typeof text !== 'string' || !text.trim()) {
      skipped.push({ cardIdx: i, reason: 'no-text' });
      continue;
    }
    const voiceId = vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;
    const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;

    let r;
    try {
      r = await auditionClient.audition({ voiceId, text, speed, format });
    } catch (err) {
      // `err` may be any thrown value, including null/undefined.
      const message = (err && err.message) || String(err);
      skipped.push({ cardIdx: i, reason: 'network_error', message });
      continue;
    }
    // A client that resolves with nothing must not crash the whole batch.
    if (!r || typeof r !== 'object') {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition client returned no result',
      });
      continue;
    }
    if (r.code !== 'success') {
      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
      // not_logged_in / quota_exceeded → bail early so the user sees one
      // clear message rather than N copies of the same root cause.
      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
      continue;
    }
    // Without a cache key there is no file to point at; emitting
    // ".../audio/undefined.mp3" would only fail later, inside the renderer.
    if (!r.cacheKey) {
      skipped.push({
        cardIdx: i,
        reason: 'tts_failed',
        message: 'audition result is missing cacheKey',
      });
      continue;
    }

    byIdx[i] = `${audioRoot}/${r.cacheKey}.${format}`;
    if (notify) {
      try {
        notify({
          cardIdx: i,
          total: cards.length,
          fromCache: !!r.fromCache,
          voiceId,
          textLen: text.length,
        });
      } catch { /* swallow consumer errors */ }
    }
  }

  return { byIdx, skipped };
}
179
+
180
+ module.exports = {
181
+ startVoiceoverServer,
182
+ prepareVoiceovers,
183
+ };