voxflow 1.15.3 → 1.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/lib/commands/slice-render.js +71 -7
- package/lib/commands/slice-stage.js +34 -0
- package/lib/internal/deck-validator.js +47 -0
- package/lib/stage-core/local-render.js +57 -1
- package/lib/stage-core/server.js +57 -2
- package/lib/stage-core/tts-audition.js +0 -0
- package/lib/stage-core/voiceover-mux.js +183 -0
- package/lib/stage-ui/slice/template.js +171 -0
- package/package.json +1 -1
- package/skills/voxflow-slice/SKILL.md +75 -2
|
@@ -38,9 +38,14 @@ const DEFAULT_CARD_SEC = 4;
|
|
|
38
38
|
* Phase 0 is silent — every card gets DEFAULT_CARD_SEC. Phase 1 will
|
|
39
39
|
* splice per-card TTS in and replace this with audio-driven durations.
|
|
40
40
|
*/
|
|
41
|
-
function buildInputProps(deck) {
|
|
41
|
+
function buildInputProps(deck, opts = {}) {
|
|
42
|
+
// Map of cardIdx → audio URL produced by prepareVoiceovers (or empty when
|
|
43
|
+
// the renderer runs silent). Threads into PaperSlideDeckProps.cards[].slide
|
|
44
|
+
// .voiceoverSrc so the composition's <Audio> element fetches it during
|
|
45
|
+
// Remotion's headless render.
|
|
46
|
+
const voiceoverByIdx = opts.voiceoverByIdx || {};
|
|
42
47
|
const numberBadge = null;
|
|
43
|
-
const cards = deck.cards.map((card) => {
|
|
48
|
+
const cards = deck.cards.map((card, i) => {
|
|
44
49
|
const slide = {
|
|
45
50
|
kind: card.kind,
|
|
46
51
|
header: deck.header,
|
|
@@ -49,7 +54,7 @@ function buildInputProps(deck) {
|
|
|
49
54
|
figureKeyword: card.figureKeyword ?? null,
|
|
50
55
|
seriesTitle: deck.seriesTitle,
|
|
51
56
|
seriesTagline: deck.seriesTagline,
|
|
52
|
-
voiceoverSrc: null,
|
|
57
|
+
voiceoverSrc: voiceoverByIdx[i] || null,
|
|
53
58
|
numberBadge,
|
|
54
59
|
imageUrl: card.imageUrl,
|
|
55
60
|
};
|
|
@@ -181,8 +186,53 @@ async function render(opts) {
|
|
|
181
186
|
const outputDir = path.dirname(outputPath);
|
|
182
187
|
if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir, { recursive: true });
|
|
183
188
|
|
|
189
|
+
// ─── Voiceover prep (Phase 1) ────────────────────────────────────────
|
|
190
|
+
// Synthesize per-card TTS up front so renderMedia's headless Chromium
|
|
191
|
+
// can fetch each clip as the composition plays. Reuses the audition
|
|
192
|
+
// cache so a card the user previewed in stage doesn't pay quota again.
|
|
193
|
+
// Skip the whole pass on --no-audio (back-compat with Phase 0 silent).
|
|
194
|
+
const includeAudio = opts.noAudio !== true;
|
|
195
|
+
let voiceoverByIdx = {};
|
|
196
|
+
let voiceoverServer = null;
|
|
197
|
+
let voiceoverSkipped = [];
|
|
198
|
+
if (includeAudio) {
|
|
199
|
+
const { createTtsAuditionClient } = require('../stage-core/tts-audition');
|
|
200
|
+
const { startVoiceoverServer, prepareVoiceovers } = require('../stage-core/voiceover-mux');
|
|
201
|
+
const audClient = createTtsAuditionClient();
|
|
202
|
+
voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
|
|
203
|
+
let synthCount = 0;
|
|
204
|
+
let cacheCount = 0;
|
|
205
|
+
const prep = await prepareVoiceovers({
|
|
206
|
+
deck,
|
|
207
|
+
auditionClient: audClient,
|
|
208
|
+
baseUrl: voiceoverServer.url,
|
|
209
|
+
onProgress: (p) => {
|
|
210
|
+
if (p.fromCache) cacheCount += 1; else synthCount += 1;
|
|
211
|
+
process.stdout.write(
|
|
212
|
+
`\r[slice render] voiceover ${p.cardIdx + 1}/${p.total} ` +
|
|
213
|
+
`(${p.fromCache ? 'cache' : 'synth'}) `
|
|
214
|
+
);
|
|
215
|
+
},
|
|
216
|
+
});
|
|
217
|
+
voiceoverByIdx = prep.byIdx;
|
|
218
|
+
voiceoverSkipped = prep.skipped;
|
|
219
|
+
if (synthCount > 0 || cacheCount > 0) process.stdout.write('\n');
|
|
220
|
+
if (Object.keys(voiceoverByIdx).length === 0) {
|
|
221
|
+
const fatal = voiceoverSkipped.find(
|
|
222
|
+
(s) => s.reason === 'not_logged_in' || s.reason === 'quota_exceeded'
|
|
223
|
+
);
|
|
224
|
+
if (fatal) {
|
|
225
|
+
console.warn(
|
|
226
|
+
`[slice render] ⚠ audio skipped — ${fatal.reason}` +
|
|
227
|
+
(fatal.message ? `: ${fatal.message}` : '') +
|
|
228
|
+
' (rendering silent video; pass --no-audio to suppress this notice)'
|
|
229
|
+
);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
184
234
|
const serveUrl = resolveServeUrl();
|
|
185
|
-
const inputProps = buildInputProps(deck);
|
|
235
|
+
const inputProps = buildInputProps(deck, { voiceoverByIdx });
|
|
186
236
|
|
|
187
237
|
// Lazy require so users who never run `slice render` don't pay the
|
|
188
238
|
// remotion install cost at CLI startup (renderer pulls in puppeteer-
|
|
@@ -246,24 +296,38 @@ async function render(opts) {
|
|
|
246
296
|
const totalMs = Date.now() - t0;
|
|
247
297
|
process.stdout.write('\n');
|
|
248
298
|
|
|
299
|
+
// Tear down the localhost audio file server after the render is fully
|
|
300
|
+
// committed to disk so a hanging Chromium fetch can't be interrupted.
|
|
301
|
+
if (voiceoverServer) {
|
|
302
|
+
try { await voiceoverServer.close(); } catch { /* best-effort */ }
|
|
303
|
+
}
|
|
304
|
+
|
|
249
305
|
const stat = fs.statSync(outputPath);
|
|
250
306
|
console.log(`[slice render] done in ${fmtSec(totalMs)} — ${humanSize(stat.size)}`);
|
|
251
307
|
console.log(`[slice render] saved to ${outputPath}`);
|
|
252
|
-
return {
|
|
308
|
+
return {
|
|
309
|
+
outputPath,
|
|
310
|
+
totalMs,
|
|
311
|
+
frames: lastFrame,
|
|
312
|
+
size: stat.size,
|
|
313
|
+
voiceoverCount: Object.keys(voiceoverByIdx).length,
|
|
314
|
+
voiceoverSkipped,
|
|
315
|
+
};
|
|
253
316
|
}
|
|
254
317
|
|
|
255
318
|
async function handle(args) {
|
|
256
319
|
const { parseFlag } = require('../core/args');
|
|
257
320
|
const output = parseFlag(args, '--output', '-o');
|
|
321
|
+
const noAudio = args.includes('--no-audio');
|
|
258
322
|
const positional = args.find(
|
|
259
323
|
(a) => !a.startsWith('-') && !a.startsWith('--')
|
|
260
324
|
);
|
|
261
325
|
if (!positional) {
|
|
262
|
-
console.error('Usage: voxflow slice render <deck.json> [--output out.mp4]');
|
|
326
|
+
console.error('Usage: voxflow slice render <deck.json> [--output out.mp4] [--no-audio]');
|
|
263
327
|
process.exit(1);
|
|
264
328
|
}
|
|
265
329
|
try {
|
|
266
|
-
await render({ deckPath: positional, output });
|
|
330
|
+
await render({ deckPath: positional, output, noAudio });
|
|
267
331
|
} catch (err) {
|
|
268
332
|
console.error(`\nslice render failed: ${err.message}`);
|
|
269
333
|
if (process.env.VOXFLOW_DEBUG) console.error(err.stack);
|
|
@@ -20,10 +20,12 @@ const { createEventBus } = require('../stage-core/event-bus');
|
|
|
20
20
|
const { createSnapshotStore } = require('../stage-core/snapshot-store');
|
|
21
21
|
const { createCloudRenderClient } = require('../stage-core/cloud-render');
|
|
22
22
|
const { startLocalRender, getJobStatus } = require('../stage-core/local-render');
|
|
23
|
+
const { createTtsAuditionClient } = require('../stage-core/tts-audition');
|
|
23
24
|
const { validatePaperSlideDeck, isV2LayoutTreeDeck } = require('../internal/deck-validator');
|
|
24
25
|
const { renderSliceStageHtml } = require('../stage-ui/slice/template');
|
|
25
26
|
const { emit: emitTelemetry } = require('../core/telemetry');
|
|
26
27
|
const { readCachedToken } = require('../core/auth');
|
|
28
|
+
const { SYNTHESIZE_DEFAULTS } = require('../core/config');
|
|
27
29
|
|
|
28
30
|
// Sourced from the canonical registry at repo root. Previously this list
|
|
29
31
|
// silently fell out of sync (lagged at 6 themes while the rest of the repo
|
|
@@ -188,6 +190,37 @@ async function startSliceStage(opts) {
|
|
|
188
190
|
},
|
|
189
191
|
};
|
|
190
192
|
|
|
193
|
+
// ─── TTS audition bridge (per-card ▶ on stage UI) ──────────────────────
|
|
194
|
+
// Resolves `card.voiceover` / `card.voiceId` / `card.narration` against the
|
|
195
|
+
// live deck snapshot at request time so editing the deck → ▶ replays the
|
|
196
|
+
// new content immediately. Audio is cached by content hash so iterative
|
|
197
|
+
// re-listens cost zero quota after the first call.
|
|
198
|
+
// Default audition bridge, built only when the caller did not inject
// `opts.audition`. `play()` reads the deck snapshot on every call, so the
// card content resolved is whatever the snapshot holds at request time.
const auditionBridge = opts.audition || (() => {
  const tts = opts.ttsClient || createTtsAuditionClient(opts.ttsClientOpts || {});

  // Small local helpers: uniform failure objects + "usable string" predicate.
  const fail = (code, message) => ({ code, message });
  const hasContent = (value) => typeof value === 'string' && value.trim() !== '';

  /**
   * Resolve one card's voiceover settings and hand them to the TTS client.
   *
   * @param {{cardIndex: number, voiceOverride?: string}} req
   * @returns {Promise<object>} Either a `{ code, message }` failure built
   *   here or whatever `tts.audition()` resolves to.
   */
  async function play({ cardIndex, voiceOverride }) {
    if (!snapshot.deck) {
      return fail('no_deck', 'no deck loaded');
    }

    const deckCards = Array.isArray(snapshot.deck.cards) ? snapshot.deck.cards : [];
    const card = deckCards[cardIndex];
    if (!card) {
      return fail('card_not_found', `no card at index ${cardIndex}`);
    }

    const vo = card.voiceover || {};
    if (vo.enabled === false) {
      return fail('voiceover_disabled', 'card voiceover.enabled = false');
    }

    // A blank voiceover.text falls through to the card narration.
    const text = hasContent(vo.text) ? vo.text : card.narration;
    if (!hasContent(text)) {
      return fail('invalid_text', 'no text to synthesize (card.voiceover.text or card.narration)');
    }

    // Precedence: explicit override → voiceover.voiceId → card.voiceId → default.
    const trimmedOverride = voiceOverride && String(voiceOverride).trim();
    const voiceId = trimmedOverride || vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;

    let speed = 1.0;
    if (typeof vo.rate === 'number') {
      speed = vo.rate;
    }

    return tts.audition({ voiceId, text, speed, format: 'mp3' });
  }

  return { play };
})();
|
|
223
|
+
|
|
191
224
|
// Boot-time auth probe so the UI can emphasise local vs cloud render.
|
|
192
225
|
// We treat any cached, non-expired token as "logged in"; the actual
|
|
193
226
|
// request flow still revalidates on /api/quota-balance.
|
|
@@ -204,6 +237,7 @@ async function startSliceStage(opts) {
|
|
|
204
237
|
cloudRender,
|
|
205
238
|
localRender: localRenderBridge,
|
|
206
239
|
deckSaver: deckSaverBridge,
|
|
240
|
+
audition: auditionBridge,
|
|
207
241
|
publishEvent: bus.publish,
|
|
208
242
|
tokenAvailable,
|
|
209
243
|
preferredPort,
|
|
@@ -128,6 +128,49 @@ function validateListPayload(list, i) {
|
|
|
128
128
|
});
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
// Optional per-card voiceover override. Extends the legacy `card.voiceId`
|
|
132
|
+
// (V1-only) with a nested object that carries audio behavior toggles — silent
|
|
133
|
+
// card, custom TTS text override, speech rate — so stage's audition endpoint
|
|
134
|
+
// and the local-render mux pass resolve a single source of truth per card.
|
|
135
|
+
// All fields are optional inside an optional object: omitting `voiceover`
|
|
136
|
+
// entirely keeps existing decks unchanged. Render-time resolution (highest
|
|
137
|
+
// precedence first):
|
|
138
|
+
// voiceId = voiceover.voiceId ?? card.voiceId ?? job-level default
|
|
139
|
+
// text = voiceover.text (when non-blank), else card.narration
|
|
140
|
+
// enabled = voiceover.enabled ?? true
|
|
141
|
+
// rate = voiceover.rate ?? 1.0
|
|
142
|
+
const VOICEOVER_TEXT_MAX = 500;

/**
 * Shape-check the optional `card.voiceover` object.
 *
 * A null/undefined `vo` is accepted as "no override". Otherwise `vo` must be
 * a non-array object, and each field that is present is checked in the order
 * enabled → voiceId → text → rate; the first violation throws.
 *
 * @param {*} vo Value of `card.voiceover`.
 * @param {number} cardIdx Card position, used only to build error messages.
 * @returns {void}
 * @throws {Error} With a `cards[<idx>].voiceover…` message on the first invalid field.
 */
function validateVoiceoverShape(vo, cardIdx) {
  const isPresent = (value) => value !== null && value !== undefined;
  if (!isPresent(vo)) return;

  const where = `cards[${cardIdx}].voiceover`;
  if (Array.isArray(vo) || typeof vo !== 'object') {
    throw new Error(`${where} must be an object`);
  }

  const { enabled, voiceId, text, rate } = vo;

  if (isPresent(enabled) && typeof enabled !== 'boolean') {
    throw new Error(`${where}.enabled must be boolean`);
  }

  if (isPresent(voiceId)) {
    const isUsableId = typeof voiceId === 'string' && voiceId.trim() !== '';
    if (!isUsableId) {
      throw new Error(`${where}.voiceId must be non-empty string when present`);
    }
    if (voiceId.length > 128) {
      throw new Error(`${where}.voiceId too long (${voiceId.length} > 128)`);
    }
  }

  if (isPresent(text)) {
    if (typeof text !== 'string') {
      throw new Error(`${where}.text must be string`);
    }
    if (text.length > VOICEOVER_TEXT_MAX) {
      throw new Error(`${where}.text too long (${text.length} > ${VOICEOVER_TEXT_MAX})`);
    }
  }

  if (isPresent(rate)) {
    // NaN and ±Infinity are numbers by typeof, so they are rejected explicitly.
    const inRange =
      typeof rate === 'number' && Number.isFinite(rate) && rate >= 0.5 && rate <= 2.0;
    if (!inRange) {
      throw new Error(`${where}.rate must be number in [0.5, 2.0]`);
    }
  }
}
|
|
173
|
+
|
|
131
174
|
function validatePaperSlideDeck(deck) {
|
|
132
175
|
if (!deck || typeof deck !== 'object') throw new Error('deck missing');
|
|
133
176
|
for (const f of ['header', 'seriesTitle', 'seriesTagline']) {
|
|
@@ -235,6 +278,7 @@ function validatePaperSlideDeck(deck) {
|
|
|
235
278
|
throw new Error(`cards[${i}].voiceId too long (${card.voiceId.length} > 128)`);
|
|
236
279
|
}
|
|
237
280
|
}
|
|
281
|
+
validateVoiceoverShape(card.voiceover, i);
|
|
238
282
|
// Optional per-card image URL — photo-feature / atmospheric themes
|
|
239
283
|
// composite it as a full-bleed background; other themes ignore it.
|
|
240
284
|
// Shape-check only (string, length cap, http(s) prefix); reachability
|
|
@@ -446,6 +490,7 @@ function validatePaperSlideDeckV2(deck) {
|
|
|
446
490
|
if (stepsEls.length !== 1) throw new Error(`cards[${i}] list card must contain exactly one steps element (got ${stepsEls.length})`);
|
|
447
491
|
richCounts.list += 1;
|
|
448
492
|
}
|
|
493
|
+
validateVoiceoverShape(card.voiceover, i);
|
|
449
494
|
});
|
|
450
495
|
// Cap on rich-kind variety — at most 1 of each (same as V1 prompt rule)
|
|
451
496
|
for (const k of Object.keys(richCounts)) {
|
|
@@ -477,6 +522,7 @@ module.exports = {
|
|
|
477
522
|
validateQuotePayload,
|
|
478
523
|
validateDataPayload,
|
|
479
524
|
validateListPayload,
|
|
525
|
+
validateVoiceoverShape,
|
|
480
526
|
QUOTE_TEXT_MAX,
|
|
481
527
|
QUOTE_ATTRIBUTION_MAX,
|
|
482
528
|
DATA_VALUE_MAX,
|
|
@@ -485,4 +531,5 @@ module.exports = {
|
|
|
485
531
|
LIST_ITEM_MAX_LEN,
|
|
486
532
|
LIST_ITEM_MIN_COUNT,
|
|
487
533
|
LIST_ITEM_MAX_COUNT,
|
|
534
|
+
VOICEOVER_TEXT_MAX,
|
|
488
535
|
};
|
|
@@ -45,6 +45,8 @@ const {
|
|
|
45
45
|
THEME_TO_DECK_ID,
|
|
46
46
|
DEFAULT_THEME,
|
|
47
47
|
} = require('../commands/slice-render');
|
|
48
|
+
const { createTtsAuditionClient } = require('./tts-audition');
|
|
49
|
+
const { startVoiceoverServer, prepareVoiceovers } = require('./voiceover-mux');
|
|
48
50
|
|
|
49
51
|
// In-memory job table. We never persist jobs — a stage restart wipes history,
|
|
50
52
|
// which is fine because the produced mp4 lives on disk under the user's deck
|
|
@@ -158,10 +160,58 @@ function startLocalRender(opts) {
|
|
|
158
160
|
|
|
159
161
|
async function runRender({ job, deck, onProgress, onDone, onError }) {
|
|
160
162
|
const { jobId, outputPath, deckId } = job;
|
|
163
|
+
let voiceoverServer = null;
|
|
161
164
|
try {
|
|
162
165
|
job.state = 'preparing';
|
|
166
|
+
|
|
167
|
+
// ─── Voiceover prep (Phase 1) ──────────────────────────────────────
|
|
168
|
+
// Stage's Render button defaults to including audio — users in stage
|
|
169
|
+
// are iterating and expect a richer preview. The audition cache makes
|
|
170
|
+
// re-renders effectively free for cards they already previewed.
|
|
171
|
+
// Falls back to silent video on not_logged_in / quota_exceeded
|
|
172
|
+
// (recorded in job.voiceoverSkipped so the UI can surface the reason).
|
|
173
|
+
let voiceoverByIdx = {};
|
|
174
|
+
let voiceoverSkipped = [];
|
|
175
|
+
try {
|
|
176
|
+
const audClient = createTtsAuditionClient();
|
|
177
|
+
voiceoverServer = await startVoiceoverServer({ cacheDir: audClient.cacheDir });
|
|
178
|
+
const prep = await prepareVoiceovers({
|
|
179
|
+
deck,
|
|
180
|
+
auditionClient: audClient,
|
|
181
|
+
baseUrl: voiceoverServer.url,
|
|
182
|
+
onProgress: (p) => {
|
|
183
|
+
if (typeof onProgress === 'function') {
|
|
184
|
+
try {
|
|
185
|
+
onProgress({
|
|
186
|
+
jobId,
|
|
187
|
+
progress: 0,
|
|
188
|
+
framesRendered: 0,
|
|
189
|
+
framesTotal: 0,
|
|
190
|
+
phase: 'voiceover',
|
|
191
|
+
voiceoverIndex: p.cardIdx + 1,
|
|
192
|
+
voiceoverTotal: p.total,
|
|
193
|
+
voiceoverFromCache: p.fromCache,
|
|
194
|
+
});
|
|
195
|
+
} catch { /* swallow */ }
|
|
196
|
+
}
|
|
197
|
+
},
|
|
198
|
+
});
|
|
199
|
+
voiceoverByIdx = prep.byIdx;
|
|
200
|
+
voiceoverSkipped = prep.skipped;
|
|
201
|
+
} catch (err) {
|
|
202
|
+
// Voiceover prep failure is non-fatal — fall back to silent render
|
|
203
|
+
// so a TTS outage / first-run-without-login still produces an mp4.
|
|
204
|
+
voiceoverSkipped = [{ cardIdx: -1, reason: 'voiceover_prep_failed', message: err.message }];
|
|
205
|
+
if (voiceoverServer) {
|
|
206
|
+
try { await voiceoverServer.close(); } catch { /* */ }
|
|
207
|
+
voiceoverServer = null;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
job.voiceoverCount = Object.keys(voiceoverByIdx).length;
|
|
211
|
+
job.voiceoverSkipped = voiceoverSkipped;
|
|
212
|
+
|
|
163
213
|
const serveUrl = resolveServeUrl();
|
|
164
|
-
const inputProps = buildInputProps(deck);
|
|
214
|
+
const inputProps = buildInputProps(deck, { voiceoverByIdx });
|
|
165
215
|
|
|
166
216
|
const renderer = loadRenderer();
|
|
167
217
|
job.coldStart = !chromeBinaryExists();
|
|
@@ -234,6 +284,12 @@ async function runRender({ job, deck, onProgress, onDone, onError }) {
|
|
|
234
284
|
if (typeof onError === 'function') {
|
|
235
285
|
try { onError({ jobId, message: job.error }); } catch { /* swallow */ }
|
|
236
286
|
}
|
|
287
|
+
} finally {
|
|
288
|
+
// Always tear down the audio file server, including on render failure,
|
|
289
|
+
// so a stale localhost listener doesn't leak across jobs.
|
|
290
|
+
if (voiceoverServer) {
|
|
291
|
+
try { await voiceoverServer.close(); } catch { /* best-effort */ }
|
|
292
|
+
}
|
|
237
293
|
}
|
|
238
294
|
}
|
|
239
295
|
|
package/lib/stage-core/server.js
CHANGED
|
@@ -65,6 +65,7 @@ async function startStageServer(opts) {
|
|
|
65
65
|
cloudRender = null,
|
|
66
66
|
localRender = null,
|
|
67
67
|
deckSaver = null,
|
|
68
|
+
audition = null,
|
|
68
69
|
publishEvent = null,
|
|
69
70
|
tokenAvailable = false,
|
|
70
71
|
preferredPort = 5180,
|
|
@@ -243,8 +244,19 @@ async function startStageServer(opts) {
|
|
|
243
244
|
/**
 * Map a bridge result `code` to the HTTP status the stage server replies with.
 * Any code not listed here is reported as 502.
 *
 * @param {string} code
 * @returns {number} HTTP status code.
 */
function statusForCode(code) {
  switch (code) {
    case 'success':
      return 200;

    // Malformed request input.
    case 'invalid_deck':
    case 'invalid_id':
    case 'invalid_card_index':
    case 'invalid_voice':
    case 'invalid_text':
      return 400;

    case 'not_logged_in':
      return 401;

    case 'quota_exceeded':
      return 402;

    // The thing being addressed does not exist.
    case 'job_not_found':
    case 'no_deck':
    case 'card_not_found':
      return 404;

    case 'voiceover_disabled':
      return 409;

    default:
      return 502; // upstream error
  }
}
|
|
@@ -403,6 +415,49 @@ async function startStageServer(opts) {
|
|
|
403
415
|
return;
|
|
404
416
|
}
|
|
405
417
|
|
|
418
|
+
// ─── TTS audition (per-card ▶) ──────────────────────────────────────────
|
|
419
|
+
// GET /api/audition?card=<int>[&voice=<id>]
|
|
420
|
+
// Resolves card.voiceover/voiceId/narration → calls /api/tts/synthesize
|
|
421
|
+
// via the audition bridge → streams audio bytes (default mp3). Content
|
|
422
|
+
// hash caches identical (voice, text, speed, format) so iteration is
|
|
423
|
+
// free after the first call. The page never sees the JWT.
|
|
424
|
+
if (audition && req.method === 'GET' && req.url.startsWith('/api/audition')) {
|
|
425
|
+
let parsed;
|
|
426
|
+
try { parsed = new URL(req.url, `http://127.0.0.1:${port}`); }
|
|
427
|
+
catch {
|
|
428
|
+
return sendJson(400, { code: 'bad_request', message: 'invalid /api/audition url' });
|
|
429
|
+
}
|
|
430
|
+
const cardIndexStr = parsed.searchParams.get('card');
|
|
431
|
+
const cardIndex = Number.parseInt(cardIndexStr, 10);
|
|
432
|
+
if (!Number.isInteger(cardIndex) || cardIndex < 0) {
|
|
433
|
+
return sendJson(400, {
|
|
434
|
+
code: 'invalid_card_index',
|
|
435
|
+
message: '?card= must be a non-negative integer',
|
|
436
|
+
});
|
|
437
|
+
}
|
|
438
|
+
const voiceOverride = parsed.searchParams.get('voice') || undefined;
|
|
439
|
+
(async () => {
|
|
440
|
+
let result;
|
|
441
|
+
try {
|
|
442
|
+
result = await audition.play({ cardIndex, voiceOverride });
|
|
443
|
+
} catch (err) {
|
|
444
|
+
return sendJson(502, { code: 'upstream_error', message: err.message });
|
|
445
|
+
}
|
|
446
|
+
if (result.code !== 'success') {
|
|
447
|
+
return sendJson(statusForCode(result.code), result);
|
|
448
|
+
}
|
|
449
|
+
res.writeHead(200, {
|
|
450
|
+
'Content-Type': result.contentType || 'audio/mpeg',
|
|
451
|
+
'Content-Length': result.buf.length,
|
|
452
|
+
'Cache-Control': 'no-store',
|
|
453
|
+
'X-Audition-Cache': result.fromCache ? 'HIT' : 'MISS',
|
|
454
|
+
'X-Audition-Key': result.cacheKey || '',
|
|
455
|
+
});
|
|
456
|
+
res.end(result.buf);
|
|
457
|
+
})();
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
|
|
406
461
|
// ─── Inline deck save (Task B) ──────────────────────────────────────────
|
|
407
462
|
// POST /api/deck body: full deck JSON → validates + writes to disk.
|
|
408
463
|
// The file watcher picks up the write and broadcasts the deck event, so
|
|
Binary file
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Per-card voiceover audio prep for local Remotion render.
|
|
5
|
+
*
|
|
6
|
+
* Reuses the audition cache (~/.config/voxflow/stage-tts-cache/) so a card
|
|
7
|
+
* the user just listened to via stage's ▶ button doesn't get re-synthesized
|
|
8
|
+
* at render time. Spins up a tiny localhost HTTP server (auto-picked port)
|
|
9
|
+
* that serves audio files to the headless Chromium Remotion launches; the
|
|
10
|
+
* Remotion composition fetches voiceoverSrc URLs from this server while
|
|
11
|
+
* rendering. Tear the server down after renderMedia() resolves.
|
|
12
|
+
*
|
|
13
|
+
* const aud = createTtsAuditionClient();
|
|
14
|
+
* const server = await startVoiceoverServer({ cacheDir: aud.cacheDir });
|
|
15
|
+
* const { byIdx, skipped } = await prepareVoiceovers({
|
|
16
|
+
* deck, auditionClient: aud, baseUrl: server.url, onProgress,
|
|
17
|
+
* });
|
|
18
|
+
* // buildInputProps reads byIdx and threads URLs into card.slide.voiceoverSrc
|
|
19
|
+
* await renderMedia({ inputProps: buildInputProps(deck, { voiceoverByIdx: byIdx }), ... });
|
|
20
|
+
* await server.close();
|
|
21
|
+
*
|
|
22
|
+
* When auth is unavailable (no token in CLI cache), prepareVoiceovers
|
|
23
|
+
* returns an empty map quietly — the resulting mp4 is the Phase 0 silent
|
|
24
|
+
* video. Callers branch on the empty map to surface a hint to the user.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
const http = require('http');
|
|
29
|
+
const path = require('path');
|
|
30
|
+
|
|
31
|
+
const { contentTypeFor } = require('./tts-audition');
|
|
32
|
+
const { SYNTHESIZE_DEFAULTS } = require('../core/config');
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Tiny localhost HTTP server serving the audition cache directory.
|
|
36
|
+
* Only responds to GET /audio/<filename>; everything else is 404. Path
|
|
37
|
+
* traversal (.. or nested directories) is rejected up front since the
|
|
38
|
+
* cache layout is intentionally flat.
|
|
39
|
+
*
|
|
40
|
+
* @param {object} opts
|
|
41
|
+
* @param {string} opts.cacheDir Directory containing <hash>.mp3 files.
|
|
42
|
+
* @param {number} [opts.preferredPort=0] 0 lets the OS pick a free port.
|
|
43
|
+
* @returns {Promise<{server, port, url, close}>}
|
|
44
|
+
*/
|
|
45
|
+
/**
 * Start a loopback-only HTTP server that serves files from `cacheDir`.
 *
 * Routing: anything other than `GET /audio/<filename>` is 404. A filename
 * that is empty or contains `/`, `\` or `..` is 400. A missing path or a
 * non-file is 404. The Content-Type comes from `contentTypeFor(<ext>)`.
 *
 * @param {object} opts
 * @param {string} opts.cacheDir Directory the audio files are read from.
 * @param {number} [opts.preferredPort=0] Port passed to `listen()`; 0 lets the OS pick.
 * @returns {Promise<{server: import('http').Server, port: number, url: string, close: () => Promise<void>}>}
 * @throws {Error} When `cacheDir` is not a non-empty string, or `listen()` fails.
 */
async function startVoiceoverServer({ cacheDir, preferredPort = 0 }) {
  if (typeof cacheDir !== 'string' || !cacheDir) {
    throw new Error('startVoiceoverServer: cacheDir required');
  }

  const sendText = (res, status, body) => {
    res.writeHead(status, { 'Content-Type': 'text/plain' });
    res.end(body);
  };

  const server = http.createServer((req, res) => {
    if (req.method !== 'GET' || !req.url.startsWith('/audio/')) {
      sendText(res, 404, 'not found');
      return;
    }
    const fname = req.url.slice('/audio/'.length).split('?')[0];
    // Defense in depth — reject path traversal even on a localhost-only
    // server; only a single flat filename is ever valid here.
    if (fname === '' || fname.includes('/') || fname.includes('\\') || fname.includes('..')) {
      sendText(res, 400, 'bad filename');
      return;
    }
    const filePath = path.join(cacheDir, fname);
    fs.stat(filePath, (statErr, st) => {
      if (statErr || !st.isFile()) {
        sendText(res, 404, 'not found');
        return;
      }
      const ext = path.extname(fname).slice(1);
      const ctype = contentTypeFor(ext);
      const stream = fs.createReadStream(filePath);

      // Without an 'error' listener a file that vanishes or is unreadable
      // between stat() and open() raises an unhandled 'error' event.
      stream.once('error', (readErr) => {
        if (res.headersSent) {
          res.destroy();
          return;
        }
        sendText(res, readErr && readErr.code === 'ENOENT' ? 404 : 500,
          readErr && readErr.code === 'ENOENT' ? 'not found' : 'read error');
      });
      // Commit the 200 only once the file is actually open, so open
      // failures can still be reported with a proper status.
      stream.once('open', () => {
        res.writeHead(200, {
          'Content-Type': ctype,
          'Content-Length': st.size,
          'Cache-Control': 'no-store',
        });
        stream.pipe(res);
      });
      // pipe() does not destroy the source when the client goes away.
      res.once('close', () => stream.destroy());
    });
  });

  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(preferredPort, '127.0.0.1', () => {
      server.removeListener('error', reject);
      resolve();
    });
  });

  const port = server.address().port;
  return {
    server,
    port,
    url: `http://127.0.0.1:${port}`,
    async close() {
      await new Promise((resolve) => {
        server.close(() => resolve());
        // server.close() alone waits for keep-alive sockets to end; drop
        // them so teardown is prompt. The method is feature-detected
        // because it does not exist on older Node releases.
        if (typeof server.closeAllConnections === 'function') {
          server.closeAllConnections();
        }
      });
    },
  };
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Resolve + synthesize (or cache-hit) one mp3 per card, return a map of
|
|
100
|
+
* { cardIdx: audio URL } that buildInputProps threads into voiceoverSrc.
|
|
101
|
+
*
|
|
102
|
+
* @param {object} opts
|
|
103
|
+
* @param {object} opts.deck Validator-shaped deck.
|
|
104
|
+
* @param {{audition: Function}} opts.auditionClient
|
|
105
|
+
* Same client stage's /api/audition uses. Shares the on-disk cache so a
|
|
106
|
+
* card the user previewed in the browser doesn't burn quota again at
|
|
107
|
+
* render time.
|
|
108
|
+
* @param {string} opts.baseUrl e.g. http://127.0.0.1:54321
|
|
109
|
+
* @param {(p:object) => void} [opts.onProgress]
|
|
110
|
+
* Called once per resolved card: { cardIdx, total, fromCache, voiceId, textLen }.
|
|
111
|
+
* Use this to print a one-line "voiceover N/M (cache hit)" log so the
|
|
112
|
+
* user knows TTS is happening before the renderer takes over.
|
|
113
|
+
* @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx, reason, message?}> }>}
|
|
114
|
+
* skipped reasons: missing-card | voiceover-disabled | no-text |
|
|
115
|
+
* not_logged_in | quota_exceeded | tts_failed | network_error | invalid_voice
|
|
116
|
+
*/
|
|
117
|
+
/**
 * Resolve one audio clip per card through `auditionClient.audition()` and
 * build the `{ cardIdx: url }` map consumed as `voiceoverSrc`.
 *
 * Per card: text is a non-blank `voiceover.text`, else `card.narration`;
 * voice is `voiceover.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice`;
 * speed is `voiceover.rate` when numeric, else 1.0; format is always mp3.
 * Cards are processed sequentially. A `not_logged_in` or `quota_exceeded`
 * result stops the loop so the caller sees that root cause once.
 *
 * @param {object} opts
 * @param {object} opts.deck Deck with a `cards` array; anything else yields an empty result.
 * @param {{audition: Function}} opts.auditionClient
 * @param {string} opts.baseUrl Origin of the audio file server, e.g. http://127.0.0.1:54321
 * @param {(p: object) => void} [opts.onProgress] Called once per successfully
 *   resolved card with { cardIdx, total, fromCache, voiceId, textLen };
 *   errors it throws are swallowed.
 * @returns {Promise<{ byIdx: Record<number,string>, skipped: Array<{cardIdx: number, reason: string, message?: string}> }>}
 *   Skip reasons produced here: missing-card | voiceover-disabled | no-text |
 *   network_error | tts_failed, plus any non-success `code` from the client.
 * @throws {Error} When `auditionClient.audition` or `baseUrl` is missing.
 */
async function prepareVoiceovers({ deck, auditionClient, baseUrl, onProgress }) {
  const byIdx = {};
  const skipped = [];
  if (!deck || !Array.isArray(deck.cards)) return { byIdx, skipped };
  if (!auditionClient || typeof auditionClient.audition !== 'function') {
    throw new Error('prepareVoiceovers: auditionClient.audition is required');
  }
  if (typeof baseUrl !== 'string' || !baseUrl) {
    throw new Error('prepareVoiceovers: baseUrl is required');
  }
  const cards = deck.cards;
  // Loop-invariant: normalise the trailing slash once, not per card.
  const audioRoot = `${baseUrl.replace(/\/$/, '')}/audio`;
  const format = 'mp3';

  for (let i = 0; i < cards.length; i++) {
    const card = cards[i];
    if (!card) { skipped.push({ cardIdx: i, reason: 'missing-card' }); continue; }
    const vo = card.voiceover || {};
    if (vo.enabled === false) {
      skipped.push({ cardIdx: i, reason: 'voiceover-disabled' });
      continue;
    }
    const text = (typeof vo.text === 'string' && vo.text.trim())
      ? vo.text
      : card.narration;
    if (typeof text !== 'string' || !text.trim()) {
      skipped.push({ cardIdx: i, reason: 'no-text' });
      continue;
    }
    const voiceId = vo.voiceId || card.voiceId || SYNTHESIZE_DEFAULTS.voice;
    const speed = typeof vo.rate === 'number' ? vo.rate : 1.0;

    let r;
    try {
      r = await auditionClient.audition({ voiceId, text, speed, format });
    } catch (err) {
      // `err` may be any thrown value, including null/undefined.
      skipped.push({ cardIdx: i, reason: 'network_error', message: (err && err.message) || String(err) });
      continue;
    }
    // A client that resolves to nothing must not crash the whole prep pass.
    if (!r || typeof r !== 'object') {
      skipped.push({ cardIdx: i, reason: 'tts_failed', message: 'audition client returned no result' });
      continue;
    }
    if (r.code !== 'success') {
      skipped.push({ cardIdx: i, reason: r.code, message: r.message });
      // not_logged_in / quota_exceeded → bail early so the user sees one
      // clear message rather than N copies of the same root cause.
      if (r.code === 'not_logged_in' || r.code === 'quota_exceeded') break;
      continue;
    }
    // Without a cache key the URL would be ".../audio/undefined.mp3";
    // skip the card rather than hand out a URL that cannot resolve.
    if (typeof r.cacheKey !== 'string' || !r.cacheKey) {
      skipped.push({ cardIdx: i, reason: 'tts_failed', message: 'success result is missing cacheKey' });
      continue;
    }
    byIdx[i] = `${audioRoot}/${r.cacheKey}.${format}`;
    if (typeof onProgress === 'function') {
      try {
        onProgress({
          cardIdx: i,
          total: cards.length,
          fromCache: !!r.fromCache,
          voiceId,
          textLen: text.length,
        });
      } catch { /* swallow consumer errors */ }
    }
  }

  return { byIdx, skipped };
}
|
|
179
|
+
|
|
180
|
+
module.exports = {
|
|
181
|
+
startVoiceoverServer,
|
|
182
|
+
prepareVoiceovers,
|
|
183
|
+
};
|