voxflow 1.15.3 → 1.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/lib/commands/slice-render.js +71 -7
- package/lib/commands/slice-stage.js +34 -0
- package/lib/internal/deck-validator.js +47 -0
- package/lib/stage-core/local-render.js +57 -1
- package/lib/stage-core/server.js +57 -2
- package/lib/stage-core/tts-audition.js +0 -0
- package/lib/stage-core/voiceover-mux.js +183 -0
- package/lib/stage-ui/slice/template.js +171 -0
- package/package.json +1 -1
- package/skills/voxflow-slice/SKILL.md +75 -2
|
@@ -547,6 +547,49 @@ function renderSliceStageHtml({ sourcePath, port }) {
|
|
|
547
547
|
.deck-toolbar button.copied {
|
|
548
548
|
color: var(--good); border-color: var(--good);
|
|
549
549
|
}
|
|
550
|
+
/* ─── Audition (▶) — voice picker + per-card play + status ────────────── */
|
|
551
|
+
.toolbar-divider {
|
|
552
|
+
width: 1px; align-self: stretch;
|
|
553
|
+
background: var(--border); margin: 2px 4px;
|
|
554
|
+
}
|
|
555
|
+
.voice-picker {
|
|
556
|
+
display: inline-flex; align-items: center; gap: 6px;
|
|
557
|
+
border: 1px solid var(--border); border-radius: 8px;
|
|
558
|
+
padding: 0 8px; font-size: 12px;
|
|
559
|
+
}
|
|
560
|
+
.voice-picker:focus-within { border-color: var(--accent); }
|
|
561
|
+
.voice-picker-icon { color: var(--muted); font-size: 12px; }
|
|
562
|
+
#voice-picker-input {
|
|
563
|
+
appearance: none; border: 0; background: transparent;
|
|
564
|
+
font: inherit; color: var(--text); font-size: 12px;
|
|
565
|
+
padding: 6px 0; width: 180px; outline: none;
|
|
566
|
+
}
|
|
567
|
+
.stage-card .card-actions button[data-action="audition"] .audition-icon {
|
|
568
|
+
display: inline-block; margin-right: 4px; font-size: 9px;
|
|
569
|
+
}
|
|
570
|
+
.stage-card .card-actions button[data-action="audition"].playing {
|
|
571
|
+
background: rgba(88,81,184,0.92); color: #fff;
|
|
572
|
+
}
|
|
573
|
+
.stage-card .card-actions button[data-action="audition"].loading {
|
|
574
|
+
background: rgba(0,0,0,0.45); color: #fff; cursor: progress;
|
|
575
|
+
}
|
|
576
|
+
.stage-card .card-actions button[data-action="audition"].error {
|
|
577
|
+
background: rgba(239,68,68,0.92); color: #fff;
|
|
578
|
+
}
|
|
579
|
+
.audition-status {
|
|
580
|
+
display: inline-flex; align-items: center;
|
|
581
|
+
font-size: 11px; color: var(--muted);
|
|
582
|
+
padding: 4px 10px; border-radius: 6px;
|
|
583
|
+
max-width: 320px;
|
|
584
|
+
overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
|
|
585
|
+
}
|
|
586
|
+
.audition-status[data-state="loading"] { color: var(--accent); }
|
|
587
|
+
.audition-status[data-state="cache"] {
|
|
588
|
+
color: var(--good); background: rgba(34,197,94,0.08);
|
|
589
|
+
}
|
|
590
|
+
.audition-status[data-state="error"] {
|
|
591
|
+
color: #b91c1c; background: rgba(239,68,68,0.08);
|
|
592
|
+
}
|
|
550
593
|
|
|
551
594
|
.selection-fab {
|
|
552
595
|
position: fixed; z-index: 50;
|
|
@@ -765,7 +808,14 @@ function renderSliceStageHtml({ sourcePath, port }) {
|
|
|
765
808
|
<button id="copy-json-btn" type="button" disabled title="Copy raw deck.json to clipboard">Copy JSON</button>
|
|
766
809
|
<button id="download-json-btn" type="button" disabled title="Save deck.json to disk">Download .json</button>
|
|
767
810
|
<button id="copy-md-btn" type="button" disabled title="Copy as Markdown — paste into Notion / blog / 飞书">Copy as Markdown</button>
|
|
811
|
+
<span class="toolbar-divider" aria-hidden="true"></span>
|
|
812
|
+
<label class="voice-picker" title="Voice ID override (empty = let card.voiceover.voiceId or default win)">
|
|
813
|
+
<span class="voice-picker-icon" aria-hidden="true">♪</span>
|
|
814
|
+
<input id="voice-picker-input" type="text" placeholder="Voice (default)" spellcheck="false" autocomplete="off" aria-label="Voice override for audition" />
|
|
815
|
+
</label>
|
|
816
|
+
<span class="audition-status" id="audition-status" hidden aria-live="polite"></span>
|
|
768
817
|
</div>
|
|
818
|
+
<audio id="audition-audio" preload="none"></audio>
|
|
769
819
|
<div id="cards-pane" class="empty">Waiting for deck…</div>
|
|
770
820
|
</section>
|
|
771
821
|
<section>
|
|
@@ -1018,6 +1068,123 @@ function renderSliceStageHtml({ sourcePath, port }) {
|
|
|
1018
1068
|
copyTextToClipboard(formatCardAsText(card), btn);
|
|
1019
1069
|
});
|
|
1020
1070
|
|
|
1071
|
+
// ─── Per-card Audition (▶) — fetch /api/audition, play in <audio>. ────
|
|
1072
|
+
// Voice override comes from the toolbar voice-picker input; empty input
|
|
1073
|
+
// means let the server bridge resolve via the documented precedence
|
|
1074
|
+
// (card.voiceover.voiceId → card.voiceId → SYNTHESIZE_DEFAULTS.voice).
|
|
1075
|
+
// Status bar surfaces loading / cache HIT / upstream error so the user
|
|
1076
|
+
// can spot quota or auth issues without opening browser devtools.
|
|
1077
|
+
var auditionAudio = document.getElementById('audition-audio');
|
|
1078
|
+
var auditionStatus = document.getElementById('audition-status');
|
|
1079
|
+
var voicePickerInput = document.getElementById('voice-picker-input');
|
|
1080
|
+
var currentAuditionBtn = null;
|
|
1081
|
+
|
|
1082
|
+
/**
 * Show, update, or hide the toolbar audition status label.
 *
 * @param {?string} state - Value written to the label's data-state
 *   attribute (the stylesheet styles "loading", "cache" and "error").
 *   Any falsy value hides the label and clears its state and text.
 * @param {string} [message] - Text to display; a falsy value renders as
 *   an empty string.
 */
function setAuditionStatus(state, message) {
  var label = auditionStatus;
  // The status element is optional: without it there is nothing to update.
  if (!label) {
    return;
  }
  if (state) {
    label.hidden = false;
    label.dataset.state = state;
    label.textContent = message ? message : '';
  } else {
    label.hidden = true;
    label.removeAttribute('data-state');
    label.textContent = '';
  }
}
|
|
1094
|
+
|
|
1095
|
+
/**
 * Return an audition button to its idle look: drop every transient state
 * class and restore the play glyph on its icon, when it has one.
 *
 * @param {?Element} btn - Audition button to reset; falsy values are ignored.
 */
function resetAuditionBtn(btn) {
  if (btn) {
    btn.classList.remove('playing', 'loading', 'error');
    var glyph = btn.querySelector('.audition-icon');
    if (glyph) {
      glyph.textContent = '▶';
    }
  }
}
|
|
1101
|
+
|
|
1102
|
+
// Monotonic id of the most recent audition request. Async callbacks capture
// the id they were started with and compare it against this counter, so a
// superseded request can never overwrite UI state owned by a newer click.
var auditionRequestSeq = 0;
// Blob URL currently assigned to the <audio> element. It is revoked as soon
// as a newer clip replaces it, so repeated auditions do not pile up blobs.
var auditionObjectUrl = null;

/**
 * Delegated click handler for the per-card Audition buttons.
 *
 * Flow: resolve the clicked button and its card index, then either stop the
 * clip that button is playing, or request /api/audition for the card (plus
 * the optional toolbar voice override), load the returned blob into the
 * shared <audio> element and start playback. Button classes, the icon glyph
 * and the toolbar status label are updated at every step.
 */
cardsPane.addEventListener('click', function (ev) {
  var btn = ev.target.closest && ev.target.closest('[data-action="audition"]');
  if (!btn || !currentDeck) return;
  var idx = parseInt(btn.getAttribute('data-card-index'), 10);
  if (!Number.isFinite(idx)) return;

  if (currentAuditionBtn === btn) {
    // A request for this card is already in flight: ignore the extra click
    // instead of firing a duplicate (and possibly billed) synthesis call.
    if (btn.classList.contains('loading')) return;
    // Click on the currently playing button = stop + reset.
    if (!auditionAudio.paused) {
      auditionRequestSeq += 1; // invalidate any callback still pending
      auditionAudio.pause();
      auditionAudio.currentTime = 0;
      resetAuditionBtn(btn);
      currentAuditionBtn = null;
      setAuditionStatus(null);
      return;
    }
  }

  // Reset a previously playing button (if any) when starting a new one.
  if (currentAuditionBtn && currentAuditionBtn !== btn) {
    resetAuditionBtn(currentAuditionBtn);
  }
  // Silence the previous clip right away. Left running, its "ended" event
  // would fire while this request is loading and clear currentAuditionBtn,
  // so the new button would never be reset once its own playback finished.
  if (!auditionAudio.paused) auditionAudio.pause();

  auditionRequestSeq += 1;
  var requestId = auditionRequestSeq;
  function isStale() {
    return requestId !== auditionRequestSeq;
  }

  currentAuditionBtn = btn;
  btn.classList.remove('error');
  btn.classList.add('loading');
  var icon = btn.querySelector('.audition-icon');
  if (icon) icon.textContent = '⟳';
  setAuditionStatus('loading', 'Synthesizing card ' + (idx + 1) + '…');

  // An empty override lets the server pick the voice for this card.
  var voiceOverride = (voicePickerInput && voicePickerInput.value.trim()) || '';
  var url = '/api/audition?card=' + encodeURIComponent(idx)
    + (voiceOverride ? '&voice=' + encodeURIComponent(voiceOverride) : '');

  fetch(url, { method: 'GET', credentials: 'same-origin' })
    .then(function (res) {
      var cache = res.headers.get('X-Audition-Cache') || '';
      if (!res.ok) {
        // Prefer the JSON error message; fall back to the bare status code
        // when the body is missing or is not valid JSON.
        return res.json().then(function (j) {
          throw new Error((j && j.message) || ('HTTP ' + res.status));
        }, function () {
          throw new Error('HTTP ' + res.status);
        });
      }
      return res.blob().then(function (blob) { return { blob: blob, cache: cache }; });
    })
    .then(function (out) {
      // A newer click owns the player now: drop this response untouched.
      if (isStale()) return undefined;
      var previousUrl = auditionObjectUrl;
      auditionObjectUrl = URL.createObjectURL(out.blob);
      auditionAudio.src = auditionObjectUrl;
      // The old clip has just been replaced, so its blob URL can be released.
      if (previousUrl) URL.revokeObjectURL(previousUrl);
      return auditionAudio.play().then(function () {
        if (isStale()) return;
        btn.classList.remove('loading');
        btn.classList.add('playing');
        if (icon) icon.textContent = '❚❚';
        setAuditionStatus(
          out.cache === 'HIT' ? 'cache' : 'loading',
          out.cache === 'HIT' ? 'Cache hit — no quota used.' : 'Playing card ' + (idx + 1) + '.'
        );
      });
    })
    .catch(function (err) {
      // Failures of a superseded request (including play() being interrupted
      // by the pause above) must not repaint the UI of the active request.
      if (isStale()) return;
      btn.classList.remove('loading', 'playing');
      btn.classList.add('error');
      if (icon) icon.textContent = '!';
      setAuditionStatus('error', String(err && err.message || err));
      setTimeout(function () {
        // A retry on this same button owns its state now; leave it alone.
        if (isStale() && currentAuditionBtn === btn) return;
        if (currentAuditionBtn === btn) currentAuditionBtn = null;
        resetAuditionBtn(btn);
      }, 3000);
    });
});
|
|
1169
|
+
|
|
1170
|
+
// Playback ran to the end: put the active button (if any) back into its
// idle state, forget it, and hide the toolbar status label.
auditionAudio.addEventListener('ended', function onAuditionEnded() {
  var finishedBtn = currentAuditionBtn;
  if (finishedBtn) {
    resetAuditionBtn(finishedBtn);
    currentAuditionBtn = null;
  }
  setAuditionStatus(null);
});
|
|
1177
|
+
// The <audio> element reported an error: flag the active button (if any) as
// failed, forget it, and surface a generic failure in the toolbar status.
auditionAudio.addEventListener('error', function onAuditionError() {
  var failedBtn = currentAuditionBtn;
  if (failedBtn) {
    failedBtn.classList.remove('loading', 'playing');
    failedBtn.classList.add('error');
    var glyph = failedBtn.querySelector('.audition-icon');
    if (glyph) {
      glyph.textContent = '!';
    }
    currentAuditionBtn = null;
  }
  setAuditionStatus('error', 'Audio playback failed.');
});
|
|
1187
|
+
|
|
1021
1188
|
// For highlighting cards that just changed on hot-reload, we keep a
|
|
1022
1189
|
// hash of each card's stringified JSON. On the next deck event we
|
|
1023
1190
|
// diff per-index and add the just-changed CSS class to whichever
|
|
@@ -1117,6 +1284,10 @@ function renderSliceStageHtml({ sourcePath, port }) {
|
|
|
1117
1284
|
+ '<div class="body">' + titleHtml + '</div>'
|
|
1118
1285
|
+ '<div class="accent-bar"></div>'
|
|
1119
1286
|
+ '<div class="card-actions">'
|
|
1287
|
+
+ '<button type="button" data-action="audition" data-card-index="' + i + '"'
|
|
1288
|
+
+ ' aria-label="Audition card ' + (i + 1) + ' voiceover" title="Play TTS preview — uses card.voiceover or card.narration, costs 100 quota first time then cached">'
|
|
1289
|
+
+ '<span class="audition-icon" aria-hidden="true">▶</span>Audition'
|
|
1290
|
+
+ '</button>'
|
|
1120
1291
|
+ '<button type="button" data-action="copy-card" data-card-index="' + i + '"'
|
|
1121
1292
|
+ ' aria-label="Copy card ' + (i + 1) + ' as text">Copy text</button>'
|
|
1122
1293
|
+ '</div>'
|
package/package.json
CHANGED
|
@@ -145,6 +145,31 @@ All cards require a non-empty `narration` string (TTS reads this; 30–60 zh cha
|
|
|
145
145
|
}
|
|
146
146
|
```
|
|
147
147
|
|
|
148
|
+
### Optional per-card `voiceover` override
|
|
149
|
+
|
|
150
|
+
Any card kind may carry a nested `voiceover` object to tune its audio
|
|
151
|
+
track. All four sub-fields are optional inside an optional object —
|
|
152
|
+
absent ⇒ the renderer uses the job-level default voice with
|
|
153
|
+
`card.narration` at 1× speed (back-compat with Phase 0 silent decks).
|
|
154
|
+
|
|
155
|
+
```jsonc
|
|
156
|
+
{
|
|
157
|
+
"kind": "body",
|
|
158
|
+
"caption": "短字幕",
|
|
159
|
+
"narration": "默认是 TTS 朗读的文本",
|
|
160
|
+
"voiceover": {
|
|
161
|
+
"enabled": true, // false → this card is silent in the mp4
|
|
162
|
+
"voiceId": "v-female-R2s4N9qJ", // overrides the job-level default voice
|
|
163
|
+
"text": "口播稿可以跟字幕不一样", // overrides narration for TTS only (visible caption unaffected)
|
|
164
|
+
"rate": 1.1 // [0.5, 2.0], default 1.0
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Validator caps: `voiceover.voiceId` non-empty, ≤ 128 chars; `voiceover.text` ≤ 500 chars; `voiceover.rate` ∈ [0.5, 2.0].
|
|
170
|
+
Render-time resolution precedence:
|
|
171
|
+
`voiceover.voiceId → card.voiceId → job-level default`.
|
|
172
|
+
|
|
148
173
|
> **Source of truth**: `backend/services/paper-slide/deck-validator.js` — caps live at lines 39–46 (QUOTE_TEXT_MAX, DATA_VALUE_MAX, LIST_ITEM_MAX_LEN, etc.). Read it if anything below seems ambiguous.
|
|
149
174
|
|
|
150
175
|
### Controlled `figureKeyword` list
|
|
@@ -232,12 +257,59 @@ After writing `deck.json`, tell the user:
|
|
|
232
257
|
```
|
|
233
258
|
Wrote deck.json (<N> cards, theme: <theme-id>). Next:
|
|
234
259
|
|
|
235
|
-
voxflow slice
|
|
236
|
-
voxflow slice
|
|
260
|
+
voxflow slice preview deck.json # browser preview + per-card audition + render button
|
|
261
|
+
voxflow slice render deck.json --output out.mp4 # one-shot mp4 from the terminal
|
|
237
262
|
```
|
|
238
263
|
|
|
239
264
|
Do not run either command yourself unless the user asks.
|
|
240
265
|
|
|
266
|
+
Both commands work fully offline for the visual side. **Audio (per-card
|
|
267
|
+
TTS audition + render audio track) requires `voxflow login`** — 100 quota
|
|
268
|
+
per unique `(voice, text)` clip, then cached at
|
|
269
|
+
`~/.config/voxflow/stage-tts-cache/`. With no login, both commands fall
|
|
270
|
+
back silently to a Phase-0-style silent video; pass `--no-audio` to
|
|
271
|
+
`render` to suppress the audio pass entirely.
|
|
272
|
+
|
|
273
|
+
## Multi-turn editing loop (Claude Code / Cursor / native `Edit`)
|
|
274
|
+
|
|
275
|
+
When the user is iterating — "shorten card 2", "swap order", "different
|
|
276
|
+
voice for card 3", "hear card 1 again" — they are **NOT** asking for a
|
|
277
|
+
regen. Stay in this loop:
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
# Run once at the start of the session; auto-opens http://127.0.0.1:5180.
|
|
281
|
+
# The page hot-reloads on every save of deck.json (~50 ms fs watcher).
|
|
282
|
+
voxflow slice preview deck.json &
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
Then for every follow-up:
|
|
286
|
+
|
|
287
|
+
| User intent | Your move | Cost |
|
|
288
|
+
|---|---|---|
|
|
289
|
+
| "card N is too long" | `Edit` `cards[N-1].caption` / `.narration` — page hot-reloads | 0 |
|
|
290
|
+
| "swap card 2 and 3" | `Edit` the `cards` array order | 0 |
|
|
291
|
+
| "different voice for card 3" | `Edit` `cards[2].voiceover.voiceId` — or tell user to paste a voiceId in the toolbar voice picker | 0 (Edit) |
|
|
292
|
+
| "make card 4 silent" | `Edit` `cards[3].voiceover = { "enabled": false }` | 0 |
|
|
293
|
+
| "口播说点不一样的" | `Edit` `cards[i].voiceover.text` so TTS reads override while caption stays | 0 |
|
|
294
|
+
| "I want to hear card 3" | Tell user to click ▶ on card 3 in the browser; the toolbar status shows loading / cache hit / error | 100 (first time per clip), 0 (cached) |
|
|
295
|
+
| "render mp4" | Tell user: click **Render mp4 (local)** in the browser, OR `voxflow slice render deck.json` | TTS pass (cached if auditioned) + render |
|
|
296
|
+
|
|
297
|
+
### Loop rules
|
|
298
|
+
|
|
299
|
+
1. **Edit only the fields the user asked about.** Other cards must stay
|
|
300
|
+
byte-identical — the stage UI's diff highlight is the user's "what
|
|
301
|
+
changed" indicator. Touching extra fields breaks that signal.
|
|
302
|
+
2. **Never re-run `voxflow slice <article>` during iteration** — that
|
|
303
|
+
costs 200 quota AND overwrites every user edit with a fresh LLM draft.
|
|
304
|
+
3. **Re-validate after every save** by re-reading the file. If the user
|
|
305
|
+
says "page shows old content" or "red banner appeared", the JSON has a
|
|
306
|
+
syntax error (trailing comma, unbalanced quote) — open, fix, save.
|
|
307
|
+
4. **Don't restart the preview server.** One process handles the whole
|
|
308
|
+
session; restarting wipes snapshot history.
|
|
309
|
+
5. **Don't call `/api/audition` yourself.** It's user-driven via the ▶
|
|
310
|
+
button. Editing `cards[i].voiceover.voiceId` is enough — the next ▶
|
|
311
|
+
click picks up the new voice.
|
|
312
|
+
|
|
241
313
|
## Self-review checklist
|
|
242
314
|
|
|
243
315
|
Before declaring the slice done:
|
|
@@ -254,6 +326,7 @@ Before declaring the slice done:
|
|
|
254
326
|
- [ ] If the theme is `photo-feature` or `atmospheric` and the user provided per-card images, `imageUrl` starts with `https://`
|
|
255
327
|
- [ ] No outro card unless the user explicitly asked for one
|
|
256
328
|
- [ ] No React, TSX, or CSS files were created
|
|
329
|
+
- [ ] If any card has a `voiceover` object, every key inside it (`enabled` / `voiceId` / `text` / `rate`) matches the schema (boolean / non-empty string ≤128 / string ≤500 / number in [0.5, 2.0])
|
|
257
330
|
|
|
258
331
|
## Anti-patterns
|
|
259
332
|
|