@glissade/narrate 0.5.0-pre.1 → 0.5.0-pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +46 -7
- package/dist/index.js +109 -36
- package/dist/providers.d.ts +10 -0
- package/dist/providers.js +49 -11
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -12,6 +12,29 @@ interface NarrationSegment {
|
|
|
12
12
|
/** silence after THIS segment (s); overrides the script default */
|
|
13
13
|
gapAfter?: number;
|
|
14
14
|
}
|
|
15
|
+
/** What the music bed does across a pause window. */
|
|
16
|
+
type BedMode = /** hold the current (ducked) level across the pause — no swell, the default */
|
|
17
|
+
'hold'
|
|
18
|
+
/** cut the bed to a floor for the window (a dramatic silence) */ | 'silence'
|
|
19
|
+
/** let the bed breathe back up to base while the voice rests */ | 'swell';
|
|
20
|
+
/**
|
|
21
|
+
* An explicit silence beat between segments — an addressable WINDOW, not just
|
|
22
|
+
* dead air. It shifts every later segment's start (re-flows on re-narrate) and
|
|
23
|
+
* gives you anchors (`beats.start/end/duration('id')`) to hang visuals and SFX
|
|
24
|
+
* on, plus a per-pause `bed` mode for the music. A pause supplies its own
|
|
25
|
+
* silence, so it suppresses the default inter-segment gap around it.
|
|
26
|
+
*/
|
|
27
|
+
interface NarrationPause {
|
|
28
|
+
id: string;
|
|
29
|
+
/** the silence length in seconds */
|
|
30
|
+
pause: number;
|
|
31
|
+
/** what the music bed does across this window; default 'hold' */
|
|
32
|
+
bed?: BedMode;
|
|
33
|
+
}
|
|
34
|
+
/** A script element: a spoken segment or an explicit pause beat. */
|
|
35
|
+
type NarrationElement = NarrationSegment | NarrationPause;
|
|
36
|
+
/** A pause element is the one carrying a numeric `pause` field. */
|
|
37
|
+
declare function isPause(el: NarrationElement): el is NarrationPause;
|
|
15
38
|
interface NarrationScript {
|
|
16
39
|
narrationVersion: 1;
|
|
17
40
|
provider?: string;
|
|
@@ -28,7 +51,8 @@ interface NarrationScript {
|
|
|
28
51
|
* segments word-less. Providers that supply their own words ignore this.
|
|
29
52
|
*/
|
|
30
53
|
align?: string;
|
|
31
|
-
segments
|
|
54
|
+
/** spoken segments and explicit pause beats, in playback order */
|
|
55
|
+
segments: NarrationElement[];
|
|
32
56
|
}
|
|
33
57
|
interface TimedWord {
|
|
34
58
|
word: string;
|
|
@@ -46,24 +70,35 @@ interface TimedSegment {
|
|
|
46
70
|
/** present only when the provider supplies word timestamps */
|
|
47
71
|
words?: TimedWord[];
|
|
48
72
|
}
|
|
73
|
+
/** A resolved pause window in the committed manifest. */
|
|
74
|
+
interface TimedPause {
|
|
75
|
+
id: string;
|
|
76
|
+
start: number;
|
|
77
|
+
duration: number;
|
|
78
|
+
bed: BedMode;
|
|
79
|
+
}
|
|
49
80
|
interface NarrationTiming {
|
|
50
81
|
timingVersion: 1;
|
|
51
82
|
provider: string;
|
|
52
83
|
providerVersion: string;
|
|
53
84
|
totalDuration: number;
|
|
54
85
|
segments: TimedSegment[];
|
|
86
|
+
/** explicit pause windows, addressable like segments; omitted when none */
|
|
87
|
+
pauses?: TimedPause[];
|
|
55
88
|
}
|
|
56
89
|
declare class NarrationError extends Error {
|
|
57
90
|
constructor(message: string);
|
|
58
91
|
}
|
|
59
92
|
interface NarrationAnchors {
|
|
60
|
-
/** segment start, absolute timeline seconds */
|
|
93
|
+
/** segment OR pause start, absolute timeline seconds */
|
|
61
94
|
start(id: string): number;
|
|
62
|
-
/** segment end (start + duration) */
|
|
95
|
+
/** segment OR pause end (start + duration) */
|
|
63
96
|
end(id: string): number;
|
|
64
97
|
duration(id: string): number;
|
|
98
|
+
/** start + offset — a sub-beat inside a segment or pause window */
|
|
99
|
+
at(id: string, offset?: number): number;
|
|
65
100
|
readonly totalDuration: number;
|
|
66
|
-
/** '<id>.start' / '<id>.end' labels — merge into the timeline for studio visibility */
|
|
101
|
+
/** '<id>.start' / '<id>.end' labels (segments + pauses) — merge into the timeline for studio visibility */
|
|
67
102
|
labels(): Record<string, number>;
|
|
68
103
|
/** narration clips on the existing AudioClip machinery; baseUrl prefixes each file */
|
|
69
104
|
clips(baseUrl: string): AudioClip[];
|
|
@@ -115,11 +150,15 @@ interface DuckOptions {
|
|
|
115
150
|
mergeGap?: number;
|
|
116
151
|
/** the music clip's `at` on the timeline; gain keys are CLIP-local. Default 0. */
|
|
117
152
|
clipAt?: number;
|
|
153
|
+
/** gain a 'silence' pause ducks the bed to; default 0 (a true cut). */
|
|
154
|
+
silence?: number;
|
|
118
155
|
}
|
|
119
156
|
/**
|
|
120
157
|
* The bed-ducking envelope every narrated video needs: duck windows are the
|
|
121
|
-
* narration segments, with attack/release ramps and near-window merging.
|
|
122
|
-
*
|
|
158
|
+
* narration segments, with attack/release ramps and near-window merging. Pause
|
|
159
|
+
* beats join in by their `bed` mode — `hold` (default) keeps the bed ducked
|
|
160
|
+
* across the pause, `silence` cuts it to a floor, `swell` lets it breathe back
|
|
161
|
+
* to base. Pure function of the committed manifest — re-narrate and the ducking
|
|
123
162
|
* re-flows. Returns a keys-only gain envelope for AudioClip.gain.
|
|
124
163
|
*/
|
|
125
164
|
declare function duckEnvelope(timing: NarrationTiming, opts?: DuckOptions): {
|
|
@@ -184,4 +223,4 @@ declare function music(timing: MusicTiming, at?: number): MusicAnchors;
|
|
|
184
223
|
declare function toSrt(timing: NarrationTiming): string;
|
|
185
224
|
declare function toVtt(timing: NarrationTiming): string;
|
|
186
225
|
//#endregion
|
|
187
|
-
export { CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationError, NarrationScript, NarrationSegment, NarrationTiming, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, music, narration, toSrt, toVtt, validateMusicTiming };
|
|
226
|
+
export { BedMode, CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationElement, NarrationError, NarrationPause, NarrationScript, NarrationSegment, NarrationTiming, TimedPause, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
|
package/dist/index.js
CHANGED
|
@@ -8,6 +8,10 @@ import { Text, glow } from "@glissade/scene";
|
|
|
8
8
|
* offline and deterministic. Captions are a plain string track driving a
|
|
9
9
|
* Text node — they live in the timeline JSON and golden-frame CI covers them.
|
|
10
10
|
*/
|
|
11
|
+
/** A pause element is the one carrying a numeric `pause` field. */
|
|
12
|
+
function isPause(el) {
|
|
13
|
+
return typeof el.pause === "number";
|
|
14
|
+
}
|
|
11
15
|
var NarrationError = class extends Error {
|
|
12
16
|
constructor(message) {
|
|
13
17
|
super(message);
|
|
@@ -15,22 +19,37 @@ var NarrationError = class extends Error {
|
|
|
15
19
|
}
|
|
16
20
|
};
|
|
17
21
|
function narration(timing) {
|
|
18
|
-
const byId = new Map(
|
|
19
|
-
const
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
const byId = /* @__PURE__ */ new Map();
|
|
23
|
+
for (const s of timing.segments) {
|
|
24
|
+
if (byId.has(s.id)) throw new NarrationError(`duplicate narration id '${s.id}'`);
|
|
25
|
+
byId.set(s.id, {
|
|
26
|
+
start: s.start,
|
|
27
|
+
duration: s.duration
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
for (const p of timing.pauses ?? []) {
|
|
31
|
+
if (byId.has(p.id)) throw new NarrationError(`duplicate narration id '${p.id}' (segment and pause collide)`);
|
|
32
|
+
byId.set(p.id, {
|
|
33
|
+
start: p.start,
|
|
34
|
+
duration: p.duration
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
const beat = (id) => {
|
|
38
|
+
const b = byId.get(id);
|
|
39
|
+
if (!b) throw new NarrationError(`no narration beat '${id}' (have: ${[...byId.keys()].join(", ")})`);
|
|
40
|
+
return b;
|
|
23
41
|
};
|
|
24
42
|
return {
|
|
25
|
-
start: (id) =>
|
|
26
|
-
end: (id) =>
|
|
27
|
-
duration: (id) =>
|
|
43
|
+
start: (id) => beat(id).start,
|
|
44
|
+
end: (id) => beat(id).start + beat(id).duration,
|
|
45
|
+
duration: (id) => beat(id).duration,
|
|
46
|
+
at: (id, offset = 0) => beat(id).start + offset,
|
|
28
47
|
totalDuration: timing.totalDuration,
|
|
29
48
|
labels: () => {
|
|
30
49
|
const out = {};
|
|
31
|
-
for (const
|
|
32
|
-
out[`${
|
|
33
|
-
out[`${
|
|
50
|
+
for (const [id, b] of byId) {
|
|
51
|
+
out[`${id}.start`] = b.start;
|
|
52
|
+
out[`${id}.end`] = b.start + b.duration;
|
|
34
53
|
}
|
|
35
54
|
return out;
|
|
36
55
|
},
|
|
@@ -89,8 +108,10 @@ function captionNode(size, style = {}) {
|
|
|
89
108
|
}
|
|
90
109
|
/**
|
|
91
110
|
* The bed-ducking envelope every narrated video needs: duck windows are the
|
|
92
|
-
* narration segments, with attack/release ramps and near-window merging.
|
|
93
|
-
*
|
|
111
|
+
* narration segments, with attack/release ramps and near-window merging. Pause
|
|
112
|
+
* beats join in by their `bed` mode — `hold` (default) keeps the bed ducked
|
|
113
|
+
* across the pause, `silence` cuts it to a floor, `swell` lets it breathe back
|
|
114
|
+
* to base. Pure function of the committed manifest — re-narrate and the ducking
|
|
94
115
|
* re-flows. Returns a keys-only gain envelope for AudioClip.gain.
|
|
95
116
|
*/
|
|
96
117
|
function duckEnvelope(timing, opts = {}) {
|
|
@@ -100,31 +121,83 @@ function duckEnvelope(timing, opts = {}) {
|
|
|
100
121
|
const release = opts.release ?? .4;
|
|
101
122
|
const mergeGap = opts.mergeGap ?? .5;
|
|
102
123
|
const clipAt = opts.clipAt ?? 0;
|
|
103
|
-
const
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
124
|
+
const silence = opts.silence ?? 0;
|
|
125
|
+
const levelOf = (bed) => bed === "silence" ? silence : bed === "swell" ? base : duck;
|
|
126
|
+
const raw = [...timing.segments.map((s) => ({
|
|
127
|
+
start: s.start,
|
|
128
|
+
end: s.start + s.duration,
|
|
129
|
+
level: duck
|
|
130
|
+
})), ...(timing.pauses ?? []).map((p) => ({
|
|
131
|
+
start: p.start,
|
|
132
|
+
end: p.start + p.duration,
|
|
133
|
+
level: levelOf(p.bed)
|
|
134
|
+
}))].sort((a, b) => a.start - b.start);
|
|
135
|
+
const merged = [];
|
|
136
|
+
for (const w of raw) {
|
|
137
|
+
const last = merged[merged.length - 1];
|
|
138
|
+
if (last && last.level === w.level && w.start - last.end < attack + release + mergeGap) last.end = Math.max(last.end, w.end);
|
|
139
|
+
else merged.push({ ...w });
|
|
140
|
+
}
|
|
141
|
+
const active = merged.filter((w) => w.level !== base);
|
|
142
|
+
if (active.length === 0) return { keys: [key(0, base)] };
|
|
143
|
+
const regions = [];
|
|
144
|
+
for (const w of active) {
|
|
145
|
+
const prev = regions[regions.length - 1];
|
|
146
|
+
if (prev && w.start > prev.end) regions.push({
|
|
147
|
+
start: prev.end,
|
|
148
|
+
end: w.start,
|
|
149
|
+
level: base
|
|
110
150
|
});
|
|
151
|
+
regions.push({ ...w });
|
|
152
|
+
}
|
|
153
|
+
const transitions = [{
|
|
154
|
+
t: regions[0].start,
|
|
155
|
+
from: base,
|
|
156
|
+
to: regions[0].level
|
|
157
|
+
}];
|
|
158
|
+
for (let i = 0; i < regions.length - 1; i++) if (regions[i].level !== regions[i + 1].level) transitions.push({
|
|
159
|
+
t: regions[i].end,
|
|
160
|
+
from: regions[i].level,
|
|
161
|
+
to: regions[i + 1].level
|
|
162
|
+
});
|
|
163
|
+
const lastRegion = regions[regions.length - 1];
|
|
164
|
+
transitions.push({
|
|
165
|
+
t: lastRegion.end,
|
|
166
|
+
from: lastRegion.level,
|
|
167
|
+
to: base
|
|
168
|
+
});
|
|
169
|
+
let keys = [];
|
|
170
|
+
for (const tr of transitions) {
|
|
171
|
+
if (tr.to === tr.from) continue;
|
|
172
|
+
if (tr.to < tr.from) keys.push(key(tr.t - attack, tr.from), key(tr.t, tr.to));
|
|
173
|
+
else keys.push(key(tr.t, tr.from), key(tr.t + release, tr.to));
|
|
111
174
|
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if (
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
175
|
+
keys = keys.map((k) => ({
|
|
176
|
+
t: k.t - clipAt,
|
|
177
|
+
value: k.value
|
|
178
|
+
})).sort((a, b) => a.t - b.t);
|
|
179
|
+
const ordered = [];
|
|
180
|
+
for (const k of keys) {
|
|
181
|
+
const prev = ordered[ordered.length - 1];
|
|
182
|
+
if (prev && k.t <= prev.t) prev.value = k.value;
|
|
183
|
+
else ordered.push(k);
|
|
184
|
+
}
|
|
185
|
+
const out = [];
|
|
186
|
+
for (let i = 0; i < ordered.length; i++) {
|
|
187
|
+
const k = ordered[i];
|
|
188
|
+
if (k.t < 0) {
|
|
189
|
+
const next = ordered[i + 1];
|
|
190
|
+
if (!next || next.t >= 0) {
|
|
191
|
+
const v = next && next.t > k.t ? k.value + (next.value - k.value) * ((0 - k.t) / (next.t - k.t)) : k.value;
|
|
192
|
+
out.push(key(0, v));
|
|
193
|
+
}
|
|
194
|
+
continue;
|
|
195
|
+
}
|
|
196
|
+
out.push(k);
|
|
124
197
|
}
|
|
125
|
-
if (
|
|
126
|
-
if (
|
|
127
|
-
return { keys };
|
|
198
|
+
if (out.length === 0) out.push(key(0, base));
|
|
199
|
+
if (out[0].t > 0) out.unshift(key(0, base));
|
|
200
|
+
return { keys: out };
|
|
128
201
|
}
|
|
129
202
|
function validateMusicTiming(timing) {
|
|
130
203
|
if (timing.musicVersion !== 1) throw new NarrationError(`unsupported musicVersion ${String(timing.musicVersion)}`);
|
|
@@ -191,4 +264,4 @@ function toVtt(timing) {
|
|
|
191
264
|
return "WEBVTT\n\n" + timing.segments.map((s) => `${srtTime(s.start, ".")} --> ${srtTime(s.start + s.duration, ".")}\n${s.text}`).join("\n\n") + "\n";
|
|
192
265
|
}
|
|
193
266
|
//#endregion
|
|
194
|
-
export { NarrationError, captionNode, captionTrack, duckEnvelope, music, narration, toSrt, toVtt, validateMusicTiming };
|
|
267
|
+
export { NarrationError, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
|
package/dist/providers.d.ts
CHANGED
|
@@ -41,9 +41,19 @@ declare function openaiProvider(opts?: {
|
|
|
41
41
|
* offline. Needs a voice MODEL (`.onnx` + sibling `.onnx.json`) — pass its
|
|
42
42
|
* path as `model`, or per-segment as `voice`. Emits no word timestamps; the
|
|
43
43
|
* alignment step (below) fills them in.
|
|
44
|
+
*
|
|
45
|
+
* DETERMINISTIC by default: VITS adds noise (generator + the stochastic
|
|
46
|
+
* duration predictor), so the same text re-synthesizes to slightly different
|
|
47
|
+
* audio/durations. glissade zeroes both noise scales so re-synth is
|
|
48
|
+
* byte-identical — reproducible pipelines, glissade's determinism contract.
|
|
49
|
+
* For piper's more-natural (but drifting) prosody, pass its defaults
|
|
50
|
+
* (`{ noiseScale: 0.667, noiseWScale: 0.8 }`) and wire via `providerImpl`.
|
|
51
|
+
* The noise mode is part of `version()`, so changing it invalidates the cache.
|
|
44
52
|
*/
|
|
45
53
|
declare function piperProvider(opts?: {
|
|
46
54
|
model?: string;
|
|
55
|
+
noiseScale?: number;
|
|
56
|
+
noiseWScale?: number;
|
|
47
57
|
}): TtsProvider;
|
|
48
58
|
declare function providerById(id: string): TtsProvider;
|
|
49
59
|
interface AlignRequest {
|
package/dist/providers.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { NarrationError } from "./index.js";
|
|
1
|
+
import { NarrationError, isPause } from "./index.js";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
4
4
|
import { basename, dirname, join } from "node:path";
|
|
@@ -138,8 +138,18 @@ function openaiProvider(opts = {}) {
|
|
|
138
138
|
* offline. Needs a voice MODEL (`.onnx` + sibling `.onnx.json`) — pass its
|
|
139
139
|
* path as `model`, or per-segment as `voice`. Emits no word timestamps; the
|
|
140
140
|
* alignment step (below) fills them in.
|
|
141
|
+
*
|
|
142
|
+
* DETERMINISTIC by default: VITS adds noise (generator + the stochastic
|
|
143
|
+
* duration predictor), so the same text re-synthesizes to slightly different
|
|
144
|
+
* audio/durations. glissade zeroes both noise scales so re-synth is
|
|
145
|
+
* byte-identical — reproducible pipelines, glissade's determinism contract.
|
|
146
|
+
* For piper's more-natural (but drifting) prosody, pass its defaults
|
|
147
|
+
* (`{ noiseScale: 0.667, noiseWScale: 0.8 }`) and wire via `providerImpl`.
|
|
148
|
+
* The noise mode is part of `version()`, so changing it invalidates the cache.
|
|
141
149
|
*/
|
|
142
150
|
function piperProvider(opts = {}) {
|
|
151
|
+
const noiseScale = opts.noiseScale ?? 0;
|
|
152
|
+
const noiseWScale = opts.noiseWScale ?? 0;
|
|
143
153
|
return {
|
|
144
154
|
id: "piper",
|
|
145
155
|
version: () => {
|
|
@@ -149,8 +159,13 @@ function piperProvider(opts = {}) {
|
|
|
149
159
|
throw new NarrationError(`could not run piper: ${r.error.message}`);
|
|
150
160
|
}
|
|
151
161
|
const m = /\b\d+\.\d+\.\d+\b/.exec(r.stdout ?? "");
|
|
152
|
-
const
|
|
153
|
-
|
|
162
|
+
const noise = `noise=${noiseScale}/${noiseWScale}`;
|
|
163
|
+
const v = m ? `piper ${m[0]}` : "piper";
|
|
164
|
+
return Promise.resolve([
|
|
165
|
+
v,
|
|
166
|
+
noise,
|
|
167
|
+
opts.model ? basename(opts.model) : null
|
|
168
|
+
].filter(Boolean).join(" "));
|
|
154
169
|
},
|
|
155
170
|
synthesize: (req) => {
|
|
156
171
|
const model = req.voice ?? opts.model;
|
|
@@ -161,7 +176,11 @@ function piperProvider(opts = {}) {
|
|
|
161
176
|
"--model",
|
|
162
177
|
model,
|
|
163
178
|
"--output_file",
|
|
164
|
-
out
|
|
179
|
+
out,
|
|
180
|
+
"--noise-scale",
|
|
181
|
+
String(noiseScale),
|
|
182
|
+
"--noise-w-scale",
|
|
183
|
+
String(noiseWScale)
|
|
165
184
|
];
|
|
166
185
|
if (req.rate !== void 0 && req.rate > 0) args.push("--length_scale", String(1 / req.rate));
|
|
167
186
|
const r = spawnSync("piper", args, {
|
|
@@ -379,9 +398,10 @@ async function synthesizeScript(scriptPath, opts = {}) {
|
|
|
379
398
|
const raw = JSON.parse(readFileSync(scriptPath, "utf8"));
|
|
380
399
|
if (raw.narrationVersion !== 1) throw new NarrationError(`unsupported narrationVersion ${String(raw.narrationVersion)}`);
|
|
381
400
|
const ids = /* @__PURE__ */ new Set();
|
|
382
|
-
for (const
|
|
383
|
-
if (ids.has(
|
|
384
|
-
ids.add(
|
|
401
|
+
for (const el of raw.segments) {
|
|
402
|
+
if (ids.has(el.id)) throw new NarrationError(`duplicate narration id '${el.id}'`);
|
|
403
|
+
ids.add(el.id);
|
|
404
|
+
if (isPause(el) && !(el.pause > 0)) throw new NarrationError(`pause '${el.id}' needs pause > 0`);
|
|
385
405
|
}
|
|
386
406
|
const provider = opts.providerImpl ?? providerById(opts.provider ?? raw.provider ?? "espeak");
|
|
387
407
|
const providerVersion = await provider.version();
|
|
@@ -404,8 +424,22 @@ async function synthesizeScript(scriptPath, opts = {}) {
|
|
|
404
424
|
const reused = [];
|
|
405
425
|
const aligned = [];
|
|
406
426
|
const segments = [];
|
|
427
|
+
const pauses = [];
|
|
407
428
|
let cursor = raw.leadIn ?? 0;
|
|
408
|
-
|
|
429
|
+
const elements = raw.segments;
|
|
430
|
+
for (let i = 0; i < elements.length; i++) {
|
|
431
|
+
const el = elements[i];
|
|
432
|
+
if (isPause(el)) {
|
|
433
|
+
pauses.push({
|
|
434
|
+
id: el.id,
|
|
435
|
+
start: cursor,
|
|
436
|
+
duration: el.pause,
|
|
437
|
+
bed: el.bed ?? "hold"
|
|
438
|
+
});
|
|
439
|
+
cursor += el.pause;
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
const seg = el;
|
|
409
443
|
const req = { text: seg.text };
|
|
410
444
|
const voice = seg.voice ?? raw.voice;
|
|
411
445
|
const rate = seg.rate ?? raw.rate;
|
|
@@ -463,16 +497,20 @@ async function synthesizeScript(scriptPath, opts = {}) {
|
|
|
463
497
|
end: cursor + w.end
|
|
464
498
|
}));
|
|
465
499
|
segments.push(timed);
|
|
466
|
-
cursor += duration
|
|
500
|
+
cursor += duration;
|
|
501
|
+
const next = elements[i + 1];
|
|
502
|
+
if (next && !isPause(next)) cursor += seg.gapAfter ?? raw.gap ?? .35;
|
|
467
503
|
}
|
|
468
504
|
cache.entries = Object.fromEntries(Object.entries(cache.entries).sort(([a], [b]) => a.localeCompare(b)));
|
|
469
505
|
writeFileSync(cachePath, JSON.stringify(cache, null, 2) + "\n");
|
|
506
|
+
const ends = [...segments.map((s) => s.start + s.duration), ...pauses.map((p) => p.start + p.duration)];
|
|
470
507
|
const timing = {
|
|
471
508
|
timingVersion: 1,
|
|
472
509
|
provider: provider.id,
|
|
473
510
|
providerVersion,
|
|
474
|
-
totalDuration:
|
|
475
|
-
segments
|
|
511
|
+
totalDuration: ends.length > 0 ? Math.max(...ends) : 0,
|
|
512
|
+
segments,
|
|
513
|
+
...pauses.length > 0 ? { pauses } : {}
|
|
476
514
|
};
|
|
477
515
|
const timingPath = `${base}.narration.timing.json`;
|
|
478
516
|
writeFileSync(timingPath, JSON.stringify(timing, null, 2) + "\n");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glissade/narrate",
|
|
3
|
-
"version": "0.5.0-pre.
|
|
3
|
+
"version": "0.5.0-pre.2",
|
|
4
4
|
"description": "glissade narration + captions: TTS at prepare time (gs narrate), deterministic caching, narration-anchored timeline beats, and captions as plain tracks. Render stays offline.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"type": "module",
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
"dist"
|
|
20
20
|
],
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@glissade/core": "0.5.0-pre.
|
|
23
|
-
"@glissade/scene": "0.5.0-pre.
|
|
22
|
+
"@glissade/core": "0.5.0-pre.2",
|
|
23
|
+
"@glissade/scene": "0.5.0-pre.2"
|
|
24
24
|
},
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|