@glissade/narrate 0.5.0 → 0.6.0-pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +28 -1
- package/dist/index.js +70 -12
- package/dist/providers.js +2 -1
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -51,6 +51,15 @@ interface NarrationScript {
|
|
|
51
51
|
* segments word-less. Providers that supply their own words ignore this.
|
|
52
52
|
*/
|
|
53
53
|
align?: string;
|
|
54
|
+
/**
|
|
55
|
+
* Split long caption segments into timed sub-cues at ~`maxChars` (on word
|
|
56
|
+
* boundaries, using per-word timings when present). Persisted into the timing
|
|
57
|
+
* manifest so the burned track and the .srt/.vtt sidecars split identically.
|
|
58
|
+
* Omit for no split (the default).
|
|
59
|
+
*/
|
|
60
|
+
captionSplit?: {
|
|
61
|
+
maxChars: number;
|
|
62
|
+
};
|
|
54
63
|
/** spoken segments and explicit pause beats, in playback order */
|
|
55
64
|
segments: NarrationElement[];
|
|
56
65
|
}
|
|
@@ -85,6 +94,10 @@ interface NarrationTiming {
|
|
|
85
94
|
segments: TimedSegment[];
|
|
86
95
|
/** explicit pause windows, addressable like segments; omitted when none */
|
|
87
96
|
pauses?: TimedPause[];
|
|
97
|
+
/** caption split budget, committed so burned + sidecar split identically */
|
|
98
|
+
captionSplit?: {
|
|
99
|
+
maxChars: number;
|
|
100
|
+
};
|
|
88
101
|
}
|
|
89
102
|
declare class NarrationError extends Error {
|
|
90
103
|
constructor(message: string);
|
|
@@ -118,6 +131,20 @@ interface CaptionTrackOptions {
|
|
|
118
131
|
/** v1 granularity is per segment; 'word' is reserved (karaoke highlight, later) */
|
|
119
132
|
granularity?: 'segment';
|
|
120
133
|
}
|
|
134
|
+
/** One caption cue within a segment's window. */
|
|
135
|
+
interface CaptionCue {
|
|
136
|
+
text: string;
|
|
137
|
+
start: number;
|
|
138
|
+
end: number;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Split a segment's caption into timed sub-cues at ~`maxChars` (word-boundary).
|
|
142
|
+
* With per-word timings, each sub-cue is timed from its first word; without
|
|
143
|
+
* them, the segment window is divided evenly. No budget (or a short segment)
|
|
144
|
+
* yields a single cue — so the default is byte-identical. The SAME function
|
|
145
|
+
* drives the burned track AND the .srt/.vtt sidecars, so they match.
|
|
146
|
+
*/
|
|
147
|
+
declare function splitCaption(segment: TimedSegment, maxChars?: number): CaptionCue[];
|
|
121
148
|
declare function captionTrack(timing: NarrationTiming, opts?: CaptionTrackOptions): Track<string>;
|
|
122
149
|
interface CaptionStyle {
|
|
123
150
|
fontFamily?: string;
|
|
@@ -239,4 +266,4 @@ declare function music(timing: MusicTiming, at?: number): MusicAnchors;
|
|
|
239
266
|
declare function toSrt(timing: NarrationTiming): string;
|
|
240
267
|
declare function toVtt(timing: NarrationTiming): string;
|
|
241
268
|
//#endregion
|
|
242
|
-
export { BedMode, CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationElement, NarrationError, NarrationPause, NarrationScript, NarrationSegment, NarrationTiming, TimedPause, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
|
|
269
|
+
export { BedMode, CaptionCue, CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationElement, NarrationError, NarrationPause, NarrationScript, NarrationSegment, NarrationTiming, TimedPause, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, isPause, music, narration, splitCaption, toSrt, toVtt, validateMusicTiming };
|
package/dist/index.js
CHANGED
|
@@ -76,16 +76,72 @@ function narration(timing) {
|
|
|
76
76
|
};
|
|
77
77
|
return anchors;
|
|
78
78
|
}
|
|
79
|
+
/**
 * Split a segment's caption into timed sub-cues of roughly `maxChars`
 * characters, breaking only on word boundaries.
 *
 * - No usable budget (`maxChars` missing, zero, negative or NaN), or text that
 *   already fits, yields a single cue spanning the whole segment window, so
 *   the default output is unchanged.
 * - With per-word timings, each sub-cue starts at its first word's `start` and
 *   ends where the next sub-cue begins; the last one ends at the segment end.
 *   Word starts are used as-is, i.e. they are assumed to be on the same clock
 *   as `segment.start` (NOTE(review): confirm against the timing producer).
 * - Without per-word timings, the text is chunked greedily on whitespace and
 *   the segment window is divided evenly between the chunks.
 *
 * A single word longer than `maxChars` is never broken; it gets its own cue.
 *
 * @param segment timed segment; reads `text`, `start`, `duration` and the
 *   optional `words` array (each entry providing `word` and `start`)
 * @param maxChars optional character budget per cue
 * @returns cues in playback order (`{ text, start, end }`); always at least one
 */
function splitCaption(segment, maxChars) {
  const end = segment.start + segment.duration;
  const whole = [{
    text: segment.text,
    start: segment.start,
    end
  }];
  // `!(maxChars > 0)` rejects undefined, 0, NaN and negative budgets alike; a
  // negative budget would otherwise put every single word in its own cue.
  if (!(maxChars > 0) || segment.text.length <= maxChars) return whole;
  if (segment.words && segment.words.length > 0) {
    const cues = [];
    let words = [];
    // Running length of `words.join(" ")`, kept incrementally so the candidate
    // string is not rebuilt for every word.
    let length = 0;
    let start = segment.words[0].start;
    for (const w of segment.words) {
      if (words.length > 0 && length + 1 + w.word.length > maxChars) {
        // Close the current cue where the overflowing word begins.
        cues.push({
          text: words.join(" "),
          start,
          end: w.start
        });
        words = [];
        length = 0;
        start = w.start;
      }
      length += (words.length > 0 ? 1 : 0) + w.word.length;
      words.push(w.word);
    }
    if (words.length > 0) cues.push({
      text: words.join(" "),
      start,
      end
    });
    return cues;
  }
  const tokens = segment.text.split(/\s+/).filter(Boolean);
  // Nothing to split on (e.g. whitespace-only text): keep the segment as one
  // cue instead of dividing by zero chunks and dropping it.
  if (tokens.length === 0) return whole;
  const chunks = [];
  let cur = "";
  for (const t of tokens) {
    const candidate = cur ? `${cur} ${t}` : t;
    if (cur && candidate.length > maxChars) {
      chunks.push(cur);
      cur = t;
    } else cur = candidate;
  }
  if (cur) chunks.push(cur);
  const span = segment.duration / chunks.length;
  // Pin the final boundary to the exact segment end so rounding in
  // `n * (duration / n)` cannot make this path disagree with the other two.
  const boundary = (i) => i === chunks.length ? end : segment.start + i * span;
  return chunks.map((text, i) => ({
    text,
    start: boundary(i),
    end: boundary(i + 1)
  }));
}
|
|
79
134
|
function captionTrack(timing, opts = {}) {
|
|
80
135
|
const target = opts.target ?? "captions/text";
|
|
136
|
+
const budget = timing.captionSplit?.maxChars;
|
|
81
137
|
const keys = [key(0, "", { interp: "hold" })];
|
|
82
138
|
let cursor = 0;
|
|
83
139
|
for (const s of timing.segments) {
|
|
84
140
|
if (s.start > cursor + 1e-9) {
|
|
85
141
|
if (keys[keys.length - 1].value !== "") keys.push(key(cursor, "", { interp: "hold" }));
|
|
86
142
|
}
|
|
87
|
-
if (
|
|
88
|
-
else keys.push(key(
|
|
143
|
+
for (const cue of splitCaption(s, budget)) if (cue.start <= 1e-9) keys[0] = key(0, cue.text, { interp: "hold" });
|
|
144
|
+
else keys.push(key(cue.start, cue.text, { interp: "hold" }));
|
|
89
145
|
cursor = s.start + s.duration;
|
|
90
146
|
}
|
|
91
147
|
keys.push(key(cursor, "", { interp: "hold" }));
|
|
@@ -128,16 +184,18 @@ function captionNode(size, style = {}) {
|
|
|
128
184
|
weight: node.fontWeight
|
|
129
185
|
}, width > 0 ? width : void 0, m).length;
|
|
130
186
|
};
|
|
131
|
-
|
|
132
|
-
const m = node.measurerSource?.() ?? estimatingMeasurer;
|
|
187
|
+
const fit = (m) => {
|
|
133
188
|
let font = baseFont;
|
|
134
189
|
while (font > minFont && lineCountAt(font, m) > maxLines) font -= 1;
|
|
135
|
-
return
|
|
136
|
-
|
|
190
|
+
return {
|
|
191
|
+
font,
|
|
192
|
+
lines: Math.max(1, lineCountAt(font, m))
|
|
193
|
+
};
|
|
194
|
+
};
|
|
195
|
+
node.fontSize.bindSource(() => fit(node.measurerSource?.() ?? estimatingMeasurer).font);
|
|
137
196
|
node.position.bindSource(() => {
|
|
138
|
-
const
|
|
139
|
-
const
|
|
140
|
-
const step = quantize(node.fontSize() * lineHeight);
|
|
197
|
+
const { font, lines } = fit(node.measurerSource?.() ?? estimatingMeasurer);
|
|
198
|
+
const step = quantize(font * lineHeight);
|
|
141
199
|
return [size.w / 2, bottomY - (lines - 1) * step];
|
|
142
200
|
});
|
|
143
201
|
}
|
|
@@ -295,10 +353,10 @@ function srtTime(t, sep) {
|
|
|
295
353
|
return `${p(h, 2)}:${p(m, 2)}:${p(s, 2)}${sep}${p(f, 3)}`;
|
|
296
354
|
}
|
|
297
355
|
function toSrt(timing) {
|
|
298
|
-
return timing.segments.
|
|
356
|
+
return timing.segments.flatMap((s) => splitCaption(s, timing.captionSplit?.maxChars)).map((c, i) => `${i + 1}\n${srtTime(c.start, ",")} --> ${srtTime(c.end, ",")}\n${c.text}`).join("\n\n") + "\n";
|
|
299
357
|
}
|
|
300
358
|
function toVtt(timing) {
|
|
301
|
-
return "WEBVTT\n\n" + timing.segments.
|
|
359
|
+
return "WEBVTT\n\n" + timing.segments.flatMap((s) => splitCaption(s, timing.captionSplit?.maxChars)).map((c) => `${srtTime(c.start, ".")} --> ${srtTime(c.end, ".")}\n${c.text}`).join("\n\n") + "\n";
|
|
302
360
|
}
|
|
303
361
|
//#endregion
|
|
304
|
-
export { NarrationError, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
|
|
362
|
+
export { NarrationError, captionNode, captionTrack, duckEnvelope, isPause, music, narration, splitCaption, toSrt, toVtt, validateMusicTiming };
|
package/dist/providers.js
CHANGED
|
@@ -510,7 +510,8 @@ async function synthesizeScript(scriptPath, opts = {}) {
|
|
|
510
510
|
providerVersion,
|
|
511
511
|
totalDuration: ends.length > 0 ? Math.max(...ends) : 0,
|
|
512
512
|
segments,
|
|
513
|
-
...pauses.length > 0 ? { pauses } : {}
|
|
513
|
+
...pauses.length > 0 ? { pauses } : {},
|
|
514
|
+
...raw.captionSplit ? { captionSplit: raw.captionSplit } : {}
|
|
514
515
|
};
|
|
515
516
|
const timingPath = `${base}.narration.timing.json`;
|
|
516
517
|
writeFileSync(timingPath, JSON.stringify(timing, null, 2) + "\n");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glissade/narrate",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0-pre.1",
|
|
4
4
|
"description": "glissade narration + captions: TTS at prepare time (gs narrate), deterministic caching, narration-anchored timeline beats, and captions as plain tracks. Render stays offline.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"type": "module",
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
"dist"
|
|
20
20
|
],
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@glissade/core": "0.
|
|
23
|
-
"@glissade/scene": "0.
|
|
22
|
+
"@glissade/core": "0.6.0-pre.1",
|
|
23
|
+
"@glissade/scene": "0.6.0-pre.1"
|
|
24
24
|
},
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|