@glissade/narrate 0.5.0 → 0.6.0-pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -51,6 +51,15 @@ interface NarrationScript {
51
51
  * segments word-less. Providers that supply their own words ignore this.
52
52
  */
53
53
  align?: string;
54
+ /**
55
+ * Split long caption segments into timed sub-cues at ~`maxChars` (on word
56
+ * boundaries, using per-word timings when present). Persisted into the timing
57
+ * manifest so the burned track and the .srt/.vtt sidecars split identically.
58
+ * Omit for no split (the default).
59
+ */
60
+ captionSplit?: {
61
+ maxChars: number;
62
+ };
54
63
  /** spoken segments and explicit pause beats, in playback order */
55
64
  segments: NarrationElement[];
56
65
  }
@@ -85,6 +94,10 @@ interface NarrationTiming {
85
94
  segments: TimedSegment[];
86
95
  /** explicit pause windows, addressable like segments; omitted when none */
87
96
  pauses?: TimedPause[];
97
+ /** caption split budget, committed so burned + sidecar split identically */
98
+ captionSplit?: {
99
+ maxChars: number;
100
+ };
88
101
  }
89
102
  declare class NarrationError extends Error {
90
103
  constructor(message: string);
@@ -118,6 +131,20 @@ interface CaptionTrackOptions {
118
131
  /** v1 granularity is per segment; 'word' is reserved (karaoke highlight, later) */
119
132
  granularity?: 'segment';
120
133
  }
134
+ /** One caption cue within a segment's window. */
135
+ interface CaptionCue {
136
+ text: string;
137
+ start: number;
138
+ end: number;
139
+ }
140
+ /**
141
+ * Split a segment's caption into timed sub-cues at ~`maxChars` (word-boundary).
142
+ * With per-word timings, each sub-cue is timed from its first word; without
143
+ * them, the segment window is divided evenly. No budget (or a short segment)
144
+ * yields a single cue — so the default is byte-identical. The SAME function
145
+ * drives the burned track AND the .srt/.vtt sidecars, so they match.
146
+ */
147
+ declare function splitCaption(segment: TimedSegment, maxChars?: number): CaptionCue[];
121
148
  declare function captionTrack(timing: NarrationTiming, opts?: CaptionTrackOptions): Track<string>;
122
149
  interface CaptionStyle {
123
150
  fontFamily?: string;
@@ -239,4 +266,4 @@ declare function music(timing: MusicTiming, at?: number): MusicAnchors;
239
266
  declare function toSrt(timing: NarrationTiming): string;
240
267
  declare function toVtt(timing: NarrationTiming): string;
241
268
  //#endregion
242
- export { BedMode, CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationElement, NarrationError, NarrationPause, NarrationScript, NarrationSegment, NarrationTiming, TimedPause, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
269
+ export { BedMode, CaptionCue, CaptionStyle, CaptionTrackOptions, DuckOptions, MusicAnchors, MusicClipOptions, MusicTiming, NarrationAnchors, NarrationElement, NarrationError, NarrationPause, NarrationScript, NarrationSegment, NarrationTiming, TimedPause, TimedSegment, TimedWord, captionNode, captionTrack, duckEnvelope, isPause, music, narration, splitCaption, toSrt, toVtt, validateMusicTiming };
package/dist/index.js CHANGED
@@ -76,16 +76,72 @@ function narration(timing) {
76
76
  };
77
77
  return anchors;
78
78
  }
/**
 * Split a segment's caption into timed sub-cues of roughly `maxChars`
 * characters each, breaking only on word boundaries.
 *
 * - No usable budget (omitted, zero, negative, NaN) or a segment whose text
 *   already fits yields exactly one cue spanning the whole segment window.
 * - With per-word timings (`segment.words`), each sub-cue starts at the
 *   `start` of its first word and ends at the `start` of the next sub-cue's
 *   first word; the last sub-cue ends at `segment.start + segment.duration`.
 *   Cue text is rebuilt from the `word` fields joined by single spaces.
 * - Without word timings, `segment.text` is tokenised on whitespace and the
 *   segment window is divided evenly between the resulting chunks.
 *
 * A single word longer than `maxChars` is never broken; it becomes its own
 * cue, which is why the budget is approximate.
 *
 * @param segment  object with `text`, `start`, `duration` and optional
 *                 `words` (each carrying `word` and `start`)
 * @param maxChars optional character budget per cue
 * @returns a non-empty array of `{ text, start, end }` cues in playback order
 */
function splitCaption(segment, maxChars) {
  const end = segment.start + segment.duration;
  const whole = [{
    text: segment.text,
    start: segment.start,
    end
  }];
  // `!(x > 0)` also rejects undefined, NaN and negative budgets, which would
  // otherwise push every word into its own cue.
  if (!(maxChars > 0) || segment.text.length <= maxChars) return whole;
  if (segment.words && segment.words.length > 0) {
    const cues = [];
    let words = [];
    // Length of `words.join(" ")`, tracked incrementally so the pending list
    // is not re-joined for every word.
    let joinedLength = 0;
    let start = segment.words[0].start;
    for (const w of segment.words) {
      const candidateLength = words.length === 0 ? w.word.length : joinedLength + 1 + w.word.length;
      if (words.length > 0 && candidateLength > maxChars) {
        // The pending cue is full: close it where the overflowing word begins.
        cues.push({
          text: words.join(" "),
          start,
          end: w.start
        });
        words = [w.word];
        joinedLength = w.word.length;
        start = w.start;
      } else {
        words.push(w.word);
        joinedLength = candidateLength;
      }
    }
    // The loop always leaves at least one pending word; it runs to the segment end.
    cues.push({
      text: words.join(" "),
      start,
      end
    });
    return cues;
  }
  const tokens = segment.text.split(/\s+/).filter(Boolean);
  const chunks = [];
  let cur = "";
  for (const t of tokens) {
    const candidate = cur ? `${cur} ${t}` : t;
    if (cur && candidate.length > maxChars) {
      chunks.push(cur);
      cur = t;
    } else cur = candidate;
  }
  if (cur) chunks.push(cur);
  // Whitespace-only text has no tokens; keep the single unsplit cue instead
  // of dividing by zero and dropping the segment.
  if (chunks.length === 0) return whole;
  const span = segment.duration / chunks.length;
  return chunks.map((text, i) => ({
    text,
    start: segment.start + i * span,
    end: segment.start + (i + 1) * span
  }));
}
79
134
  function captionTrack(timing, opts = {}) {
80
135
  const target = opts.target ?? "captions/text";
136
+ const budget = timing.captionSplit?.maxChars;
81
137
  const keys = [key(0, "", { interp: "hold" })];
82
138
  let cursor = 0;
83
139
  for (const s of timing.segments) {
84
140
  if (s.start > cursor + 1e-9) {
85
141
  if (keys[keys.length - 1].value !== "") keys.push(key(cursor, "", { interp: "hold" }));
86
142
  }
87
- if (s.start <= 1e-9) keys[0] = key(0, s.text, { interp: "hold" });
88
- else keys.push(key(s.start, s.text, { interp: "hold" }));
143
+ for (const cue of splitCaption(s, budget)) if (cue.start <= 1e-9) keys[0] = key(0, cue.text, { interp: "hold" });
144
+ else keys.push(key(cue.start, cue.text, { interp: "hold" }));
89
145
  cursor = s.start + s.duration;
90
146
  }
91
147
  keys.push(key(cursor, "", { interp: "hold" }));
@@ -128,16 +184,18 @@ function captionNode(size, style = {}) {
128
184
  weight: node.fontWeight
129
185
  }, width > 0 ? width : void 0, m).length;
130
186
  };
131
- node.fontSize.bindSource(() => {
132
- const m = node.measurerSource?.() ?? estimatingMeasurer;
187
+ const fit = (m) => {
133
188
  let font = baseFont;
134
189
  while (font > minFont && lineCountAt(font, m) > maxLines) font -= 1;
135
- return font;
136
- });
190
+ return {
191
+ font,
192
+ lines: Math.max(1, lineCountAt(font, m))
193
+ };
194
+ };
195
+ node.fontSize.bindSource(() => fit(node.measurerSource?.() ?? estimatingMeasurer).font);
137
196
  node.position.bindSource(() => {
138
- const m = node.measurerSource?.() ?? estimatingMeasurer;
139
- const lines = Math.max(1, lineCountAt(node.fontSize(), m));
140
- const step = quantize(node.fontSize() * lineHeight);
197
+ const { font, lines } = fit(node.measurerSource?.() ?? estimatingMeasurer);
198
+ const step = quantize(font * lineHeight);
141
199
  return [size.w / 2, bottomY - (lines - 1) * step];
142
200
  });
143
201
  }
@@ -295,10 +353,10 @@ function srtTime(t, sep) {
295
353
  return `${p(h, 2)}:${p(m, 2)}:${p(s, 2)}${sep}${p(f, 3)}`;
296
354
  }
297
355
  function toSrt(timing) {
298
- return timing.segments.map((s, i) => `${i + 1}\n${srtTime(s.start, ",")} --> ${srtTime(s.start + s.duration, ",")}\n${s.text}`).join("\n\n") + "\n";
356
+ return timing.segments.flatMap((s) => splitCaption(s, timing.captionSplit?.maxChars)).map((c, i) => `${i + 1}\n${srtTime(c.start, ",")} --> ${srtTime(c.end, ",")}\n${c.text}`).join("\n\n") + "\n";
299
357
  }
300
358
  function toVtt(timing) {
301
- return "WEBVTT\n\n" + timing.segments.map((s) => `${srtTime(s.start, ".")} --> ${srtTime(s.start + s.duration, ".")}\n${s.text}`).join("\n\n") + "\n";
359
+ return "WEBVTT\n\n" + timing.segments.flatMap((s) => splitCaption(s, timing.captionSplit?.maxChars)).map((c) => `${srtTime(c.start, ".")} --> ${srtTime(c.end, ".")}\n${c.text}`).join("\n\n") + "\n";
302
360
  }
303
361
  //#endregion
304
- export { NarrationError, captionNode, captionTrack, duckEnvelope, isPause, music, narration, toSrt, toVtt, validateMusicTiming };
362
+ export { NarrationError, captionNode, captionTrack, duckEnvelope, isPause, music, narration, splitCaption, toSrt, toVtt, validateMusicTiming };
package/dist/providers.js CHANGED
@@ -510,7 +510,8 @@ async function synthesizeScript(scriptPath, opts = {}) {
510
510
  providerVersion,
511
511
  totalDuration: ends.length > 0 ? Math.max(...ends) : 0,
512
512
  segments,
513
- ...pauses.length > 0 ? { pauses } : {}
513
+ ...pauses.length > 0 ? { pauses } : {},
514
+ ...raw.captionSplit ? { captionSplit: raw.captionSplit } : {}
514
515
  };
515
516
  const timingPath = `${base}.narration.timing.json`;
516
517
  writeFileSync(timingPath, JSON.stringify(timing, null, 2) + "\n");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@glissade/narrate",
3
- "version": "0.5.0",
3
+ "version": "0.6.0-pre.1",
4
4
  "description": "glissade narration + captions: TTS at prepare time (gs narrate), deterministic caching, narration-anchored timeline beats, and captions as plain tracks. Render stays offline.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
@@ -19,8 +19,8 @@
19
19
  "dist"
20
20
  ],
21
21
  "dependencies": {
22
- "@glissade/core": "0.5.0",
23
- "@glissade/scene": "0.5.0"
22
+ "@glissade/core": "0.6.0-pre.1",
23
+ "@glissade/scene": "0.6.0-pre.1"
24
24
  },
25
25
  "repository": {
26
26
  "type": "git",