@glissade/narrate 0.4.5 → 0.5.0-pre.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -21,6 +21,13 @@ interface NarrationScript {
21
21
  gap?: number;
22
22
  /** silence before the first segment (s); default 0 */
23
23
  leadIn?: number;
24
+ /**
25
+ * Word-timing aligner for providers that don't emit word timestamps
26
+ * (espeak / openai / piper). 'heuristic' (default) estimates from text;
27
+ * 'vosk' derives real timings from the audio (offline ASR); 'none' leaves
28
+ * segments word-less. Providers that supply their own words ignore this.
29
+ */
30
+ align?: string;
24
31
  segments: NarrationSegment[];
25
32
  }
26
33
  interface TimedWord {
@@ -36,10 +36,112 @@ declare function espeakProvider(): TtsProvider;
36
36
  declare function openaiProvider(opts?: {
37
37
  model?: string;
38
38
  }): TtsProvider;
39
+ /**
40
+ * VITS-based local TTS: far more natural than espeak, runs on CPU, fully
41
+ * offline. Needs a voice MODEL (`.onnx` + sibling `.onnx.json`) — pass its
42
+ * path as `model`, or per-segment as `voice`. Emits no word timestamps; the
43
+ * alignment step (below) fills them in.
44
+ */
45
+ declare function piperProvider(opts?: {
46
+ model?: string;
47
+ }): TtsProvider;
39
48
  declare function providerById(id: string): TtsProvider;
49
+ interface AlignRequest {
50
+ /** the synthesized RIFF/WAV bytes */
51
+ wav: Buffer;
52
+ /** the spoken text (the segment text) */
53
+ text: string;
54
+ }
55
+ /**
56
+ * Turns (audio, known text) into per-word timings — the provider-independent
57
+ * way to get word timestamps. Runs ONLY in the prepare step (heavy work is
58
+ * fine; it runs once and the result is cached). Same three-member shape as
59
+ * TtsProvider: `version()` participates in the cache so swapping aligners
60
+ * re-aligns the CACHED wav without re-synthesizing.
61
+ */
62
+ interface Aligner {
63
+ readonly id: string;
64
+ version(): Promise<string>;
65
+ align(req: AlignRequest): Promise<{
66
+ word: string;
67
+ start: number;
68
+ end: number;
69
+ }[]>;
70
+ }
71
+ /**
72
+ * Distribute words across the clip by estimated spoken length (syllables, not
73
+ * characters — closer to real timing). Pure, deterministic, zero-dependency:
74
+ * the always-available floor. Good enough for captions; karaoke on a very
75
+ * slow/fast word wants a real aligner.
76
+ */
77
+ declare function heuristicWords(text: string, duration: number): {
78
+ word: string;
79
+ start: number;
80
+ end: number;
81
+ }[];
82
+ declare function heuristicAligner(): Aligner;
83
+ /**
84
+ * Fill words whose start/end are NaN by linear interpolation between their
85
+ * known neighbours (edges clamp). Keeps the result monotonic. Used after
86
+ * mapping when some script words got no timing.
87
+ */
88
+ declare function interpolateMissing(words: {
89
+ word: string;
90
+ start: number;
91
+ end: number;
92
+ }[]): {
93
+ word: string;
94
+ start: number;
95
+ end: number;
96
+ }[];
97
+ /**
98
+ * Transfer timed words (from an aligner) onto the script's own word tokens.
99
+ * Forced aligners return near-identical words; ASR (e.g. Vosk) can differ
100
+ * (numbers spelled out, punctuation), so we LCS-align the normalized
101
+ * sequences and interpolate script words the aligner didn't time. Output
102
+ * length === script word count, in script order — what `wordBoxes()` indexes
103
+ * against. If nothing matched, distribute by syllable over the timed span.
104
+ */
105
+ declare function mapAsrToScript(timed: {
106
+ word: string;
107
+ start: number;
108
+ end: number;
109
+ }[], scriptText: string): {
110
+ word: string;
111
+ start: number;
112
+ end: number;
113
+ }[];
114
+ interface WavMono {
115
+ /** mono samples in [-1, 1] */
116
+ samples: Float32Array;
117
+ sampleRate: number;
118
+ }
119
+ /** Decode a 16-bit PCM RIFF/WAV to mono float samples (channels averaged). */
120
+ declare function decodeWavMono(wav: Buffer): WavMono;
121
+ /** Linear-resample mono float to a 16 kHz int16 LE PCM buffer (Vosk's input). */
122
+ declare function resampleTo16kPcm(input: WavMono): Buffer;
123
+ /**
124
+ * Word timings via Vosk (alphacephei) — offline, Apache-2.0, ~50 MB model, a
125
+ * real Node binding (no Python, no Docker, no multi-GB download). `vosk` is an
126
+ * OPTIONAL peer: install it (`npm i vosk`) and point at a model
127
+ * (`opts.model` / `VOSK_MODEL`) only if you use this aligner. ASR words are
128
+ * mapped onto the script tokens by `mapAsrToScript`.
129
+ */
130
+ declare function voskAligner(opts?: {
131
+ model?: string;
132
+ }): Aligner;
133
+ /** Resolve an aligner id; 'none' disables alignment (word-less segments). */
134
+ declare function alignerById(id: string): Aligner | null;
40
135
  interface SynthesizeOptions {
41
- /** override the script's provider */
136
+ /** override the script's provider (by id) */
42
137
  provider?: string;
138
+ /** override the script's aligner ('heuristic' | 'vosk' | 'none') */
139
+ aligner?: string;
140
+ /** a provider INSTANCE — wins over `provider`; the bring-your-own seam
141
+ * (e.g. a custom ElevenLabs/Azure TtsProvider) */
142
+ providerImpl?: TtsProvider;
143
+ /** an aligner INSTANCE (or null to disable) — wins over `aligner` */
144
+ alignerImpl?: Aligner | null;
43
145
  /** ignore the cache and re-synthesize everything */
44
146
  force?: boolean;
45
147
  }
@@ -49,6 +151,10 @@ interface SynthesizeResult {
49
151
  cacheDir: string;
50
152
  synthesized: string[];
51
153
  reused: string[];
154
+ /** segment ids whose words came from the aligner (not the provider) */
155
+ aligned: string[];
156
+ /** the aligner id used, or null when alignment was disabled */
157
+ aligner: string | null;
52
158
  }
53
159
  declare function cacheKey(seg: {
54
160
  text: string;
@@ -64,4 +170,4 @@ declare function synthesizeScript(scriptPath: string, opts?: SynthesizeOptions):
64
170
  /** Resolve `<scene>.narration.json` for a scene-module path (or accept the script itself). */
65
171
  declare function scriptPathFor(input: string): string;
66
172
  //#endregion
67
- export { SynthesizeOptions, SynthesizeResult, TtsProvider, TtsRequest, TtsResult, cacheKey, espeakProvider, fakeProvider, openaiProvider, providerById, scriptPathFor, synthesizeScript, wavDuration };
173
+ export { AlignRequest, Aligner, SynthesizeOptions, SynthesizeResult, TtsProvider, TtsRequest, TtsResult, alignerById, cacheKey, decodeWavMono, espeakProvider, fakeProvider, heuristicAligner, heuristicWords, interpolateMissing, mapAsrToScript, openaiProvider, piperProvider, providerById, resampleTo16kPcm, scriptPathFor, synthesizeScript, voskAligner, wavDuration };
package/dist/providers.js CHANGED
@@ -1,7 +1,8 @@
1
1
  import { NarrationError } from "./index.js";
2
2
  import { createHash } from "node:crypto";
3
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
4
4
  import { basename, dirname, join } from "node:path";
5
+ import { tmpdir } from "node:os";
5
6
  import { spawnSync } from "node:child_process";
6
7
  //#region src/providers.ts
7
8
  /**
@@ -132,12 +133,277 @@ function openaiProvider(opts = {}) {
132
133
  }
133
134
  };
134
135
  }
/**
 * VITS-based local TTS: far more natural than espeak, runs on CPU, fully
 * offline. Needs a voice MODEL (`.onnx` + sibling `.onnx.json`) — pass its
 * path as `model`, or per-segment as `voice`. Emits no word timestamps; the
 * alignment step fills them in.
 *
 * Both methods are `async`, so every failure surfaces as a rejected promise
 * (never a synchronous throw), matching the Promise-returning provider shape.
 *
 * @param opts optional `{ model }` — default voice model path, used when a
 *   request carries no `voice`.
 * @returns a provider object with `id`, `version()` and `synthesize(req)`.
 */
function piperProvider(opts = {}) {
  return {
    id: "piper",
    /**
     * First line of `piper --version`, suffixed with the model's file name
     * when a default model is configured.
     * @throws NarrationError when the `piper` probe does not exit with status 0.
     */
    version: async () => {
      const probe = spawnSync("piper", ["--version"], { encoding: "utf8" });
      if (probe.status !== 0) throw new NarrationError("piper not found on PATH — install rhasspy/piper, or use --provider fake/espeak/openai");
      const banner = (probe.stdout.trim() || probe.stderr.trim() || "piper").split("\n")[0];
      return opts.model ? `${banner} ${basename(opts.model)}` : banner;
    },
    /**
     * Run piper once for `req.text` and return the WAV bytes plus their duration.
     * @throws NarrationError when no model is available or the piper run fails.
     */
    synthesize: async (req) => {
      const model = req.voice ?? opts.model;
      if (!model) throw new NarrationError("piper needs a voice model (.onnx) — pass { model }, or set the segment voice to its path");
      // piper writes to a file; name it per process + text so runs don't collide.
      const tag = createHash("sha256").update(req.text).digest("hex").slice(0, 8);
      const out = join(tmpdir(), `glissade-piper-${process.pid}-${tag}.wav`);
      const args = ["--model", model, "--output_file", out];
      // rate > 1 means faster speech, which piper expresses as a shorter length scale.
      if (req.rate !== void 0 && req.rate > 0) args.push("--length_scale", String(1 / req.rate));
      const run = spawnSync("piper", args, {
        input: req.text,
        maxBuffer: 64 * 1024 * 1024
      });
      try {
        if (run.error || run.status !== 0 || !existsSync(out)) {
          // Prefer the spawn error (e.g. ENOENT), then stderr; `||` so an empty
          // stderr still falls through to the placeholder.
          const detail = run.error?.message || run.stderr?.toString().trim().slice(0, 300) || "no output";
          throw new NarrationError(`piper failed: ${detail}`);
        }
        const wav = readFileSync(out);
        return {
          wav,
          duration: wavDuration(wav)
        };
      } finally {
        // Always remove the temp file, on success and on failure.
        if (existsSync(out)) unlinkSync(out);
      }
    }
  };
}
/**
 * Build the built-in TTS provider registered under `id`.
 *
 * @param id one of "fake", "espeak", "piper", "openai".
 * @returns a freshly constructed provider (default options).
 * @throws NarrationError for any other id.
 */
function providerById(id) {
  if (id === "fake") return fakeProvider();
  if (id === "espeak") return espeakProvider();
  if (id === "piper") return piperProvider();
  if (id === "openai") return openaiProvider();
  throw new NarrationError(`unknown TTS provider '${id}' (have: fake, espeak, piper, openai)`);
}
/** ≈ syllable count: vowel groups, floored at 1 — a cheap spoken-length proxy. */
function syllableWeight(word) {
  // A global match is either null or a non-empty array, so the floor is 1.
  const groups = word.toLowerCase().match(/[aeiouy]+/g);
  return groups ? groups.length : 1;
}
/**
 * Distribute words across the clip by estimated spoken length (syllables, not
 * characters). Pure, deterministic, zero-dependency: the always-available
 * floor.
 *
 * Boundaries are derived from the cumulative integer weight rather than by
 * summing float spans, so consecutive words share an identical boundary and
 * the last word ends exactly at the clip duration.
 *
 * @param text whitespace-separated words; blank text yields `[]`.
 * @param duration clip length; a non-finite or negative value is treated as 0.
 * @returns one `{ word, start, end }` per word, in text order.
 */
function heuristicWords(text, duration) {
  const words = text.trim().split(/\s+/).filter(Boolean);
  if (words.length === 0) return [];
  const clip = Number.isFinite(duration) && duration > 0 ? duration : 0;
  const weights = words.map(syllableWeight);
  const total = weights.reduce((sum, w) => sum + w, 0);
  const out = [];
  let before = 0;
  for (let i = 0; i < words.length; i++) {
    const after = before + weights[i];
    out.push({
      word: words[i],
      start: before / total * clip,
      end: after / total * clip
    });
    before = after;
  }
  return out;
}
/**
 * Aligner backed by `heuristicWords`: spreads the request text across the
 * duration that `wavDuration` reports for the request's WAV bytes.
 *
 * @returns an aligner with id "heuristic" and the fixed version "heuristic-1".
 */
function heuristicAligner() {
  const aligner = {
    id: "heuristic",
    version() {
      return Promise.resolve("heuristic-1");
    },
    align(req) {
      const text = req.text;
      const clipLength = wavDuration(req.wav);
      return Promise.resolve(heuristicWords(text, clipLength));
    }
  };
  return aligner;
}
/**
 * Comparison key for a word: NFC-composed, lower-cased, with every run of
 * characters that are neither letters nor digits removed.
 *
 * NFC composition comes first so that a decomposed accent (base letter +
 * combining mark) becomes a single letter before stripping; otherwise the
 * combining mark is dropped and "é" typed in decomposed form would never match
 * the composed "é".
 */
const normalizeWord = (w) => w.normalize("NFC").toLowerCase().replace(/[^\p{L}\p{N}]+/gu, "");
/**
 * Fill words that lack a usable timing by linear interpolation between their
 * known neighbours (edges clamp). Used after mapping when some script words
 * got no timing.
 *
 * A word counts as untimed when EITHER `start` or `end` is not a finite
 * number, so a half-filled entry (valid start, NaN end) is repaired too
 * instead of passing through with a NaN.
 *
 * - A gap between two timed words is split evenly between the previous word's
 *   end and the next word's start (zero-length slots if those overlap).
 * - A leading gap collapses onto the first timed word's start; a trailing gap
 *   onto the last timed word's end; with no timed word at all, everything is 0.
 *
 * @param words `{ word, start, end }` entries; the input is not mutated.
 * @returns a new array of copies with every start/end finite.
 */
function interpolateMissing(words) {
  const out = words.map((w) => ({ ...w }));
  const isTimed = (w) => Number.isFinite(w.start) && Number.isFinite(w.end);
  const n = out.length;
  let k = 0;
  while (k < n) {
    if (isTimed(out[k])) {
      k++;
      continue;
    }
    // [k, j) is a maximal run of untimed words.
    let j = k;
    while (j < n && !isTimed(out[j])) j++;
    const lo = k > 0 ? out[k - 1].end : j < n ? out[j].start : 0;
    const hi = j < n ? out[j].start : lo;
    const count = j - k;
    const span = Math.max(0, hi - lo);
    for (let t = 0; t < count; t++) {
      out[k + t].start = lo + span * t / count;
      out[k + t].end = lo + span * (t + 1) / count;
    }
    k = j;
  }
  return out;
}
/**
 * Project timed words produced by an aligner onto the script's own tokens.
 *
 * The aligner's words may not match the script verbatim (spelled-out numbers,
 * punctuation), so both sides are reduced with `normalizeWord` and paired via
 * a longest-common-subsequence walk. Script words left unpaired are handed to
 * `interpolateMissing` with NaN timings. When not a single word pairs up, the
 * script is instead spread by `heuristicWords` over the span covered by the
 * timed words.
 *
 * @param timed `{ word, start, end }` entries from the aligner.
 * @param scriptText the script text; split on whitespace into tokens.
 * @returns one entry per script token, in script order; `[]` when either the
 *   script or `timed` is empty.
 */
function mapAsrToScript(timed, scriptText) {
  const scriptWords = scriptText.trim().split(/\s+/).filter(Boolean);
  if (scriptWords.length === 0 || timed.length === 0) return [];

  const scriptKeys = scriptWords.map(normalizeWord);
  const asrKeys = timed.map((t) => normalizeWord(t.word));
  const rows = scriptKeys.length;
  const cols = asrKeys.length;
  // Tokens that normalize to "" (pure punctuation) never pair with anything.
  const sameWord = (r, c) => scriptKeys[r] !== "" && scriptKeys[r] === asrKeys[c];

  // lcs[r][c] = length of the longest common subsequence of scriptKeys[r..] and asrKeys[c..].
  const lcs = Array.from({ length: rows + 1 }, () => new Array(cols + 1).fill(0));
  for (let r = rows - 1; r >= 0; r--) {
    for (let c = cols - 1; c >= 0; c--) {
      lcs[r][c] = sameWord(r, c) ? lcs[r + 1][c + 1] + 1 : Math.max(lcs[r + 1][c], lcs[r][c + 1]);
    }
  }

  // Walk the table from the top-left, recording which timed word each script word pairs with.
  const pairing = new Array(rows).fill(null);
  let hits = 0;
  let r = 0;
  let c = 0;
  while (r < rows && c < cols) {
    if (sameWord(r, c)) {
      pairing[r] = c;
      hits++;
      r++;
      c++;
    } else if (lcs[r + 1][c] >= lcs[r][c + 1]) {
      r++;
    } else {
      c++;
    }
  }

  if (hits === 0) {
    // Nothing paired: keep the aligner's overall span, distribute by syllable inside it.
    const spanStart = Math.min(...timed.map((t) => t.start));
    const spanEnd = Math.max(...timed.map((t) => t.end));
    const spread = heuristicWords(scriptText, Math.max(0, spanEnd - spanStart));
    return spread.map((w) => ({
      ...w,
      start: w.start + spanStart,
      end: w.end + spanStart
    }));
  }

  const draft = scriptWords.map((word, idx) => {
    const hit = pairing[idx];
    if (hit == null) return { word, start: NaN, end: NaN };
    return { word, start: timed[hit].start, end: timed[hit].end };
  });
  return interpolateMissing(draft);
}
/**
 * Decode a 16-bit PCM RIFF/WAV to mono float samples (channels averaged).
 *
 * Walks the RIFF chunk list, taking format fields from `fmt ` and the payload
 * from `data`. When no `fmt ` chunk is present the defaults are 1 channel,
 * 16 kHz, 16-bit.
 *
 * The declared `data` size is clamped to the bytes actually present, so a
 * truncated file or a header with a placeholder size (e.g. 0xFFFFFFFF)
 * decodes what is there instead of reading past the end of the buffer.
 *
 * @param wav the RIFF/WAV bytes.
 * @returns `{ samples, sampleRate }` — samples are mono floats in [-1, 1).
 * @throws NarrationError when the bytes are not RIFF/WAVE, the `fmt ` chunk is
 *   truncated, the sample width is not 16 bits, there is no `data` chunk, or
 *   the header declares zero channels.
 */
function decodeWavMono(wav) {
  if (wav.length < 44 || wav.toString("ascii", 0, 4) !== "RIFF" || wav.toString("ascii", 8, 12) !== "WAVE") throw new NarrationError("not a RIFF/WAVE file");
  let channels = 1;
  let sampleRate = 16e3;
  let bits = 16;
  let dataOffset = -1;
  let dataSize = 0;
  let offset = 12;
  while (offset + 8 <= wav.length) {
    const id = wav.toString("ascii", offset, offset + 4);
    const size = wav.readUInt32LE(offset + 4);
    if (id === "fmt ") {
      // The fields read below span bytes offset+10 .. offset+24.
      if (size < 16 || offset + 24 > wav.length) throw new NarrationError("WAV fmt chunk is truncated");
      channels = wav.readUInt16LE(offset + 10);
      sampleRate = wav.readUInt32LE(offset + 12);
      bits = wav.readUInt16LE(offset + 22);
    } else if (id === "data") {
      dataOffset = offset + 8;
      // Never trust the header beyond what the buffer really holds.
      dataSize = Math.min(size, wav.length - dataOffset);
    }
    // Chunks are word-aligned: an odd size is followed by one pad byte.
    offset += 8 + size + size % 2;
  }
  if (bits !== 16) throw new NarrationError(`only 16-bit PCM WAV is supported (got ${bits}-bit)`);
  if (dataOffset < 0) throw new NarrationError("WAV has no data chunk");
  // Zero channels would divide by zero below and yield NaN samples.
  if (channels < 1) throw new NarrationError("WAV declares zero channels");
  const frames = Math.floor(dataSize / 2 / channels);
  const samples = new Float32Array(frames);
  for (let f = 0; f < frames; f++) {
    let acc = 0;
    for (let c = 0; c < channels; c++) acc += wav.readInt16LE(dataOffset + (f * channels + c) * 2);
    samples[f] = acc / channels / 32768;
  }
  return {
    samples,
    sampleRate
  };
}
/**
 * Linear-resample mono float to a 16 kHz int16 LE PCM buffer (Vosk's input).
 *
 * Each output sample is a linear blend of the two input samples around its
 * source position, clamped to [-1, 1] and scaled by 32767. The output always
 * holds at least one sample, so empty input yields a single zero sample.
 *
 * @param input `{ samples, sampleRate }` — mono float samples and their rate.
 * @returns a Buffer of little-endian int16 samples at 16 kHz.
 * @throws NarrationError when `sampleRate` is not a finite positive number
 *   (otherwise the output length would be infinite, NaN, or silently 1).
 */
function resampleTo16kPcm(input) {
  if (!Number.isFinite(input.sampleRate) || input.sampleRate <= 0) throw new NarrationError(`cannot resample: invalid sample rate ${input.sampleRate}`);
  // Input samples consumed per output sample.
  const ratio = input.sampleRate / 16e3;
  const outLen = Math.max(1, Math.round(input.samples.length / ratio));
  const out = Buffer.alloc(outLen * 2);
  for (let i = 0; i < outLen; i++) {
    const src = i * ratio;
    const j = Math.floor(src);
    const frac = src - j;
    // Past the end: fall back to silence / hold the last sample.
    const a = input.samples[j] ?? 0;
    const b = input.samples[j + 1] ?? a;
    const v = Math.max(-1, Math.min(1, a + (b - a) * frac));
    out.writeInt16LE(Math.round(v * 32767), i * 2);
  }
  return out;
}
/**
 * Word timings via Vosk (alphacephei) — offline ASR through a Node binding.
 * `vosk` is an OPTIONAL peer: install it (`npm i vosk`) and point at a model
 * (`opts.model` / `VOSK_MODEL`) only if you use this aligner. ASR words are
 * mapped onto the script tokens by `mapAsrToScript`.
 *
 * The model path is validated in `align()` as well as `version()`, so calling
 * `align()` on its own fails with a clear NarrationError rather than handing
 * an undefined path to the native binding.
 *
 * @param opts optional `{ model }` — model directory; falls back to the
 *   `VOSK_MODEL` environment variable.
 * @returns an aligner with id "vosk".
 */
function voskAligner(opts = {}) {
  const modelPath = opts.model ?? process.env["VOSK_MODEL"];
  let vosk = null;
  // Import the optional peer lazily, once, and silence its logging.
  const load = async () => {
    if (vosk) return vosk;
    let mod;
    try {
      mod = await import("vosk");
    } catch (err) {
      // Keep the underlying reason: the import can fail for causes other than
      // a missing package.
      const why = err instanceof Error && err.message ? ` (${err.message.split("\n")[0]})` : "";
      throw new NarrationError("vosk is not installed — `npm i vosk` and download a model, or use --align heuristic" + why);
    }
    // NOTE(review): vosk is presumably a CommonJS package; under ESM interop
    // its API may sit on `default` rather than on the namespace — accept both.
    vosk = mod.default ?? mod;
    vosk.setLogLevel(-1);
    return vosk;
  };
  // Shared precondition for version() and align().
  const requireModelPath = () => {
    if (!modelPath) throw new NarrationError("vosk needs a model — set VOSK_MODEL or pass { model } (alphacephei.com/vosk/models)");
    if (!existsSync(modelPath)) throw new NarrationError(`vosk model not found at ${modelPath}`);
    return modelPath;
  };
  return {
    id: "vosk",
    /** `vosk:<model directory name>`; rejects when the model or the binding is missing. */
    version: async () => {
      const path = requireModelPath();
      await load();
      return `vosk:${basename(path)}`;
    },
    /** Recognize `req.wav` with word timings and map them onto `req.text`. */
    align: async (req) => {
      const path = requireModelPath();
      const v = await load();
      const model = new v.Model(path);
      try {
        // Nested try blocks: the model is released even if the recognizer
        // constructor throws.
        const rec = new v.Recognizer({
          model,
          sampleRate: 16e3
        });
        try {
          rec.setWords(true);
          rec.acceptWaveform(resampleTo16kPcm(decodeWavMono(req.wav)));
          return mapAsrToScript(rec.finalResult().result ?? [], req.text);
        } finally {
          rec.free();
        }
      } finally {
        model.free();
      }
    }
  };
}
/**
 * Build the aligner registered under `id`.
 *
 * @param id "heuristic", "vosk", or "none".
 * @returns a freshly constructed aligner, or `null` for "none" (alignment
 *   disabled, segments stay word-less).
 * @throws NarrationError for any other id.
 */
function alignerById(id) {
  if (id === "none") return null;
  if (id === "heuristic") return heuristicAligner();
  if (id === "vosk") return voskAligner();
  throw new NarrationError(`unknown aligner '${id}' (have: heuristic, vosk, none)`);
}
143
409
  function cacheKey(seg, provider, providerVersion) {
@@ -162,8 +428,14 @@ async function synthesizeScript(scriptPath, opts = {}) {
162
428
  if (ids.has(s.id)) throw new NarrationError(`duplicate segment id '${s.id}'`);
163
429
  ids.add(s.id);
164
430
  }
165
- const provider = providerById(opts.provider ?? raw.provider ?? "espeak");
431
+ const provider = opts.providerImpl ?? providerById(opts.provider ?? raw.provider ?? "espeak");
166
432
  const providerVersion = await provider.version();
433
+ const aligner = opts.alignerImpl !== void 0 ? opts.alignerImpl : alignerById(opts.aligner ?? raw.align ?? "heuristic");
434
+ let alignerTag = null;
435
+ const alignerTagFor = async () => {
436
+ if (alignerTag === null) alignerTag = `${aligner.id}@${await aligner.version()}`;
437
+ return alignerTag;
438
+ };
167
439
  const base = scriptPath.replace(/\.narration\.json$/, "");
168
440
  if (base === scriptPath) throw new NarrationError(`script path must end with .narration.json: ${scriptPath}`);
169
441
  const cacheDir = `${base}.narration-cache`;
@@ -175,6 +447,7 @@ async function synthesizeScript(scriptPath, opts = {}) {
175
447
  };
176
448
  const synthesized = [];
177
449
  const reused = [];
450
+ const aligned = [];
178
451
  const segments = [];
179
452
  let cursor = raw.leadIn ?? 0;
180
453
  for (const seg of raw.segments) {
@@ -186,24 +459,42 @@ async function synthesizeScript(scriptPath, opts = {}) {
186
459
  const hash = cacheKey(req, provider.id, providerVersion);
187
460
  let entry = cache.entries[hash];
188
461
  let duration;
462
+ let wavBuf;
189
463
  let words;
190
464
  if (entry !== void 0 && !opts.force && existsSync(join(cacheDir, entry.file))) {
191
- duration = wavDuration(readFileSync(join(cacheDir, entry.file)));
192
- words = entry.words;
465
+ wavBuf = readFileSync(join(cacheDir, entry.file));
466
+ duration = wavDuration(wavBuf);
193
467
  reused.push(seg.id);
194
468
  } else {
195
469
  const result = await provider.synthesize(req);
196
470
  const file = `${seg.id}-${hash.slice(0, 8)}.wav`;
197
471
  writeFileSync(join(cacheDir, file), result.wav);
472
+ wavBuf = result.wav;
198
473
  duration = wavDuration(result.wav);
199
- words = result.words;
200
474
  entry = {
201
475
  file,
202
- ...words !== void 0 ? { words } : {}
476
+ ...result.words !== void 0 ? {
477
+ words: result.words,
478
+ wordsFrom: "provider"
479
+ } : {}
203
480
  };
204
481
  cache.entries[hash] = entry;
205
482
  synthesized.push(seg.id);
206
483
  }
484
+ if (entry.wordsFrom === "provider") words = entry.words;
485
+ else if (aligner !== null) {
486
+ const tag = await alignerTagFor();
487
+ if (entry.wordsFrom === tag && entry.words !== void 0) words = entry.words;
488
+ else {
489
+ words = await aligner.align({
490
+ wav: wavBuf,
491
+ text: seg.text
492
+ });
493
+ entry.words = words;
494
+ entry.wordsFrom = tag;
495
+ aligned.push(seg.id);
496
+ }
497
+ }
207
498
  const timed = {
208
499
  id: seg.id,
209
500
  text: seg.text,
@@ -235,7 +526,9 @@ async function synthesizeScript(scriptPath, opts = {}) {
235
526
  timingPath,
236
527
  cacheDir,
237
528
  synthesized,
238
- reused
529
+ reused,
530
+ aligned,
531
+ aligner: aligner?.id ?? null
239
532
  };
240
533
  }
241
534
  /** Resolve `<scene>.narration.json` for a scene-module path (or accept the script itself). */
@@ -246,4 +539,4 @@ function scriptPathFor(input) {
246
539
  return candidate;
247
540
  }
248
541
  //#endregion
249
- export { cacheKey, espeakProvider, fakeProvider, openaiProvider, providerById, scriptPathFor, synthesizeScript, wavDuration };
542
+ export { alignerById, cacheKey, decodeWavMono, espeakProvider, fakeProvider, heuristicAligner, heuristicWords, interpolateMissing, mapAsrToScript, openaiProvider, piperProvider, providerById, resampleTo16kPcm, scriptPathFor, synthesizeScript, voskAligner, wavDuration };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@glissade/narrate",
3
- "version": "0.4.5",
3
+ "version": "0.5.0-pre.0",
4
4
  "description": "glissade narration + captions: TTS at prepare time (gs narrate), deterministic caching, narration-anchored timeline beats, and captions as plain tracks. Render stays offline.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
@@ -19,8 +19,8 @@
19
19
  "dist"
20
20
  ],
21
21
  "dependencies": {
22
- "@glissade/core": "0.4.5",
23
- "@glissade/scene": "0.4.5"
22
+ "@glissade/core": "0.5.0-pre.0",
23
+ "@glissade/scene": "0.5.0-pre.0"
24
24
  },
25
25
  "repository": {
26
26
  "type": "git",