@glissade/narrate 0.8.0 → 0.8.1-pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -1
- package/dist/providers.d.ts +9 -1
- package/dist/providers.js +130 -2
- package/package.json +14 -3
package/dist/index.d.ts
CHANGED
|
@@ -46,7 +46,7 @@ interface NarrationScript {
|
|
|
46
46
|
leadIn?: number;
|
|
47
47
|
/**
|
|
48
48
|
* Word-timing aligner for providers that don't emit word timestamps
|
|
49
|
-
* (espeak / openai / piper). 'heuristic' (default) estimates from text;
|
|
49
|
+
* (espeak / openai / piper / kokoro). 'heuristic' (default) estimates from text;
|
|
50
50
|
* 'vosk' derives real timings from the audio (offline ASR); 'none' leaves
|
|
51
51
|
* segments word-less. Providers that supply their own words ignore this.
|
|
52
52
|
*/
|
package/dist/providers.d.ts
CHANGED
|
@@ -68,6 +68,14 @@ declare function piperProvider(opts?: {
|
|
|
68
68
|
noiseScale?: number;
|
|
69
69
|
noiseWScale?: number;
|
|
70
70
|
}): TtsProvider;
|
|
71
|
+
/** PCM16 mono WAV from float samples in [-1, 1]. Round-to-nearest → deterministic. */
|
|
72
|
+
declare function floatToWav(samples: Float32Array, sampleRate: number): Buffer;
|
|
73
|
+
type KokoroDtype = 'fp32' | 'fp16' | 'q8' | 'q4' | 'q4f16';
|
|
74
|
+
declare function kokoroProvider(opts?: {
|
|
75
|
+
model?: string;
|
|
76
|
+
voice?: string;
|
|
77
|
+
dtype?: KokoroDtype;
|
|
78
|
+
}): TtsProvider;
|
|
71
79
|
declare function providerById(id: string): TtsProvider;
|
|
72
80
|
interface AlignRequest {
|
|
73
81
|
/** the synthesized RIFF/WAV bytes */
|
|
@@ -199,4 +207,4 @@ declare function synthesizeScript(scriptPath: string, opts?: SynthesizeOptions):
|
|
|
199
207
|
/** Resolve `<scene>.narration.json` for a scene-module path (or accept the script itself). */
|
|
200
208
|
declare function scriptPathFor(input: string): string;
|
|
201
209
|
//#endregion
|
|
202
|
-
export { AlignRequest, Aligner, SynthesizeOptions, SynthesizeResult, TtsProvider, TtsRequest, TtsResult, VoskAlignWord, alignerById, cacheKey, espeakProvider, fakeProvider, heuristicAligner, heuristicWords, interpolateMissing, mapAsrToScript, openaiProvider, piperProvider, providerById, resolvePiperVoice, scriptPathFor, stderrTail, synthesizeScript, voskAligner, wavDuration };
|
|
210
|
+
export { AlignRequest, Aligner, KokoroDtype, SynthesizeOptions, SynthesizeResult, TtsProvider, TtsRequest, TtsResult, VoskAlignWord, alignerById, cacheKey, espeakProvider, fakeProvider, floatToWav, heuristicAligner, heuristicWords, interpolateMissing, kokoroProvider, mapAsrToScript, openaiProvider, piperProvider, providerById, resolvePiperVoice, scriptPathFor, stderrTail, synthesizeScript, voskAligner, wavDuration };
|
package/dist/providers.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { NarrationError, isPause } from "./index.js";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
2
3
|
import { createHash } from "node:crypto";
|
|
3
4
|
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
4
5
|
import { basename, dirname, isAbsolute, join, resolve } from "node:path";
|
|
5
6
|
import { homedir, tmpdir } from "node:os";
|
|
6
7
|
import { spawnSync } from "node:child_process";
|
|
8
|
+
import { pathToFileURL } from "node:url";
|
|
7
9
|
//#region src/providers.ts
|
|
8
10
|
/**
|
|
9
11
|
* '@glissade/narrate/providers' — the Node-only prepare side. Provider calls
|
|
@@ -225,13 +227,139 @@ function piperProvider(opts = {}) {
|
|
|
225
227
|
}
|
|
226
228
|
};
|
|
227
229
|
}
|
|
230
|
+
/** PCM16 mono WAV from float samples in [-1, 1]. Round-to-nearest → deterministic. */
|
|
231
|
+
function floatToWav(samples, sampleRate) {
|
|
232
|
+
const data = Buffer.alloc(samples.length * 2);
|
|
233
|
+
for (let i = 0; i < samples.length; i++) {
|
|
234
|
+
const s = Math.max(-1, Math.min(1, samples[i]));
|
|
235
|
+
data.writeInt16LE(Math.round(s * 32767), i * 2);
|
|
236
|
+
}
|
|
237
|
+
const header = Buffer.alloc(44);
|
|
238
|
+
header.write("RIFF", 0, "ascii");
|
|
239
|
+
header.writeUInt32LE(36 + data.length, 4);
|
|
240
|
+
header.write("WAVE", 8, "ascii");
|
|
241
|
+
header.write("fmt ", 12, "ascii");
|
|
242
|
+
header.writeUInt32LE(16, 16);
|
|
243
|
+
header.writeUInt16LE(1, 20);
|
|
244
|
+
header.writeUInt16LE(1, 22);
|
|
245
|
+
header.writeUInt32LE(sampleRate, 24);
|
|
246
|
+
header.writeUInt32LE(sampleRate * 2, 28);
|
|
247
|
+
header.writeUInt16LE(2, 32);
|
|
248
|
+
header.writeUInt16LE(16, 34);
|
|
249
|
+
header.write("data", 36, "ascii");
|
|
250
|
+
header.writeUInt32LE(data.length, 40);
|
|
251
|
+
return Buffer.concat([header, data]);
|
|
252
|
+
}
|
|
253
|
+
const KOKORO_MODEL = "onnx-community/Kokoro-82M-v1.0-ONNX";
|
|
254
|
+
const KOKORO_DEFAULT_VOICE = "af_heart";
|
|
255
|
+
/**
|
|
256
|
+
* Apache-2.0 82M neural TTS — markedly more natural than espeak/piper, fully
|
|
257
|
+
* offline on CPU via onnxruntime, no API key. Pure-Node through `kokoro-js`
|
|
258
|
+
* (Transformers.js), so unlike piper there is no `pip install` / external
|
|
259
|
+
* binary; `kokoro-js` is an OPTIONAL peer dep, lazy-loaded here.
|
|
260
|
+
*
|
|
261
|
+
* DETERMINISTIC by construction: inference takes tokenized phonemes + a FIXED
|
|
262
|
+
* voice/style embedding (not diffusion-sampled per call), so the same text →
|
|
263
|
+
* byte-identical PCM — no noise to zero out (piper's trick). `version()` pins
|
|
264
|
+
* the lib version + model + dtype, so any of those moving invalidates the
|
|
265
|
+
* cache. The model (~q8 92MB / fp32 326MB) downloads + caches on first use; it
|
|
266
|
+
* stays out of the bundle and the determinism-critical path.
|
|
267
|
+
*/
|
|
268
|
+
/** kokoro-js version read by walking up from its entry (it does not export
|
|
269
|
+
* `./package.json`, so the subpath can't be resolved directly). */
|
|
270
|
+
function kokoroVersionFrom(entry) {
|
|
271
|
+
let dir = dirname(entry);
|
|
272
|
+
for (let i = 0; i < 8; i++) {
|
|
273
|
+
const p = join(dir, "package.json");
|
|
274
|
+
if (existsSync(p)) try {
|
|
275
|
+
const j = JSON.parse(readFileSync(p, "utf8"));
|
|
276
|
+
if (j.name === "kokoro-js" && j.version) return j.version;
|
|
277
|
+
} catch {}
|
|
278
|
+
const up = dirname(dir);
|
|
279
|
+
if (up === dir) break;
|
|
280
|
+
dir = up;
|
|
281
|
+
}
|
|
282
|
+
return "unknown";
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Resolve the OPTIONAL peer `kokoro-js` from the USER'S project first. Under
|
|
286
|
+
* pnpm's isolated layout a peer is NOT linked into `@glissade/narrate`'s own
|
|
287
|
+
* store dir, so a bare `import('kokoro-js')` from this module fails; resolving
|
|
288
|
+
* relative to `process.cwd()` (where the user ran `add kokoro-js`) finds it.
|
|
289
|
+
* Falls back to this module for hoisted/global installs. Returns a `file://`
|
|
290
|
+
* entry URL (so the dynamic import is never bundled) + the resolved version.
|
|
291
|
+
* Throws a NarrationError that carries the REAL resolution error.
|
|
292
|
+
*/
|
|
293
|
+
function resolveKokoro() {
|
|
294
|
+
const bases = [pathToFileURL(join(process.cwd(), "package.json")).href, import.meta.url];
|
|
295
|
+
let lastErr;
|
|
296
|
+
for (const base of bases) try {
|
|
297
|
+
const entry = createRequire(base).resolve("kokoro-js");
|
|
298
|
+
return {
|
|
299
|
+
entryUrl: pathToFileURL(entry).href,
|
|
300
|
+
version: kokoroVersionFrom(entry)
|
|
301
|
+
};
|
|
302
|
+
} catch (e) {
|
|
303
|
+
lastErr = e;
|
|
304
|
+
}
|
|
305
|
+
throw new NarrationError(`kokoro-js could not be resolved from ${process.cwd()} (${lastErr?.code ?? "error"}: ${lastErr?.message ?? "not found"}) — install it in your project (npm / pnpm / yarn add kokoro-js; pnpm users must also allow its native build scripts — see the narration docs), or use --provider piper/espeak/openai`);
|
|
306
|
+
}
|
|
307
|
+
function kokoroProvider(opts = {}) {
|
|
308
|
+
const modelId = opts.model ?? KOKORO_MODEL;
|
|
309
|
+
const dtype = opts.dtype ?? "q8";
|
|
310
|
+
let loaded = null;
|
|
311
|
+
const loadLib = async () => {
|
|
312
|
+
const { entryUrl } = resolveKokoro();
|
|
313
|
+
let mod;
|
|
314
|
+
try {
|
|
315
|
+
mod = await import(entryUrl);
|
|
316
|
+
} catch (e) {
|
|
317
|
+
const err = e;
|
|
318
|
+
throw new NarrationError(`kokoro-js failed to load from ${entryUrl} (${err?.code ?? "error"}: ${err?.message ?? String(e)}) — ensure kokoro-js and onnxruntime-node are installed, or use --provider piper/espeak/openai`);
|
|
319
|
+
}
|
|
320
|
+
const lib = mod["KokoroTTS"] ? mod : mod["default"];
|
|
321
|
+
if (!lib?.KokoroTTS) throw new NarrationError(`kokoro-js loaded but exposes no KokoroTTS export (from ${entryUrl})`);
|
|
322
|
+
return lib;
|
|
323
|
+
};
|
|
324
|
+
const getModel = () => loaded ??= loadLib().then((k) => k.KokoroTTS.from_pretrained(modelId, {
|
|
325
|
+
dtype,
|
|
326
|
+
device: "cpu"
|
|
327
|
+
}));
|
|
328
|
+
return {
|
|
329
|
+
id: "kokoro",
|
|
330
|
+
version: () => {
|
|
331
|
+
const { version } = resolveKokoro();
|
|
332
|
+
return Promise.resolve(`kokoro-js ${version} ${basename(modelId)} dtype=${dtype}`);
|
|
333
|
+
},
|
|
334
|
+
synthesize: async (req) => {
|
|
335
|
+
const tts = await getModel();
|
|
336
|
+
const voice = req.voice ?? opts.voice ?? KOKORO_DEFAULT_VOICE;
|
|
337
|
+
const genOpts = req.rate !== void 0 && req.rate > 0 ? {
|
|
338
|
+
voice,
|
|
339
|
+
speed: req.rate
|
|
340
|
+
} : { voice };
|
|
341
|
+
let audio;
|
|
342
|
+
try {
|
|
343
|
+
audio = await tts.generate(req.text, genOpts);
|
|
344
|
+
} catch (e) {
|
|
345
|
+
throw new NarrationError(`kokoro synthesis failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
346
|
+
}
|
|
347
|
+
const wav = floatToWav(audio.audio, audio.sampling_rate);
|
|
348
|
+
return {
|
|
349
|
+
wav,
|
|
350
|
+
duration: wavDuration(wav)
|
|
351
|
+
};
|
|
352
|
+
}
|
|
353
|
+
};
|
|
354
|
+
}
|
|
228
355
|
function providerById(id) {
|
|
229
356
|
switch (id) {
|
|
230
357
|
case "fake": return fakeProvider();
|
|
231
358
|
case "espeak": return espeakProvider();
|
|
232
359
|
case "piper": return piperProvider();
|
|
360
|
+
case "kokoro": return kokoroProvider();
|
|
233
361
|
case "openai": return openaiProvider();
|
|
234
|
-
default: throw new NarrationError(`unknown TTS provider '${id}' (have: fake, espeak, piper, openai)`);
|
|
362
|
+
default: throw new NarrationError(`unknown TTS provider '${id}' (have: fake, espeak, piper, kokoro, openai)`);
|
|
235
363
|
}
|
|
236
364
|
}
|
|
237
365
|
/** ≈ syllable count: vowel groups, floored at 1 — a cheap spoken-length proxy. */
|
|
@@ -558,4 +686,4 @@ function scriptPathFor(input) {
|
|
|
558
686
|
return candidate;
|
|
559
687
|
}
|
|
560
688
|
//#endregion
|
|
561
|
-
export { alignerById, cacheKey, espeakProvider, fakeProvider, heuristicAligner, heuristicWords, interpolateMissing, mapAsrToScript, openaiProvider, piperProvider, providerById, resolvePiperVoice, scriptPathFor, stderrTail, synthesizeScript, voskAligner, wavDuration };
|
|
689
|
+
export { alignerById, cacheKey, espeakProvider, fakeProvider, floatToWav, heuristicAligner, heuristicWords, interpolateMissing, kokoroProvider, mapAsrToScript, openaiProvider, piperProvider, providerById, resolvePiperVoice, scriptPathFor, stderrTail, synthesizeScript, voskAligner, wavDuration };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@glissade/narrate",
|
|
3
|
-
"version": "0.8.0",
|
|
3
|
+
"version": "0.8.1-pre.1",
|
|
4
4
|
"description": "glissade narration + captions: TTS at prepare time (gs narrate), deterministic caching, narration-anchored timeline beats, and captions as plain tracks. Render stays offline.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"type": "module",
|
|
@@ -19,14 +19,25 @@
|
|
|
19
19
|
"dist"
|
|
20
20
|
],
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@glissade/core": "0.8.0",
|
|
23
|
-
"@glissade/scene": "0.8.0"
|
|
22
|
+
"@glissade/core": "0.8.1-pre.1",
|
|
23
|
+
"@glissade/scene": "0.8.1-pre.1"
|
|
24
|
+
},
|
|
25
|
+
"peerDependencies": {
|
|
26
|
+
"kokoro-js": "^1.2.0"
|
|
27
|
+
},
|
|
28
|
+
"peerDependenciesMeta": {
|
|
29
|
+
"kokoro-js": {
|
|
30
|
+
"optional": true
|
|
31
|
+
}
|
|
24
32
|
},
|
|
25
33
|
"repository": {
|
|
26
34
|
"type": "git",
|
|
27
35
|
"url": "git+https://github.com/tyevco/glissade.git",
|
|
28
36
|
"directory": "packages/narrate"
|
|
29
37
|
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"kokoro-js": "^1.2.1"
|
|
40
|
+
},
|
|
30
41
|
"scripts": {
|
|
31
42
|
"build": "tsdown",
|
|
32
43
|
"typecheck": "tsc --noEmit"
|