@polytts/browser-adapters 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { a as isIOSDevice, i as detectPreferredOrtProvider, o as isWebKitAppleRuntime, r as createOrtWasmSessionWithFallback } from "./ort-runtime-CjOKbTPg.js";
|
|
2
|
-
import {
|
|
2
|
+
import { n as phonemesToIds, t as ensureReadableStreamAsyncIterator } from "./readable-stream-polyfill-Cq7fDEUz.js";
|
|
3
3
|
import { r as SUPERTONIC_MODEL_SESSION_KEYS, t as SUPERTONIC_STYLE_IDS } from "./supertonic.shared-Dk6nkm3-.js";
|
|
4
4
|
import { getModelAssets, normalizeSpeakSpeed, pcmToAudioData } from "@polytts/core";
|
|
5
5
|
//#region src/adapters/browser-speech.ts
|
|
@@ -325,7 +325,7 @@ const kittenAdapter = {
|
|
|
325
325
|
stream: true,
|
|
326
326
|
dynamicVoices: false
|
|
327
327
|
},
|
|
328
|
-
isSupported: () => typeof WebAssembly !== "undefined" && typeof Worker !== "undefined",
|
|
328
|
+
isSupported: () => !isIOSDevice() && typeof WebAssembly !== "undefined" && typeof Worker !== "undefined",
|
|
329
329
|
createModel(spec, _context) {
|
|
330
330
|
return new KittenModel(spec);
|
|
331
331
|
}
|
|
@@ -531,7 +531,7 @@ const kokoroAdapter = {
|
|
|
531
531
|
stream: true,
|
|
532
532
|
dynamicVoices: true
|
|
533
533
|
},
|
|
534
|
-
isSupported: (spec) => isKokoroDeviceSupported(spec.config?.device),
|
|
534
|
+
isSupported: (spec) => !isIOSDevice() && isKokoroDeviceSupported(spec.config?.device),
|
|
535
535
|
createModel(spec, _context) {
|
|
536
536
|
return new KokoroModel(spec);
|
|
537
537
|
}
|
|
@@ -604,6 +604,10 @@ var PiperModel = class {
|
|
|
604
604
|
return isIOSDevice();
|
|
605
605
|
}
|
|
606
606
|
async load(signal, onProgress) {
|
|
607
|
+
if (this.useMainThread && this.mainThread) {
|
|
608
|
+
onProgress?.(1);
|
|
609
|
+
return;
|
|
610
|
+
}
|
|
607
611
|
const voiceConfig = await this.loadConfig(signal);
|
|
608
612
|
const modelData = await this.loadModelData(signal, onProgress);
|
|
609
613
|
if (this.useMainThread) {
|
|
@@ -682,6 +686,7 @@ var PiperModel = class {
|
|
|
682
686
|
intraOpNumThreads: 1
|
|
683
687
|
});
|
|
684
688
|
if (signal.aborted) throw new DOMException("Aborted", "AbortError");
|
|
689
|
+
ensureReadableStreamAsyncIterator();
|
|
685
690
|
const { phonemize } = await import("phonemizer");
|
|
686
691
|
this.mainThread = {
|
|
687
692
|
session,
|
|
@@ -907,7 +912,7 @@ const supertonicAdapter = {
|
|
|
907
912
|
stream: false,
|
|
908
913
|
dynamicVoices: false
|
|
909
914
|
},
|
|
910
|
-
isSupported: () => typeof WebAssembly !== "undefined" && typeof Worker !== "undefined",
|
|
915
|
+
isSupported: () => !isIOSDevice() && typeof WebAssembly !== "undefined" && typeof Worker !== "undefined",
|
|
911
916
|
createModel(spec, context) {
|
|
912
917
|
return new SupertonicModel(spec, context);
|
|
913
918
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { };
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
//#region src/adapters/kokoro.worker.ts
|
|
2
|
+
// --- Immutable configuration -------------------------------------------------

// Floor applied to `env.logLevel` by configureKokoroEnvironment: the level is
// never left below this value. (The name suggests it is the runtime's "error"
// level; that mapping is not verifiable from this file.)
const KOKORO_ERROR_LOG_LEVEL = 40;

// Cache key used when no explicit module URL is given and the bare
// "kokoro-js" specifier is imported instead.
const DEFAULT_MODULE_KEY = "kokoro-js";

// --- Worker-wide mutable state -----------------------------------------------

// Import promises keyed by module URL (or DEFAULT_MODULE_KEY), so a module is
// only imported once per key unless the import fails.
const kokoroModuleCache = /* @__PURE__ */ new Map();

// Request ids for which an "abort" message has been received. Checked by
// ensureNotAborted and removed again when the request's handler finishes.
const abortedRequestIds = /* @__PURE__ */ new Set();

// Module URL from the most recent "load" message; reused by "stream" requests
// to fetch the same module again from the cache.
let currentModuleUrl;

// The loaded model instance; stays null until a "load" request has produced one.
let tts = null;
|
|
8
|
+
/**
 * Import the Kokoro module, reusing a previously started import when possible.
 *
 * @param {string | undefined} moduleUrl - URL to import; when falsy the bare
 *   "kokoro-js" specifier is imported instead.
 * @returns {Promise<object>} The imported module namespace.
 * @throws Whatever the dynamic import rejects with. A failed import is evicted
 *   from the cache so a later call can retry.
 */
async function loadKokoroBrowserModule(moduleUrl) {
  const cacheKey = moduleUrl ?? DEFAULT_MODULE_KEY;

  const pending = kokoroModuleCache.get(cacheKey);
  if (pending) return pending;

  let importPromise;
  if (moduleUrl) {
    importPromise = import(
      /* @vite-ignore */
      moduleUrl
    );
  } else {
    importPromise = import("kokoro-js");
  }

  // Store the promise itself (not the resolved module) so concurrent callers
  // share a single in-flight import.
  kokoroModuleCache.set(cacheKey, importPromise);

  return importPromise.catch((error) => {
    kokoroModuleCache.delete(cacheKey);
    throw error;
  });
}
|
|
24
|
+
/**
 * Throw if an "abort" message has been recorded for the given request.
 *
 * @param {*} requestId - Id of the request being processed.
 * @throws {DOMException} With name "AbortError" when the id is in `abortedRequestIds`.
 */
function ensureNotAborted(requestId) {
  const wasAborted = abortedRequestIds.has(requestId);
  if (!wasAborted) return;
  throw new DOMException("Aborted", "AbortError");
}
|
|
27
|
+
/**
 * Adjust the Kokoro module's `env` object for the current runtime.
 *
 * - Turns off `useBrowserCache` and `useWasmCache` when `globalThis.caches`
 *   is missing or nullish.
 * - Raises `logLevel` to at least KOKORO_ERROR_LOG_LEVEL (an unset level
 *   becomes exactly that value).
 *
 * @param {object | null | undefined} env - Mutated in place; a falsy value is ignored.
 * @returns {void}
 */
function configureKokoroEnvironment(env) {
  if (env) {
    const hasCacheStorage = "caches" in globalThis && globalThis.caches != null;
    if (!hasCacheStorage) {
      env.useBrowserCache = false;
      env.useWasmCache = false;
    }
    const requestedLevel = env.logLevel ?? KOKORO_ERROR_LOG_LEVEL;
    env.logLevel = Math.max(requestedLevel, KOKORO_ERROR_LOG_LEVEL);
  }
}
|
|
35
|
+
/**
 * Load a Kokoro model, preferring WebGPU and falling back to WASM.
 *
 * When `preferredDevice` is exactly "wasm" only the WASM backend is tried.
 * For any other value WebGPU is attempted first; if that attempt fails for
 * any reason the WASM backend is tried, and its failure propagates.
 *
 * Download progress reported by the loader is forwarded to the host as
 * `{ type: "progress", requestId, progress }` messages.
 *
 * @param {{ from_pretrained: Function }} KokoroTTS - Loader exposing `from_pretrained(modelRef, options)`.
 * @param {*} modelRef - Passed through to `from_pretrained` unchanged.
 * @param {string | undefined} preferredDevice - "wasm" to skip the WebGPU attempt.
 * @param {*} requestId - Echoed in progress messages.
 * @returns {Promise<{ tts: object, device: "webgpu" | "wasm" }>} The model and the backend it was loaded on.
 */
async function loadTTS(KokoroTTS, modelRef, preferredDevice, requestId) {
  const forwardProgress = ({ status, loaded, total }) => {
    // Only "progress" events with a known byte count and a non-zero total
    // can be turned into a ratio.
    if (status !== "progress" || loaded == null || !total) return;
    self.postMessage({
      type: "progress",
      requestId,
      progress: loaded / total
    });
  };

  const loadOn = async (device) => {
    const model = await KokoroTTS.from_pretrained(modelRef, {
      dtype: device === "webgpu" ? "fp32" : "q8",
      device,
      progress_callback: forwardProgress
    });
    return { tts: model, device };
  };

  if (preferredDevice !== "wasm") {
    try {
      return await loadOn("webgpu");
    } catch {
      // Deliberate best-effort: any WebGPU failure falls through to WASM.
    }
  }
  return loadOn("wasm");
}
|
|
64
|
+
/**
 * Worker message handler.
 *
 * Incoming messages (by `type`):
 * - "abort":    records `requestId` so the matching in-flight request stops at
 *               its next abort check. No reply is sent.
 * - "load":     imports the Kokoro module (`moduleUrl`), loads `modelRef` on
 *               `device`, and replies `{ type: "done", voices, device }`.
 * - "generate": synthesizes `text` with `voiceId` in one shot and replies
 *               `{ type: "done", audio, sampleRate }`.
 * - "stream":   synthesizes `text` piecewise, posting `{ type: "chunk", audio,
 *               sampleRate }` per piece followed by `{ type: "done" }`.
 *
 * Failures are reported as `{ type: "error", requestId, message }`, except
 * aborts (errors named "AbortError"), which end the request without a reply.
 * Audio buffers are transferred, not copied.
 */
self.onmessage = async (event) => {
  const message = event.data;
  if (message.type === "abort") {
    abortedRequestIds.add(message.requestId);
    return;
  }
  const { requestId } = message;
  try {
    switch (message.type) {
      case "load": {
        currentModuleUrl = message.moduleUrl;
        const kokoroModule = await loadKokoroBrowserModule(currentModuleUrl);
        configureKokoroEnvironment(kokoroModule.env);
        const loaded = await loadTTS(kokoroModule.KokoroTTS, message.modelRef, message.device, requestId);
        // The model is kept even when the request was aborted in the
        // meantime; an abort only suppresses the "done" reply below.
        tts = loaded.tts;
        ensureNotAborted(requestId);
        const voices = Object.entries(tts.voices ?? {}).map(([id, info]) => ({
          id,
          name: info?.name ?? id,
          // Without explicit metadata, ids starting with "b" are reported as
          // British English and everything else as US English.
          language: info?.language ?? (id.startsWith("b") ? "en-GB" : "en-US"),
          gender: info?.gender
        }));
        self.postMessage({
          type: "done",
          requestId,
          voices,
          device: loaded.device
        });
        break;
      }
      case "generate": {
        if (!tts) throw new Error("Kokoro model not loaded");
        ensureNotAborted(requestId);
        const result = await tts.generate(message.text, { voice: message.voiceId });
        ensureNotAborted(requestId);
        const audio = result.audio;
        // Copy exactly the view's bytes into a standalone buffer so it can be
        // transferred without detaching memory the model may still own.
        const buffer = audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength);
        self.postMessage({
          type: "done",
          requestId,
          audio: buffer,
          sampleRate: result.sampling_rate ?? 24e3
        }, [buffer]);
        break;
      }
      case "stream": {
        if (!tts) throw new Error("Kokoro model not loaded");
        const { TextSplitterStream } = await loadKokoroBrowserModule(currentModuleUrl);
        // An abort may have arrived while the module was being awaited; stop
        // before any synthesis work starts, as the "generate" case does.
        ensureNotAborted(requestId);
        const splitter = new TextSplitterStream();
        const stream = tts.stream(splitter, { voice: message.voiceId });
        splitter.push(message.text);
        splitter.close();
        for await (const { audio } of stream) {
          ensureNotAborted(requestId);
          const samples = audio.audio;
          const buffer = samples.buffer.slice(samples.byteOffset, samples.byteOffset + samples.byteLength);
          self.postMessage({
            type: "chunk",
            requestId,
            audio: buffer,
            sampleRate: audio.sampling_rate ?? 24e3
          }, [buffer]);
        }
        ensureNotAborted(requestId);
        self.postMessage({
          type: "done",
          requestId
        });
        break;
      }
    }
  } catch (error) {
    // `error` can be any thrown value, including null or undefined. Optional
    // chaining keeps this handler from throwing itself, which would leave the
    // caller without an "error" reply.
    if (error?.name !== "AbortError") self.postMessage({
      type: "error",
      requestId,
      message: error instanceof Error ? error.message : String(error)
    });
  } finally {
    abortedRequestIds.delete(requestId);
  }
};
|
|
145
|
+
//#endregion
|
package/dist/piper.worker.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { r as createOrtWasmSessionWithFallback } from "./ort-runtime-CjOKbTPg.js";
|
|
2
|
-
import {
|
|
2
|
+
import { n as phonemesToIds, t as ensureReadableStreamAsyncIterator } from "./readable-stream-polyfill-Cq7fDEUz.js";
|
|
3
3
|
//#region src/adapters/piper.worker.ts
|
|
4
|
+
ensureReadableStreamAsyncIterator();
|
|
4
5
|
let session = null;
|
|
5
6
|
let voiceConfig = null;
|
|
6
7
|
const abortedRequestIds = /* @__PURE__ */ new Set();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
//#region src/adapters/piper-phonemes.ts
|
|
2
|
+
// Special symbols looked up in a voice's `phoneme_id_map`.
const BOS = "^"; // emitted once before the first phoneme
const PAD = "_"; // emitted after every mapped phoneme
const EOS = "$"; // emitted once after the last phoneme

/**
 * Convert a phoneme sequence into the flat id list described by a voice config.
 *
 * Output layout: BOS ids, then for every phoneme present in the map its ids
 * followed by the PAD ids, then EOS ids. Phonemes missing from the map are
 * skipped, and any special symbol missing from the map contributes nothing.
 *
 * @param {Iterable<string>} phonemes - Phoneme symbols; a string is iterated per code point.
 * @param {{ phoneme_id_map: Record<string, number[]> }} config - Voice configuration.
 * @returns {number[]} The flattened id sequence.
 */
function phonemesToIds(phonemes, config) {
  const idMap = config.phoneme_id_map;
  const idsFor = (symbol) => (idMap[symbol] ? idMap[symbol] : []);

  const sequence = [...idsFor(BOS)];
  for (const symbol of phonemes) {
    const symbolIds = idMap[symbol];
    if (symbolIds !== undefined) {
      sequence.push(...symbolIds);
      sequence.push(...idsFor(PAD));
    }
  }
  sequence.push(...idsFor(EOS));
  return sequence;
}
|
|
18
|
+
//#endregion
|
|
19
|
+
//#region src/adapters/readable-stream-polyfill.ts
|
|
20
|
+
/**
 * Polyfill `ReadableStream[Symbol.asyncIterator]` for Safari < 17.6.
 *
 * The `phonemizer` package uses `for await...of` on a ReadableStream to decompress its embedded
 * data, which hangs on older Safari/iOS Safari. See:
 * https://github.com/xenova/phonemizer.js/issues/2
 *
 * Does nothing when `ReadableStream` is unavailable or already async-iterable.
 *
 * The installed iterator follows the native contract for how iteration ends:
 * - stream exhausted: the reader lock is released;
 * - a read fails: the error propagates and the lock is released;
 * - the consumer stops early (`break`, `return`, or a throw inside the loop
 *   body): the stream is cancelled, then the lock is released.
 */
function ensureReadableStreamAsyncIterator() {
  if (typeof ReadableStream === "undefined" || ReadableStream.prototype[Symbol.asyncIterator]) return;
  Object.defineProperty(ReadableStream.prototype, Symbol.asyncIterator, {
    async *value() {
      const reader = this.getReader();
      // True once the stream has ended or errored on its own; in that case
      // there is nothing left to cancel.
      let settled = false;
      try {
        while (true) {
          let result;
          try {
            result = await reader.read();
          } catch (error) {
            settled = true;
            throw error;
          }
          if (result.done) {
            settled = true;
            return;
          }
          yield result.value;
        }
      } finally {
        try {
          // Reached with `settled === false` only when the consumer abandoned
          // iteration; cancel so the underlying source can stop producing.
          if (!settled) await reader.cancel();
        } finally {
          reader.releaseLock();
        }
      }
    },
    enumerable: false,
    writable: true,
    configurable: true
  });
}
|
|
46
|
+
//#endregion
|
|
47
|
+
export { phonemesToIds as n, ensureReadableStreamAsyncIterator as t };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polytts/browser-adapters",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Official browser adapter implementations for polytts.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"adapters",
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"kokoro-js": "latest",
|
|
40
40
|
"onnxruntime-web": "latest",
|
|
41
41
|
"phonemizer": "latest",
|
|
42
|
-
"@polytts/core": "0.1.
|
|
42
|
+
"@polytts/core": "0.1.2"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@types/node": "^25.6.0",
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
//#region src/adapters/piper-phonemes.ts
|
|
2
|
-
const BOS = "^";
|
|
3
|
-
const PAD = "_";
|
|
4
|
-
const EOS = "$";
|
|
5
|
-
function phonemesToIds(phonemes, config) {
|
|
6
|
-
const map = config.phoneme_id_map;
|
|
7
|
-
const ids = [];
|
|
8
|
-
if (map[BOS]) ids.push(...map[BOS]);
|
|
9
|
-
for (const phoneme of phonemes) {
|
|
10
|
-
const phonemeIds = map[phoneme];
|
|
11
|
-
if (phonemeIds === void 0) continue;
|
|
12
|
-
ids.push(...phonemeIds);
|
|
13
|
-
if (map[PAD]) ids.push(...map[PAD]);
|
|
14
|
-
}
|
|
15
|
-
if (map[EOS]) ids.push(...map[EOS]);
|
|
16
|
-
return ids;
|
|
17
|
-
}
|
|
18
|
-
//#endregion
|
|
19
|
-
export { phonemesToIds as t };
|