@polytts/browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DengQing dengqing0821@gmail.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # @polytts/browser
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@polytts/browser)](https://www.npmjs.com/package/@polytts/browser)
4
+
5
+ Explicit browser entrypoint for [`polytts`](https://github.com/Dunqing/polytts).
6
+
7
+ Use this package when you want the structured scoped package layout for browser apps, PWAs, or Electron renderers.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ npm install @polytts/browser
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```ts
18
+ import { createBrowserTTS } from "@polytts/browser";
19
+
20
+ const tts = createBrowserTTS({
21
+ initialModelId: "browser-speech",
22
+ });
23
+
24
+ await tts.ready();
25
+ await tts.speak("Hello from the browser runtime.");
26
+ ```
27
+
28
+ For progressive audio:
29
+
30
+ ```ts
31
+ for await (const chunk of tts.synthesizeStream("Hello from a streaming browser model.")) {
32
+ console.log(chunk.sampleRate, chunk.channels[0]?.length ?? 0);
33
+ }
34
+ ```
35
+
36
+ The high-level controller also exposes `getInstallState(modelId)` and `isInstalled(modelId)` for quick download checks without reading raw runtime state.
37
+
38
+ If you do not want IndexedDB, pass a custom `assetStore`:
39
+
40
+ ```ts
41
+ import { MemoryAssetStore } from "@polytts/core";
42
+ import { createBrowserTTS } from "@polytts/browser";
43
+
44
+ const tts = createBrowserTTS({
45
+ assetStore: new MemoryAssetStore(),
46
+ });
47
+ ```
48
+
49
+ `@polytts/browser` also exports `LocalStorageAssetStore`, but it is only suitable for tiny assets, demos, and tests. `localStorage` is usually capped around 5 MB and base64 storage adds roughly 33% overhead, so do not use it for real ONNX bundles.
50
+
51
+ ## Lifecycle
52
+
53
+ The browser controller keeps three separate concepts:
54
+
55
+ - selected model and voice
56
+ - downloaded assets
57
+ - loaded runtime instance
58
+
59
+ Important behavior:
60
+
61
+ - `initialModelId` and `initialVoiceId` only set the starting selection
62
+ - `download(modelId)` caches assets, but does not prepare the runtime instance
63
+ - `ready()`, `selectModel()`, `selectFamily()`, and `selectVoice()` prepare the selected model
64
+ - `speak()`, `synthesize()`, and `synthesizeStream()` prepare on demand if needed
65
+
66
+ If your UI needs explicit install vs load states, use `getInstallState()`, `isInstalled()`, `status`, `phase`, and `phaseProgress` together instead of assuming they mean the same thing.
67
+
68
+ ## Voices
69
+
70
+ Catalog voice metadata and runtime-resolved voices are not always identical.
71
+
72
+ - Models such as Piper usually expose stable voices from catalog metadata.
73
+ - Models such as Kokoro may populate their final voice list only after the model is prepared.
74
+
75
+ If your UI renders a voice picker, expect some models to show no resolved voices until after `ready()` or `selectModel()` completes.
76
+
77
+ ## Platform notes
78
+
79
+ - `browser-speech` has no download step and depends entirely on the host browser engine.
80
+ - ONNX-backed models may download large bundles and should usually expose progress UI.
81
+ - Piper uses a safer main-thread ONNX path on iOS. That path is broadly compatible, but a stop/cancel request cannot interrupt an inference call that is already running: it is only observed before a call starts or after the current call returns.
82
+ - Worker-backed models stop more aggressively because the worker can be recycled on abort.
83
+
84
+ ## SSR
85
+
86
+ `@polytts/browser` is a browser entrypoint. The React providers can render on the server, but `createBrowserTTS()` and `createBrowserTTSRuntime()` should still be called only in browser/client code.
87
+
88
+ ## Exports
89
+
90
+ - `createBrowserTTS()`
91
+ - `createBrowserTTSRuntime()`
92
+ - browser audio and storage helpers
93
+ - official browser adapters
94
+ - official catalogs from `@polytts/presets`
@@ -0,0 +1,265 @@
1
+ import { AssetBundleKey, AssetStore, AssetStore as AssetStore$1, AudioData, AudioData as AudioData$1, CatalogSource, InstallState, InstallState as InstallState$1, InstallStatus, ModelId, ModelId as ModelId$1, ModelRuntimeInfo, ModelSpec, RuntimeAudioPlayer, RuntimePhase, TTSAdapter, TTSRuntime, TTSRuntimeOptions, Voice, Voice as Voice$1, VoiceId, VoiceId as VoiceId$1, resolveModelDistribution } from "@polytts/core";
2
+ export * from "@polytts/browser-adapters";
3
+ export * from "@polytts/presets";
4
+
5
+ //#region src/audio.d.ts
6
+ /** The audio output strategy used for playback. */
7
+ type BrowserAudioPlaybackMode = "buffer" | "stream" | "audio-element";
8
+ /** Event passed to the `onAudioReady` and `onPlaybackQueued` callbacks when audio reaches a playback milestone. */
9
+ interface BrowserAudioPlaybackEvent {
10
+ /** The playback strategy that produced this event (buffer, stream, or audio-element). */
11
+ mode: BrowserAudioPlaybackMode;
12
+ /** Timestamp in milliseconds on the `performance.now()` clock. For streamed playback the queued event reports the projected start time of the first chunk, which can lie slightly in the future. */
13
+ at: number;
14
+ }
15
+ /** Callbacks for observing audio playback lifecycle events. */
16
+ interface BrowserAudioPlayerOptions {
17
+ /** Called when audio data has been decoded and is ready to play. */
18
+ onAudioReady?: (event: BrowserAudioPlaybackEvent) => void;
19
+ /** Called when audio has been scheduled for playback on the output device. */
20
+ onPlaybackQueued?: (event: BrowserAudioPlaybackEvent) => void;
21
+ }
22
+ /** Convert a polytts AudioData object to a Web Audio API AudioBuffer. */
23
+ declare function audioDataToAudioBuffer(audio: AudioData$1): AudioBuffer;
24
+ /**
25
+ * Browser audio player that uses Web Audio API (or an HTMLAudioElement fallback on iOS) to play
26
+ * synthesized speech.
27
+ */
28
+ declare class BrowserAudioPlayer implements RuntimeAudioPlayer {
29
+ private readonly options;
30
+ private ctx;
31
+ private source;
32
+ private activeSources;
33
+ private pendingPlayback;
34
+ private _isPlaying;
35
+ constructor(options?: BrowserAudioPlayerOptions);
36
+ get isPlaying(): boolean;
37
+ warmup(): void;
38
+ play(audio: AudioData$1, signal: AbortSignal, speed?: number): Promise<void>;
39
+ playStream(audioStream: AsyncIterable<AudioData$1>, signal: AbortSignal, speed?: number): Promise<void>;
40
+ stop(): void;
41
+ dispose(): void;
42
+ private getContext;
43
+ private playViaAudioElement;
44
+ private isIOS;
45
+ }
46
+ //#endregion
47
+ //#region src/browser-tts.d.ts
48
+ /** Options for creating a browser TTS runtime, with sensible defaults for adapters and catalogs. */
49
+ interface BrowserTTSRuntimeOptions extends Omit<TTSRuntimeOptions, "adapters" | "catalogs" | "initialModelId" | "initialVoiceId"> {
50
+ /** Model to activate on startup. */
51
+ initialModelId?: ModelId$1;
52
+ /** Voice to activate on startup. */
53
+ initialVoiceId?: VoiceId$1;
54
+ /** Additional TTS adapters merged with the official browser adapters. */
55
+ extraAdapters?: TTSAdapter[];
56
+ /** Additional catalog sources merged with the official catalog. */
57
+ extraCatalogs?: CatalogSource[];
58
+ }
59
+ /** Describes a single TTS model with its capabilities, download state, and browser support status. */
60
+ interface BrowserTTSModel {
61
+ id: ModelId$1;
62
+ name: string;
63
+ family: string;
64
+ languages: string[];
65
+ /** Whether the model supports single or multiple voices. */
66
+ voiceMode: ModelSpec["voiceMode"];
67
+ /** How model assets are distributed (e.g. managed download vs inline). */
68
+ distributionKind: ReturnType<typeof resolveModelDistribution>["kind"];
69
+ /** Number of available voices, or null if unknown. */
70
+ voiceCount: number | null;
71
+ /** Total size of model assets in bytes. */
72
+ bundleSizeBytes: number;
73
+ /** Whether the current browser environment supports this model. */
74
+ supported: boolean;
75
+ /** Whether the model requires a separate download before use. */
76
+ requiresDownload: boolean;
77
+ /** Whether model assets have been fully downloaded. */
78
+ downloaded: boolean;
79
+ /** Current download/install status. */
80
+ downloadStatus: InstallStatus;
81
+ /** Download progress from 0 to 1, or null if not downloading. */
82
+ downloadProgress: number | null;
83
+ description?: string;
84
+ /** URL of the model's asset manifest, if applicable. */
85
+ manifestUrl?: string;
86
+ /** URL of the model's project homepage. */
87
+ homepage?: string;
88
+ }
89
+ /** A specific model/voice combination within a model family. */
90
+ interface BrowserTTSFamilyVariant {
91
+ modelId: ModelId$1;
92
+ modelName: string;
93
+ /** Representative voice for this variant, if the model supports voices. */
94
+ voiceId?: VoiceId$1;
95
+ /** Display name of the representative voice. */
96
+ voiceName?: string;
97
+ downloaded: boolean;
98
+ downloadStatus: InstallStatus;
99
+ /** Download progress from 0 to 1, or null if not downloading. */
100
+ downloadProgress: number | null;
101
+ }
102
+ /** A group of related TTS models (e.g. different sizes or languages) under a shared family name. */
103
+ interface BrowserTTSFamily {
104
+ id: string;
105
+ name: string;
106
+ languages: string[];
107
+ supported: boolean;
108
+ /** Currently active model within this family, or null if none is selected. */
109
+ selectedModelId: ModelId$1 | null;
110
+ /** All model IDs belonging to this family. */
111
+ modelIds: ModelId$1[];
112
+ /** Individual model/voice combinations within the family. */
113
+ variants: BrowserTTSFamilyVariant[];
114
+ }
115
+ /**
116
+ * Snapshot of the entire BrowserTTS state including models, families, selection, and playback
117
+ * status.
118
+ */
119
+ interface BrowserTTSState {
120
+ /** All registered models with their current state. */
121
+ models: BrowserTTSModel[];
122
+ /** Models grouped by family. */
123
+ families: BrowserTTSFamily[];
124
+ /** Family of the currently selected model, or null. */
125
+ selectedFamilyId: string | null;
126
+ /** Voices available for the currently active model. */
127
+ voices: Voice$1[];
128
+ /** Currently active model, or null if none selected. */
129
+ selectedModelId: ModelId$1 | null;
130
+ /** Currently active voice, or null if none selected. */
131
+ selectedVoiceId: VoiceId$1 | null;
132
+ /** High-level playback status derived from the runtime phase. */
133
+ status: "idle" | "preparing" | "speaking" | "error";
134
+ /** Granular runtime phase (e.g. installing, loading, speaking). */
135
+ phase: ReturnType<TTSRuntime["getState"]>["phase"];
136
+ /** Model that the current phase applies to, if any. */
137
+ phaseModelId: ModelId$1 | null;
138
+ /** Progress of the current phase from 0 to 1, or null. */
139
+ phaseProgress: number | null;
140
+ /** Per-model runtime information (e.g. WebGPU availability). */
141
+ runtimeInfoByModel: ReturnType<TTSRuntime["getState"]>["runtimeInfoByModel"];
142
+ error: string | null;
143
+ }
144
+ /** Options for selecting a model or voice in the BrowserTTS facade. */
145
+ interface BrowserTTSSelectionOptions {
146
+ modelId?: ModelId$1;
147
+ voiceId?: VoiceId$1;
148
+ /** Callback invoked with download/load progress from 0 to 1. */
149
+ onProgress?: (progress: number) => void;
150
+ }
151
+ /** Options for speech synthesis, including model/voice overrides and playback speed. */
152
+ interface BrowserTTSSpeakOptions {
153
+ modelId?: ModelId$1;
154
+ voiceId?: VoiceId$1;
155
+ /** Playback speed multiplier (e.g. 1.5 for 1.5x). */
156
+ speed?: number;
157
+ /** Models to try in order if the primary model fails. */
158
+ fallbackModelIds?: ModelId$1[];
159
+ }
160
+ /**
161
+ * High-level browser TTS facade that adds model family grouping, selection, and audio playback over
162
+ * the core TTSRuntime.
163
+ */
164
+ interface BrowserTTS {
165
+ /** The underlying core TTS runtime. */
166
+ readonly runtime: TTSRuntime;
167
+ /** Returns a snapshot of the current state. */
168
+ getState(): BrowserTTSState;
169
+ /** Subscribes to state changes; returns an unsubscribe function. */
170
+ subscribe(listener: (state: BrowserTTSState) => void): () => void;
171
+ listModels(): BrowserTTSModel[];
172
+ listFamilies(): BrowserTTSFamily[];
173
+ /** Returns the model for the given ID, or the currently active model if omitted. */
174
+ getModel(modelId?: ModelId$1 | null): BrowserTTSModel | null;
175
+ /** Returns the family for the given ID, or the currently active family if omitted. */
176
+ getFamily(familyId?: string | null): BrowserTTSFamily | null;
177
+ getSelectedModel(): BrowserTTSModel | null;
178
+ getSelectedFamily(): BrowserTTSFamily | null;
179
+ /** Returns the install state for the given or active model. */
180
+ getInstallState(modelId?: ModelId$1 | null): InstallState$1 | null;
181
+ /** Returns true if the given or active model's assets are fully available. */
182
+ isInstalled(modelId?: ModelId$1 | null): boolean;
183
+ listVoices(modelId?: ModelId$1): Promise<Voice$1[]>;
184
+ getSelectedVoice(): Voice$1 | null;
185
+ /** Activates a model, downloading assets if necessary. */
186
+ selectModel(modelId: ModelId$1, options?: Omit<BrowserTTSSelectionOptions, "modelId">): Promise<void>;
187
+ /** Activates a model family, picking the best supported variant. */
188
+ selectFamily(familyId: string, options?: BrowserTTSSelectionOptions & {
189
+ /** Preferred model variant within the family. */variantModel?: ModelId$1;
190
+ }): Promise<void>;
191
+ /** Activates a voice, resolving its parent model automatically. */
192
+ selectVoice(voiceId: VoiceId$1, options?: Omit<BrowserTTSSelectionOptions, "voiceId">): Promise<void>;
193
+ /** Ensures the selected (or specified) model is loaded and ready to speak. */
194
+ ready(options?: BrowserTTSSelectionOptions): Promise<void>;
195
+ /** Downloads model assets without activating the model. */
196
+ download(modelId?: ModelId$1, onProgress?: (progress: number) => void): Promise<void>;
197
+ /** Removes previously downloaded model assets. */
198
+ removeDownload(modelId?: ModelId$1): Promise<void>;
199
+ /** Synthesizes and plays text through the browser audio player. */
200
+ speak(text: string, options?: BrowserTTSSpeakOptions): Promise<void>;
201
+ /** Returns an async iterable of audio chunks for streaming playback. */
202
+ synthesizeStream(text: string, options?: BrowserTTSSpeakOptions): AsyncIterable<AudioData$1>;
203
+ /** Synthesizes text and returns the complete audio data. */
204
+ synthesize(text: string, options?: BrowserTTSSpeakOptions): Promise<AudioData$1>;
205
+ /** Stops any in-progress playback. */
206
+ stop(): void;
207
+ /** Releases all resources held by this instance. */
208
+ dispose(): void;
209
+ }
210
+ /**
211
+ * Create a TTSRuntime pre-configured with official browser adapters, catalog, and IndexedDB
212
+ * storage.
213
+ */
214
+ declare function createBrowserTTSRuntime(options?: BrowserTTSRuntimeOptions): TTSRuntime;
215
+ /** Create a fully configured BrowserTTS instance ready for model selection and speech synthesis. */
216
+ declare function createBrowserTTS(options?: BrowserTTSRuntimeOptions): BrowserTTS;
217
+ //#endregion
218
+ //#region src/storage/indexeddb-asset-store.d.ts
219
+ /** IndexedDB-backed asset store for caching downloaded TTS model bundles in the browser. */
220
+ declare class IndexedDbAssetStore implements AssetStore$1 {
221
+ private readonly dbName;
222
+ private dbPromise;
223
+ constructor(prefix?: string);
224
+ stageAsset(bundle: AssetBundleKey, assetName: string, data: ArrayBuffer): Promise<void>;
225
+ activateBundle(bundle: AssetBundleKey, assetNames: string[]): Promise<void>;
226
+ isInstalled(bundle: AssetBundleKey, requiredAssetNames?: string[]): Promise<boolean>;
227
+ getAsset(bundle: AssetBundleKey, assetName: string): Promise<ArrayBuffer | null>;
228
+ removeBundle(bundle: AssetBundleKey): Promise<void>;
229
+ private openDb;
230
+ private idbGet;
231
+ private txWrite;
232
+ }
233
+ //#endregion
234
+ //#region src/storage/localstorage-asset-store.d.ts
235
+ /** Minimal synchronous key-value storage interface compatible with `window.localStorage`. */
236
+ interface StorageLike {
237
+ getItem(key: string): string | null;
238
+ setItem(key: string, value: string): void;
239
+ removeItem(key: string): void;
240
+ }
241
+ /**
242
+ * LocalStorage-backed asset store for tiny assets, tests, and demos only.
243
+ *
244
+ * This store base64-encodes binary assets into synchronous `localStorage`, which is typically
245
+ * capped around 5 MB and adds roughly 33% storage overhead. It is not suitable for real ONNX model
246
+ * bundles; use `IndexedDbAssetStore` or a custom async store for production caches.
247
+ */
248
+ declare class LocalStorageAssetStore implements AssetStore$1 {
249
+ private readonly prefix;
250
+ private readonly storage;
251
+ constructor(prefix?: string, storage?: StorageLike);
252
+ stageAsset(bundle: AssetBundleKey, assetName: string, data: ArrayBuffer): Promise<void>;
253
+ activateBundle(bundle: AssetBundleKey, assetNames: string[]): Promise<void>;
254
+ isInstalled(bundle: AssetBundleKey, requiredAssetNames?: string[]): Promise<boolean>;
255
+ getAsset(bundle: AssetBundleKey, assetName: string): Promise<ArrayBuffer | null>;
256
+ removeBundle(bundle: AssetBundleKey): Promise<void>;
257
+ private assetKey;
258
+ private metaKey;
259
+ private stagingMetaKey;
260
+ private readMeta;
261
+ private removeAssetSet;
262
+ private setItem;
263
+ }
264
+ //#endregion
265
+ export { type AssetStore, type AudioData, BrowserAudioPlaybackEvent, BrowserAudioPlaybackMode, BrowserAudioPlayer, BrowserAudioPlayerOptions, BrowserTTS, BrowserTTSFamily, BrowserTTSFamilyVariant, BrowserTTSModel, BrowserTTSRuntimeOptions, BrowserTTSSelectionOptions, BrowserTTSSpeakOptions, BrowserTTSState, IndexedDbAssetStore, type InstallState, LocalStorageAssetStore, type ModelId, type ModelRuntimeInfo, type RuntimePhase, type StorageLike, type Voice, type VoiceId, audioDataToAudioBuffer, createBrowserTTS, createBrowserTTSRuntime };
package/dist/index.js ADDED
@@ -0,0 +1,873 @@
1
+ import { createTTSRuntime, getModelSizeBytes, isInstallStateAvailable, normalizeSpeakSpeed, resolveInstallState, resolveModelDistribution } from "@polytts/core";
2
+ import { officialAdapters } from "@polytts/browser-adapters";
3
+ import { officialCatalog } from "@polytts/presets";
4
+ export * from "@polytts/browser-adapters";
5
+ export * from "@polytts/presets";
6
+ //#region src/audio.ts
7
+ /** Convert a polytts AudioData object to a Web Audio API AudioBuffer. */
8
+ function audioDataToAudioBuffer(audio) {
9
+ const frameCount = audio.channels[0]?.length ?? 0;
10
+ const numberOfChannels = Math.max(audio.channels.length, 1);
11
+ const buffer = new OfflineAudioContext(numberOfChannels, Math.max(frameCount, 1), audio.sampleRate).createBuffer(numberOfChannels, Math.max(frameCount, 1), audio.sampleRate);
12
+ for (let channelIndex = 0; channelIndex < numberOfChannels; channelIndex += 1) buffer.getChannelData(channelIndex).set(audio.channels[channelIndex] ?? new Float32Array(frameCount || 1));
13
+ return buffer;
14
+ }
15
+ function audioDataToWavBytes(audio) {
16
+ const channelCount = Math.max(audio.channels.length, 1);
17
+ const frameCount = audio.channels[0]?.length ?? 0;
18
+ const bytesPerSample = 2;
19
+ const blockAlign = channelCount * bytesPerSample;
20
+ const byteRate = audio.sampleRate * blockAlign;
21
+ const dataSize = frameCount * blockAlign;
22
+ const buffer = new ArrayBuffer(44 + dataSize);
23
+ const view = new DataView(buffer);
24
+ const writeString = (offset, value) => {
25
+ for (let index = 0; index < value.length; index += 1) view.setUint8(offset + index, value.charCodeAt(index));
26
+ };
27
+ writeString(0, "RIFF");
28
+ view.setUint32(4, 36 + dataSize, true);
29
+ writeString(8, "WAVE");
30
+ writeString(12, "fmt ");
31
+ view.setUint32(16, 16, true);
32
+ view.setUint16(20, 1, true);
33
+ view.setUint16(22, channelCount, true);
34
+ view.setUint32(24, audio.sampleRate, true);
35
+ view.setUint32(28, byteRate, true);
36
+ view.setUint16(32, blockAlign, true);
37
+ view.setUint16(34, 16, true);
38
+ writeString(36, "data");
39
+ view.setUint32(40, dataSize, true);
40
+ let offset = 44;
41
+ for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
42
+ const sample = Math.max(-1, Math.min(1, audio.channels[channelIndex]?.[frameIndex] ?? 0));
43
+ view.setInt16(offset, sample < 0 ? sample * 32768 : sample * 32767, true);
44
+ offset += bytesPerSample;
45
+ }
46
+ return buffer;
47
+ }
48
/**
 * Browser audio player that uses Web Audio API (or an HTMLAudioElement fallback on iOS) to play
 * synthesized speech.
 *
 * Only one playback is tracked at a time: `play()` and `playStream()` call `stop()` first, and
 * `stop()` rejects the in-flight playback promise with an `AbortError`. This holds for all three
 * modes, including the audio-element fallback.
 */
var BrowserAudioPlayer = class {
  /** Lazily created AudioContext used for all Web Audio playback. */
  ctx = null;
  /** Most recently started buffer source node, or null once it ended or was stopped. */
  source = null;
  /** Every buffer source node that has been started and has not ended or been stopped yet. */
  activeSources = /* @__PURE__ */ new Set();
  /** `{ cleanup, reject }` hooks of the in-flight playback promise; consumed by `stop()`. */
  pendingPlayback = null;
  /** Incremented by each `stop()` so a playback suspended in `ctx.resume()` can detect cancellation. */
  playbackGeneration = 0;
  _isPlaying = false;
  /**
   * @param options Optional `onAudioReady` / `onPlaybackQueued` lifecycle callbacks.
   */
  constructor(options = {}) {
    this.options = options;
  }
  /** True while a playback started by `play()` or `playStream()` is in progress. */
  get isPlaying() {
    return this._isPlaying;
  }
  /**
   * Best-effort priming of the AudioContext: resumes it if suspended and plays a one-sample silent
   * buffer. Failures are ignored.
   */
  warmup() {
    const ctx = this.getContext();
    // Fire-and-forget, but attach a handler so a refused resume does not surface as an
    // unhandled rejection.
    if (ctx.state === "suspended") Promise.resolve(ctx.resume()).catch(() => {});
    try {
      const silent = ctx.createBuffer(1, 1, ctx.sampleRate);
      const source = ctx.createBufferSource();
      source.buffer = silent;
      source.connect(ctx.destination);
      source.start();
    } catch {}
  }
  /**
   * Play a complete audio clip and resolve when it finishes.
   *
   * @param audio AudioData to play.
   * @param signal Aborting it stops playback and rejects with an `AbortError`.
   * @param speed Playback-rate multiplier, normalised by `normalizeSpeakSpeed`.
   * @throws DOMException named `AbortError` when aborted or superseded by `stop()`.
   */
  async play(audio, signal, speed) {
    this.stop();
    if (signal.aborted) throw new DOMException("Aborted", "AbortError");
    if (this.isIOS()) return this.playViaAudioElement(audio, signal, speed);
    this.options.onAudioReady?.({
      mode: "buffer",
      at: performance.now()
    });
    const buffer = audioDataToAudioBuffer(audio);
    const ctx = this.getContext();
    if (ctx.state === "suspended") {
      const generation = this.playbackGeneration;
      await ctx.resume();
      // An abort or stop() that happened while resuming would otherwise go unnoticed, because
      // the abort listener and pendingPlayback are only registered further down.
      this.assertStillCurrent(signal, generation);
    }
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.playbackRate.value = normalizeSpeakSpeed(speed);
    source.connect(ctx.destination);
    source.start();
    this.options.onPlaybackQueued?.({
      mode: "buffer",
      at: performance.now()
    });
    this.activeSources.add(source);
    this.source = source;
    this._isPlaying = true;
    await new Promise((resolve, reject) => {
      const cleanup = () => {
        source.removeEventListener?.("ended", onEnded);
        signal.removeEventListener("abort", onAbort);
        if (this.pendingPlayback?.cleanup === cleanup) this.pendingPlayback = null;
      };
      const release = () => {
        cleanup();
        this.activeSources.delete(source);
        if (this.source === source) this.source = null;
        this._isPlaying = false;
      };
      const onEnded = () => {
        release();
        resolve();
      };
      const onAbort = () => {
        release();
        try {
          source.stop();
        } catch {}
        reject(new DOMException("Aborted", "AbortError"));
      };
      this.pendingPlayback = {
        cleanup,
        reject
      };
      source.addEventListener("ended", onEnded);
      signal.addEventListener("abort", onAbort);
    });
  }
  /**
   * Play audio chunks back-to-back as they arrive and resolve once the stream has finished and
   * every scheduled chunk has ended.
   *
   * @param audioStream Async iterable of AudioData chunks.
   * @param signal Aborting it stops all scheduled chunks and rejects with an `AbortError`.
   * @param speed Playback-rate multiplier applied to every chunk.
   * @throws DOMException named `AbortError` when aborted or stopped; otherwise the stream's error.
   */
  async playStream(audioStream, signal, speed) {
    this.stop();
    if (signal.aborted) throw new DOMException("Aborted", "AbortError");
    const ctx = this.getContext();
    if (ctx.state === "suspended") {
      const generation = this.playbackGeneration;
      await ctx.resume();
      // Same reasoning as in play(): cancellation during the resume must not be lost.
      this.assertStillCurrent(signal, generation);
    }
    const playbackRate = normalizeSpeakSpeed(speed);
    let settled = false;
    let streamFinished = false;
    let queuedSources = 0;
    // Context time at which the next chunk should start so chunks play gaplessly.
    let scheduledAt = ctx.currentTime;
    const scheduledSources = /* @__PURE__ */ new Set();
    let sawFirstChunk = false;
    let reportedPlaybackStart = false;
    await new Promise((resolve, reject) => {
      const cleanup = () => {
        signal.removeEventListener("abort", onAbort);
        if (this.pendingPlayback?.cleanup === cleanup) this.pendingPlayback = null;
      };
      const rejectPlayback = (reason) => {
        if (settled) return;
        settled = true;
        cleanup();
        this._isPlaying = false;
        reject(reason);
      };
      const resolvePlayback = () => {
        if (settled) return;
        settled = true;
        cleanup();
        this._isPlaying = false;
        resolve();
      };
      const stopScheduledSources = () => {
        const sources = [...scheduledSources];
        scheduledSources.clear();
        for (const source of sources) {
          this.activeSources.delete(source);
          if (this.source === source) this.source = null;
          try {
            source.stop();
          } catch {}
        }
      };
      const maybeResolve = () => {
        if (streamFinished && queuedSources === 0) resolvePlayback();
      };
      const onAbort = () => {
        stopScheduledSources();
        rejectPlayback(new DOMException("Aborted", "AbortError"));
      };
      this.pendingPlayback = {
        cleanup,
        reject: rejectPlayback
      };
      signal.addEventListener("abort", onAbort);
      this._isPlaying = true;
      (async () => {
        try {
          for await (const audio of audioStream) {
            if (settled || signal.aborted) throw new DOMException("Aborted", "AbortError");
            if (!sawFirstChunk) {
              sawFirstChunk = true;
              this.options.onAudioReady?.({
                mode: "stream",
                at: performance.now()
              });
            }
            const buffer = audioDataToAudioBuffer(audio);
            const source = ctx.createBufferSource();
            source.buffer = buffer;
            source.playbackRate.value = playbackRate;
            source.connect(ctx.destination);
            const onEnded = () => {
              source.removeEventListener?.("ended", onEnded);
              if (settled) return;
              scheduledSources.delete(source);
              this.activeSources.delete(source);
              if (this.source === source) this.source = null;
              queuedSources--;
              maybeResolve();
            };
            scheduledSources.add(source);
            this.activeSources.add(source);
            this.source = source;
            queuedSources++;
            source.addEventListener("ended", onEnded);
            // Never schedule in the past: if synthesis fell behind, start as soon as possible.
            const startAt = Math.max(scheduledAt, ctx.currentTime);
            scheduledAt = startAt + buffer.duration / playbackRate;
            source.start(startAt);
            if (!reportedPlaybackStart) {
              reportedPlaybackStart = true;
              this.options.onPlaybackQueued?.({
                mode: "stream",
                at: performance.now() + Math.max(0, startAt - ctx.currentTime) * 1e3
              });
            }
          }
          streamFinished = true;
          maybeResolve();
        } catch (error) {
          // Optional chaining: a stream may throw a nullish value, which must still settle the promise.
          if (error?.name === "AbortError") {
            onAbort();
            return;
          }
          stopScheduledSources();
          rejectPlayback(error);
        }
      })();
    });
  }
  /**
   * Stop whatever is playing. The in-flight `play()` / `playStream()` promise, if any, rejects
   * with an `AbortError`.
   */
  stop() {
    const sources = [...this.activeSources];
    const pendingPlayback = this.pendingPlayback;
    this.playbackGeneration += 1;
    this.source = null;
    this.activeSources.clear();
    this.pendingPlayback = null;
    this._isPlaying = false;
    pendingPlayback?.cleanup();
    for (const source of sources) try {
      source.stop();
    } catch {}
    pendingPlayback?.reject(new DOMException("Aborted", "AbortError"));
  }
  /** Stop playback and close the AudioContext, if one was created. */
  dispose() {
    this.stop();
    if (this.ctx) {
      // Closing is best-effort; swallow a rejection instead of leaving it unhandled.
      Promise.resolve(this.ctx.close()).catch(() => {});
      this.ctx = null;
    }
  }
  /** Return the shared AudioContext, creating it on first use. */
  getContext() {
    if (!this.ctx) this.ctx = new AudioContext();
    return this.ctx;
  }
  /** Throw an `AbortError` if the signal fired or `stop()` ran since `generation` was captured. */
  assertStillCurrent(signal, generation) {
    if (signal.aborted || generation !== this.playbackGeneration) throw new DOMException("Aborted", "AbortError");
  }
  /**
   * Fallback used when `isIOS()` is true: encode the clip as WAV and play it through an
   * HTMLAudioElement backed by an object URL.
   *
   * The playback is registered as `pendingPlayback`, so `stop()` pauses the element and rejects
   * the promise just like the Web Audio paths.
   */
  playViaAudioElement(audioData, signal, speed) {
    return new Promise((resolve, reject) => {
      this.options.onAudioReady?.({
        mode: "audio-element",
        at: performance.now()
      });
      const wavBuffer = audioDataToWavBytes(audioData);
      const blob = new Blob([wavBuffer], { type: "audio/wav" });
      const url = URL.createObjectURL(blob);
      const audio = new Audio(url);
      audio.playbackRate = normalizeSpeakSpeed(speed);
      this._isPlaying = true;
      let released = false;
      const cleanup = () => {
        // Idempotent: pausing can make the pending play() promise reject later, and that late
        // callback must not clear the state of a newer playback.
        if (released) return;
        released = true;
        audio.onended = null;
        audio.onerror = null;
        audio.pause();
        audio.removeAttribute("src");
        URL.revokeObjectURL(url);
        signal.removeEventListener("abort", onAbort);
        if (this.pendingPlayback?.cleanup === cleanup) this.pendingPlayback = null;
        this._isPlaying = false;
      };
      const onAbort = () => {
        cleanup();
        reject(new DOMException("Aborted", "AbortError"));
      };
      audio.onended = () => {
        cleanup();
        resolve();
      };
      audio.onerror = () => {
        cleanup();
        reject(/* @__PURE__ */ new Error("Audio playback failed"));
      };
      this.pendingPlayback = {
        cleanup,
        reject
      };
      audio.play().then(() => {
        this.options.onPlaybackQueued?.({
          mode: "audio-element",
          at: performance.now()
        });
      }).catch((error) => {
        cleanup();
        reject(error);
      });
      signal.addEventListener("abort", onAbort);
    });
  }
  /**
   * Heuristic iOS/iPadOS detection: an iPad/iPhone/iPod user agent, or a "MacIntel" platform that
   * reports more than one touch point. Returns false when `navigator` is undefined.
   */
  isIOS() {
    if (typeof navigator === "undefined") return false;
    return /iPad|iPhone|iPod/.test(navigator.userAgent) || navigator.platform === "MacIntel" && navigator.maxTouchPoints > 1;
  }
};
311
+ //#endregion
312
+ //#region src/storage/indexeddb-asset-store.ts
313
/** Build the revision-independent key `<adapterId>/<modelId>` that identifies a bundle. */
function bundleId$1(bundle) {
  const { adapterId, modelId } = bundle;
  return `${adapterId}/${modelId}`;
}
316
/** Build the key prefix `<adapterId>/<modelId>/<revision>` shared by all assets of one bundle revision. */
function revisionPrefix$1(bundle) {
  const { adapterId, modelId, revision } = bundle;
  return `${adapterId}/${modelId}/${revision}`;
}
319
/**
 * IndexedDB-backed asset store for caching downloaded TTS model bundles in the browser.
 *
 * One database named `<prefix>-asset-cache` holds three object stores:
 * - `staging`: assets written by `stageAsset`, keyed `adapterId/modelId/revision/assetName`;
 * - `active`: assets promoted by `activateBundle`, using the same key layout;
 * - `meta`: one record per bundle keyed `adapterId/modelId`, describing the active revision.
 */
var IndexedDbAssetStore = class {
	dbName;
	/** Cached open request so the database is only opened once per store instance. */
	dbPromise = null;
	/**
	 * @param prefix Namespace prepended to the database name (`<prefix>-asset-cache`).
	 */
	constructor(prefix = "polytts") {
		this.dbName = `${prefix}-asset-cache`;
	}
	/**
	 * Write one asset of a bundle revision into the `staging` store.
	 *
	 * @param bundle Bundle descriptor (`adapterId`, `modelId`, `revision`).
	 * @param assetName Name of the asset inside the bundle.
	 * @param data Asset payload, stored as-is.
	 */
	async stageAsset(bundle, assetName, data) {
		const db = await this.openDb();
		const key = `${revisionPrefix$1(bundle)}/${assetName}`;
		await this.txWrite(db, ["staging"], (tx) => {
			tx.objectStore("staging").put(data, key);
		});
	}
	/**
	 * Promote staged assets to the `active` store and record the bundle metadata.
	 *
	 * The promotion, the metadata write, the removal of the staged copies and the removal of
	 * assets belonging to a previously active revision all happen in one transaction.
	 *
	 * @param bundle Bundle descriptor (`adapterId`, `modelId`, `revision`).
	 * @param assetNames Names of the staged assets that make up the bundle.
	 * @throws Error when any of the named assets has not been staged.
	 */
	async activateBundle(bundle, assetNames) {
		const db = await this.openDb();
		const prefix = revisionPrefix$1(bundle);
		const stagedAssets = /* @__PURE__ */ new Map();
		for (const assetName of assetNames) {
			const data = await this.idbGet(db, "staging", `${prefix}/${assetName}`);
			if (data == null) throw new Error(`Missing staged asset: ${assetName}`);
			stagedAssets.set(assetName, data);
		}
		// Assets of an older revision become unreachable once `meta` is overwritten, so they
		// have to be deleted here or they would stay in the database forever.
		const previousMeta = await this.idbGet(db, "meta", bundleId$1(bundle));
		const staleKeys = [];
		if (previousMeta && previousMeta.revision !== bundle.revision) {
			for (const assetName of previousMeta.assetNames ?? []) {
				staleKeys.push(`${bundle.adapterId}/${bundle.modelId}/${previousMeta.revision}/${assetName}`);
			}
		}
		await this.txWrite(db, [
			"active",
			"meta",
			"staging"
		], (tx) => {
			const active = tx.objectStore("active");
			const meta = tx.objectStore("meta");
			const staging = tx.objectStore("staging");
			for (const staleKey of staleKeys) active.delete(staleKey);
			for (const [assetName, data] of stagedAssets) active.put(data, `${prefix}/${assetName}`);
			meta.put({
				adapterId: bundle.adapterId,
				modelId: bundle.modelId,
				revision: bundle.revision,
				activatedAt: Date.now(),
				assetNames
			}, bundleId$1(bundle));
			for (const assetName of assetNames) staging.delete(`${prefix}/${assetName}`);
		});
	}
	/**
	 * Report whether the given bundle revision is the active one.
	 *
	 * @param bundle Bundle descriptor (`adapterId`, `modelId`, `revision`).
	 * @param requiredAssetNames Optional asset names that must all be listed in the metadata.
	 * @returns `true` when metadata exists for this revision and lists every required asset.
	 */
	async isInstalled(bundle, requiredAssetNames) {
		const db = await this.openDb();
		const meta = await this.idbGet(db, "meta", bundleId$1(bundle));
		if (!meta || meta.revision !== bundle.revision) return false;
		if (!requiredAssetNames?.length) return true;
		return requiredAssetNames.every((assetName) => meta.assetNames.includes(assetName));
	}
	/**
	 * Read one asset of the given bundle revision from the `active` store.
	 *
	 * @returns The stored payload, or `null` when no such asset is stored.
	 */
	async getAsset(bundle, assetName) {
		const db = await this.openDb();
		return this.idbGet(db, "active", `${revisionPrefix$1(bundle)}/${assetName}`);
	}
	/**
	 * Delete the active assets and the metadata record of a bundle.
	 *
	 * The revision recorded in the metadata (not `bundle.revision`) decides which asset keys are
	 * deleted. Does nothing when no metadata exists for the bundle.
	 */
	async removeBundle(bundle) {
		const db = await this.openDb();
		const meta = await this.idbGet(db, "meta", bundleId$1(bundle));
		if (!meta) return;
		await this.txWrite(db, ["active", "meta"], (tx) => {
			const active = tx.objectStore("active");
			const metaStore = tx.objectStore("meta");
			for (const assetName of meta.assetNames) active.delete(`${bundle.adapterId}/${bundle.modelId}/${meta.revision}/${assetName}`);
			metaStore.delete(bundleId$1(bundle));
		});
	}
	/**
	 * Open (and on first use create) the database, caching the pending/open connection.
	 *
	 * A failed open clears the cache so the next call retries.
	 */
	async openDb() {
		if (this.dbPromise) return this.dbPromise;
		this.dbPromise = new Promise((resolve, reject) => {
			const req = indexedDB.open(this.dbName, 1);
			req.onupgradeneeded = () => {
				const db = req.result;
				if (!db.objectStoreNames.contains("staging")) db.createObjectStore("staging");
				if (!db.objectStoreNames.contains("active")) db.createObjectStore("active");
				if (!db.objectStoreNames.contains("meta")) db.createObjectStore("meta");
			};
			req.onsuccess = () => resolve(req.result);
			req.onerror = () => {
				this.dbPromise = null;
				reject(req.error);
			};
		});
		return this.dbPromise;
	}
	/**
	 * Read a single value in its own read-only transaction.
	 *
	 * @returns The stored value, or `null` when the key is absent.
	 */
	idbGet(db, storeName, key) {
		return new Promise((resolve, reject) => {
			const req = db.transaction(storeName, "readonly").objectStore(storeName).get(key);
			req.onsuccess = () => resolve(req.result ?? null);
			req.onerror = () => reject(req.error);
		});
	}
	/**
	 * Run `callback` inside one read-write transaction over `stores`.
	 *
	 * The returned promise resolves when the transaction completes and rejects when it errors
	 * or aborts. If `callback` throws, the transaction is aborted so none of the requests it
	 * already queued are committed, and the promise rejects with the thrown error.
	 */
	txWrite(db, stores, callback) {
		return new Promise((resolve, reject) => {
			const tx = db.transaction(stores, "readwrite");
			tx.oncomplete = () => resolve();
			tx.onerror = () => reject(tx.error);
			// An abort (for example a quota failure at commit time) is not guaranteed to be
			// preceded by an `error` event; without this handler the promise would never settle.
			tx.onabort = () => reject(tx.error ?? new Error("IndexedDB transaction was aborted"));
			try {
				callback(tx);
			} catch (error) {
				reject(error);
				try {
					tx.abort();
				} catch {
					// The transaction already finished or aborted; nothing left to roll back.
				}
			}
		});
	}
};
417
+ //#endregion
418
+ //#region src/storage/localstorage-asset-store.ts
419
/**
 * Build the revision-independent identifier of a bundle.
 *
 * @param bundle Object exposing `adapterId` and `modelId`.
 * @returns The string `adapterId/modelId`.
 */
function bundleId(bundle) {
	const { adapterId, modelId } = bundle;
	return `${adapterId}/${modelId}`;
}
422
/**
 * Build the key prefix shared by every asset of one specific bundle revision.
 *
 * @param bundle Object exposing `adapterId`, `modelId` and `revision`.
 * @returns The string `adapterId/modelId/revision`.
 */
function revisionPrefix(bundle) {
	const { adapterId, modelId, revision } = bundle;
	return `${adapterId}/${modelId}/${revision}`;
}
425
/**
 * Split a sliceable sequence into consecutive pieces of at most `size` elements.
 *
 * @param value Any value with `length` and `slice` (the file passes a `Uint8Array`).
 * @param size Maximum number of elements per piece.
 * @returns The pieces in order; the last one may be shorter than `size`.
 */
function chunkBytes(value, size) {
	const pieces = [];
	let start = 0;
	while (start < value.length) {
		pieces.push(value.slice(start, start + size));
		start += size;
	}
	return pieces;
}
430
/**
 * Encode binary data as a base64 string.
 *
 * The bytes are converted to a binary string in slices of 32768 bytes so that no single
 * `String.fromCharCode` call receives an excessive number of arguments.
 *
 * @param data Source passed to `new Uint8Array(...)` (the file passes an `ArrayBuffer`).
 * @returns The base64 encoding of the bytes.
 */
function arrayBufferToBase64(data) {
	const view = new Uint8Array(data);
	const sliceLength = 32768;
	const segments = [];
	for (let offset = 0; offset < view.length; offset += sliceLength) {
		segments.push(String.fromCharCode(...view.slice(offset, offset + sliceLength)));
	}
	return btoa(segments.join(""));
}
436
/**
 * Decode a base64 string into a freshly allocated `ArrayBuffer`.
 *
 * @param value Base64 text accepted by `atob`.
 * @returns A buffer holding one byte per character of the decoded binary string.
 */
function base64ToArrayBuffer(value) {
	const decoded = atob(value);
	const out = new Uint8Array(decoded.length);
	let position = 0;
	while (position < decoded.length) {
		out[position] = decoded.charCodeAt(position);
		position += 1;
	}
	return out.buffer;
}
442
/**
 * Return the global `localStorage` object.
 *
 * @returns `globalThis.localStorage`.
 * @throws Error when `globalThis` has no `localStorage` property or its value is falsy.
 */
function getDefaultStorage() {
	const available = "localStorage" in globalThis && Boolean(globalThis.localStorage);
	if (available) return globalThis.localStorage;
	throw new Error("LocalStorageAssetStore requires a localStorage-compatible storage implementation");
}
446
/**
 * Decide whether a value thrown by a storage write signals an exhausted storage quota.
 *
 * - `DOMException`: matched by the names `QuotaExceededError` and `NS_ERROR_DOM_QUOTA_REACHED`,
 *   or by the legacy numeric codes 22 and 1014 that older engines report for the same failure.
 * - Any other `Error`: matched when its `name` contains "quota" (case-insensitive).
 *
 * The `DOMException` global is only consulted when it exists, so the check also works with an
 * injected storage implementation in runtimes that do not define it.
 *
 * @param error The caught value; may be anything.
 * @returns `true` when the value looks like a quota failure.
 */
function isQuotaExceededError(error) {
	if (typeof error !== "object" || error === null) return false;
	if (typeof DOMException !== "undefined" && error instanceof DOMException) {
		return error.name === "QuotaExceededError" || error.name === "NS_ERROR_DOM_QUOTA_REACHED" || error.code === 22 || error.code === 1014;
	}
	return error instanceof Error && /quota/i.test(error.name);
}
449
/**
 * LocalStorage-backed asset store for tiny assets, tests, and demos only.
 *
 * This store base64-encodes binary assets into synchronous `localStorage`, which is typically
 * capped around 5 MB and adds roughly 33% storage overhead. It is not suitable for real ONNX model
 * bundles; use `IndexedDbAssetStore` or a custom async store for production caches.
 *
 * Key layout (all prefixed with `<prefix>:`):
 * - `staging:<adapterId>/<modelId>/<revision>/<assetName>`: staged asset payloads;
 * - `active:<adapterId>/<modelId>/<revision>/<assetName>`: activated asset payloads;
 * - `staging-meta:<adapterId>/<modelId>`: JSON record of the revision being staged;
 * - `meta:<adapterId>/<modelId>`: JSON record of the active revision.
 */
var LocalStorageAssetStore = class {
	prefix;
	storage;
	/**
	 * @param prefix Namespace prepended to every storage key.
	 * @param storage Object with `getItem`/`setItem`/`removeItem`; defaults to the global
	 *   `localStorage`.
	 */
	constructor(prefix = "polytts", storage = getDefaultStorage()) {
		this.prefix = prefix;
		this.storage = storage;
	}
	/**
	 * Store one asset of a bundle revision under the staging keys.
	 *
	 * Staged assets of a different revision of the same bundle are discarded first. If the
	 * staging metadata cannot be written, the asset written by this call is removed again.
	 *
	 * @throws Error when the storage rejects a write (quota failures get an explanatory message).
	 */
	async stageAsset(bundle, assetName, data) {
		const stagingMetaKey = this.stagingMetaKey(bundle);
		const existingMeta = this.readMeta(stagingMetaKey);
		if (existingMeta && existingMeta.revision !== bundle.revision) {
			this.removeAssetSet("staging", {
				adapterId: bundle.adapterId,
				modelId: bundle.modelId,
				revision: existingMeta.revision
			}, existingMeta.assetNames);
			this.storage.removeItem(stagingMetaKey);
		}
		const assetKey = this.assetKey("staging", bundle, assetName);
		this.setItem(assetKey, arrayBufferToBase64(data), `staging asset "${assetName}"`);
		const alreadyStaged = existingMeta?.revision === bundle.revision ? existingMeta.assetNames : [];
		const updatedMeta = {
			adapterId: bundle.adapterId,
			modelId: bundle.modelId,
			revision: bundle.revision,
			activatedAt: Date.now(),
			assetNames: [...new Set([...alreadyStaged, assetName])]
		};
		try {
			this.setItem(stagingMetaKey, JSON.stringify(updatedMeta), "staging metadata");
		} catch (error) {
			// Keep storage consistent: an asset without metadata could never be cleaned up.
			this.storage.removeItem(assetKey);
			throw error;
		}
	}
	/**
	 * Move staged assets to the active keys and record the bundle as active.
	 *
	 * Each asset is moved one at a time (written to the active key, then removed from staging)
	 * to keep peak storage usage low.
	 *
	 * @param assetNames Names of the staged assets that make up the bundle.
	 * @throws Error when a named asset was never staged or when the storage rejects a write.
	 */
	async activateBundle(bundle, assetNames) {
		const metaKey = this.metaKey(bundle);
		const stagingMetaKey = this.stagingMetaKey(bundle);
		const previousMeta = this.readMeta(metaKey);
		const stagingMeta = this.readMeta(stagingMetaKey);
		const stagedAssets = /* @__PURE__ */ new Map();
		for (const assetName of assetNames) {
			const value = this.storage.getItem(this.assetKey("staging", bundle, assetName));
			// A zero-length asset is stored as "", so only a missing key counts as absent.
			if (value == null) throw new Error(`Missing staged asset: ${assetName}`);
			stagedAssets.set(assetName, value);
		}
		if (previousMeta && previousMeta.revision !== bundle.revision) {
			this.removeAssetSet("active", {
				adapterId: bundle.adapterId,
				modelId: bundle.modelId,
				revision: previousMeta.revision
			}, previousMeta.assetNames);
			// The old assets are gone, so the old metadata must go with them; otherwise a failed
			// write below would leave `isInstalled` reporting a bundle that has no assets.
			this.storage.removeItem(metaKey);
		}
		for (const [assetName, value] of stagedAssets) {
			this.setItem(this.assetKey("active", bundle, assetName), value, `active asset "${assetName}"`);
			this.storage.removeItem(this.assetKey("staging", bundle, assetName));
		}
		const meta = {
			adapterId: bundle.adapterId,
			modelId: bundle.modelId,
			revision: bundle.revision,
			activatedAt: Date.now(),
			assetNames
		};
		this.setItem(metaKey, JSON.stringify(meta), "active metadata");
		// Drop whatever else the staging record still lists, using the revision it recorded.
		if (stagingMeta) this.removeAssetSet("staging", {
			adapterId: bundle.adapterId,
			modelId: bundle.modelId,
			revision: stagingMeta.revision
		}, stagingMeta.assetNames);
		this.storage.removeItem(stagingMetaKey);
	}
	/**
	 * Report whether the given bundle revision is the active one.
	 *
	 * @param requiredAssetNames Optional asset names that must all be listed in the metadata.
	 * @returns `true` when metadata exists for this revision and lists every required asset.
	 */
	async isInstalled(bundle, requiredAssetNames) {
		const meta = this.readMeta(this.metaKey(bundle));
		if (!meta || meta.revision !== bundle.revision) return false;
		if (!requiredAssetNames?.length) return true;
		return requiredAssetNames.every((assetName) => meta.assetNames.includes(assetName));
	}
	/**
	 * Read and decode one active asset of the given bundle revision.
	 *
	 * @returns The decoded bytes (possibly empty), or `null` when the asset is not stored.
	 */
	async getAsset(bundle, assetName) {
		const value = this.storage.getItem(this.assetKey("active", bundle, assetName));
		return value == null ? null : base64ToArrayBuffer(value);
	}
	/**
	 * Remove the active and staged assets of a bundle together with both metadata records.
	 *
	 * The revisions recorded in the metadata (not `bundle.revision`) decide which keys are removed.
	 */
	async removeBundle(bundle) {
		const activeMeta = this.readMeta(this.metaKey(bundle));
		if (activeMeta) {
			this.removeAssetSet("active", {
				adapterId: bundle.adapterId,
				modelId: bundle.modelId,
				revision: activeMeta.revision
			}, activeMeta.assetNames);
			this.storage.removeItem(this.metaKey(bundle));
		}
		const stagingMeta = this.readMeta(this.stagingMetaKey(bundle));
		if (stagingMeta) {
			this.removeAssetSet("staging", {
				adapterId: bundle.adapterId,
				modelId: bundle.modelId,
				revision: stagingMeta.revision
			}, stagingMeta.assetNames);
			this.storage.removeItem(this.stagingMetaKey(bundle));
		}
	}
	/** Storage key of one asset; `kind` is `"staging"` or `"active"`. */
	assetKey(kind, bundle, assetName) {
		return `${this.prefix}:${kind}:${revisionPrefix(bundle)}/${assetName}`;
	}
	/** Storage key of the active-revision metadata record of a bundle. */
	metaKey(bundle) {
		return `${this.prefix}:meta:${bundleId(bundle)}`;
	}
	/** Storage key of the staging metadata record of a bundle. */
	stagingMetaKey(bundle) {
		return `${this.prefix}:staging-meta:${bundleId(bundle)}`;
	}
	/**
	 * Read and parse a metadata record.
	 *
	 * @returns The parsed record, or `null` when the key is absent or the stored text is not a
	 *   JSON object with an `assetNames` array (treated the same as "no metadata" so a corrupted
	 *   entry cannot make every later call throw).
	 */
	readMeta(key) {
		const value = this.storage.getItem(key);
		if (!value) return null;
		try {
			const parsed = JSON.parse(value);
			const wellFormed = parsed !== null && typeof parsed === "object" && Array.isArray(parsed.assetNames);
			return wellFormed ? parsed : null;
		} catch {
			return null;
		}
	}
	/** Remove the storage entries of every named asset of one bundle revision. */
	removeAssetSet(kind, bundle, assetNames) {
		for (const assetName of assetNames) this.storage.removeItem(this.assetKey(kind, bundle, assetName));
	}
	/**
	 * Write a value, translating quota failures into an error that explains the store's limits.
	 *
	 * @param label Human-readable description of what is being written, used in the message.
	 * @throws Error wrapping the quota failure (as `cause`), or the original error otherwise.
	 */
	setItem(key, value, label) {
		try {
			this.storage.setItem(key, value);
		} catch (error) {
			if (isQuotaExceededError(error)) throw new Error(`LocalStorageAssetStore ran out of browser storage while writing ${label}. This store is only suitable for tiny assets, demos, and tests; use IndexedDbAssetStore or another async store for real model bundles.`, { cause: error });
			throw error;
		}
	}
};
573
+ //#endregion
574
+ //#region src/browser-tts.ts
575
/**
 * Turn an identifier-like string into space-separated words with capitalized first letters.
 *
 * Words are delimited by runs of hyphens, underscores, or whitespace; empty pieces are dropped.
 * Only the first character of each word is upper-cased, the rest is left untouched.
 *
 * @param value Text such as `"my-model_name"`.
 * @returns The words joined by single spaces, e.g. `"My Model Name"`.
 */
function titleCase(value) {
	const words = [];
	for (const piece of value.split(/[-_\s]+/)) {
		if (!piece) continue;
		words.push(piece.charAt(0).toUpperCase() + piece.slice(1));
	}
	return words.join(" ");
}
578
/**
 * Determine how many voices a model offers.
 *
 * @param model Object with optional `voices` array and a `voiceMode` field.
 * @returns The length of `voices` when it is non-empty; otherwise `null` for models whose
 *   `voiceMode` is `"multi"` (count unknown) and `1` for every other mode.
 */
function getVoiceCount(model) {
	const listed = model.voices?.length;
	if (listed) return listed;
	if (model.voiceMode === "multi") return null;
	return 1;
}
582
/**
 * Project a registered model plus the runtime state into the model summary exposed to callers.
 *
 * @param model Registered model definition.
 * @param runtimeState Runtime snapshot providing `installStates` and `supportedModelIds`.
 * @returns A plain object combining the model's descriptive fields with its distribution kind,
 *   voice count, bundle size, support flag, and download status/progress.
 */
function toBrowserTTSModel(model, runtimeState) {
	const modelId = model.id;
	const install = resolveInstallState(model, runtimeState.installStates[modelId]);
	const distributionKind = resolveModelDistribution(model).kind;
	// Only managed-asset distributions need an explicit download step.
	const requiresDownload = distributionKind === "managed-assets";
	const downloaded = isInstallStateAvailable(install);
	const summary = {
		id: modelId,
		name: model.name,
		family: model.family,
		languages: model.languages,
		voiceMode: model.voiceMode,
		distributionKind,
		voiceCount: getVoiceCount(model),
		bundleSizeBytes: getModelSizeBytes(model),
		supported: runtimeState.supportedModelIds.includes(modelId),
		requiresDownload,
		downloaded,
		downloadStatus: install.status,
		downloadProgress: install.progress,
		description: model.description,
		manifestUrl: model.manifestUrl,
		homepage: model.homepage
	};
	return summary;
}
606
/**
 * Choose the display name of a model family.
 *
 * @param family Family identifier.
 * @param models Models belonging to the family.
 * @returns The model's own name when the family has exactly one model; otherwise the
 *   title-cased family identifier.
 */
function getFamilyDisplayName(family, models) {
	const [onlyModel] = models;
	return models.length === 1 ? onlyModel.name : titleCase(family);
}
610
/**
 * Group models by their `family` field and summarize each group.
 *
 * Families appear in the order their first model appears in `models`.
 *
 * @param models Registered models.
 * @param runtimeState Runtime snapshot providing `activeModelId`, `installStates` and
 *   `supportedModelIds`.
 * @returns One entry per family with its display name, the union of its models' languages,
 *   whether any model is supported, the active model id if it belongs to the family (else
 *   `null`), the model ids, and one variant record per model.
 */
function toBrowserTTSFamilies(models, runtimeState) {
	const modelsByFamily = /* @__PURE__ */ new Map();
	for (const model of models) {
		let bucket = modelsByFamily.get(model.family);
		if (!bucket) {
			bucket = [];
			modelsByFamily.set(model.family, bucket);
		}
		bucket.push(model);
	}
	// One variant per model: its representative voice plus its download state.
	const describeVariant = (model) => {
		const install = resolveInstallState(model, runtimeState.installStates[model.id]);
		const downloaded = isInstallStateAvailable(install);
		// Prefer the voice matching the model's default id, else the first listed voice.
		const voice = model.voices?.find((candidate) => candidate.id === model.defaultVoiceId) ?? model.voices?.[0];
		return {
			modelId: model.id,
			modelName: model.name,
			voiceId: voice?.id ?? model.defaultVoiceId,
			voiceName: voice?.name,
			downloaded,
			downloadStatus: install.status,
			downloadProgress: install.progress
		};
	};
	const families = [];
	for (const [familyId, members] of modelsByFamily) {
		const modelIds = members.map((member) => member.id);
		const activeId = runtimeState.activeModelId;
		const selectedModelId = activeId && modelIds.includes(activeId) ? activeId : null;
		const languages = [...new Set(members.flatMap((member) => member.languages))];
		const variants = members.map(describeVariant);
		families.push({
			id: familyId,
			name: getFamilyDisplayName(familyId, members),
			languages,
			supported: members.some((member) => runtimeState.supportedModelIds.includes(member.id)),
			selectedModelId,
			modelIds,
			variants
		});
	}
	return families;
}
646
/**
 * Derive the public BrowserTTS state snapshot from the runtime's current state.
 *
 * The coarse `status` field is resolved in priority order:
 * 1. `"error"` when an error is set or the phase is `"error"`;
 * 2. `"speaking"` when the phase is `"speaking"` or `isSpeaking` is set;
 * 3. `"preparing"` when the phase is `"installing"`/`"loading"` or `isPreparing` is set;
 * 4. `"idle"` otherwise.
 *
 * @param runtime Object whose `getState()` returns the runtime snapshot.
 * @returns The snapshot with model and family summaries, the current selection, status, phase
 *   details, per-model runtime info, and the error.
 */
function toBrowserTTSState(runtime) {
	const snapshot = runtime.getState();
	const models = snapshot.models.map((model) => toBrowserTTSModel(model, snapshot));
	const families = toBrowserTTSFamilies(snapshot.models, snapshot);
	let selectedFamilyId = null;
	if (snapshot.activeModelId != null) {
		const activeModel = snapshot.models.find((model) => model.id === snapshot.activeModelId);
		selectedFamilyId = activeModel?.family ?? null;
	}
	let status;
	if (snapshot.error || snapshot.phase === "error") status = "error";
	else if (snapshot.phase === "speaking" || snapshot.isSpeaking) status = "speaking";
	else if (snapshot.phase === "installing" || snapshot.phase === "loading" || snapshot.isPreparing) status = "preparing";
	else status = "idle";
	return {
		models,
		families,
		selectedFamilyId,
		voices: snapshot.voices,
		selectedModelId: snapshot.activeModelId,
		selectedVoiceId: snapshot.activeVoiceId,
		status,
		phase: snapshot.phase,
		phaseModelId: snapshot.phaseModelId,
		phaseProgress: snapshot.phaseProgress,
		runtimeInfoByModel: snapshot.runtimeInfoByModel,
		error: snapshot.error
	};
}
663
/**
 * Facade over a TTS runtime that exposes model/family/voice selection, downloads, and synthesis.
 *
 * It keeps a derived state snapshot (see `toBrowserTTSState`) that is recomputed and pushed to
 * subscribers every time the runtime notifies a change.
 */
var BrowserTTSImpl = class {
	/** Latest derived snapshot handed out by `getState`. */
	state;
	listeners = /* @__PURE__ */ new Set();
	/** Function returned by `runtime.subscribe`, called on `dispose`. */
	unsubscribeRuntime;
	runtime;
	/**
	 * @param runtime The TTS runtime every operation is delegated to.
	 */
	constructor(runtime) {
		this.runtime = runtime;
		this.state = toBrowserTTSState(runtime);
		this.unsubscribeRuntime = this.runtime.subscribe(() => {
			this.state = toBrowserTTSState(this.runtime);
			for (const listener of this.listeners) listener(this.state);
		});
	}
	/** Return the latest derived state snapshot. */
	getState() {
		return this.state;
	}
	/**
	 * Register a listener that receives the new snapshot after every runtime change.
	 *
	 * @returns A function that removes the listener again.
	 */
	subscribe(listener) {
		this.listeners.add(listener);
		return () => {
			this.listeners.delete(listener);
		};
	}
	/** Model summaries from the current snapshot. */
	listModels() {
		return this.getState().models;
	}
	/** Family summaries from the current snapshot. */
	listFamilies() {
		return this.getState().families;
	}
	/**
	 * Look up a model summary.
	 *
	 * @param modelId Model id; defaults to the runtime's active model.
	 * @returns The summary, or `null` when no id can be resolved or no model matches.
	 */
	getModel(modelId) {
		const targetModelId = modelId ?? this.runtime.getState().activeModelId;
		if (!targetModelId) return null;
		return this.listModels().find((model) => model.id === targetModelId) ?? null;
	}
	/**
	 * Look up a family summary.
	 *
	 * @param familyId Family id; defaults to the family of the runtime's active model.
	 * @returns The summary, or `null` when no id can be resolved or no family matches.
	 */
	getFamily(familyId) {
		let targetFamilyId = familyId;
		if (targetFamilyId == null) {
			const runtimeState = this.runtime.getState();
			const activeModel = runtimeState.activeModelId ? runtimeState.models.find((model) => model.id === runtimeState.activeModelId) : void 0;
			targetFamilyId = activeModel?.family ?? null;
		}
		if (!targetFamilyId) return null;
		return this.listFamilies().find((family) => family.id === targetFamilyId) ?? null;
	}
	/** Summary of the active model, or `null`. */
	getSelectedModel() {
		return this.getModel();
	}
	/** Summary of the active model's family, or `null`. */
	getSelectedFamily() {
		return this.getFamily();
	}
	/**
	 * Resolve the install state of a model.
	 *
	 * @param modelId Model id; defaults to the runtime's active model.
	 * @returns The resolved install state, or `null` when the model cannot be determined.
	 */
	getInstallState(modelId) {
		const resolvedModelId = modelId ?? this.runtime.getState().activeModelId;
		if (!resolvedModelId) return null;
		const model = this.runtime.getModel(resolvedModelId);
		if (!model) return null;
		return resolveInstallState(model, this.runtime.getState().installStates[model.id]);
	}
	/** Whether the model's install state counts as available (`isInstallStateAvailable`). */
	isInstalled(modelId) {
		return isInstallStateAvailable(this.getInstallState(modelId));
	}
	/**
	 * List the voices of a model via the runtime.
	 *
	 * @param modelId Model id; defaults as described on `resolveModelId`.
	 */
	async listVoices(modelId) {
		return this.runtime.listVoices(modelId ?? this.resolveModelId());
	}
	/** The runtime's active voice object, or `null` when none is active or it is not listed. */
	getSelectedVoice() {
		const state = this.runtime.getState();
		if (!state.activeVoiceId) return null;
		return state.voices.find((voice) => voice.id === state.activeVoiceId) ?? null;
	}
	/**
	 * Prepare the given model (optionally with a voice) through the runtime.
	 *
	 * @param options Optional `voiceId` and `onProgress`.
	 */
	async selectModel(modelId, options) {
		await this.ready({
			modelId,
			voiceId: options?.voiceId,
			onProgress: options?.onProgress
		});
	}
	/**
	 * Prepare a model from the given family.
	 *
	 * The model is chosen by the first rule that yields one:
	 * 1. `options.variantModel`, if it belongs to the family;
	 * 2. the family model that offers `options.voiceId`, if a voice was requested;
	 * 3. the family's currently selected model;
	 * 4. the first supported model of the family;
	 * 5. the first model of the family.
	 *
	 * @param familyId Family id; defaults to the family of the active model.
	 * @param options Optional `variantModel`, `voiceId` and `onProgress`.
	 * @throws Error when the family is unknown or has no models.
	 */
	async selectFamily(familyId, options) {
		const family = this.getFamily(familyId);
		if (!family) throw new Error(`Unknown model family: ${familyId}`);
		const requestedVoice = options?.voiceId;
		const requestedVariant = options?.variantModel;
		// Normalize to null: a `false` produced by `&&` would not be skipped by `??` and would
		// short-circuit the whole fallback chain.
		const variantModelId = requestedVariant && family.modelIds.includes(requestedVariant) ? requestedVariant : null;
		const voiceModelId = requestedVoice ? this.findModelIdForVoice(requestedVoice, family.id) : null;
		const modelId = variantModelId ?? voiceModelId ?? family.selectedModelId ?? family.modelIds.find((candidate) => {
			return this.getModel(candidate)?.supported;
		}) ?? family.modelIds[0];
		if (!modelId) throw new Error(`Model family "${familyId}" has no registered models`);
		await this.selectModel(modelId, {
			voiceId: requestedVoice,
			onProgress: options?.onProgress
		});
	}
	/**
	 * Prepare the given voice, switching to another model of the same family if needed.
	 *
	 * @param options Optional `modelId` and `onProgress`.
	 */
	async selectVoice(voiceId, options) {
		await this.ready({
			modelId: options?.modelId,
			voiceId,
			onProgress: options?.onProgress
		});
	}
	/**
	 * Resolve the model/voice selection and ask the runtime to prepare it.
	 *
	 * @param options Optional `modelId`, `voiceId` and `onProgress`.
	 */
	async ready(options) {
		const selection = this.resolveModelSelection({
			modelId: options?.modelId,
			voiceId: options?.voiceId
		});
		await this.runtime.prepare(selection.modelId, {
			voiceId: selection.voiceId,
			onProgress: options?.onProgress
		});
	}
	/** Install a model through the runtime; `modelId` defaults as on `resolveModelId`. */
	async download(modelId, onProgress) {
		await this.runtime.install(this.resolveModelId(modelId), onProgress);
	}
	/** Uninstall a model through the runtime; `modelId` defaults as on `resolveModelId`. */
	async removeDownload(modelId) {
		await this.runtime.uninstall(this.resolveModelId(modelId));
	}
	/**
	 * Speak text through the runtime.
	 *
	 * @param options Optional `modelId`, `voiceId`, `speed` and `fallbackModelIds`.
	 */
	async speak(text, options) {
		await this.runtime.speak(text, this.resolveSynthesisOptions(options));
	}
	/**
	 * Synthesize text through the runtime and return its result.
	 *
	 * @param options Optional `modelId`, `voiceId`, `speed` and `fallbackModelIds`.
	 */
	async synthesize(text, options) {
		return this.runtime.synthesize(text, this.resolveSynthesisOptions(options));
	}
	/**
	 * Start streaming synthesis through the runtime and return whatever it returns.
	 *
	 * @param options Optional `modelId`, `voiceId`, `speed` and `fallbackModelIds`.
	 */
	synthesizeStream(text, options) {
		return this.runtime.synthesizeStream(text, this.resolveSynthesisOptions(options));
	}
	/** Ask the runtime to stop. */
	stop() {
		this.runtime.stop();
	}
	/** Detach from the runtime, drop all listeners, and dispose the runtime. */
	dispose() {
		this.unsubscribeRuntime();
		this.listeners.clear();
		this.runtime.dispose();
	}
	/**
	 * Resolve which model an operation should target.
	 *
	 * Falls back, in order, to the active model, the first supported model, and the first
	 * registered model.
	 *
	 * @throws Error when no model is registered at all.
	 */
	resolveModelId(modelId) {
		const runtimeState = this.runtime.getState();
		const resolved = modelId ?? runtimeState.activeModelId ?? runtimeState.supportedModelIds[0] ?? runtimeState.models[0]?.id;
		if (!resolved) throw new Error("No models are registered in this BrowserTTS instance");
		return resolved;
	}
	/**
	 * Resolve a consistent model/voice pair.
	 *
	 * When the resolved model does not offer the requested voice but another model of the same
	 * family does, that model is returned instead. Otherwise the pair is passed through as-is.
	 *
	 * @param options Optional `modelId` and `voiceId`.
	 * @returns `{ modelId }` when no voice was requested, else `{ modelId, voiceId }`.
	 */
	resolveModelSelection(options) {
		const modelId = this.resolveModelId(options?.modelId);
		const voiceId = options?.voiceId;
		if (!voiceId) return { modelId };
		const model = this.runtime.getModel(modelId);
		if (model && !this.modelHasVoice(model, voiceId)) {
			const matchingFamilyModelId = this.findModelIdForVoice(voiceId, model.family);
			if (matchingFamilyModelId) return {
				modelId: matchingFamilyModelId,
				voiceId
			};
		}
		return {
			modelId,
			voiceId
		};
	}
	/** Build the option object shared by `speak`, `synthesize` and `synthesizeStream`. */
	resolveSynthesisOptions(options) {
		const selection = this.resolveModelSelection({
			modelId: options?.modelId,
			voiceId: options?.voiceId
		});
		return {
			modelId: selection.modelId,
			voiceId: selection.voiceId,
			speed: options?.speed,
			fallbackModelIds: options?.fallbackModelIds
		};
	}
	/**
	 * Find the first registered model that offers a voice.
	 *
	 * @param familyId When given, only models of this family are considered.
	 * @returns The model id, or `null` when no model matches.
	 */
	findModelIdForVoice(voiceId, familyId) {
		return this.runtime.getState().models.find((model) => (familyId == null || model.family === familyId) && this.modelHasVoice(model, voiceId))?.id ?? null;
	}
	/** Whether `voiceId` is the model's default voice or appears in its `voices` list. */
	modelHasVoice(model, voiceId) {
		return model.defaultVoiceId === voiceId || model.voices?.some((voice) => voice.id === voiceId) === true;
	}
};
853
/**
 * Create a TTSRuntime pre-configured with the official browser adapters and catalog.
 *
 * Defaults applied when the corresponding option is `null` or `undefined`:
 * - `assetStore`: a new `IndexedDbAssetStore("polytts")`;
 * - `audioPlayer`: a new `BrowserAudioPlayer`.
 *
 * `extraAdapters` and `extraCatalogs` are appended after the official ones; `fetch`,
 * `initialModelId` and `initialVoiceId` are passed through unchanged.
 *
 * @param options Optional overrides and extensions.
 * @returns Whatever `createTTSRuntime` returns for the assembled configuration.
 */
function createBrowserTTSRuntime(options = {}) {
	const assetStore = options.assetStore ?? new IndexedDbAssetStore("polytts");
	const audioPlayer = options.audioPlayer ?? new BrowserAudioPlayer();
	const adapters = [...officialAdapters, ...options.extraAdapters ?? []];
	const catalogs = [officialCatalog, ...options.extraCatalogs ?? []];
	const runtimeConfig = {
		assetStore,
		audioPlayer,
		fetch: options.fetch,
		initialModelId: options.initialModelId,
		initialVoiceId: options.initialVoiceId,
		adapters,
		catalogs
	};
	return createTTSRuntime(runtimeConfig);
}
868
/**
 * Create a BrowserTTS instance backed by a runtime built with `createBrowserTTSRuntime`.
 *
 * @param options Options forwarded unchanged to `createBrowserTTSRuntime`.
 * @returns A new `BrowserTTSImpl` wrapping that runtime.
 */
function createBrowserTTS(options = {}) {
	const runtime = createBrowserTTSRuntime(options);
	return new BrowserTTSImpl(runtime);
}
872
+ //#endregion
873
+ export { BrowserAudioPlayer, IndexedDbAssetStore, LocalStorageAssetStore, audioDataToAudioBuffer, createBrowserTTS, createBrowserTTSRuntime };
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@polytts/browser",
3
+ "version": "0.1.0",
4
+ "description": "Browser entrypoint for polytts text-to-speech runtimes.",
5
+ "keywords": [
6
+ "browser",
7
+ "onnx",
8
+ "polytts",
9
+ "text-to-speech",
10
+ "tts",
11
+ "web"
12
+ ],
13
+ "homepage": "https://github.com/Dunqing/polytts/tree/main/packages/browser#readme",
14
+ "bugs": {
15
+ "url": "https://github.com/Dunqing/polytts/issues"
16
+ },
17
+ "license": "MIT",
18
+ "repository": {
19
+ "type": "git",
20
+ "url": "git+https://github.com/Dunqing/polytts.git",
21
+ "directory": "packages/browser"
22
+ },
23
+ "files": [
24
+ "dist"
25
+ ],
26
+ "type": "module",
27
+ "exports": {
28
+ ".": {
29
+ "types": "./dist/index.d.ts",
30
+ "default": "./dist/index.js"
31
+ },
32
+ "./package.json": "./package.json"
33
+ },
34
+ "publishConfig": {
35
+ "access": "public"
36
+ },
37
+ "dependencies": {
38
+ "@polytts/browser-adapters": "0.1.0",
39
+ "@polytts/core": "0.1.0",
40
+ "@polytts/presets": "0.1.0"
41
+ },
42
+ "devDependencies": {
43
+ "fake-indexeddb": "latest",
44
+ "vite-plus": "latest"
45
+ },
46
+ "scripts": {
47
+ "build": "vp pack",
48
+ "test": "vp test",
49
+ "test:run": "vp test run",
50
+ "test:browser": "vp test run --config vitest.browser.config.ts"
51
+ },
52
+ "main": "./dist/index.js",
53
+ "module": "./dist/index.js",
54
+ "types": "./dist/index.d.ts"
55
+ }