@polytts/node 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DengQing dengqing0821@gmail.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # @polytts/node
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@polytts/node)](https://www.npmjs.com/package/@polytts/node)
4
+
5
+ Node entrypoint for [`polytts`](https://github.com/Dunqing/polytts).
6
+
7
+ Use this package for Node runtimes such as Electron main or preload code, Raycast extensions, and server-side synthesis workflows.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ npm install @polytts/node
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```ts
18
+ import { createNodeTTS } from "@polytts/node";
19
+
20
+ const tts = createNodeTTS();
21
+
22
+ await tts.ready();
23
+ await tts.writeWav("./hello.wav", "Hello from Node.");
24
+ ```
25
+
26
+ For progressive synthesis:
27
+
28
+ ```ts
29
+ for await (const chunk of tts.synthesizeStream("Hello from a streaming Node runtime.")) {
30
+ console.log(chunk.sampleRate, chunk.channels[0]?.length ?? 0);
31
+ }
32
+ ```
33
+
34
+ The high-level API also exposes:
35
+
36
+ - `install(modelId?, onProgress?)`
37
+ - `uninstall(modelId?)`
38
+ - `getInstallState(modelId?)`
39
+ - `isInstalled(modelId?)`
40
+
41
+ `@polytts/node` intentionally does not expose `speak()`. In Node hosts, playback is application-specific, so the package focuses on synthesis and file/byte output (`synthesize()`, `synthesizeToWav()`, `writeWav()`).
42
+
43
+ ## Built-in models
44
+
45
+ - Kokoro 82M
46
+ - Piper voice bundles
47
+ - KittenTTS Mini and Nano
48
+ - Supertonic 2
49
+
50
+ `Browser Speech` is intentionally browser-only and is not part of this package.
package/dist/index.d.mts ADDED
@@ -0,0 +1,138 @@
1
+ import { AssetBundleKey, AssetStore, AssetStore as AssetStore$1, AudioData, AudioData as AudioData$1, CatalogSource, InstallState, InstallState as InstallState$1, ModelId, ModelId as ModelId$1, ModelRuntimeInfo, ModelSpec, PrepareOptions, RuntimePhase, RuntimeState, SpeakOptions, TTSAdapter, Voice, Voice as Voice$1, VoiceId, VoiceId as VoiceId$1 } from "@polytts/core";
2
+ import { officialNodeAdapters } from "@polytts/node-adapters";
3
+ import { officialNodeCatalog, officialNodeModels } from "@polytts/presets";
4
+
5
+ //#region src/storage/fs-asset-store.d.ts
6
/**
 * Asset store that keeps TTS model files on the local filesystem. When constructed without a
 * root directory it uses `~/.polytts/cache`.
 */
declare class FsAssetStore implements AssetStore$1 {
  private readonly rootDir;
  /** @param rootDir Directory under which all bundles are cached. */
  constructor(rootDir?: string);
  /** Writes one asset into the bundle's staging area, creating directories as needed. */
  stageAsset(bundle: AssetBundleKey, assetName: string, data: ArrayBuffer): Promise<void>;
  /**
   * Promotes the staged assets to the active revision of the bundle and records the revision
   * and asset names in the bundle metadata. Rejects if a listed asset was never staged.
   */
  activateBundle(bundle: AssetBundleKey, assetNames: string[]): Promise<void>;
  /**
   * Resolves to true when the recorded revision matches `bundle.revision` and every required
   * asset (or, when none are given, every recorded asset) is present.
   */
  isInstalled(bundle: AssetBundleKey, requiredAssetNames?: string[]): Promise<boolean>;
  /** Resolves to the asset bytes, or `null` when the bundle revision or the asset is absent. */
  getAsset(bundle: AssetBundleKey, assetName: string): Promise<ArrayBuffer | null>;
  /** Deletes everything stored for the bundle. */
  removeBundle(bundle: AssetBundleKey): Promise<void>;
  private readMeta;
  private bundleDir;
  private metaPath;
  private revisionDir;
  private activeRevisionDir;
  private stagingDir;
  private stagedAssetPath;
  private activeAssetPath;
}
27
+ //#endregion
28
+ //#region src/index.d.ts
29
/** Audio produced by a synthesis call. */
type NodeAudio = AudioData$1;
/** Options accepted when preparing (loading) a model. */
type NodePrepareOptions = PrepareOptions;
/** Options accepted by the synthesis methods (voice, speed, etc.). */
type NodeSynthesizeOptions = SpeakOptions;
/** Adapter that plugs a specific TTS backend (e.g. Piper, Kokoro) into the runtime. */
type NodeTTSAdapter = TTSAdapter;
/** The portion of the core `RuntimeState` that the Node runtime exposes to callers. */
type NodeRuntimeState = Pick<
  RuntimeState,
  | "models"
  | "supportedModelIds"
  | "activeModelId"
  | "activeVoiceId"
  | "voices"
  | "isPreparing"
  | "isSpeaking"
  | "phase"
  | "phaseModelId"
  | "phaseProgress"
  | "runtimeInfoByModel"
  | "error"
  | "installStates"
  | "installStateHydrated"
>;
39
/** Options shared by {@link createNodeTTSRuntime} and {@link createNodeTTS}. */
interface NodeTTSRuntimeOptions {
  /** Adapters to register; the official Node adapters are used when omitted or empty. */
  adapters?: NodeTTSAdapter[];
  /** Model specs declared inline (merged with `catalogs`). */
  models?: ModelSpec[];
  /** Catalog sources that supply model definitions. */
  catalogs?: CatalogSource[];
  /** Store used for downloaded model files; overrides the filesystem default. */
  assetStore?: AssetStore$1;
  /** Root directory of the default filesystem asset store. */
  cacheDir?: string;
  /** `fetch` implementation used to download assets. */
  fetch?: typeof fetch;
  /** Model that should be active right after creation. */
  initialModelId?: ModelId$1;
  /** Voice that should be active right after creation. */
  initialVoiceId?: VoiceId$1;
}
58
/** Low-level Node runtime: model management plus synthesis, without selection conveniences. */
interface NodeTTSRuntime {
  /** Returns the current state, restricted to the fields of {@link NodeRuntimeState}. */
  getState(): NodeRuntimeState;
  /** Registers a state-change listener and returns a function that removes it. */
  subscribe(listener: (state: NodeRuntimeState) => void): () => void;
  /** Lists the model specs known to the runtime. */
  listModels(): ModelSpec[];
  /** Returns the spec for `modelId`, or `null` when there is none. */
  getModel(modelId: ModelId$1): ModelSpec | null;
  /** Returns the install state of `modelId`, or `null` when the model is unknown. */
  getInstallState(modelId: ModelId$1): InstallState$1 | null;
  /** Reports whether the model's assets are fully available. */
  isInstalled(modelId: ModelId$1): boolean;
  /** Resolves to the voice list, optionally for a specific model. */
  listVoices(modelId?: ModelId$1): Promise<Voice$1[]>;
  /** Downloads the model's assets; `onProgress` receives progress updates. */
  install(modelId: ModelId$1, onProgress?: (progress: number) => void): Promise<void>;
  /** Deletes the model's previously downloaded assets. */
  uninstall(modelId: ModelId$1): Promise<void>;
  /** Loads the model so that it can synthesize. */
  prepare(modelId: ModelId$1, options?: NodePrepareOptions): Promise<void>;
  /** Streams synthesis results as an async iterable of audio chunks. */
  synthesizeStream(text: string, options?: NodeSynthesizeOptions): AsyncIterable<NodeAudio>;
  /** Synthesizes `text` and resolves to the complete audio. */
  synthesize(text: string, options?: NodeSynthesizeOptions): Promise<NodeAudio>;
  /** Frees every resource the runtime holds. */
  dispose(): void;
}
83
/** High-level Node TTS facade: model/voice selection, WAV export and convenience helpers. */
interface NodeTTS {
  /** The low-level runtime this instance delegates to. */
  readonly runtime: NodeTTSRuntime;
  /** Returns the current runtime state. */
  getState(): NodeRuntimeState;
  /** Registers a state-change listener and returns a function that removes it. */
  subscribe(listener: (state: NodeRuntimeState) => void): () => void;
  /** Lists the model specs known to the runtime. */
  listModels(): ModelSpec[];
  /** Returns the model with the given ID; `null` when no ID is given or no model matches. */
  getModel(modelId?: ModelId$1 | null): ModelSpec | null;
  /** Returns the active model, or `null` when none is active. */
  getSelectedModel(): ModelSpec | null;
  /** Returns the install state of the given model, falling back to the active model. */
  getInstallState(modelId?: ModelId$1 | null): InstallState$1 | null;
  /** Reports whether the given (or active) model's assets are fully available. */
  isInstalled(modelId?: ModelId$1 | null): boolean;
  /** Resolves to the voice list, optionally for a specific model. */
  listVoices(modelId?: ModelId$1): Promise<Voice$1[]>;
  /** Returns the active voice, or `null` when none is active. */
  getSelectedVoice(): Voice$1 | null;
  /** Downloads model assets; targets the active model when `modelId` is omitted. */
  install(modelId?: ModelId$1, onProgress?: (progress: number) => void): Promise<void>;
  /** Deletes downloaded assets; targets the active model when `modelId` is omitted. */
  uninstall(modelId?: ModelId$1): Promise<void>;
  /** Makes `modelId` the active model, downloading and loading it when required. */
  selectModel(modelId: ModelId$1, options?: Omit<NodePrepareOptions, "modelId">): Promise<void>;
  /** Makes `voiceId` the active voice, working out which model it belongs to. */
  selectVoice(voiceId: VoiceId$1): Promise<void>;
  /** Makes sure the selected (or specified) model is loaded and able to synthesize. */
  ready(options?: {
    modelId?: ModelId$1;
    voiceId?: VoiceId$1;
    onProgress?: (progress: number) => void;
  }): Promise<void>;
  /** Streams synthesis results as an async iterable of audio chunks. */
  synthesizeStream(text: string, options?: NodeSynthesizeOptions): AsyncIterable<NodeAudio>;
  /** Synthesizes `text` and resolves to the complete audio. */
  synthesize(text: string, options?: NodeSynthesizeOptions): Promise<NodeAudio>;
  /** Synthesizes `text` and resolves to WAV-encoded bytes. */
  synthesizeToWav(text: string, options?: NodeSynthesizeOptions): Promise<Uint8Array>;
  /** Synthesizes `text` and stores it as a WAV file at `path`. */
  writeWav(path: string, text: string, options?: NodeSynthesizeOptions): Promise<void>;
  /** Frees every resource this instance holds. */
  dispose(): void;
}
127
/**
 * Builds the low-level Node TTS runtime. The official adapters and model catalogs are used
 * whenever the options do not supply their own. Prefer {@link createNodeTTS} when the
 * higher-level API is sufficient.
 */
declare function createNodeTTSRuntime(options?: NodeTTSRuntimeOptions): NodeTTSRuntime;
132
/**
 * Builds the high-level Node TTS instance, which adds model/voice selection, WAV encoding and
 * file output on top of the runtime. Recommended entry point for server-side TTS.
 */
declare function createNodeTTS(options?: NodeTTSRuntimeOptions): NodeTTS;
137
+ //#endregion
138
+ export { type AssetStore, type AudioData, FsAssetStore, type InstallState, type ModelId, type ModelRuntimeInfo, NodeAudio, NodePrepareOptions, NodeRuntimeState, NodeSynthesizeOptions, NodeTTS, NodeTTSAdapter, NodeTTSRuntime, NodeTTSRuntimeOptions, type RuntimePhase, type Voice, type VoiceId, createNodeTTS, createNodeTTSRuntime, officialNodeAdapters, officialNodeCatalog, officialNodeModels };
package/dist/index.mjs ADDED
@@ -0,0 +1,357 @@
1
import { access, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
import { homedir } from "node:os";
import { dirname, join } from "node:path";
import { createTTSRuntime, isInstallStateAvailable, resolveInstallState } from "@polytts/core";
import { officialNodeAdapters, officialNodeAdapters as officialNodeAdapters$1 } from "@polytts/node-adapters";
import { officialNodeCatalog, officialNodeModels, officialNodeModels as officialNodeModels$1 } from "@polytts/presets";
7
+ //#region src/storage/fs-asset-store.ts
8
/**
 * Encodes an identifier so that it can be used as exactly one path segment.
 *
 * `encodeURIComponent` escapes path separators but leaves "." untouched, so the identifiers
 * "." and ".." would come through as directory-navigation segments and let a bundle path
 * resolve outside the cache root. Segments made up only of dots are therefore percent-encoded
 * as well; every other identifier is encoded exactly as before.
 *
 * @param value Raw identifier (adapter id, model id, revision, ...).
 * @returns The identifier in a form that is safe to pass to `path.join` as one segment.
 */
function encodeSegment(value) {
  const encoded = encodeURIComponent(value);
  return /^\.+$/.test(encoded) ? encoded.replace(/\./g, "%2E") : encoded;
}
11
/**
 * Copies the bytes covered by a typed-array view into a standalone buffer.
 *
 * The view may cover only part of its backing buffer, so the copy is limited to the view's own
 * byte range.
 *
 * @param data View (e.g. a Node `Buffer`) to copy from.
 * @returns A new buffer holding exactly the view's bytes.
 */
function toArrayBuffer(data) {
  const { buffer, byteOffset, byteLength } = data;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
14
/**
 * Reads and parses a JSON file.
 *
 * A missing file and a file whose content is not valid JSON (for example a metadata file
 * truncated by an interrupted write) are both reported as `null`, so callers treat the data as
 * absent and can rewrite it instead of failing on every later access.
 *
 * @param path Location of the JSON file.
 * @returns The parsed value, or `null` when the file is missing or unparseable.
 * @throws Any other filesystem error (permissions, I/O, ...).
 */
async function readJson(path) {
  let text;
  try {
    text = await readFile(path, "utf8");
  } catch (error) {
    if (error?.code === "ENOENT") return null;
    throw error;
  }
  try {
    return JSON.parse(text);
  } catch (error) {
    // JSON.parse only signals malformed input with SyntaxError.
    if (error instanceof SyntaxError) return null;
    throw error;
  }
}
22
/**
 * Splits an asset name into path segments, rejecting names that would climb out of the
 * directory they are joined onto.
 */
function toAssetSegments(assetName) {
  // Backslashes are checked too because path.join treats them as separators on Windows.
  if (assetName.split(/[\\/]/).includes("..")) {
    throw new Error(`Invalid asset name: ${assetName}`);
  }
  return assetName.split("/");
}
/**
 * Filesystem-backed asset store that caches TTS model files on disk. Defaults to `~/.polytts/cache`
 * when no root directory is provided.
 *
 * Layout below the root directory (identifiers are encoded with `encodeSegment`):
 *   <adapterId>/<modelId>/meta.json                 - active revision and its asset names
 *   <adapterId>/<modelId>/staging/<revision>/...    - assets written by `stageAsset`
 *   <adapterId>/<modelId>/revisions/<revision>/...  - assets promoted by `activateBundle`
 */
var FsAssetStore = class {
  /** @param rootDir Directory that holds every cached bundle. */
  constructor(rootDir = join(homedir(), ".polytts", "cache")) {
    this.rootDir = rootDir;
  }
  /** Writes one asset into the bundle's staging directory, creating parent directories. */
  async stageAsset(bundle, assetName, data) {
    const targetPath = this.stagedAssetPath(bundle, assetName);
    await mkdir(dirname(targetPath), { recursive: true });
    await writeFile(targetPath, Buffer.from(data));
  }
  /**
   * Promotes the staging directory to the active revision and records it in `meta.json`.
   * Rejects (before anything is modified) when one of `assetNames` has not been staged.
   */
  async activateBundle(bundle, assetNames) {
    const stagedDir = this.stagingDir(bundle);
    const activeDir = this.activeRevisionDir(bundle);
    // Only existence matters here, so probe the files instead of loading model-sized
    // payloads into memory.
    await Promise.all(assetNames.map((assetName) => access(this.stagedAssetPath(bundle, assetName))));
    const currentMeta = await this.readMeta(bundle);
    await rm(activeDir, {
      recursive: true,
      force: true
    });
    await mkdir(dirname(activeDir), { recursive: true });
    await rename(stagedDir, activeDir);
    // Drop the revision that was active before, unless it is the one just replaced.
    if (currentMeta && currentMeta.revision !== bundle.revision) await rm(this.revisionDir(bundle, currentMeta.revision), {
      recursive: true,
      force: true
    });
    await writeFile(this.metaPath(bundle), JSON.stringify({
      revision: bundle.revision,
      assetNames
    }), "utf8");
  }
  /**
   * Resolves to true when `meta.json` records `bundle.revision` and every required asset exists.
   * When `requiredAssetNames` is missing or empty, the asset names recorded in `meta.json` are
   * checked instead.
   */
  async isInstalled(bundle, requiredAssetNames) {
    const meta = await this.readMeta(bundle);
    if (!meta || meta.revision !== bundle.revision) return false;
    const assetNames = requiredAssetNames?.length ? requiredAssetNames : meta.assetNames;
    try {
      // Existence probe only; reading the files would pull whole models into memory.
      await Promise.all(assetNames.map((assetName) => access(this.activeAssetPath(bundle, assetName))));
      return true;
    } catch (error) {
      if (error.code === "ENOENT") return false;
      throw error;
    }
  }
  /** Resolves to the asset's bytes, or `null` when the revision is not active or the file is missing. */
  async getAsset(bundle, assetName) {
    const meta = await this.readMeta(bundle);
    if (!meta || meta.revision !== bundle.revision) return null;
    try {
      return toArrayBuffer(await readFile(this.activeAssetPath(bundle, assetName)));
    } catch (error) {
      if (error.code === "ENOENT") return null;
      throw error;
    }
  }
  /** Deletes the bundle directory with its metadata, staging area and revisions. */
  async removeBundle(bundle) {
    await rm(this.bundleDir(bundle), {
      recursive: true,
      force: true
    });
  }
  /** Reads the bundle's `meta.json` via `readJson`. */
  async readMeta(bundle) {
    return readJson(this.metaPath(bundle));
  }
  bundleDir(bundle) {
    return join(this.rootDir, encodeSegment(bundle.adapterId), encodeSegment(bundle.modelId));
  }
  metaPath(bundle) {
    return join(this.bundleDir(bundle), "meta.json");
  }
  revisionDir(bundle, revision = bundle.revision) {
    return join(this.bundleDir(bundle), "revisions", encodeSegment(revision));
  }
  activeRevisionDir(bundle) {
    return this.revisionDir(bundle, bundle.revision);
  }
  stagingDir(bundle) {
    return join(this.bundleDir(bundle), "staging", encodeSegment(bundle.revision));
  }
  /** Path of an asset inside the staging directory; throws for names containing "..". */
  stagedAssetPath(bundle, assetName) {
    return join(this.stagingDir(bundle), ...toAssetSegments(assetName));
  }
  /** Path of an asset inside the active revision directory; throws for names containing "..". */
  activeAssetPath(bundle, assetName) {
    return join(this.activeRevisionDir(bundle), ...toAssetSegments(assetName));
  }
};
110
+ //#endregion
111
+ //#region src/index.ts
112
+ function encodeWav(audio) {
113
+ const channelCount = audio.channels.length;
114
+ if (channelCount === 0) throw new Error("Cannot encode empty audio");
115
+ const frameCount = audio.channels[0].length;
116
+ const bytesPerSample = 2;
117
+ const blockAlign = channelCount * bytesPerSample;
118
+ const byteRate = audio.sampleRate * blockAlign;
119
+ const dataSize = frameCount * blockAlign;
120
+ const buffer = new ArrayBuffer(44 + dataSize);
121
+ const view = new DataView(buffer);
122
+ const output = new Uint8Array(buffer);
123
+ const writeAscii = (offset, value) => {
124
+ for (let i = 0; i < value.length; i += 1) view.setUint8(offset + i, value.charCodeAt(i));
125
+ };
126
+ writeAscii(0, "RIFF");
127
+ view.setUint32(4, 36 + dataSize, true);
128
+ writeAscii(8, "WAVE");
129
+ writeAscii(12, "fmt ");
130
+ view.setUint32(16, 16, true);
131
+ view.setUint16(20, 1, true);
132
+ view.setUint16(22, channelCount, true);
133
+ view.setUint32(24, audio.sampleRate, true);
134
+ view.setUint32(28, byteRate, true);
135
+ view.setUint16(32, blockAlign, true);
136
+ view.setUint16(34, 16, true);
137
+ writeAscii(36, "data");
138
+ view.setUint32(40, dataSize, true);
139
+ let offset = 44;
140
+ for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
141
+ const sample = audio.channels[channelIndex][frameIndex] ?? 0;
142
+ const clamped = Math.max(-1, Math.min(1, sample));
143
+ view.setInt16(offset, clamped < 0 ? clamped * 32768 : clamped * 32767, true);
144
+ offset += bytesPerSample;
145
+ }
146
+ return output;
147
+ }
148
+ function toNodeRuntimeState(state) {
149
+ return {
150
+ models: state.models,
151
+ supportedModelIds: state.supportedModelIds,
152
+ activeModelId: state.activeModelId,
153
+ activeVoiceId: state.activeVoiceId,
154
+ voices: state.voices,
155
+ isPreparing: state.isPreparing,
156
+ isSpeaking: state.isSpeaking,
157
+ phase: state.phase,
158
+ phaseModelId: state.phaseModelId,
159
+ phaseProgress: state.phaseProgress,
160
+ runtimeInfoByModel: state.runtimeInfoByModel,
161
+ error: state.error,
162
+ installStates: state.installStates,
163
+ installStateHydrated: state.installStateHydrated
164
+ };
165
+ }
166
+ function createCatalogs(options) {
167
+ const configuredCatalogs = options.catalogs ?? [];
168
+ if (configuredCatalogs.length > 0) {
169
+ if (options.models?.length) return [{ models: options.models }, ...configuredCatalogs];
170
+ return configuredCatalogs;
171
+ }
172
+ return [{ models: options.models?.length ? options.models : officialNodeModels$1 }];
173
+ }
174
+ var NodeRuntimeImpl = class {
175
+ constructor(runtime) {
176
+ this.runtime = runtime;
177
+ }
178
+ getState() {
179
+ return toNodeRuntimeState(this.runtime.getState());
180
+ }
181
+ subscribe(listener) {
182
+ return this.runtime.subscribe((state) => {
183
+ listener(toNodeRuntimeState(state));
184
+ });
185
+ }
186
+ listModels() {
187
+ return this.runtime.listModels();
188
+ }
189
+ getModel(modelId) {
190
+ return this.runtime.getModel(modelId);
191
+ }
192
+ getInstallState(modelId) {
193
+ const model = this.runtime.getModel(modelId);
194
+ if (!model) return null;
195
+ return resolveInstallState(model, this.runtime.getState().installStates[model.id]);
196
+ }
197
+ isInstalled(modelId) {
198
+ return isInstallStateAvailable(this.getInstallState(modelId));
199
+ }
200
+ listVoices(modelId) {
201
+ return this.runtime.listVoices(modelId);
202
+ }
203
+ install(modelId, onProgress) {
204
+ return this.runtime.install(modelId, onProgress);
205
+ }
206
+ uninstall(modelId) {
207
+ return this.runtime.uninstall(modelId);
208
+ }
209
+ prepare(modelId, options) {
210
+ return this.runtime.prepare(modelId, options);
211
+ }
212
+ synthesize(text, options) {
213
+ return this.runtime.synthesize(text, options);
214
+ }
215
+ synthesizeStream(text, options) {
216
+ return this.runtime.synthesizeStream(text, options);
217
+ }
218
+ dispose() {
219
+ this.runtime.dispose();
220
+ }
221
+ };
222
+ var NodeTTSImpl = class {
223
+ constructor(runtime) {
224
+ this.runtime = runtime;
225
+ }
226
+ getState() {
227
+ return this.runtime.getState();
228
+ }
229
+ subscribe(listener) {
230
+ return this.runtime.subscribe(listener);
231
+ }
232
+ listModels() {
233
+ return this.runtime.listModels();
234
+ }
235
+ getModel(modelId) {
236
+ if (!modelId) return null;
237
+ return this.runtime.getModel(modelId);
238
+ }
239
+ getSelectedModel() {
240
+ return this.getModel(this.runtime.getState().activeModelId);
241
+ }
242
+ getInstallState(modelId) {
243
+ const resolvedModelId = modelId ?? this.runtime.getState().activeModelId;
244
+ if (!resolvedModelId) return null;
245
+ return this.runtime.getInstallState(resolvedModelId);
246
+ }
247
+ isInstalled(modelId) {
248
+ return isInstallStateAvailable(this.getInstallState(modelId));
249
+ }
250
+ listVoices(modelId) {
251
+ return this.runtime.listVoices(modelId);
252
+ }
253
+ getSelectedVoice() {
254
+ const state = this.runtime.getState();
255
+ return state.voices.find((voice) => voice.id === state.activeVoiceId) ?? null;
256
+ }
257
+ async install(modelId, onProgress) {
258
+ const targetModelId = modelId ?? this.runtime.getState().activeModelId;
259
+ if (!targetModelId) throw new Error("No active model selected");
260
+ await this.runtime.install(targetModelId, onProgress);
261
+ }
262
+ async uninstall(modelId) {
263
+ const targetModelId = modelId ?? this.runtime.getState().activeModelId;
264
+ if (!targetModelId) throw new Error("No active model selected");
265
+ await this.runtime.uninstall(targetModelId);
266
+ }
267
+ async selectModel(modelId, options) {
268
+ await this.runtime.prepare(modelId, options);
269
+ }
270
+ async selectVoice(voiceId) {
271
+ const selection = this.resolveModelSelection({ voiceId });
272
+ await this.runtime.prepare(selection.modelId, { voiceId: selection.voiceId });
273
+ }
274
+ async ready(options) {
275
+ const modelId = options?.modelId ?? this.runtime.getState().activeModelId;
276
+ if (!modelId) throw new Error("No active model selected");
277
+ await this.runtime.prepare(modelId, {
278
+ voiceId: options?.voiceId,
279
+ onProgress: options?.onProgress
280
+ });
281
+ }
282
+ synthesize(text, options) {
283
+ return this.runtime.synthesize(text, options);
284
+ }
285
+ synthesizeStream(text, options) {
286
+ return this.runtime.synthesizeStream(text, options);
287
+ }
288
+ async synthesizeToWav(text, options) {
289
+ return encodeWav(await this.runtime.synthesize(text, options));
290
+ }
291
+ async writeWav(path, text, options) {
292
+ const wav = await this.synthesizeToWav(text, options);
293
+ await writeFile(path, Buffer.from(wav.buffer, wav.byteOffset, wav.byteLength));
294
+ }
295
+ dispose() {
296
+ this.runtime.dispose();
297
+ }
298
+ resolveModelId(modelId) {
299
+ const runtimeState = this.runtime.getState();
300
+ const resolved = modelId ?? runtimeState.activeModelId ?? runtimeState.supportedModelIds[0] ?? runtimeState.models[0]?.id;
301
+ if (!resolved) throw new Error("No models are registered in this NodeTTS instance");
302
+ return resolved;
303
+ }
304
+ resolveModelSelection(options) {
305
+ const modelId = this.resolveModelId(options?.modelId);
306
+ const voiceId = options?.voiceId;
307
+ if (!voiceId) return { modelId };
308
+ const model = this.runtime.getModel(modelId);
309
+ if (!model) return {
310
+ modelId,
311
+ voiceId
312
+ };
313
+ if (this.modelHasVoice(model, voiceId)) return {
314
+ modelId,
315
+ voiceId
316
+ };
317
+ const matchingFamilyModelId = this.findModelIdForVoice(voiceId, model.family);
318
+ if (matchingFamilyModelId) return {
319
+ modelId: matchingFamilyModelId,
320
+ voiceId
321
+ };
322
+ return {
323
+ modelId,
324
+ voiceId
325
+ };
326
+ }
327
+ findModelIdForVoice(voiceId, familyId) {
328
+ return this.runtime.getState().models.find((model) => (familyId == null || model.family === familyId) && this.modelHasVoice(model, voiceId))?.id ?? null;
329
+ }
330
+ modelHasVoice(model, voiceId) {
331
+ return model.defaultVoiceId === voiceId || model.voices?.some((voice) => voice.id === voiceId) === true;
332
+ }
333
+ };
334
/**
 * Builds the low-level Node TTS runtime on top of the core runtime.
 *
 * Defaults: the official Node adapters when `options.adapters` is missing or empty, the catalogs
 * computed by `createCatalogs`, and a filesystem asset store rooted at `options.cacheDir` when
 * no `assetStore` is supplied. No audio player is configured. Use {@link createNodeTTS} for the
 * higher-level API.
 */
function createNodeTTSRuntime(options = {}) {
  const { adapters, assetStore, cacheDir, fetch: fetchImpl, initialModelId, initialVoiceId } = options;
  const coreRuntime = createTTSRuntime({
    adapters: adapters?.length ? adapters : officialNodeAdapters$1,
    catalogs: createCatalogs(options),
    assetStore: assetStore ?? new FsAssetStore(cacheDir),
    audioPlayer: null,
    fetch: fetchImpl,
    initialModelId,
    initialVoiceId
  });
  return new NodeRuntimeImpl(coreRuntime);
}
349
/**
 * Builds the high-level Node TTS instance: a runtime created from `options`, wrapped with
 * model/voice selection, WAV encoding and file output. Recommended entry point for server-side
 * TTS usage.
 */
function createNodeTTS(options = {}) {
  const runtime = createNodeTTSRuntime(options);
  return new NodeTTSImpl(runtime);
}
356
+ //#endregion
357
+ export { FsAssetStore, createNodeTTS, createNodeTTSRuntime, officialNodeAdapters, officialNodeCatalog, officialNodeModels };
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@polytts/node",
3
+ "version": "0.1.0",
4
+ "description": "Node entrypoint for polytts text-to-speech runtimes.",
5
+ "keywords": [
6
+ "electron",
7
+ "node",
8
+ "onnx",
9
+ "polytts",
10
+ "raycast",
11
+ "text-to-speech",
12
+ "tts"
13
+ ],
14
+ "homepage": "https://github.com/Dunqing/polytts/tree/main/packages/node#readme",
15
+ "bugs": {
16
+ "url": "https://github.com/Dunqing/polytts/issues"
17
+ },
18
+ "license": "MIT",
19
+ "repository": {
20
+ "type": "git",
21
+ "url": "git+https://github.com/Dunqing/polytts.git",
22
+ "directory": "packages/node"
23
+ },
24
+ "files": [
25
+ "dist"
26
+ ],
27
+ "type": "module",
28
+ "exports": {
29
+ ".": {
30
+ "types": "./dist/index.d.mts",
31
+ "default": "./dist/index.mjs"
32
+ },
33
+ "./package.json": "./package.json"
34
+ },
35
+ "publishConfig": {
36
+ "access": "public"
37
+ },
38
+ "dependencies": {
39
+ "@polytts/core": "0.1.0",
40
+ "@polytts/node-adapters": "0.1.0",
41
+ "@polytts/presets": "0.1.0"
42
+ },
43
+ "devDependencies": {
44
+ "@types/node": "^25.6.0",
45
+ "vite-plus": "latest"
46
+ },
47
+ "scripts": {
48
+ "build": "vp pack",
49
+ "test": "vp test",
50
+ "test:run": "vp test run"
51
+ },
52
+ "main": "./dist/index.mjs",
53
+ "module": "./dist/index.mjs",
54
+ "types": "./dist/index.d.mts"
55
+ }