@polytts/node 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +50 -0
- package/dist/index.d.mts +138 -0
- package/dist/index.mjs +357 -0
- package/package.json +55 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DengQing dengqing0821@gmail.com
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# @polytts/node
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@polytts/node)
|
|
4
|
+
|
|
5
|
+
Node entrypoint for [`polytts`](https://github.com/Dunqing/polytts).
|
|
6
|
+
|
|
7
|
+
Use this package for Node runtimes such as Electron main or preload code, Raycast extensions, and server-side synthesis workflows.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install @polytts/node
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { createNodeTTS } from "@polytts/node";
|
|
19
|
+
|
|
20
|
+
const tts = createNodeTTS();
|
|
21
|
+
|
|
22
|
+
await tts.ready();
|
|
23
|
+
await tts.writeWav("./hello.wav", "Hello from Node.");
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
For progressive synthesis:
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
for await (const chunk of tts.synthesizeStream("Hello from a streaming Node runtime.")) {
|
|
30
|
+
console.log(chunk.sampleRate, chunk.channels[0]?.length ?? 0);
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
The high-level API also exposes:
|
|
35
|
+
|
|
36
|
+
- `install(modelId?, onProgress?)`
|
|
37
|
+
- `uninstall(modelId?)`
|
|
38
|
+
- `getInstallState(modelId?)`
|
|
39
|
+
- `isInstalled(modelId?)`
|
|
40
|
+
|
|
41
|
+
`@polytts/node` intentionally does not expose `speak()`. In Node hosts, playback is application-specific, so the package focuses on synthesis and file/byte output (`synthesize()`, `synthesizeToWav()`, `writeWav()`).
|
|
42
|
+
|
|
43
|
+
## Built-in models
|
|
44
|
+
|
|
45
|
+
- Kokoro 82M
|
|
46
|
+
- Piper voice bundles
|
|
47
|
+
- KittenTTS Mini and Nano
|
|
48
|
+
- Supertonic 2
|
|
49
|
+
|
|
50
|
+
`Browser Speech` is intentionally browser-only and is not part of this package.
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { AssetBundleKey, AssetStore, AssetStore as AssetStore$1, AudioData, AudioData as AudioData$1, CatalogSource, InstallState, InstallState as InstallState$1, ModelId, ModelId as ModelId$1, ModelRuntimeInfo, ModelSpec, PrepareOptions, RuntimePhase, RuntimeState, SpeakOptions, TTSAdapter, Voice, Voice as Voice$1, VoiceId, VoiceId as VoiceId$1 } from "@polytts/core";
|
|
2
|
+
import { officialNodeAdapters } from "@polytts/node-adapters";
|
|
3
|
+
import { officialNodeCatalog, officialNodeModels } from "@polytts/presets";
|
|
4
|
+
|
|
5
|
+
//#region src/storage/fs-asset-store.d.ts
|
|
6
|
+
/**
 * Filesystem-backed asset store that caches TTS model files on disk. Defaults to `~/.polytts/cache`
 * when no root directory is provided.
 *
 * Assets are first written to a per-revision staging directory and only become
 * readable through {@link FsAssetStore.getAsset} after the bundle is activated.
 */
declare class FsAssetStore implements AssetStore$1 {
  /** Root directory under which every bundle directory is created. */
  private readonly rootDir;
  /** @param rootDir Cache root; defaults to `~/.polytts/cache` when omitted. */
  constructor(rootDir?: string);
  /** Writes one asset into the bundle's staging directory, creating parent directories as needed. */
  stageAsset(bundle: AssetBundleKey, assetName: string, data: ArrayBuffer): Promise<void>;
  /**
   * Promotes the staged files to the active revision directory and records the
   * revision and asset names in the bundle's `meta.json`. Rejects if any listed
   * asset has not been staged. A previously recorded, different revision is removed.
   */
  activateBundle(bundle: AssetBundleKey, assetNames: string[]): Promise<void>;
  /**
   * Resolves to `true` when the recorded revision matches `bundle.revision` and
   * every required asset is present. When `requiredAssetNames` is omitted or
   * empty, the asset names recorded at activation are checked instead.
   */
  isInstalled(bundle: AssetBundleKey, requiredAssetNames?: string[]): Promise<boolean>;
  /**
   * Reads one asset from the active revision. Resolves to `null` when the bundle
   * has no matching recorded revision or the file does not exist.
   */
  getAsset(bundle: AssetBundleKey, assetName: string): Promise<ArrayBuffer | null>;
  /** Recursively deletes the bundle directory (metadata, staging and revisions). */
  removeBundle(bundle: AssetBundleKey): Promise<void>;
  /** Reads the bundle's `meta.json`; yields `null` when the file is missing. */
  private readMeta;
  /** `<rootDir>/<adapterId>/<modelId>` with both ids encoded as path segments. */
  private bundleDir;
  /** Path of the bundle's `meta.json`. */
  private metaPath;
  /** `<bundleDir>/revisions/<revision>`. */
  private revisionDir;
  /** Revision directory for `bundle.revision`. */
  private activeRevisionDir;
  /** `<bundleDir>/staging/<revision>`. */
  private stagingDir;
  /** Path of an asset inside the staging directory; `/` in the name creates sub-directories. */
  private stagedAssetPath;
  /** Path of an asset inside the active revision directory. */
  private activeAssetPath;
}
|
|
27
|
+
//#endregion
|
|
28
|
+
//#region src/index.d.ts
|
|
29
|
+
/** Audio data returned by synthesis operations (alias of the core `AudioData`). */
type NodeAudio = AudioData$1;
/** Options for preparing (loading) a TTS model (alias of the core `PrepareOptions`). */
type NodePrepareOptions = PrepareOptions;
/** Options for synthesis operations (alias of the core `SpeakOptions`). */
type NodeSynthesizeOptions = SpeakOptions;
/** A TTS adapter that provides a specific TTS backend (alias of the core `TTSAdapter`). */
type NodeTTSAdapter = TTSAdapter;
/**
 * Subset of the core RuntimeState exposed by the Node TTS runtime. Only the
 * listed keys are copied; any other core state fields are not part of the snapshot.
 */
type NodeRuntimeState = Pick<RuntimeState, "models" | "supportedModelIds" | "activeModelId" | "activeVoiceId" | "voices" | "isPreparing" | "isSpeaking" | "phase" | "phaseModelId" | "phaseProgress" | "runtimeInfoByModel" | "error" | "installStates" | "installStateHydrated">;
|
|
39
|
+
/** Configuration options for {@link createNodeTTSRuntime} and {@link createNodeTTS}. */
interface NodeTTSRuntimeOptions {
  /** TTS adapters to use; the official Node adapters are used when this is omitted or empty. */
  adapters?: NodeTTSAdapter[];
  /**
   * Inline model specs to register. When `catalogs` is also provided they are
   * placed in front of those catalogs as an extra catalog; when no catalogs are
   * given they are used instead of the official Node models.
   */
  models?: ModelSpec[];
  /**
   * Catalog sources providing model definitions. When at least one catalog is
   * supplied, the official Node models are not added automatically.
   */
  catalogs?: CatalogSource[];
  /** Custom asset store for downloaded model files; takes precedence over `cacheDir`. */
  assetStore?: AssetStore$1;
  /**
   * Filesystem directory for the default asset store cache. Only used when
   * `assetStore` is not provided; defaults to `~/.polytts/cache`.
   */
  cacheDir?: string;
  /** Custom fetch implementation for downloading assets. */
  fetch?: typeof fetch;
  /** Model to activate on startup. */
  initialModelId?: ModelId$1;
  /** Voice to activate on startup. */
  initialVoiceId?: VoiceId$1;
}
|
|
58
|
+
/** Low-level Node TTS runtime providing model management and synthesis capabilities. */
interface NodeTTSRuntime {
  /** Returns a snapshot of the current runtime state. */
  getState(): NodeRuntimeState;
  /** Subscribes to state changes; returns an unsubscribe function. */
  subscribe(listener: (state: NodeRuntimeState) => void): () => void;
  /** Lists the model specs known to the runtime. */
  listModels(): ModelSpec[];
  /** Looks up a model spec by ID. */
  getModel(modelId: ModelId$1): ModelSpec | null;
  /** Returns the install state for the given model, or null when the model is not registered. */
  getInstallState(modelId: ModelId$1): InstallState$1 | null;
  /** Returns true if the model's assets are fully available. */
  isInstalled(modelId: ModelId$1): boolean;
  /** Lists voices, optionally for a specific model. */
  listVoices(modelId?: ModelId$1): Promise<Voice$1[]>;
  /** Downloads model assets, with optional progress callback. */
  install(modelId: ModelId$1, onProgress?: (progress: number) => void): Promise<void>;
  /** Removes previously downloaded model assets. */
  uninstall(modelId: ModelId$1): Promise<void>;
  /** Loads a model so it is ready for synthesis. */
  prepare(modelId: ModelId$1, options?: NodePrepareOptions): Promise<void>;
  /** Returns an async iterable of audio chunks for streaming synthesis. */
  synthesizeStream(text: string, options?: NodeSynthesizeOptions): AsyncIterable<NodeAudio>;
  /** Synthesizes text and returns the complete audio data. */
  synthesize(text: string, options?: NodeSynthesizeOptions): Promise<NodeAudio>;
  /** Releases all resources held by this runtime. */
  dispose(): void;
}
|
|
83
|
+
/** High-level Node TTS interface with model/voice selection, WAV export, and convenience methods. */
interface NodeTTS {
  /** The underlying low-level Node TTS runtime. */
  readonly runtime: NodeTTSRuntime;
  /** Returns a snapshot of the current runtime state. */
  getState(): NodeRuntimeState;
  /** Subscribes to state changes; returns an unsubscribe function. */
  subscribe(listener: (state: NodeRuntimeState) => void): () => void;
  /** Lists the model specs known to the runtime. */
  listModels(): ModelSpec[];
  /** Returns the model for the given ID, or null. Also returns null when no ID is passed. */
  getModel(modelId?: ModelId$1 | null): ModelSpec | null;
  /** Returns the currently active model, or null. */
  getSelectedModel(): ModelSpec | null;
  /** Returns the install state for the given or active model; null when neither is available. */
  getInstallState(modelId?: ModelId$1 | null): InstallState$1 | null;
  /** Returns true if the given or active model's assets are fully available. */
  isInstalled(modelId?: ModelId$1 | null): boolean;
  /** Lists voices, optionally for a specific model. */
  listVoices(modelId?: ModelId$1): Promise<Voice$1[]>;
  /** Returns the currently active voice, or null. */
  getSelectedVoice(): Voice$1 | null;
  /**
   * Downloads model assets; defaults to the active model if omitted.
   * Rejects with "No active model selected" when no ID is given and no model is active.
   */
  install(modelId?: ModelId$1, onProgress?: (progress: number) => void): Promise<void>;
  /**
   * Removes downloaded assets; defaults to the active model if omitted.
   * Rejects with "No active model selected" when no ID is given and no model is active.
   */
  uninstall(modelId?: ModelId$1): Promise<void>;
  /** Activates a model, downloading and loading it if necessary. */
  selectModel(modelId: ModelId$1, options?: Omit<NodePrepareOptions, "modelId">): Promise<void>;
  /**
   * Activates a voice, resolving its parent model automatically. The current
   * model is kept when it offers the voice; otherwise a model of the same family
   * that offers the voice is preferred.
   */
  selectVoice(voiceId: VoiceId$1): Promise<void>;
  /**
   * Ensures the selected (or specified) model is loaded and ready to synthesize.
   * Rejects with "No active model selected" when no model ID is given and none is active.
   */
  ready(options?: {
    modelId?: ModelId$1;
    voiceId?: VoiceId$1;
    onProgress?: (progress: number) => void;
  }): Promise<void>;
  /** Returns an async iterable of audio chunks for streaming synthesis. */
  synthesizeStream(text: string, options?: NodeSynthesizeOptions): AsyncIterable<NodeAudio>;
  /** Synthesizes text and returns the complete audio data. */
  synthesize(text: string, options?: NodeSynthesizeOptions): Promise<NodeAudio>;
  /** Synthesizes text and returns WAV-encoded bytes (16-bit PCM). */
  synthesizeToWav(text: string, options?: NodeSynthesizeOptions): Promise<Uint8Array>;
  /** Synthesizes text and writes the result as a WAV file to disk. */
  writeWav(path: string, text: string, options?: NodeSynthesizeOptions): Promise<void>;
  /** Releases all resources held by this instance. */
  dispose(): void;
}
|
|
127
|
+
/**
 * Creates a low-level Node TTS runtime. Defaults to official adapters and model catalogs when none
 * are provided. Use {@link createNodeTTS} for a higher-level API.
 *
 * Unless `options.assetStore` is given, model files are cached through an
 * {@link FsAssetStore} rooted at `options.cacheDir`.
 *
 * @param options Optional runtime configuration.
 * @returns The runtime wrapper.
 */
declare function createNodeTTSRuntime(options?: NodeTTSRuntimeOptions): NodeTTSRuntime;
|
|
132
|
+
/**
 * Creates a high-level Node TTS instance with model/voice selection, WAV encoding, and file output.
 * This is the recommended entry point for server-side TTS usage.
 *
 * @param options Optional configuration, forwarded to {@link createNodeTTSRuntime}.
 * @returns A {@link NodeTTS} whose `runtime` property exposes the low-level runtime.
 */
declare function createNodeTTS(options?: NodeTTSRuntimeOptions): NodeTTS;
|
|
137
|
+
//#endregion
|
|
138
|
+
export { type AssetStore, type AudioData, FsAssetStore, type InstallState, type ModelId, type ModelRuntimeInfo, NodeAudio, NodePrepareOptions, NodeRuntimeState, NodeSynthesizeOptions, NodeTTS, NodeTTSAdapter, NodeTTSRuntime, NodeTTSRuntimeOptions, type RuntimePhase, type Voice, type VoiceId, createNodeTTS, createNodeTTSRuntime, officialNodeAdapters, officialNodeCatalog, officialNodeModels };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
import { mkdir, readFile, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
2
|
+
import { createTTSRuntime, isInstallStateAvailable, resolveInstallState } from "@polytts/core";
|
|
3
|
+
import { officialNodeAdapters, officialNodeAdapters as officialNodeAdapters$1 } from "@polytts/node-adapters";
|
|
4
|
+
import { officialNodeCatalog, officialNodeModels, officialNodeModels as officialNodeModels$1 } from "@polytts/presets";
|
|
5
|
+
import { homedir } from "node:os";
|
|
6
|
+
import { dirname, join } from "node:path";
|
|
7
|
+
//#region src/storage/fs-asset-store.ts
|
|
8
|
+
/**
 * Encodes an arbitrary identifier so it can be used as a single path segment.
 *
 * `encodeURIComponent` escapes path separators but leaves `.` untouched, so the
 * special segments `.` and `..` would survive encoding and let `join()` resolve
 * to a directory outside the cache root. Values that consist only of dots
 * therefore get their dots percent-encoded too. Every other value is encoded
 * exactly as before, so existing cache directories keep their names.
 *
 * @param value Identifier such as an adapter id, model id, or revision.
 * @returns A string that is safe to hand to `path.join` as one segment.
 */
function encodeSegment(value) {
  const encoded = encodeURIComponent(value);
  return /^\.+$/.test(encoded) ? encoded.replace(/\./g, "%2E") : encoded;
}
|
|
11
|
+
/**
 * Copies the bytes visible through a view (for example a Node `Buffer`) into a
 * new buffer of exactly that length.
 *
 * @param data View exposing `buffer`, `byteOffset` and `byteLength`.
 * @returns The result of slicing the backing buffer to the view's byte range.
 */
function toArrayBuffer(data) {
  const { buffer, byteOffset, byteLength } = data;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
|
|
14
|
+
/**
 * Reads a UTF-8 file and parses it as JSON.
 *
 * @param path File to read.
 * @returns The parsed value, or `null` when the file does not exist (`ENOENT`).
 * @throws Any other read error, and the parse error for malformed JSON.
 */
async function readJson(path) {
  let text;
  try {
    text = await readFile(path, "utf8");
  } catch (cause) {
    // A missing file is an expected state; everything else is a real failure.
    if (cause.code !== "ENOENT") throw cause;
    return null;
  }
  return JSON.parse(text);
}
|
|
22
|
+
/**
 * Filesystem-backed asset store that caches TTS model files on disk. Defaults to `~/.polytts/cache`
 * when no root directory is provided.
 *
 * Layout below the root directory:
 *   <adapterId>/<modelId>/meta.json                        recorded revision + asset names
 *   <adapterId>/<modelId>/staging/<revision>/<asset path>  files written by stageAsset
 *   <adapterId>/<modelId>/revisions/<revision>/<asset path> files served by getAsset
 */
var FsAssetStore = class {
  /** @param rootDir Directory that holds every cached bundle. */
  constructor(rootDir = join(homedir(), ".polytts", "cache")) {
    this.rootDir = rootDir;
  }
  /** Writes one asset into the bundle's staging directory, creating parent directories as needed. */
  async stageAsset(bundle, assetName, data) {
    const targetPath = this.stagedAssetPath(bundle, assetName);
    await mkdir(dirname(targetPath), { recursive: true });
    await writeFile(targetPath, Buffer.from(data));
  }
  /**
   * Promotes the staging directory to the active revision and records it in `meta.json`.
   * Rejects before anything is modified if one of `assetNames` has not been staged.
   */
  async activateBundle(bundle, assetNames) {
    const stagedDir = this.stagingDir(bundle);
    const activeDir = this.activeRevisionDir(bundle);
    // stat() confirms presence without loading whole model files into memory.
    await Promise.all(assetNames.map((assetName) => stat(this.stagedAssetPath(bundle, assetName))));
    const currentMeta = await this.readMeta(bundle);
    await rm(activeDir, { recursive: true, force: true });
    await mkdir(dirname(activeDir), { recursive: true });
    await rename(stagedDir, activeDir);
    // Drop the previously recorded revision once the new one is in place.
    if (currentMeta && currentMeta.revision !== bundle.revision) {
      await rm(this.revisionDir(bundle, currentMeta.revision), { recursive: true, force: true });
    }
    await writeFile(
      this.metaPath(bundle),
      JSON.stringify({ revision: bundle.revision, assetNames }),
      "utf8"
    );
  }
  /**
   * Resolves to `true` when the recorded revision matches `bundle.revision` and every
   * required asset exists. Falls back to the asset names recorded at activation when
   * `requiredAssetNames` is omitted or empty.
   */
  async isInstalled(bundle, requiredAssetNames) {
    const meta = await this.readMeta(bundle);
    if (!meta || meta.revision !== bundle.revision) return false;
    const assetNames = requiredAssetNames?.length ? requiredAssetNames : meta.assetNames;
    try {
      // Existence check only; reading the files would pull entire models into memory.
      await Promise.all(assetNames.map((assetName) => stat(this.activeAssetPath(bundle, assetName))));
      return true;
    } catch (error) {
      if (error.code === "ENOENT") return false;
      throw error;
    }
  }
  /**
   * Reads one asset of the active revision. Resolves to `null` when the recorded
   * revision does not match `bundle.revision` or the file is missing.
   */
  async getAsset(bundle, assetName) {
    const meta = await this.readMeta(bundle);
    if (!meta || meta.revision !== bundle.revision) return null;
    try {
      return toArrayBuffer(await readFile(this.activeAssetPath(bundle, assetName)));
    } catch (error) {
      if (error.code === "ENOENT") return null;
      throw error;
    }
  }
  /** Recursively deletes the bundle directory, including metadata and staged files. */
  async removeBundle(bundle) {
    await rm(this.bundleDir(bundle), { recursive: true, force: true });
  }
  /** Reads the bundle's `meta.json`; yields `null` when it does not exist. */
  async readMeta(bundle) {
    return readJson(this.metaPath(bundle));
  }
  bundleDir(bundle) {
    return join(this.rootDir, encodeSegment(bundle.adapterId), encodeSegment(bundle.modelId));
  }
  metaPath(bundle) {
    return join(this.bundleDir(bundle), "meta.json");
  }
  revisionDir(bundle, revision = bundle.revision) {
    return join(this.bundleDir(bundle), "revisions", encodeSegment(revision));
  }
  activeRevisionDir(bundle) {
    return this.revisionDir(bundle, bundle.revision);
  }
  stagingDir(bundle) {
    return join(this.bundleDir(bundle), "staging", encodeSegment(bundle.revision));
  }
  stagedAssetPath(bundle, assetName) {
    return join(this.stagingDir(bundle), ...this.assetSegments(assetName));
  }
  activeAssetPath(bundle, assetName) {
    return join(this.activeRevisionDir(bundle), ...this.assetSegments(assetName));
  }
  /**
   * Splits an asset name on `/` into path segments. Names containing a `..`
   * segment (with either separator) are rejected because `join()` would resolve
   * them to a location outside the bundle directory.
   */
  assetSegments(assetName) {
    if (assetName.split(/[\\/]/).includes("..")) {
      throw new Error(`Invalid asset name: ${assetName}`);
    }
    return assetName.split("/");
  }
};
|
|
110
|
+
//#endregion
|
|
111
|
+
//#region src/index.ts
|
|
112
|
+
/**
 * Serialises planar float audio into a 16-bit PCM WAV (RIFF) byte array.
 *
 * The frame count is taken from the first channel; positions missing in other
 * channels are written as silence. Samples are clamped to [-1, 1] before scaling.
 *
 * @param audio Object with `sampleRate` and `channels` (one sample array per channel).
 * @returns The 44-byte header followed by interleaved little-endian samples.
 * @throws Error when `audio.channels` is empty.
 */
function encodeWav(audio) {
  const { channels, sampleRate } = audio;
  const channelCount = channels.length;
  if (channelCount === 0) throw new Error("Cannot encode empty audio");

  const headerBytes = 44;
  const sampleBytes = 2;
  const frameCount = channels[0].length;
  const blockAlign = channelCount * sampleBytes;
  const dataSize = frameCount * blockAlign;

  const bytes = new Uint8Array(new ArrayBuffer(headerBytes + dataSize));
  const view = new DataView(bytes.buffer);
  const putTag = (at, tag) => {
    for (let k = 0; k < tag.length; k += 1) view.setUint8(at + k, tag.charCodeAt(k));
  };

  // RIFF container header.
  putTag(0, "RIFF");
  view.setUint32(4, 36 + dataSize, true);
  putTag(8, "WAVE");

  // "fmt " chunk: 16-byte body, format tag 1, 16 bits per sample.
  putTag(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, channelCount, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, 16, true);

  // "data" chunk: frames in order, channels interleaved within each frame.
  putTag(36, "data");
  view.setUint32(40, dataSize, true);
  let cursor = headerBytes;
  for (let frame = 0; frame < frameCount; frame += 1) {
    for (let ch = 0; ch < channelCount; ch += 1) {
      const raw = channels[ch][frame] ?? 0;
      const bounded = Math.max(-1, Math.min(1, raw));
      // Negative values scale by 32768 and the rest by 32767, so both ends fit int16.
      const scale = bounded < 0 ? 32768 : 32767;
      view.setInt16(cursor, bounded * scale, true);
      cursor += sampleBytes;
    }
  }
  return bytes;
}
|
|
148
|
+
/**
 * Projects a core runtime state onto the fields exposed by the Node runtime.
 * Fields that are not listed here are left out of the returned object.
 *
 * @param state Core runtime state.
 * @returns A new object holding references to the selected fields.
 */
function toNodeRuntimeState(state) {
  const {
    models,
    supportedModelIds,
    activeModelId,
    activeVoiceId,
    voices,
    isPreparing,
    isSpeaking,
    phase,
    phaseModelId,
    phaseProgress,
    runtimeInfoByModel,
    error,
    installStates,
    installStateHydrated,
  } = state;
  return {
    models,
    supportedModelIds,
    activeModelId,
    activeVoiceId,
    voices,
    isPreparing,
    isSpeaking,
    phase,
    phaseModelId,
    phaseProgress,
    runtimeInfoByModel,
    error,
    installStates,
    installStateHydrated,
  };
}
|
|
166
|
+
/**
 * Builds the catalog list handed to the core runtime.
 *
 * - No catalogs configured: a single catalog holding `options.models`, or the
 *   official Node models when `options.models` is missing or empty.
 * - Catalogs configured: those catalogs, preceded by a catalog of
 *   `options.models` when that list is non-empty.
 *
 * @param options Runtime options (`models`, `catalogs`).
 * @returns Array of catalog sources.
 */
function createCatalogs(options) {
  const catalogs = options.catalogs ?? [];
  const hasInlineModels = Boolean(options.models?.length);
  if (catalogs.length === 0) {
    return [{ models: hasInlineModels ? options.models : officialNodeModels$1 }];
  }
  return hasInlineModels ? [{ models: options.models }, ...catalogs] : catalogs;
}
|
|
174
|
+
/**
 * Adapter around the core runtime. State is narrowed with `toNodeRuntimeState`
 * before it reaches callers; all other calls are forwarded unchanged.
 */
var NodeRuntimeImpl = class {
  /** @param runtime Core runtime instance to wrap. */
  constructor(runtime) {
    this.runtime = runtime;
  }
  /** Returns the current core state reduced to the Node-facing fields. */
  getState() {
    const coreState = this.runtime.getState();
    return toNodeRuntimeState(coreState);
  }
  /** Registers a listener that receives the narrowed state; returns the core's unsubscribe handle. */
  subscribe(listener) {
    const forward = (coreState) => {
      listener(toNodeRuntimeState(coreState));
    };
    return this.runtime.subscribe(forward);
  }
  listModels() {
    return this.runtime.listModels();
  }
  getModel(modelId) {
    return this.runtime.getModel(modelId);
  }
  /** Resolves the install state for a model; `null` when the core runtime returns no model for the ID. */
  getInstallState(modelId) {
    const model = this.runtime.getModel(modelId);
    if (model) {
      const recorded = this.runtime.getState().installStates[model.id];
      return resolveInstallState(model, recorded);
    }
    return null;
  }
  isInstalled(modelId) {
    const installState = this.getInstallState(modelId);
    return isInstallStateAvailable(installState);
  }
  listVoices(modelId) {
    return this.runtime.listVoices(modelId);
  }
  install(modelId, onProgress) {
    return this.runtime.install(modelId, onProgress);
  }
  uninstall(modelId) {
    return this.runtime.uninstall(modelId);
  }
  prepare(modelId, options) {
    return this.runtime.prepare(modelId, options);
  }
  synthesize(text, options) {
    return this.runtime.synthesize(text, options);
  }
  synthesizeStream(text, options) {
    return this.runtime.synthesizeStream(text, options);
  }
  dispose() {
    this.runtime.dispose();
  }
};
|
|
222
|
+
/**
 * High-level facade over a Node TTS runtime: falls back to the active model
 * where an ID is optional, resolves the model for a voice, and adds WAV output.
 */
var NodeTTSImpl = class {
  /** @param runtime Low-level Node runtime that performs the actual work. */
  constructor(runtime) {
    this.runtime = runtime;
  }
  getState() {
    return this.runtime.getState();
  }
  subscribe(listener) {
    return this.runtime.subscribe(listener);
  }
  listModels() {
    return this.runtime.listModels();
  }
  /** Looks up a model; a missing ID yields `null` rather than the active model. */
  getModel(modelId) {
    return modelId ? this.runtime.getModel(modelId) : null;
  }
  getSelectedModel() {
    const { activeModelId } = this.runtime.getState();
    return this.getModel(activeModelId);
  }
  /** Install state of the given model, or of the active model when no ID is passed. */
  getInstallState(modelId) {
    const target = modelId ?? this.runtime.getState().activeModelId;
    return target ? this.runtime.getInstallState(target) : null;
  }
  isInstalled(modelId) {
    const installState = this.getInstallState(modelId);
    return isInstallStateAvailable(installState);
  }
  listVoices(modelId) {
    return this.runtime.listVoices(modelId);
  }
  /** Finds the active voice in the state's voice list; `null` when there is no match. */
  getSelectedVoice() {
    const { voices, activeVoiceId } = this.runtime.getState();
    const selected = voices.find((candidate) => candidate.id === activeVoiceId);
    return selected ?? null;
  }
  /** Installs the given or active model; rejects when neither is available. */
  async install(modelId, onProgress) {
    const target = modelId ?? this.runtime.getState().activeModelId;
    if (!target) throw new Error("No active model selected");
    await this.runtime.install(target, onProgress);
  }
  /** Uninstalls the given or active model; rejects when neither is available. */
  async uninstall(modelId) {
    const target = modelId ?? this.runtime.getState().activeModelId;
    if (!target) throw new Error("No active model selected");
    await this.runtime.uninstall(target);
  }
  async selectModel(modelId, options) {
    await this.runtime.prepare(modelId, options);
  }
  /** Prepares whichever model `resolveModelSelection` picks for the voice. */
  async selectVoice(voiceId) {
    const { modelId, voiceId: resolvedVoiceId } = this.resolveModelSelection({ voiceId });
    await this.runtime.prepare(modelId, { voiceId: resolvedVoiceId });
  }
  /** Prepares the requested or active model; rejects when neither is available. */
  async ready(options) {
    const modelId = options?.modelId ?? this.runtime.getState().activeModelId;
    if (!modelId) throw new Error("No active model selected");
    const prepareOptions = {
      voiceId: options?.voiceId,
      onProgress: options?.onProgress
    };
    await this.runtime.prepare(modelId, prepareOptions);
  }
  synthesize(text, options) {
    return this.runtime.synthesize(text, options);
  }
  synthesizeStream(text, options) {
    return this.runtime.synthesizeStream(text, options);
  }
  async synthesizeToWav(text, options) {
    const audio = await this.runtime.synthesize(text, options);
    return encodeWav(audio);
  }
  async writeWav(path, text, options) {
    const wav = await this.synthesizeToWav(text, options);
    // Wrap the same bytes in a Buffer view (no copy) for writeFile.
    const payload = Buffer.from(wav.buffer, wav.byteOffset, wav.byteLength);
    await writeFile(path, payload);
  }
  dispose() {
    this.runtime.dispose();
  }
  /**
   * Picks a model ID in this order: explicit argument, active model, first
   * supported model, first registered model. Throws when none exists.
   */
  resolveModelId(modelId) {
    const state = this.runtime.getState();
    const candidate = modelId ?? state.activeModelId ?? state.supportedModelIds[0] ?? state.models[0]?.id;
    if (candidate) return candidate;
    throw new Error("No models are registered in this NodeTTS instance");
  }
  /**
   * Resolves the `{ modelId, voiceId }` pair to prepare. When the resolved model
   * does not list the voice, a model of the same family that does is used
   * instead; if there is none, the original pair is returned unchanged.
   */
  resolveModelSelection(options) {
    const modelId = this.resolveModelId(options?.modelId);
    const voiceId = options?.voiceId;
    if (!voiceId) return { modelId };
    const model = this.runtime.getModel(modelId);
    if (!model || this.modelHasVoice(model, voiceId)) return { modelId, voiceId };
    const familyMatch = this.findModelIdForVoice(voiceId, model.family);
    return { modelId: familyMatch || modelId, voiceId };
  }
  /** First model (optionally restricted to a family) that offers the voice, else `null`. */
  findModelIdForVoice(voiceId, familyId) {
    const match = this.runtime.getState().models.find((model) => {
      const familyOk = familyId == null || model.family === familyId;
      return familyOk && this.modelHasVoice(model, voiceId);
    });
    return match?.id ?? null;
  }
  /** True when the voice is the model's default voice or appears in its voice list. */
  modelHasVoice(model, voiceId) {
    if (model.defaultVoiceId === voiceId) return true;
    return model.voices?.some((voice) => voice.id === voiceId) === true;
  }
};
|
|
334
|
+
/**
 * Builds the low-level Node runtime on top of the core runtime.
 *
 * Official adapters are used when `options.adapters` is missing or empty, and a
 * filesystem asset store rooted at `options.cacheDir` is created unless
 * `options.assetStore` is supplied. The core runtime is given no audio player.
 * Prefer {@link createNodeTTS} for the higher-level API.
 */
function createNodeTTSRuntime(options = {}) {
  const adapters = options.adapters?.length ? options.adapters : officialNodeAdapters$1;
  const catalogs = createCatalogs(options);
  const assetStore = options.assetStore ?? new FsAssetStore(options.cacheDir);
  const coreRuntime = createTTSRuntime({
    adapters,
    catalogs,
    assetStore,
    audioPlayer: null,
    fetch: options.fetch,
    initialModelId: options.initialModelId,
    initialVoiceId: options.initialVoiceId
  });
  return new NodeRuntimeImpl(coreRuntime);
}
|
|
349
|
+
/**
 * Builds the high-level Node TTS facade (model/voice selection, WAV encoding,
 * file output) around a freshly created low-level runtime. This is the
 * recommended entry point for server-side TTS usage.
 */
function createNodeTTS(options = {}) {
  const runtime = createNodeTTSRuntime(options);
  return new NodeTTSImpl(runtime);
}
|
|
356
|
+
//#endregion
|
|
357
|
+
export { FsAssetStore, createNodeTTS, createNodeTTSRuntime, officialNodeAdapters, officialNodeCatalog, officialNodeModels };
|
package/package.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@polytts/node",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Node entrypoint for polytts text-to-speech runtimes.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"electron",
|
|
7
|
+
"node",
|
|
8
|
+
"onnx",
|
|
9
|
+
"polytts",
|
|
10
|
+
"raycast",
|
|
11
|
+
"text-to-speech",
|
|
12
|
+
"tts"
|
|
13
|
+
],
|
|
14
|
+
"homepage": "https://github.com/Dunqing/polytts/tree/main/packages/node#readme",
|
|
15
|
+
"bugs": {
|
|
16
|
+
"url": "https://github.com/Dunqing/polytts/issues"
|
|
17
|
+
},
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "git+https://github.com/Dunqing/polytts.git",
|
|
22
|
+
"directory": "packages/node"
|
|
23
|
+
},
|
|
24
|
+
"files": [
|
|
25
|
+
"dist"
|
|
26
|
+
],
|
|
27
|
+
"type": "module",
|
|
28
|
+
"exports": {
|
|
29
|
+
".": {
|
|
30
|
+
"types": "./dist/index.d.mts",
|
|
31
|
+
"default": "./dist/index.mjs"
|
|
32
|
+
},
|
|
33
|
+
"./package.json": "./package.json"
|
|
34
|
+
},
|
|
35
|
+
"publishConfig": {
|
|
36
|
+
"access": "public"
|
|
37
|
+
},
|
|
38
|
+
"dependencies": {
|
|
39
|
+
"@polytts/core": "0.1.0",
|
|
40
|
+
"@polytts/node-adapters": "0.1.0",
|
|
41
|
+
"@polytts/presets": "0.1.0"
|
|
42
|
+
},
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"@types/node": "^25.6.0",
|
|
45
|
+
"vite-plus": "latest"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"build": "vp pack",
|
|
49
|
+
"test": "vp test",
|
|
50
|
+
"test:run": "vp test run"
|
|
51
|
+
},
|
|
52
|
+
"main": "./dist/index.mjs",
|
|
53
|
+
"module": "./dist/index.mjs",
|
|
54
|
+
"types": "./dist/index.d.mts"
|
|
55
|
+
}
|