@livekit/agents-plugin-elevenlabs 0.4.6 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/index.cjs +23 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +1 -4
- package/dist/index.js.map +1 -1
- package/dist/models.cjs +17 -0
- package/dist/models.cjs.map +1 -0
- package/dist/models.js +0 -4
- package/dist/models.js.map +1 -1
- package/dist/tts.cjs +241 -0
- package/dist/tts.cjs.map +1 -0
- package/dist/tts.d.ts +1 -1
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +201 -198
- package/dist/tts.js.map +1 -1
- package/dist/tts.test.cjs +9 -0
- package/dist/tts.test.cjs.map +1 -0
- package/dist/tts.test.d.ts +2 -0
- package/dist/tts.test.d.ts.map +1 -0
- package/dist/tts.test.js +8 -0
- package/dist/tts.test.js.map +1 -0
- package/package.json +20 -8
- package/src/tts.test.ts +11 -0
- package/src/tts.ts +1 -2
package/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
3
|
+
|
|
4
|
+
SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
-->
|
|
6
|
+
# ElevenLabs plugin for LiveKit Agents
|
|
7
|
+
|
|
8
|
+
The Agents Framework is designed for building realtime, programmable
|
|
9
|
+
participants that run on servers. Use it to create conversational, multi-modal
|
|
10
|
+
voice agents that can see, hear, and understand.
|
|
11
|
+
|
|
12
|
+
This package contains the ElevenLabs plugin, which allows for voice synthesis.
|
|
13
|
+
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
|
|
14
|
+
information on how to use it, or browse the [API
|
|
15
|
+
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_elevenlabs.html).
|
|
16
|
+
See the [repository](https://github.com/livekit/agents-js) for more information
|
|
17
|
+
about the framework as a whole.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __copyProps = (to, from, except, desc) => {
|
|
7
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
8
|
+
for (let key of __getOwnPropNames(from))
|
|
9
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
10
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
11
|
+
}
|
|
12
|
+
return to;
|
|
13
|
+
};
|
|
14
|
+
var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
|
|
15
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
16
|
+
var src_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(src_exports);
|
|
18
|
+
__reExport(src_exports, require("./tts.cjs"), module.exports);
|
|
19
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
20
|
+
0 && (module.exports = {
|
|
21
|
+
...require("./tts.cjs")
|
|
22
|
+
});
|
|
23
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
|
package/dist/models.cjs
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __copyProps = (to, from, except, desc) => {
|
|
7
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
8
|
+
for (let key of __getOwnPropNames(from))
|
|
9
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
10
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
11
|
+
}
|
|
12
|
+
return to;
|
|
13
|
+
};
|
|
14
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
15
|
+
var models_exports = {};
|
|
16
|
+
module.exports = __toCommonJS(models_exports);
|
|
17
|
+
//# sourceMappingURL=models.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
package/dist/models.js
CHANGED
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/dist/tts.cjs
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var tts_exports = {};
|
|
20
|
+
__export(tts_exports, {
|
|
21
|
+
SynthesizeStream: () => SynthesizeStream,
|
|
22
|
+
TTS: () => TTS
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(tts_exports);
|
|
25
|
+
var import_agents = require("@livekit/agents");
|
|
26
|
+
var import_rtc_node = require("@livekit/rtc-node");
|
|
27
|
+
var import_node_crypto = require("node:crypto");
|
|
28
|
+
var import_node_url = require("node:url");
|
|
29
|
+
var import_ws = require("ws");
|
|
30
|
+
const DEFAULT_VOICE = {
|
|
31
|
+
id: "EXAVITQu4vr4xnSDxMaL",
|
|
32
|
+
name: "Bella",
|
|
33
|
+
category: "premade",
|
|
34
|
+
settings: {
|
|
35
|
+
stability: 0.71,
|
|
36
|
+
similarity_boost: 0.5,
|
|
37
|
+
style: 0,
|
|
38
|
+
use_speaker_boost: true
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
|
|
42
|
+
const AUTHORIZATION_HEADER = "xi-api-key";
|
|
43
|
+
const defaultTTSOptions = {
|
|
44
|
+
apiKey: process.env.ELEVEN_API_KEY,
|
|
45
|
+
voice: DEFAULT_VOICE,
|
|
46
|
+
modelID: "eleven_turbo_v2_5",
|
|
47
|
+
baseURL: API_BASE_URL_V1,
|
|
48
|
+
encoding: "pcm_22050",
|
|
49
|
+
streamingLatency: 3,
|
|
50
|
+
wordTokenizer: new import_agents.tokenize.basic.WordTokenizer(false),
|
|
51
|
+
chunkLengthSchedule: [],
|
|
52
|
+
enableSsmlParsing: false
|
|
53
|
+
};
|
|
54
|
+
class TTS extends import_agents.tts.TTS {
|
|
55
|
+
#opts;
|
|
56
|
+
constructor(opts = {}) {
|
|
57
|
+
super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
|
|
58
|
+
streaming: true
|
|
59
|
+
});
|
|
60
|
+
this.#opts = {
|
|
61
|
+
...defaultTTSOptions,
|
|
62
|
+
...opts
|
|
63
|
+
};
|
|
64
|
+
if (this.#opts.apiKey === void 0) {
|
|
65
|
+
throw new Error(
|
|
66
|
+
"ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY"
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
async listVoices() {
|
|
71
|
+
return fetch(this.#opts.baseURL + "/voices", {
|
|
72
|
+
headers: {
|
|
73
|
+
[AUTHORIZATION_HEADER]: this.#opts.apiKey
|
|
74
|
+
}
|
|
75
|
+
}).then((data) => data.json()).then((data) => {
|
|
76
|
+
const voices = [];
|
|
77
|
+
for (const voice of data.voices) {
|
|
78
|
+
voices.push({
|
|
79
|
+
id: voice.voice_id,
|
|
80
|
+
name: voice.name,
|
|
81
|
+
category: voice.category,
|
|
82
|
+
settings: void 0
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
return voices;
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
synthesize() {
|
|
89
|
+
throw new Error("Chunked responses are not supported on ElevenLabs TTS");
|
|
90
|
+
}
|
|
91
|
+
stream() {
|
|
92
|
+
return new SynthesizeStream(this.#opts);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
96
|
+
#opts;
|
|
97
|
+
#logger = (0, import_agents.log)();
|
|
98
|
+
streamURL;
|
|
99
|
+
constructor(opts) {
|
|
100
|
+
super();
|
|
101
|
+
this.#opts = opts;
|
|
102
|
+
this.closed = false;
|
|
103
|
+
const baseURL = opts.baseURL + (opts.baseURL.endsWith("/") ? "" : "/");
|
|
104
|
+
this.streamURL = new import_node_url.URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
|
|
105
|
+
const params = {
|
|
106
|
+
model_id: opts.modelID,
|
|
107
|
+
output_format: opts.encoding,
|
|
108
|
+
optimize_streaming_latency: `${opts.streamingLatency}`,
|
|
109
|
+
enable_ssml_parsing: `${opts.enableSsmlParsing}`
|
|
110
|
+
};
|
|
111
|
+
Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
|
|
112
|
+
this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
|
|
113
|
+
this.#run();
|
|
114
|
+
}
|
|
115
|
+
async #run() {
|
|
116
|
+
const segments = new import_agents.AsyncIterableQueue();
|
|
117
|
+
const tokenizeInput = async () => {
|
|
118
|
+
let stream = null;
|
|
119
|
+
for await (const text of this.input) {
|
|
120
|
+
if (text === SynthesizeStream.FLUSH_SENTINEL) {
|
|
121
|
+
stream == null ? void 0 : stream.endInput();
|
|
122
|
+
stream = null;
|
|
123
|
+
} else {
|
|
124
|
+
if (!stream) {
|
|
125
|
+
stream = this.#opts.wordTokenizer.stream();
|
|
126
|
+
segments.put(stream);
|
|
127
|
+
}
|
|
128
|
+
stream.pushText(text);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
segments.close();
|
|
132
|
+
};
|
|
133
|
+
const runStream = async () => {
|
|
134
|
+
for await (const stream of segments) {
|
|
135
|
+
await this.#runWS(stream);
|
|
136
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
await Promise.all([tokenizeInput(), runStream()]);
|
|
140
|
+
this.close();
|
|
141
|
+
}
|
|
142
|
+
async #runWS(stream, maxRetry = 3) {
|
|
143
|
+
let retries = 0;
|
|
144
|
+
let ws;
|
|
145
|
+
while (true) {
|
|
146
|
+
ws = new import_ws.WebSocket(this.streamURL, {
|
|
147
|
+
headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
|
|
148
|
+
});
|
|
149
|
+
try {
|
|
150
|
+
await new Promise((resolve, reject) => {
|
|
151
|
+
ws.on("open", resolve);
|
|
152
|
+
ws.on("error", (error) => reject(error));
|
|
153
|
+
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
154
|
+
});
|
|
155
|
+
break;
|
|
156
|
+
} catch (e) {
|
|
157
|
+
if (retries >= maxRetry) {
|
|
158
|
+
throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
|
|
159
|
+
}
|
|
160
|
+
const delay = Math.min(retries * 5, 5);
|
|
161
|
+
retries++;
|
|
162
|
+
this.#logger.warn(
|
|
163
|
+
`failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
|
|
164
|
+
);
|
|
165
|
+
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
const requestId = (0, import_node_crypto.randomUUID)();
|
|
169
|
+
const segmentId = (0, import_node_crypto.randomUUID)();
|
|
170
|
+
ws.send(
|
|
171
|
+
JSON.stringify({
|
|
172
|
+
text: " ",
|
|
173
|
+
voice_settings: this.#opts.voice.settings,
|
|
174
|
+
try_trigger_generation: true,
|
|
175
|
+
chunk_length_schedule: this.#opts.chunkLengthSchedule
|
|
176
|
+
})
|
|
177
|
+
);
|
|
178
|
+
let eosSent = false;
|
|
179
|
+
const sendTask = async () => {
|
|
180
|
+
let xmlContent = [];
|
|
181
|
+
for await (const data of stream) {
|
|
182
|
+
let text = data.token;
|
|
183
|
+
if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
|
|
184
|
+
xmlContent.push(text);
|
|
185
|
+
if (text.indexOf("</phoneme>") !== -1) {
|
|
186
|
+
text = xmlContent.join(" ");
|
|
187
|
+
xmlContent = [];
|
|
188
|
+
} else {
|
|
189
|
+
continue;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
|
|
193
|
+
}
|
|
194
|
+
if (xmlContent.length) {
|
|
195
|
+
this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
|
|
196
|
+
}
|
|
197
|
+
ws.send(JSON.stringify({ text: "" }));
|
|
198
|
+
eosSent = true;
|
|
199
|
+
};
|
|
200
|
+
const listenTask = async () => {
|
|
201
|
+
while (!this.closed) {
|
|
202
|
+
try {
|
|
203
|
+
await new Promise((resolve, reject) => {
|
|
204
|
+
ws.removeAllListeners();
|
|
205
|
+
ws.on("message", (data) => resolve(data));
|
|
206
|
+
ws.on("close", (code, reason) => {
|
|
207
|
+
if (!eosSent) {
|
|
208
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
209
|
+
}
|
|
210
|
+
reject();
|
|
211
|
+
});
|
|
212
|
+
}).then((msg) => {
|
|
213
|
+
const json = JSON.parse(msg.toString());
|
|
214
|
+
if ("audio" in json) {
|
|
215
|
+
const data = new Int16Array(Buffer.from(json.audio, "base64").buffer);
|
|
216
|
+
const frame = new import_rtc_node.AudioFrame(
|
|
217
|
+
data,
|
|
218
|
+
sampleRateFromFormat(this.#opts.encoding),
|
|
219
|
+
1,
|
|
220
|
+
data.length
|
|
221
|
+
);
|
|
222
|
+
this.queue.put({ requestId, segmentId, frame });
|
|
223
|
+
}
|
|
224
|
+
});
|
|
225
|
+
} catch {
|
|
226
|
+
break;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
};
|
|
230
|
+
await Promise.all([sendTask(), listenTask()]);
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
const sampleRateFromFormat = (encoding) => {
|
|
234
|
+
return Number(encoding.split("_")[1]);
|
|
235
|
+
};
|
|
236
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
237
|
+
0 && (module.exports = {
|
|
238
|
+
SynthesizeStream,
|
|
239
|
+
TTS
|
|
240
|
+
});
|
|
241
|
+
//# sourceMappingURL=tts.cjs.map
|
package/dist/tts.cjs.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n readonly streamURL: URL;\n\n constructor(opts: TTSOptions) {\n super();\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n const listenTask = async () => {\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);\n const frame = new AudioFrame(\n data,\n sampleRateFromFormat(this.#opts.encoding),\n 1,\n data.length,\n );\n this.queue.put({ requestId, segmentId, frame });\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAuD;AACvD,sBAA2B;AAC3B,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EAEA,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,KAAK,KAAK;AAAA,EACxC;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACL;AAAA,EAET,YAAY,MAAkB;AAC5B,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,WAAW,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACpE,oBAAM,QAAQ,IAAI;AAAA,gBAChB;AAAA,gBACA,qBAAqB,KAAK,MAAM,QAAQ;AAAA,gBACxC;AAAA,gBACA,KAAK;AAAA,cACP;AACA,mBAAK,MAAM,IAAI,EAAE,WAAW,WAAW,MAAM,CAAC;AAAA,YAChD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":[]}
|
package/dist/tts.d.ts
CHANGED
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA2B,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA2B,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAGzE,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;gBAGlB,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,IAAI,EAAE,UAAU;CA0J7B"}
|
package/dist/tts.js
CHANGED
|
@@ -1,213 +1,216 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
import {
|
|
6
|
-
import { AudioFrame } from '@livekit/rtc-node';
|
|
7
|
-
import { randomUUID } from 'node:crypto';
|
|
8
|
-
import { URL } from 'node:url';
|
|
9
|
-
import { WebSocket } from 'ws';
|
|
1
|
+
import { AsyncIterableQueue, log, tokenize, tts } from "@livekit/agents";
|
|
2
|
+
import { AudioFrame } from "@livekit/rtc-node";
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import { URL } from "node:url";
|
|
5
|
+
import { WebSocket } from "ws";
|
|
10
6
|
const DEFAULT_VOICE = {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
7
|
+
id: "EXAVITQu4vr4xnSDxMaL",
|
|
8
|
+
name: "Bella",
|
|
9
|
+
category: "premade",
|
|
10
|
+
settings: {
|
|
11
|
+
stability: 0.71,
|
|
12
|
+
similarity_boost: 0.5,
|
|
13
|
+
style: 0,
|
|
14
|
+
use_speaker_boost: true
|
|
15
|
+
}
|
|
20
16
|
};
|
|
21
|
-
const API_BASE_URL_V1 =
|
|
22
|
-
const AUTHORIZATION_HEADER =
|
|
17
|
+
const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
|
|
18
|
+
const AUTHORIZATION_HEADER = "xi-api-key";
|
|
23
19
|
const defaultTTSOptions = {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
20
|
+
apiKey: process.env.ELEVEN_API_KEY,
|
|
21
|
+
voice: DEFAULT_VOICE,
|
|
22
|
+
modelID: "eleven_turbo_v2_5",
|
|
23
|
+
baseURL: API_BASE_URL_V1,
|
|
24
|
+
encoding: "pcm_22050",
|
|
25
|
+
streamingLatency: 3,
|
|
26
|
+
wordTokenizer: new tokenize.basic.WordTokenizer(false),
|
|
27
|
+
chunkLengthSchedule: [],
|
|
28
|
+
enableSsmlParsing: false
|
|
33
29
|
};
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
30
|
+
class TTS extends tts.TTS {
|
|
31
|
+
#opts;
|
|
32
|
+
constructor(opts = {}) {
|
|
33
|
+
super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
|
|
34
|
+
streaming: true
|
|
35
|
+
});
|
|
36
|
+
this.#opts = {
|
|
37
|
+
...defaultTTSOptions,
|
|
38
|
+
...opts
|
|
39
|
+
};
|
|
40
|
+
if (this.#opts.apiKey === void 0) {
|
|
41
|
+
throw new Error(
|
|
42
|
+
"ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY"
|
|
43
|
+
);
|
|
47
44
|
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
settings: undefined,
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
return voices;
|
|
45
|
+
}
|
|
46
|
+
async listVoices() {
|
|
47
|
+
return fetch(this.#opts.baseURL + "/voices", {
|
|
48
|
+
headers: {
|
|
49
|
+
[AUTHORIZATION_HEADER]: this.#opts.apiKey
|
|
50
|
+
}
|
|
51
|
+
}).then((data) => data.json()).then((data) => {
|
|
52
|
+
const voices = [];
|
|
53
|
+
for (const voice of data.voices) {
|
|
54
|
+
voices.push({
|
|
55
|
+
id: voice.voice_id,
|
|
56
|
+
name: voice.name,
|
|
57
|
+
category: voice.category,
|
|
58
|
+
settings: void 0
|
|
66
59
|
});
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
60
|
+
}
|
|
61
|
+
return voices;
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
synthesize() {
|
|
65
|
+
throw new Error("Chunked responses are not supported on ElevenLabs TTS");
|
|
66
|
+
}
|
|
67
|
+
stream() {
|
|
68
|
+
return new SynthesizeStream(this.#opts);
|
|
69
|
+
}
|
|
74
70
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
71
|
+
class SynthesizeStream extends tts.SynthesizeStream {
|
|
72
|
+
#opts;
|
|
73
|
+
#logger = log();
|
|
74
|
+
streamURL;
|
|
75
|
+
constructor(opts) {
|
|
76
|
+
super();
|
|
77
|
+
this.#opts = opts;
|
|
78
|
+
this.closed = false;
|
|
79
|
+
const baseURL = opts.baseURL + (opts.baseURL.endsWith("/") ? "" : "/");
|
|
80
|
+
this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
|
|
81
|
+
const params = {
|
|
82
|
+
model_id: opts.modelID,
|
|
83
|
+
output_format: opts.encoding,
|
|
84
|
+
optimize_streaming_latency: `${opts.streamingLatency}`,
|
|
85
|
+
enable_ssml_parsing: `${opts.enableSsmlParsing}`
|
|
86
|
+
};
|
|
87
|
+
Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
|
|
88
|
+
this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
|
|
89
|
+
this.#run();
|
|
90
|
+
}
|
|
91
|
+
async #run() {
|
|
92
|
+
const segments = new AsyncIterableQueue();
|
|
93
|
+
const tokenizeInput = async () => {
|
|
94
|
+
let stream = null;
|
|
95
|
+
for await (const text of this.input) {
|
|
96
|
+
if (text === SynthesizeStream.FLUSH_SENTINEL) {
|
|
97
|
+
stream == null ? void 0 : stream.endInput();
|
|
98
|
+
stream = null;
|
|
99
|
+
} else {
|
|
100
|
+
if (!stream) {
|
|
101
|
+
stream = this.#opts.wordTokenizer.stream();
|
|
102
|
+
segments.put(stream);
|
|
103
|
+
}
|
|
104
|
+
stream.pushText(text);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
segments.close();
|
|
108
|
+
};
|
|
109
|
+
const runStream = async () => {
|
|
110
|
+
for await (const stream of segments) {
|
|
111
|
+
await this.#runWS(stream);
|
|
112
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
await Promise.all([tokenizeInput(), runStream()]);
|
|
116
|
+
this.close();
|
|
117
|
+
}
|
|
118
|
+
async #runWS(stream, maxRetry = 3) {
|
|
119
|
+
let retries = 0;
|
|
120
|
+
let ws;
|
|
121
|
+
while (true) {
|
|
122
|
+
ws = new WebSocket(this.streamURL, {
|
|
123
|
+
headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
|
|
124
|
+
});
|
|
125
|
+
try {
|
|
126
|
+
await new Promise((resolve, reject) => {
|
|
127
|
+
ws.on("open", resolve);
|
|
128
|
+
ws.on("error", (error) => reject(error));
|
|
129
|
+
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
130
|
+
});
|
|
131
|
+
break;
|
|
132
|
+
} catch (e) {
|
|
133
|
+
if (retries >= maxRetry) {
|
|
134
|
+
throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
|
|
135
|
+
}
|
|
136
|
+
const delay = Math.min(retries * 5, 5);
|
|
137
|
+
retries++;
|
|
138
|
+
this.#logger.warn(
|
|
139
|
+
`failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
|
|
140
|
+
);
|
|
141
|
+
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
142
|
+
}
|
|
123
143
|
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
144
|
+
const requestId = randomUUID();
|
|
145
|
+
const segmentId = randomUUID();
|
|
146
|
+
ws.send(
|
|
147
|
+
JSON.stringify({
|
|
148
|
+
text: " ",
|
|
149
|
+
voice_settings: this.#opts.voice.settings,
|
|
150
|
+
try_trigger_generation: true,
|
|
151
|
+
chunk_length_schedule: this.#opts.chunkLengthSchedule
|
|
152
|
+
})
|
|
153
|
+
);
|
|
154
|
+
let eosSent = false;
|
|
155
|
+
const sendTask = async () => {
|
|
156
|
+
let xmlContent = [];
|
|
157
|
+
for await (const data of stream) {
|
|
158
|
+
let text = data.token;
|
|
159
|
+
if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
|
|
160
|
+
xmlContent.push(text);
|
|
161
|
+
if (text.indexOf("</phoneme>") !== -1) {
|
|
162
|
+
text = xmlContent.join(" ");
|
|
163
|
+
xmlContent = [];
|
|
164
|
+
} else {
|
|
165
|
+
continue;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
|
|
169
|
+
}
|
|
170
|
+
if (xmlContent.length) {
|
|
171
|
+
this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
|
|
172
|
+
}
|
|
173
|
+
ws.send(JSON.stringify({ text: "" }));
|
|
174
|
+
eosSent = true;
|
|
175
|
+
};
|
|
176
|
+
const listenTask = async () => {
|
|
177
|
+
while (!this.closed) {
|
|
178
|
+
try {
|
|
179
|
+
await new Promise((resolve, reject) => {
|
|
180
|
+
ws.removeAllListeners();
|
|
181
|
+
ws.on("message", (data) => resolve(data));
|
|
182
|
+
ws.on("close", (code, reason) => {
|
|
183
|
+
if (!eosSent) {
|
|
184
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
185
|
+
}
|
|
186
|
+
reject();
|
|
130
187
|
});
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
const delay = Math.min(retries * 5, 5);
|
|
144
|
-
retries++;
|
|
145
|
-
this.#logger.warn(`failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`);
|
|
146
|
-
await new Promise((resolve) => setTimeout(resolve, delay * 1000));
|
|
188
|
+
}).then((msg) => {
|
|
189
|
+
const json = JSON.parse(msg.toString());
|
|
190
|
+
if ("audio" in json) {
|
|
191
|
+
const data = new Int16Array(Buffer.from(json.audio, "base64").buffer);
|
|
192
|
+
const frame = new AudioFrame(
|
|
193
|
+
data,
|
|
194
|
+
sampleRateFromFormat(this.#opts.encoding),
|
|
195
|
+
1,
|
|
196
|
+
data.length
|
|
197
|
+
);
|
|
198
|
+
this.queue.put({ requestId, segmentId, frame });
|
|
147
199
|
}
|
|
200
|
+
});
|
|
201
|
+
} catch {
|
|
202
|
+
break;
|
|
148
203
|
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
voice_settings: this.#opts.voice.settings,
|
|
154
|
-
try_trigger_generation: true,
|
|
155
|
-
chunk_length_schedule: this.#opts.chunkLengthSchedule,
|
|
156
|
-
}));
|
|
157
|
-
let eosSent = false;
|
|
158
|
-
const sendTask = async () => {
|
|
159
|
-
let xmlContent = [];
|
|
160
|
-
for await (const data of stream) {
|
|
161
|
-
let text = data.token;
|
|
162
|
-
if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {
|
|
163
|
-
xmlContent.push(text);
|
|
164
|
-
if (text.indexOf('</phoneme>') !== -1) {
|
|
165
|
-
text = xmlContent.join(' ');
|
|
166
|
-
xmlContent = [];
|
|
167
|
-
}
|
|
168
|
-
else {
|
|
169
|
-
continue;
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));
|
|
173
|
-
}
|
|
174
|
-
if (xmlContent.length) {
|
|
175
|
-
this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
|
|
176
|
-
}
|
|
177
|
-
ws.send(JSON.stringify({ text: '' }));
|
|
178
|
-
eosSent = true;
|
|
179
|
-
};
|
|
180
|
-
const listenTask = async () => {
|
|
181
|
-
while (!this.closed) {
|
|
182
|
-
try {
|
|
183
|
-
await new Promise((resolve, reject) => {
|
|
184
|
-
ws.removeAllListeners();
|
|
185
|
-
ws.on('message', (data) => resolve(data));
|
|
186
|
-
ws.on('close', (code, reason) => {
|
|
187
|
-
if (!eosSent) {
|
|
188
|
-
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
189
|
-
}
|
|
190
|
-
reject();
|
|
191
|
-
});
|
|
192
|
-
}).then((msg) => {
|
|
193
|
-
const json = JSON.parse(msg.toString());
|
|
194
|
-
if ('audio' in json) {
|
|
195
|
-
const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);
|
|
196
|
-
const frame = new AudioFrame(data, sampleRateFromFormat(this.#opts.encoding), 1, data.length);
|
|
197
|
-
this.queue.put({ requestId, segmentId, frame });
|
|
198
|
-
}
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
|
-
catch {
|
|
202
|
-
break;
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
};
|
|
206
|
-
await Promise.all([sendTask(), listenTask()]);
|
|
207
|
-
}
|
|
204
|
+
}
|
|
205
|
+
};
|
|
206
|
+
await Promise.all([sendTask(), listenTask()]);
|
|
207
|
+
}
|
|
208
208
|
}
|
|
209
|
-
_a = SynthesizeStream;
|
|
210
209
|
const sampleRateFromFormat = (encoding) => {
|
|
211
|
-
|
|
210
|
+
return Number(encoding.split("_")[1]);
|
|
211
|
+
};
|
|
212
|
+
export {
|
|
213
|
+
SynthesizeStream,
|
|
214
|
+
TTS
|
|
212
215
|
};
|
|
213
216
|
//# sourceMappingURL=tts.js.map
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.js","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAE,kBAAkB,EAAE,GAAG,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAEzE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAiB7C,MAAM,aAAa,GAAU;IAC3B,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,SAAS;IACnB,QAAQ,EAAE;QACR,SAAS,EAAE,IAAI;QACf,gBAAgB,EAAE,GAAG;QACrB,KAAK,EAAE,GAAG;QACV,iBAAiB,EAAE,IAAI;KACxB;CACF,CAAC;AAEF,MAAM,eAAe,GAAG,+BAA+B,CAAC;AACxD,MAAM,oBAAoB,GAAG,YAAY,CAAC;AAc1C,MAAM,iBAAiB,GAAe;IACpC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;IAClC,KAAK,EAAE,aAAa;IACpB,OAAO,EAAE,mBAAmB;IAC5B,OAAO,EAAE,eAAe;IACxB,QAAQ,EAAE,WAAW;IACrB,gBAAgB,EAAE,CAAC;IACnB,aAAa,EAAE,IAAI,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC;IACtD,mBAAmB,EAAE,EAAE;IACvB,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,OAAO,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,KAAK,CAAa;IAElB,YAAY,OAA4B,EAAE;QACxC,KAAK,CAAC,oBAAoB,CAAC,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE;YAC1E,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG;YACX,GAAG,iBAAiB;YACpB,GAAG,IAAI;SACR,CAAC;QAEF,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,8EAA8E,CAC/E,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU;QACd,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,EAAE;YAC3C,OAAO,EAAE;gBACP,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAO;aAC3C;SACF,CAAC;aACC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAC3B,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YACb,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,KAAK,MAAM,KAAK,IACd,IACD,CAAC,MAAM,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,KAAK,CAAC,QAAQ;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,QAAQ,EAAE,SAAS;iBACpB,CAAC,CAAC;YACL,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAED,UAAU;QACR,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,MAAM,OAAO,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,KAAK,CAAa;IAClB,OAAO,GAAG,GAAG,EAAE,CAAC;IACP,SAAS,CAAM;IAExB,YAAY,IAAgB;QAC1B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QAEpB,sCAAsC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAEvE,IAAI,CAAC,SAAS,GAAG,IAAI,GAAG,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC,EAAE,eAAe,EAAE,OAAO,CAAC,CAAC;QAClF,MAAM,MAAM,GAAG;YACb,QAAQ,EAAE,IAAI,CAAC,OAAO;YACtB,aAAa,EAAE,IAAI,CAAC,QAAQ;YAC5B,0BAA0B,EAAE,GAAG,IAAI,CAAC,gBAAgB,EAAE;YACtD,mBAAmB,EAAE,GAAG,IAAI,CAAC,iBAAiB,EAAE;SACjD,CAAC;QACF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrF,IAAI,CAAC,SAAS,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAExE,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,QAAQ,GAAG,IAAI,kBAAkB,EAAc,CAAC;QAEtD,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;YAC/B,IAAI,MAAM,GAA+B,IAAI,CAAC;YAC9C,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACpC,IAAI,IAAI,KAAK,EAAgB,CAAC,cAAc,EAAE,CAAC;oBAC7C,MAAM,EAAE,QAAQ,EAAE,CAAC;oBACnB,MAAM,GAAG,IAAI,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,MAAM,EAAE,CAAC;wBACZ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC;wBAC3C,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;oBACD,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YACD,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,CAAC,CAAC;QAEF,MAAM,SAAS,GAAG,KAAK,IAAI,EAAE;YAC3B,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;gBACpC,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAgB,CAAC,aAAa,CAAC,CAAC;YACjD,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;QAClD,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,MAA2B,EAAE,QAAQ,GAAG,CAAC;QACpD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,EAAa,CAAC;QAClB,OAAO,IAAI,EAAE,CAAC;YACZ,EAAE,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE;gBACjC,OAAO,EAAE,EAAE,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;aACvD,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBACpC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACvB,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxB,MAAM,IAAI,KAAK,CAAC,yCAAyC,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC;gBACrF,CAAC;gBAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,OAAO,EAAE,CAAC;gBAEV,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ,GAAG,CAC/F,CAAC;gBACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;QAE/B,EAAE,CAAC,IAAI,CACL,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,GAAG;YACT,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ;YACzC,sBAAsB,EAAE,IAAI;YAC5B,qBAAqB,EAAE,IAAI,CAAC,KAAK,CAAC,mBAAmB;SACtD,CAAC,CACH,CAAC;QACF,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;YAC1B,IAAI,UAAU,GAAa,EAAE,CAAC;YAC9B,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;gBAChC,IAAI,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC;gBAEtB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;oBACvF,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBACtC,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;wBAC5B,UAAU,GAAG,EAAE,CAAC;oBAClB,CAAC;yBAAM,CAAC;wBACN,SAAS;oBACX,CAAC;gBACH,CAAC;gBAED,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG,GAAG,EAAE,sBAAsB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;YAC/E,CAAC;YAED,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;gBACtB,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;YAC3E,CAAC;YAED,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YACtC,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC,CAAC;QAEF,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;YAC5B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpB,IAAI,CAAC;oBACH,MAAM,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;wBAC7C,EAAE,CAAC,kBAAkB,EAAE,CAAC;wBACxB,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;wBAC1C,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;4BAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;gCACb,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,8BAA8B,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC;4BACtE,CAAC;4BACD,MAAM,EAAE,CAAC;wBACX,CAAC,CAAC,CAAC;oBACL,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;wBACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACxC,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;4BACpB,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC;4BACtE,MAAM,KAAK,GAAG,IAAI,UAAU,CAC1B,IAAI,EACJ,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EACzC,CAAC,EACD,IAAI,CAAC,MAAM,CACZ,CAAC;4BACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;wBAClD,CAAC;oBACH,CAAC,CAAC,CAAC;gBACL,CAAC;gBAAC,MAAM,CAAC;oBACP,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC;CACF;;AAED,MAAM,oBAAoB,GAAG,CAAC,QAAqB,EAAU,EAAE;IAC7D,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACxC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n readonly streamURL: URL;\n\n constructor(opts: TTSOptions) {\n super();\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n const listenTask = async () => {\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);\n const frame = new AudioFrame(\n data,\n sampleRateFromFormat(this.#opts.encoding),\n 1,\n data.length,\n );\n this.queue.put({ requestId, segmentId, frame });\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,KAAK,UAAU,WAAW;AACvD,SAAS,kBAAkB;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EAEA,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,KAAK,KAAK;AAAA,EACxC;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACL;AAAA,EAET,YAAY,MAAkB;AAC5B,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,WAAW,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACpE,oBAAM,QAAQ,IAAI;AAAA,gBAChB;AAAA,gBACA,qBAAqB,KAAK,MAAM,QAAQ;AAAA,gBACxC;AAAA,gBACA,KAAK;AAAA,cACP;AACA,mBAAK,MAAM,IAAI,EAAE,WAAW,WAAW,MAAM,CAAC;AAAA,YAChD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":[]}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var import_agents_plugin_openai = require("@livekit/agents-plugin-openai");
|
|
3
|
+
var import_agents_plugins_test = require("@livekit/agents-plugins-test");
|
|
4
|
+
var import_vitest = require("vitest");
|
|
5
|
+
var import_tts = require("./tts.cjs");
|
|
6
|
+
(0, import_vitest.describe)("ElevenLabs", async () => {
|
|
7
|
+
await (0, import_agents_plugins_test.tts)(new import_tts.TTS(), new import_agents_plugin_openai.STT(), { nonStreaming: false });
|
|
8
|
+
});
|
|
9
|
+
//# sourceMappingURL=tts.test.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/tts.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { STT } from '@livekit/agents-plugin-openai';\nimport { tts } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { TTS } from './tts.js';\n\ndescribe('ElevenLabs', async () => {\n await tts(new TTS(), new STT(), { nonStreaming: false });\n});\n"],"mappings":";AAGA,kCAAoB;AACpB,iCAAoB;AACpB,oBAAyB;AACzB,iBAAoB;AAAA,IAEpB,wBAAS,cAAc,YAAY;AACjC,YAAM,gCAAI,IAAI,eAAI,GAAG,IAAI,gCAAI,GAAG,EAAE,cAAc,MAAM,CAAC;AACzD,CAAC;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.test.d.ts","sourceRoot":"","sources":["../src/tts.test.ts"],"names":[],"mappings":""}
|
package/dist/tts.test.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { STT } from "@livekit/agents-plugin-openai";
|
|
2
|
+
import { tts } from "@livekit/agents-plugins-test";
|
|
3
|
+
import { describe } from "vitest";
|
|
4
|
+
import { TTS } from "./tts.js";
|
|
5
|
+
describe("ElevenLabs", async () => {
|
|
6
|
+
await tts(new TTS(), new STT(), { nonStreaming: false });
|
|
7
|
+
});
|
|
8
|
+
//# sourceMappingURL=tts.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/tts.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { STT } from '@livekit/agents-plugin-openai';\nimport { tts } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { TTS } from './tts.js';\n\ndescribe('ElevenLabs', async () => {\n await tts(new TTS(), new STT(), { nonStreaming: false });\n});\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,WAAW;AACpB,SAAS,gBAAgB;AACzB,SAAS,WAAW;AAEpB,SAAS,cAAc,YAAY;AACjC,QAAM,IAAI,IAAI,IAAI,GAAG,IAAI,IAAI,GAAG,EAAE,cAAc,MAAM,CAAC;AACzD,CAAC;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,33 +1,45 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-elevenlabs",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "ElevenLabs plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
|
+
"require": "dist/index.cjs",
|
|
6
7
|
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
7
15
|
"author": "LiveKit",
|
|
8
16
|
"type": "module",
|
|
9
17
|
"repository": "git@github.com:livekit/agents-js.git",
|
|
10
18
|
"license": "Apache-2.0",
|
|
11
19
|
"files": [
|
|
12
20
|
"dist",
|
|
13
|
-
"src"
|
|
21
|
+
"src",
|
|
22
|
+
"README.md"
|
|
14
23
|
],
|
|
15
24
|
"devDependencies": {
|
|
25
|
+
"@livekit/agents": "^x",
|
|
26
|
+
"@livekit/agents-plugin-openai": "^x",
|
|
27
|
+
"@livekit/agents-plugins-test": "^x",
|
|
28
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
16
29
|
"@microsoft/api-extractor": "^7.35.0",
|
|
17
|
-
"@livekit/rtc-node": "^0.11.1",
|
|
18
30
|
"@types/ws": "^8.5.10",
|
|
19
|
-
"
|
|
20
|
-
"
|
|
31
|
+
"tsup": "^8.3.5",
|
|
32
|
+
"typescript": "^5.0.0"
|
|
21
33
|
},
|
|
22
34
|
"dependencies": {
|
|
23
35
|
"ws": "^8.16.0"
|
|
24
36
|
},
|
|
25
37
|
"peerDependencies": {
|
|
26
|
-
"@livekit/rtc-node": "^0.
|
|
27
|
-
"@livekit/agents": "^0.
|
|
38
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
39
|
+
"@livekit/agents": "^0.5.1x"
|
|
28
40
|
},
|
|
29
41
|
"scripts": {
|
|
30
|
-
"build": "tsc",
|
|
42
|
+
"build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
|
|
31
43
|
"clean": "rm -rf dist",
|
|
32
44
|
"clean:build": "pnpm clean && pnpm build",
|
|
33
45
|
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
|
package/src/tts.test.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { STT } from '@livekit/agents-plugin-openai';
|
|
5
|
+
import { tts } from '@livekit/agents-plugins-test';
|
|
6
|
+
import { describe } from 'vitest';
|
|
7
|
+
import { TTS } from './tts.js';
|
|
8
|
+
|
|
9
|
+
describe('ElevenLabs', async () => {
|
|
10
|
+
await tts(new TTS(), new STT(), { nonStreaming: false });
|
|
11
|
+
});
|
package/src/tts.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';
|
|
5
|
-
import type { WordStream } from '@livekit/agents/dist/tokenize/tokenizer.js';
|
|
6
5
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
7
6
|
import { randomUUID } from 'node:crypto';
|
|
8
7
|
import { URL } from 'node:url';
|
|
@@ -141,7 +140,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
141
140
|
}
|
|
142
141
|
|
|
143
142
|
async #run() {
|
|
144
|
-
const segments = new AsyncIterableQueue<WordStream>();
|
|
143
|
+
const segments = new AsyncIterableQueue<tokenize.WordStream>();
|
|
145
144
|
|
|
146
145
|
const tokenizeInput = async () => {
|
|
147
146
|
let stream: tokenize.WordStream | null = null;
|