@livekit/agents-plugin-elevenlabs 0.6.3 → 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +14 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -1
- package/dist/models.cjs.map +1 -1
- package/dist/models.d.cts +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/tts.cjs +46 -22
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +6 -2
- package/dist/tts.d.ts +6 -2
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +54 -23
- package/dist/tts.js.map +1 -1
- package/package.json +7 -7
- package/src/index.ts +14 -1
- package/src/models.ts +2 -1
- package/src/tts.ts +62 -24
package/dist/index.cjs
CHANGED
|
@@ -13,9 +13,20 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
13
13
|
};
|
|
14
14
|
var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
|
|
15
15
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
16
|
-
var
|
|
17
|
-
module.exports = __toCommonJS(
|
|
18
|
-
|
|
16
|
+
var index_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(index_exports);
|
|
18
|
+
var import_agents = require("@livekit/agents");
|
|
19
|
+
__reExport(index_exports, require("./tts.cjs"), module.exports);
|
|
20
|
+
class ElevenLabsPlugin extends import_agents.Plugin {
|
|
21
|
+
constructor() {
|
|
22
|
+
super({
|
|
23
|
+
title: "elevenlabs",
|
|
24
|
+
version: "0.6.2",
|
|
25
|
+
package: "@livekit/agents-plugin-elevenlabs"
|
|
26
|
+
});
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
import_agents.Plugin.registerPlugin(new ElevenLabsPlugin());
|
|
19
30
|
// Annotate the CommonJS export names for ESM import in node:
|
|
20
31
|
0 && (module.exports = {
|
|
21
32
|
...require("./tts.cjs")
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText:
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass ElevenLabsPlugin extends Plugin {\n constructor() {\n super({\n title: 'elevenlabs',\n version: '0.6.2',\n package: '@livekit/agents-plugin-elevenlabs',\n });\n }\n}\n\nPlugin.registerPlugin(new ElevenLabsPlugin());\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,oBAAuB;AAEvB,0BAAc,qBALd;AAOA,MAAM,yBAAyB,qBAAO;AAAA,EACpC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,iBAAiB,CAAC;","names":[]}
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,cAAc,UAAU,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,2 +1,13 @@
|
|
|
1
|
+
import { Plugin } from "@livekit/agents";
|
|
1
2
|
export * from "./tts.js";
|
|
3
|
+
class ElevenLabsPlugin extends Plugin {
|
|
4
|
+
constructor() {
|
|
5
|
+
super({
|
|
6
|
+
title: "elevenlabs",
|
|
7
|
+
version: "0.6.2",
|
|
8
|
+
package: "@livekit/agents-plugin-elevenlabs"
|
|
9
|
+
});
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
Plugin.registerPlugin(new ElevenLabsPlugin());
|
|
2
13
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText:
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass ElevenLabsPlugin extends Plugin {\n constructor() {\n super({\n title: 'elevenlabs',\n version: '0.6.2',\n package: '@livekit/agents-plugin-elevenlabs',\n });\n }\n}\n\nPlugin.registerPlugin(new ElevenLabsPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,cAAc;AAEd,MAAM,yBAAyB,OAAO;AAAA,EACpC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,iBAAiB,CAAC;","names":[]}
|
package/dist/models.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5'\n | 'eleven_v3';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
package/dist/models.d.cts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5';
|
|
1
|
+
export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5' | 'eleven_v3';
|
|
2
2
|
export type TTSEncoding = 'pcm_16000' | 'pcm_22050' | 'pcm_44100';
|
|
3
3
|
//# sourceMappingURL=models.d.ts.map
|
package/dist/models.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5';
|
|
1
|
+
export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5' | 'eleven_v3';
|
|
2
2
|
export type TTSEncoding = 'pcm_16000' | 'pcm_22050' | 'pcm_44100';
|
|
3
3
|
//# sourceMappingURL=models.d.ts.map
|
package/dist/models.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,uBAAuB,GACvB,wBAAwB,GACxB,wBAAwB,GACxB,iBAAiB,GACjB,mBAAmB,GACnB,iBAAiB,GACjB,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,uBAAuB,GACvB,wBAAwB,GACxB,wBAAwB,GACxB,iBAAiB,GACjB,mBAAmB,GACnB,iBAAiB,GACjB,mBAAmB,GACnB,WAAW,CAAC;AAEhB,MAAM,MAAM,WAAW,GAQrB,WAAW,GAAG,WAAW,GAAG,WAAW,CAAC"}
|
package/dist/tts.cjs
CHANGED
|
@@ -23,11 +23,11 @@ __export(tts_exports, {
|
|
|
23
23
|
});
|
|
24
24
|
module.exports = __toCommonJS(tts_exports);
|
|
25
25
|
var import_agents = require("@livekit/agents");
|
|
26
|
-
var import_node_crypto = require("node:crypto");
|
|
27
26
|
var import_node_url = require("node:url");
|
|
28
27
|
var import_ws = require("ws");
|
|
28
|
+
const DEFAULT_INACTIVITY_TIMEOUT = 300;
|
|
29
29
|
const DEFAULT_VOICE = {
|
|
30
|
-
id: "
|
|
30
|
+
id: "bIHbv24MWmeRgasZH58o",
|
|
31
31
|
name: "Bella",
|
|
32
32
|
category: "premade",
|
|
33
33
|
settings: {
|
|
@@ -42,13 +42,13 @@ const AUTHORIZATION_HEADER = "xi-api-key";
|
|
|
42
42
|
const defaultTTSOptions = {
|
|
43
43
|
apiKey: process.env.ELEVEN_API_KEY,
|
|
44
44
|
voice: DEFAULT_VOICE,
|
|
45
|
-
modelID: "
|
|
45
|
+
modelID: "eleven_turbo_v2_5",
|
|
46
46
|
baseURL: API_BASE_URL_V1,
|
|
47
47
|
encoding: "pcm_22050",
|
|
48
|
-
streamingLatency: 3,
|
|
49
48
|
wordTokenizer: new import_agents.tokenize.basic.WordTokenizer(false),
|
|
50
|
-
|
|
51
|
-
|
|
49
|
+
enableSsmlParsing: false,
|
|
50
|
+
inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
|
|
51
|
+
syncAlignment: true
|
|
52
52
|
};
|
|
53
53
|
class TTS extends import_agents.tts.TTS {
|
|
54
54
|
#opts;
|
|
@@ -106,19 +106,24 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
106
106
|
const params = {
|
|
107
107
|
model_id: opts.modelID,
|
|
108
108
|
output_format: opts.encoding,
|
|
109
|
-
optimize_streaming_latency: `${opts.streamingLatency}`,
|
|
110
109
|
enable_ssml_parsing: `${opts.enableSsmlParsing}`,
|
|
111
|
-
|
|
110
|
+
sync_alignment: `${opts.syncAlignment}`,
|
|
111
|
+
...opts.autoMode !== void 0 && { auto_mode: `${opts.autoMode}` },
|
|
112
|
+
...opts.languageCode && { language_code: opts.languageCode },
|
|
113
|
+
...opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` },
|
|
114
|
+
...opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }
|
|
112
115
|
};
|
|
113
116
|
Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
|
|
114
117
|
this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
|
|
115
|
-
this.#run();
|
|
116
118
|
}
|
|
117
|
-
async
|
|
119
|
+
async run() {
|
|
118
120
|
const segments = new import_agents.AsyncIterableQueue();
|
|
119
121
|
const tokenizeInput = async () => {
|
|
120
122
|
let stream = null;
|
|
121
123
|
for await (const text of this.input) {
|
|
124
|
+
if (this.abortController.signal.aborted) {
|
|
125
|
+
break;
|
|
126
|
+
}
|
|
122
127
|
if (text === SynthesizeStream.FLUSH_SENTINEL) {
|
|
123
128
|
stream == null ? void 0 : stream.endInput();
|
|
124
129
|
stream = null;
|
|
@@ -134,12 +139,14 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
134
139
|
};
|
|
135
140
|
const runStream = async () => {
|
|
136
141
|
for await (const stream of segments) {
|
|
142
|
+
if (this.abortController.signal.aborted) {
|
|
143
|
+
break;
|
|
144
|
+
}
|
|
137
145
|
await this.#runWS(stream);
|
|
138
146
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
139
147
|
}
|
|
140
148
|
};
|
|
141
149
|
await Promise.all([tokenizeInput(), runStream()]);
|
|
142
|
-
this.close();
|
|
143
150
|
}
|
|
144
151
|
async #runWS(stream, maxRetry = 3) {
|
|
145
152
|
let retries = 0;
|
|
@@ -148,6 +155,10 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
148
155
|
ws = new import_ws.WebSocket(this.streamURL, {
|
|
149
156
|
headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
|
|
150
157
|
});
|
|
158
|
+
ws.on("error", (error) => {
|
|
159
|
+
this.abortController.abort();
|
|
160
|
+
this.#logger.error({ error }, "Error connecting to ElevenLabs");
|
|
161
|
+
});
|
|
151
162
|
try {
|
|
152
163
|
await new Promise((resolve, reject) => {
|
|
153
164
|
ws.on("open", resolve);
|
|
@@ -167,20 +178,26 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
167
178
|
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
168
179
|
}
|
|
169
180
|
}
|
|
170
|
-
const requestId = (0,
|
|
171
|
-
const segmentId = (0,
|
|
181
|
+
const requestId = (0, import_agents.shortuuid)();
|
|
182
|
+
const segmentId = (0, import_agents.shortuuid)();
|
|
172
183
|
ws.send(
|
|
173
184
|
JSON.stringify({
|
|
174
185
|
text: " ",
|
|
175
186
|
voice_settings: this.#opts.voice.settings,
|
|
176
|
-
|
|
177
|
-
|
|
187
|
+
...this.#opts.chunkLengthSchedule && {
|
|
188
|
+
generation_config: {
|
|
189
|
+
chunk_length_schedule: this.#opts.chunkLengthSchedule
|
|
190
|
+
}
|
|
191
|
+
}
|
|
178
192
|
})
|
|
179
193
|
);
|
|
180
194
|
let eosSent = false;
|
|
181
195
|
const sendTask = async () => {
|
|
182
196
|
let xmlContent = [];
|
|
183
197
|
for await (const data of stream) {
|
|
198
|
+
if (this.abortController.signal.aborted) {
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
184
201
|
let text = data.token;
|
|
185
202
|
if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
|
|
186
203
|
xmlContent.push(text);
|
|
@@ -191,7 +208,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
191
208
|
continue;
|
|
192
209
|
}
|
|
193
210
|
}
|
|
194
|
-
ws.send(JSON.stringify({ text: text + " "
|
|
211
|
+
ws.send(JSON.stringify({ text: text + " " }));
|
|
195
212
|
}
|
|
196
213
|
if (xmlContent.length) {
|
|
197
214
|
this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
|
|
@@ -207,8 +224,9 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
207
224
|
}
|
|
208
225
|
};
|
|
209
226
|
const listenTask = async () => {
|
|
227
|
+
let finalReceived = false;
|
|
210
228
|
const bstream = new import_agents.AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
|
|
211
|
-
while (!this.closed) {
|
|
229
|
+
while (!this.closed && !this.abortController.signal.aborted) {
|
|
212
230
|
try {
|
|
213
231
|
await new Promise((resolve, reject) => {
|
|
214
232
|
ws.removeAllListeners();
|
|
@@ -217,30 +235,36 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
217
235
|
if (!eosSent) {
|
|
218
236
|
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
219
237
|
}
|
|
220
|
-
|
|
238
|
+
if (!finalReceived) {
|
|
239
|
+
reject(new Error("WebSocket closed"));
|
|
240
|
+
}
|
|
221
241
|
});
|
|
222
242
|
}).then((msg) => {
|
|
223
243
|
const json = JSON.parse(msg.toString());
|
|
224
|
-
if ("audio" in json) {
|
|
244
|
+
if ("audio" in json && json.audio !== null) {
|
|
225
245
|
const data = new Int8Array(Buffer.from(json.audio, "base64"));
|
|
226
246
|
for (const frame of bstream.write(data)) {
|
|
227
247
|
sendLastFrame(segmentId, false);
|
|
228
248
|
lastFrame = frame;
|
|
229
249
|
}
|
|
230
|
-
} else if (
|
|
250
|
+
} else if (json.isFinal) {
|
|
251
|
+
finalReceived = true;
|
|
231
252
|
for (const frame of bstream.flush()) {
|
|
232
253
|
sendLastFrame(segmentId, false);
|
|
233
254
|
lastFrame = frame;
|
|
234
255
|
}
|
|
235
256
|
sendLastFrame(segmentId, true);
|
|
236
257
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
237
|
-
if (segmentId === requestId) {
|
|
258
|
+
if (segmentId === requestId || this.abortController.signal.aborted) {
|
|
238
259
|
ws.close();
|
|
239
260
|
return;
|
|
240
261
|
}
|
|
241
262
|
}
|
|
242
263
|
});
|
|
243
|
-
} catch {
|
|
264
|
+
} catch (err) {
|
|
265
|
+
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
266
|
+
this.#logger.error({ err }, "Error in listenTask from ElevenLabs WebSocket");
|
|
267
|
+
}
|
|
244
268
|
break;
|
|
245
269
|
}
|
|
246
270
|
}
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if 
(this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new 
Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, 
false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n ws.close();\n return;\n }\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAwE;AAExE,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,E
AAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,W
AAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,WAAW;AAC3B,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,sBAAoB;AACpB,gBAAwC;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE,4BAA4B,GAAG,KAAK,gB
AAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,
KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
|
package/dist/tts.d.cts
CHANGED
|
@@ -21,10 +21,13 @@ export interface TTSOptions {
|
|
|
21
21
|
languageCode?: string;
|
|
22
22
|
baseURL: string;
|
|
23
23
|
encoding: TTSEncoding;
|
|
24
|
-
streamingLatency
|
|
24
|
+
streamingLatency?: number;
|
|
25
25
|
wordTokenizer: tokenize.WordTokenizer;
|
|
26
|
-
chunkLengthSchedule
|
|
26
|
+
chunkLengthSchedule?: number[];
|
|
27
27
|
enableSsmlParsing: boolean;
|
|
28
|
+
inactivityTimeout: number;
|
|
29
|
+
syncAlignment: boolean;
|
|
30
|
+
autoMode?: boolean;
|
|
28
31
|
}
|
|
29
32
|
export declare class TTS extends tts.TTS {
|
|
30
33
|
#private;
|
|
@@ -39,6 +42,7 @@ export declare class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
39
42
|
label: string;
|
|
40
43
|
readonly streamURL: URL;
|
|
41
44
|
constructor(tts: TTS, opts: TTSOptions);
|
|
45
|
+
protected run(): Promise<void>;
|
|
42
46
|
}
|
|
43
47
|
export {};
|
|
44
48
|
//# sourceMappingURL=tts.d.ts.map
|
package/dist/tts.d.ts
CHANGED
|
@@ -21,10 +21,13 @@ export interface TTSOptions {
|
|
|
21
21
|
languageCode?: string;
|
|
22
22
|
baseURL: string;
|
|
23
23
|
encoding: TTSEncoding;
|
|
24
|
-
streamingLatency
|
|
24
|
+
streamingLatency?: number;
|
|
25
25
|
wordTokenizer: tokenize.WordTokenizer;
|
|
26
|
-
chunkLengthSchedule
|
|
26
|
+
chunkLengthSchedule?: number[];
|
|
27
27
|
enableSsmlParsing: boolean;
|
|
28
|
+
inactivityTimeout: number;
|
|
29
|
+
syncAlignment: boolean;
|
|
30
|
+
autoMode?: boolean;
|
|
28
31
|
}
|
|
29
32
|
export declare class TTS extends tts.TTS {
|
|
30
33
|
#private;
|
|
@@ -39,6 +42,7 @@ export declare class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
39
42
|
label: string;
|
|
40
43
|
readonly streamURL: URL;
|
|
41
44
|
constructor(tts: TTS, opts: TTSOptions);
|
|
45
|
+
protected run(): Promise<void>;
|
|
42
46
|
}
|
|
43
47
|
export {};
|
|
44
48
|
//# sourceMappingURL=tts.d.ts.map
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAuBtB,GAAG;CAiLpB"}
|
package/dist/tts.js
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
AsyncIterableQueue,
|
|
3
|
+
AudioByteStream,
|
|
4
|
+
log,
|
|
5
|
+
shortuuid,
|
|
6
|
+
tokenize,
|
|
7
|
+
tts
|
|
8
|
+
} from "@livekit/agents";
|
|
3
9
|
import { URL } from "node:url";
|
|
4
10
|
import { WebSocket } from "ws";
|
|
11
|
+
const DEFAULT_INACTIVITY_TIMEOUT = 300;
|
|
5
12
|
const DEFAULT_VOICE = {
|
|
6
|
-
id: "
|
|
13
|
+
id: "bIHbv24MWmeRgasZH58o",
|
|
7
14
|
name: "Bella",
|
|
8
15
|
category: "premade",
|
|
9
16
|
settings: {
|
|
@@ -18,13 +25,13 @@ const AUTHORIZATION_HEADER = "xi-api-key";
|
|
|
18
25
|
const defaultTTSOptions = {
|
|
19
26
|
apiKey: process.env.ELEVEN_API_KEY,
|
|
20
27
|
voice: DEFAULT_VOICE,
|
|
21
|
-
modelID: "
|
|
28
|
+
modelID: "eleven_turbo_v2_5",
|
|
22
29
|
baseURL: API_BASE_URL_V1,
|
|
23
30
|
encoding: "pcm_22050",
|
|
24
|
-
streamingLatency: 3,
|
|
25
31
|
wordTokenizer: new tokenize.basic.WordTokenizer(false),
|
|
26
|
-
|
|
27
|
-
|
|
32
|
+
enableSsmlParsing: false,
|
|
33
|
+
inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
|
|
34
|
+
syncAlignment: true
|
|
28
35
|
};
|
|
29
36
|
class TTS extends tts.TTS {
|
|
30
37
|
#opts;
|
|
@@ -82,19 +89,24 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
82
89
|
const params = {
|
|
83
90
|
model_id: opts.modelID,
|
|
84
91
|
output_format: opts.encoding,
|
|
85
|
-
optimize_streaming_latency: `${opts.streamingLatency}`,
|
|
86
92
|
enable_ssml_parsing: `${opts.enableSsmlParsing}`,
|
|
87
|
-
|
|
93
|
+
sync_alignment: `${opts.syncAlignment}`,
|
|
94
|
+
...opts.autoMode !== void 0 && { auto_mode: `${opts.autoMode}` },
|
|
95
|
+
...opts.languageCode && { language_code: opts.languageCode },
|
|
96
|
+
...opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` },
|
|
97
|
+
...opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }
|
|
88
98
|
};
|
|
89
99
|
Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
|
|
90
100
|
this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
|
|
91
|
-
this.#run();
|
|
92
101
|
}
|
|
93
|
-
async
|
|
102
|
+
async run() {
|
|
94
103
|
const segments = new AsyncIterableQueue();
|
|
95
104
|
const tokenizeInput = async () => {
|
|
96
105
|
let stream = null;
|
|
97
106
|
for await (const text of this.input) {
|
|
107
|
+
if (this.abortController.signal.aborted) {
|
|
108
|
+
break;
|
|
109
|
+
}
|
|
98
110
|
if (text === SynthesizeStream.FLUSH_SENTINEL) {
|
|
99
111
|
stream == null ? void 0 : stream.endInput();
|
|
100
112
|
stream = null;
|
|
@@ -110,12 +122,14 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
110
122
|
};
|
|
111
123
|
const runStream = async () => {
|
|
112
124
|
for await (const stream of segments) {
|
|
125
|
+
if (this.abortController.signal.aborted) {
|
|
126
|
+
break;
|
|
127
|
+
}
|
|
113
128
|
await this.#runWS(stream);
|
|
114
129
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
115
130
|
}
|
|
116
131
|
};
|
|
117
132
|
await Promise.all([tokenizeInput(), runStream()]);
|
|
118
|
-
this.close();
|
|
119
133
|
}
|
|
120
134
|
async #runWS(stream, maxRetry = 3) {
|
|
121
135
|
let retries = 0;
|
|
@@ -124,6 +138,10 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
124
138
|
ws = new WebSocket(this.streamURL, {
|
|
125
139
|
headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
|
|
126
140
|
});
|
|
141
|
+
ws.on("error", (error) => {
|
|
142
|
+
this.abortController.abort();
|
|
143
|
+
this.#logger.error({ error }, "Error connecting to ElevenLabs");
|
|
144
|
+
});
|
|
127
145
|
try {
|
|
128
146
|
await new Promise((resolve, reject) => {
|
|
129
147
|
ws.on("open", resolve);
|
|
@@ -143,20 +161,26 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
143
161
|
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
144
162
|
}
|
|
145
163
|
}
|
|
146
|
-
const requestId =
|
|
147
|
-
const segmentId =
|
|
164
|
+
const requestId = shortuuid();
|
|
165
|
+
const segmentId = shortuuid();
|
|
148
166
|
ws.send(
|
|
149
167
|
JSON.stringify({
|
|
150
168
|
text: " ",
|
|
151
169
|
voice_settings: this.#opts.voice.settings,
|
|
152
|
-
|
|
153
|
-
|
|
170
|
+
...this.#opts.chunkLengthSchedule && {
|
|
171
|
+
generation_config: {
|
|
172
|
+
chunk_length_schedule: this.#opts.chunkLengthSchedule
|
|
173
|
+
}
|
|
174
|
+
}
|
|
154
175
|
})
|
|
155
176
|
);
|
|
156
177
|
let eosSent = false;
|
|
157
178
|
const sendTask = async () => {
|
|
158
179
|
let xmlContent = [];
|
|
159
180
|
for await (const data of stream) {
|
|
181
|
+
if (this.abortController.signal.aborted) {
|
|
182
|
+
break;
|
|
183
|
+
}
|
|
160
184
|
let text = data.token;
|
|
161
185
|
if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
|
|
162
186
|
xmlContent.push(text);
|
|
@@ -167,7 +191,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
167
191
|
continue;
|
|
168
192
|
}
|
|
169
193
|
}
|
|
170
|
-
ws.send(JSON.stringify({ text: text + " "
|
|
194
|
+
ws.send(JSON.stringify({ text: text + " " }));
|
|
171
195
|
}
|
|
172
196
|
if (xmlContent.length) {
|
|
173
197
|
this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
|
|
@@ -183,8 +207,9 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
183
207
|
}
|
|
184
208
|
};
|
|
185
209
|
const listenTask = async () => {
|
|
210
|
+
let finalReceived = false;
|
|
186
211
|
const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
|
|
187
|
-
while (!this.closed) {
|
|
212
|
+
while (!this.closed && !this.abortController.signal.aborted) {
|
|
188
213
|
try {
|
|
189
214
|
await new Promise((resolve, reject) => {
|
|
190
215
|
ws.removeAllListeners();
|
|
@@ -193,30 +218,36 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
193
218
|
if (!eosSent) {
|
|
194
219
|
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
195
220
|
}
|
|
196
|
-
|
|
221
|
+
if (!finalReceived) {
|
|
222
|
+
reject(new Error("WebSocket closed"));
|
|
223
|
+
}
|
|
197
224
|
});
|
|
198
225
|
}).then((msg) => {
|
|
199
226
|
const json = JSON.parse(msg.toString());
|
|
200
|
-
if ("audio" in json) {
|
|
227
|
+
if ("audio" in json && json.audio !== null) {
|
|
201
228
|
const data = new Int8Array(Buffer.from(json.audio, "base64"));
|
|
202
229
|
for (const frame of bstream.write(data)) {
|
|
203
230
|
sendLastFrame(segmentId, false);
|
|
204
231
|
lastFrame = frame;
|
|
205
232
|
}
|
|
206
|
-
} else if (
|
|
233
|
+
} else if (json.isFinal) {
|
|
234
|
+
finalReceived = true;
|
|
207
235
|
for (const frame of bstream.flush()) {
|
|
208
236
|
sendLastFrame(segmentId, false);
|
|
209
237
|
lastFrame = frame;
|
|
210
238
|
}
|
|
211
239
|
sendLastFrame(segmentId, true);
|
|
212
240
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
213
|
-
if (segmentId === requestId) {
|
|
241
|
+
if (segmentId === requestId || this.abortController.signal.aborted) {
|
|
214
242
|
ws.close();
|
|
215
243
|
return;
|
|
216
244
|
}
|
|
217
245
|
}
|
|
218
246
|
});
|
|
219
|
-
} catch {
|
|
247
|
+
} catch (err) {
|
|
248
|
+
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
249
|
+
this.#logger.error({ err }, "Error in listenTask from ElevenLabs WebSocket");
|
|
250
|
+
}
|
|
220
251
|
break;
|
|
221
252
|
}
|
|
222
253
|
}
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if 
(this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new 
Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, 
false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n ws.close();\n return;\n }\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,iBAAiB,KAAK,UAAU,WAAW;AAExE,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,O
AAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC
1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,WAAW;AAC3B,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE
,4BAA4B,GAAG,KAAK,gBAAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KA
AK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-elevenlabs",
|
|
3
|
-
"version": "0.6.3",
|
|
3
|
+
"version": "1.0.0-next.1",
|
|
4
4
|
"description": "ElevenLabs plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -25,10 +25,10 @@
|
|
|
25
25
|
"README.md"
|
|
26
26
|
],
|
|
27
27
|
"devDependencies": {
|
|
28
|
-
"@livekit/agents": "^
|
|
29
|
-
"@livekit/agents-plugin-openai": "^
|
|
30
|
-
"@livekit/agents-plugins-test": "^
|
|
31
|
-
"@livekit/rtc-node": "^0.13.
|
|
28
|
+
"@livekit/agents": "^1.0.0-next.1",
|
|
29
|
+
"@livekit/agents-plugin-openai": "^1.0.0-next.1",
|
|
30
|
+
"@livekit/agents-plugins-test": "^1.0.0-next.1",
|
|
31
|
+
"@livekit/rtc-node": "^0.13.12",
|
|
32
32
|
"@microsoft/api-extractor": "^7.35.0",
|
|
33
33
|
"@types/ws": "^8.5.10",
|
|
34
34
|
"tsup": "^8.3.5",
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
"ws": "^8.16.0"
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
|
-
"@livekit/rtc-node": "^0.13.
|
|
42
|
-
"@livekit/agents": "^0.
|
|
41
|
+
"@livekit/rtc-node": "^0.13.12",
|
|
42
|
+
"@livekit/agents": "^1.0.0-next.11.0.0-next.1"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
45
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
|
-
// SPDX-FileCopyrightText:
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { Plugin } from '@livekit/agents';
|
|
4
5
|
|
|
5
6
|
export * from './tts.js';
|
|
7
|
+
|
|
8
|
+
class ElevenLabsPlugin extends Plugin {
|
|
9
|
+
constructor() {
|
|
10
|
+
super({
|
|
11
|
+
title: 'elevenlabs',
|
|
12
|
+
version: '0.6.2',
|
|
13
|
+
package: '@livekit/agents-plugin-elevenlabs',
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
Plugin.registerPlugin(new ElevenLabsPlugin());
|
package/src/models.ts
CHANGED
package/src/tts.ts
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';
|
|
4
|
+
import {
|
|
5
|
+
AsyncIterableQueue,
|
|
6
|
+
AudioByteStream,
|
|
7
|
+
log,
|
|
8
|
+
shortuuid,
|
|
9
|
+
tokenize,
|
|
10
|
+
tts,
|
|
11
|
+
} from '@livekit/agents';
|
|
5
12
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
-
import { randomUUID } from 'node:crypto';
|
|
7
13
|
import { URL } from 'node:url';
|
|
8
14
|
import { type RawData, WebSocket } from 'ws';
|
|
9
15
|
import type { TTSEncoding, TTSModels } from './models.js';
|
|
10
16
|
|
|
17
|
+
const DEFAULT_INACTIVITY_TIMEOUT = 300;
|
|
18
|
+
|
|
11
19
|
type Voice = {
|
|
12
20
|
id: string;
|
|
13
21
|
name: string;
|
|
@@ -23,7 +31,7 @@ type VoiceSettings = {
|
|
|
23
31
|
};
|
|
24
32
|
|
|
25
33
|
const DEFAULT_VOICE: Voice = {
|
|
26
|
-
id: 'EXAVITQu4vr4xnSDxMaL',
|
|
34
|
+
id: 'bIHbv24MWmeRgasZH58o',
|
|
27
35
|
name: 'Bella',
|
|
28
36
|
category: 'premade',
|
|
29
37
|
settings: {
|
|
@@ -44,22 +52,25 @@ export interface TTSOptions {
|
|
|
44
52
|
languageCode?: string;
|
|
45
53
|
baseURL: string;
|
|
46
54
|
encoding: TTSEncoding;
|
|
47
|
-
streamingLatency: number;
|
|
55
|
+
streamingLatency?: number;
|
|
48
56
|
wordTokenizer: tokenize.WordTokenizer;
|
|
49
|
-
chunkLengthSchedule: number[];
|
|
57
|
+
chunkLengthSchedule?: number[];
|
|
50
58
|
enableSsmlParsing: boolean;
|
|
59
|
+
inactivityTimeout: number;
|
|
60
|
+
syncAlignment: boolean;
|
|
61
|
+
autoMode?: boolean;
|
|
51
62
|
}
|
|
52
63
|
|
|
53
64
|
const defaultTTSOptions: TTSOptions = {
|
|
54
65
|
apiKey: process.env.ELEVEN_API_KEY,
|
|
55
66
|
voice: DEFAULT_VOICE,
|
|
56
|
-
modelID: 'eleven_flash_v2_5',
|
|
67
|
+
modelID: 'eleven_turbo_v2_5',
|
|
57
68
|
baseURL: API_BASE_URL_V1,
|
|
58
69
|
encoding: 'pcm_22050',
|
|
59
|
-
streamingLatency: 3,
|
|
60
70
|
wordTokenizer: new tokenize.basic.WordTokenizer(false),
|
|
61
|
-
chunkLengthSchedule: [],
|
|
62
71
|
enableSsmlParsing: false,
|
|
72
|
+
inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
|
|
73
|
+
syncAlignment: true,
|
|
63
74
|
};
|
|
64
75
|
|
|
65
76
|
export class TTS extends tts.TTS {
|
|
@@ -133,22 +144,26 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
133
144
|
const params = {
|
|
134
145
|
model_id: opts.modelID,
|
|
135
146
|
output_format: opts.encoding,
|
|
136
|
-
optimize_streaming_latency: `${opts.streamingLatency}`,
|
|
137
147
|
enable_ssml_parsing: `${opts.enableSsmlParsing}`,
|
|
148
|
+
sync_alignment: `${opts.syncAlignment}`,
|
|
149
|
+
...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),
|
|
138
150
|
...(opts.languageCode && { language_code: opts.languageCode }),
|
|
151
|
+
...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),
|
|
152
|
+
...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),
|
|
139
153
|
};
|
|
140
154
|
Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
|
|
141
155
|
this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');
|
|
142
|
-
|
|
143
|
-
this.#run();
|
|
144
156
|
}
|
|
145
157
|
|
|
146
|
-
async
|
|
158
|
+
protected async run() {
|
|
147
159
|
const segments = new AsyncIterableQueue<tokenize.WordStream>();
|
|
148
160
|
|
|
149
161
|
const tokenizeInput = async () => {
|
|
150
162
|
let stream: tokenize.WordStream | null = null;
|
|
151
163
|
for await (const text of this.input) {
|
|
164
|
+
if (this.abortController.signal.aborted) {
|
|
165
|
+
break;
|
|
166
|
+
}
|
|
152
167
|
if (text === SynthesizeStream.FLUSH_SENTINEL) {
|
|
153
168
|
stream?.endInput();
|
|
154
169
|
stream = null;
|
|
@@ -165,13 +180,15 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
165
180
|
|
|
166
181
|
const runStream = async () => {
|
|
167
182
|
for await (const stream of segments) {
|
|
183
|
+
if (this.abortController.signal.aborted) {
|
|
184
|
+
break;
|
|
185
|
+
}
|
|
168
186
|
await this.#runWS(stream);
|
|
169
187
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
170
188
|
}
|
|
171
189
|
};
|
|
172
190
|
|
|
173
191
|
await Promise.all([tokenizeInput(), runStream()]);
|
|
174
|
-
this.close();
|
|
175
192
|
}
|
|
176
193
|
|
|
177
194
|
async #runWS(stream: tokenize.WordStream, maxRetry = 3) {
|
|
@@ -182,6 +199,11 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
182
199
|
headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },
|
|
183
200
|
});
|
|
184
201
|
|
|
202
|
+
ws.on('error', (error) => {
|
|
203
|
+
this.abortController.abort();
|
|
204
|
+
this.#logger.error({ error }, 'Error connecting to ElevenLabs');
|
|
205
|
+
});
|
|
206
|
+
|
|
185
207
|
try {
|
|
186
208
|
await new Promise((resolve, reject) => {
|
|
187
209
|
ws.on('open', resolve);
|
|
@@ -204,15 +226,18 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
204
226
|
}
|
|
205
227
|
}
|
|
206
228
|
|
|
207
|
-
const requestId =
|
|
208
|
-
const segmentId =
|
|
229
|
+
const requestId = shortuuid();
|
|
230
|
+
const segmentId = shortuuid();
|
|
209
231
|
|
|
210
232
|
ws.send(
|
|
211
233
|
JSON.stringify({
|
|
212
234
|
text: ' ',
|
|
213
235
|
voice_settings: this.#opts.voice.settings,
|
|
214
|
-
|
|
215
|
-
|
|
236
|
+
...(this.#opts.chunkLengthSchedule && {
|
|
237
|
+
generation_config: {
|
|
238
|
+
chunk_length_schedule: this.#opts.chunkLengthSchedule,
|
|
239
|
+
},
|
|
240
|
+
}),
|
|
216
241
|
}),
|
|
217
242
|
);
|
|
218
243
|
let eosSent = false;
|
|
@@ -220,6 +245,9 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
220
245
|
const sendTask = async () => {
|
|
221
246
|
let xmlContent: string[] = [];
|
|
222
247
|
for await (const data of stream) {
|
|
248
|
+
if (this.abortController.signal.aborted) {
|
|
249
|
+
break;
|
|
250
|
+
}
|
|
223
251
|
let text = data.token;
|
|
224
252
|
|
|
225
253
|
if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {
|
|
@@ -232,13 +260,14 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
232
260
|
}
|
|
233
261
|
}
|
|
234
262
|
|
|
235
|
-
ws.send(JSON.stringify({ text: text + ' '
|
|
263
|
+
ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space
|
|
236
264
|
}
|
|
237
265
|
|
|
238
266
|
if (xmlContent.length) {
|
|
239
267
|
this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
|
|
240
268
|
}
|
|
241
269
|
|
|
270
|
+
// no more tokens, mark eos
|
|
242
271
|
ws.send(JSON.stringify({ text: '' }));
|
|
243
272
|
eosSent = true;
|
|
244
273
|
};
|
|
@@ -252,8 +281,9 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
252
281
|
};
|
|
253
282
|
|
|
254
283
|
const listenTask = async () => {
|
|
284
|
+
let finalReceived = false;
|
|
255
285
|
const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
|
|
256
|
-
while (!this.closed) {
|
|
286
|
+
while (!this.closed && !this.abortController.signal.aborted) {
|
|
257
287
|
try {
|
|
258
288
|
await new Promise<RawData>((resolve, reject) => {
|
|
259
289
|
ws.removeAllListeners();
|
|
@@ -262,17 +292,21 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
262
292
|
if (!eosSent) {
|
|
263
293
|
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
264
294
|
}
|
|
265
|
-
|
|
295
|
+
if (!finalReceived) {
|
|
296
|
+
reject(new Error('WebSocket closed'));
|
|
297
|
+
}
|
|
266
298
|
});
|
|
267
299
|
}).then((msg) => {
|
|
268
300
|
const json = JSON.parse(msg.toString());
|
|
269
|
-
|
|
301
|
+
// remove the "audio" field from the json object when printing
|
|
302
|
+
if ('audio' in json && json.audio !== null) {
|
|
270
303
|
const data = new Int8Array(Buffer.from(json.audio, 'base64'));
|
|
271
304
|
for (const frame of bstream.write(data)) {
|
|
272
305
|
sendLastFrame(segmentId, false);
|
|
273
306
|
lastFrame = frame;
|
|
274
307
|
}
|
|
275
|
-
} else if (
|
|
308
|
+
} else if (json.isFinal) {
|
|
309
|
+
finalReceived = true;
|
|
276
310
|
for (const frame of bstream.flush()) {
|
|
277
311
|
sendLastFrame(segmentId, false);
|
|
278
312
|
lastFrame = frame;
|
|
@@ -280,13 +314,17 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
280
314
|
sendLastFrame(segmentId, true);
|
|
281
315
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
282
316
|
|
|
283
|
-
if (segmentId === requestId) {
|
|
317
|
+
if (segmentId === requestId || this.abortController.signal.aborted) {
|
|
284
318
|
ws.close();
|
|
285
319
|
return;
|
|
286
320
|
}
|
|
287
321
|
}
|
|
288
322
|
});
|
|
289
|
-
} catch {
|
|
323
|
+
} catch (err) {
|
|
324
|
+
// skip log error for normal websocket close
|
|
325
|
+
if (err instanceof Error && !err.message.includes('WebSocket closed')) {
|
|
326
|
+
this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');
|
|
327
|
+
}
|
|
290
328
|
break;
|
|
291
329
|
}
|
|
292
330
|
}
|