@livekit/agents-plugin-elevenlabs 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,17 @@
1
+ <!--
2
+ SPDX-FileCopyrightText: 2024 LiveKit, Inc.
3
+
4
+ SPDX-License-Identifier: Apache-2.0
5
+ -->
6
+ # ElevenLabs plugin for LiveKit Agents
7
+
8
+ The Agents Framework is designed for building realtime, programmable
9
+ participants that run on servers. Use it to create conversational, multi-modal
10
+ voice agents that can see, hear, and understand.
11
+
12
+ This package contains the ElevenLabs plugin, which allows for voice synthesis.
13
+ Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
14
+ information on how to use it, or browse the [API
15
+ reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_elevenlabs.html).
16
+ See the [repository](https://github.com/livekit/agents-js) for more information
17
+ about the framework as a whole.
package/dist/index.cjs ADDED
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __copyProps = (to, from, except, desc) => {
7
+ if (from && typeof from === "object" || typeof from === "function") {
8
+ for (let key of __getOwnPropNames(from))
9
+ if (!__hasOwnProp.call(to, key) && key !== except)
10
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
11
+ }
12
+ return to;
13
+ };
14
+ var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
15
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
16
+ var src_exports = {};
17
+ module.exports = __toCommonJS(src_exports);
18
+ __reExport(src_exports, require("./tts.cjs"), module.exports);
19
+ // Annotate the CommonJS export names for ESM import in node:
20
+ 0 && (module.exports = {
21
+ ...require("./tts.cjs")
22
+ });
23
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
package/dist/index.js CHANGED
@@ -1,5 +1,2 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- export * from './tts.js';
1
+ export * from "./tts.js";
5
2
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AAEtC,cAAc,UAAU,CAAC"}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __copyProps = (to, from, except, desc) => {
7
+ if (from && typeof from === "object" || typeof from === "function") {
8
+ for (let key of __getOwnPropNames(from))
9
+ if (!__hasOwnProp.call(to, key) && key !== except)
10
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
11
+ }
12
+ return to;
13
+ };
14
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
15
+ var models_exports = {};
16
+ module.exports = __toCommonJS(models_exports);
17
+ //# sourceMappingURL=models.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
package/dist/models.js CHANGED
@@ -1,5 +1 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- export {};
5
1
  //# sourceMappingURL=models.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC"}
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/tts.cjs ADDED
@@ -0,0 +1,241 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var tts_exports = {};
20
+ __export(tts_exports, {
21
+ SynthesizeStream: () => SynthesizeStream,
22
+ TTS: () => TTS
23
+ });
24
+ module.exports = __toCommonJS(tts_exports);
25
+ var import_agents = require("@livekit/agents");
26
+ var import_rtc_node = require("@livekit/rtc-node");
27
+ var import_node_crypto = require("node:crypto");
28
+ var import_node_url = require("node:url");
29
+ var import_ws = require("ws");
30
+ const DEFAULT_VOICE = {
31
+ id: "EXAVITQu4vr4xnSDxMaL",
32
+ name: "Bella",
33
+ category: "premade",
34
+ settings: {
35
+ stability: 0.71,
36
+ similarity_boost: 0.5,
37
+ style: 0,
38
+ use_speaker_boost: true
39
+ }
40
+ };
41
+ const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
42
+ const AUTHORIZATION_HEADER = "xi-api-key";
43
+ const defaultTTSOptions = {
44
+ apiKey: process.env.ELEVEN_API_KEY,
45
+ voice: DEFAULT_VOICE,
46
+ modelID: "eleven_turbo_v2_5",
47
+ baseURL: API_BASE_URL_V1,
48
+ encoding: "pcm_22050",
49
+ streamingLatency: 3,
50
+ wordTokenizer: new import_agents.tokenize.basic.WordTokenizer(false),
51
+ chunkLengthSchedule: [],
52
+ enableSsmlParsing: false
53
+ };
54
+ class TTS extends import_agents.tts.TTS {
55
+ #opts;
56
+ constructor(opts = {}) {
57
+ super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
58
+ streaming: true
59
+ });
60
+ this.#opts = {
61
+ ...defaultTTSOptions,
62
+ ...opts
63
+ };
64
+ if (this.#opts.apiKey === void 0) {
65
+ throw new Error(
66
+ "ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY"
67
+ );
68
+ }
69
+ }
70
+ async listVoices() {
71
+ return fetch(this.#opts.baseURL + "/voices", {
72
+ headers: {
73
+ [AUTHORIZATION_HEADER]: this.#opts.apiKey
74
+ }
75
+ }).then((data) => data.json()).then((data) => {
76
+ const voices = [];
77
+ for (const voice of data.voices) {
78
+ voices.push({
79
+ id: voice.voice_id,
80
+ name: voice.name,
81
+ category: voice.category,
82
+ settings: void 0
83
+ });
84
+ }
85
+ return voices;
86
+ });
87
+ }
88
+ synthesize() {
89
+ throw new Error("Chunked responses are not supported on ElevenLabs TTS");
90
+ }
91
+ stream() {
92
+ return new SynthesizeStream(this.#opts);
93
+ }
94
+ }
95
+ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
96
+ #opts;
97
+ #logger = (0, import_agents.log)();
98
+ streamURL;
99
+ constructor(opts) {
100
+ super();
101
+ this.#opts = opts;
102
+ this.closed = false;
103
+ const baseURL = opts.baseURL + (opts.baseURL.endsWith("/") ? "" : "/");
104
+ this.streamURL = new import_node_url.URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
105
+ const params = {
106
+ model_id: opts.modelID,
107
+ output_format: opts.encoding,
108
+ optimize_streaming_latency: `${opts.streamingLatency}`,
109
+ enable_ssml_parsing: `${opts.enableSsmlParsing}`
110
+ };
111
+ Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
112
+ this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
113
+ this.#run();
114
+ }
115
+ async #run() {
116
+ const segments = new import_agents.AsyncIterableQueue();
117
+ const tokenizeInput = async () => {
118
+ let stream = null;
119
+ for await (const text of this.input) {
120
+ if (text === SynthesizeStream.FLUSH_SENTINEL) {
121
+ stream == null ? void 0 : stream.endInput();
122
+ stream = null;
123
+ } else {
124
+ if (!stream) {
125
+ stream = this.#opts.wordTokenizer.stream();
126
+ segments.put(stream);
127
+ }
128
+ stream.pushText(text);
129
+ }
130
+ }
131
+ segments.close();
132
+ };
133
+ const runStream = async () => {
134
+ for await (const stream of segments) {
135
+ await this.#runWS(stream);
136
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
137
+ }
138
+ };
139
+ await Promise.all([tokenizeInput(), runStream()]);
140
+ this.close();
141
+ }
142
+ async #runWS(stream, maxRetry = 3) {
143
+ let retries = 0;
144
+ let ws;
145
+ while (true) {
146
+ ws = new import_ws.WebSocket(this.streamURL, {
147
+ headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
148
+ });
149
+ try {
150
+ await new Promise((resolve, reject) => {
151
+ ws.on("open", resolve);
152
+ ws.on("error", (error) => reject(error));
153
+ ws.on("close", (code) => reject(`WebSocket returned ${code}`));
154
+ });
155
+ break;
156
+ } catch (e) {
157
+ if (retries >= maxRetry) {
158
+ throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
159
+ }
160
+ const delay = Math.min(retries * 5, 5);
161
+ retries++;
162
+ this.#logger.warn(
163
+ `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
164
+ );
165
+ await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
166
+ }
167
+ }
168
+ const requestId = (0, import_node_crypto.randomUUID)();
169
+ const segmentId = (0, import_node_crypto.randomUUID)();
170
+ ws.send(
171
+ JSON.stringify({
172
+ text: " ",
173
+ voice_settings: this.#opts.voice.settings,
174
+ try_trigger_generation: true,
175
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
176
+ })
177
+ );
178
+ let eosSent = false;
179
+ const sendTask = async () => {
180
+ let xmlContent = [];
181
+ for await (const data of stream) {
182
+ let text = data.token;
183
+ if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
184
+ xmlContent.push(text);
185
+ if (text.indexOf("</phoneme>") !== -1) {
186
+ text = xmlContent.join(" ");
187
+ xmlContent = [];
188
+ } else {
189
+ continue;
190
+ }
191
+ }
192
+ ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
193
+ }
194
+ if (xmlContent.length) {
195
+ this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
196
+ }
197
+ ws.send(JSON.stringify({ text: "" }));
198
+ eosSent = true;
199
+ };
200
+ const listenTask = async () => {
201
+ while (!this.closed) {
202
+ try {
203
+ await new Promise((resolve, reject) => {
204
+ ws.removeAllListeners();
205
+ ws.on("message", (data) => resolve(data));
206
+ ws.on("close", (code, reason) => {
207
+ if (!eosSent) {
208
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
209
+ }
210
+ reject();
211
+ });
212
+ }).then((msg) => {
213
+ const json = JSON.parse(msg.toString());
214
+ if ("audio" in json) {
215
+ const data = new Int16Array(Buffer.from(json.audio, "base64").buffer);
216
+ const frame = new import_rtc_node.AudioFrame(
217
+ data,
218
+ sampleRateFromFormat(this.#opts.encoding),
219
+ 1,
220
+ data.length
221
+ );
222
+ this.queue.put({ requestId, segmentId, frame });
223
+ }
224
+ });
225
+ } catch {
226
+ break;
227
+ }
228
+ }
229
+ };
230
+ await Promise.all([sendTask(), listenTask()]);
231
+ }
232
+ }
233
+ const sampleRateFromFormat = (encoding) => {
234
+ return Number(encoding.split("_")[1]);
235
+ };
236
+ // Annotate the CommonJS export names for ESM import in node:
237
+ 0 && (module.exports = {
238
+ SynthesizeStream,
239
+ TTS
240
+ });
241
+ //# sourceMappingURL=tts.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n readonly streamURL: URL;\n\n constructor(opts: TTSOptions) {\n super();\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n const listenTask = async () => {\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);\n const frame = new AudioFrame(\n data,\n sampleRateFromFormat(this.#opts.encoding),\n 1,\n data.length,\n );\n this.queue.put({ requestId, segmentId, frame });\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAuD;AACvD,sBAA2B;AAC3B,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EAEA,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,KAAK,KAAK;AAAA,EACxC;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACL;AAAA,EAET,YAAY,MAAkB;AAC5B,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,WAAW,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACpE,oBAAM,QAAQ,IAAI;AAAA,gBAChB;AAAA,gBACA,qBAAqB,KAAK,MAAM,QAAQ;AAAA,gBACxC;AAAA,gBACA,KAAK;AAAA,cACP;AACA,mBAAK,MAAM,IAAI,EAAE,WAAW,WAAW,MAAM,CAAC;AAAA,YAChD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":[]}
package/dist/tts.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- /// <reference types="node" />
1
+ /// <reference types="node" resolution-mode="require"/>
2
2
  import { tokenize, tts } from '@livekit/agents';
3
3
  import { URL } from 'node:url';
4
4
  import type { TTSEncoding, TTSModels } from './models.js';
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA2B,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIzE,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;gBAGlB,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,IAAI,EAAE,UAAU;CA0J7B"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA2B,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAGzE,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;gBAGlB,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,IAAI,EAAE,UAAU;CA0J7B"}
package/dist/tts.js CHANGED
@@ -1,213 +1,216 @@
1
- var _a;
2
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
3
- //
4
- // SPDX-License-Identifier: Apache-2.0
5
- import { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';
6
- import { AudioFrame } from '@livekit/rtc-node';
7
- import { randomUUID } from 'node:crypto';
8
- import { URL } from 'node:url';
9
- import { WebSocket } from 'ws';
1
+ import { AsyncIterableQueue, log, tokenize, tts } from "@livekit/agents";
2
+ import { AudioFrame } from "@livekit/rtc-node";
3
+ import { randomUUID } from "node:crypto";
4
+ import { URL } from "node:url";
5
+ import { WebSocket } from "ws";
10
6
  const DEFAULT_VOICE = {
11
- id: 'EXAVITQu4vr4xnSDxMaL',
12
- name: 'Bella',
13
- category: 'premade',
14
- settings: {
15
- stability: 0.71,
16
- similarity_boost: 0.5,
17
- style: 0.0,
18
- use_speaker_boost: true,
19
- },
7
+ id: "EXAVITQu4vr4xnSDxMaL",
8
+ name: "Bella",
9
+ category: "premade",
10
+ settings: {
11
+ stability: 0.71,
12
+ similarity_boost: 0.5,
13
+ style: 0,
14
+ use_speaker_boost: true
15
+ }
20
16
  };
21
- const API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';
22
- const AUTHORIZATION_HEADER = 'xi-api-key';
17
+ const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
18
+ const AUTHORIZATION_HEADER = "xi-api-key";
23
19
  const defaultTTSOptions = {
24
- apiKey: process.env.ELEVEN_API_KEY,
25
- voice: DEFAULT_VOICE,
26
- modelID: 'eleven_turbo_v2_5',
27
- baseURL: API_BASE_URL_V1,
28
- encoding: 'pcm_22050',
29
- streamingLatency: 3,
30
- wordTokenizer: new tokenize.basic.WordTokenizer(false),
31
- chunkLengthSchedule: [],
32
- enableSsmlParsing: false,
20
+ apiKey: process.env.ELEVEN_API_KEY,
21
+ voice: DEFAULT_VOICE,
22
+ modelID: "eleven_turbo_v2_5",
23
+ baseURL: API_BASE_URL_V1,
24
+ encoding: "pcm_22050",
25
+ streamingLatency: 3,
26
+ wordTokenizer: new tokenize.basic.WordTokenizer(false),
27
+ chunkLengthSchedule: [],
28
+ enableSsmlParsing: false
33
29
  };
34
- export class TTS extends tts.TTS {
35
- #opts;
36
- constructor(opts = {}) {
37
- super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
38
- streaming: true,
39
- });
40
- this.#opts = {
41
- ...defaultTTSOptions,
42
- ...opts,
43
- };
44
- if (this.#opts.apiKey === undefined) {
45
- throw new Error('ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY');
46
- }
30
+ class TTS extends tts.TTS {
31
+ #opts;
32
+ constructor(opts = {}) {
33
+ super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
34
+ streaming: true
35
+ });
36
+ this.#opts = {
37
+ ...defaultTTSOptions,
38
+ ...opts
39
+ };
40
+ if (this.#opts.apiKey === void 0) {
41
+ throw new Error(
42
+ "ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY"
43
+ );
47
44
  }
48
- async listVoices() {
49
- return fetch(this.#opts.baseURL + '/voices', {
50
- headers: {
51
- [AUTHORIZATION_HEADER]: this.#opts.apiKey,
52
- },
53
- })
54
- .then((data) => data.json())
55
- .then((data) => {
56
- const voices = [];
57
- for (const voice of data.voices) {
58
- voices.push({
59
- id: voice.voice_id,
60
- name: voice.name,
61
- category: voice.category,
62
- settings: undefined,
63
- });
64
- }
65
- return voices;
45
+ }
46
+ async listVoices() {
47
+ return fetch(this.#opts.baseURL + "/voices", {
48
+ headers: {
49
+ [AUTHORIZATION_HEADER]: this.#opts.apiKey
50
+ }
51
+ }).then((data) => data.json()).then((data) => {
52
+ const voices = [];
53
+ for (const voice of data.voices) {
54
+ voices.push({
55
+ id: voice.voice_id,
56
+ name: voice.name,
57
+ category: voice.category,
58
+ settings: void 0
66
59
  });
67
- }
68
- synthesize() {
69
- throw new Error('Chunked responses are not supported on ElevenLabs TTS');
70
- }
71
- stream() {
72
- return new SynthesizeStream(this.#opts);
73
- }
60
+ }
61
+ return voices;
62
+ });
63
+ }
64
+ synthesize() {
65
+ throw new Error("Chunked responses are not supported on ElevenLabs TTS");
66
+ }
67
+ stream() {
68
+ return new SynthesizeStream(this.#opts);
69
+ }
74
70
  }
75
- export class SynthesizeStream extends tts.SynthesizeStream {
76
- #opts;
77
- #logger = log();
78
- streamURL;
79
- constructor(opts) {
80
- super();
81
- this.#opts = opts;
82
- this.closed = false;
83
- // add trailing slash to URL if needed
84
- const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');
85
- this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
86
- const params = {
87
- model_id: opts.modelID,
88
- output_format: opts.encoding,
89
- optimize_streaming_latency: `${opts.streamingLatency}`,
90
- enable_ssml_parsing: `${opts.enableSsmlParsing}`,
91
- };
92
- Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
93
- this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');
94
- this.#run();
95
- }
96
- async #run() {
97
- const segments = new AsyncIterableQueue();
98
- const tokenizeInput = async () => {
99
- let stream = null;
100
- for await (const text of this.input) {
101
- if (text === _a.FLUSH_SENTINEL) {
102
- stream?.endInput();
103
- stream = null;
104
- }
105
- else {
106
- if (!stream) {
107
- stream = this.#opts.wordTokenizer.stream();
108
- segments.put(stream);
109
- }
110
- stream.pushText(text);
111
- }
112
- }
113
- segments.close();
114
- };
115
- const runStream = async () => {
116
- for await (const stream of segments) {
117
- await this.#runWS(stream);
118
- this.queue.put(_a.END_OF_STREAM);
119
- }
120
- };
121
- await Promise.all([tokenizeInput(), runStream()]);
122
- this.close();
71
+ class SynthesizeStream extends tts.SynthesizeStream {
72
+ #opts;
73
+ #logger = log();
74
+ streamURL;
75
+ constructor(opts) {
76
+ super();
77
+ this.#opts = opts;
78
+ this.closed = false;
79
+ const baseURL = opts.baseURL + (opts.baseURL.endsWith("/") ? "" : "/");
80
+ this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
81
+ const params = {
82
+ model_id: opts.modelID,
83
+ output_format: opts.encoding,
84
+ optimize_streaming_latency: `${opts.streamingLatency}`,
85
+ enable_ssml_parsing: `${opts.enableSsmlParsing}`
86
+ };
87
+ Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
88
+ this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
89
+ this.#run();
90
+ }
91
+ async #run() {
92
+ const segments = new AsyncIterableQueue();
93
+ const tokenizeInput = async () => {
94
+ let stream = null;
95
+ for await (const text of this.input) {
96
+ if (text === SynthesizeStream.FLUSH_SENTINEL) {
97
+ stream == null ? void 0 : stream.endInput();
98
+ stream = null;
99
+ } else {
100
+ if (!stream) {
101
+ stream = this.#opts.wordTokenizer.stream();
102
+ segments.put(stream);
103
+ }
104
+ stream.pushText(text);
105
+ }
106
+ }
107
+ segments.close();
108
+ };
109
+ const runStream = async () => {
110
+ for await (const stream of segments) {
111
+ await this.#runWS(stream);
112
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
113
+ }
114
+ };
115
+ await Promise.all([tokenizeInput(), runStream()]);
116
+ this.close();
117
+ }
118
+ async #runWS(stream, maxRetry = 3) {
119
+ let retries = 0;
120
+ let ws;
121
+ while (true) {
122
+ ws = new WebSocket(this.streamURL, {
123
+ headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
124
+ });
125
+ try {
126
+ await new Promise((resolve, reject) => {
127
+ ws.on("open", resolve);
128
+ ws.on("error", (error) => reject(error));
129
+ ws.on("close", (code) => reject(`WebSocket returned ${code}`));
130
+ });
131
+ break;
132
+ } catch (e) {
133
+ if (retries >= maxRetry) {
134
+ throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
135
+ }
136
+ const delay = Math.min(retries * 5, 5);
137
+ retries++;
138
+ this.#logger.warn(
139
+ `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
140
+ );
141
+ await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
142
+ }
123
143
  }
124
- async #runWS(stream, maxRetry = 3) {
125
- let retries = 0;
126
- let ws;
127
- while (true) {
128
- ws = new WebSocket(this.streamURL, {
129
- headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },
144
+ const requestId = randomUUID();
145
+ const segmentId = randomUUID();
146
+ ws.send(
147
+ JSON.stringify({
148
+ text: " ",
149
+ voice_settings: this.#opts.voice.settings,
150
+ try_trigger_generation: true,
151
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
152
+ })
153
+ );
154
+ let eosSent = false;
155
+ const sendTask = async () => {
156
+ let xmlContent = [];
157
+ for await (const data of stream) {
158
+ let text = data.token;
159
+ if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
160
+ xmlContent.push(text);
161
+ if (text.indexOf("</phoneme>") !== -1) {
162
+ text = xmlContent.join(" ");
163
+ xmlContent = [];
164
+ } else {
165
+ continue;
166
+ }
167
+ }
168
+ ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
169
+ }
170
+ if (xmlContent.length) {
171
+ this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
172
+ }
173
+ ws.send(JSON.stringify({ text: "" }));
174
+ eosSent = true;
175
+ };
176
+ const listenTask = async () => {
177
+ while (!this.closed) {
178
+ try {
179
+ await new Promise((resolve, reject) => {
180
+ ws.removeAllListeners();
181
+ ws.on("message", (data) => resolve(data));
182
+ ws.on("close", (code, reason) => {
183
+ if (!eosSent) {
184
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
185
+ }
186
+ reject();
130
187
  });
131
- try {
132
- await new Promise((resolve, reject) => {
133
- ws.on('open', resolve);
134
- ws.on('error', (error) => reject(error));
135
- ws.on('close', (code) => reject(`WebSocket returned ${code}`));
136
- });
137
- break;
138
- }
139
- catch (e) {
140
- if (retries >= maxRetry) {
141
- throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
142
- }
143
- const delay = Math.min(retries * 5, 5);
144
- retries++;
145
- this.#logger.warn(`failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`);
146
- await new Promise((resolve) => setTimeout(resolve, delay * 1000));
188
+ }).then((msg) => {
189
+ const json = JSON.parse(msg.toString());
190
+ if ("audio" in json) {
191
+ const data = new Int16Array(Buffer.from(json.audio, "base64").buffer);
192
+ const frame = new AudioFrame(
193
+ data,
194
+ sampleRateFromFormat(this.#opts.encoding),
195
+ 1,
196
+ data.length
197
+ );
198
+ this.queue.put({ requestId, segmentId, frame });
147
199
  }
200
+ });
201
+ } catch {
202
+ break;
148
203
  }
149
- const requestId = randomUUID();
150
- const segmentId = randomUUID();
151
- ws.send(JSON.stringify({
152
- text: ' ',
153
- voice_settings: this.#opts.voice.settings,
154
- try_trigger_generation: true,
155
- chunk_length_schedule: this.#opts.chunkLengthSchedule,
156
- }));
157
- let eosSent = false;
158
- const sendTask = async () => {
159
- let xmlContent = [];
160
- for await (const data of stream) {
161
- let text = data.token;
162
- if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {
163
- xmlContent.push(text);
164
- if (text.indexOf('</phoneme>') !== -1) {
165
- text = xmlContent.join(' ');
166
- xmlContent = [];
167
- }
168
- else {
169
- continue;
170
- }
171
- }
172
- ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));
173
- }
174
- if (xmlContent.length) {
175
- this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
176
- }
177
- ws.send(JSON.stringify({ text: '' }));
178
- eosSent = true;
179
- };
180
- const listenTask = async () => {
181
- while (!this.closed) {
182
- try {
183
- await new Promise((resolve, reject) => {
184
- ws.removeAllListeners();
185
- ws.on('message', (data) => resolve(data));
186
- ws.on('close', (code, reason) => {
187
- if (!eosSent) {
188
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
189
- }
190
- reject();
191
- });
192
- }).then((msg) => {
193
- const json = JSON.parse(msg.toString());
194
- if ('audio' in json) {
195
- const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);
196
- const frame = new AudioFrame(data, sampleRateFromFormat(this.#opts.encoding), 1, data.length);
197
- this.queue.put({ requestId, segmentId, frame });
198
- }
199
- });
200
- }
201
- catch {
202
- break;
203
- }
204
- }
205
- };
206
- await Promise.all([sendTask(), listenTask()]);
207
- }
204
+ }
205
+ };
206
+ await Promise.all([sendTask(), listenTask()]);
207
+ }
208
208
  }
209
- _a = SynthesizeStream;
210
209
  const sampleRateFromFormat = (encoding) => {
211
- return Number(encoding.split('_')[1]);
210
+ return Number(encoding.split("_")[1]);
211
+ };
212
+ export {
213
+ SynthesizeStream,
214
+ TTS
212
215
  };
213
216
  //# sourceMappingURL=tts.js.map
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.js","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAE,kBAAkB,EAAE,GAAG,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAEzE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAiB7C,MAAM,aAAa,GAAU;IAC3B,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,SAAS;IACnB,QAAQ,EAAE;QACR,SAAS,EAAE,IAAI;QACf,gBAAgB,EAAE,GAAG;QACrB,KAAK,EAAE,GAAG;QACV,iBAAiB,EAAE,IAAI;KACxB;CACF,CAAC;AAEF,MAAM,eAAe,GAAG,+BAA+B,CAAC;AACxD,MAAM,oBAAoB,GAAG,YAAY,CAAC;AAc1C,MAAM,iBAAiB,GAAe;IACpC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;IAClC,KAAK,EAAE,aAAa;IACpB,OAAO,EAAE,mBAAmB;IAC5B,OAAO,EAAE,eAAe;IACxB,QAAQ,EAAE,WAAW;IACrB,gBAAgB,EAAE,CAAC;IACnB,aAAa,EAAE,IAAI,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC;IACtD,mBAAmB,EAAE,EAAE;IACvB,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,OAAO,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,KAAK,CAAa;IAElB,YAAY,OAA4B,EAAE;QACxC,KAAK,CAAC,oBAAoB,CAAC,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE;YAC1E,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG;YACX,GAAG,iBAAiB;YACpB,GAAG,IAAI;SACR,CAAC;QAEF,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,8EAA8E,CAC/E,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU;QACd,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,SAAS,EAAE;YAC3C,OAAO,EAAE;gBACP,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAO;aAC3C;SACF,CAAC;aACC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAC3B,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YACb,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,KAAK,MAAM,KAAK,IACd,IACD,CAAC,MAAM,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,KAAK,CAAC,QAAQ;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,QAAQ,EAAE,SAAS;iBACpB,CAAC,CAAC;YACL,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAED,UAAU;QACR,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,MAAM,OAAO,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,KAAK,CAAa;IAClB,OAAO,GAAG,GAAG,EAAE,CAAC;IACP,SAAS,CAAM;IAExB,YAAY,IAAgB;QAC1B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QAEpB,sCAAsC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAEvE,IAAI,CAAC,SAAS,GAAG,IAAI,GAAG,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC,EAAE,eAAe,EAAE,OAAO,CAAC,CAAC;QAClF,MAAM,MAAM,GAAG;YACb,QAAQ,EAAE,IAAI,CAAC,OAAO;YACtB,aAAa,EAAE,IAAI,CAAC,QAAQ;YAC5B,0BAA0B,EAAE,GAAG,IAAI,CAAC,gBAAgB,EAAE;YACtD,mBAAmB,EAAE,GAAG,IAAI,CAAC,iBAAiB,EAAE;SACjD,CAAC;QACF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrF,IAAI,CAAC,SAAS,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAExE,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,QAAQ,GAAG,IAAI,kBAAkB,EAAc,CAAC;QAEtD,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;YAC/B,IAAI,MAAM,GAA+B,IAAI,CAAC;YAC9C,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACpC,IAAI,IAAI,KAAK,EAAgB,CAAC,cAAc,EAAE,CAAC;oBAC7C,MAAM,EAAE,QAAQ,EAAE,CAAC;oBACnB,MAAM,GAAG,IAAI,CAAC;gBAChB,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,MAAM,EAAE,CAAC;wBACZ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC;wBAC3C,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;oBACD,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YACD,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,CAAC,CAAC;QAEF,MAAM,SAAS,GAAG,KAAK,IAAI,EAAE;YAC3B,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;gBACpC,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAgB,CAAC,aAAa,CAAC,CAAC;YACjD,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;QAClD,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,MAA2B,EAAE,QAAQ,GAAG,CAAC;QACpD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,EAAa,CAAC;QAClB,OAAO,IAAI,EAAE,CAAC;YACZ,EAAE,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE;gBACjC,OAAO,EAAE,EAAE,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;aACvD,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBACpC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACvB,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxB,MAAM,IAAI,KAAK,CAAC,yCAAyC,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC;gBACrF,CAAC;gBAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,OAAO,EAAE,CAAC;gBAEV,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ,GAAG,CAC/F,CAAC;gBACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;QAE/B,EAAE,CAAC,IAAI,CACL,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,GAAG;YACT,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ;YACzC,sBAAsB,EAAE,IAAI;YAC5B,qBAAqB,EAAE,IAAI,CAAC,KAAK,CAAC,mBAAmB;SACtD,CAAC,CACH,CAAC;QACF,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;YAC1B,IAAI,UAAU,GAAa,EAAE,CAAC;YAC9B,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;gBAChC,IAAI,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC;gBAEtB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;oBACvF,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtB,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBACtC,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;wBAC5B,UAAU,GAAG,EAAE,CAAC;oBAClB,CAAC;yBAAM,CAAC;wBACN,SAAS;oBACX,CAAC;gBACH,CAAC;gBAED,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG,GAAG,EAAE,sBAAsB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;YAC/E,CAAC;YAED,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;gBACtB,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;YAC3E,CAAC;YAED,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YACtC,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC,CAAC;QAEF,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;YAC5B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpB,IAAI,CAAC;oBACH,MAAM,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;wBAC7C,EAAE,CAAC,kBAAkB,EAAE,CAAC;wBACxB,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;wBAC1C,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;4BAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;gCACb,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,8BAA8B,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC;4BACtE,CAAC;4BACD,MAAM,EAAE,CAAC;wBACX,CAAC,CAAC,CAAC;oBACL,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;wBACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACxC,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;4BACpB,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC;4BACtE,MAAM,KAAK,GAAG,IAAI,UAAU,CAC1B,IAAI,EACJ,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EACzC,CAAC,EACD,IAAI,CAAC,MAAM,CACZ,CAAC;4BACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;wBAClD,CAAC;oBACH,CAAC,CAAC,CAAC;gBACL,CAAC;gBAAC,MAAM,CAAC;oBACP,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;IAChD,CAAC;CACF;;AAED,MAAM,oBAAoB,GAAG,CAAC,QAAqB,EAAU,EAAE;IAC7D,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACxC,CAAC,CAAC"}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n readonly streamURL: URL;\n\n constructor(opts: TTSOptions) {\n super();\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n const listenTask = async () => {\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int16Array(Buffer.from(json.audio, 'base64').buffer);\n const frame = new AudioFrame(\n data,\n sampleRateFromFormat(this.#opts.encoding),\n 1,\n data.length,\n );\n this.queue.put({ requestId, segmentId, frame });\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,KAAK,UAAU,WAAW;AACvD,SAAS,kBAAkB;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EAEA,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,KAAK,KAAK;AAAA,EACxC;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACL;AAAA,EAET,YAAY,MAAkB;AAC5B,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,WAAW,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACpE,oBAAM,QAAQ,IAAI;AAAA,gBAChB;AAAA,gBACA,qBAAqB,KAAK,MAAM,QAAQ;AAAA,gBACxC;AAAA,gBACA,KAAK;AAAA,cACP;AACA,mBAAK,MAAM,IAAI,EAAE,WAAW,WAAW,MAAM,CAAC;AAAA,YAChD;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":[]}
package/package.json CHANGED
@@ -1,33 +1,43 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-elevenlabs",
3
- "version": "0.4.6",
3
+ "version": "0.5.0",
4
4
  "description": "ElevenLabs plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
+ "require": "dist/index.cjs",
6
7
  "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "require": "./dist/index.cjs"
13
+ }
14
+ },
7
15
  "author": "LiveKit",
8
16
  "type": "module",
9
17
  "repository": "git@github.com:livekit/agents-js.git",
10
18
  "license": "Apache-2.0",
11
19
  "files": [
12
20
  "dist",
13
- "src"
21
+ "src",
22
+ "README.md"
14
23
  ],
15
24
  "devDependencies": {
25
+ "@livekit/agents": "^x",
26
+ "@livekit/rtc-node": "^0.12.1",
16
27
  "@microsoft/api-extractor": "^7.35.0",
17
- "@livekit/rtc-node": "^0.11.1",
18
28
  "@types/ws": "^8.5.10",
19
- "typescript": "^5.0.0",
20
- "@livekit/agents": "^0.4.6"
29
+ "tsup": "^8.3.5",
30
+ "typescript": "^5.0.0"
21
31
  },
22
32
  "dependencies": {
23
33
  "ws": "^8.16.0"
24
34
  },
25
35
  "peerDependencies": {
26
- "@livekit/rtc-node": "^0.11.1",
27
- "@livekit/agents": "^0.4.6"
36
+ "@livekit/rtc-node": "^0.12.1",
37
+ "@livekit/agents": "^0.5.0x"
28
38
  },
29
39
  "scripts": {
30
- "build": "tsc",
40
+ "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
31
41
  "clean": "rm -rf dist",
32
42
  "clean:build": "pnpm clean && pnpm build",
33
43
  "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
package/src/tts.ts CHANGED
@@ -2,7 +2,6 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { AsyncIterableQueue, log, tokenize, tts } from '@livekit/agents';
5
- import type { WordStream } from '@livekit/agents/dist/tokenize/tokenizer.js';
6
5
  import { AudioFrame } from '@livekit/rtc-node';
7
6
  import { randomUUID } from 'node:crypto';
8
7
  import { URL } from 'node:url';
@@ -141,7 +140,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
141
140
  }
142
141
 
143
142
  async #run() {
144
- const segments = new AsyncIterableQueue<WordStream>();
143
+ const segments = new AsyncIterableQueue<tokenize.WordStream>();
145
144
 
146
145
  const tokenizeInput = async () => {
147
146
  let stream: tokenize.WordStream | null = null;