@livekit/agents-plugin-elevenlabs 0.6.3 → 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -13,9 +13,20 @@ var __copyProps = (to, from, except, desc) => {
13
13
  };
14
14
  var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
15
15
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
16
- var src_exports = {};
17
- module.exports = __toCommonJS(src_exports);
18
- __reExport(src_exports, require("./tts.cjs"), module.exports);
16
+ var index_exports = {};
17
+ module.exports = __toCommonJS(index_exports);
18
+ var import_agents = require("@livekit/agents");
19
+ __reExport(index_exports, require("./tts.cjs"), module.exports);
20
+ class ElevenLabsPlugin extends import_agents.Plugin {
21
+ constructor() {
22
+ super({
23
+ title: "elevenlabs",
24
+ version: "0.6.2",
25
+ package: "@livekit/agents-plugin-elevenlabs"
26
+ });
27
+ }
28
+ }
29
+ import_agents.Plugin.registerPlugin(new ElevenLabsPlugin());
19
30
  // Annotate the CommonJS export names for ESM import in node:
20
31
  0 && (module.exports = {
21
32
  ...require("./tts.cjs")
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass ElevenLabsPlugin extends Plugin {\n constructor() {\n super({\n title: 'elevenlabs',\n version: '0.6.2',\n package: '@livekit/agents-plugin-elevenlabs',\n });\n }\n}\n\nPlugin.registerPlugin(new ElevenLabsPlugin());\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,oBAAuB;AAEvB,0BAAc,qBALd;AAOA,MAAM,yBAAyB,qBAAO;AAAA,EACpC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,iBAAiB,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,UAAU,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,cAAc,UAAU,CAAC"}
package/dist/index.js CHANGED
@@ -1,2 +1,13 @@
1
+ import { Plugin } from "@livekit/agents";
1
2
  export * from "./tts.js";
3
+ class ElevenLabsPlugin extends Plugin {
4
+ constructor() {
5
+ super({
6
+ title: "elevenlabs",
7
+ version: "0.6.2",
8
+ package: "@livekit/agents-plugin-elevenlabs"
9
+ });
10
+ }
11
+ }
12
+ Plugin.registerPlugin(new ElevenLabsPlugin());
2
13
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass ElevenLabsPlugin extends Plugin {\n constructor() {\n super({\n title: 'elevenlabs',\n version: '0.6.2',\n package: '@livekit/agents-plugin-elevenlabs',\n });\n }\n}\n\nPlugin.registerPlugin(new ElevenLabsPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,cAAc;AAEd,MAAM,yBAAyB,OAAO;AAAA,EACpC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,iBAAiB,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
1
+ {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels =\n | 'eleven_monolingual_v1'\n | 'eleven_multilingual_v1'\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_turbo_v2'\n | 'eleven_turbo_v2_5'\n | 'eleven_v3';\n\nexport type TTSEncoding =\n // XXX(nbsp): MP3 is not yet supported\n // | 'mp3_22050_32'\n // | 'mp3_44100_32'\n // | 'mp3_44100_64'\n // | 'mp3_44100_96'\n // | 'mp3_44100_128'\n // | 'mp3_44100_192'\n 'pcm_16000' | 'pcm_22050' | 'pcm_44100';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
package/dist/models.d.cts CHANGED
@@ -1,3 +1,3 @@
1
- export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5';
1
+ export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5' | 'eleven_v3';
2
2
  export type TTSEncoding = 'pcm_16000' | 'pcm_22050' | 'pcm_44100';
3
3
  //# sourceMappingURL=models.d.ts.map
package/dist/models.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5';
1
+ export type TTSModels = 'eleven_monolingual_v1' | 'eleven_multilingual_v1' | 'eleven_multilingual_v2' | 'eleven_flash_v2' | 'eleven_flash_v2_5' | 'eleven_turbo_v2' | 'eleven_turbo_v2_5' | 'eleven_v3';
2
2
  export type TTSEncoding = 'pcm_16000' | 'pcm_22050' | 'pcm_44100';
3
3
  //# sourceMappingURL=models.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,uBAAuB,GACvB,wBAAwB,GACxB,wBAAwB,GACxB,iBAAiB,GACjB,mBAAmB,GACnB,iBAAiB,GACjB,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GAQrB,WAAW,GAAG,WAAW,GAAG,WAAW,CAAC"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,uBAAuB,GACvB,wBAAwB,GACxB,wBAAwB,GACxB,iBAAiB,GACjB,mBAAmB,GACnB,iBAAiB,GACjB,mBAAmB,GACnB,WAAW,CAAC;AAEhB,MAAM,MAAM,WAAW,GAQrB,WAAW,GAAG,WAAW,GAAG,WAAW,CAAC"}
package/dist/tts.cjs CHANGED
@@ -23,11 +23,11 @@ __export(tts_exports, {
23
23
  });
24
24
  module.exports = __toCommonJS(tts_exports);
25
25
  var import_agents = require("@livekit/agents");
26
- var import_node_crypto = require("node:crypto");
27
26
  var import_node_url = require("node:url");
28
27
  var import_ws = require("ws");
28
+ const DEFAULT_INACTIVITY_TIMEOUT = 300;
29
29
  const DEFAULT_VOICE = {
30
- id: "EXAVITQu4vr4xnSDxMaL",
30
+ id: "bIHbv24MWmeRgasZH58o",
31
31
  name: "Bella",
32
32
  category: "premade",
33
33
  settings: {
@@ -42,13 +42,13 @@ const AUTHORIZATION_HEADER = "xi-api-key";
42
42
  const defaultTTSOptions = {
43
43
  apiKey: process.env.ELEVEN_API_KEY,
44
44
  voice: DEFAULT_VOICE,
45
- modelID: "eleven_flash_v2_5",
45
+ modelID: "eleven_turbo_v2_5",
46
46
  baseURL: API_BASE_URL_V1,
47
47
  encoding: "pcm_22050",
48
- streamingLatency: 3,
49
48
  wordTokenizer: new import_agents.tokenize.basic.WordTokenizer(false),
50
- chunkLengthSchedule: [],
51
- enableSsmlParsing: false
49
+ enableSsmlParsing: false,
50
+ inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
51
+ syncAlignment: true
52
52
  };
53
53
  class TTS extends import_agents.tts.TTS {
54
54
  #opts;
@@ -106,19 +106,24 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
106
106
  const params = {
107
107
  model_id: opts.modelID,
108
108
  output_format: opts.encoding,
109
- optimize_streaming_latency: `${opts.streamingLatency}`,
110
109
  enable_ssml_parsing: `${opts.enableSsmlParsing}`,
111
- ...opts.languageCode && { language_code: opts.languageCode }
110
+ sync_alignment: `${opts.syncAlignment}`,
111
+ ...opts.autoMode !== void 0 && { auto_mode: `${opts.autoMode}` },
112
+ ...opts.languageCode && { language_code: opts.languageCode },
113
+ ...opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` },
114
+ ...opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }
112
115
  };
113
116
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
114
117
  this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
115
- this.#run();
116
118
  }
117
- async #run() {
119
+ async run() {
118
120
  const segments = new import_agents.AsyncIterableQueue();
119
121
  const tokenizeInput = async () => {
120
122
  let stream = null;
121
123
  for await (const text of this.input) {
124
+ if (this.abortController.signal.aborted) {
125
+ break;
126
+ }
122
127
  if (text === SynthesizeStream.FLUSH_SENTINEL) {
123
128
  stream == null ? void 0 : stream.endInput();
124
129
  stream = null;
@@ -134,12 +139,14 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
134
139
  };
135
140
  const runStream = async () => {
136
141
  for await (const stream of segments) {
142
+ if (this.abortController.signal.aborted) {
143
+ break;
144
+ }
137
145
  await this.#runWS(stream);
138
146
  this.queue.put(SynthesizeStream.END_OF_STREAM);
139
147
  }
140
148
  };
141
149
  await Promise.all([tokenizeInput(), runStream()]);
142
- this.close();
143
150
  }
144
151
  async #runWS(stream, maxRetry = 3) {
145
152
  let retries = 0;
@@ -148,6 +155,10 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
148
155
  ws = new import_ws.WebSocket(this.streamURL, {
149
156
  headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
150
157
  });
158
+ ws.on("error", (error) => {
159
+ this.abortController.abort();
160
+ this.#logger.error({ error }, "Error connecting to ElevenLabs");
161
+ });
151
162
  try {
152
163
  await new Promise((resolve, reject) => {
153
164
  ws.on("open", resolve);
@@ -167,20 +178,26 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
167
178
  await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
168
179
  }
169
180
  }
170
- const requestId = (0, import_node_crypto.randomUUID)();
171
- const segmentId = (0, import_node_crypto.randomUUID)();
181
+ const requestId = (0, import_agents.shortuuid)();
182
+ const segmentId = (0, import_agents.shortuuid)();
172
183
  ws.send(
173
184
  JSON.stringify({
174
185
  text: " ",
175
186
  voice_settings: this.#opts.voice.settings,
176
- try_trigger_generation: true,
177
- chunk_length_schedule: this.#opts.chunkLengthSchedule
187
+ ...this.#opts.chunkLengthSchedule && {
188
+ generation_config: {
189
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
190
+ }
191
+ }
178
192
  })
179
193
  );
180
194
  let eosSent = false;
181
195
  const sendTask = async () => {
182
196
  let xmlContent = [];
183
197
  for await (const data of stream) {
198
+ if (this.abortController.signal.aborted) {
199
+ break;
200
+ }
184
201
  let text = data.token;
185
202
  if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
186
203
  xmlContent.push(text);
@@ -191,7 +208,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
191
208
  continue;
192
209
  }
193
210
  }
194
- ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
211
+ ws.send(JSON.stringify({ text: text + " " }));
195
212
  }
196
213
  if (xmlContent.length) {
197
214
  this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
@@ -207,8 +224,9 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
207
224
  }
208
225
  };
209
226
  const listenTask = async () => {
227
+ let finalReceived = false;
210
228
  const bstream = new import_agents.AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
211
- while (!this.closed) {
229
+ while (!this.closed && !this.abortController.signal.aborted) {
212
230
  try {
213
231
  await new Promise((resolve, reject) => {
214
232
  ws.removeAllListeners();
@@ -217,30 +235,36 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
217
235
  if (!eosSent) {
218
236
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
219
237
  }
220
- reject();
238
+ if (!finalReceived) {
239
+ reject(new Error("WebSocket closed"));
240
+ }
221
241
  });
222
242
  }).then((msg) => {
223
243
  const json = JSON.parse(msg.toString());
224
- if ("audio" in json) {
244
+ if ("audio" in json && json.audio !== null) {
225
245
  const data = new Int8Array(Buffer.from(json.audio, "base64"));
226
246
  for (const frame of bstream.write(data)) {
227
247
  sendLastFrame(segmentId, false);
228
248
  lastFrame = frame;
229
249
  }
230
- } else if ("isFinal" in json) {
250
+ } else if (json.isFinal) {
251
+ finalReceived = true;
231
252
  for (const frame of bstream.flush()) {
232
253
  sendLastFrame(segmentId, false);
233
254
  lastFrame = frame;
234
255
  }
235
256
  sendLastFrame(segmentId, true);
236
257
  this.queue.put(SynthesizeStream.END_OF_STREAM);
237
- if (segmentId === requestId) {
258
+ if (segmentId === requestId || this.abortController.signal.aborted) {
238
259
  ws.close();
239
260
  return;
240
261
  }
241
262
  }
242
263
  });
243
- } catch {
264
+ } catch (err) {
265
+ if (err instanceof Error && !err.message.includes("WebSocket closed")) {
266
+ this.#logger.error({ err }, "Error in listenTask from ElevenLabs WebSocket");
267
+ }
244
268
  break;
245
269
  }
246
270
  }
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if 
(this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new 
Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, 
false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n ws.close();\n return;\n }\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAwE;AAExE,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,E
AAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,W
AAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,WAAW;AAC3B,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,sBAAoB;AACpB,gBAAwC;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE,4BAA4B,GAAG,KAAK,gB
AAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,
KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/dist/tts.d.cts CHANGED
@@ -21,10 +21,13 @@ export interface TTSOptions {
21
21
  languageCode?: string;
22
22
  baseURL: string;
23
23
  encoding: TTSEncoding;
24
- streamingLatency: number;
24
+ streamingLatency?: number;
25
25
  wordTokenizer: tokenize.WordTokenizer;
26
- chunkLengthSchedule: number[];
26
+ chunkLengthSchedule?: number[];
27
27
  enableSsmlParsing: boolean;
28
+ inactivityTimeout: number;
29
+ syncAlignment: boolean;
30
+ autoMode?: boolean;
28
31
  }
29
32
  export declare class TTS extends tts.TTS {
30
33
  #private;
@@ -39,6 +42,7 @@ export declare class SynthesizeStream extends tts.SynthesizeStream {
39
42
  label: string;
40
43
  readonly streamURL: URL;
41
44
  constructor(tts: TTS, opts: TTSOptions);
45
+ protected run(): Promise<void>;
42
46
  }
43
47
  export {};
44
48
  //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts CHANGED
@@ -21,10 +21,13 @@ export interface TTSOptions {
21
21
  languageCode?: string;
22
22
  baseURL: string;
23
23
  encoding: TTSEncoding;
24
- streamingLatency: number;
24
+ streamingLatency?: number;
25
25
  wordTokenizer: tokenize.WordTokenizer;
26
- chunkLengthSchedule: number[];
26
+ chunkLengthSchedule?: number[];
27
27
  enableSsmlParsing: boolean;
28
+ inactivityTimeout: number;
29
+ syncAlignment: boolean;
30
+ autoMode?: boolean;
28
31
  }
29
32
  export declare class TTS extends tts.TTS {
30
33
  #private;
@@ -39,6 +42,7 @@ export declare class SynthesizeStream extends tts.SynthesizeStream {
39
42
  label: string;
40
43
  readonly streamURL: URL;
41
44
  constructor(tts: TTS, opts: TTSOptions);
45
+ protected run(): Promise<void>;
42
46
  }
43
47
  export {};
44
48
  //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA4C,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG1F,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;CA6KvC"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAuBtB,GAAG;CAiLpB"}
package/dist/tts.js CHANGED
@@ -1,9 +1,16 @@
1
- import { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from "@livekit/agents";
2
- import { randomUUID } from "node:crypto";
1
+ import {
2
+ AsyncIterableQueue,
3
+ AudioByteStream,
4
+ log,
5
+ shortuuid,
6
+ tokenize,
7
+ tts
8
+ } from "@livekit/agents";
3
9
  import { URL } from "node:url";
4
10
  import { WebSocket } from "ws";
11
+ const DEFAULT_INACTIVITY_TIMEOUT = 300;
5
12
  const DEFAULT_VOICE = {
6
- id: "EXAVITQu4vr4xnSDxMaL",
13
+ id: "bIHbv24MWmeRgasZH58o",
7
14
  name: "Bella",
8
15
  category: "premade",
9
16
  settings: {
@@ -18,13 +25,13 @@ const AUTHORIZATION_HEADER = "xi-api-key";
18
25
  const defaultTTSOptions = {
19
26
  apiKey: process.env.ELEVEN_API_KEY,
20
27
  voice: DEFAULT_VOICE,
21
- modelID: "eleven_flash_v2_5",
28
+ modelID: "eleven_turbo_v2_5",
22
29
  baseURL: API_BASE_URL_V1,
23
30
  encoding: "pcm_22050",
24
- streamingLatency: 3,
25
31
  wordTokenizer: new tokenize.basic.WordTokenizer(false),
26
- chunkLengthSchedule: [],
27
- enableSsmlParsing: false
32
+ enableSsmlParsing: false,
33
+ inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
34
+ syncAlignment: true
28
35
  };
29
36
  class TTS extends tts.TTS {
30
37
  #opts;
@@ -82,19 +89,24 @@ class SynthesizeStream extends tts.SynthesizeStream {
82
89
  const params = {
83
90
  model_id: opts.modelID,
84
91
  output_format: opts.encoding,
85
- optimize_streaming_latency: `${opts.streamingLatency}`,
86
92
  enable_ssml_parsing: `${opts.enableSsmlParsing}`,
87
- ...opts.languageCode && { language_code: opts.languageCode }
93
+ sync_alignment: `${opts.syncAlignment}`,
94
+ ...opts.autoMode !== void 0 && { auto_mode: `${opts.autoMode}` },
95
+ ...opts.languageCode && { language_code: opts.languageCode },
96
+ ...opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` },
97
+ ...opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }
88
98
  };
89
99
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
90
100
  this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
91
- this.#run();
92
101
  }
93
- async #run() {
102
+ async run() {
94
103
  const segments = new AsyncIterableQueue();
95
104
  const tokenizeInput = async () => {
96
105
  let stream = null;
97
106
  for await (const text of this.input) {
107
+ if (this.abortController.signal.aborted) {
108
+ break;
109
+ }
98
110
  if (text === SynthesizeStream.FLUSH_SENTINEL) {
99
111
  stream == null ? void 0 : stream.endInput();
100
112
  stream = null;
@@ -110,12 +122,14 @@ class SynthesizeStream extends tts.SynthesizeStream {
110
122
  };
111
123
  const runStream = async () => {
112
124
  for await (const stream of segments) {
125
+ if (this.abortController.signal.aborted) {
126
+ break;
127
+ }
113
128
  await this.#runWS(stream);
114
129
  this.queue.put(SynthesizeStream.END_OF_STREAM);
115
130
  }
116
131
  };
117
132
  await Promise.all([tokenizeInput(), runStream()]);
118
- this.close();
119
133
  }
120
134
  async #runWS(stream, maxRetry = 3) {
121
135
  let retries = 0;
@@ -124,6 +138,10 @@ class SynthesizeStream extends tts.SynthesizeStream {
124
138
  ws = new WebSocket(this.streamURL, {
125
139
  headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey }
126
140
  });
141
+ ws.on("error", (error) => {
142
+ this.abortController.abort();
143
+ this.#logger.error({ error }, "Error connecting to ElevenLabs");
144
+ });
127
145
  try {
128
146
  await new Promise((resolve, reject) => {
129
147
  ws.on("open", resolve);
@@ -143,20 +161,26 @@ class SynthesizeStream extends tts.SynthesizeStream {
143
161
  await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
144
162
  }
145
163
  }
146
- const requestId = randomUUID();
147
- const segmentId = randomUUID();
164
+ const requestId = shortuuid();
165
+ const segmentId = shortuuid();
148
166
  ws.send(
149
167
  JSON.stringify({
150
168
  text: " ",
151
169
  voice_settings: this.#opts.voice.settings,
152
- try_trigger_generation: true,
153
- chunk_length_schedule: this.#opts.chunkLengthSchedule
170
+ ...this.#opts.chunkLengthSchedule && {
171
+ generation_config: {
172
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
173
+ }
174
+ }
154
175
  })
155
176
  );
156
177
  let eosSent = false;
157
178
  const sendTask = async () => {
158
179
  let xmlContent = [];
159
180
  for await (const data of stream) {
181
+ if (this.abortController.signal.aborted) {
182
+ break;
183
+ }
160
184
  let text = data.token;
161
185
  if (this.#opts.enableSsmlParsing && text.startsWith("<phoneme") || xmlContent.length) {
162
186
  xmlContent.push(text);
@@ -167,7 +191,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
167
191
  continue;
168
192
  }
169
193
  }
170
- ws.send(JSON.stringify({ text: text + " ", try_trigger_generation: false }));
194
+ ws.send(JSON.stringify({ text: text + " " }));
171
195
  }
172
196
  if (xmlContent.length) {
173
197
  this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
@@ -183,8 +207,9 @@ class SynthesizeStream extends tts.SynthesizeStream {
183
207
  }
184
208
  };
185
209
  const listenTask = async () => {
210
+ let finalReceived = false;
186
211
  const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
187
- while (!this.closed) {
212
+ while (!this.closed && !this.abortController.signal.aborted) {
188
213
  try {
189
214
  await new Promise((resolve, reject) => {
190
215
  ws.removeAllListeners();
@@ -193,30 +218,36 @@ class SynthesizeStream extends tts.SynthesizeStream {
193
218
  if (!eosSent) {
194
219
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
195
220
  }
196
- reject();
221
+ if (!finalReceived) {
222
+ reject(new Error("WebSocket closed"));
223
+ }
197
224
  });
198
225
  }).then((msg) => {
199
226
  const json = JSON.parse(msg.toString());
200
- if ("audio" in json) {
227
+ if ("audio" in json && json.audio !== null) {
201
228
  const data = new Int8Array(Buffer.from(json.audio, "base64"));
202
229
  for (const frame of bstream.write(data)) {
203
230
  sendLastFrame(segmentId, false);
204
231
  lastFrame = frame;
205
232
  }
206
- } else if ("isFinal" in json) {
233
+ } else if (json.isFinal) {
234
+ finalReceived = true;
207
235
  for (const frame of bstream.flush()) {
208
236
  sendLastFrame(segmentId, false);
209
237
  lastFrame = frame;
210
238
  }
211
239
  sendLastFrame(segmentId, true);
212
240
  this.queue.put(SynthesizeStream.END_OF_STREAM);
213
- if (segmentId === requestId) {
241
+ if (segmentId === requestId || this.abortController.signal.aborted) {
214
242
  ws.close();
215
243
  return;
216
244
  }
217
245
  }
218
246
  });
219
- } catch {
247
+ } catch (err) {
248
+ if (err instanceof Error && !err.message.includes("WebSocket closed")) {
249
+ this.#logger.error({ err }, "Error in listenTask from ElevenLabs WebSocket");
250
+ }
220
251
  break;
221
252
  }
222
253
  }
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if 
(this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new 
Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, 
false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n ws.close();\n return;\n }\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,iBAAiB,KAAK,UAAU,WAAW;AAExE,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,O
AAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC
1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,WAAW;AAC3B,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE
,4BAA4B,GAAG,KAAK,gBAAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KA
AK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-elevenlabs",
3
- "version": "0.6.3",
3
+ "version": "1.0.0-next.1",
4
4
  "description": "ElevenLabs plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -25,10 +25,10 @@
25
25
  "README.md"
26
26
  ],
27
27
  "devDependencies": {
28
- "@livekit/agents": "^x",
29
- "@livekit/agents-plugin-openai": "^x",
30
- "@livekit/agents-plugins-test": "^x",
31
- "@livekit/rtc-node": "^0.13.11",
28
+ "@livekit/agents": "^1.0.0-next.1",
29
+ "@livekit/agents-plugin-openai": "^1.0.0-next.1",
30
+ "@livekit/agents-plugins-test": "^1.0.0-next.1",
31
+ "@livekit/rtc-node": "^0.13.12",
32
32
  "@microsoft/api-extractor": "^7.35.0",
33
33
  "@types/ws": "^8.5.10",
34
34
  "tsup": "^8.3.5",
@@ -38,8 +38,8 @@
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
- "@livekit/rtc-node": "^0.13.11",
42
- "@livekit/agents": "^0.7.7x"
41
+ "@livekit/rtc-node": "^0.13.12",
42
+ "@livekit/agents": "^1.0.0-next.11.0.0-next.1"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/index.ts CHANGED
@@ -1,5 +1,18 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Plugin } from '@livekit/agents';
4
5
 
5
6
  export * from './tts.js';
7
+
8
+ class ElevenLabsPlugin extends Plugin {
9
+ constructor() {
10
+ super({
11
+ title: 'elevenlabs',
12
+ version: '0.6.2',
13
+ package: '@livekit/agents-plugin-elevenlabs',
14
+ });
15
+ }
16
+ }
17
+
18
+ Plugin.registerPlugin(new ElevenLabsPlugin());
package/src/models.ts CHANGED
@@ -9,7 +9,8 @@ export type TTSModels =
9
9
  | 'eleven_flash_v2'
10
10
  | 'eleven_flash_v2_5'
11
11
  | 'eleven_turbo_v2'
12
- | 'eleven_turbo_v2_5';
12
+ | 'eleven_turbo_v2_5'
13
+ | 'eleven_v3';
13
14
 
14
15
  export type TTSEncoding =
15
16
  // XXX(nbsp): MP3 is not yet supported
package/src/tts.ts CHANGED
@@ -1,13 +1,21 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';
4
+ import {
5
+ AsyncIterableQueue,
6
+ AudioByteStream,
7
+ log,
8
+ shortuuid,
9
+ tokenize,
10
+ tts,
11
+ } from '@livekit/agents';
5
12
  import type { AudioFrame } from '@livekit/rtc-node';
6
- import { randomUUID } from 'node:crypto';
7
13
  import { URL } from 'node:url';
8
14
  import { type RawData, WebSocket } from 'ws';
9
15
  import type { TTSEncoding, TTSModels } from './models.js';
10
16
 
17
+ const DEFAULT_INACTIVITY_TIMEOUT = 300;
18
+
11
19
  type Voice = {
12
20
  id: string;
13
21
  name: string;
@@ -23,7 +31,7 @@ type VoiceSettings = {
23
31
  };
24
32
 
25
33
  const DEFAULT_VOICE: Voice = {
26
- id: 'EXAVITQu4vr4xnSDxMaL',
34
+ id: 'bIHbv24MWmeRgasZH58o',
27
35
  name: 'Bella',
28
36
  category: 'premade',
29
37
  settings: {
@@ -44,22 +52,25 @@ export interface TTSOptions {
44
52
  languageCode?: string;
45
53
  baseURL: string;
46
54
  encoding: TTSEncoding;
47
- streamingLatency: number;
55
+ streamingLatency?: number;
48
56
  wordTokenizer: tokenize.WordTokenizer;
49
- chunkLengthSchedule: number[];
57
+ chunkLengthSchedule?: number[];
50
58
  enableSsmlParsing: boolean;
59
+ inactivityTimeout: number;
60
+ syncAlignment: boolean;
61
+ autoMode?: boolean;
51
62
  }
52
63
 
53
64
  const defaultTTSOptions: TTSOptions = {
54
65
  apiKey: process.env.ELEVEN_API_KEY,
55
66
  voice: DEFAULT_VOICE,
56
- modelID: 'eleven_flash_v2_5',
67
+ modelID: 'eleven_turbo_v2_5',
57
68
  baseURL: API_BASE_URL_V1,
58
69
  encoding: 'pcm_22050',
59
- streamingLatency: 3,
60
70
  wordTokenizer: new tokenize.basic.WordTokenizer(false),
61
- chunkLengthSchedule: [],
62
71
  enableSsmlParsing: false,
72
+ inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
73
+ syncAlignment: true,
63
74
  };
64
75
 
65
76
  export class TTS extends tts.TTS {
@@ -133,22 +144,26 @@ export class SynthesizeStream extends tts.SynthesizeStream {
133
144
  const params = {
134
145
  model_id: opts.modelID,
135
146
  output_format: opts.encoding,
136
- optimize_streaming_latency: `${opts.streamingLatency}`,
137
147
  enable_ssml_parsing: `${opts.enableSsmlParsing}`,
148
+ sync_alignment: `${opts.syncAlignment}`,
149
+ ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),
138
150
  ...(opts.languageCode && { language_code: opts.languageCode }),
151
+ ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),
152
+ ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),
139
153
  };
140
154
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
141
155
  this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');
142
-
143
- this.#run();
144
156
  }
145
157
 
146
- async #run() {
158
+ protected async run() {
147
159
  const segments = new AsyncIterableQueue<tokenize.WordStream>();
148
160
 
149
161
  const tokenizeInput = async () => {
150
162
  let stream: tokenize.WordStream | null = null;
151
163
  for await (const text of this.input) {
164
+ if (this.abortController.signal.aborted) {
165
+ break;
166
+ }
152
167
  if (text === SynthesizeStream.FLUSH_SENTINEL) {
153
168
  stream?.endInput();
154
169
  stream = null;
@@ -165,13 +180,15 @@ export class SynthesizeStream extends tts.SynthesizeStream {
165
180
 
166
181
  const runStream = async () => {
167
182
  for await (const stream of segments) {
183
+ if (this.abortController.signal.aborted) {
184
+ break;
185
+ }
168
186
  await this.#runWS(stream);
169
187
  this.queue.put(SynthesizeStream.END_OF_STREAM);
170
188
  }
171
189
  };
172
190
 
173
191
  await Promise.all([tokenizeInput(), runStream()]);
174
- this.close();
175
192
  }
176
193
 
177
194
  async #runWS(stream: tokenize.WordStream, maxRetry = 3) {
@@ -182,6 +199,11 @@ export class SynthesizeStream extends tts.SynthesizeStream {
182
199
  headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },
183
200
  });
184
201
 
202
+ ws.on('error', (error) => {
203
+ this.abortController.abort();
204
+ this.#logger.error({ error }, 'Error connecting to ElevenLabs');
205
+ });
206
+
185
207
  try {
186
208
  await new Promise((resolve, reject) => {
187
209
  ws.on('open', resolve);
@@ -204,15 +226,18 @@ export class SynthesizeStream extends tts.SynthesizeStream {
204
226
  }
205
227
  }
206
228
 
207
- const requestId = randomUUID();
208
- const segmentId = randomUUID();
229
+ const requestId = shortuuid();
230
+ const segmentId = shortuuid();
209
231
 
210
232
  ws.send(
211
233
  JSON.stringify({
212
234
  text: ' ',
213
235
  voice_settings: this.#opts.voice.settings,
214
- try_trigger_generation: true,
215
- chunk_length_schedule: this.#opts.chunkLengthSchedule,
236
+ ...(this.#opts.chunkLengthSchedule && {
237
+ generation_config: {
238
+ chunk_length_schedule: this.#opts.chunkLengthSchedule,
239
+ },
240
+ }),
216
241
  }),
217
242
  );
218
243
  let eosSent = false;
@@ -220,6 +245,9 @@ export class SynthesizeStream extends tts.SynthesizeStream {
220
245
  const sendTask = async () => {
221
246
  let xmlContent: string[] = [];
222
247
  for await (const data of stream) {
248
+ if (this.abortController.signal.aborted) {
249
+ break;
250
+ }
223
251
  let text = data.token;
224
252
 
225
253
  if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {
@@ -232,13 +260,14 @@ export class SynthesizeStream extends tts.SynthesizeStream {
232
260
  }
233
261
  }
234
262
 
235
- ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));
263
+ ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space
236
264
  }
237
265
 
238
266
  if (xmlContent.length) {
239
267
  this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
240
268
  }
241
269
 
270
+ // no more tokens, mark eos
242
271
  ws.send(JSON.stringify({ text: '' }));
243
272
  eosSent = true;
244
273
  };
@@ -252,8 +281,9 @@ export class SynthesizeStream extends tts.SynthesizeStream {
252
281
  };
253
282
 
254
283
  const listenTask = async () => {
284
+ let finalReceived = false;
255
285
  const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
256
- while (!this.closed) {
286
+ while (!this.closed && !this.abortController.signal.aborted) {
257
287
  try {
258
288
  await new Promise<RawData>((resolve, reject) => {
259
289
  ws.removeAllListeners();
@@ -262,17 +292,21 @@ export class SynthesizeStream extends tts.SynthesizeStream {
262
292
  if (!eosSent) {
263
293
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
264
294
  }
265
- reject();
295
+ if (!finalReceived) {
296
+ reject(new Error('WebSocket closed'));
297
+ }
266
298
  });
267
299
  }).then((msg) => {
268
300
  const json = JSON.parse(msg.toString());
269
- if ('audio' in json) {
301
+ // a message can carry an "audio" key whose value is null; only decode real base64 payloads
302
+ if ('audio' in json && json.audio !== null) {
270
303
  const data = new Int8Array(Buffer.from(json.audio, 'base64'));
271
304
  for (const frame of bstream.write(data)) {
272
305
  sendLastFrame(segmentId, false);
273
306
  lastFrame = frame;
274
307
  }
275
- } else if ('isFinal' in json) {
308
+ } else if (json.isFinal) {
309
+ finalReceived = true;
276
310
  for (const frame of bstream.flush()) {
277
311
  sendLastFrame(segmentId, false);
278
312
  lastFrame = frame;
@@ -280,13 +314,17 @@ export class SynthesizeStream extends tts.SynthesizeStream {
280
314
  sendLastFrame(segmentId, true);
281
315
  this.queue.put(SynthesizeStream.END_OF_STREAM);
282
316
 
283
- if (segmentId === requestId) {
317
+ if (segmentId === requestId || this.abortController.signal.aborted) {
284
318
  ws.close();
285
319
  return;
286
320
  }
287
321
  }
288
322
  });
289
- } catch {
323
+ } catch (err) {
324
+ // the close handler rejects with 'WebSocket closed' and logs the close itself when EOS was not sent, so don't log that case again here
325
+ if (err instanceof Error && !err.message.includes('WebSocket closed')) {
326
+ this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');
327
+ }
290
328
  break;
291
329
  }
292
330
  }