@livekit/agents-plugin-elevenlabs 1.0.17 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tts.cjs CHANGED
@@ -39,13 +39,12 @@ const DEFAULT_VOICE = {
39
39
  };
40
40
  const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
41
41
  const AUTHORIZATION_HEADER = "xi-api-key";
42
- const defaultTTSOptions = {
42
+ const defaultTTSOptionsBase = {
43
43
  apiKey: process.env.ELEVEN_API_KEY,
44
44
  voice: DEFAULT_VOICE,
45
45
  modelID: "eleven_turbo_v2_5",
46
46
  baseURL: API_BASE_URL_V1,
47
47
  encoding: "pcm_22050",
48
- wordTokenizer: new import_agents.tokenize.basic.WordTokenizer(false),
49
48
  enableSsmlParsing: false,
50
49
  inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
51
50
  syncAlignment: true
@@ -54,12 +53,23 @@ class TTS extends import_agents.tts.TTS {
54
53
  #opts;
55
54
  label = "elevenlabs.TTS";
56
55
  constructor(opts = {}) {
57
- super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
56
+ super(sampleRateFromFormat(opts.encoding || defaultTTSOptionsBase.encoding), 1, {
58
57
  streaming: true
59
58
  });
59
+ const autoMode = opts.autoMode !== void 0 ? opts.autoMode : false;
60
+ let wordTokenizer = opts.wordTokenizer;
61
+ if (!wordTokenizer) {
62
+ wordTokenizer = autoMode ? new import_agents.tokenize.basic.SentenceTokenizer() : new import_agents.tokenize.basic.WordTokenizer(false);
63
+ } else if (autoMode && !(wordTokenizer instanceof import_agents.tokenize.SentenceTokenizer)) {
64
+ (0, import_agents.log)().warn(
65
+ "autoMode is enabled, it expects full sentences or phrases. Please provide a SentenceTokenizer instead of a WordTokenizer."
66
+ );
67
+ }
60
68
  this.#opts = {
61
- ...defaultTTSOptions,
62
- ...opts
69
+ ...defaultTTSOptionsBase,
70
+ ...opts,
71
+ autoMode,
72
+ wordTokenizer
63
73
  };
64
74
  if (this.#opts.apiKey === void 0) {
65
75
  throw new Error(
@@ -180,17 +190,18 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
180
190
  }
181
191
  const requestId = (0, import_agents.shortuuid)();
182
192
  const segmentId = (0, import_agents.shortuuid)();
183
- ws.send(
184
- JSON.stringify({
185
- text: " ",
186
- voice_settings: this.#opts.voice.settings,
187
- ...this.#opts.chunkLengthSchedule && {
188
- generation_config: {
189
- chunk_length_schedule: this.#opts.chunkLengthSchedule
190
- }
193
+ const wsSend = (data) => {
194
+ ws.send(JSON.stringify(data));
195
+ };
196
+ wsSend({
197
+ text: " ",
198
+ voice_settings: this.#opts.voice.settings,
199
+ ...this.#opts.chunkLengthSchedule && {
200
+ generation_config: {
201
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
191
202
  }
192
- })
193
- );
203
+ }
204
+ });
194
205
  let eosSent = false;
195
206
  const sendTask = async () => {
196
207
  let xmlContent = [];
@@ -208,12 +219,16 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
208
219
  continue;
209
220
  }
210
221
  }
211
- ws.send(JSON.stringify({ text: text + " " }));
222
+ wsSend({
223
+ text: text + " "
224
+ // must always end with a space
225
+ // ...(flushOnChunk && { flush: true }),
226
+ });
212
227
  }
213
228
  if (xmlContent.length) {
214
229
  this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
215
230
  }
216
- ws.send(JSON.stringify({ text: "" }));
231
+ wsSend({ text: "" });
217
232
  eosSent = true;
218
233
  };
219
234
  let lastFrame;
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,sBAAoB;AACpB,gBAAwC;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE,4BAA4B,GAAG,KAAK,gB
AAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,
KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptionsBase = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050' as TTSEncoding,\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptionsBase.encoding), 1, {\n streaming: true,\n });\n\n // Set autoMode to true by default if not provided is Python behavior,\n // but to make it non-breaking, we keep false as default in typescript\n const autoMode = opts.autoMode !== undefined ? opts.autoMode : false;\n\n // Set default tokenizer based on autoMode if not provided\n let wordTokenizer = opts.wordTokenizer;\n if (!wordTokenizer) {\n wordTokenizer = autoMode\n ? new tokenize.basic.SentenceTokenizer()\n : new tokenize.basic.WordTokenizer(false);\n } else if (autoMode && !(wordTokenizer instanceof tokenize.SentenceTokenizer)) {\n // Warn if autoMode is enabled but a WordTokenizer was provided\n log().warn(\n 'autoMode is enabled, it expects full sentences or phrases. ' +\n 'Please provide a SentenceTokenizer instead of a WordTokenizer.',\n );\n }\n\n this.#opts = {\n ...defaultTTSOptionsBase,\n ...opts,\n autoMode,\n wordTokenizer,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: 
TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream | tokenize.SentenceStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | tokenize.SentenceStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream | tokenize.SentenceStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { 
[AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // simple helper to make sure what we send to ws.send\n const wsSend = (data: {\n // (SynthesizeContent from python)\n text: string;\n // setting flush somehow never finishes the current speech generation\n // https://github.com/livekit/agents-js/pull/820#issuecomment-3517138706\n // flush?: boolean;\n // initialization\n voice_settings?: VoiceSettings;\n generation_config?: {\n chunk_length_schedule: number[];\n };\n }) => {\n ws.send(JSON.stringify(data));\n };\n\n wsSend({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n });\n let eosSent = false;\n\n const sendTask = async () => {\n // Determine if we should flush on each chunk (sentence)\n /*const flushOnChunk =\n this.#opts.wordTokenizer instanceof tokenize.SentenceTokenizer &&\n this.#opts.autoMode !== undefined &&\n this.#opts.autoMode;*/\n\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if 
((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n wsSend({\n text: text + ' ', // must always end with a space\n // ...(flushOnChunk && { flush: true }),\n });\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos with flush\n // setting flush somehow never finishes the current speech generation\n // wsSend({ text: '', flush: true });\n wsSend({ text: '' });\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && !this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n 
this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,sBAAoB;AACpB,gBAAwC;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,wBAAwB;AAAA,EAC5B,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,sBAAsB,QAAQ,GAAG,GAAG;AAAA,MAC9E,WAAW;AAAA,IACb,CAAC;AAID,UAAM,WAAW,KAAK,aAAa,SAAY,KAAK,WAAW;AAG/D,QAAI,gBAAgB,KAAK;AACzB,QAAI,CAAC,eAAe;AAClB,sBAAgB,WACZ,IAAI,uBAAS,MAAM,kBAAkB,IACrC,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,IAC5C,WAAW,YAAY,EAAE,yBAAyB,uBAAS,oBAAoB;AAE7E,6BAAI,EAAE;AAAA,QACJ;AAAA,MAEF;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA
,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE,4BAA4B,GAAG,KAAK,gBAAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,iCAAkE;AAEvF,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAA+D;AACnE,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAAuD,WAAW,GAAG;AAChF,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAA
O,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAG5B,UAAM,SAAS,CAAC,SAWV;AACJ,SAAG,KAAK,KAAK,UAAU,IAAI,CAAC;AAAA,IAC9B;AAEA,WAAO;AAAA,MACL,MAAM;AAAA,MACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,MACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,QACpC,mBAAmB;AAAA,UACjB,uBAAuB,KAAK,MAAM;AAAA,QACpC;AAAA,MACF;AAAA,IACF,CAAC;AACD,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAO3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,eAAO;AAAA,UACL,MAAM,OAAO;AAAA;AAAA;AAAA,QAEf,CAAC;AAAA,MACH;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAKA,aAAO,EAAE,MAAM,GAAG,CAAC;AACnB,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,K
AAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/dist/tts.d.cts CHANGED
@@ -22,7 +22,7 @@ export interface TTSOptions {
22
22
  baseURL: string;
23
23
  encoding: TTSEncoding;
24
24
  streamingLatency?: number;
25
- wordTokenizer: tokenize.WordTokenizer;
25
+ wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;
26
26
  chunkLengthSchedule?: number[];
27
27
  enableSsmlParsing: boolean;
28
28
  inactivityTimeout: number;
package/dist/tts.d.ts CHANGED
@@ -22,7 +22,7 @@ export interface TTSOptions {
22
22
  baseURL: string;
23
23
  encoding: TTSEncoding;
24
24
  streamingLatency?: number;
25
- wordTokenizer: tokenize.WordTokenizer;
25
+ wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;
26
26
  chunkLengthSchedule?: number[];
27
27
  enableSsmlParsing: boolean;
28
28
  inactivityTimeout: number;
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAuBtB,GAAG;CAiLpB"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,QAAQ,CAAC,aAAa,GAAG,QAAQ,CAAC,iBAAiB,CAAC;IACnE,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAaD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAqCpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAuBtB,GAAG;CA0MpB"}
package/dist/tts.js CHANGED
@@ -22,13 +22,12 @@ const DEFAULT_VOICE = {
22
22
  };
23
23
  const API_BASE_URL_V1 = "https://api.elevenlabs.io/v1/";
24
24
  const AUTHORIZATION_HEADER = "xi-api-key";
25
- const defaultTTSOptions = {
25
+ const defaultTTSOptionsBase = {
26
26
  apiKey: process.env.ELEVEN_API_KEY,
27
27
  voice: DEFAULT_VOICE,
28
28
  modelID: "eleven_turbo_v2_5",
29
29
  baseURL: API_BASE_URL_V1,
30
30
  encoding: "pcm_22050",
31
- wordTokenizer: new tokenize.basic.WordTokenizer(false),
32
31
  enableSsmlParsing: false,
33
32
  inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
34
33
  syncAlignment: true
@@ -37,12 +36,23 @@ class TTS extends tts.TTS {
37
36
  #opts;
38
37
  label = "elevenlabs.TTS";
39
38
  constructor(opts = {}) {
40
- super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
39
+ super(sampleRateFromFormat(opts.encoding || defaultTTSOptionsBase.encoding), 1, {
41
40
  streaming: true
42
41
  });
42
+ const autoMode = opts.autoMode !== void 0 ? opts.autoMode : false;
43
+ let wordTokenizer = opts.wordTokenizer;
44
+ if (!wordTokenizer) {
45
+ wordTokenizer = autoMode ? new tokenize.basic.SentenceTokenizer() : new tokenize.basic.WordTokenizer(false);
46
+ } else if (autoMode && !(wordTokenizer instanceof tokenize.SentenceTokenizer)) {
47
+ log().warn(
48
+ "autoMode is enabled, it expects full sentences or phrases. Please provide a SentenceTokenizer instead of a WordTokenizer."
49
+ );
50
+ }
43
51
  this.#opts = {
44
- ...defaultTTSOptions,
45
- ...opts
52
+ ...defaultTTSOptionsBase,
53
+ ...opts,
54
+ autoMode,
55
+ wordTokenizer
46
56
  };
47
57
  if (this.#opts.apiKey === void 0) {
48
58
  throw new Error(
@@ -163,17 +173,18 @@ class SynthesizeStream extends tts.SynthesizeStream {
163
173
  }
164
174
  const requestId = shortuuid();
165
175
  const segmentId = shortuuid();
166
- ws.send(
167
- JSON.stringify({
168
- text: " ",
169
- voice_settings: this.#opts.voice.settings,
170
- ...this.#opts.chunkLengthSchedule && {
171
- generation_config: {
172
- chunk_length_schedule: this.#opts.chunkLengthSchedule
173
- }
176
+ const wsSend = (data) => {
177
+ ws.send(JSON.stringify(data));
178
+ };
179
+ wsSend({
180
+ text: " ",
181
+ voice_settings: this.#opts.voice.settings,
182
+ ...this.#opts.chunkLengthSchedule && {
183
+ generation_config: {
184
+ chunk_length_schedule: this.#opts.chunkLengthSchedule
174
185
  }
175
- })
176
- );
186
+ }
187
+ });
177
188
  let eosSent = false;
178
189
  const sendTask = async () => {
179
190
  let xmlContent = [];
@@ -191,12 +202,16 @@ class SynthesizeStream extends tts.SynthesizeStream {
191
202
  continue;
192
203
  }
193
204
  }
194
- ws.send(JSON.stringify({ text: text + " " }));
205
+ wsSend({
206
+ text: text + " "
207
+ // must always end with a space
208
+ // ...(flushOnChunk && { flush: true }),
209
+ });
195
210
  }
196
211
  if (xmlContent.length) {
197
212
  this.#logger.warn("ElevenLabs stream ended with incomplete XML content");
198
213
  }
199
- ws.send(JSON.stringify({ text: "" }));
214
+ wsSend({ text: "" });
200
215
  eosSent = true;
201
216
  };
202
217
  let lastFrame;
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? 
'' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', 
resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && 
!this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return 
Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE
,4BAA4B,GAAG,KAAK,gBAAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,UACpC,mBAAmB;AAAA,YACjB,uBAAuB,KAAK,MAAM;AAAA,UACpC;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KA
AK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,IAAI,CAAC,CAAC;AAAA,MAC9C;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAGA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n AsyncIterableQueue,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst DEFAULT_INACTIVITY_TIMEOUT = 300;\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'bIHbv24MWmeRgasZH58o',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency?: number;\n wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;\n chunkLengthSchedule?: number[];\n enableSsmlParsing: boolean;\n inactivityTimeout: number;\n syncAlignment: boolean;\n autoMode?: boolean;\n}\n\nconst defaultTTSOptionsBase = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_turbo_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050' as TTSEncoding,\n enableSsmlParsing: false,\n inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,\n syncAlignment: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || 
defaultTTSOptionsBase.encoding), 1, {\n streaming: true,\n });\n\n // Set autoMode to true by default if not provided is Python behavior,\n // but to make it non-breaking, we keep false as default in typescript\n const autoMode = opts.autoMode !== undefined ? opts.autoMode : false;\n\n // Set default tokenizer based on autoMode if not provided\n let wordTokenizer = opts.wordTokenizer;\n if (!wordTokenizer) {\n wordTokenizer = autoMode\n ? new tokenize.basic.SentenceTokenizer()\n : new tokenize.basic.WordTokenizer(false);\n } else if (autoMode && !(wordTokenizer instanceof tokenize.SentenceTokenizer)) {\n // Warn if autoMode is enabled but a WordTokenizer was provided\n log().warn(\n 'autoMode is enabled, it expects full sentences or phrases. ' +\n 'Please provide a SentenceTokenizer instead of a WordTokenizer.',\n );\n }\n\n this.#opts = {\n ...defaultTTSOptionsBase,\n ...opts,\n autoMode,\n wordTokenizer,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: 
TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n sync_alignment: `${opts.syncAlignment}`,\n ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),\n ...(opts.languageCode && { language_code: opts.languageCode }),\n ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),\n ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n }\n\n protected async run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream | tokenize.SentenceStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | tokenize.SentenceStream | null = null;\n for await (const text of this.input) {\n if (this.abortController.signal.aborted) {\n break;\n }\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n if (this.abortController.signal.aborted) {\n break;\n }\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n }\n\n async #runWS(stream: tokenize.WordStream | tokenize.SentenceStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { 
[AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n ws.on('error', (error) => {\n this.abortController.abort();\n this.#logger.error({ error }, 'Error connecting to ElevenLabs');\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n // simple helper to make sure what we send to ws.send\n const wsSend = (data: {\n // (SynthesizeContent from python)\n text: string;\n // setting flush somehow never finishes the current speech generation\n // https://github.com/livekit/agents-js/pull/820#issuecomment-3517138706\n // flush?: boolean;\n // initialization\n voice_settings?: VoiceSettings;\n generation_config?: {\n chunk_length_schedule: number[];\n };\n }) => {\n ws.send(JSON.stringify(data));\n };\n\n wsSend({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n ...(this.#opts.chunkLengthSchedule && {\n generation_config: {\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n },\n }),\n });\n let eosSent = false;\n\n const sendTask = async () => {\n // Determine if we should flush on each chunk (sentence)\n /*const flushOnChunk =\n this.#opts.wordTokenizer instanceof tokenize.SentenceTokenizer &&\n this.#opts.autoMode !== undefined &&\n this.#opts.autoMode;*/\n\n let xmlContent: string[] = [];\n for await (const data of stream) {\n if (this.abortController.signal.aborted) {\n break;\n }\n let text = data.token;\n\n if 
((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n wsSend({\n text: text + ' ', // must always end with a space\n // ...(flushOnChunk && { flush: true }),\n });\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n // no more tokens, mark eos with flush\n // setting flush somehow never finishes the current speech generation\n // wsSend({ text: '', flush: true });\n wsSend({ text: '' });\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n let finalReceived = false;\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed && !this.abortController.signal.aborted) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n }\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n // remove the \"audio\" field from the json object when printing\n if ('audio' in json && json.audio !== null) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if (json.isFinal) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n 
this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId || this.abortController.signal.aborted) {\n ws.close();\n return;\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');\n }\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAGxC,MAAM,6BAA6B;AAgBnC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAkB7B,MAAM,wBAAwB;AAAA,EAC5B,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,sBAAsB,QAAQ,GAAG,GAAG;AAAA,MAC9E,WAAW;AAAA,IACb,CAAC;AAID,UAAM,WAAW,KAAK,aAAa,SAAY,KAAK,WAAW;AAG/D,QAAI,gBAAgB,KAAK;AACzB,QAAI,CAAC,eAAe;AAClB,sBAAgB,WACZ,IAAI,SAAS,MAAM,kBAAkB,IACrC,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,IAC5C,WAAW,YAAY,EAAE,yBAAyB,SAAS,oBAAoB;AAE7E,UAAI,EAAE;AAAA,QACJ;AAAA,MAEF;AAAA,IACF;AAEA,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA
,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,gBAAgB,GAAG,KAAK,aAAa;AAAA,MACrC,GAAI,KAAK,aAAa,UAAa,EAAE,WAAW,GAAG,KAAK,QAAQ,GAAG;AAAA,MACnE,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,MAC5D,GAAI,KAAK,qBAAqB,EAAE,oBAAoB,GAAG,KAAK,iBAAiB,GAAG;AAAA,MAChF,GAAI,KAAK,oBAAoB,EAAE,4BAA4B,GAAG,KAAK,gBAAgB,GAAG;AAAA,IACxF;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAAA,EACxE;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW,IAAI,mBAAkE;AAEvF,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAA+D;AACnE,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,OAAO,QAAuD,WAAW,GAAG;AAChF,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,SAAG,GAAG,SAAS,CAAC,UAAU;AACxB,aAAK,gBAAgB,MAAM;AAC3B,aAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,gCAAgC;AAAA,MAChE,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SA
AS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAG5B,UAAM,SAAS,CAAC,SAWV;AACJ,SAAG,KAAK,KAAK,UAAU,IAAI,CAAC;AAAA,IAC9B;AAEA,WAAO;AAAA,MACL,MAAM;AAAA,MACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,MACjC,GAAI,KAAK,MAAM,uBAAuB;AAAA,QACpC,mBAAmB;AAAA,UACjB,uBAAuB,KAAK,MAAM;AAAA,QACpC;AAAA,MACF;AAAA,IACF,CAAC;AACD,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAO3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,QACF;AACA,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,eAAO;AAAA,UACL,MAAM,OAAO;AAAA;AAAA;AAAA,QAEf,CAAC;AAAA,MACH;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAKA,aAAO,EAAE,MAAM,GAAG,CAAC;AACnB,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI,gBAAgB;AACpB,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AAEtC,gBAAI,WAAW,QAAQ,KAAK,UAAU,MAAM;AAC1C,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,CAAC;AAC5D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,
8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,KAAK,SAAS;AACvB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,kBAAI,cAAc,aAAa,KAAK,gBAAgB,OAAO,SAAS;AAClE,mBAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,+CAA+C;AAAA,UAC7E;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-elevenlabs",
3
- "version": "1.0.17",
3
+ "version": "1.0.18",
4
4
  "description": "ElevenLabs plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,16 +30,16 @@
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents": "1.0.17",
34
- "@livekit/agents-plugin-openai": "1.0.17",
35
- "@livekit/agents-plugins-test": "1.0.17"
33
+ "@livekit/agents": "1.0.18",
34
+ "@livekit/agents-plugin-openai": "1.0.18",
35
+ "@livekit/agents-plugins-test": "1.0.18"
36
36
  },
37
37
  "dependencies": {
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@livekit/rtc-node": "^0.13.12",
42
- "@livekit/agents": "1.0.17"
42
+ "@livekit/agents": "1.0.18"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/tts.ts CHANGED
@@ -53,7 +53,7 @@ export interface TTSOptions {
53
53
  baseURL: string;
54
54
  encoding: TTSEncoding;
55
55
  streamingLatency?: number;
56
- wordTokenizer: tokenize.WordTokenizer;
56
+ wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;
57
57
  chunkLengthSchedule?: number[];
58
58
  enableSsmlParsing: boolean;
59
59
  inactivityTimeout: number;
@@ -61,13 +61,12 @@ export interface TTSOptions {
61
61
  autoMode?: boolean;
62
62
  }
63
63
 
64
- const defaultTTSOptions: TTSOptions = {
64
+ const defaultTTSOptionsBase = {
65
65
  apiKey: process.env.ELEVEN_API_KEY,
66
66
  voice: DEFAULT_VOICE,
67
67
  modelID: 'eleven_turbo_v2_5',
68
68
  baseURL: API_BASE_URL_V1,
69
- encoding: 'pcm_22050',
70
- wordTokenizer: new tokenize.basic.WordTokenizer(false),
69
+ encoding: 'pcm_22050' as TTSEncoding,
71
70
  enableSsmlParsing: false,
72
71
  inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
73
72
  syncAlignment: true,
@@ -78,13 +77,33 @@ export class TTS extends tts.TTS {
78
77
  label = 'elevenlabs.TTS';
79
78
 
80
79
  constructor(opts: Partial<TTSOptions> = {}) {
81
- super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
80
+ super(sampleRateFromFormat(opts.encoding || defaultTTSOptionsBase.encoding), 1, {
82
81
  streaming: true,
83
82
  });
84
83
 
84
+ // Defaulting autoMode to true when it is not provided is the Python behavior,
85
+ // but to stay non-breaking, we keep false as the default in TypeScript
86
+ const autoMode = opts.autoMode !== undefined ? opts.autoMode : false;
87
+
88
+ // Set default tokenizer based on autoMode if not provided
89
+ let wordTokenizer = opts.wordTokenizer;
90
+ if (!wordTokenizer) {
91
+ wordTokenizer = autoMode
92
+ ? new tokenize.basic.SentenceTokenizer()
93
+ : new tokenize.basic.WordTokenizer(false);
94
+ } else if (autoMode && !(wordTokenizer instanceof tokenize.SentenceTokenizer)) {
95
+ // Warn if autoMode is enabled but a WordTokenizer was provided
96
+ log().warn(
97
+ 'autoMode is enabled, it expects full sentences or phrases. ' +
98
+ 'Please provide a SentenceTokenizer instead of a WordTokenizer.',
99
+ );
100
+ }
101
+
85
102
  this.#opts = {
86
- ...defaultTTSOptions,
103
+ ...defaultTTSOptionsBase,
87
104
  ...opts,
105
+ autoMode,
106
+ wordTokenizer,
88
107
  };
89
108
 
90
109
  if (this.#opts.apiKey === undefined) {
@@ -156,10 +175,10 @@ export class SynthesizeStream extends tts.SynthesizeStream {
156
175
  }
157
176
 
158
177
  protected async run() {
159
- const segments = new AsyncIterableQueue<tokenize.WordStream>();
178
+ const segments = new AsyncIterableQueue<tokenize.WordStream | tokenize.SentenceStream>();
160
179
 
161
180
  const tokenizeInput = async () => {
162
- let stream: tokenize.WordStream | null = null;
181
+ let stream: tokenize.WordStream | tokenize.SentenceStream | null = null;
163
182
  for await (const text of this.input) {
164
183
  if (this.abortController.signal.aborted) {
165
184
  break;
@@ -191,7 +210,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
191
210
  await Promise.all([tokenizeInput(), runStream()]);
192
211
  }
193
212
 
194
- async #runWS(stream: tokenize.WordStream, maxRetry = 3) {
213
+ async #runWS(stream: tokenize.WordStream | tokenize.SentenceStream, maxRetry = 3) {
195
214
  let retries = 0;
196
215
  let ws: WebSocket;
197
216
  while (true) {
@@ -229,20 +248,40 @@ export class SynthesizeStream extends tts.SynthesizeStream {
229
248
  const requestId = shortuuid();
230
249
  const segmentId = shortuuid();
231
250
 
232
- ws.send(
233
- JSON.stringify({
234
- text: ' ',
235
- voice_settings: this.#opts.voice.settings,
236
- ...(this.#opts.chunkLengthSchedule && {
237
- generation_config: {
238
- chunk_length_schedule: this.#opts.chunkLengthSchedule,
239
- },
240
- }),
251
+ // simple helper that type-checks the payload we pass to ws.send
252
+ const wsSend = (data: {
253
+ // (SynthesizeContent from python)
254
+ text: string;
255
+ // setting flush somehow never finishes the current speech generation
256
+ // https://github.com/livekit/agents-js/pull/820#issuecomment-3517138706
257
+ // flush?: boolean;
258
+ // initialization
259
+ voice_settings?: VoiceSettings;
260
+ generation_config?: {
261
+ chunk_length_schedule: number[];
262
+ };
263
+ }) => {
264
+ ws.send(JSON.stringify(data));
265
+ };
266
+
267
+ wsSend({
268
+ text: ' ',
269
+ voice_settings: this.#opts.voice.settings,
270
+ ...(this.#opts.chunkLengthSchedule && {
271
+ generation_config: {
272
+ chunk_length_schedule: this.#opts.chunkLengthSchedule,
273
+ },
241
274
  }),
242
- );
275
+ });
243
276
  let eosSent = false;
244
277
 
245
278
  const sendTask = async () => {
279
+ // Determine if we should flush on each chunk (sentence)
280
+ /*const flushOnChunk =
281
+ this.#opts.wordTokenizer instanceof tokenize.SentenceTokenizer &&
282
+ this.#opts.autoMode !== undefined &&
283
+ this.#opts.autoMode;*/
284
+
246
285
  let xmlContent: string[] = [];
247
286
  for await (const data of stream) {
248
287
  if (this.abortController.signal.aborted) {
@@ -260,15 +299,20 @@ export class SynthesizeStream extends tts.SynthesizeStream {
260
299
  }
261
300
  }
262
301
 
263
- ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space
302
+ wsSend({
303
+ text: text + ' ', // must always end with a space
304
+ // ...(flushOnChunk && { flush: true }),
305
+ });
264
306
  }
265
307
 
266
308
  if (xmlContent.length) {
267
309
  this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
268
310
  }
269
311
 
270
- // no more tokens, mark eos
271
- ws.send(JSON.stringify({ text: '' }));
312
+ // no more tokens, mark eos (flush is intentionally not sent, see below)
313
+ // setting flush somehow never finishes the current speech generation
314
+ // wsSend({ text: '', flush: true });
315
+ wsSend({ text: '' });
272
316
  eosSent = true;
273
317
  };
274
318