@livekit/agents-plugin-elevenlabs 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tts.cjs CHANGED
@@ -107,7 +107,8 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
107
107
  model_id: opts.modelID,
108
108
  output_format: opts.encoding,
109
109
  optimize_streaming_latency: `${opts.streamingLatency}`,
110
- enable_ssml_parsing: `${opts.enableSsmlParsing}`
110
+ enable_ssml_parsing: `${opts.enableSsmlParsing}`,
111
+ ...opts.languageCode && { language_code: opts.languageCode }
111
112
  };
112
113
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
113
114
  this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64').buffer);\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAwE;AAExE,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACnE,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAAA,YAC/B;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64').buffer);\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAwE;AAExE,yBAA2B;AAC3B,sBAAoB;AACpB,gBAAwC;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,uBAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,oBAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,iCAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,oBAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,gBAAY,+BAAW;AAC7B,UAAM,gBAAY,+BAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,8BAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACnE,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAAA,YAC/B;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/dist/tts.d.ts CHANGED
@@ -18,6 +18,7 @@ export interface TTSOptions {
18
18
  apiKey?: string;
19
19
  voice: Voice;
20
20
  modelID: TTSModels | string;
21
+ languageCode?: string;
21
22
  baseURL: string;
22
23
  encoding: TTSEncoding;
23
24
  streamingLatency: number;
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA4C,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG1F,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;CAsKvC"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":";AAGA,OAAO,EAA4C,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG1F,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE1D,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAiBF,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,GAAG,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,WAAW,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC;IACtC,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,iBAAiB,EAAE,OAAO,CAAC;CAC5B;AAcD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAoB;gBAEb,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiBpC,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuBpC,UAAU,IAAI,GAAG,CAAC,aAAa;IAI/B,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAGxD,KAAK,SAAiC;IACtC,QAAQ,CAAC,SAAS,EAAE,GAAG,CAAC;gBAEZ,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;CAuKvC"}
package/dist/tts.js CHANGED
@@ -83,7 +83,8 @@ class SynthesizeStream extends tts.SynthesizeStream {
83
83
  model_id: opts.modelID,
84
84
  output_format: opts.encoding,
85
85
  optimize_streaming_latency: `${opts.streamingLatency}`,
86
- enable_ssml_parsing: `${opts.enableSsmlParsing}`
86
+ enable_ssml_parsing: `${opts.enableSsmlParsing}`,
87
+ ...opts.languageCode && { language_code: opts.languageCode }
87
88
  };
88
89
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
89
90
  this.streamURL.protocol = this.streamURL.protocol.replace("http", "ws");
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64').buffer);\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,iBAAiB,KAAK,UAAU,WAAW;AAExE,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,IAChD;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACnE,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAAA,YAC/B;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue, AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { URL } from 'node:url';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\ntype Voice = {\n id: string;\n name: string;\n category: string;\n settings?: VoiceSettings;\n};\n\ntype VoiceSettings = {\n stability: number; // 0..1\n similarity_boost: number; // 0..1\n style?: number; // 0..1\n use_speaker_boost: boolean;\n};\n\nconst DEFAULT_VOICE: Voice = {\n id: 'EXAVITQu4vr4xnSDxMaL',\n name: 'Bella',\n category: 'premade',\n settings: {\n stability: 0.71,\n similarity_boost: 0.5,\n style: 0.0,\n use_speaker_boost: true,\n },\n};\n\nconst API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';\nconst AUTHORIZATION_HEADER = 'xi-api-key';\n\nexport interface TTSOptions {\n apiKey?: string;\n voice: Voice;\n modelID: TTSModels | string;\n languageCode?: string;\n baseURL: string;\n encoding: TTSEncoding;\n streamingLatency: number;\n wordTokenizer: tokenize.WordTokenizer;\n chunkLengthSchedule: number[];\n enableSsmlParsing: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n apiKey: process.env.ELEVEN_API_KEY,\n voice: DEFAULT_VOICE,\n modelID: 'eleven_flash_v2_5',\n baseURL: API_BASE_URL_V1,\n encoding: 'pcm_22050',\n streamingLatency: 3,\n wordTokenizer: new tokenize.basic.WordTokenizer(false),\n chunkLengthSchedule: [],\n enableSsmlParsing: false,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'elevenlabs.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',\n );\n }\n }\n\n async listVoices(): Promise<Voice[]> {\n return fetch(this.#opts.baseURL + '/voices', {\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n },\n })\n .then((data) => data.json())\n .then((data) => {\n const voices: Voice[] = [];\n for (const voice of (\n data as { voices: { voice_id: string; name: string; category: string }[] }\n ).voices) {\n voices.push({\n id: voice.voice_id,\n name: voice.name,\n category: voice.category,\n settings: undefined,\n });\n }\n return voices;\n });\n }\n\n synthesize(): tts.ChunkedStream {\n throw new Error('Chunked responses are not supported on ElevenLabs TTS');\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n label = 'elevenlabs.SynthesizeStream';\n readonly streamURL: URL;\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.closed = false;\n\n // add trailing slash to URL if needed\n const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');\n\n this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);\n const params = {\n model_id: opts.modelID,\n output_format: opts.encoding,\n optimize_streaming_latency: `${opts.streamingLatency}`,\n enable_ssml_parsing: `${opts.enableSsmlParsing}`,\n ...(opts.languageCode && { language_code: opts.languageCode }),\n };\n Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));\n this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');\n\n this.#run();\n }\n\n async #run() {\n const segments = new AsyncIterableQueue<tokenize.WordStream>();\n\n const tokenizeInput = async () => {\n let stream: tokenize.WordStream | null = null;\n for await (const text of this.input) {\n if (text === SynthesizeStream.FLUSH_SENTINEL) {\n stream?.endInput();\n stream = null;\n } else {\n if (!stream) {\n stream = this.#opts.wordTokenizer.stream();\n segments.put(stream);\n }\n stream.pushText(text);\n }\n }\n segments.close();\n };\n\n const runStream = async () => {\n for await (const stream of segments) {\n await this.#runWS(stream);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n };\n\n await Promise.all([tokenizeInput(), runStream()]);\n this.close();\n }\n\n async #runWS(stream: tokenize.WordStream, maxRetry = 3) {\n let retries = 0;\n let ws: WebSocket;\n while (true) {\n ws = new WebSocket(this.streamURL, {\n headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n break;\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 5);\n retries++;\n\n this.#logger.warn(\n `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n const requestId = randomUUID();\n const segmentId = randomUUID();\n\n ws.send(\n JSON.stringify({\n text: ' ',\n voice_settings: this.#opts.voice.settings,\n try_trigger_generation: true,\n chunk_length_schedule: this.#opts.chunkLengthSchedule,\n }),\n );\n let eosSent = false;\n\n const sendTask = async () => {\n let xmlContent: string[] = [];\n for await (const data of stream) {\n let text = data.token;\n\n if ((this.#opts.enableSsmlParsing && text.startsWith('<phoneme')) || xmlContent.length) {\n xmlContent.push(text);\n if (text.indexOf('</phoneme>') !== -1) {\n text = xmlContent.join(' ');\n xmlContent = [];\n } else {\n continue;\n }\n }\n\n ws.send(JSON.stringify({ text: text + ' ', try_trigger_generation: false }));\n }\n\n if (xmlContent.length) {\n this.#logger.warn('ElevenLabs stream ended with incomplete XML content');\n }\n\n ws.send(JSON.stringify({ text: '' }));\n eosSent = true;\n };\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const listenTask = async () => {\n const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!eosSent) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n reject();\n });\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n if ('audio' in json) {\n const data = new Int8Array(Buffer.from(json.audio, 'base64').buffer);\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('isFinal' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n }\n });\n } catch {\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n }\n}\n\nconst sampleRateFromFormat = (encoding: TTSEncoding): number => {\n return Number(encoding.split('_')[1]);\n};\n"],"mappings":"AAGA,SAAS,oBAAoB,iBAAiB,KAAK,UAAU,WAAW;AAExE,SAAS,kBAAkB;AAC3B,SAAS,WAAW;AACpB,SAAuB,iBAAiB;AAiBxC,MAAM,gBAAuB;AAAA,EAC3B,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,UAAU;AAAA,EACV,UAAU;AAAA,IACR,WAAW;AAAA,IACX,kBAAkB;AAAA,IAClB,OAAO;AAAA,IACP,mBAAmB;AAAA,EACrB;AACF;AAEA,MAAM,kBAAkB;AACxB,MAAM,uBAAuB;AAe7B,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,OAAO;AAAA,EACP,SAAS;AAAA,EACT,SAAS;AAAA,EACT,UAAU;AAAA,EACV,kBAAkB;AAAA,EAClB,eAAe,IAAI,SAAS,MAAM,cAAc,KAAK;AAAA,EACrD,qBAAqB,CAAC;AAAA,EACtB,mBAAmB;AACrB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,qBAAqB,KAAK,YAAY,kBAAkB,QAAQ,GAAG,GAAG;AAAA,MAC1E,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,aAA+B;AACnC,WAAO,MAAM,KAAK,MAAM,UAAU,WAAW;AAAA,MAC3C,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,MACrC;AAAA,IACF,CAAC,EACE,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,EAC1B,KAAK,CAAC,SAAS;AACd,YAAM,SAAkB,CAAC;AACzB,iBAAW,SACT,KACA,QAAQ;AACR,eAAO,KAAK;AAAA,UACV,IAAI,MAAM;AAAA,UACV,MAAM,MAAM;AAAA,UACZ,UAAU,MAAM;AAAA,UAChB,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACL;AAAA,EAEA,aAAgC;AAC9B,UAAM,IAAI,MAAM,uDAAuD;AAAA,EACzE;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACC;AAAA,EAET,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AAGd,UAAM,UAAU,KAAK,WAAW,KAAK,QAAQ,SAAS,GAAG,IAAI,KAAK;AAElE,SAAK,YAAY,IAAI,IAAI,kBAAkB,KAAK,MAAM,EAAE,iBAAiB,OAAO;AAChF,UAAM,SAAS;AAAA,MACb,UAAU,KAAK;AAAA,MACf,eAAe,KAAK;AAAA,MACpB,4BAA4B,GAAG,KAAK,gBAAgB;AAAA,MACpD,qBAAqB,GAAG,KAAK,iBAAiB;AAAA,MAC9C,GAAI,KAAK,gBAAgB,EAAE,eAAe,KAAK,aAAa;AAAA,IAC9D;AACA,WAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,UAAU,aAAa,OAAO,GAAG,CAAC,CAAC;AACnF,SAAK,UAAU,WAAW,KAAK,UAAU,SAAS,QAAQ,QAAQ,IAAI;AAEtE,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,WAAW,IAAI,mBAAwC;AAE7D,UAAM,gBAAgB,YAAY;AAChC,UAAI,SAAqC;AACzC,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,2CAAQ;AACR,mBAAS;AAAA,QACX,OAAO;AACL,cAAI,CAAC,QAAQ;AACX,qBAAS,KAAK,MAAM,cAAc,OAAO;AACzC,qBAAS,IAAI,MAAM;AAAA,UACrB;AACA,iBAAO,SAAS,IAAI;AAAA,QACtB;AAAA,MACF;AACA,eAAS,MAAM;AAAA,IACjB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,UAAU,UAAU;AACnC,cAAM,KAAK,OAAO,MAAM;AACxB,aAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,MAC/C;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,CAAC;AAChD,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,OAAO,QAA6B,WAAW,GAAG;AACtD,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,MAAM;AACX,WAAK,IAAI,UAAU,KAAK,WAAW;AAAA,QACjC,SAAS,EAAE,CAAC,oBAAoB,GAAG,KAAK,MAAM,OAAO;AAAA,MACvD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AACD;AAAA,MACF,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,yCAAyC,OAAO,cAAc,CAAC,EAAE;AAAA,QACnF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,CAAC;AACrC;AAEA,aAAK,QAAQ;AAAA,UACX,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC7F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,UAAM,YAAY,WAAW;AAC7B,UAAM,YAAY,WAAW;AAE7B,OAAG;AAAA,MACD,KAAK,UAAU;AAAA,QACb,MAAM;AAAA,QACN,gBAAgB,KAAK,MAAM,MAAM;AAAA,QACjC,wBAAwB;AAAA,QACxB,uBAAuB,KAAK,MAAM;AAAA,MACpC,CAAC;AAAA,IACH;AACA,QAAI,UAAU;AAEd,UAAM,WAAW,YAAY;AAC3B,UAAI,aAAuB,CAAC;AAC5B,uBAAiB,QAAQ,QAAQ;AAC/B,YAAI,OAAO,KAAK;AAEhB,YAAK,KAAK,MAAM,qBAAqB,KAAK,WAAW,UAAU,KAAM,WAAW,QAAQ;AACtF,qBAAW,KAAK,IAAI;AACpB,cAAI,KAAK,QAAQ,YAAY,MAAM,IAAI;AACrC,mBAAO,WAAW,KAAK,GAAG;AAC1B,yBAAa,CAAC;AAAA,UAChB,OAAO;AACL;AAAA,UACF;AAAA,QACF;AAEA,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,OAAO,KAAK,wBAAwB,MAAM,CAAC,CAAC;AAAA,MAC7E;AAEA,UAAI,WAAW,QAAQ;AACrB,aAAK,QAAQ,KAAK,qDAAqD;AAAA,MACzE;AAEA,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,GAAG,CAAC,CAAC;AACpC,gBAAU;AAAA,IACZ;AAEA,QAAI;AACJ,UAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,UAAI,WAAW;AACb,aAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,UAAM,aAAa,YAAY;AAC7B,YAAM,UAAU,IAAI,gBAAgB,qBAAqB,KAAK,MAAM,QAAQ,GAAG,CAAC;AAChF,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,SAAS,WAAW;AAC9C,eAAG,mBAAmB;AACtB,eAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,eAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,qBAAO;AAAA,YACT,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,gBAAI,WAAW,MAAM;AACnB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,OAAO,QAAQ,EAAE,MAAM;AACnE,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,aAAa,MAAM;AAC5B,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAAA,YAC/B;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AACN;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAAA,EAC9C;AACF;AAEA,MAAM,uBAAuB,CAAC,aAAkC;AAC9D,SAAO,OAAO,SAAS,MAAM,GAAG,EAAE,CAAC,CAAC;AACtC;","names":["tts","segmentId"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-elevenlabs",
3
- "version": "0.5.3",
3
+ "version": "0.6.0",
4
4
  "description": "ElevenLabs plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -25,7 +25,7 @@
25
25
  "@livekit/agents": "^x",
26
26
  "@livekit/agents-plugin-openai": "^x",
27
27
  "@livekit/agents-plugins-test": "^x",
28
- "@livekit/rtc-node": "^0.13.1",
28
+ "@livekit/rtc-node": "^0.13.2",
29
29
  "@microsoft/api-extractor": "^7.35.0",
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
@@ -35,8 +35,8 @@
35
35
  "ws": "^8.16.0"
36
36
  },
37
37
  "peerDependencies": {
38
- "@livekit/rtc-node": "^0.13.1",
39
- "@livekit/agents": "^0.6.1x"
38
+ "@livekit/rtc-node": "^0.13.2",
39
+ "@livekit/agents": "^0.6.3x"
40
40
  },
41
41
  "scripts": {
42
42
  "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
package/src/tts.ts CHANGED
@@ -41,6 +41,7 @@ export interface TTSOptions {
41
41
  apiKey?: string;
42
42
  voice: Voice;
43
43
  modelID: TTSModels | string;
44
+ languageCode?: string;
44
45
  baseURL: string;
45
46
  encoding: TTSEncoding;
46
47
  streamingLatency: number;
@@ -134,6 +135,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
134
135
  output_format: opts.encoding,
135
136
  optimize_streaming_latency: `${opts.streamingLatency}`,
136
137
  enable_ssml_parsing: `${opts.enableSsmlParsing}`,
138
+ ...(opts.languageCode && { language_code: opts.languageCode }),
137
139
  };
138
140
  Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
139
141
  this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');