@livekit/agents-plugin-cartesia 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tts.cjs CHANGED
@@ -39,7 +39,8 @@ const defaultTTSOptions = {
39
39
  voice: import_models.TTSDefaultVoiceId,
40
40
  apiKey: process.env.CARTESIA_API_KEY,
41
41
  language: "en",
42
- baseUrl: "https://api.cartesia.ai"
42
+ baseUrl: "https://api.cartesia.ai",
43
+ chunkTimeout: 5e3
43
44
  };
44
45
  class TTS extends import_agents.tts.TTS {
45
46
  #opts;
@@ -203,6 +204,13 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
203
204
  lastFrame = void 0;
204
205
  }
205
206
  };
207
+ let timeout = null;
208
+ const clearTTSChunkTimeout = () => {
209
+ if (timeout) {
210
+ clearTimeout(timeout);
211
+ timeout = null;
212
+ }
213
+ };
206
214
  while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
207
215
  try {
208
216
  await new Promise((resolve, reject) => {
@@ -228,6 +236,13 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
228
236
  sendLastFrame(segmentId, false);
229
237
  lastFrame = frame;
230
238
  }
239
+ clearTTSChunkTimeout();
240
+ timeout = setTimeout(() => {
241
+ this.#logger.error(
242
+ `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`
243
+ );
244
+ ws2.close();
245
+ }, this.#opts.chunkTimeout);
231
246
  } else if ("done" in json) {
232
247
  finalReceived = true;
233
248
  for (const frame of bstream.flush()) {
@@ -241,6 +256,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
241
256
  if (segmentId === requestId) {
242
257
  closing = true;
243
258
  shouldExit = true;
259
+ clearTTSChunkTimeout();
244
260
  ws2.close();
245
261
  }
246
262
  }
@@ -249,6 +265,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
249
265
  if (err instanceof Error && !err.message.includes("WebSocket closed")) {
250
266
  this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
251
267
  }
268
+ clearTTSChunkTimeout();
252
269
  break;
253
270
  }
254
271
  }
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n this.#logger.error(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AACzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
package/dist/tts.d.cts CHANGED
@@ -10,6 +10,10 @@ export interface TTSOptions {
10
10
  apiKey?: string;
11
11
  language: string;
12
12
  baseUrl: string;
13
+ /**
14
+ * The timeout for the next chunk to be received from the Cartesia API.
15
+ */
16
+ chunkTimeout: number;
13
17
  }
14
18
  export declare class TTS extends tts.TTS {
15
19
  #private;
package/dist/tts.d.ts CHANGED
@@ -10,6 +10,10 @@ export interface TTSOptions {
10
10
  apiKey?: string;
11
11
  language: string;
12
12
  baseUrl: string;
13
+ /**
14
+ * The timeout for the next chunk to be received from the Cartesia API.
15
+ */
16
+ chunkTimeout: number;
13
17
  }
14
18
  export declare class TTS extends tts.TTS {
15
19
  #private;
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAYD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CA8CpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CA2HpB"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;CACtB;AAaD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CA8CpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CAiJpB"}
package/dist/tts.js CHANGED
@@ -16,7 +16,8 @@ const defaultTTSOptions = {
16
16
  voice: TTSDefaultVoiceId,
17
17
  apiKey: process.env.CARTESIA_API_KEY,
18
18
  language: "en",
19
- baseUrl: "https://api.cartesia.ai"
19
+ baseUrl: "https://api.cartesia.ai",
20
+ chunkTimeout: 5e3
20
21
  };
21
22
  class TTS extends tts.TTS {
22
23
  #opts;
@@ -180,6 +181,13 @@ class SynthesizeStream extends tts.SynthesizeStream {
180
181
  lastFrame = void 0;
181
182
  }
182
183
  };
184
+ let timeout = null;
185
+ const clearTTSChunkTimeout = () => {
186
+ if (timeout) {
187
+ clearTimeout(timeout);
188
+ timeout = null;
189
+ }
190
+ };
183
191
  while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
184
192
  try {
185
193
  await new Promise((resolve, reject) => {
@@ -205,6 +213,13 @@ class SynthesizeStream extends tts.SynthesizeStream {
205
213
  sendLastFrame(segmentId, false);
206
214
  lastFrame = frame;
207
215
  }
216
+ clearTTSChunkTimeout();
217
+ timeout = setTimeout(() => {
218
+ this.#logger.error(
219
+ `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`
220
+ );
221
+ ws2.close();
222
+ }, this.#opts.chunkTimeout);
208
223
  } else if ("done" in json) {
209
224
  finalReceived = true;
210
225
  for (const frame of bstream.flush()) {
@@ -218,6 +233,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
218
233
  if (segmentId === requestId) {
219
234
  closing = true;
220
235
  shouldExit = true;
236
+ clearTTSChunkTimeout();
221
237
  ws2.close();
222
238
  }
223
239
  }
@@ -226,6 +242,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
226
242
  if (err instanceof Error && !err.message.includes("WebSocket closed")) {
227
243
  this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
228
244
  }
245
+ clearTTSChunkTimeout();
229
246
  break;
230
247
  }
231
248
  }
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n this.#logger.error(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AACzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-cartesia",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "Cartesia plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,16 +30,16 @@
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents": "1.0.0",
34
- "@livekit/agents-plugin-openai": "1.0.0",
35
- "@livekit/agents-plugins-test": "1.0.0"
33
+ "@livekit/agents": "1.0.2",
34
+ "@livekit/agents-plugin-openai": "1.0.2",
35
+ "@livekit/agents-plugins-test": "1.0.2"
36
36
  },
37
37
  "dependencies": {
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@livekit/rtc-node": "^0.13.12",
42
- "@livekit/agents": "1.0.0"
42
+ "@livekit/agents": "1.0.2"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/tts.ts CHANGED
@@ -29,6 +29,11 @@ export interface TTSOptions {
29
29
  apiKey?: string;
30
30
  language: string;
31
31
  baseUrl: string;
32
+
33
+ /**
34
+ * The timeout for the next chunk to be received from the Cartesia API.
35
+ */
36
+ chunkTimeout: number;
32
37
  }
33
38
 
34
39
  const defaultTTSOptions: TTSOptions = {
@@ -39,6 +44,7 @@ const defaultTTSOptions: TTSOptions = {
39
44
  apiKey: process.env.CARTESIA_API_KEY,
40
45
  language: 'en',
41
46
  baseUrl: 'https://api.cartesia.ai',
47
+ chunkTimeout: 5000,
42
48
  };
43
49
 
44
50
  export class TTS extends tts.TTS {
@@ -227,6 +233,15 @@ export class SynthesizeStream extends tts.SynthesizeStream {
227
233
  }
228
234
  };
229
235
 
236
+ let timeout: NodeJS.Timeout | null = null;
237
+
238
+ const clearTTSChunkTimeout = () => {
239
+ if (timeout) {
240
+ clearTimeout(timeout);
241
+ timeout = null;
242
+ }
243
+ };
244
+
230
245
  while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
231
246
  try {
232
247
  await new Promise<RawData | null>((resolve, reject) => {
@@ -254,6 +269,17 @@ export class SynthesizeStream extends tts.SynthesizeStream {
254
269
  sendLastFrame(segmentId, false);
255
270
  lastFrame = frame;
256
271
  }
272
+
273
+ // IMPORTANT: close WS if TTS chunk stream been stuck too long
274
+ // this allows unblock the current "broken" TTS node so that any future TTS nodes
275
+ // can continue to process the stream without been blocked by the stuck node
276
+ clearTTSChunkTimeout();
277
+ timeout = setTimeout(() => {
278
+ this.#logger.error(
279
+ `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,
280
+ );
281
+ ws.close();
282
+ }, this.#opts.chunkTimeout);
257
283
  } else if ('done' in json) {
258
284
  finalReceived = true;
259
285
  for (const frame of bstream.flush()) {
@@ -268,6 +294,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
268
294
  if (segmentId === requestId) {
269
295
  closing = true;
270
296
  shouldExit = true;
297
+ clearTTSChunkTimeout();
271
298
  ws.close();
272
299
  }
273
300
  }
@@ -277,6 +304,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
277
304
  if (err instanceof Error && !err.message.includes('WebSocket closed')) {
278
305
  this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
279
306
  }
307
+ clearTTSChunkTimeout();
280
308
  break;
281
309
  }
282
310
  }