@livekit/agents-plugin-cartesia 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tts.cjs +18 -1
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +4 -0
- package/dist/tts.d.ts +4 -0
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +18 -1
- package/dist/tts.js.map +1 -1
- package/package.json +5 -5
- package/src/tts.ts +28 -0
package/dist/tts.cjs
CHANGED
|
@@ -39,7 +39,8 @@ const defaultTTSOptions = {
|
|
|
39
39
|
voice: import_models.TTSDefaultVoiceId,
|
|
40
40
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
41
41
|
language: "en",
|
|
42
|
-
baseUrl: "https://api.cartesia.ai"
|
|
42
|
+
baseUrl: "https://api.cartesia.ai",
|
|
43
|
+
chunkTimeout: 5e3
|
|
43
44
|
};
|
|
44
45
|
class TTS extends import_agents.tts.TTS {
|
|
45
46
|
#opts;
|
|
@@ -203,6 +204,13 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
203
204
|
lastFrame = void 0;
|
|
204
205
|
}
|
|
205
206
|
};
|
|
207
|
+
let timeout = null;
|
|
208
|
+
const clearTTSChunkTimeout = () => {
|
|
209
|
+
if (timeout) {
|
|
210
|
+
clearTimeout(timeout);
|
|
211
|
+
timeout = null;
|
|
212
|
+
}
|
|
213
|
+
};
|
|
206
214
|
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
207
215
|
try {
|
|
208
216
|
await new Promise((resolve, reject) => {
|
|
@@ -228,6 +236,13 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
228
236
|
sendLastFrame(segmentId, false);
|
|
229
237
|
lastFrame = frame;
|
|
230
238
|
}
|
|
239
|
+
clearTTSChunkTimeout();
|
|
240
|
+
timeout = setTimeout(() => {
|
|
241
|
+
this.#logger.error(
|
|
242
|
+
`Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`
|
|
243
|
+
);
|
|
244
|
+
ws2.close();
|
|
245
|
+
}, this.#opts.chunkTimeout);
|
|
231
246
|
} else if ("done" in json) {
|
|
232
247
|
finalReceived = true;
|
|
233
248
|
for (const frame of bstream.flush()) {
|
|
@@ -241,6 +256,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
241
256
|
if (segmentId === requestId) {
|
|
242
257
|
closing = true;
|
|
243
258
|
shouldExit = true;
|
|
259
|
+
clearTTSChunkTimeout();
|
|
244
260
|
ws2.close();
|
|
245
261
|
}
|
|
246
262
|
}
|
|
@@ -249,6 +265,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
249
265
|
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
250
266
|
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
251
267
|
}
|
|
268
|
+
clearTTSChunkTimeout();
|
|
252
269
|
break;
|
|
253
270
|
}
|
|
254
271
|
}
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n this.#logger.error(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AACzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/dist/tts.d.cts
CHANGED
|
@@ -10,6 +10,10 @@ export interface TTSOptions {
|
|
|
10
10
|
apiKey?: string;
|
|
11
11
|
language: string;
|
|
12
12
|
baseUrl: string;
|
|
13
|
+
/**
|
|
14
|
+
* The timeout for the next chunk to be received from the Cartesia API.
|
|
15
|
+
*/
|
|
16
|
+
chunkTimeout: number;
|
|
13
17
|
}
|
|
14
18
|
export declare class TTS extends tts.TTS {
|
|
15
19
|
#private;
|
package/dist/tts.d.ts
CHANGED
|
@@ -10,6 +10,10 @@ export interface TTSOptions {
|
|
|
10
10
|
apiKey?: string;
|
|
11
11
|
language: string;
|
|
12
12
|
baseUrl: string;
|
|
13
|
+
/**
|
|
14
|
+
* The timeout for the next chunk to be received from the Cartesia API.
|
|
15
|
+
*/
|
|
16
|
+
chunkTimeout: number;
|
|
13
17
|
}
|
|
14
18
|
export declare class TTS extends tts.TTS {
|
|
15
19
|
#private;
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;CACtB;AAaD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CA8CpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CAiJpB"}
|
package/dist/tts.js
CHANGED
|
@@ -16,7 +16,8 @@ const defaultTTSOptions = {
|
|
|
16
16
|
voice: TTSDefaultVoiceId,
|
|
17
17
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
18
18
|
language: "en",
|
|
19
|
-
baseUrl: "https://api.cartesia.ai"
|
|
19
|
+
baseUrl: "https://api.cartesia.ai",
|
|
20
|
+
chunkTimeout: 5e3
|
|
20
21
|
};
|
|
21
22
|
class TTS extends tts.TTS {
|
|
22
23
|
#opts;
|
|
@@ -180,6 +181,13 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
180
181
|
lastFrame = void 0;
|
|
181
182
|
}
|
|
182
183
|
};
|
|
184
|
+
let timeout = null;
|
|
185
|
+
const clearTTSChunkTimeout = () => {
|
|
186
|
+
if (timeout) {
|
|
187
|
+
clearTimeout(timeout);
|
|
188
|
+
timeout = null;
|
|
189
|
+
}
|
|
190
|
+
};
|
|
183
191
|
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
184
192
|
try {
|
|
185
193
|
await new Promise((resolve, reject) => {
|
|
@@ -205,6 +213,13 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
205
213
|
sendLastFrame(segmentId, false);
|
|
206
214
|
lastFrame = frame;
|
|
207
215
|
}
|
|
216
|
+
clearTTSChunkTimeout();
|
|
217
|
+
timeout = setTimeout(() => {
|
|
218
|
+
this.#logger.error(
|
|
219
|
+
`Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`
|
|
220
|
+
);
|
|
221
|
+
ws2.close();
|
|
222
|
+
}, this.#opts.chunkTimeout);
|
|
208
223
|
} else if ("done" in json) {
|
|
209
224
|
finalReceived = true;
|
|
210
225
|
for (const frame of bstream.flush()) {
|
|
@@ -218,6 +233,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
218
233
|
if (segmentId === requestId) {
|
|
219
234
|
closing = true;
|
|
220
235
|
shouldExit = true;
|
|
236
|
+
clearTTSChunkTimeout();
|
|
221
237
|
ws2.close();
|
|
222
238
|
}
|
|
223
239
|
}
|
|
@@ -226,6 +242,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
226
242
|
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
227
243
|
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
228
244
|
}
|
|
245
|
+
clearTTSChunkTimeout();
|
|
229
246
|
break;
|
|
230
247
|
}
|
|
231
248
|
}
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n this.#logger.error(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AACzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-cartesia",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "Cartesia plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -30,16 +30,16 @@
|
|
|
30
30
|
"@types/ws": "^8.5.10",
|
|
31
31
|
"tsup": "^8.3.5",
|
|
32
32
|
"typescript": "^5.0.0",
|
|
33
|
-
"@livekit/agents": "1.0.
|
|
34
|
-
"@livekit/agents-plugin-openai": "1.0.
|
|
35
|
-
"@livekit/agents-plugins-test": "1.0.
|
|
33
|
+
"@livekit/agents": "1.0.2",
|
|
34
|
+
"@livekit/agents-plugin-openai": "1.0.2",
|
|
35
|
+
"@livekit/agents-plugins-test": "1.0.2"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"ws": "^8.16.0"
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@livekit/rtc-node": "^0.13.12",
|
|
42
|
-
"@livekit/agents": "1.0.
|
|
42
|
+
"@livekit/agents": "1.0.2"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
45
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/tts.ts
CHANGED
|
@@ -29,6 +29,11 @@ export interface TTSOptions {
|
|
|
29
29
|
apiKey?: string;
|
|
30
30
|
language: string;
|
|
31
31
|
baseUrl: string;
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* The timeout for the next chunk to be received from the Cartesia API.
|
|
35
|
+
*/
|
|
36
|
+
chunkTimeout: number;
|
|
32
37
|
}
|
|
33
38
|
|
|
34
39
|
const defaultTTSOptions: TTSOptions = {
|
|
@@ -39,6 +44,7 @@ const defaultTTSOptions: TTSOptions = {
|
|
|
39
44
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
40
45
|
language: 'en',
|
|
41
46
|
baseUrl: 'https://api.cartesia.ai',
|
|
47
|
+
chunkTimeout: 5000,
|
|
42
48
|
};
|
|
43
49
|
|
|
44
50
|
export class TTS extends tts.TTS {
|
|
@@ -227,6 +233,15 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
227
233
|
}
|
|
228
234
|
};
|
|
229
235
|
|
|
236
|
+
let timeout: NodeJS.Timeout | null = null;
|
|
237
|
+
|
|
238
|
+
const clearTTSChunkTimeout = () => {
|
|
239
|
+
if (timeout) {
|
|
240
|
+
clearTimeout(timeout);
|
|
241
|
+
timeout = null;
|
|
242
|
+
}
|
|
243
|
+
};
|
|
244
|
+
|
|
230
245
|
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
231
246
|
try {
|
|
232
247
|
await new Promise<RawData | null>((resolve, reject) => {
|
|
@@ -254,6 +269,17 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
254
269
|
sendLastFrame(segmentId, false);
|
|
255
270
|
lastFrame = frame;
|
|
256
271
|
}
|
|
272
|
+
|
|
273
|
+
// IMPORTANT: close WS if TTS chunk stream been stuck too long
|
|
274
|
+
// this allows unblock the current "broken" TTS node so that any future TTS nodes
|
|
275
|
+
// can continue to process the stream without been blocked by the stuck node
|
|
276
|
+
clearTTSChunkTimeout();
|
|
277
|
+
timeout = setTimeout(() => {
|
|
278
|
+
this.#logger.error(
|
|
279
|
+
`Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,
|
|
280
|
+
);
|
|
281
|
+
ws.close();
|
|
282
|
+
}, this.#opts.chunkTimeout);
|
|
257
283
|
} else if ('done' in json) {
|
|
258
284
|
finalReceived = true;
|
|
259
285
|
for (const frame of bstream.flush()) {
|
|
@@ -268,6 +294,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
268
294
|
if (segmentId === requestId) {
|
|
269
295
|
closing = true;
|
|
270
296
|
shouldExit = true;
|
|
297
|
+
clearTTSChunkTimeout();
|
|
271
298
|
ws.close();
|
|
272
299
|
}
|
|
273
300
|
}
|
|
@@ -277,6 +304,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
277
304
|
if (err instanceof Error && !err.message.includes('WebSocket closed')) {
|
|
278
305
|
this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
|
|
279
306
|
}
|
|
307
|
+
clearTTSChunkTimeout();
|
|
280
308
|
break;
|
|
281
309
|
}
|
|
282
310
|
}
|