@livekit/agents-plugin-cartesia 1.0.25 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tts.cjs +19 -6
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +3 -3
- package/dist/tts.d.ts +3 -3
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +27 -7
- package/dist/tts.js.map +1 -1
- package/package.json +7 -7
- package/src/tts.ts +39 -6
package/dist/tts.cjs
CHANGED
|
@@ -76,8 +76,8 @@ class TTS extends import_agents.tts.TTS {
|
|
|
76
76
|
);
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
|
-
synthesize(text) {
|
|
80
|
-
return new ChunkedStream(this, text, this.#opts);
|
|
79
|
+
synthesize(text, connOptions, abortSignal) {
|
|
80
|
+
return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);
|
|
81
81
|
}
|
|
82
82
|
stream() {
|
|
83
83
|
return new SynthesizeStream(this, this.#opts);
|
|
@@ -85,11 +85,11 @@ class TTS extends import_agents.tts.TTS {
|
|
|
85
85
|
}
|
|
86
86
|
class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
87
87
|
label = "cartesia.ChunkedStream";
|
|
88
|
+
#logger = (0, import_agents.log)();
|
|
88
89
|
#opts;
|
|
89
90
|
#text;
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
super(text, tts2);
|
|
91
|
+
constructor(tts2, text, opts, connOptions, abortSignal) {
|
|
92
|
+
super(text, tts2, connOptions, abortSignal);
|
|
93
93
|
this.#text = text;
|
|
94
94
|
this.#opts = opts;
|
|
95
95
|
}
|
|
@@ -99,6 +99,7 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
99
99
|
const json = toCartesiaOptions(this.#opts);
|
|
100
100
|
json.transcript = this.#text;
|
|
101
101
|
const baseUrl = new URL(this.#opts.baseUrl);
|
|
102
|
+
const doneFut = new import_agents.Future();
|
|
102
103
|
const req = (0, import_node_https.request)(
|
|
103
104
|
{
|
|
104
105
|
hostname: baseUrl.hostname,
|
|
@@ -108,7 +109,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
108
109
|
headers: {
|
|
109
110
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey,
|
|
110
111
|
[VERSION_HEADER]: VERSION
|
|
111
|
-
}
|
|
112
|
+
},
|
|
113
|
+
signal: this.abortSignal
|
|
112
114
|
},
|
|
113
115
|
(res) => {
|
|
114
116
|
res.on("data", (chunk) => {
|
|
@@ -131,11 +133,22 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
131
133
|
});
|
|
132
134
|
}
|
|
133
135
|
this.queue.close();
|
|
136
|
+
doneFut.resolve();
|
|
137
|
+
});
|
|
138
|
+
res.on("error", (err) => {
|
|
139
|
+
if (err.message === "aborted") return;
|
|
140
|
+
this.#logger.error({ err }, "Cartesia TTS response error");
|
|
134
141
|
});
|
|
135
142
|
}
|
|
136
143
|
);
|
|
144
|
+
req.on("error", (err) => {
|
|
145
|
+
if (err.name === "AbortError") return;
|
|
146
|
+
this.#logger.error({ err }, "Cartesia TTS request error");
|
|
147
|
+
});
|
|
148
|
+
req.on("close", () => doneFut.resolve());
|
|
137
149
|
req.write(JSON.stringify(json));
|
|
138
150
|
req.end();
|
|
151
|
+
await doneFut.await;
|
|
139
152
|
}
|
|
140
153
|
}
|
|
141
154
|
class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason}`);\n }\n\n clearTTSChunkTimeout();\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (err.message.includes('Queue is closed')) {\n this.#logger.warn(\n { err },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AAEA,mCAAqB;AACrB,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AAEzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,gBAAI,IAAI,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,mBAAK,QAAQ;AAAA,gBACX,EAAE,IAAI;AAAA,gBACN;AAAA,cACF;AAAA,YACF,OAAO;AACL,mBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,YACzE;AAAA,UACF;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Future,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n });\n req.on('close', () => doneFut.resolve());\n req.write(JSON.stringify(json));\n req.end();\n\n await doneFut.await;\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason}`);\n }\n\n clearTTSChunkTimeout();\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (err.message.includes('Queue is closed')) {\n this.#logger.warn(\n { err },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAQO;AAEP,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,cAAU,mBAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,qBAAa;AAEjC,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,kBAAQ,QAAQ;AAAA,QAClB,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,QAC3D,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,IAC1D,CAAC;AACD,QAAI,GAAG,SAAS,MAAM,QAAQ,QAAQ,CAAC;AACvC,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,UAAM,QAAQ;AAAA,EAChB;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AAEA,mCAAqB;AACrB,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AAEzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,gBAAI,IAAI,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,mBAAK,QAAQ;AAAA,gBACX,EAAE,IAAI;AAAA,gBACN;AAAA,cACF;AAAA,YACF,OAAO;AACL,mBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,YACzE;AAAA,UACF;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/dist/tts.d.cts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { tts } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, tts } from '@livekit/agents';
|
|
2
2
|
import { type TTSEncoding, type TTSModels, type TTSVoiceEmotion, type TTSVoiceSpeed } from './models.js';
|
|
3
3
|
export interface TTSOptions {
|
|
4
4
|
model: TTSModels | string;
|
|
@@ -20,13 +20,13 @@ export declare class TTS extends tts.TTS {
|
|
|
20
20
|
label: string;
|
|
21
21
|
constructor(opts?: Partial<TTSOptions>);
|
|
22
22
|
updateOptions(opts: Partial<TTSOptions>): void;
|
|
23
|
-
synthesize(text: string): tts.ChunkedStream;
|
|
23
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
|
|
24
24
|
stream(): SynthesizeStream;
|
|
25
25
|
}
|
|
26
26
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
27
27
|
#private;
|
|
28
28
|
label: string;
|
|
29
|
-
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
29
|
+
constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
30
30
|
protected run(): Promise<void>;
|
|
31
31
|
}
|
|
32
32
|
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
package/dist/tts.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { tts } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, tts } from '@livekit/agents';
|
|
2
2
|
import { type TTSEncoding, type TTSModels, type TTSVoiceEmotion, type TTSVoiceSpeed } from './models.js';
|
|
3
3
|
export interface TTSOptions {
|
|
4
4
|
model: TTSModels | string;
|
|
@@ -20,13 +20,13 @@ export declare class TTS extends tts.TTS {
|
|
|
20
20
|
label: string;
|
|
21
21
|
constructor(opts?: Partial<TTSOptions>);
|
|
22
22
|
updateOptions(opts: Partial<TTSOptions>): void;
|
|
23
|
-
synthesize(text: string): tts.ChunkedStream;
|
|
23
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
|
|
24
24
|
stream(): SynthesizeStream;
|
|
25
25
|
}
|
|
26
26
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
27
27
|
#private;
|
|
28
28
|
label: string;
|
|
29
|
-
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
29
|
+
constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
30
30
|
protected run(): Promise<void>;
|
|
31
31
|
}
|
|
32
32
|
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAMtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;CACtB;AAaD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,GAAG,CAAC,aAAa;IAIpB,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAM/B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,UAAU,EAChB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAOX,GAAG;CA6DpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CA2JpB"}
|
package/dist/tts.js
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
AudioByteStream,
|
|
3
|
+
Future,
|
|
4
|
+
log,
|
|
5
|
+
shortuuid,
|
|
6
|
+
tokenize,
|
|
7
|
+
tts
|
|
8
|
+
} from "@livekit/agents";
|
|
2
9
|
import { request } from "node:https";
|
|
3
10
|
import { WebSocket } from "ws";
|
|
4
11
|
import {
|
|
@@ -53,8 +60,8 @@ class TTS extends tts.TTS {
|
|
|
53
60
|
);
|
|
54
61
|
}
|
|
55
62
|
}
|
|
56
|
-
synthesize(text) {
|
|
57
|
-
return new ChunkedStream(this, text, this.#opts);
|
|
63
|
+
synthesize(text, connOptions, abortSignal) {
|
|
64
|
+
return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);
|
|
58
65
|
}
|
|
59
66
|
stream() {
|
|
60
67
|
return new SynthesizeStream(this, this.#opts);
|
|
@@ -62,11 +69,11 @@ class TTS extends tts.TTS {
|
|
|
62
69
|
}
|
|
63
70
|
class ChunkedStream extends tts.ChunkedStream {
|
|
64
71
|
label = "cartesia.ChunkedStream";
|
|
72
|
+
#logger = log();
|
|
65
73
|
#opts;
|
|
66
74
|
#text;
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
super(text, tts2);
|
|
75
|
+
constructor(tts2, text, opts, connOptions, abortSignal) {
|
|
76
|
+
super(text, tts2, connOptions, abortSignal);
|
|
70
77
|
this.#text = text;
|
|
71
78
|
this.#opts = opts;
|
|
72
79
|
}
|
|
@@ -76,6 +83,7 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
76
83
|
const json = toCartesiaOptions(this.#opts);
|
|
77
84
|
json.transcript = this.#text;
|
|
78
85
|
const baseUrl = new URL(this.#opts.baseUrl);
|
|
86
|
+
const doneFut = new Future();
|
|
79
87
|
const req = request(
|
|
80
88
|
{
|
|
81
89
|
hostname: baseUrl.hostname,
|
|
@@ -85,7 +93,8 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
85
93
|
headers: {
|
|
86
94
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey,
|
|
87
95
|
[VERSION_HEADER]: VERSION
|
|
88
|
-
}
|
|
96
|
+
},
|
|
97
|
+
signal: this.abortSignal
|
|
89
98
|
},
|
|
90
99
|
(res) => {
|
|
91
100
|
res.on("data", (chunk) => {
|
|
@@ -108,11 +117,22 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
108
117
|
});
|
|
109
118
|
}
|
|
110
119
|
this.queue.close();
|
|
120
|
+
doneFut.resolve();
|
|
121
|
+
});
|
|
122
|
+
res.on("error", (err) => {
|
|
123
|
+
if (err.message === "aborted") return;
|
|
124
|
+
this.#logger.error({ err }, "Cartesia TTS response error");
|
|
111
125
|
});
|
|
112
126
|
}
|
|
113
127
|
);
|
|
128
|
+
req.on("error", (err) => {
|
|
129
|
+
if (err.name === "AbortError") return;
|
|
130
|
+
this.#logger.error({ err }, "Cartesia TTS request error");
|
|
131
|
+
});
|
|
132
|
+
req.on("close", () => doneFut.resolve());
|
|
114
133
|
req.write(JSON.stringify(json));
|
|
115
134
|
req.end();
|
|
135
|
+
await doneFut.await;
|
|
116
136
|
}
|
|
117
137
|
}
|
|
118
138
|
class SynthesizeStream extends tts.SynthesizeStream {
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason}`);\n }\n\n clearTTSChunkTimeout();\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (err.message.includes('Queue is closed')) {\n this.#logger.warn(\n { err },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AAEA,mCAAqB;AACrB,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AAEzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,gBAAI,IAAI,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,mBAAK,QAAQ;AAAA,gBACX,EAAE,IAAI;AAAA,gBACN;AAAA,cACF;AAAA,YACF,OAAO;AACL,mBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,YACzE;AAAA,UACF;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Future,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n });\n req.on('close', () => doneFut.resolve());\n req.write(JSON.stringify(json));\n req.end();\n\n await doneFut.await;\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason}`);\n }\n\n clearTTSChunkTimeout();\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (err.message.includes('Queue is closed')) {\n this.#logger.warn(\n { err },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,OAAa;AAEjC,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,kBAAQ,QAAQ;AAAA,QAClB,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,QAC3D,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,IAC1D,CAAC;AACD,QAAI,GAAG,SAAS,MAAM,QAAQ,QAAQ,CAAC;AACvC,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,UAAM,QAAQ;AAAA,EAChB;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AAEA,mCAAqB;AACrB,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AAEzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,gBAAI,IAAI,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,mBAAK,QAAQ;AAAA,gBACX,EAAE,IAAI;AAAA,gBACN;AAAA,cACF;AAAA,YACF,OAAO;AACL,mBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,YACzE;AAAA,UACF;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-cartesia",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.30",
|
|
4
4
|
"description": "Cartesia plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -25,21 +25,21 @@
|
|
|
25
25
|
"README.md"
|
|
26
26
|
],
|
|
27
27
|
"devDependencies": {
|
|
28
|
-
"@livekit/rtc-node": "^0.13.
|
|
28
|
+
"@livekit/rtc-node": "^0.13.22",
|
|
29
29
|
"@microsoft/api-extractor": "^7.35.0",
|
|
30
30
|
"@types/ws": "^8.5.10",
|
|
31
31
|
"tsup": "^8.3.5",
|
|
32
32
|
"typescript": "^5.0.0",
|
|
33
|
-
"@livekit/agents": "1.0.
|
|
34
|
-
"@livekit/agents-plugin-openai": "1.0.
|
|
35
|
-
"@livekit/agents-plugins-test": "1.0.
|
|
33
|
+
"@livekit/agents": "1.0.30",
|
|
34
|
+
"@livekit/agents-plugin-openai": "1.0.30",
|
|
35
|
+
"@livekit/agents-plugins-test": "1.0.30"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"ws": "^8.16.0"
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
|
-
"@livekit/rtc-node": "^0.13.
|
|
42
|
-
"@livekit/agents": "1.0.
|
|
41
|
+
"@livekit/rtc-node": "^0.13.22",
|
|
42
|
+
"@livekit/agents": "1.0.30"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
45
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/tts.ts
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
type APIConnectOptions,
|
|
6
|
+
AudioByteStream,
|
|
7
|
+
Future,
|
|
8
|
+
log,
|
|
9
|
+
shortuuid,
|
|
10
|
+
tokenize,
|
|
11
|
+
tts,
|
|
12
|
+
} from '@livekit/agents';
|
|
5
13
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
6
14
|
import { request } from 'node:https';
|
|
7
15
|
import { type RawData, WebSocket } from 'ws';
|
|
@@ -88,8 +96,12 @@ export class TTS extends tts.TTS {
|
|
|
88
96
|
}
|
|
89
97
|
}
|
|
90
98
|
|
|
91
|
-
synthesize(
|
|
92
|
-
|
|
99
|
+
synthesize(
|
|
100
|
+
text: string,
|
|
101
|
+
connOptions?: APIConnectOptions,
|
|
102
|
+
abortSignal?: AbortSignal,
|
|
103
|
+
): tts.ChunkedStream {
|
|
104
|
+
return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);
|
|
93
105
|
}
|
|
94
106
|
|
|
95
107
|
stream(): SynthesizeStream {
|
|
@@ -99,12 +111,18 @@ export class TTS extends tts.TTS {
|
|
|
99
111
|
|
|
100
112
|
export class ChunkedStream extends tts.ChunkedStream {
|
|
101
113
|
label = 'cartesia.ChunkedStream';
|
|
114
|
+
#logger = log();
|
|
102
115
|
#opts: TTSOptions;
|
|
103
116
|
#text: string;
|
|
104
117
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
118
|
+
constructor(
|
|
119
|
+
tts: TTS,
|
|
120
|
+
text: string,
|
|
121
|
+
opts: TTSOptions,
|
|
122
|
+
connOptions?: APIConnectOptions,
|
|
123
|
+
abortSignal?: AbortSignal,
|
|
124
|
+
) {
|
|
125
|
+
super(text, tts, connOptions, abortSignal);
|
|
108
126
|
this.#text = text;
|
|
109
127
|
this.#opts = opts;
|
|
110
128
|
}
|
|
@@ -116,6 +134,8 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
116
134
|
json.transcript = this.#text;
|
|
117
135
|
|
|
118
136
|
const baseUrl = new URL(this.#opts.baseUrl);
|
|
137
|
+
const doneFut = new Future<void>();
|
|
138
|
+
|
|
119
139
|
const req = request(
|
|
120
140
|
{
|
|
121
141
|
hostname: baseUrl.hostname,
|
|
@@ -126,6 +146,7 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
126
146
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey!,
|
|
127
147
|
[VERSION_HEADER]: VERSION,
|
|
128
148
|
},
|
|
149
|
+
signal: this.abortSignal,
|
|
129
150
|
},
|
|
130
151
|
(res) => {
|
|
131
152
|
res.on('data', (chunk) => {
|
|
@@ -148,12 +169,24 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
148
169
|
});
|
|
149
170
|
}
|
|
150
171
|
this.queue.close();
|
|
172
|
+
doneFut.resolve();
|
|
173
|
+
});
|
|
174
|
+
res.on('error', (err) => {
|
|
175
|
+
if (err.message === 'aborted') return;
|
|
176
|
+
this.#logger.error({ err }, 'Cartesia TTS response error');
|
|
151
177
|
});
|
|
152
178
|
},
|
|
153
179
|
);
|
|
154
180
|
|
|
181
|
+
req.on('error', (err) => {
|
|
182
|
+
if (err.name === 'AbortError') return;
|
|
183
|
+
this.#logger.error({ err }, 'Cartesia TTS request error');
|
|
184
|
+
});
|
|
185
|
+
req.on('close', () => doneFut.resolve());
|
|
155
186
|
req.write(JSON.stringify(json));
|
|
156
187
|
req.end();
|
|
188
|
+
|
|
189
|
+
await doneFut.await;
|
|
157
190
|
}
|
|
158
191
|
}
|
|
159
192
|
|