@livekit/agents-plugin-cartesia 1.0.39 → 1.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tts.cjs +132 -74
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +6 -0
- package/dist/tts.d.ts +6 -0
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +140 -74
- package/dist/tts.js.map +1 -1
- package/dist/types.cjs +124 -0
- package/dist/types.cjs.map +1 -0
- package/dist/types.d.cts +133 -0
- package/dist/types.d.ts +133 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +87 -0
- package/dist/types.js.map +1 -0
- package/package.json +6 -5
- package/src/tts.ts +199 -82
- package/src/types.ts +116 -0
package/dist/tts.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
AudioByteStream,
|
|
3
3
|
Future,
|
|
4
|
+
createTimedString,
|
|
4
5
|
log,
|
|
5
6
|
shortuuid,
|
|
7
|
+
stream,
|
|
6
8
|
tokenize,
|
|
7
9
|
tts
|
|
8
10
|
} from "@livekit/agents";
|
|
@@ -11,6 +13,13 @@ import { WebSocket } from "ws";
|
|
|
11
13
|
import {
|
|
12
14
|
TTSDefaultVoiceId
|
|
13
15
|
} from "./models.js";
|
|
16
|
+
import {
|
|
17
|
+
cartesiaMessageSchema,
|
|
18
|
+
hasWordTimestamps,
|
|
19
|
+
isChunkMessage,
|
|
20
|
+
isDoneMessage,
|
|
21
|
+
isErrorMessage
|
|
22
|
+
} from "./types.js";
|
|
14
23
|
const AUTHORIZATION_HEADER = "X-API-Key";
|
|
15
24
|
const VERSION_HEADER = "Cartesia-Version";
|
|
16
25
|
const VERSION = "2024-06-10";
|
|
@@ -24,19 +33,22 @@ const defaultTTSOptions = {
|
|
|
24
33
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
25
34
|
language: "en",
|
|
26
35
|
baseUrl: "https://api.cartesia.ai",
|
|
27
|
-
chunkTimeout: 5e3
|
|
36
|
+
chunkTimeout: 5e3,
|
|
37
|
+
wordTimestamps: true
|
|
28
38
|
};
|
|
29
39
|
class TTS extends tts.TTS {
|
|
30
40
|
#opts;
|
|
31
41
|
label = "cartesia.TTS";
|
|
32
42
|
constructor(opts = {}) {
|
|
33
|
-
|
|
34
|
-
streaming: true
|
|
35
|
-
});
|
|
36
|
-
this.#opts = {
|
|
43
|
+
const resolvedOpts = {
|
|
37
44
|
...defaultTTSOptions,
|
|
38
45
|
...opts
|
|
39
46
|
};
|
|
47
|
+
super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {
|
|
48
|
+
streaming: true,
|
|
49
|
+
alignedTranscript: resolvedOpts.wordTimestamps ?? true
|
|
50
|
+
});
|
|
51
|
+
this.#opts = resolvedOpts;
|
|
40
52
|
if (this.#opts.apiKey === void 0) {
|
|
41
53
|
throw new Error(
|
|
42
54
|
"Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
|
|
@@ -158,26 +170,26 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
158
170
|
async run() {
|
|
159
171
|
const requestId = shortuuid();
|
|
160
172
|
let closing = false;
|
|
173
|
+
let sentenceStreamClosed = false;
|
|
161
174
|
const sentenceStreamTask = async (ws2) => {
|
|
162
|
-
const packet = toCartesiaOptions(this.#opts);
|
|
175
|
+
const packet = toCartesiaOptions(this.#opts, true);
|
|
163
176
|
for await (const event of this.#tokenizer) {
|
|
164
|
-
|
|
165
|
-
JSON.stringify({
|
|
166
|
-
...packet,
|
|
167
|
-
context_id: requestId,
|
|
168
|
-
transcript: event.token + " ",
|
|
169
|
-
continue: true
|
|
170
|
-
})
|
|
171
|
-
);
|
|
172
|
-
}
|
|
173
|
-
ws2.send(
|
|
174
|
-
JSON.stringify({
|
|
177
|
+
const msg = {
|
|
175
178
|
...packet,
|
|
176
179
|
context_id: requestId,
|
|
177
|
-
transcript: " ",
|
|
178
|
-
continue:
|
|
179
|
-
}
|
|
180
|
-
|
|
180
|
+
transcript: event.token + " ",
|
|
181
|
+
continue: true
|
|
182
|
+
};
|
|
183
|
+
ws2.send(JSON.stringify(msg));
|
|
184
|
+
}
|
|
185
|
+
const endMsg = {
|
|
186
|
+
...packet,
|
|
187
|
+
context_id: requestId,
|
|
188
|
+
transcript: " ",
|
|
189
|
+
continue: false
|
|
190
|
+
};
|
|
191
|
+
ws2.send(JSON.stringify(endMsg));
|
|
192
|
+
sentenceStreamClosed = true;
|
|
181
193
|
};
|
|
182
194
|
const inputTask = async () => {
|
|
183
195
|
for await (const data of this.input) {
|
|
@@ -191,14 +203,21 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
191
203
|
this.#tokenizer.close();
|
|
192
204
|
};
|
|
193
205
|
const recvTask = async (ws2) => {
|
|
194
|
-
let finalReceived = false;
|
|
195
|
-
let shouldExit = false;
|
|
196
206
|
const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
207
|
+
const eventChannel = stream.createStreamChannel();
|
|
197
208
|
let lastFrame;
|
|
209
|
+
let pendingTimedTranscripts = [];
|
|
198
210
|
const sendLastFrame = (segmentId, final) => {
|
|
199
211
|
if (lastFrame && !this.queue.closed) {
|
|
200
|
-
this.queue.put({
|
|
212
|
+
this.queue.put({
|
|
213
|
+
requestId,
|
|
214
|
+
segmentId,
|
|
215
|
+
frame: lastFrame,
|
|
216
|
+
final,
|
|
217
|
+
timedTranscripts: pendingTimedTranscripts.length > 0 ? pendingTimedTranscripts : void 0
|
|
218
|
+
});
|
|
201
219
|
lastFrame = void 0;
|
|
220
|
+
pendingTimedTranscripts = [];
|
|
202
221
|
}
|
|
203
222
|
};
|
|
204
223
|
let timeout = null;
|
|
@@ -208,41 +227,81 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
208
227
|
timeout = null;
|
|
209
228
|
}
|
|
210
229
|
};
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
230
|
+
const onMessage = (data) => {
|
|
231
|
+
void eventChannel.write(data).catch((error) => {
|
|
232
|
+
this.#logger.debug({ error }, "Failed writing Cartesia event to channel (likely closed)");
|
|
233
|
+
});
|
|
234
|
+
};
|
|
235
|
+
const onClose = (code, reason) => {
|
|
236
|
+
if (!closing) {
|
|
237
|
+
this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);
|
|
238
|
+
}
|
|
239
|
+
clearTTSChunkTimeout();
|
|
240
|
+
void eventChannel.close();
|
|
241
|
+
};
|
|
242
|
+
const onError = (err) => {
|
|
243
|
+
this.#logger.error({ err }, "Cartesia WebSocket error");
|
|
244
|
+
void eventChannel.close();
|
|
245
|
+
};
|
|
246
|
+
ws2.on("message", onMessage);
|
|
247
|
+
ws2.on("close", onClose);
|
|
248
|
+
ws2.on("error", onError);
|
|
249
|
+
try {
|
|
250
|
+
const reader = eventChannel.stream().getReader();
|
|
251
|
+
while (!this.closed && !this.abortController.signal.aborted) {
|
|
252
|
+
const result = await reader.read();
|
|
253
|
+
if (result.done) break;
|
|
254
|
+
const rawMsg = result.value;
|
|
255
|
+
let serverMsg;
|
|
256
|
+
try {
|
|
257
|
+
const json = JSON.parse(rawMsg.toString());
|
|
258
|
+
serverMsg = cartesiaMessageSchema.parse(json);
|
|
259
|
+
} catch (parseErr) {
|
|
260
|
+
this.#logger.warn({ parseErr }, "Failed to parse Cartesia message");
|
|
261
|
+
continue;
|
|
262
|
+
}
|
|
263
|
+
if (isErrorMessage(serverMsg)) {
|
|
264
|
+
this.#logger.error({ error: serverMsg.error }, "Cartesia returned error");
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
const segmentId = serverMsg.context_id;
|
|
268
|
+
if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {
|
|
269
|
+
const wordTimestamps = serverMsg.word_timestamps;
|
|
270
|
+
for (let i = 0; i < wordTimestamps.words.length; i++) {
|
|
271
|
+
const word = wordTimestamps.words[i];
|
|
272
|
+
const startTime = wordTimestamps.start[i];
|
|
273
|
+
const endTime = wordTimestamps.end[i];
|
|
274
|
+
if (word !== void 0 && startTime !== void 0 && endTime !== void 0) {
|
|
275
|
+
pendingTimedTranscripts.push(
|
|
276
|
+
createTimedString({
|
|
277
|
+
text: word + " ",
|
|
278
|
+
// Add space after word for consistency
|
|
279
|
+
startTime,
|
|
280
|
+
endTime
|
|
281
|
+
})
|
|
241
282
|
);
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
if (isChunkMessage(serverMsg)) {
|
|
287
|
+
const audioBuffer = Buffer.from(serverMsg.data, "base64");
|
|
288
|
+
const audioData = audioBuffer.buffer.slice(
|
|
289
|
+
audioBuffer.byteOffset,
|
|
290
|
+
audioBuffer.byteOffset + audioBuffer.byteLength
|
|
291
|
+
);
|
|
292
|
+
for (const frame of bstream.write(audioData)) {
|
|
293
|
+
sendLastFrame(segmentId, false);
|
|
294
|
+
lastFrame = frame;
|
|
295
|
+
}
|
|
296
|
+
clearTTSChunkTimeout();
|
|
297
|
+
timeout = setTimeout(() => {
|
|
298
|
+
this.#logger.debug(
|
|
299
|
+
`Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`
|
|
300
|
+
);
|
|
301
|
+
ws2.close();
|
|
302
|
+
}, this.#opts.chunkTimeout);
|
|
303
|
+
} else if (isDoneMessage(serverMsg)) {
|
|
304
|
+
if (sentenceStreamClosed) {
|
|
246
305
|
for (const frame of bstream.flush()) {
|
|
247
306
|
sendLastFrame(segmentId, false);
|
|
248
307
|
lastFrame = frame;
|
|
@@ -253,26 +312,29 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
253
312
|
}
|
|
254
313
|
if (segmentId === requestId) {
|
|
255
314
|
closing = true;
|
|
256
|
-
shouldExit = true;
|
|
257
315
|
clearTTSChunkTimeout();
|
|
258
316
|
ws2.close();
|
|
317
|
+
break;
|
|
259
318
|
}
|
|
260
319
|
}
|
|
261
|
-
});
|
|
262
|
-
} catch (err) {
|
|
263
|
-
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
264
|
-
if (err.message.includes("Queue is closed")) {
|
|
265
|
-
this.#logger.warn(
|
|
266
|
-
{ err },
|
|
267
|
-
"Queue closed during transcript processing (expected during disconnect)"
|
|
268
|
-
);
|
|
269
|
-
} else {
|
|
270
|
-
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
271
|
-
}
|
|
272
320
|
}
|
|
273
|
-
clearTTSChunkTimeout();
|
|
274
|
-
break;
|
|
275
321
|
}
|
|
322
|
+
} catch (err) {
|
|
323
|
+
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
324
|
+
if (err.message.includes("Queue is closed") || err.message.includes("Channel is closed")) {
|
|
325
|
+
this.#logger.warn(
|
|
326
|
+
{ err },
|
|
327
|
+
"Channel closed during transcript processing (expected during disconnect)"
|
|
328
|
+
);
|
|
329
|
+
} else {
|
|
330
|
+
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
} finally {
|
|
334
|
+
ws2.off("message", onMessage);
|
|
335
|
+
ws2.off("close", onClose);
|
|
336
|
+
ws2.off("error", onError);
|
|
337
|
+
clearTTSChunkTimeout();
|
|
276
338
|
}
|
|
277
339
|
};
|
|
278
340
|
const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
|
|
@@ -290,7 +352,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
290
352
|
}
|
|
291
353
|
}
|
|
292
354
|
}
|
|
293
|
-
const toCartesiaOptions = (opts) => {
|
|
355
|
+
const toCartesiaOptions = (opts, streaming = false) => {
|
|
294
356
|
const voice = {};
|
|
295
357
|
if (typeof opts.voice === "string") {
|
|
296
358
|
voice.mode = "id";
|
|
@@ -309,7 +371,7 @@ const toCartesiaOptions = (opts) => {
|
|
|
309
371
|
if (Object.keys(voiceControls).length) {
|
|
310
372
|
voice.__experimental_controls = voiceControls;
|
|
311
373
|
}
|
|
312
|
-
|
|
374
|
+
const result = {
|
|
313
375
|
model_id: opts.model,
|
|
314
376
|
voice,
|
|
315
377
|
output_format: {
|
|
@@ -319,6 +381,10 @@ const toCartesiaOptions = (opts) => {
|
|
|
319
381
|
},
|
|
320
382
|
language: opts.language
|
|
321
383
|
};
|
|
384
|
+
if (streaming && opts.wordTimestamps !== false) {
|
|
385
|
+
result.add_timestamps = true;
|
|
386
|
+
}
|
|
387
|
+
return result;
|
|
322
388
|
};
|
|
323
389
|
export {
|
|
324
390
|
ChunkedStream,
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Future,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: 
this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n });\n req.on('close', () => doneFut.resolve());\n req.write(JSON.stringify(json));\n req.end();\n\n await doneFut.await;\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of 
this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason}`);\n }\n\n clearTTSChunkTimeout();\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame 
= frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket STT chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n clearTTSChunkTimeout();\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (err.message.includes('Queue is closed')) {\n this.#logger.warn(\n { err },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n clearTTSChunkTimeout();\n break;\n }\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n 
const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAmB7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAChB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AA
EO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,OAAa;AAEjC,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,kBAAQ,QAAQ;AAAA,QAClB,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,QAC3D,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,IAC1D,CAAC;AACD,QAAI,GAAG,SAAS,MAAM,QAAQ,QAAQ,CAAC;AACvC,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,UAAM,QAAQ;AAAA,EAChB;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OA
AOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AAEA,mCAAqB;AACrB,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAKA,mCAAqB;AACrB,wBAAU,WAAW,MAAM;AAEzB,qBAAK,QAAQ;AAAA,kBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,gBAC9E;AACA,gBAAAA,IAAG,MAAM;AAAA,cACX,GAAG,KAAK,MAAM,YAAY;AAAA,YAC5B,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC
/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,gBAAI,IAAI,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,mBAAK,QAAQ;AAAA,gBACX,EAAE,IAAI;AAAA,gBACN;AAAA,cACF;AAAA,YACF,OAAO;AACL,mBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,YACzE;AAAA,UACF;AACA,+BAAqB;AACrB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Future,\n type TimedString,\n createTimedString,\n log,\n shortuuid,\n stream,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\nimport {\n type CartesiaServerMessage,\n cartesiaMessageSchema,\n hasWordTimestamps,\n isChunkMessage,\n isDoneMessage,\n isErrorMessage,\n} from './types.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n\n /**\n * Whether to add word timestamps to the output. 
When enabled, the TTS will return\n * timing information for each word in the transcript.\n * @defaultValue true\n */\n wordTimestamps?: boolean;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n wordTimestamps: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolvedOpts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n alignedTranscript: resolvedOpts.wordTimestamps ?? true,\n });\n\n this.#opts = resolvedOpts;\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, 
text, this.#opts, connOptions, abortSignal);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n });\n req.on('close', () => doneFut.resolve());\n req.write(JSON.stringify(json));\n req.end();\n\n await doneFut.await;\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: 
TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n // Only close WebSocket when both: 1) Cartesia returns done, AND 2) all sentences have been sent\n let sentenceStreamClosed = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts, true);\n for await (const event of this.#tokenizer) {\n const msg = {\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n };\n ws.send(JSON.stringify(msg));\n }\n\n const endMsg = {\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n };\n ws.send(JSON.stringify(endMsg));\n // Mark sentence stream as closed\n sentenceStreamClosed = true;\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n // Use event channel and set up listeners ONCE to avoid missing messages during listener re-registration\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n // Create event channel to buffer 
incoming messages\n // This prevents message loss between listener re-registrations\n const eventChannel = stream.createStreamChannel<RawData>();\n\n let lastFrame: AudioFrame | undefined;\n let pendingTimedTranscripts: TimedString[] = [];\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n // Include timedTranscripts with the audio frame\n this.queue.put({\n requestId,\n segmentId,\n frame: lastFrame,\n final,\n timedTranscripts:\n pendingTimedTranscripts.length > 0 ? pendingTimedTranscripts : undefined,\n });\n lastFrame = undefined;\n pendingTimedTranscripts = [];\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n // Set up WebSocket listeners ONCE (not in a loop)\n const onMessage = (data: RawData) => {\n void eventChannel.write(data).catch((error: unknown) => {\n this.#logger.debug({ error }, 'Failed writing Cartesia event to channel (likely closed)');\n });\n };\n\n const onClose = (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);\n }\n clearTTSChunkTimeout();\n void eventChannel.close();\n };\n\n const onError = (err: Error) => {\n this.#logger.error({ err }, 'Cartesia WebSocket error');\n void eventChannel.close();\n };\n\n // Attach listeners ONCE\n ws.on('message', onMessage);\n ws.on('close', onClose);\n ws.on('error', onError);\n\n try {\n // Process messages from the channel\n const reader = eventChannel.stream().getReader();\n\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (result.done) break;\n\n const rawMsg = result.value;\n\n // Parse message with Zod schema for type safety\n let serverMsg: CartesiaServerMessage;\n try {\n const json = JSON.parse(rawMsg.toString());\n serverMsg = cartesiaMessageSchema.parse(json);\n } catch 
(parseErr) {\n this.#logger.warn({ parseErr }, 'Failed to parse Cartesia message');\n continue;\n }\n\n // Handle error messages\n if (isErrorMessage(serverMsg)) {\n this.#logger.error({ error: serverMsg.error }, 'Cartesia returned error');\n continue;\n }\n\n const segmentId = serverMsg.context_id;\n\n // Process word timestamps if present (typed via Zod schema)\n if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {\n const wordTimestamps = serverMsg.word_timestamps;\n for (let i = 0; i < wordTimestamps.words.length; i++) {\n const word = wordTimestamps.words[i];\n const startTime = wordTimestamps.start[i];\n const endTime = wordTimestamps.end[i];\n if (word !== undefined && startTime !== undefined && endTime !== undefined) {\n pendingTimedTranscripts.push(\n createTimedString({\n text: word + ' ', // Add space after word for consistency\n startTime,\n endTime,\n }),\n );\n }\n }\n }\n\n // Handle audio chunk messages\n if (isChunkMessage(serverMsg)) {\n const audioBuffer = Buffer.from(serverMsg.data, 'base64');\n // Extract ArrayBuffer from Buffer for AudioByteStream compatibility\n const audioData = audioBuffer.buffer.slice(\n audioBuffer.byteOffset,\n audioBuffer.byteOffset + audioBuffer.byteLength,\n );\n for (const frame of bstream.write(audioData)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if (isDoneMessage(serverMsg)) {\n // This ensures all sentences have been sent before closing\n if 
(sentenceStreamClosed) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n clearTTSChunkTimeout();\n ws.close();\n break; // Exit the loop\n }\n }\n // If sentenceStreamClosed is false, continue receiving - more done messages will come\n }\n }\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (\n err.message.includes('Queue is closed') ||\n err.message.includes('Channel is closed')\n ) {\n this.#logger.warn(\n { err },\n 'Channel closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('close', onClose);\n ws.off('error', onError);\n clearTTSChunkTimeout();\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\n/**\n * Convert TTSOptions to Cartesia API format.\n *\n * @param opts - TTS options\n * @param streaming - Whether this is for streaming (WebSocket) or non-streaming (HTTP)\n */\nconst toCartesiaOptions = (\n opts: TTSOptions,\n streaming: boolean = false,\n): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } 
= {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n const result: { [id: string]: unknown } = {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n\n if (streaming && opts.wordTimestamps !== false) {\n result.add_timestamps = true;\n }\n\n return result;\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AACP;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AA0B7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAAA,EACd,gBAAgB;AAClB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,eAAe;AAAA,MACnB,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,UAAM,aAAa,cAAc,kBAAkB,YAAY,cAAc;AAAA,MAC3E,WAAW;AAAA,MACX,mBAAmB,aAAa,kBAAkB;AAAA,IACpD,CAAC;AAED,SAAK,QAAQ;AAEb,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAA
M,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,OAAa;AAEjC,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,kBAAQ,QAAQ;AAAA,QAClB,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,QAC3D,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,IAC1D,CAAC;AACD,QAAI,GAAG,SAAS,MAAM,QAAQ,QAAQ,CAAC;AACvC,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,UAAM,QAAQ;AAAA,EAChB;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAA
M,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,QAAI,uBAAuB;AAE3B,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,OAAO,IAAI;AACjD,uBAAiB,SAAS,KAAK,YAAY;AACzC,cAAM,MAAM;AAAA,UACV,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY,MAAM,QAAQ;AAAA,UAC1B,UAAU;AAAA,QACZ;AACA,QAAAA,IAAG,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,MAC7B;AAEA,YAAM,SAAS;AAAA,QACb,GAAG;AAAA,QACH,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,UAAU;AAAA,MACZ;AACA,MAAAA,IAAG,KAAK,KAAK,UAAU,MAAM,CAAC;AAE9B,6BAAuB;AAAA,IACzB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAGA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAIvE,YAAM,eAAe,OAAO,oBAA6B;AAEzD,UAAI;AACJ,UAAI,0BAAyC,CAAC;AAE9C,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AAEnC,eAAK,MAAM,IAAI;AAAA,YACb;AAAA,YACA;AAAA,YACA,OAAO;AAAA,YACP;AAAA,YACA,kBACE,wBAAwB,SAAS,IAAI,0BAA0B;AAAA,UACnE,CAAC;AACD,sBAAY;AACZ,oCAA0B,CAAC;AAAA,QAC7B;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAGA,YAAM,YAAY,CAAC,SAAkB;AACnC,aAAK,aAAa,MAAM,IAAI,EAAE,MAAM,CAAC,UAAmB;AACtD,eAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,0DAA0D;AAAA,QAC1F,CAAC;AAAA,MACH;AAEA,YAAM,UAAU,CAAC,MAAc,WAAmB;AAChD,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,OAAO,SAAS,CAAC,EAAE;AAAA,QAC/E;AACA,6BAAqB;AACrB,aAAK,aAAa,MAAM;AAAA,MAC1B;AAEA,YAAM,UAAU,CAAC,QAAe;AAC9B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,0BAA0B;AACtD,aAAK,aAAa,MAAM;AAAA,MAC1B;AAGA,MAAAA,IAAG,GAAG,WAAW,SAAS;AAC1B,MAAAA,IAAG,GAAG,SAAS,OAAO;AACtB,MAAAA,IAAG,GAAG,SAAS,OAAO;AAEtB,UAAI;AAEF,cAAM,SAAS,aAAa,OAAO,EAAE,UAAU;AAE/C,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,KAAM;AAEjB,gBAAM,SAAS,
OAAO;AAGtB,cAAI;AACJ,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,OAAO,SAAS,CAAC;AACzC,wBAAY,sBAAsB,MAAM,IAAI;AAAA,UAC9C,SAAS,UAAU;AACjB,iBAAK,QAAQ,KAAK,EAAE,SAAS,GAAG,kCAAkC;AAClE;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,iBAAK,QAAQ,MAAM,EAAE,OAAO,UAAU,MAAM,GAAG,yBAAyB;AACxE;AAAA,UACF;AAEA,gBAAM,YAAY,UAAU;AAG5B,cAAI,KAAK,MAAM,mBAAmB,SAAS,kBAAkB,SAAS,GAAG;AACvE,kBAAM,iBAAiB,UAAU;AACjC,qBAAS,IAAI,GAAG,IAAI,eAAe,MAAM,QAAQ,KAAK;AACpD,oBAAM,OAAO,eAAe,MAAM,CAAC;AACnC,oBAAM,YAAY,eAAe,MAAM,CAAC;AACxC,oBAAM,UAAU,eAAe,IAAI,CAAC;AACpC,kBAAI,SAAS,UAAa,cAAc,UAAa,YAAY,QAAW;AAC1E,wCAAwB;AAAA,kBACtB,kBAAkB;AAAA,oBAChB,MAAM,OAAO;AAAA;AAAA,oBACb;AAAA,oBACA;AAAA,kBACF,CAAC;AAAA,gBACH;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,kBAAM,cAAc,OAAO,KAAK,UAAU,MAAM,QAAQ;AAExD,kBAAM,YAAY,YAAY,OAAO;AAAA,cACnC,YAAY;AAAA,cACZ,YAAY,aAAa,YAAY;AAAA,YACvC;AACA,uBAAW,SAAS,QAAQ,MAAM,SAAS,GAAG;AAC5C,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AAKA,iCAAqB;AACrB,sBAAU,WAAW,MAAM;AAEzB,mBAAK,QAAQ;AAAA,gBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,cAC9E;AACA,cAAAA,IAAG,MAAM;AAAA,YACX,GAAG,KAAK,MAAM,YAAY;AAAA,UAC5B,WAAW,cAAc,SAAS,GAAG;AAEnC,gBAAI,sBAAsB;AACxB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UAEF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AAEZ,YAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,cACE,IAAI,QAAQ,SAAS,iBAAiB,KACtC,IAAI,QAAQ,SAAS,mBAAmB,GACxC;AACA,iBAAK,QAAQ;AAAA,cACX,EAAE,IAAI;AAAA,cACN;AAAA,YACF;AAAA,UACF,OAAO;AACL,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AAAA,QACF;AAAA,MACF,UAAE;AAEA,QAAAA,IAAG,IAAI,WAAW,SAAS;AAC3B,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,6BAAqB;AAAA,MACvB;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;A
ACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAQA,MAAM,oBAAoB,CACxB,MACA,YAAqB,UACS;AAC9B,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,QAAM,SAAoC;AAAA,IACxC,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AAEA,MAAI,aAAa,KAAK,mBAAmB,OAAO;AAC9C,WAAO,iBAAiB;AAAA,EAC1B;AAEA,SAAO;AACT;","names":["tts","ws"]}
|
package/dist/types.cjs
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var types_exports = {};
|
|
20
|
+
__export(types_exports, {
|
|
21
|
+
cartesiaChunkMessageSchema: () => cartesiaChunkMessageSchema,
|
|
22
|
+
cartesiaDoneMessageSchema: () => cartesiaDoneMessageSchema,
|
|
23
|
+
cartesiaErrorMessageSchema: () => cartesiaErrorMessageSchema,
|
|
24
|
+
cartesiaFlushDoneMessageSchema: () => cartesiaFlushDoneMessageSchema,
|
|
25
|
+
cartesiaMessageSchema: () => cartesiaMessageSchema,
|
|
26
|
+
cartesiaServerMessageSchema: () => cartesiaServerMessageSchema,
|
|
27
|
+
cartesiaTimestampsMessageSchema: () => cartesiaTimestampsMessageSchema,
|
|
28
|
+
cartesiaWordTimestampsSchema: () => cartesiaWordTimestampsSchema,
|
|
29
|
+
hasWordTimestamps: () => hasWordTimestamps,
|
|
30
|
+
isChunkMessage: () => isChunkMessage,
|
|
31
|
+
isDoneMessage: () => isDoneMessage,
|
|
32
|
+
isErrorMessage: () => isErrorMessage,
|
|
33
|
+
isFlushDoneMessage: () => isFlushDoneMessage,
|
|
34
|
+
isTimestampsMessage: () => isTimestampsMessage
|
|
35
|
+
});
|
|
36
|
+
module.exports = __toCommonJS(types_exports);
|
|
37
|
+
var import_zod = require("zod");
|
|
38
|
+
/**
 * Word timestamps payload: timing info for each word, carried as three
 * arrays (`words` as strings, `start` and `end` as numbers).
 */
const wordTimestampsShape = {
  words: import_zod.z.array(import_zod.z.string()),
  start: import_zod.z.array(import_zod.z.number()),
  end: import_zod.z.array(import_zod.z.number())
};
const cartesiaWordTimestampsSchema = import_zod.z.object(wordTimestampsShape);
|
|
43
|
+
/**
 * Audio chunk message: `type` is the literal "chunk" and `data` holds the
 * base64-encoded audio as a string.
 */
const chunkMessageShape = {
  type: import_zod.z.literal("chunk"),
  data: import_zod.z.string(),
  done: import_zod.z.boolean(),
  status_code: import_zod.z.number(),
  step_time: import_zod.z.number(),
  context_id: import_zod.z.string()
};
const cartesiaChunkMessageSchema = import_zod.z.object(chunkMessageShape);
|
|
51
|
+
/**
 * Word timestamps message: `type` is the literal "timestamps" and
 * `word_timestamps` is validated by cartesiaWordTimestampsSchema.
 */
const timestampsMessageShape = {
  type: import_zod.z.literal("timestamps"),
  done: import_zod.z.boolean(),
  status_code: import_zod.z.number(),
  context_id: import_zod.z.string(),
  word_timestamps: cartesiaWordTimestampsSchema
};
const cartesiaTimestampsMessageSchema = import_zod.z.object(timestampsMessageShape);
|
|
58
|
+
/**
 * Done message: `type` is the literal "done", indicating completion.
 */
const doneMessageShape = {
  type: import_zod.z.literal("done"),
  done: import_zod.z.boolean(),
  status_code: import_zod.z.number(),
  context_id: import_zod.z.string()
};
const cartesiaDoneMessageSchema = import_zod.z.object(doneMessageShape);
|
|
64
|
+
/**
 * Flush done message: `type` is the literal "flush_done", indicating that a
 * flush completed; `flush_id` is a number.
 */
const flushDoneMessageShape = {
  type: import_zod.z.literal("flush_done"),
  done: import_zod.z.boolean(),
  flush_done: import_zod.z.boolean(),
  flush_id: import_zod.z.number(),
  status_code: import_zod.z.number(),
  context_id: import_zod.z.string()
};
const cartesiaFlushDoneMessageSchema = import_zod.z.object(flushDoneMessageShape);
|
|
72
|
+
/**
 * Error message: identified by its string `error` field. `type` is accepted
 * as any string rather than a fixed literal.
 */
const errorMessageShape = {
  type: import_zod.z.string(),
  done: import_zod.z.boolean(),
  error: import_zod.z.string(),
  status_code: import_zod.z.number(),
  context_id: import_zod.z.string()
};
const cartesiaErrorMessageSchema = import_zod.z.object(errorMessageShape);
|
|
79
|
+
/**
 * Union of the chunk, timestamps, done and flush_done message schemas,
 * discriminated on the `type` field.
 */
const serverMessageVariants = [
  cartesiaChunkMessageSchema,
  cartesiaTimestampsMessageSchema,
  cartesiaDoneMessageSchema,
  cartesiaFlushDoneMessageSchema
];
const cartesiaServerMessageSchema = import_zod.z.discriminatedUnion(
  "type",
  serverMessageVariants
);
|
|
85
|
+
/**
 * Top-level message schema: the discriminated server-message union, with the
 * error schema as a fallback. Errors sit outside the discriminated union
 * because their `type` is an arbitrary string.
 */
const messageAlternatives = [
  cartesiaServerMessageSchema,
  cartesiaErrorMessageSchema
];
const cartesiaMessageSchema = import_zod.z.union(messageAlternatives);
|
|
89
|
+
/**
 * Type guard for audio chunk messages.
 *
 * Returns false for null, undefined and primitive inputs instead of throwing,
 * which the `in` operator would do on a non-object operand.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} True when `msg.type` is exactly "chunk".
 */
function isChunkMessage(msg) {
  // `== null` matches both null and undefined; property access on any other
  // value is safe and yields undefined when `type` is absent.
  return msg != null && msg.type === "chunk";
}
|
|
92
|
+
/**
 * Type guard for word-timestamps messages.
 *
 * Returns false for null, undefined and primitive inputs instead of throwing,
 * which the `in` operator would do on a non-object operand.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} True when `msg.type` is exactly "timestamps".
 */
function isTimestampsMessage(msg) {
  // `== null` matches both null and undefined; property access on any other
  // value is safe and yields undefined when `type` is absent.
  return msg != null && msg.type === "timestamps";
}
|
|
95
|
+
/**
 * Type guard for done (completion) messages.
 *
 * Returns false for null, undefined and primitive inputs instead of throwing,
 * which the `in` operator would do on a non-object operand.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} True when `msg.type` is exactly "done".
 */
function isDoneMessage(msg) {
  // `== null` matches both null and undefined; property access on any other
  // value is safe and yields undefined when `type` is absent.
  return msg != null && msg.type === "done";
}
|
|
98
|
+
/**
 * Type guard for flush-done messages.
 *
 * Returns false for null, undefined and primitive inputs instead of throwing,
 * which the `in` operator would do on a non-object operand.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} True when `msg.type` is exactly "flush_done".
 */
function isFlushDoneMessage(msg) {
  // `== null` matches both null and undefined; property access on any other
  // value is safe and yields undefined when `type` is absent.
  return msg != null && msg.type === "flush_done";
}
|
|
101
|
+
/**
 * Type guard for error messages, recognised by a string `error` field
 * rather than by `type`.
 *
 * Returns false for null, undefined and primitive inputs instead of throwing,
 * which the `in` operator would do on a non-object operand.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} True when `msg.error` is a string.
 */
function isErrorMessage(msg) {
  // `== null` matches both null and undefined; the typeof check already
  // covers the "property missing" case, so no separate `in` test is needed.
  return msg != null && typeof msg.error === "string";
}
|
|
104
|
+
/**
 * Reports whether a message carries word timestamps. This delegates
 * entirely to isTimestampsMessage and returns its result unchanged.
 *
 * @param {*} msg - Candidate message.
 * @returns {boolean} Result of `isTimestampsMessage(msg)`.
 */
function hasWordTimestamps(msg) {
  const carriesTimestamps = isTimestampsMessage(msg);
  return carriesTimestamps;
}
|
|
107
|
+
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so the assignment below never executes
// at runtime; the object literal exists only to spell out the export names.
// Keep this list in step with the names registered through __export above.
0 && (module.exports = {
|
|
109
|
+
cartesiaChunkMessageSchema,
|
|
110
|
+
cartesiaDoneMessageSchema,
|
|
111
|
+
cartesiaErrorMessageSchema,
|
|
112
|
+
cartesiaFlushDoneMessageSchema,
|
|
113
|
+
cartesiaMessageSchema,
|
|
114
|
+
cartesiaServerMessageSchema,
|
|
115
|
+
cartesiaTimestampsMessageSchema,
|
|
116
|
+
cartesiaWordTimestampsSchema,
|
|
117
|
+
hasWordTimestamps,
|
|
118
|
+
isChunkMessage,
|
|
119
|
+
isDoneMessage,
|
|
120
|
+
isErrorMessage,
|
|
121
|
+
isFlushDoneMessage,
|
|
122
|
+
isTimestampsMessage
|
|
123
|
+
});
|
|
124
|
+
//# sourceMappingURL=types.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\n// ============================================================================\n// Cartesia WebSocket API Schemas\n// Ref: https://docs.cartesia.ai/api-reference/tts/websocket\n// ============================================================================\n\n/** Word timestamps schema - contains timing info for each word */\nexport const cartesiaWordTimestampsSchema = z.object({\n words: z.array(z.string()),\n start: z.array(z.number()),\n end: z.array(z.number()),\n});\n\n/** Audio chunk message - type: \"chunk\" with base64-encoded audio data */\nexport const cartesiaChunkMessageSchema = z.object({\n type: z.literal('chunk'),\n data: z.string(),\n done: z.boolean(),\n status_code: z.number(),\n step_time: z.number(),\n context_id: z.string(),\n});\n\n/** Word timestamps message - type: \"timestamps\" with word timing data */\nexport const cartesiaTimestampsMessageSchema = z.object({\n type: z.literal('timestamps'),\n done: z.boolean(),\n status_code: z.number(),\n context_id: z.string(),\n word_timestamps: cartesiaWordTimestampsSchema,\n});\n\n/** Done message - type: \"done\" indicates completion */\nexport const cartesiaDoneMessageSchema = z.object({\n type: z.literal('done'),\n done: z.boolean(),\n status_code: z.number(),\n context_id: z.string(),\n});\n\n/** Flush done message - type: \"flush_done\" indicates flush completion */\nexport const cartesiaFlushDoneMessageSchema = z.object({\n type: z.literal('flush_done'),\n done: z.boolean(),\n flush_done: z.boolean(),\n flush_id: z.number(),\n status_code: z.number(),\n context_id: z.string(),\n});\n\n/** Error message - has error field */\nexport const cartesiaErrorMessageSchema = z.object({\n type: z.string(),\n done: z.boolean(),\n error: z.string(),\n status_code: z.number(),\n context_id: z.string(),\n});\n\n/** Union of all 
possible Cartesia server messages using discriminated union on 'type' */\nexport const cartesiaServerMessageSchema = z.discriminatedUnion('type', [\n cartesiaChunkMessageSchema,\n cartesiaTimestampsMessageSchema,\n cartesiaDoneMessageSchema,\n cartesiaFlushDoneMessageSchema,\n]);\n\n// Fallback schema for error messages (can't be in discriminated union due to dynamic type)\nexport const cartesiaMessageSchema = z.union([\n cartesiaServerMessageSchema,\n cartesiaErrorMessageSchema,\n]);\n\n// ============================================================================\n// Type exports from Zod schemas\n// ============================================================================\n\nexport type CartesiaWordTimestamps = z.infer<typeof cartesiaWordTimestampsSchema>;\nexport type CartesiaChunkMessage = z.infer<typeof cartesiaChunkMessageSchema>;\nexport type CartesiaTimestampsMessage = z.infer<typeof cartesiaTimestampsMessageSchema>;\nexport type CartesiaDoneMessage = z.infer<typeof cartesiaDoneMessageSchema>;\nexport type CartesiaFlushDoneMessage = z.infer<typeof cartesiaFlushDoneMessageSchema>;\nexport type CartesiaErrorMessage = z.infer<typeof cartesiaErrorMessageSchema>;\nexport type CartesiaServerMessage = z.infer<typeof cartesiaMessageSchema>;\n\n// ============================================================================\n// Helper type guards for message discrimination\n// ============================================================================\n\nexport function isChunkMessage(msg: CartesiaServerMessage): msg is CartesiaChunkMessage {\n return 'type' in msg && msg.type === 'chunk';\n}\n\nexport function isTimestampsMessage(msg: CartesiaServerMessage): msg is CartesiaTimestampsMessage {\n return 'type' in msg && msg.type === 'timestamps';\n}\n\nexport function isDoneMessage(msg: CartesiaServerMessage): msg is CartesiaDoneMessage {\n return 'type' in msg && msg.type === 'done';\n}\n\nexport function isFlushDoneMessage(msg: CartesiaServerMessage): msg is 
CartesiaFlushDoneMessage {\n return 'type' in msg && msg.type === 'flush_done';\n}\n\nexport function isErrorMessage(msg: CartesiaServerMessage): msg is CartesiaErrorMessage {\n return 'error' in msg && typeof msg.error === 'string';\n}\n\nexport function hasWordTimestamps(msg: CartesiaServerMessage): msg is CartesiaTimestampsMessage {\n return isTimestampsMessage(msg);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAAkB;AAQX,MAAM,+BAA+B,aAAE,OAAO;AAAA,EACnD,OAAO,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EACzB,OAAO,aAAE,MAAM,aAAE,OAAO,CAAC;AAAA,EACzB,KAAK,aAAE,MAAM,aAAE,OAAO,CAAC;AACzB,CAAC;AAGM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,OAAO;AAAA,EACvB,MAAM,aAAE,OAAO;AAAA,EACf,MAAM,aAAE,QAAQ;AAAA,EAChB,aAAa,aAAE,OAAO;AAAA,EACtB,WAAW,aAAE,OAAO;AAAA,EACpB,YAAY,aAAE,OAAO;AACvB,CAAC;AAGM,MAAM,kCAAkC,aAAE,OAAO;AAAA,EACtD,MAAM,aAAE,QAAQ,YAAY;AAAA,EAC5B,MAAM,aAAE,QAAQ;AAAA,EAChB,aAAa,aAAE,OAAO;AAAA,EACtB,YAAY,aAAE,OAAO;AAAA,EACrB,iBAAiB;AACnB,CAAC;AAGM,MAAM,4BAA4B,aAAE,OAAO;AAAA,EAChD,MAAM,aAAE,QAAQ,MAAM;AAAA,EACtB,MAAM,aAAE,QAAQ;AAAA,EAChB,aAAa,aAAE,OAAO;AAAA,EACtB,YAAY,aAAE,OAAO;AACvB,CAAC;AAGM,MAAM,iCAAiC,aAAE,OAAO;AAAA,EACrD,MAAM,aAAE,QAAQ,YAAY;AAAA,EAC5B,MAAM,aAAE,QAAQ;AAAA,EAChB,YAAY,aAAE,QAAQ;AAAA,EACtB,UAAU,aAAE,OAAO;AAAA,EACnB,aAAa,aAAE,OAAO;AAAA,EACtB,YAAY,aAAE,OAAO;AACvB,CAAC;AAGM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,OAAO;AAAA,EACf,MAAM,aAAE,QAAQ;AAAA,EAChB,OAAO,aAAE,OAAO;AAAA,EAChB,aAAa,aAAE,OAAO;AAAA,EACtB,YAAY,aAAE,OAAO;AACvB,CAAC;AAGM,MAAM,8BAA8B,aAAE,mBAAmB,QAAQ;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGM,MAAM,wBAAwB,aAAE,MAAM;AAAA,EAC3C;AAAA,EACA;AACF,CAAC;AAkBM,SAAS,eAAe,KAAyD;AACtF,SAAO,UAAU,OAAO,IAAI,SAAS;AACvC;AAEO,SAAS,oBAAoB,KAA8D;AAChG,SAAO,UAAU,OAAO,IAAI,SAAS;AACvC;AAEO,SAAS,cAAc,KAAwD;AACpF,SAAO,UAAU,OAAO,IAAI,SAAS;AACvC;AAEO,SAAS,mBAAmB,KAA6D;AAC9F,SAAO,UAAU,OAAO,IAAI,SAAS;AACvC;AAEO,SAAS,eAAe,KAAyD;AACtF,SAAO,WAAW,OAAO,OAAO,IAAI,UAAU;AAChD;AAEO,SAA
S,kBAAkB,KAA8D;AAC9F,SAAO,oBAAoB,GAAG;AAChC;","names":[]}
|