@livekit/agents-plugin-cartesia 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/dist/models.cjs +6 -3
- package/dist/models.cjs.map +1 -1
- package/dist/models.d.cts +3 -2
- package/dist/models.d.ts +3 -2
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +4 -2
- package/dist/models.js.map +1 -1
- package/dist/tts.cjs +69 -31
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +6 -0
- package/dist/tts.d.ts +6 -0
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +71 -32
- package/dist/tts.js.map +1 -1
- package/package.json +5 -5
- package/src/models.ts +11 -2
- package/src/tts.ts +99 -38
package/dist/index.cjs
CHANGED
package/dist/index.js
CHANGED
package/dist/models.cjs
CHANGED
|
@@ -18,12 +18,15 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
18
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
19
|
var models_exports = {};
|
|
20
20
|
__export(models_exports, {
|
|
21
|
-
TTSDefaultVoiceId: () => TTSDefaultVoiceId
|
|
21
|
+
TTSDefaultVoiceId: () => TTSDefaultVoiceId,
|
|
22
|
+
isSonic3: () => isSonic3
|
|
22
23
|
});
|
|
23
24
|
module.exports = __toCommonJS(models_exports);
|
|
24
|
-
const TTSDefaultVoiceId = "
|
|
25
|
+
const TTSDefaultVoiceId = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
26
|
+
const isSonic3 = (model) => model.startsWith("sonic-3");
|
|
25
27
|
// Annotate the CommonJS export names for ESM import in node:
|
|
26
28
|
0 && (module.exports = {
|
|
27
|
-
TTSDefaultVoiceId
|
|
29
|
+
TTSDefaultVoiceId,
|
|
30
|
+
isSonic3
|
|
28
31
|
});
|
|
29
32
|
//# sourceMappingURL=models.cjs.map
|
package/dist/models.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n// Ref: python livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/models.py - 11 lines\nexport type TTSModels =\n | 'sonic'\n | 'sonic-2'\n | 'sonic-3'\n | 'sonic-lite'\n | 'sonic-preview'\n | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = 'f786b574-daa5-4673-aa0c-cbe3e8534c02';\n\nexport const isSonic3 = (model: string): boolean => model.startsWith('sonic-3');\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAeO,MAAM,oBAAoB;AAE1B,MAAM,WAAW,CAAC,UAA2B,MAAM,WAAW,SAAS;","names":[]}
|
package/dist/models.d.cts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
1
|
+
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-3' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
2
2
|
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
3
|
-
export declare const TTSDefaultVoiceId = "
|
|
3
|
+
export declare const TTSDefaultVoiceId = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
4
|
+
export declare const isSonic3: (model: string) => boolean;
|
|
4
5
|
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
5
6
|
export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
|
|
6
7
|
export type TTSEncoding = 'pcm_s16le';
|
package/dist/models.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
1
|
+
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-3' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
2
2
|
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
3
|
-
export declare const TTSDefaultVoiceId = "
|
|
3
|
+
export declare const TTSDefaultVoiceId = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
4
|
+
export declare const isSonic3: (model: string) => boolean;
|
|
4
5
|
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
5
6
|
export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
|
|
6
7
|
export type TTSEncoding = 'pcm_s16le';
|
package/dist/models.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAKA,MAAM,MAAM,SAAS,GACjB,OAAO,GACP,SAAS,GACT,SAAS,GACT,YAAY,GACZ,eAAe,GACf,aAAa,CAAC;AAElB,MAAM,MAAM,YAAY,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAE1E,eAAO,MAAM,iBAAiB,yCAAyC,CAAC;AAExE,eAAO,MAAM,QAAQ,UAAW,MAAM,KAAG,OAAsC,CAAC;AAEhF,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,CAAC;AAE/E,MAAM,MAAM,eAAe,GACvB,cAAc,GACd,WAAW,GACX,OAAO,GACP,YAAY,GACZ,eAAe,GACf,mBAAmB,GACnB,gBAAgB,GAChB,YAAY,GACZ,iBAAiB,GACjB,oBAAoB,GACpB,iBAAiB,GACjB,cAAc,GACd,UAAU,GACV,eAAe,GACf,kBAAkB,GAClB,gBAAgB,GAChB,aAAa,GACb,SAAS,GACT,cAAc,GACd,iBAAiB,GACjB,kBAAkB,GAClB,eAAe,GACf,WAAW,GACX,gBAAgB,GAChB,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GAKrB,WAAW,CAAC"}
|
package/dist/models.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
const TTSDefaultVoiceId = "
|
|
1
|
+
const TTSDefaultVoiceId = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
2
|
+
const isSonic3 = (model) => model.startsWith("sonic-3");
|
|
2
3
|
export {
|
|
3
|
-
TTSDefaultVoiceId
|
|
4
|
+
TTSDefaultVoiceId,
|
|
5
|
+
isSonic3
|
|
4
6
|
};
|
|
5
7
|
//# sourceMappingURL=models.js.map
|
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n// Ref: python livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/models.py - 11 lines\nexport type TTSModels =\n | 'sonic'\n | 'sonic-2'\n | 'sonic-3'\n | 'sonic-lite'\n | 'sonic-preview'\n | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = 'f786b574-daa5-4673-aa0c-cbe3e8534c02';\n\nexport const isSonic3 = (model: string): boolean => model.startsWith('sonic-3');\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":"AAeO,MAAM,oBAAoB;AAE1B,MAAM,WAAW,CAAC,UAA2B,MAAM,WAAW,SAAS;","names":[]}
|
package/dist/tts.cjs
CHANGED
|
@@ -30,20 +30,49 @@ var import_models = require("./models.cjs");
|
|
|
30
30
|
var import_types = require("./types.cjs");
|
|
31
31
|
const AUTHORIZATION_HEADER = "X-API-Key";
|
|
32
32
|
const VERSION_HEADER = "Cartesia-Version";
|
|
33
|
-
const
|
|
33
|
+
const API_VERSION = "2025-04-16";
|
|
34
|
+
const API_VERSION_WITH_EXPERIMENTAL_CONTROLS = "2024-11-13";
|
|
35
|
+
const MODEL_WITH_EXPERIMENTAL_CONTROLS = "sonic-2-2025-03-07";
|
|
34
36
|
const NUM_CHANNELS = 1;
|
|
35
37
|
const BUFFERED_WORDS_COUNT = 8;
|
|
36
38
|
const defaultTTSOptions = {
|
|
37
|
-
model: "sonic-
|
|
39
|
+
model: "sonic-3",
|
|
38
40
|
encoding: "pcm_s16le",
|
|
39
41
|
sampleRate: 24e3,
|
|
40
42
|
voice: import_models.TTSDefaultVoiceId,
|
|
41
43
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
42
44
|
language: "en",
|
|
43
45
|
baseUrl: "https://api.cartesia.ai",
|
|
46
|
+
apiVersion: API_VERSION,
|
|
44
47
|
chunkTimeout: 5e3,
|
|
45
48
|
wordTimestamps: true
|
|
46
49
|
};
|
|
50
|
+
const checkGenerationConfig = (opts) => {
|
|
51
|
+
const logger = (0, import_agents.log)();
|
|
52
|
+
if ((0, import_models.isSonic3)(opts.model)) {
|
|
53
|
+
if (opts.speed !== void 0 && typeof opts.speed === "number") {
|
|
54
|
+
if (opts.speed < 0.6 || opts.speed > 2) {
|
|
55
|
+
logger.warn("speed must be between 0.6 and 2.0 for sonic-3");
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
if (opts.volume !== void 0 && (opts.volume < 0.5 || opts.volume > 2)) {
|
|
59
|
+
logger.warn("volume must be between 0.5 and 2.0 for sonic-3");
|
|
60
|
+
}
|
|
61
|
+
} else if (opts.apiVersion !== API_VERSION_WITH_EXPERIMENTAL_CONTROLS || opts.model !== MODEL_WITH_EXPERIMENTAL_CONTROLS) {
|
|
62
|
+
if (opts.speed || opts.emotion) {
|
|
63
|
+
logger.warn(
|
|
64
|
+
{ model: opts.model, speed: opts.speed, emotion: opts.emotion },
|
|
65
|
+
`speed and emotion controls are only supported for model '${MODEL_WITH_EXPERIMENTAL_CONTROLS}' or sonic-3 models, see https://docs.cartesia.ai/developer-tools/changelog for details`
|
|
66
|
+
);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
if (opts.pronunciationDictId && !(0, import_models.isSonic3)(opts.model)) {
|
|
70
|
+
logger.warn(
|
|
71
|
+
{ model: opts.model, pronunciationDictId: opts.pronunciationDictId },
|
|
72
|
+
"pronunciationDictId is only supported for sonic-3 models"
|
|
73
|
+
);
|
|
74
|
+
}
|
|
75
|
+
};
|
|
47
76
|
class TTS extends import_agents.tts.TTS {
|
|
48
77
|
#opts;
|
|
49
78
|
label = "cartesia.TTS";
|
|
@@ -62,22 +91,14 @@ class TTS extends import_agents.tts.TTS {
|
|
|
62
91
|
"Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
|
|
63
92
|
);
|
|
64
93
|
}
|
|
65
|
-
if (
|
|
66
|
-
|
|
67
|
-
logger.warn(
|
|
68
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
69
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
70
|
-
);
|
|
94
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
95
|
+
checkGenerationConfig(this.#opts);
|
|
71
96
|
}
|
|
72
97
|
}
|
|
73
98
|
updateOptions(opts) {
|
|
74
99
|
this.#opts = { ...this.#opts, ...opts };
|
|
75
|
-
if (
|
|
76
|
-
|
|
77
|
-
logger.warn(
|
|
78
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
79
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
80
|
-
);
|
|
100
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
101
|
+
checkGenerationConfig(this.#opts);
|
|
81
102
|
}
|
|
82
103
|
}
|
|
83
104
|
synthesize(text, connOptions, abortSignal) {
|
|
@@ -112,7 +133,7 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
112
133
|
method: "POST",
|
|
113
134
|
headers: {
|
|
114
135
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey,
|
|
115
|
-
[VERSION_HEADER]:
|
|
136
|
+
[VERSION_HEADER]: this.#opts.apiVersion
|
|
116
137
|
},
|
|
117
138
|
signal: this.abortSignal
|
|
118
139
|
},
|
|
@@ -178,11 +199,8 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
178
199
|
}
|
|
179
200
|
updateOptions(opts) {
|
|
180
201
|
this.#opts = { ...this.#opts, ...opts };
|
|
181
|
-
if (
|
|
182
|
-
this.#
|
|
183
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
184
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
185
|
-
);
|
|
202
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
203
|
+
checkGenerationConfig(this.#opts);
|
|
186
204
|
}
|
|
187
205
|
}
|
|
188
206
|
async run() {
|
|
@@ -356,7 +374,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
356
374
|
}
|
|
357
375
|
};
|
|
358
376
|
const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
|
|
359
|
-
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${
|
|
377
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${this.#opts.apiVersion}`;
|
|
360
378
|
let ws;
|
|
361
379
|
try {
|
|
362
380
|
ws = await connectCartesiaWebSocket({
|
|
@@ -499,15 +517,17 @@ const toCartesiaOptions = (opts, streaming = false) => {
|
|
|
499
517
|
voice.mode = "embedding";
|
|
500
518
|
voice.embedding = opts.voice;
|
|
501
519
|
}
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
520
|
+
if (opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS) {
|
|
521
|
+
const voiceControls = {};
|
|
522
|
+
if (opts.speed) {
|
|
523
|
+
voiceControls.speed = opts.speed;
|
|
524
|
+
}
|
|
525
|
+
if (opts.emotion) {
|
|
526
|
+
voiceControls.emotion = opts.emotion;
|
|
527
|
+
}
|
|
528
|
+
if (Object.keys(voiceControls).length) {
|
|
529
|
+
voice.__experimental_controls = voiceControls;
|
|
530
|
+
}
|
|
511
531
|
}
|
|
512
532
|
const result = {
|
|
513
533
|
model_id: opts.model,
|
|
@@ -518,8 +538,26 @@ const toCartesiaOptions = (opts, streaming = false) => {
|
|
|
518
538
|
sample_rate: opts.sampleRate
|
|
519
539
|
},
|
|
520
540
|
language: opts.language,
|
|
521
|
-
|
|
541
|
+
max_buffer_delay_ms: 0
|
|
522
542
|
};
|
|
543
|
+
if (opts.pronunciationDictId) {
|
|
544
|
+
result.pronunciation_dict_id = opts.pronunciationDictId;
|
|
545
|
+
}
|
|
546
|
+
if (opts.apiVersion > API_VERSION_WITH_EXPERIMENTAL_CONTROLS && (0, import_models.isSonic3)(opts.model)) {
|
|
547
|
+
const generationConfig = {};
|
|
548
|
+
if (opts.speed) {
|
|
549
|
+
generationConfig.speed = opts.speed;
|
|
550
|
+
}
|
|
551
|
+
if (opts.emotion) {
|
|
552
|
+
generationConfig.emotion = opts.emotion[0];
|
|
553
|
+
}
|
|
554
|
+
if (opts.volume) {
|
|
555
|
+
generationConfig.volume = opts.volume;
|
|
556
|
+
}
|
|
557
|
+
if (Object.keys(generationConfig).length) {
|
|
558
|
+
result.generation_config = generationConfig;
|
|
559
|
+
}
|
|
560
|
+
}
|
|
523
561
|
if (streaming && opts.wordTimestamps !== false) {
|
|
524
562
|
result.add_timestamps = true;
|
|
525
563
|
}
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APITimeoutError,\n AudioByteStream,\n Future,\n type TimedString,\n createTimedString,\n log,\n shortuuid,\n stream,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\nimport {\n type CartesiaServerMessage,\n cartesiaMessageSchema,\n hasWordTimestamps,\n isChunkMessage,\n isDoneMessage,\n isErrorMessage,\n} from './types.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n\n /**\n * Whether to add word timestamps to the output. 
When enabled, the TTS will return\n * timing information for each word in the transcript.\n * @defaultValue true\n */\n wordTimestamps?: boolean;\n\n pronunciationDictId?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n wordTimestamps: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolvedOpts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n alignedTranscript: resolvedOpts.wordTimestamps ?? true,\n });\n\n this.#opts = resolvedOpts;\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream 
{\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts, options?.connOptions);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n if (!doneFut.done) doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n if (!doneFut.done) doneFut.reject(err);\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n if (!doneFut.done) doneFut.reject(err);\n });\n req.on('close', () => {\n if (!doneFut.done) doneFut.resolve();\n });\n req.write(JSON.stringify(json));\n req.end();\n\n try {\n await doneFut.await;\n } catch (e) {\n if (this.abortSignal.aborted) return;\n if (!this.queue.closed) this.queue.close();\n throw toRetryableConnectionError(e);\n }\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for 
model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n // Only close WebSocket when both: 1) Cartesia returns done, AND 2) all sentences have been sent\n let sentenceStreamClosed = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts, true);\n for await (const event of this.#tokenizer) {\n const msg = {\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n };\n ws.send(JSON.stringify(msg));\n }\n\n const endMsg = {\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n };\n ws.send(JSON.stringify(endMsg));\n // Mark sentence stream as closed\n sentenceStreamClosed = true;\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n // Use event channel and set up listeners ONCE to avoid missing messages during listener re-registration\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n // Create event channel to buffer incoming messages\n // This prevents message loss between listener re-registrations\n const eventChannel = stream.createStreamChannel<RawData>();\n\n let lastFrame: AudioFrame | undefined;\n let pendingTimedTranscripts: TimedString[] = [];\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n // Include timedTranscripts with the audio frame\n this.queue.put({\n requestId,\n segmentId,\n frame: lastFrame,\n final,\n timedTranscripts:\n pendingTimedTranscripts.length > 0 ? 
pendingTimedTranscripts : undefined,\n });\n lastFrame = undefined;\n pendingTimedTranscripts = [];\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n // Set up WebSocket listeners ONCE (not in a loop)\n const onMessage = (data: RawData) => {\n void eventChannel.write(data).catch((error: unknown) => {\n this.#logger.debug({ error }, 'Failed writing Cartesia event to channel (likely closed)');\n });\n };\n\n const onClose = (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);\n }\n clearTTSChunkTimeout();\n void eventChannel.close();\n };\n\n const onError = (err: Error) => {\n this.#logger.error({ err }, 'Cartesia WebSocket error');\n void eventChannel.close();\n };\n\n // Attach listeners ONCE\n ws.on('message', onMessage);\n ws.on('close', onClose);\n ws.on('error', onError);\n\n try {\n // Process messages from the channel\n const reader = eventChannel.stream().getReader();\n\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (result.done) break;\n\n const rawMsg = result.value;\n\n // Parse message with Zod schema for type safety\n let serverMsg: CartesiaServerMessage;\n try {\n const json = JSON.parse(rawMsg.toString());\n serverMsg = cartesiaMessageSchema.parse(json);\n } catch (parseErr) {\n this.#logger.warn({ parseErr }, 'Failed to parse Cartesia message');\n continue;\n }\n\n // Handle error messages\n if (isErrorMessage(serverMsg)) {\n this.#logger.error({ error: serverMsg.error }, 'Cartesia returned error');\n continue;\n }\n\n const segmentId = serverMsg.context_id;\n\n // Process word timestamps if present (typed via Zod schema)\n if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {\n const wordTimestamps = serverMsg.word_timestamps;\n for (let i = 0; i < 
wordTimestamps.words.length; i++) {\n const word = wordTimestamps.words[i];\n const startTime = wordTimestamps.start[i];\n const endTime = wordTimestamps.end[i];\n if (word !== undefined && startTime !== undefined && endTime !== undefined) {\n pendingTimedTranscripts.push(\n createTimedString({\n text: word + ' ', // Add space after word for consistency\n startTime,\n endTime,\n }),\n );\n }\n }\n }\n\n // Handle audio chunk messages\n if (isChunkMessage(serverMsg)) {\n const audioBuffer = Buffer.from(serverMsg.data, 'base64');\n // Extract ArrayBuffer from Buffer for AudioByteStream compatibility\n const audioData = audioBuffer.buffer.slice(\n audioBuffer.byteOffset,\n audioBuffer.byteOffset + audioBuffer.byteLength,\n );\n for (const frame of bstream.write(audioData)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if (isDoneMessage(serverMsg)) {\n // This ensures all sentences have been sent before closing\n if (sentenceStreamClosed) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n clearTTSChunkTimeout();\n ws.close();\n break; // Exit the loop\n }\n }\n // If sentenceStreamClosed is false, continue receiving - more done messages will come\n }\n }\n } catch (err) {\n // skip log error for normal websocket close\n if 
(err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (\n err.message.includes('Queue is closed') ||\n err.message.includes('Channel is closed')\n ) {\n this.#logger.warn(\n { err },\n 'Channel closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('close', onClose);\n ws.off('error', onError);\n clearTTSChunkTimeout();\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n\n let ws: WebSocket | undefined;\n try {\n ws = await connectCartesiaWebSocket({\n url,\n timeoutMs: this.connOptions.timeoutMs,\n abortSignal: this.abortSignal,\n });\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n if (this.abortSignal.aborted) {\n return;\n }\n throw toRetryableConnectionError(e);\n } finally {\n // Ensure we don't leak sockets/tasks across retry attempts.\n if (ws && ws.readyState !== WebSocket.CLOSED) {\n safeTerminateWebSocket(ws);\n }\n }\n }\n}\n\nconst asError = (e: unknown): Error => (e instanceof Error ? 
e : new Error(String(e)));\n\nconst transientNetworkCodes = new Set([\n 'ETIMEDOUT',\n 'ECONNRESET',\n 'EAI_AGAIN',\n 'ENETUNREACH',\n 'ECONNREFUSED',\n 'EHOSTUNREACH',\n]);\n\nconst isRecord = (v: unknown): v is Record<string, unknown> => {\n return v !== null && typeof v === 'object';\n};\n\nconst isAggregateErrorLike = (e: unknown): e is { errors: unknown[]; name?: string } => {\n if (!isRecord(e)) return false;\n return e.name === 'AggregateError' && Array.isArray(e.errors);\n};\n\nconst hasErrorCode = (e: unknown, code: string): boolean => {\n if (isRecord(e) && e.code === code) return true;\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasErrorCode(inner, code));\n }\n return false;\n};\n\nconst hasAnyTransientCode = (e: unknown): boolean => {\n if (isRecord(e) && typeof e.code === 'string') {\n return transientNetworkCodes.has(e.code);\n }\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasAnyTransientCode(inner));\n }\n return false;\n};\n\nconst toRetryableConnectionError = (e: unknown): APIConnectionError => {\n const err = asError(e);\n const isTimeout =\n hasErrorCode(e, 'ETIMEDOUT') ||\n (typeof err.message === 'string' && err.message.includes('ETIMEDOUT'));\n const message = isTimeout\n ? `Cartesia connection timed out`\n : `Cartesia connection failed: ${err.message || 'unknown error'}`;\n return isTimeout ? 
new APITimeoutError({ message }) : new APIConnectionError({ message });\n};\n\nconst waitForWsOpen = async ({\n ws,\n timeoutMs,\n abortSignal,\n}: {\n ws: WebSocket;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}) => {\n if (abortSignal.aborted) {\n throw new Error('aborted');\n }\n\n const fut = new Future<void>();\n let timeout: NodeJS.Timeout | undefined;\n\n const cleanup = () => {\n if (timeout) clearTimeout(timeout);\n ws.off('open', onOpen);\n ws.off('error', onError);\n ws.off('close', onClose);\n abortSignal.removeEventListener('abort', onAbort);\n };\n\n const onOpen = () => fut.resolve();\n const onError = (err: Error) => fut.reject(asError(err));\n const onClose = (code: number, reason: Buffer) =>\n fut.reject(\n new Error(`WebSocket closed before open (code=${code}, reason=${reason.toString()})`),\n );\n const onAbort = () => fut.reject(new Error('aborted'));\n\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n abortSignal.addEventListener('abort', onAbort, { once: true });\n\n if (timeoutMs > 0) {\n timeout = setTimeout(() => fut.reject(new Error('connect timeout')), timeoutMs);\n }\n\n try {\n await fut.await;\n } finally {\n cleanup();\n }\n};\n\nconst safeTerminateWebSocket = (ws: WebSocket) => {\n // `ws` can emit an 'error' event during teardown (especially if CONNECTING).\n // If there is no error listener at that moment, Node will treat it as unhandled and crash the process.\n try {\n ws.on('error', () => {});\n } catch {\n // ignore\n }\n\n try {\n // `terminate()` can throw if the socket was never established; `close()` is safer in CONNECTING.\n if (ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n } else {\n ws.terminate();\n }\n } catch {\n // ignore\n }\n};\n\nconst connectCartesiaWebSocket = async ({\n url,\n timeoutMs,\n abortSignal,\n}: {\n url: string;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}): Promise<WebSocket> => {\n const connectOnce = async (family?: number): 
Promise<WebSocket> => {\n const ws = new WebSocket(url, { handshakeTimeout: timeoutMs, family });\n try {\n await waitForWsOpen({ ws, timeoutMs, abortSignal });\n return ws;\n } catch (e) {\n safeTerminateWebSocket(ws);\n throw e;\n }\n };\n\n try {\n return await connectOnce();\n } catch (e) {\n // Mitigation for Node.js dual-stack (IPv6/IPv4) connect flakiness (\"happy eyeballs\"):\n // some environments surface `AggregateError` with nested `ETIMEDOUT` during the initial\n // WebSocket open. In that case we do a one-off retry forcing IPv4 (`family: 4`) before\n // letting the outer framework retry loop handle further attempts.\n //\n // If you still see `AggregateError`/`ETIMEDOUT`:\n // - Increase the session TTS connect timeout (`connOptions.ttsConnOptions.timeoutMs`)\n // - Or adjust Node's family autoselection behavior via `NODE_OPTIONS`, e.g.\n // `--network-family-autoselection-attempt-timeout=5000` (or disable it entirely).\n if (hasAnyTransientCode(e) || isAggregateErrorLike(e)) {\n return await connectOnce(4);\n }\n throw e;\n }\n};\n\n/**\n * Convert TTSOptions to Cartesia API format.\n *\n * @param opts - TTS options\n * @param streaming - Whether this is for streaming (WebSocket) or non-streaming (HTTP)\n */\nconst toCartesiaOptions = (\n opts: TTSOptions,\n streaming: boolean = false,\n): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n const result: { [id: string]: unknown } = {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n 
sample_rate: opts.sampleRate,\n },\n language: opts.language,\n pronunciation_dict_id: opts.pronunciationDictId,\n };\n\n if (streaming && opts.wordTimestamps !== false) {\n result.add_timestamps = true;\n }\n\n return result;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAaO;AAEP,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AACP,mBAOO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AA4B7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAAA,EACd,gBAAgB;AAClB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,eAAe;AAAA,MACnB,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,UAAM,aAAa,cAAc,kBAAkB,YAAY,cAAc;AAAA,MAC3E,WAAW;AAAA,MACX,mBAAmB,aAAa,kBAAkB;AAAA,IACpD,CAAC;AAED,SAAK,QAAQ;AAEb,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,OAAO,SAAiE;AACtE,WAAO,IAAI,iBAAiB,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EACpE;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,cAAU,mBAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO
;AAC1C,UAAM,UAAU,IAAI,qBAAa;AAEjC,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,cAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,QACrC,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AACzD,cAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,QACvC,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AACxD,UAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,IACvC,CAAC;AACD,QAAI,GAAG,SAAS,MAAM;AACpB,UAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,IACrC,CAAC;AACD,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,QAAI;AACF,YAAM,QAAQ;AAAA,IAChB,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,QAAS;AAC9B,UAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,YAAM,2BAA2B,CAAC;AAAA,IACpC;AAAA,EACF;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,WAAW;AACtB,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,QAAI,uBAAuB;AAE3B,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,OAAO,IAAI;AACjD,uBAAiB,SAAS,KAAK
,YAAY;AACzC,cAAM,MAAM;AAAA,UACV,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY,MAAM,QAAQ;AAAA,UAC1B,UAAU;AAAA,QACZ;AACA,QAAAA,IAAG,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,MAC7B;AAEA,YAAM,SAAS;AAAA,QACb,GAAG;AAAA,QACH,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,UAAU;AAAA,MACZ;AACA,MAAAA,IAAG,KAAK,KAAK,UAAU,MAAM,CAAC;AAE9B,6BAAuB;AAAA,IACzB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAGA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAIvE,YAAM,eAAe,qBAAO,oBAA6B;AAEzD,UAAI;AACJ,UAAI,0BAAyC,CAAC;AAE9C,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AAEnC,eAAK,MAAM,IAAI;AAAA,YACb;AAAA,YACA;AAAA,YACA,OAAO;AAAA,YACP;AAAA,YACA,kBACE,wBAAwB,SAAS,IAAI,0BAA0B;AAAA,UACnE,CAAC;AACD,sBAAY;AACZ,oCAA0B,CAAC;AAAA,QAC7B;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAGA,YAAM,YAAY,CAAC,SAAkB;AACnC,aAAK,aAAa,MAAM,IAAI,EAAE,MAAM,CAAC,UAAmB;AACtD,eAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,0DAA0D;AAAA,QAC1F,CAAC;AAAA,MACH;AAEA,YAAM,UAAU,CAAC,MAAc,WAAmB;AAChD,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,OAAO,SAAS,CAAC,EAAE;AAAA,QAC/E;AACA,6BAAqB;AACrB,aAAK,aAAa,MAAM;AAAA,MAC1B;AAEA,YAAM,UAAU,CAAC,QAAe;AAC9B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,0BAA0B;AACtD,aAAK,aAAa,MAAM;AAAA,MAC1B;AAGA,MAAAA,IAAG,GAAG,WAAW,SAAS;AAC1B,MAAAA,IAAG,GAAG,SAAS,OAAO;AACtB,MAAAA,IAAG,GAAG,SAAS,OAAO;AAEtB,UAAI;AAEF,cAAM,SAAS,aAAa,OAAO,EAAE,UAAU;AAE/C,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,KAAM;AAEjB,gBAAM,SAAS,OAAO;AAGtB,cAAI;AACJ,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,OAAO,SAAS,CAAC;AACzC,wBAAY,mCAAsB,MAAM,IAAI;AAAA,UAC9C,SAAS,UAAU;AACjB,iBAAK,QAAQ,KAAK,EAAE,SAAS,GAAG,kCAAkC;AAClE;AAAA,UACF;AAGA,kBAAI,6BAAe,SAAS,GAAG;AAC7B,iBAAK,QAAQ,MAAM,EAAE,OAAO,UAAU,MAAM,GAAG,yBAAyB;AACxE;AAAA,UACF;AAEA,gBAAM,YAAY,UAAU;AAG5B,cAAI,KAAK,MAAM,mBAAmB,aAAS,gCAAkB,SAAS,
GAAG;AACvE,kBAAM,iBAAiB,UAAU;AACjC,qBAAS,IAAI,GAAG,IAAI,eAAe,MAAM,QAAQ,KAAK;AACpD,oBAAM,OAAO,eAAe,MAAM,CAAC;AACnC,oBAAM,YAAY,eAAe,MAAM,CAAC;AACxC,oBAAM,UAAU,eAAe,IAAI,CAAC;AACpC,kBAAI,SAAS,UAAa,cAAc,UAAa,YAAY,QAAW;AAC1E,wCAAwB;AAAA,sBACtB,iCAAkB;AAAA,oBAChB,MAAM,OAAO;AAAA;AAAA,oBACb;AAAA,oBACA;AAAA,kBACF,CAAC;AAAA,gBACH;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAGA,kBAAI,6BAAe,SAAS,GAAG;AAC7B,kBAAM,cAAc,OAAO,KAAK,UAAU,MAAM,QAAQ;AAExD,kBAAM,YAAY,YAAY,OAAO;AAAA,cACnC,YAAY;AAAA,cACZ,YAAY,aAAa,YAAY;AAAA,YACvC;AACA,uBAAW,SAAS,QAAQ,MAAM,SAAS,GAAG;AAC5C,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AAKA,iCAAqB;AACrB,sBAAU,WAAW,MAAM;AAEzB,mBAAK,QAAQ;AAAA,gBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,cAC9E;AACA,cAAAA,IAAG,MAAM;AAAA,YACX,GAAG,KAAK,MAAM,YAAY;AAAA,UAC5B,eAAW,4BAAc,SAAS,GAAG;AAEnC,gBAAI,sBAAsB;AACxB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UAEF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AAEZ,YAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,cACE,IAAI,QAAQ,SAAS,iBAAiB,KACtC,IAAI,QAAQ,SAAS,mBAAmB,GACxC;AACA,iBAAK,QAAQ;AAAA,cACX,EAAE,IAAI;AAAA,cACN;AAAA,YACF;AAAA,UACF,OAAO;AACL,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AAAA,QACF;AAAA,MACF,UAAE;AAEA,QAAAA,IAAG,IAAI,WAAW,SAAS;AAC3B,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,6BAAqB;AAAA,MACvB;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAE3F,QAAI;AACJ,QAAI;AACF,WAAK,MAAM,yBAAyB;AAAA,QAClC;AAAA,QACA,WAAW,KAAK,YAAY;AAAA,QAC5B,aAAa,KAAK;AAAA,MACpB,CAAC;AACD,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,SAAS;AAC5B;AAAA,MACF;AACA,YAAM,2BAA2B,CAAC;AAAA,IACpC,UAAE;AAEA,UAAI,MAAM,GAAG,eAAe,oBAAU,QAAQ;AAC5C,+BAAuB,EAAE;AAAA,MAC3B;AAAA,IACF;AAAA,EACF;AACF;AAEA,MAAM,UAAU,CAAC,MAAuB,aAAa,QAAQ
,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAEpF,MAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,MAAM,WAAW,CAAC,MAA6C;AAC7D,SAAO,MAAM,QAAQ,OAAO,MAAM;AACpC;AAEA,MAAM,uBAAuB,CAAC,MAA0D;AACtF,MAAI,CAAC,SAAS,CAAC,EAAG,QAAO;AACzB,SAAO,EAAE,SAAS,oBAAoB,MAAM,QAAQ,EAAE,MAAM;AAC9D;AAEA,MAAM,eAAe,CAAC,GAAY,SAA0B;AAC1D,MAAI,SAAS,CAAC,KAAK,EAAE,SAAS,KAAM,QAAO;AAC3C,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,aAAa,OAAO,IAAI,CAAC;AAAA,EAC3D;AACA,SAAO;AACT;AAEA,MAAM,sBAAsB,CAAC,MAAwB;AACnD,MAAI,SAAS,CAAC,KAAK,OAAO,EAAE,SAAS,UAAU;AAC7C,WAAO,sBAAsB,IAAI,EAAE,IAAI;AAAA,EACzC;AACA,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,oBAAoB,KAAK,CAAC;AAAA,EAC5D;AACA,SAAO;AACT;AAEA,MAAM,6BAA6B,CAAC,MAAmC;AACrE,QAAM,MAAM,QAAQ,CAAC;AACrB,QAAM,YACJ,aAAa,GAAG,WAAW,KAC1B,OAAO,IAAI,YAAY,YAAY,IAAI,QAAQ,SAAS,WAAW;AACtE,QAAM,UAAU,YACZ,kCACA,+BAA+B,IAAI,WAAW,eAAe;AACjE,SAAO,YAAY,IAAI,8BAAgB,EAAE,QAAQ,CAAC,IAAI,IAAI,iCAAmB,EAAE,QAAQ,CAAC;AAC1F;AAEA,MAAM,gBAAgB,OAAO;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AACF,MAIM;AACJ,MAAI,YAAY,SAAS;AACvB,UAAM,IAAI,MAAM,SAAS;AAAA,EAC3B;AAEA,QAAM,MAAM,IAAI,qBAAa;AAC7B,MAAI;AAEJ,QAAM,UAAU,MAAM;AACpB,QAAI,QAAS,cAAa,OAAO;AACjC,OAAG,IAAI,QAAQ,MAAM;AACrB,OAAG,IAAI,SAAS,OAAO;AACvB,OAAG,IAAI,SAAS,OAAO;AACvB,gBAAY,oBAAoB,SAAS,OAAO;AAAA,EAClD;AAEA,QAAM,SAAS,MAAM,IAAI,QAAQ;AACjC,QAAM,UAAU,CAAC,QAAe,IAAI,OAAO,QAAQ,GAAG,CAAC;AACvD,QAAM,UAAU,CAAC,MAAc,WAC7B,IAAI;AAAA,IACF,IAAI,MAAM,sCAAsC,IAAI,YAAY,OAAO,SAAS,CAAC,GAAG;AAAA,EACtF;AACF,QAAM,UAAU,MAAM,IAAI,OAAO,IAAI,MAAM,SAAS,CAAC;AAErD,KAAG,GAAG,QAAQ,MAAM;AACpB,KAAG,GAAG,SAAS,OAAO;AACtB,KAAG,GAAG,SAAS,OAAO;AACtB,cAAY,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAE7D,MAAI,YAAY,GAAG;AACjB,cAAU,WAAW,MAAM,IAAI,OAAO,IAAI,MAAM,iBAAiB,CAAC,GAAG,SAAS;AAAA,EAChF;AAEA,MAAI;AACF,UAAM,IAAI;AAAA,EACZ,UAAE;AACA,YAAQ;AAAA,EACV;AACF;AAEA,MAAM,yBAAyB,CAAC,OAAkB;AAGhD,MAAI;AACF,OAAG,GAAG,SAAS,MAAM;AAAA,IAAC,CAAC;AAAA,EACzB,QAAQ;AAAA,EAER;AAEA,MAAI;AAEF,QAAI,GAAG,eAAe,oBAAU,YAAY;AAC1C,SAAG,MAAM;AAAA,IACX,O
AAO;AACL,SAAG,UAAU;AAAA,IACf;AAAA,EACF,QAAQ;AAAA,EAER;AACF;AAEA,MAAM,2BAA2B,OAAO;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AACF,MAI0B;AACxB,QAAM,cAAc,OAAO,WAAwC;AACjE,UAAM,KAAK,IAAI,oBAAU,KAAK,EAAE,kBAAkB,WAAW,OAAO,CAAC;AACrE,QAAI;AACF,YAAM,cAAc,EAAE,IAAI,WAAW,YAAY,CAAC;AAClD,aAAO;AAAA,IACT,SAAS,GAAG;AACV,6BAAuB,EAAE;AACzB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI;AACF,WAAO,MAAM,YAAY;AAAA,EAC3B,SAAS,GAAG;AAUV,QAAI,oBAAoB,CAAC,KAAK,qBAAqB,CAAC,GAAG;AACrD,aAAO,MAAM,YAAY,CAAC;AAAA,IAC5B;AACA,UAAM;AAAA,EACR;AACF;AAQA,MAAM,oBAAoB,CACxB,MACA,YAAqB,UACS;AAC9B,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,QAAM,SAAoC;AAAA,IACxC,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,IACf,uBAAuB,KAAK;AAAA,EAC9B;AAEA,MAAI,aAAa,KAAK,mBAAmB,OAAO;AAC9C,WAAO,iBAAiB;AAAA,EAC1B;AAEA,SAAO;AACT;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APITimeoutError,\n AudioByteStream,\n Future,\n type TimedString,\n createTimedString,\n log,\n shortuuid,\n stream,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n isSonic3,\n} from './models.js';\nimport {\n type CartesiaServerMessage,\n cartesiaMessageSchema,\n hasWordTimestamps,\n isChunkMessage,\n isDoneMessage,\n isErrorMessage,\n} from './types.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst API_VERSION = '2025-04-16';\nconst API_VERSION_WITH_EXPERIMENTAL_CONTROLS = '2024-11-13';\nconst MODEL_WITH_EXPERIMENTAL_CONTROLS = 'sonic-2-2025-03-07';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n /**\n * Volume of the speech. For sonic-3, the value is valid between 0.5 and 2.0.\n * @see https://docs.cartesia.ai/api-reference/tts/bytes#body-generation-config-volume\n */\n volume?: number;\n apiKey?: string;\n language: string;\n baseUrl: string;\n apiVersion: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n\n /**\n * Whether to add word timestamps to the output. 
When enabled, the TTS will return\n * timing information for each word in the transcript.\n * @defaultValue true\n */\n wordTimestamps?: boolean;\n\n pronunciationDictId?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-3',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n apiVersion: API_VERSION,\n chunkTimeout: 5000,\n wordTimestamps: true,\n};\n\nconst checkGenerationConfig = (opts: TTSOptions) => {\n const logger = log();\n if (isSonic3(opts.model)) {\n if (opts.speed !== undefined && typeof opts.speed === 'number') {\n if (opts.speed < 0.6 || opts.speed > 2.0) {\n logger.warn('speed must be between 0.6 and 2.0 for sonic-3');\n }\n }\n if (opts.volume !== undefined && (opts.volume < 0.5 || opts.volume > 2.0)) {\n logger.warn('volume must be between 0.5 and 2.0 for sonic-3');\n }\n } else if (\n opts.apiVersion !== API_VERSION_WITH_EXPERIMENTAL_CONTROLS ||\n opts.model !== MODEL_WITH_EXPERIMENTAL_CONTROLS\n ) {\n if (opts.speed || opts.emotion) {\n logger.warn(\n { model: opts.model, speed: opts.speed, emotion: opts.emotion },\n `speed and emotion controls are only supported for model '${MODEL_WITH_EXPERIMENTAL_CONTROLS}' ` +\n `or sonic-3 models, see https://docs.cartesia.ai/developer-tools/changelog for details`,\n );\n }\n }\n\n if (opts.pronunciationDictId && !isSonic3(opts.model)) {\n logger.warn(\n { model: opts.model, pronunciationDictId: opts.pronunciationDictId },\n 'pronunciationDictId is only supported for sonic-3 models',\n );\n }\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolvedOpts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n alignedTranscript: resolvedOpts.wordTimestamps ?? 
true,\n });\n\n this.#opts = resolvedOpts;\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts, options?.connOptions);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: this.#opts.apiVersion,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n if (!doneFut.done) doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n if (!doneFut.done) doneFut.reject(err);\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n if (!doneFut.done) doneFut.reject(err);\n });\n req.on('close', () => {\n if (!doneFut.done) doneFut.resolve();\n });\n req.write(JSON.stringify(json));\n req.end();\n\n try {\n await doneFut.await;\n } catch (e) {\n if (this.abortSignal.aborted) return;\n if (!this.queue.closed) this.queue.close();\n throw toRetryableConnectionError(e);\n }\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = 
false;\n // Only close WebSocket when both: 1) Cartesia returns done, AND 2) all sentences have been sent\n let sentenceStreamClosed = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts, true);\n for await (const event of this.#tokenizer) {\n const msg = {\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n };\n ws.send(JSON.stringify(msg));\n }\n\n const endMsg = {\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n };\n ws.send(JSON.stringify(endMsg));\n // Mark sentence stream as closed\n sentenceStreamClosed = true;\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n // Use event channel and set up listeners ONCE to avoid missing messages during listener re-registration\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n // Create event channel to buffer incoming messages\n // This prevents message loss between listener re-registrations\n const eventChannel = stream.createStreamChannel<RawData>();\n\n let lastFrame: AudioFrame | undefined;\n let pendingTimedTranscripts: TimedString[] = [];\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n // Include timedTranscripts with the audio frame\n this.queue.put({\n requestId,\n segmentId,\n frame: lastFrame,\n final,\n timedTranscripts:\n pendingTimedTranscripts.length > 0 ? 
pendingTimedTranscripts : undefined,\n });\n lastFrame = undefined;\n pendingTimedTranscripts = [];\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n // Set up WebSocket listeners ONCE (not in a loop)\n const onMessage = (data: RawData) => {\n void eventChannel.write(data).catch((error: unknown) => {\n this.#logger.debug({ error }, 'Failed writing Cartesia event to channel (likely closed)');\n });\n };\n\n const onClose = (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);\n }\n clearTTSChunkTimeout();\n void eventChannel.close();\n };\n\n const onError = (err: Error) => {\n this.#logger.error({ err }, 'Cartesia WebSocket error');\n void eventChannel.close();\n };\n\n // Attach listeners ONCE\n ws.on('message', onMessage);\n ws.on('close', onClose);\n ws.on('error', onError);\n\n try {\n // Process messages from the channel\n const reader = eventChannel.stream().getReader();\n\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (result.done) break;\n\n const rawMsg = result.value;\n\n // Parse message with Zod schema for type safety\n let serverMsg: CartesiaServerMessage;\n try {\n const json = JSON.parse(rawMsg.toString());\n serverMsg = cartesiaMessageSchema.parse(json);\n } catch (parseErr) {\n this.#logger.warn({ parseErr }, 'Failed to parse Cartesia message');\n continue;\n }\n\n // Handle error messages\n if (isErrorMessage(serverMsg)) {\n this.#logger.error({ error: serverMsg.error }, 'Cartesia returned error');\n continue;\n }\n\n const segmentId = serverMsg.context_id;\n\n // Process word timestamps if present (typed via Zod schema)\n if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {\n const wordTimestamps = serverMsg.word_timestamps;\n for (let i = 0; i < 
wordTimestamps.words.length; i++) {\n const word = wordTimestamps.words[i];\n const startTime = wordTimestamps.start[i];\n const endTime = wordTimestamps.end[i];\n if (word !== undefined && startTime !== undefined && endTime !== undefined) {\n pendingTimedTranscripts.push(\n createTimedString({\n text: word + ' ', // Add space after word for consistency\n startTime,\n endTime,\n }),\n );\n }\n }\n }\n\n // Handle audio chunk messages\n if (isChunkMessage(serverMsg)) {\n const audioBuffer = Buffer.from(serverMsg.data, 'base64');\n // Extract ArrayBuffer from Buffer for AudioByteStream compatibility\n const audioData = audioBuffer.buffer.slice(\n audioBuffer.byteOffset,\n audioBuffer.byteOffset + audioBuffer.byteLength,\n );\n for (const frame of bstream.write(audioData)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if (isDoneMessage(serverMsg)) {\n // This ensures all sentences have been sent before closing\n if (sentenceStreamClosed) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n clearTTSChunkTimeout();\n ws.close();\n break; // Exit the loop\n }\n }\n // If sentenceStreamClosed is false, continue receiving - more done messages will come\n }\n }\n } catch (err) {\n // skip log error for normal websocket close\n if 
(err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (\n err.message.includes('Queue is closed') ||\n err.message.includes('Channel is closed')\n ) {\n this.#logger.warn(\n { err },\n 'Channel closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('close', onClose);\n ws.off('error', onError);\n clearTTSChunkTimeout();\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${this.#opts.apiVersion}`;\n\n let ws: WebSocket | undefined;\n try {\n ws = await connectCartesiaWebSocket({\n url,\n timeoutMs: this.connOptions.timeoutMs,\n abortSignal: this.abortSignal,\n });\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n if (this.abortSignal.aborted) {\n return;\n }\n throw toRetryableConnectionError(e);\n } finally {\n // Ensure we don't leak sockets/tasks across retry attempts.\n if (ws && ws.readyState !== WebSocket.CLOSED) {\n safeTerminateWebSocket(ws);\n }\n }\n }\n}\n\nconst asError = (e: unknown): Error => (e instanceof Error ? 
e : new Error(String(e)));\n\nconst transientNetworkCodes = new Set([\n 'ETIMEDOUT',\n 'ECONNRESET',\n 'EAI_AGAIN',\n 'ENETUNREACH',\n 'ECONNREFUSED',\n 'EHOSTUNREACH',\n]);\n\nconst isRecord = (v: unknown): v is Record<string, unknown> => {\n return v !== null && typeof v === 'object';\n};\n\nconst isAggregateErrorLike = (e: unknown): e is { errors: unknown[]; name?: string } => {\n if (!isRecord(e)) return false;\n return e.name === 'AggregateError' && Array.isArray(e.errors);\n};\n\nconst hasErrorCode = (e: unknown, code: string): boolean => {\n if (isRecord(e) && e.code === code) return true;\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasErrorCode(inner, code));\n }\n return false;\n};\n\nconst hasAnyTransientCode = (e: unknown): boolean => {\n if (isRecord(e) && typeof e.code === 'string') {\n return transientNetworkCodes.has(e.code);\n }\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasAnyTransientCode(inner));\n }\n return false;\n};\n\nconst toRetryableConnectionError = (e: unknown): APIConnectionError => {\n const err = asError(e);\n const isTimeout =\n hasErrorCode(e, 'ETIMEDOUT') ||\n (typeof err.message === 'string' && err.message.includes('ETIMEDOUT'));\n const message = isTimeout\n ? `Cartesia connection timed out`\n : `Cartesia connection failed: ${err.message || 'unknown error'}`;\n return isTimeout ? 
new APITimeoutError({ message }) : new APIConnectionError({ message });\n};\n\nconst waitForWsOpen = async ({\n ws,\n timeoutMs,\n abortSignal,\n}: {\n ws: WebSocket;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}) => {\n if (abortSignal.aborted) {\n throw new Error('aborted');\n }\n\n const fut = new Future<void>();\n let timeout: NodeJS.Timeout | undefined;\n\n const cleanup = () => {\n if (timeout) clearTimeout(timeout);\n ws.off('open', onOpen);\n ws.off('error', onError);\n ws.off('close', onClose);\n abortSignal.removeEventListener('abort', onAbort);\n };\n\n const onOpen = () => fut.resolve();\n const onError = (err: Error) => fut.reject(asError(err));\n const onClose = (code: number, reason: Buffer) =>\n fut.reject(\n new Error(`WebSocket closed before open (code=${code}, reason=${reason.toString()})`),\n );\n const onAbort = () => fut.reject(new Error('aborted'));\n\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n abortSignal.addEventListener('abort', onAbort, { once: true });\n\n if (timeoutMs > 0) {\n timeout = setTimeout(() => fut.reject(new Error('connect timeout')), timeoutMs);\n }\n\n try {\n await fut.await;\n } finally {\n cleanup();\n }\n};\n\nconst safeTerminateWebSocket = (ws: WebSocket) => {\n // `ws` can emit an 'error' event during teardown (especially if CONNECTING).\n // If there is no error listener at that moment, Node will treat it as unhandled and crash the process.\n try {\n ws.on('error', () => {});\n } catch {\n // ignore\n }\n\n try {\n // `terminate()` can throw if the socket was never established; `close()` is safer in CONNECTING.\n if (ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n } else {\n ws.terminate();\n }\n } catch {\n // ignore\n }\n};\n\nconst connectCartesiaWebSocket = async ({\n url,\n timeoutMs,\n abortSignal,\n}: {\n url: string;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}): Promise<WebSocket> => {\n const connectOnce = async (family?: number): 
Promise<WebSocket> => {\n const ws = new WebSocket(url, { handshakeTimeout: timeoutMs, family });\n try {\n await waitForWsOpen({ ws, timeoutMs, abortSignal });\n return ws;\n } catch (e) {\n safeTerminateWebSocket(ws);\n throw e;\n }\n };\n\n try {\n return await connectOnce();\n } catch (e) {\n // Mitigation for Node.js dual-stack (IPv6/IPv4) connect flakiness (\"happy eyeballs\"):\n // some environments surface `AggregateError` with nested `ETIMEDOUT` during the initial\n // WebSocket open. In that case we do a one-off retry forcing IPv4 (`family: 4`) before\n // letting the outer framework retry loop handle further attempts.\n //\n // If you still see `AggregateError`/`ETIMEDOUT`:\n // - Increase the session TTS connect timeout (`connOptions.ttsConnOptions.timeoutMs`)\n // - Or adjust Node's family autoselection behavior via `NODE_OPTIONS`, e.g.\n // `--network-family-autoselection-attempt-timeout=5000` (or disable it entirely).\n if (hasAnyTransientCode(e) || isAggregateErrorLike(e)) {\n return await connectOnce(4);\n }\n throw e;\n }\n};\n\nconst toCartesiaOptions = (\n opts: TTSOptions,\n streaming: boolean = false,\n): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n if (opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS) {\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n }\n\n const result: { [id: string]: unknown } = {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n max_buffer_delay_ms: 0,\n };\n\n if 
(opts.pronunciationDictId) {\n result.pronunciation_dict_id = opts.pronunciationDictId;\n }\n\n if (opts.apiVersion > API_VERSION_WITH_EXPERIMENTAL_CONTROLS && isSonic3(opts.model)) {\n const generationConfig: { [id: string]: unknown } = {};\n if (opts.speed) {\n generationConfig.speed = opts.speed;\n }\n if (opts.emotion) {\n generationConfig.emotion = opts.emotion[0];\n }\n if (opts.volume) {\n generationConfig.volume = opts.volume;\n }\n if (Object.keys(generationConfig).length) {\n result.generation_config = generationConfig;\n }\n }\n\n if (streaming && opts.wordTimestamps !== false) {\n result.add_timestamps = true;\n }\n\n return result;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAaO;AAEP,wBAAwB;AACxB,gBAAwC;AACxC,oBAOO;AACP,mBAOO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,cAAc;AACpB,MAAM,yCAAyC;AAC/C,MAAM,mCAAmC;AACzC,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAkC7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,gBAAgB;AAClB;AAEA,MAAM,wBAAwB,CAAC,SAAqB;AAClD,QAAM,aAAS,mBAAI;AACnB,UAAI,wBAAS,KAAK,KAAK,GAAG;AACxB,QAAI,KAAK,UAAU,UAAa,OAAO,KAAK,UAAU,UAAU;AAC9D,UAAI,KAAK,QAAQ,OAAO,KAAK,QAAQ,GAAK;AACxC,eAAO,KAAK,+CAA+C;AAAA,MAC7D;AAAA,IACF;AACA,QAAI,KAAK,WAAW,WAAc,KAAK,SAAS,OAAO,KAAK,SAAS,IAAM;AACzE,aAAO,KAAK,gDAAgD;AAAA,IAC9D;AAAA,EACF,WACE,KAAK,eAAe,0CACpB,KAAK,UAAU,kCACf;AACA,QAAI,KAAK,SAAS,KAAK,SAAS;AAC9B,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,OAAO,OAAO,KAAK,OAAO,SAAS,KAAK,QAAQ;AAAA,QAC9D,4DAA4D,gCAAgC;AAAA,MAE9F;AAAA,IACF;AAAA,EACF;AAEA,MAAI,KAAK,uBAAuB,KAAC,wBAAS,KAAK,KAAK,GAAG;AACrD,WAAO;AAAA,MACL,EAAE,OAAO,KAAK,OAAO,qBAAqB,KAAK,oBAAoB;AAAA,MACnE;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,eAAe;AAAA,MACnB,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,UAAM,aAAa,cAAc,kBAAkB,YAAY,cAAc;AAAA,MAC3E,WAAW;AAAA,MACX,mBAAmB,aAAa,kBAAkB;AAAA,IACpD,CAAC;AAED,SAAK,QAAQ;A
AEb,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,OAAO,SAAiE;AACtE,WAAO,IAAI,iBAAiB,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EACpE;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,cAAU,mBAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,qBAAa;AAEjC,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG,KAAK,MAAM;AAAA,QAC/B;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,cAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,QACrC,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AACzD,cAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,QACvC,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AACxD,UAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,IACvC,CAAC;AACD,QAAI,GAAG,SAAS
,MAAM;AACpB,UAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,IACrC,CAAC;AACD,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,QAAI;AACF,YAAM,QAAQ;AAAA,IAChB,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,QAAS;AAC9B,UAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,YAAM,2BAA2B,CAAC;AAAA,IACpC;AAAA,EACF;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,WAAW;AACtB,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,QAAI,uBAAuB;AAE3B,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,OAAO,IAAI;AACjD,uBAAiB,SAAS,KAAK,YAAY;AACzC,cAAM,MAAM;AAAA,UACV,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY,MAAM,QAAQ;AAAA,UAC1B,UAAU;AAAA,QACZ;AACA,QAAAA,IAAG,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,MAC7B;AAEA,YAAM,SAAS;AAAA,QACb,GAAG;AAAA,QACH,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,UAAU;AAAA,MACZ;AACA,MAAAA,IAAG,KAAK,KAAK,UAAU,MAAM,CAAC;AAE9B,6BAAuB;AAAA,IACzB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAGA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAIvE,YAAM,eAAe,qBAAO,oBAA6B;AAEzD,UAAI;AACJ,UAAI,0BAAyC,CAAC;AAE9C,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AAEnC,eAAK,MAAM,IAAI;AAAA,YACb;AAAA,YACA;AAAA,YACA,OAAO;AAAA,YACP;AAAA,YACA,kBACE,wBAAwB,SAAS,IAAI,0BAA0B;AAAA,UACnE,CAAC;AACD,sBAAY;AACZ,oCAA0B,CAAC;AAAA,QAC7B;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAGA,YAAM,YAAY,CAAC,SAAkB;AACnC,aAAK,aAAa,MAAM,IAAI,EAAE,MAAM,CAAC,UAAmB;AACtD,eAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,0DAA0D;AAAA,QAC1F,CAAC;AAAA,MACH;AAEA
,YAAM,UAAU,CAAC,MAAc,WAAmB;AAChD,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,OAAO,SAAS,CAAC,EAAE;AAAA,QAC/E;AACA,6BAAqB;AACrB,aAAK,aAAa,MAAM;AAAA,MAC1B;AAEA,YAAM,UAAU,CAAC,QAAe;AAC9B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,0BAA0B;AACtD,aAAK,aAAa,MAAM;AAAA,MAC1B;AAGA,MAAAA,IAAG,GAAG,WAAW,SAAS;AAC1B,MAAAA,IAAG,GAAG,SAAS,OAAO;AACtB,MAAAA,IAAG,GAAG,SAAS,OAAO;AAEtB,UAAI;AAEF,cAAM,SAAS,aAAa,OAAO,EAAE,UAAU;AAE/C,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,KAAM;AAEjB,gBAAM,SAAS,OAAO;AAGtB,cAAI;AACJ,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,OAAO,SAAS,CAAC;AACzC,wBAAY,mCAAsB,MAAM,IAAI;AAAA,UAC9C,SAAS,UAAU;AACjB,iBAAK,QAAQ,KAAK,EAAE,SAAS,GAAG,kCAAkC;AAClE;AAAA,UACF;AAGA,kBAAI,6BAAe,SAAS,GAAG;AAC7B,iBAAK,QAAQ,MAAM,EAAE,OAAO,UAAU,MAAM,GAAG,yBAAyB;AACxE;AAAA,UACF;AAEA,gBAAM,YAAY,UAAU;AAG5B,cAAI,KAAK,MAAM,mBAAmB,aAAS,gCAAkB,SAAS,GAAG;AACvE,kBAAM,iBAAiB,UAAU;AACjC,qBAAS,IAAI,GAAG,IAAI,eAAe,MAAM,QAAQ,KAAK;AACpD,oBAAM,OAAO,eAAe,MAAM,CAAC;AACnC,oBAAM,YAAY,eAAe,MAAM,CAAC;AACxC,oBAAM,UAAU,eAAe,IAAI,CAAC;AACpC,kBAAI,SAAS,UAAa,cAAc,UAAa,YAAY,QAAW;AAC1E,wCAAwB;AAAA,sBACtB,iCAAkB;AAAA,oBAChB,MAAM,OAAO;AAAA;AAAA,oBACb;AAAA,oBACA;AAAA,kBACF,CAAC;AAAA,gBACH;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAGA,kBAAI,6BAAe,SAAS,GAAG;AAC7B,kBAAM,cAAc,OAAO,KAAK,UAAU,MAAM,QAAQ;AAExD,kBAAM,YAAY,YAAY,OAAO;AAAA,cACnC,YAAY;AAAA,cACZ,YAAY,aAAa,YAAY;AAAA,YACvC;AACA,uBAAW,SAAS,QAAQ,MAAM,SAAS,GAAG;AAC5C,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AAKA,iCAAqB;AACrB,sBAAU,WAAW,MAAM;AAEzB,mBAAK,QAAQ;AAAA,gBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,cAC9E;AACA,cAAAA,IAAG,MAAM;AAAA,YACX,GAAG,KAAK,MAAM,YAAY;AAAA,UAC5B,eAAW,4BAAc,SAAS,GAAG;AAEnC,gBAAI,sBAAsB;AACxB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UAEF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AAEZ,YAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;
AACrE,cACE,IAAI,QAAQ,SAAS,iBAAiB,KACtC,IAAI,QAAQ,SAAS,mBAAmB,GACxC;AACA,iBAAK,QAAQ;AAAA,cACX,EAAE,IAAI;AAAA,cACN;AAAA,YACF;AAAA,UACF,OAAO;AACL,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AAAA,QACF;AAAA,MACF,UAAE;AAEA,QAAAA,IAAG,IAAI,WAAW,SAAS;AAC3B,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,6BAAqB;AAAA,MACvB;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,KAAK,MAAM,UAAU;AAEzG,QAAI;AACJ,QAAI;AACF,WAAK,MAAM,yBAAyB;AAAA,QAClC;AAAA,QACA,WAAW,KAAK,YAAY;AAAA,QAC5B,aAAa,KAAK;AAAA,MACpB,CAAC;AACD,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,SAAS;AAC5B;AAAA,MACF;AACA,YAAM,2BAA2B,CAAC;AAAA,IACpC,UAAE;AAEA,UAAI,MAAM,GAAG,eAAe,oBAAU,QAAQ;AAC5C,+BAAuB,EAAE;AAAA,MAC3B;AAAA,IACF;AAAA,EACF;AACF;AAEA,MAAM,UAAU,CAAC,MAAuB,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAEpF,MAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,MAAM,WAAW,CAAC,MAA6C;AAC7D,SAAO,MAAM,QAAQ,OAAO,MAAM;AACpC;AAEA,MAAM,uBAAuB,CAAC,MAA0D;AACtF,MAAI,CAAC,SAAS,CAAC,EAAG,QAAO;AACzB,SAAO,EAAE,SAAS,oBAAoB,MAAM,QAAQ,EAAE,MAAM;AAC9D;AAEA,MAAM,eAAe,CAAC,GAAY,SAA0B;AAC1D,MAAI,SAAS,CAAC,KAAK,EAAE,SAAS,KAAM,QAAO;AAC3C,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,aAAa,OAAO,IAAI,CAAC;AAAA,EAC3D;AACA,SAAO;AACT;AAEA,MAAM,sBAAsB,CAAC,MAAwB;AACnD,MAAI,SAAS,CAAC,KAAK,OAAO,EAAE,SAAS,UAAU;AAC7C,WAAO,sBAAsB,IAAI,EAAE,IAAI;AAAA,EACzC;AACA,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,oBAAoB,KAAK,CAAC;AAAA,EAC5D;AACA,SAAO;AACT;AAEA,MAAM,6BAA6B,CAAC,MAAmC;AACrE,QAAM,MAAM,QAAQ,CAAC;AACrB,QAAM,YACJ,aAAa,GAAG,WAAW,KAC1B,OAAO,IAAI,YAAY,YAAY,IAAI,QAAQ,SAAS,WAAW;AACtE,QAAM,UAAU,YACZ,kCACA,+BAA+B,IAAI,WAAW,eAAe;AACjE,SAAO,YAAY,IAAI,8BAAgB,EAAE,QAAQ,CAAC,IAAI,IAAI,iCAAmB,EAAE,QAAQ,CAAC;AAC1F;AAEA,MAAM,gBAAgB,OAAO;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AACF,MAIM;AACJ,MAAI,YAAY,SAAS;AACvB,UAAM,IAAI,MAAM,SAAS;AAAA,EAC3B;AAEA,QAAM,MAAM,IAAI,qB
AAa;AAC7B,MAAI;AAEJ,QAAM,UAAU,MAAM;AACpB,QAAI,QAAS,cAAa,OAAO;AACjC,OAAG,IAAI,QAAQ,MAAM;AACrB,OAAG,IAAI,SAAS,OAAO;AACvB,OAAG,IAAI,SAAS,OAAO;AACvB,gBAAY,oBAAoB,SAAS,OAAO;AAAA,EAClD;AAEA,QAAM,SAAS,MAAM,IAAI,QAAQ;AACjC,QAAM,UAAU,CAAC,QAAe,IAAI,OAAO,QAAQ,GAAG,CAAC;AACvD,QAAM,UAAU,CAAC,MAAc,WAC7B,IAAI;AAAA,IACF,IAAI,MAAM,sCAAsC,IAAI,YAAY,OAAO,SAAS,CAAC,GAAG;AAAA,EACtF;AACF,QAAM,UAAU,MAAM,IAAI,OAAO,IAAI,MAAM,SAAS,CAAC;AAErD,KAAG,GAAG,QAAQ,MAAM;AACpB,KAAG,GAAG,SAAS,OAAO;AACtB,KAAG,GAAG,SAAS,OAAO;AACtB,cAAY,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAE7D,MAAI,YAAY,GAAG;AACjB,cAAU,WAAW,MAAM,IAAI,OAAO,IAAI,MAAM,iBAAiB,CAAC,GAAG,SAAS;AAAA,EAChF;AAEA,MAAI;AACF,UAAM,IAAI;AAAA,EACZ,UAAE;AACA,YAAQ;AAAA,EACV;AACF;AAEA,MAAM,yBAAyB,CAAC,OAAkB;AAGhD,MAAI;AACF,OAAG,GAAG,SAAS,MAAM;AAAA,IAAC,CAAC;AAAA,EACzB,QAAQ;AAAA,EAER;AAEA,MAAI;AAEF,QAAI,GAAG,eAAe,oBAAU,YAAY;AAC1C,SAAG,MAAM;AAAA,IACX,OAAO;AACL,SAAG,UAAU;AAAA,IACf;AAAA,EACF,QAAQ;AAAA,EAER;AACF;AAEA,MAAM,2BAA2B,OAAO;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AACF,MAI0B;AACxB,QAAM,cAAc,OAAO,WAAwC;AACjE,UAAM,KAAK,IAAI,oBAAU,KAAK,EAAE,kBAAkB,WAAW,OAAO,CAAC;AACrE,QAAI;AACF,YAAM,cAAc,EAAE,IAAI,WAAW,YAAY,CAAC;AAClD,aAAO;AAAA,IACT,SAAS,GAAG;AACV,6BAAuB,EAAE;AACzB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI;AACF,WAAO,MAAM,YAAY;AAAA,EAC3B,SAAS,GAAG;AAUV,QAAI,oBAAoB,CAAC,KAAK,qBAAqB,CAAC,GAAG;AACrD,aAAO,MAAM,YAAY,CAAC;AAAA,IAC5B;AACA,UAAM;AAAA,EACR;AACF;AAEA,MAAM,oBAAoB,CACxB,MACA,YAAqB,UACS;AAC9B,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,MAAI,KAAK,eAAe,wCAAwC;AAC9D,UAAM,gBAA2C,CAAC;AAClD,QAAI,KAAK,OAAO;AACd,oBAAc,QAAQ,KAAK;AAAA,IAC7B;AACA,QAAI,KAAK,SAAS;AAChB,oBAAc,UAAU,KAAK;AAAA,IAC/B;AACA,QAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,YAAM,0BAA0B;AAAA,IAClC;AAAA,EACF;AAEA,QAAM,SAAoC;AAAA,IACxC,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,IACf,qBAAqB;AAAA,EACvB;AAEA,MAAI,KAAK,qBAAqB;AAC5B,WAAO,w
BAAwB,KAAK;AAAA,EACtC;AAEA,MAAI,KAAK,aAAa,8CAA0C,wBAAS,KAAK,KAAK,GAAG;AACpF,UAAM,mBAA8C,CAAC;AACrD,QAAI,KAAK,OAAO;AACd,uBAAiB,QAAQ,KAAK;AAAA,IAChC;AACA,QAAI,KAAK,SAAS;AAChB,uBAAiB,UAAU,KAAK,QAAQ,CAAC;AAAA,IAC3C;AACA,QAAI,KAAK,QAAQ;AACf,uBAAiB,SAAS,KAAK;AAAA,IACjC;AACA,QAAI,OAAO,KAAK,gBAAgB,EAAE,QAAQ;AACxC,aAAO,oBAAoB;AAAA,IAC7B;AAAA,EACF;AAEA,MAAI,aAAa,KAAK,mBAAmB,OAAO;AAC9C,WAAO,iBAAiB;AAAA,EAC1B;AAEA,SAAO;AACT;","names":["tts","ws"]}
|
package/dist/tts.d.cts
CHANGED
|
@@ -7,9 +7,15 @@ export interface TTSOptions {
|
|
|
7
7
|
voice: string | number[];
|
|
8
8
|
speed?: TTSVoiceSpeed | number;
|
|
9
9
|
emotion?: (TTSVoiceEmotion | string)[];
|
|
10
|
+
/**
|
|
11
|
+
* Volume of the speech. For sonic-3, the value is valid between 0.5 and 2.0.
|
|
12
|
+
* @see https://docs.cartesia.ai/api-reference/tts/bytes#body-generation-config-volume
|
|
13
|
+
*/
|
|
14
|
+
volume?: number;
|
|
10
15
|
apiKey?: string;
|
|
11
16
|
language: string;
|
|
12
17
|
baseUrl: string;
|
|
18
|
+
apiVersion: string;
|
|
13
19
|
/**
|
|
14
20
|
* The timeout for the next chunk to be received from the Cartesia API.
|
|
15
21
|
*/
|
package/dist/tts.d.ts
CHANGED
|
@@ -7,9 +7,15 @@ export interface TTSOptions {
|
|
|
7
7
|
voice: string | number[];
|
|
8
8
|
speed?: TTSVoiceSpeed | number;
|
|
9
9
|
emotion?: (TTSVoiceEmotion | string)[];
|
|
10
|
+
/**
|
|
11
|
+
* Volume of the speech. For sonic-3, the value is valid between 0.5 and 2.0.
|
|
12
|
+
* @see https://docs.cartesia.ai/api-reference/tts/bytes#body-generation-config-volume
|
|
13
|
+
*/
|
|
14
|
+
volume?: number;
|
|
10
15
|
apiKey?: string;
|
|
11
16
|
language: string;
|
|
12
17
|
baseUrl: string;
|
|
18
|
+
apiVersion: string;
|
|
13
19
|
/**
|
|
14
20
|
* The timeout for the next chunk to be received from the Cartesia API.
|
|
15
21
|
*/
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAWtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAWtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EAEnB,MAAM,aAAa,CAAC;AAkBrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IAErB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AA+CD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IA6B1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAavC,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,GAAG,CAAC,aAAa;IAIpB,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,gBAAgB;CAGxE;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAM/B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,UAAU,EAChB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAOX,GAAG;CAuEpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,iBAAiB;IAKvE,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAavB,GAAG;CAkPpB"}
|
package/dist/tts.js
CHANGED
|
@@ -13,7 +13,8 @@ import {
|
|
|
13
13
|
import { request } from "node:https";
|
|
14
14
|
import { WebSocket } from "ws";
|
|
15
15
|
import {
|
|
16
|
-
TTSDefaultVoiceId
|
|
16
|
+
TTSDefaultVoiceId,
|
|
17
|
+
isSonic3
|
|
17
18
|
} from "./models.js";
|
|
18
19
|
import {
|
|
19
20
|
cartesiaMessageSchema,
|
|
@@ -24,20 +25,49 @@ import {
|
|
|
24
25
|
} from "./types.js";
|
|
25
26
|
const AUTHORIZATION_HEADER = "X-API-Key";
|
|
26
27
|
const VERSION_HEADER = "Cartesia-Version";
|
|
27
|
-
const
|
|
28
|
+
const API_VERSION = "2025-04-16";
|
|
29
|
+
const API_VERSION_WITH_EXPERIMENTAL_CONTROLS = "2024-11-13";
|
|
30
|
+
const MODEL_WITH_EXPERIMENTAL_CONTROLS = "sonic-2-2025-03-07";
|
|
28
31
|
const NUM_CHANNELS = 1;
|
|
29
32
|
const BUFFERED_WORDS_COUNT = 8;
|
|
30
33
|
const defaultTTSOptions = {
|
|
31
|
-
model: "sonic-
|
|
34
|
+
model: "sonic-3",
|
|
32
35
|
encoding: "pcm_s16le",
|
|
33
36
|
sampleRate: 24e3,
|
|
34
37
|
voice: TTSDefaultVoiceId,
|
|
35
38
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
36
39
|
language: "en",
|
|
37
40
|
baseUrl: "https://api.cartesia.ai",
|
|
41
|
+
apiVersion: API_VERSION,
|
|
38
42
|
chunkTimeout: 5e3,
|
|
39
43
|
wordTimestamps: true
|
|
40
44
|
};
|
|
45
|
+
const checkGenerationConfig = (opts) => {
|
|
46
|
+
const logger = log();
|
|
47
|
+
if (isSonic3(opts.model)) {
|
|
48
|
+
if (opts.speed !== void 0 && typeof opts.speed === "number") {
|
|
49
|
+
if (opts.speed < 0.6 || opts.speed > 2) {
|
|
50
|
+
logger.warn("speed must be between 0.6 and 2.0 for sonic-3");
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
if (opts.volume !== void 0 && (opts.volume < 0.5 || opts.volume > 2)) {
|
|
54
|
+
logger.warn("volume must be between 0.5 and 2.0 for sonic-3");
|
|
55
|
+
}
|
|
56
|
+
} else if (opts.apiVersion !== API_VERSION_WITH_EXPERIMENTAL_CONTROLS || opts.model !== MODEL_WITH_EXPERIMENTAL_CONTROLS) {
|
|
57
|
+
if (opts.speed || opts.emotion) {
|
|
58
|
+
logger.warn(
|
|
59
|
+
{ model: opts.model, speed: opts.speed, emotion: opts.emotion },
|
|
60
|
+
`speed and emotion controls are only supported for model '${MODEL_WITH_EXPERIMENTAL_CONTROLS}' or sonic-3 models, see https://docs.cartesia.ai/developer-tools/changelog for details`
|
|
61
|
+
);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
if (opts.pronunciationDictId && !isSonic3(opts.model)) {
|
|
65
|
+
logger.warn(
|
|
66
|
+
{ model: opts.model, pronunciationDictId: opts.pronunciationDictId },
|
|
67
|
+
"pronunciationDictId is only supported for sonic-3 models"
|
|
68
|
+
);
|
|
69
|
+
}
|
|
70
|
+
};
|
|
41
71
|
class TTS extends tts.TTS {
|
|
42
72
|
#opts;
|
|
43
73
|
label = "cartesia.TTS";
|
|
@@ -56,22 +86,14 @@ class TTS extends tts.TTS {
|
|
|
56
86
|
"Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
|
|
57
87
|
);
|
|
58
88
|
}
|
|
59
|
-
if (
|
|
60
|
-
|
|
61
|
-
logger.warn(
|
|
62
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
63
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
64
|
-
);
|
|
89
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
90
|
+
checkGenerationConfig(this.#opts);
|
|
65
91
|
}
|
|
66
92
|
}
|
|
67
93
|
updateOptions(opts) {
|
|
68
94
|
this.#opts = { ...this.#opts, ...opts };
|
|
69
|
-
if (
|
|
70
|
-
|
|
71
|
-
logger.warn(
|
|
72
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
73
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
74
|
-
);
|
|
95
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
96
|
+
checkGenerationConfig(this.#opts);
|
|
75
97
|
}
|
|
76
98
|
}
|
|
77
99
|
synthesize(text, connOptions, abortSignal) {
|
|
@@ -106,7 +128,7 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
106
128
|
method: "POST",
|
|
107
129
|
headers: {
|
|
108
130
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey,
|
|
109
|
-
[VERSION_HEADER]:
|
|
131
|
+
[VERSION_HEADER]: this.#opts.apiVersion
|
|
110
132
|
},
|
|
111
133
|
signal: this.abortSignal
|
|
112
134
|
},
|
|
@@ -172,11 +194,8 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
172
194
|
}
|
|
173
195
|
updateOptions(opts) {
|
|
174
196
|
this.#opts = { ...this.#opts, ...opts };
|
|
175
|
-
if (
|
|
176
|
-
this.#
|
|
177
|
-
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
178
|
-
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
179
|
-
);
|
|
197
|
+
if (this.#opts.speed || this.#opts.emotion || this.#opts.volume || this.#opts.pronunciationDictId) {
|
|
198
|
+
checkGenerationConfig(this.#opts);
|
|
180
199
|
}
|
|
181
200
|
}
|
|
182
201
|
async run() {
|
|
@@ -350,7 +369,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
350
369
|
}
|
|
351
370
|
};
|
|
352
371
|
const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
|
|
353
|
-
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${
|
|
372
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${this.#opts.apiVersion}`;
|
|
354
373
|
let ws;
|
|
355
374
|
try {
|
|
356
375
|
ws = await connectCartesiaWebSocket({
|
|
@@ -493,15 +512,17 @@ const toCartesiaOptions = (opts, streaming = false) => {
|
|
|
493
512
|
voice.mode = "embedding";
|
|
494
513
|
voice.embedding = opts.voice;
|
|
495
514
|
}
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
515
|
+
if (opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS) {
|
|
516
|
+
const voiceControls = {};
|
|
517
|
+
if (opts.speed) {
|
|
518
|
+
voiceControls.speed = opts.speed;
|
|
519
|
+
}
|
|
520
|
+
if (opts.emotion) {
|
|
521
|
+
voiceControls.emotion = opts.emotion;
|
|
522
|
+
}
|
|
523
|
+
if (Object.keys(voiceControls).length) {
|
|
524
|
+
voice.__experimental_controls = voiceControls;
|
|
525
|
+
}
|
|
505
526
|
}
|
|
506
527
|
const result = {
|
|
507
528
|
model_id: opts.model,
|
|
@@ -512,8 +533,26 @@ const toCartesiaOptions = (opts, streaming = false) => {
|
|
|
512
533
|
sample_rate: opts.sampleRate
|
|
513
534
|
},
|
|
514
535
|
language: opts.language,
|
|
515
|
-
|
|
536
|
+
max_buffer_delay_ms: 0
|
|
516
537
|
};
|
|
538
|
+
if (opts.pronunciationDictId) {
|
|
539
|
+
result.pronunciation_dict_id = opts.pronunciationDictId;
|
|
540
|
+
}
|
|
541
|
+
if (opts.apiVersion > API_VERSION_WITH_EXPERIMENTAL_CONTROLS && isSonic3(opts.model)) {
|
|
542
|
+
const generationConfig = {};
|
|
543
|
+
if (opts.speed) {
|
|
544
|
+
generationConfig.speed = opts.speed;
|
|
545
|
+
}
|
|
546
|
+
if (opts.emotion) {
|
|
547
|
+
generationConfig.emotion = opts.emotion[0];
|
|
548
|
+
}
|
|
549
|
+
if (opts.volume) {
|
|
550
|
+
generationConfig.volume = opts.volume;
|
|
551
|
+
}
|
|
552
|
+
if (Object.keys(generationConfig).length) {
|
|
553
|
+
result.generation_config = generationConfig;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
517
556
|
if (streaming && opts.wordTimestamps !== false) {
|
|
518
557
|
result.add_timestamps = true;
|
|
519
558
|
}
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APITimeoutError,\n AudioByteStream,\n Future,\n type TimedString,\n createTimedString,\n log,\n shortuuid,\n stream,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\nimport {\n type CartesiaServerMessage,\n cartesiaMessageSchema,\n hasWordTimestamps,\n isChunkMessage,\n isDoneMessage,\n isErrorMessage,\n} from './types.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n\n /**\n * Whether to add word timestamps to the output. 
When enabled, the TTS will return\n * timing information for each word in the transcript.\n * @defaultValue true\n */\n wordTimestamps?: boolean;\n\n pronunciationDictId?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n chunkTimeout: 5000,\n wordTimestamps: true,\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolvedOpts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n alignedTranscript: resolvedOpts.wordTimestamps ?? true,\n });\n\n this.#opts = resolvedOpts;\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream 
{\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts, options?.connOptions);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n if (!doneFut.done) doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n if (!doneFut.done) doneFut.reject(err);\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n if (!doneFut.done) doneFut.reject(err);\n });\n req.on('close', () => {\n if (!doneFut.done) doneFut.resolve();\n });\n req.write(JSON.stringify(json));\n req.end();\n\n try {\n await doneFut.await;\n } catch (e) {\n if (this.abortSignal.aborted) return;\n if (!this.queue.closed) this.queue.close();\n throw toRetryableConnectionError(e);\n }\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for 
model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n // Only close WebSocket when both: 1) Cartesia returns done, AND 2) all sentences have been sent\n let sentenceStreamClosed = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts, true);\n for await (const event of this.#tokenizer) {\n const msg = {\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n };\n ws.send(JSON.stringify(msg));\n }\n\n const endMsg = {\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n };\n ws.send(JSON.stringify(endMsg));\n // Mark sentence stream as closed\n sentenceStreamClosed = true;\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n // Use event channel and set up listeners ONCE to avoid missing messages during listener re-registration\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n // Create event channel to buffer incoming messages\n // This prevents message loss between listener re-registrations\n const eventChannel = stream.createStreamChannel<RawData>();\n\n let lastFrame: AudioFrame | undefined;\n let pendingTimedTranscripts: TimedString[] = [];\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n // Include timedTranscripts with the audio frame\n this.queue.put({\n requestId,\n segmentId,\n frame: lastFrame,\n final,\n timedTranscripts:\n pendingTimedTranscripts.length > 0 ? 
pendingTimedTranscripts : undefined,\n });\n lastFrame = undefined;\n pendingTimedTranscripts = [];\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n // Set up WebSocket listeners ONCE (not in a loop)\n const onMessage = (data: RawData) => {\n void eventChannel.write(data).catch((error: unknown) => {\n this.#logger.debug({ error }, 'Failed writing Cartesia event to channel (likely closed)');\n });\n };\n\n const onClose = (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);\n }\n clearTTSChunkTimeout();\n void eventChannel.close();\n };\n\n const onError = (err: Error) => {\n this.#logger.error({ err }, 'Cartesia WebSocket error');\n void eventChannel.close();\n };\n\n // Attach listeners ONCE\n ws.on('message', onMessage);\n ws.on('close', onClose);\n ws.on('error', onError);\n\n try {\n // Process messages from the channel\n const reader = eventChannel.stream().getReader();\n\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (result.done) break;\n\n const rawMsg = result.value;\n\n // Parse message with Zod schema for type safety\n let serverMsg: CartesiaServerMessage;\n try {\n const json = JSON.parse(rawMsg.toString());\n serverMsg = cartesiaMessageSchema.parse(json);\n } catch (parseErr) {\n this.#logger.warn({ parseErr }, 'Failed to parse Cartesia message');\n continue;\n }\n\n // Handle error messages\n if (isErrorMessage(serverMsg)) {\n this.#logger.error({ error: serverMsg.error }, 'Cartesia returned error');\n continue;\n }\n\n const segmentId = serverMsg.context_id;\n\n // Process word timestamps if present (typed via Zod schema)\n if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {\n const wordTimestamps = serverMsg.word_timestamps;\n for (let i = 0; i < 
wordTimestamps.words.length; i++) {\n const word = wordTimestamps.words[i];\n const startTime = wordTimestamps.start[i];\n const endTime = wordTimestamps.end[i];\n if (word !== undefined && startTime !== undefined && endTime !== undefined) {\n pendingTimedTranscripts.push(\n createTimedString({\n text: word + ' ', // Add space after word for consistency\n startTime,\n endTime,\n }),\n );\n }\n }\n }\n\n // Handle audio chunk messages\n if (isChunkMessage(serverMsg)) {\n const audioBuffer = Buffer.from(serverMsg.data, 'base64');\n // Extract ArrayBuffer from Buffer for AudioByteStream compatibility\n const audioData = audioBuffer.buffer.slice(\n audioBuffer.byteOffset,\n audioBuffer.byteOffset + audioBuffer.byteLength,\n );\n for (const frame of bstream.write(audioData)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if (isDoneMessage(serverMsg)) {\n // This ensures all sentences have been sent before closing\n if (sentenceStreamClosed) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n clearTTSChunkTimeout();\n ws.close();\n break; // Exit the loop\n }\n }\n // If sentenceStreamClosed is false, continue receiving - more done messages will come\n }\n }\n } catch (err) {\n // skip log error for normal websocket close\n if 
(err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (\n err.message.includes('Queue is closed') ||\n err.message.includes('Channel is closed')\n ) {\n this.#logger.warn(\n { err },\n 'Channel closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('close', onClose);\n ws.off('error', onError);\n clearTTSChunkTimeout();\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n\n let ws: WebSocket | undefined;\n try {\n ws = await connectCartesiaWebSocket({\n url,\n timeoutMs: this.connOptions.timeoutMs,\n abortSignal: this.abortSignal,\n });\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n if (this.abortSignal.aborted) {\n return;\n }\n throw toRetryableConnectionError(e);\n } finally {\n // Ensure we don't leak sockets/tasks across retry attempts.\n if (ws && ws.readyState !== WebSocket.CLOSED) {\n safeTerminateWebSocket(ws);\n }\n }\n }\n}\n\nconst asError = (e: unknown): Error => (e instanceof Error ? 
e : new Error(String(e)));\n\nconst transientNetworkCodes = new Set([\n 'ETIMEDOUT',\n 'ECONNRESET',\n 'EAI_AGAIN',\n 'ENETUNREACH',\n 'ECONNREFUSED',\n 'EHOSTUNREACH',\n]);\n\nconst isRecord = (v: unknown): v is Record<string, unknown> => {\n return v !== null && typeof v === 'object';\n};\n\nconst isAggregateErrorLike = (e: unknown): e is { errors: unknown[]; name?: string } => {\n if (!isRecord(e)) return false;\n return e.name === 'AggregateError' && Array.isArray(e.errors);\n};\n\nconst hasErrorCode = (e: unknown, code: string): boolean => {\n if (isRecord(e) && e.code === code) return true;\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasErrorCode(inner, code));\n }\n return false;\n};\n\nconst hasAnyTransientCode = (e: unknown): boolean => {\n if (isRecord(e) && typeof e.code === 'string') {\n return transientNetworkCodes.has(e.code);\n }\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasAnyTransientCode(inner));\n }\n return false;\n};\n\nconst toRetryableConnectionError = (e: unknown): APIConnectionError => {\n const err = asError(e);\n const isTimeout =\n hasErrorCode(e, 'ETIMEDOUT') ||\n (typeof err.message === 'string' && err.message.includes('ETIMEDOUT'));\n const message = isTimeout\n ? `Cartesia connection timed out`\n : `Cartesia connection failed: ${err.message || 'unknown error'}`;\n return isTimeout ? 
new APITimeoutError({ message }) : new APIConnectionError({ message });\n};\n\nconst waitForWsOpen = async ({\n ws,\n timeoutMs,\n abortSignal,\n}: {\n ws: WebSocket;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}) => {\n if (abortSignal.aborted) {\n throw new Error('aborted');\n }\n\n const fut = new Future<void>();\n let timeout: NodeJS.Timeout | undefined;\n\n const cleanup = () => {\n if (timeout) clearTimeout(timeout);\n ws.off('open', onOpen);\n ws.off('error', onError);\n ws.off('close', onClose);\n abortSignal.removeEventListener('abort', onAbort);\n };\n\n const onOpen = () => fut.resolve();\n const onError = (err: Error) => fut.reject(asError(err));\n const onClose = (code: number, reason: Buffer) =>\n fut.reject(\n new Error(`WebSocket closed before open (code=${code}, reason=${reason.toString()})`),\n );\n const onAbort = () => fut.reject(new Error('aborted'));\n\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n abortSignal.addEventListener('abort', onAbort, { once: true });\n\n if (timeoutMs > 0) {\n timeout = setTimeout(() => fut.reject(new Error('connect timeout')), timeoutMs);\n }\n\n try {\n await fut.await;\n } finally {\n cleanup();\n }\n};\n\nconst safeTerminateWebSocket = (ws: WebSocket) => {\n // `ws` can emit an 'error' event during teardown (especially if CONNECTING).\n // If there is no error listener at that moment, Node will treat it as unhandled and crash the process.\n try {\n ws.on('error', () => {});\n } catch {\n // ignore\n }\n\n try {\n // `terminate()` can throw if the socket was never established; `close()` is safer in CONNECTING.\n if (ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n } else {\n ws.terminate();\n }\n } catch {\n // ignore\n }\n};\n\nconst connectCartesiaWebSocket = async ({\n url,\n timeoutMs,\n abortSignal,\n}: {\n url: string;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}): Promise<WebSocket> => {\n const connectOnce = async (family?: number): 
Promise<WebSocket> => {\n const ws = new WebSocket(url, { handshakeTimeout: timeoutMs, family });\n try {\n await waitForWsOpen({ ws, timeoutMs, abortSignal });\n return ws;\n } catch (e) {\n safeTerminateWebSocket(ws);\n throw e;\n }\n };\n\n try {\n return await connectOnce();\n } catch (e) {\n // Mitigation for Node.js dual-stack (IPv6/IPv4) connect flakiness (\"happy eyeballs\"):\n // some environments surface `AggregateError` with nested `ETIMEDOUT` during the initial\n // WebSocket open. In that case we do a one-off retry forcing IPv4 (`family: 4`) before\n // letting the outer framework retry loop handle further attempts.\n //\n // If you still see `AggregateError`/`ETIMEDOUT`:\n // - Increase the session TTS connect timeout (`connOptions.ttsConnOptions.timeoutMs`)\n // - Or adjust Node's family autoselection behavior via `NODE_OPTIONS`, e.g.\n // `--network-family-autoselection-attempt-timeout=5000` (or disable it entirely).\n if (hasAnyTransientCode(e) || isAggregateErrorLike(e)) {\n return await connectOnce(4);\n }\n throw e;\n }\n};\n\n/**\n * Convert TTSOptions to Cartesia API format.\n *\n * @param opts - TTS options\n * @param streaming - Whether this is for streaming (WebSocket) or non-streaming (HTTP)\n */\nconst toCartesiaOptions = (\n opts: TTSOptions,\n streaming: boolean = false,\n): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n const result: { [id: string]: unknown } = {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n 
sample_rate: opts.sampleRate,\n },\n language: opts.language,\n pronunciation_dict_id: opts.pronunciationDictId,\n };\n\n if (streaming && opts.wordTimestamps !== false) {\n result.add_timestamps = true;\n }\n\n return result;\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AACP;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AA4B7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,cAAc;AAAA,EACd,gBAAgB;AAClB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,eAAe;AAAA,MACnB,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,UAAM,aAAa,cAAc,kBAAkB,YAAY,cAAc;AAAA,MAC3E,WAAW;AAAA,MACX,mBAAmB,aAAa,kBAAkB;AAAA,IACpD,CAAC;AAED,SAAK,QAAQ;AAEb,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,OAAO,SAAiE;AACtE,WAAO,IAAI,iBAAiB,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EACpE;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,
gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,OAAa;AAEjC,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,cAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,QACrC,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AACzD,cAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,QACvC,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AACxD,UAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,IACvC,CAAC;AACD,QAAI,GAAG,SAAS,MAAM;AACpB,UAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,IACrC,CAAC;AACD,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,QAAI;AACF,YAAM,QAAQ;AAAA,IAChB,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,QAAS;AAC9B,UAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,YAAM,2BAA2B,CAAC;AAAA,IACpC;AAAA,EACF;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,WAAW;AACtB,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QA
AI,UAAU;AAEd,QAAI,uBAAuB;AAE3B,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,OAAO,IAAI;AACjD,uBAAiB,SAAS,KAAK,YAAY;AACzC,cAAM,MAAM;AAAA,UACV,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY,MAAM,QAAQ;AAAA,UAC1B,UAAU;AAAA,QACZ;AACA,QAAAA,IAAG,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,MAC7B;AAEA,YAAM,SAAS;AAAA,QACb,GAAG;AAAA,QACH,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,UAAU;AAAA,MACZ;AACA,MAAAA,IAAG,KAAK,KAAK,UAAU,MAAM,CAAC;AAE9B,6BAAuB;AAAA,IACzB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAGA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAIvE,YAAM,eAAe,OAAO,oBAA6B;AAEzD,UAAI;AACJ,UAAI,0BAAyC,CAAC;AAE9C,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AAEnC,eAAK,MAAM,IAAI;AAAA,YACb;AAAA,YACA;AAAA,YACA,OAAO;AAAA,YACP;AAAA,YACA,kBACE,wBAAwB,SAAS,IAAI,0BAA0B;AAAA,UACnE,CAAC;AACD,sBAAY;AACZ,oCAA0B,CAAC;AAAA,QAC7B;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAGA,YAAM,YAAY,CAAC,SAAkB;AACnC,aAAK,aAAa,MAAM,IAAI,EAAE,MAAM,CAAC,UAAmB;AACtD,eAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,0DAA0D;AAAA,QAC1F,CAAC;AAAA,MACH;AAEA,YAAM,UAAU,CAAC,MAAc,WAAmB;AAChD,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,OAAO,SAAS,CAAC,EAAE;AAAA,QAC/E;AACA,6BAAqB;AACrB,aAAK,aAAa,MAAM;AAAA,MAC1B;AAEA,YAAM,UAAU,CAAC,QAAe;AAC9B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,0BAA0B;AACtD,aAAK,aAAa,MAAM;AAAA,MAC1B;AAGA,MAAAA,IAAG,GAAG,WAAW,SAAS;AAC1B,MAAAA,IAAG,GAAG,SAAS,OAAO;AACtB,MAAAA,IAAG,GAAG,SAAS,OAAO;AAEtB,UAAI;AAEF,cAAM,SAAS,aAAa,OAAO,EAAE,UAAU;AAE/C,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,KAAM;AAEjB,gBAAM,SAAS,OAAO;AAGtB,cAAI;AACJ,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,OAAO,SAAS,CAAC;AACzC,wBAAY,sBAAsB,MAAM,IAAI;AAAA,UAC9C,SAAS,UAAU;AACjB,iBAAK,QAAQ,KAAK,EAAE,SAAS,GAAG,kCAAkC;AAClE;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,iBAAK,QAAQ,MAAM,EA
AE,OAAO,UAAU,MAAM,GAAG,yBAAyB;AACxE;AAAA,UACF;AAEA,gBAAM,YAAY,UAAU;AAG5B,cAAI,KAAK,MAAM,mBAAmB,SAAS,kBAAkB,SAAS,GAAG;AACvE,kBAAM,iBAAiB,UAAU;AACjC,qBAAS,IAAI,GAAG,IAAI,eAAe,MAAM,QAAQ,KAAK;AACpD,oBAAM,OAAO,eAAe,MAAM,CAAC;AACnC,oBAAM,YAAY,eAAe,MAAM,CAAC;AACxC,oBAAM,UAAU,eAAe,IAAI,CAAC;AACpC,kBAAI,SAAS,UAAa,cAAc,UAAa,YAAY,QAAW;AAC1E,wCAAwB;AAAA,kBACtB,kBAAkB;AAAA,oBAChB,MAAM,OAAO;AAAA;AAAA,oBACb;AAAA,oBACA;AAAA,kBACF,CAAC;AAAA,gBACH;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,kBAAM,cAAc,OAAO,KAAK,UAAU,MAAM,QAAQ;AAExD,kBAAM,YAAY,YAAY,OAAO;AAAA,cACnC,YAAY;AAAA,cACZ,YAAY,aAAa,YAAY;AAAA,YACvC;AACA,uBAAW,SAAS,QAAQ,MAAM,SAAS,GAAG;AAC5C,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AAKA,iCAAqB;AACrB,sBAAU,WAAW,MAAM;AAEzB,mBAAK,QAAQ;AAAA,gBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,cAC9E;AACA,cAAAA,IAAG,MAAM;AAAA,YACX,GAAG,KAAK,MAAM,YAAY;AAAA,UAC5B,WAAW,cAAc,SAAS,GAAG;AAEnC,gBAAI,sBAAsB;AACxB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,qCAAqB;AACrB,gBAAAA,IAAG,MAAM;AACT;AAAA,cACF;AAAA,YACF;AAAA,UAEF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AAEZ,YAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,cACE,IAAI,QAAQ,SAAS,iBAAiB,KACtC,IAAI,QAAQ,SAAS,mBAAmB,GACxC;AACA,iBAAK,QAAQ;AAAA,cACX,EAAE,IAAI;AAAA,cACN;AAAA,YACF;AAAA,UACF,OAAO;AACL,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AAAA,QACF;AAAA,MACF,UAAE;AAEA,QAAAA,IAAG,IAAI,WAAW,SAAS;AAC3B,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,6BAAqB;AAAA,MACvB;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAE3F,QAAI;AACJ,QAAI;AACF,WAAK,MAAM,yBAAyB;AAAA,QAClC;AAAA,QACA,WAAW,KAAK,YAAY;AAAA,QAC5B,aAAa,KAAK;AAAA,MACpB,CAAC;AACD,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,SAAS;AAC5B;AAAA,MACF;AACA,YAAM,2BAA2B,CAAC;AAAA,IACpC,UAAE;AAEA,UAAI,MAAM,G
AAG,eAAe,UAAU,QAAQ;AAC5C,+BAAuB,EAAE;AAAA,MAC3B;AAAA,IACF;AAAA,EACF;AACF;AAEA,MAAM,UAAU,CAAC,MAAuB,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAEpF,MAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,MAAM,WAAW,CAAC,MAA6C;AAC7D,SAAO,MAAM,QAAQ,OAAO,MAAM;AACpC;AAEA,MAAM,uBAAuB,CAAC,MAA0D;AACtF,MAAI,CAAC,SAAS,CAAC,EAAG,QAAO;AACzB,SAAO,EAAE,SAAS,oBAAoB,MAAM,QAAQ,EAAE,MAAM;AAC9D;AAEA,MAAM,eAAe,CAAC,GAAY,SAA0B;AAC1D,MAAI,SAAS,CAAC,KAAK,EAAE,SAAS,KAAM,QAAO;AAC3C,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,aAAa,OAAO,IAAI,CAAC;AAAA,EAC3D;AACA,SAAO;AACT;AAEA,MAAM,sBAAsB,CAAC,MAAwB;AACnD,MAAI,SAAS,CAAC,KAAK,OAAO,EAAE,SAAS,UAAU;AAC7C,WAAO,sBAAsB,IAAI,EAAE,IAAI;AAAA,EACzC;AACA,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,oBAAoB,KAAK,CAAC;AAAA,EAC5D;AACA,SAAO;AACT;AAEA,MAAM,6BAA6B,CAAC,MAAmC;AACrE,QAAM,MAAM,QAAQ,CAAC;AACrB,QAAM,YACJ,aAAa,GAAG,WAAW,KAC1B,OAAO,IAAI,YAAY,YAAY,IAAI,QAAQ,SAAS,WAAW;AACtE,QAAM,UAAU,YACZ,kCACA,+BAA+B,IAAI,WAAW,eAAe;AACjE,SAAO,YAAY,IAAI,gBAAgB,EAAE,QAAQ,CAAC,IAAI,IAAI,mBAAmB,EAAE,QAAQ,CAAC;AAC1F;AAEA,MAAM,gBAAgB,OAAO;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AACF,MAIM;AACJ,MAAI,YAAY,SAAS;AACvB,UAAM,IAAI,MAAM,SAAS;AAAA,EAC3B;AAEA,QAAM,MAAM,IAAI,OAAa;AAC7B,MAAI;AAEJ,QAAM,UAAU,MAAM;AACpB,QAAI,QAAS,cAAa,OAAO;AACjC,OAAG,IAAI,QAAQ,MAAM;AACrB,OAAG,IAAI,SAAS,OAAO;AACvB,OAAG,IAAI,SAAS,OAAO;AACvB,gBAAY,oBAAoB,SAAS,OAAO;AAAA,EAClD;AAEA,QAAM,SAAS,MAAM,IAAI,QAAQ;AACjC,QAAM,UAAU,CAAC,QAAe,IAAI,OAAO,QAAQ,GAAG,CAAC;AACvD,QAAM,UAAU,CAAC,MAAc,WAC7B,IAAI;AAAA,IACF,IAAI,MAAM,sCAAsC,IAAI,YAAY,OAAO,SAAS,CAAC,GAAG;AAAA,EACtF;AACF,QAAM,UAAU,MAAM,IAAI,OAAO,IAAI,MAAM,SAAS,CAAC;AAErD,KAAG,GAAG,QAAQ,MAAM;AACpB,KAAG,GAAG,SAAS,OAAO;AACtB,KAAG,GAAG,SAAS,OAAO;AACtB,cAAY,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAE7D,MAAI,YAAY,GAAG;AACjB,cAAU,WAAW,MAAM,IAAI,OAAO,IAAI,MAAM,iBAAiB,CAAC,GAAG,SAAS;AAAA,EAChF;AAEA,MAAI;AACF,UAAM,IAAI;AAAA,EACZ,UAAE;AACA,YAAQ;AAAA,EACV;AACF;AAEA,MAAM,yBAAyB,CAAC,OAAkB;AAGhD,MAAI;AACF,OAAG,GAAG,SAAS,MAAM;AA
AA,IAAC,CAAC;AAAA,EACzB,QAAQ;AAAA,EAER;AAEA,MAAI;AAEF,QAAI,GAAG,eAAe,UAAU,YAAY;AAC1C,SAAG,MAAM;AAAA,IACX,OAAO;AACL,SAAG,UAAU;AAAA,IACf;AAAA,EACF,QAAQ;AAAA,EAER;AACF;AAEA,MAAM,2BAA2B,OAAO;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AACF,MAI0B;AACxB,QAAM,cAAc,OAAO,WAAwC;AACjE,UAAM,KAAK,IAAI,UAAU,KAAK,EAAE,kBAAkB,WAAW,OAAO,CAAC;AACrE,QAAI;AACF,YAAM,cAAc,EAAE,IAAI,WAAW,YAAY,CAAC;AAClD,aAAO;AAAA,IACT,SAAS,GAAG;AACV,6BAAuB,EAAE;AACzB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI;AACF,WAAO,MAAM,YAAY;AAAA,EAC3B,SAAS,GAAG;AAUV,QAAI,oBAAoB,CAAC,KAAK,qBAAqB,CAAC,GAAG;AACrD,aAAO,MAAM,YAAY,CAAC;AAAA,IAC5B;AACA,UAAM;AAAA,EACR;AACF;AAQA,MAAM,oBAAoB,CACxB,MACA,YAAqB,UACS;AAC9B,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,QAAM,SAAoC;AAAA,IACxC,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,IACf,uBAAuB,KAAK;AAAA,EAC9B;AAEA,MAAI,aAAa,KAAK,mBAAmB,OAAO;AAC9C,WAAO,iBAAiB;AAAA,EAC1B;AAEA,SAAO;AACT;","names":["tts","ws"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n APIConnectionError,\n APITimeoutError,\n AudioByteStream,\n Future,\n type TimedString,\n createTimedString,\n log,\n shortuuid,\n stream,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n isSonic3,\n} from './models.js';\nimport {\n type CartesiaServerMessage,\n cartesiaMessageSchema,\n hasWordTimestamps,\n isChunkMessage,\n isDoneMessage,\n isErrorMessage,\n} from './types.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst API_VERSION = '2025-04-16';\nconst API_VERSION_WITH_EXPERIMENTAL_CONTROLS = '2024-11-13';\nconst MODEL_WITH_EXPERIMENTAL_CONTROLS = 'sonic-2-2025-03-07';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n /**\n * Volume of the speech. For sonic-3, the value is valid between 0.5 and 2.0.\n * @see https://docs.cartesia.ai/api-reference/tts/bytes#body-generation-config-volume\n */\n volume?: number;\n apiKey?: string;\n language: string;\n baseUrl: string;\n apiVersion: string;\n\n /**\n * The timeout for the next chunk to be received from the Cartesia API.\n */\n chunkTimeout: number;\n\n /**\n * Whether to add word timestamps to the output. 
When enabled, the TTS will return\n * timing information for each word in the transcript.\n * @defaultValue true\n */\n wordTimestamps?: boolean;\n\n pronunciationDictId?: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-3',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n apiVersion: API_VERSION,\n chunkTimeout: 5000,\n wordTimestamps: true,\n};\n\nconst checkGenerationConfig = (opts: TTSOptions) => {\n const logger = log();\n if (isSonic3(opts.model)) {\n if (opts.speed !== undefined && typeof opts.speed === 'number') {\n if (opts.speed < 0.6 || opts.speed > 2.0) {\n logger.warn('speed must be between 0.6 and 2.0 for sonic-3');\n }\n }\n if (opts.volume !== undefined && (opts.volume < 0.5 || opts.volume > 2.0)) {\n logger.warn('volume must be between 0.5 and 2.0 for sonic-3');\n }\n } else if (\n opts.apiVersion !== API_VERSION_WITH_EXPERIMENTAL_CONTROLS ||\n opts.model !== MODEL_WITH_EXPERIMENTAL_CONTROLS\n ) {\n if (opts.speed || opts.emotion) {\n logger.warn(\n { model: opts.model, speed: opts.speed, emotion: opts.emotion },\n `speed and emotion controls are only supported for model '${MODEL_WITH_EXPERIMENTAL_CONTROLS}' ` +\n `or sonic-3 models, see https://docs.cartesia.ai/developer-tools/changelog for details`,\n );\n }\n }\n\n if (opts.pronunciationDictId && !isSonic3(opts.model)) {\n logger.warn(\n { model: opts.model, pronunciationDictId: opts.pronunciationDictId },\n 'pronunciationDictId is only supported for sonic-3 models',\n );\n }\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n const resolvedOpts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n super(resolvedOpts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n alignedTranscript: resolvedOpts.wordTimestamps ?? 
true,\n });\n\n this.#opts = resolvedOpts;\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts, connOptions, abortSignal);\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts, options?.connOptions);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #logger = log();\n #opts: TTSOptions;\n #text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const doneFut = new Future<void>();\n\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: this.#opts.apiVersion,\n },\n signal: this.abortSignal,\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n if (!doneFut.done) doneFut.resolve();\n });\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Cartesia TTS response error');\n if (!doneFut.done) doneFut.reject(err);\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Cartesia TTS request error');\n if (!doneFut.done) doneFut.reject(err);\n });\n req.on('close', () => {\n if (!doneFut.done) doneFut.resolve();\n });\n req.write(JSON.stringify(json));\n req.end();\n\n try {\n await doneFut.await;\n } catch (e) {\n if (this.abortSignal.aborted) return;\n if (!this.queue.closed) this.queue.close();\n throw toRetryableConnectionError(e);\n }\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions, connOptions?: APIConnectOptions) {\n super(tts, connOptions);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if (\n this.#opts.speed ||\n this.#opts.emotion ||\n this.#opts.volume ||\n this.#opts.pronunciationDictId\n ) {\n checkGenerationConfig(this.#opts);\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = 
false;\n // Only close WebSocket when both: 1) Cartesia returns done, AND 2) all sentences have been sent\n let sentenceStreamClosed = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts, true);\n for await (const event of this.#tokenizer) {\n const msg = {\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n };\n ws.send(JSON.stringify(msg));\n }\n\n const endMsg = {\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n };\n ws.send(JSON.stringify(endMsg));\n // Mark sentence stream as closed\n sentenceStreamClosed = true;\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n // Use event channel and set up listeners ONCE to avoid missing messages during listener re-registration\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n // Create event channel to buffer incoming messages\n // This prevents message loss between listener re-registrations\n const eventChannel = stream.createStreamChannel<RawData>();\n\n let lastFrame: AudioFrame | undefined;\n let pendingTimedTranscripts: TimedString[] = [];\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n // Include timedTranscripts with the audio frame\n this.queue.put({\n requestId,\n segmentId,\n frame: lastFrame,\n final,\n timedTranscripts:\n pendingTimedTranscripts.length > 0 ? 
pendingTimedTranscripts : undefined,\n });\n lastFrame = undefined;\n pendingTimedTranscripts = [];\n }\n };\n\n let timeout: NodeJS.Timeout | null = null;\n\n const clearTTSChunkTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n // Set up WebSocket listeners ONCE (not in a loop)\n const onMessage = (data: RawData) => {\n void eventChannel.write(data).catch((error: unknown) => {\n this.#logger.debug({ error }, 'Failed writing Cartesia event to channel (likely closed)');\n });\n };\n\n const onClose = (code: number, reason: Buffer) => {\n if (!closing) {\n this.#logger.debug(`WebSocket closed with code ${code}: ${reason.toString()}`);\n }\n clearTTSChunkTimeout();\n void eventChannel.close();\n };\n\n const onError = (err: Error) => {\n this.#logger.error({ err }, 'Cartesia WebSocket error');\n void eventChannel.close();\n };\n\n // Attach listeners ONCE\n ws.on('message', onMessage);\n ws.on('close', onClose);\n ws.on('error', onError);\n\n try {\n // Process messages from the channel\n const reader = eventChannel.stream().getReader();\n\n while (!this.closed && !this.abortController.signal.aborted) {\n const result = await reader.read();\n if (result.done) break;\n\n const rawMsg = result.value;\n\n // Parse message with Zod schema for type safety\n let serverMsg: CartesiaServerMessage;\n try {\n const json = JSON.parse(rawMsg.toString());\n serverMsg = cartesiaMessageSchema.parse(json);\n } catch (parseErr) {\n this.#logger.warn({ parseErr }, 'Failed to parse Cartesia message');\n continue;\n }\n\n // Handle error messages\n if (isErrorMessage(serverMsg)) {\n this.#logger.error({ error: serverMsg.error }, 'Cartesia returned error');\n continue;\n }\n\n const segmentId = serverMsg.context_id;\n\n // Process word timestamps if present (typed via Zod schema)\n if (this.#opts.wordTimestamps !== false && hasWordTimestamps(serverMsg)) {\n const wordTimestamps = serverMsg.word_timestamps;\n for (let i = 0; i < 
wordTimestamps.words.length; i++) {\n const word = wordTimestamps.words[i];\n const startTime = wordTimestamps.start[i];\n const endTime = wordTimestamps.end[i];\n if (word !== undefined && startTime !== undefined && endTime !== undefined) {\n pendingTimedTranscripts.push(\n createTimedString({\n text: word + ' ', // Add space after word for consistency\n startTime,\n endTime,\n }),\n );\n }\n }\n }\n\n // Handle audio chunk messages\n if (isChunkMessage(serverMsg)) {\n const audioBuffer = Buffer.from(serverMsg.data, 'base64');\n // Extract ArrayBuffer from Buffer for AudioByteStream compatibility\n const audioData = audioBuffer.buffer.slice(\n audioBuffer.byteOffset,\n audioBuffer.byteOffset + audioBuffer.byteLength,\n );\n for (const frame of bstream.write(audioData)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n\n // IMPORTANT: close WS if TTS chunk stream been stuck too long\n // this allows unblock the current \"broken\" TTS node so that any future TTS nodes\n // can continue to process the stream without been blocked by the stuck node\n clearTTSChunkTimeout();\n timeout = setTimeout(() => {\n // cartesia chunk timeout quite often, so we make it a debug log\n this.#logger.debug(\n `Cartesia WebSocket TTS chunk stream timeout after ${this.#opts.chunkTimeout}ms`,\n );\n ws.close();\n }, this.#opts.chunkTimeout);\n } else if (isDoneMessage(serverMsg)) {\n // This ensures all sentences have been sent before closing\n if (sentenceStreamClosed) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n clearTTSChunkTimeout();\n ws.close();\n break; // Exit the loop\n }\n }\n // If sentenceStreamClosed is false, continue receiving - more done messages will come\n }\n }\n } catch (err) {\n // skip log error for normal websocket close\n if 
(err instanceof Error && !err.message.includes('WebSocket closed')) {\n if (\n err.message.includes('Queue is closed') ||\n err.message.includes('Channel is closed')\n ) {\n this.#logger.warn(\n { err },\n 'Channel closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n }\n } finally {\n // IMPORTANT: Remove listeners so connection can be reused\n ws.off('message', onMessage);\n ws.off('close', onClose);\n ws.off('error', onError);\n clearTTSChunkTimeout();\n }\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${this.#opts.apiVersion}`;\n\n let ws: WebSocket | undefined;\n try {\n ws = await connectCartesiaWebSocket({\n url,\n timeoutMs: this.connOptions.timeoutMs,\n abortSignal: this.abortSignal,\n });\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n if (this.abortSignal.aborted) {\n return;\n }\n throw toRetryableConnectionError(e);\n } finally {\n // Ensure we don't leak sockets/tasks across retry attempts.\n if (ws && ws.readyState !== WebSocket.CLOSED) {\n safeTerminateWebSocket(ws);\n }\n }\n }\n}\n\nconst asError = (e: unknown): Error => (e instanceof Error ? 
e : new Error(String(e)));\n\nconst transientNetworkCodes = new Set([\n 'ETIMEDOUT',\n 'ECONNRESET',\n 'EAI_AGAIN',\n 'ENETUNREACH',\n 'ECONNREFUSED',\n 'EHOSTUNREACH',\n]);\n\nconst isRecord = (v: unknown): v is Record<string, unknown> => {\n return v !== null && typeof v === 'object';\n};\n\nconst isAggregateErrorLike = (e: unknown): e is { errors: unknown[]; name?: string } => {\n if (!isRecord(e)) return false;\n return e.name === 'AggregateError' && Array.isArray(e.errors);\n};\n\nconst hasErrorCode = (e: unknown, code: string): boolean => {\n if (isRecord(e) && e.code === code) return true;\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasErrorCode(inner, code));\n }\n return false;\n};\n\nconst hasAnyTransientCode = (e: unknown): boolean => {\n if (isRecord(e) && typeof e.code === 'string') {\n return transientNetworkCodes.has(e.code);\n }\n if (isAggregateErrorLike(e)) {\n return e.errors.some((inner) => hasAnyTransientCode(inner));\n }\n return false;\n};\n\nconst toRetryableConnectionError = (e: unknown): APIConnectionError => {\n const err = asError(e);\n const isTimeout =\n hasErrorCode(e, 'ETIMEDOUT') ||\n (typeof err.message === 'string' && err.message.includes('ETIMEDOUT'));\n const message = isTimeout\n ? `Cartesia connection timed out`\n : `Cartesia connection failed: ${err.message || 'unknown error'}`;\n return isTimeout ? 
new APITimeoutError({ message }) : new APIConnectionError({ message });\n};\n\nconst waitForWsOpen = async ({\n ws,\n timeoutMs,\n abortSignal,\n}: {\n ws: WebSocket;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}) => {\n if (abortSignal.aborted) {\n throw new Error('aborted');\n }\n\n const fut = new Future<void>();\n let timeout: NodeJS.Timeout | undefined;\n\n const cleanup = () => {\n if (timeout) clearTimeout(timeout);\n ws.off('open', onOpen);\n ws.off('error', onError);\n ws.off('close', onClose);\n abortSignal.removeEventListener('abort', onAbort);\n };\n\n const onOpen = () => fut.resolve();\n const onError = (err: Error) => fut.reject(asError(err));\n const onClose = (code: number, reason: Buffer) =>\n fut.reject(\n new Error(`WebSocket closed before open (code=${code}, reason=${reason.toString()})`),\n );\n const onAbort = () => fut.reject(new Error('aborted'));\n\n ws.on('open', onOpen);\n ws.on('error', onError);\n ws.on('close', onClose);\n abortSignal.addEventListener('abort', onAbort, { once: true });\n\n if (timeoutMs > 0) {\n timeout = setTimeout(() => fut.reject(new Error('connect timeout')), timeoutMs);\n }\n\n try {\n await fut.await;\n } finally {\n cleanup();\n }\n};\n\nconst safeTerminateWebSocket = (ws: WebSocket) => {\n // `ws` can emit an 'error' event during teardown (especially if CONNECTING).\n // If there is no error listener at that moment, Node will treat it as unhandled and crash the process.\n try {\n ws.on('error', () => {});\n } catch {\n // ignore\n }\n\n try {\n // `terminate()` can throw if the socket was never established; `close()` is safer in CONNECTING.\n if (ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n } else {\n ws.terminate();\n }\n } catch {\n // ignore\n }\n};\n\nconst connectCartesiaWebSocket = async ({\n url,\n timeoutMs,\n abortSignal,\n}: {\n url: string;\n timeoutMs: number;\n abortSignal: AbortSignal;\n}): Promise<WebSocket> => {\n const connectOnce = async (family?: number): 
Promise<WebSocket> => {\n const ws = new WebSocket(url, { handshakeTimeout: timeoutMs, family });\n try {\n await waitForWsOpen({ ws, timeoutMs, abortSignal });\n return ws;\n } catch (e) {\n safeTerminateWebSocket(ws);\n throw e;\n }\n };\n\n try {\n return await connectOnce();\n } catch (e) {\n // Mitigation for Node.js dual-stack (IPv6/IPv4) connect flakiness (\"happy eyeballs\"):\n // some environments surface `AggregateError` with nested `ETIMEDOUT` during the initial\n // WebSocket open. In that case we do a one-off retry forcing IPv4 (`family: 4`) before\n // letting the outer framework retry loop handle further attempts.\n //\n // If you still see `AggregateError`/`ETIMEDOUT`:\n // - Increase the session TTS connect timeout (`connOptions.ttsConnOptions.timeoutMs`)\n // - Or adjust Node's family autoselection behavior via `NODE_OPTIONS`, e.g.\n // `--network-family-autoselection-attempt-timeout=5000` (or disable it entirely).\n if (hasAnyTransientCode(e) || isAggregateErrorLike(e)) {\n return await connectOnce(4);\n }\n throw e;\n }\n};\n\nconst toCartesiaOptions = (\n opts: TTSOptions,\n streaming: boolean = false,\n): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n if (opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS) {\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n }\n\n const result: { [id: string]: unknown } = {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n max_buffer_delay_ms: 0,\n };\n\n if 
(opts.pronunciationDictId) {\n result.pronunciation_dict_id = opts.pronunciationDictId;\n }\n\n if (opts.apiVersion > API_VERSION_WITH_EXPERIMENTAL_CONTROLS && isSonic3(opts.model)) {\n const generationConfig: { [id: string]: unknown } = {};\n if (opts.speed) {\n generationConfig.speed = opts.speed;\n }\n if (opts.emotion) {\n generationConfig.emotion = opts.emotion[0];\n }\n if (opts.volume) {\n generationConfig.volume = opts.volume;\n }\n if (Object.keys(generationConfig).length) {\n result.generation_config = generationConfig;\n }\n }\n\n if (streaming && opts.wordTimestamps !== false) {\n result.add_timestamps = true;\n }\n\n return result;\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,EAKA;AAAA,OACK;AACP;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,cAAc;AACpB,MAAM,yCAAyC;AAC/C,MAAM,mCAAmC;AACzC,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAkC7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,cAAc;AAAA,EACd,gBAAgB;AAClB;AAEA,MAAM,wBAAwB,CAAC,SAAqB;AAClD,QAAM,SAAS,IAAI;AACnB,MAAI,SAAS,KAAK,KAAK,GAAG;AACxB,QAAI,KAAK,UAAU,UAAa,OAAO,KAAK,UAAU,UAAU;AAC9D,UAAI,KAAK,QAAQ,OAAO,KAAK,QAAQ,GAAK;AACxC,eAAO,KAAK,+CAA+C;AAAA,MAC7D;AAAA,IACF;AACA,QAAI,KAAK,WAAW,WAAc,KAAK,SAAS,OAAO,KAAK,SAAS,IAAM;AACzE,aAAO,KAAK,gDAAgD;AAAA,IAC9D;AAAA,EACF,WACE,KAAK,eAAe,0CACpB,KAAK,UAAU,kCACf;AACA,QAAI,KAAK,SAAS,KAAK,SAAS;AAC9B,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,OAAO,OAAO,KAAK,OAAO,SAAS,KAAK,QAAQ;AAAA,QAC9D,4DAA4D,gCAAgC;AAAA,MAE9F;AAAA,IACF;AAAA,EACF;AAEA,MAAI,KAAK,uBAAuB,CAAC,SAAS,KAAK,KAAK,GAAG;AACrD,WAAO;AAAA,MACL,EAAE,OAAO,KAAK,OAAO,qBAAqB,KAAK,oBAAoB;AAAA,MACnE;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,eAAe;AAAA,MACnB,GAAG;AAAA,
MACH,GAAG;AAAA,IACL;AAEA,UAAM,aAAa,cAAc,kBAAkB,YAAY,cAAc;AAAA,MAC3E,WAAW;AAAA,MACX,mBAAmB,aAAa,kBAAkB;AAAA,IACpD,CAAC;AAED,SAAK,QAAQ;AAEb,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,OAAO,aAAa,WAAW;AAAA,EAC3E;AAAA,EAEA,OAAO,SAAiE;AACtE,WAAO,IAAI,iBAAiB,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EACpE;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACd;AAAA,EACA;AAAA,EAEA,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAU,IAAI,OAAa;AAEjC,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG,KAAK,MAAM;AAAA,QAC/B;AAAA,QACA,QAAQ,KAAK;AAAA,MACf;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AACjB,cAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,QACrC,CAAC;AACD,YAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,cAAI,IAAI,YAAY,UAAW;AAC/B,eAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AACzD,cAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,QACvC,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,UAAI,IAAI,
SAAS,aAAc;AAC/B,WAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AACxD,UAAI,CAAC,QAAQ,KAAM,SAAQ,OAAO,GAAG;AAAA,IACvC,CAAC;AACD,QAAI,GAAG,SAAS,MAAM;AACpB,UAAI,CAAC,QAAQ,KAAM,SAAQ,QAAQ;AAAA,IACrC,CAAC;AACD,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAER,QAAI;AACF,YAAM,QAAQ;AAAA,IAChB,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,QAAS;AAC9B,UAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,YAAM,2BAA2B,CAAC;AAAA,IACpC;AAAA,EACF;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,WAAW;AACtB,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QACE,KAAK,MAAM,SACX,KAAK,MAAM,WACX,KAAK,MAAM,UACX,KAAK,MAAM,qBACX;AACA,4BAAsB,KAAK,KAAK;AAAA,IAClC;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,QAAI,uBAAuB;AAE3B,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,OAAO,IAAI;AACjD,uBAAiB,SAAS,KAAK,YAAY;AACzC,cAAM,MAAM;AAAA,UACV,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY,MAAM,QAAQ;AAAA,UAC1B,UAAU;AAAA,QACZ;AACA,QAAAA,IAAG,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,MAC7B;AAEA,YAAM,SAAS;AAAA,QACb,GAAG;AAAA,QACH,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,UAAU;AAAA,MACZ;AACA,MAAAA,IAAG,KAAK,KAAK,UAAU,MAAM,CAAC;AAE9B,6BAAuB;AAAA,IACzB;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAGA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAIvE,YAAM,eAAe,OAAO,oBAA6B;AAEzD,UAAI;AACJ,UAAI,0BAAyC,CAAC;AAE9C,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AAEnC,eAAK,MAAM,IAAI;AAAA,YACb;AAAA,YACA;AAAA,YACA,OAAO;AAAA,YACP;AAAA,YACA,kBACE,wBAAwB,SAAS,IAAI,0BAA0B;AAAA,UACnE,CAAC;AACD,sBAAY;AACZ,oCAA0B,CAAC;AAAA,QAC7B;AAAA,MACF;AAEA,UAAI,UAAiC;AAErC,YAAM,uBAAuB,MAAM;AACjC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAGA,YAAM,YAAY,CAAC,SAA
kB;AACnC,aAAK,aAAa,MAAM,IAAI,EAAE,MAAM,CAAC,UAAmB;AACtD,eAAK,QAAQ,MAAM,EAAE,MAAM,GAAG,0DAA0D;AAAA,QAC1F,CAAC;AAAA,MACH;AAEA,YAAM,UAAU,CAAC,MAAc,WAAmB;AAChD,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,OAAO,SAAS,CAAC,EAAE;AAAA,QAC/E;AACA,6BAAqB;AACrB,aAAK,aAAa,MAAM;AAAA,MAC1B;AAEA,YAAM,UAAU,CAAC,QAAe;AAC9B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,0BAA0B;AACtD,aAAK,aAAa,MAAM;AAAA,MAC1B;AAGA,MAAAA,IAAG,GAAG,WAAW,SAAS;AAC1B,MAAAA,IAAG,GAAG,SAAS,OAAO;AACtB,MAAAA,IAAG,GAAG,SAAS,OAAO;AAEtB,UAAI;AAEF,cAAM,SAAS,aAAa,OAAO,EAAE,UAAU;AAE/C,eAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,SAAS;AAC3D,gBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,cAAI,OAAO,KAAM;AAEjB,gBAAM,SAAS,OAAO;AAGtB,cAAI;AACJ,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,OAAO,SAAS,CAAC;AACzC,wBAAY,sBAAsB,MAAM,IAAI;AAAA,UAC9C,SAAS,UAAU;AACjB,iBAAK,QAAQ,KAAK,EAAE,SAAS,GAAG,kCAAkC;AAClE;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,iBAAK,QAAQ,MAAM,EAAE,OAAO,UAAU,MAAM,GAAG,yBAAyB;AACxE;AAAA,UACF;AAEA,gBAAM,YAAY,UAAU;AAG5B,cAAI,KAAK,MAAM,mBAAmB,SAAS,kBAAkB,SAAS,GAAG;AACvE,kBAAM,iBAAiB,UAAU;AACjC,qBAAS,IAAI,GAAG,IAAI,eAAe,MAAM,QAAQ,KAAK;AACpD,oBAAM,OAAO,eAAe,MAAM,CAAC;AACnC,oBAAM,YAAY,eAAe,MAAM,CAAC;AACxC,oBAAM,UAAU,eAAe,IAAI,CAAC;AACpC,kBAAI,SAAS,UAAa,cAAc,UAAa,YAAY,QAAW;AAC1E,wCAAwB;AAAA,kBACtB,kBAAkB;AAAA,oBAChB,MAAM,OAAO;AAAA;AAAA,oBACb;AAAA,oBACA;AAAA,kBACF,CAAC;AAAA,gBACH;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAGA,cAAI,eAAe,SAAS,GAAG;AAC7B,kBAAM,cAAc,OAAO,KAAK,UAAU,MAAM,QAAQ;AAExD,kBAAM,YAAY,YAAY,OAAO;AAAA,cACnC,YAAY;AAAA,cACZ,YAAY,aAAa,YAAY;AAAA,YACvC;AACA,uBAAW,SAAS,QAAQ,MAAM,SAAS,GAAG;AAC5C,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AAKA,iCAAqB;AACrB,sBAAU,WAAW,MAAM;AAEzB,mBAAK,QAAQ;AAAA,gBACX,qDAAqD,KAAK,MAAM,YAAY;AAAA,cAC9E;AACA,cAAAA,IAAG,MAAM;AAAA,YACX,GAAG,KAAK,MAAM,YAAY;AAAA,UAC5B,WAAW,cAAc,SAAS,GAAG;AAEnC,gBAAI,sBAAsB;AACxB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,qCAAqB;AACrB,gBAAAA,IAAG,MAAM
;AACT;AAAA,cACF;AAAA,YACF;AAAA,UAEF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AAEZ,YAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,cACE,IAAI,QAAQ,SAAS,iBAAiB,KACtC,IAAI,QAAQ,SAAS,mBAAmB,GACxC;AACA,iBAAK,QAAQ;AAAA,cACX,EAAE,IAAI;AAAA,cACN;AAAA,YACF;AAAA,UACF,OAAO;AACL,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AAAA,QACF;AAAA,MACF,UAAE;AAEA,QAAAA,IAAG,IAAI,WAAW,SAAS;AAC3B,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,QAAAA,IAAG,IAAI,SAAS,OAAO;AACvB,6BAAqB;AAAA,MACvB;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,KAAK,MAAM,UAAU;AAEzG,QAAI;AACJ,QAAI;AACF,WAAK,MAAM,yBAAyB;AAAA,QAClC;AAAA,QACA,WAAW,KAAK,YAAY;AAAA,QAC5B,aAAa,KAAK;AAAA,MACpB,CAAC;AACD,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,UAAI,KAAK,YAAY,SAAS;AAC5B;AAAA,MACF;AACA,YAAM,2BAA2B,CAAC;AAAA,IACpC,UAAE;AAEA,UAAI,MAAM,GAAG,eAAe,UAAU,QAAQ;AAC5C,+BAAuB,EAAE;AAAA,MAC3B;AAAA,IACF;AAAA,EACF;AACF;AAEA,MAAM,UAAU,CAAC,MAAuB,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAEpF,MAAM,wBAAwB,oBAAI,IAAI;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,MAAM,WAAW,CAAC,MAA6C;AAC7D,SAAO,MAAM,QAAQ,OAAO,MAAM;AACpC;AAEA,MAAM,uBAAuB,CAAC,MAA0D;AACtF,MAAI,CAAC,SAAS,CAAC,EAAG,QAAO;AACzB,SAAO,EAAE,SAAS,oBAAoB,MAAM,QAAQ,EAAE,MAAM;AAC9D;AAEA,MAAM,eAAe,CAAC,GAAY,SAA0B;AAC1D,MAAI,SAAS,CAAC,KAAK,EAAE,SAAS,KAAM,QAAO;AAC3C,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,aAAa,OAAO,IAAI,CAAC;AAAA,EAC3D;AACA,SAAO;AACT;AAEA,MAAM,sBAAsB,CAAC,MAAwB;AACnD,MAAI,SAAS,CAAC,KAAK,OAAO,EAAE,SAAS,UAAU;AAC7C,WAAO,sBAAsB,IAAI,EAAE,IAAI;AAAA,EACzC;AACA,MAAI,qBAAqB,CAAC,GAAG;AAC3B,WAAO,EAAE,OAAO,KAAK,CAAC,UAAU,oBAAoB,KAAK,CAAC;AAAA,EAC5D;AACA,SAAO;AACT;AAEA,MAAM,6BAA6B,CAAC,MAAmC;AACrE,QAAM,MAAM,QAAQ,CAAC;AACrB,QAAM,YACJ,aAAa,GAAG,WAAW,KAC1B,OAAO,IAAI,YAAY,YAAY,IAAI,QAAQ,SAAS,WAAW;AACtE,QAAM,UAAU,YACZ,kCACA,+BAA+B,IAAI,WAAW,eAAe;AACjE,SAAO,YAAY,IAAI,gBAAgB,EAAE,QAAQ,CAAC,IAAI,IAAI,mBAAmB,EAAE,QAAQ,CAAC;AAC1F;AAEA,MAAM,gBAAgB,OAAO;AAA
A,EAC3B;AAAA,EACA;AAAA,EACA;AACF,MAIM;AACJ,MAAI,YAAY,SAAS;AACvB,UAAM,IAAI,MAAM,SAAS;AAAA,EAC3B;AAEA,QAAM,MAAM,IAAI,OAAa;AAC7B,MAAI;AAEJ,QAAM,UAAU,MAAM;AACpB,QAAI,QAAS,cAAa,OAAO;AACjC,OAAG,IAAI,QAAQ,MAAM;AACrB,OAAG,IAAI,SAAS,OAAO;AACvB,OAAG,IAAI,SAAS,OAAO;AACvB,gBAAY,oBAAoB,SAAS,OAAO;AAAA,EAClD;AAEA,QAAM,SAAS,MAAM,IAAI,QAAQ;AACjC,QAAM,UAAU,CAAC,QAAe,IAAI,OAAO,QAAQ,GAAG,CAAC;AACvD,QAAM,UAAU,CAAC,MAAc,WAC7B,IAAI;AAAA,IACF,IAAI,MAAM,sCAAsC,IAAI,YAAY,OAAO,SAAS,CAAC,GAAG;AAAA,EACtF;AACF,QAAM,UAAU,MAAM,IAAI,OAAO,IAAI,MAAM,SAAS,CAAC;AAErD,KAAG,GAAG,QAAQ,MAAM;AACpB,KAAG,GAAG,SAAS,OAAO;AACtB,KAAG,GAAG,SAAS,OAAO;AACtB,cAAY,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAE7D,MAAI,YAAY,GAAG;AACjB,cAAU,WAAW,MAAM,IAAI,OAAO,IAAI,MAAM,iBAAiB,CAAC,GAAG,SAAS;AAAA,EAChF;AAEA,MAAI;AACF,UAAM,IAAI;AAAA,EACZ,UAAE;AACA,YAAQ;AAAA,EACV;AACF;AAEA,MAAM,yBAAyB,CAAC,OAAkB;AAGhD,MAAI;AACF,OAAG,GAAG,SAAS,MAAM;AAAA,IAAC,CAAC;AAAA,EACzB,QAAQ;AAAA,EAER;AAEA,MAAI;AAEF,QAAI,GAAG,eAAe,UAAU,YAAY;AAC1C,SAAG,MAAM;AAAA,IACX,OAAO;AACL,SAAG,UAAU;AAAA,IACf;AAAA,EACF,QAAQ;AAAA,EAER;AACF;AAEA,MAAM,2BAA2B,OAAO;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AACF,MAI0B;AACxB,QAAM,cAAc,OAAO,WAAwC;AACjE,UAAM,KAAK,IAAI,UAAU,KAAK,EAAE,kBAAkB,WAAW,OAAO,CAAC;AACrE,QAAI;AACF,YAAM,cAAc,EAAE,IAAI,WAAW,YAAY,CAAC;AAClD,aAAO;AAAA,IACT,SAAS,GAAG;AACV,6BAAuB,EAAE;AACzB,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI;AACF,WAAO,MAAM,YAAY;AAAA,EAC3B,SAAS,GAAG;AAUV,QAAI,oBAAoB,CAAC,KAAK,qBAAqB,CAAC,GAAG;AACrD,aAAO,MAAM,YAAY,CAAC;AAAA,IAC5B;AACA,UAAM;AAAA,EACR;AACF;AAEA,MAAM,oBAAoB,CACxB,MACA,YAAqB,UACS;AAC9B,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,MAAI,KAAK,eAAe,wCAAwC;AAC9D,UAAM,gBAA2C,CAAC;AAClD,QAAI,KAAK,OAAO;AACd,oBAAc,QAAQ,KAAK;AAAA,IAC7B;AACA,QAAI,KAAK,SAAS;AAChB,oBAAc,UAAU,KAAK;AAAA,IAC/B;AACA,QAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,YAAM,0BAA0B;AAAA,IAClC;AAAA,EACF;AAEA,QAAM,SAAoC;AAAA,IACxC,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK
;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,IACf,qBAAqB;AAAA,EACvB;AAEA,MAAI,KAAK,qBAAqB;AAC5B,WAAO,wBAAwB,KAAK;AAAA,EACtC;AAEA,MAAI,KAAK,aAAa,0CAA0C,SAAS,KAAK,KAAK,GAAG;AACpF,UAAM,mBAA8C,CAAC;AACrD,QAAI,KAAK,OAAO;AACd,uBAAiB,QAAQ,KAAK;AAAA,IAChC;AACA,QAAI,KAAK,SAAS;AAChB,uBAAiB,UAAU,KAAK,QAAQ,CAAC;AAAA,IAC3C;AACA,QAAI,KAAK,QAAQ;AACf,uBAAiB,SAAS,KAAK;AAAA,IACjC;AACA,QAAI,OAAO,KAAK,gBAAgB,EAAE,QAAQ;AACxC,aAAO,oBAAoB;AAAA,IAC7B;AAAA,EACF;AAEA,MAAI,aAAa,KAAK,mBAAmB,OAAO;AAC9C,WAAO,iBAAiB;AAAA,EAC1B;AAEA,SAAO;AACT;","names":["tts","ws"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-cartesia",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.49",
|
|
4
4
|
"description": "Cartesia plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -30,9 +30,9 @@
|
|
|
30
30
|
"@types/ws": "^8.5.10",
|
|
31
31
|
"tsup": "^8.3.5",
|
|
32
32
|
"typescript": "^5.0.0",
|
|
33
|
-
"@livekit/agents": "1.0.
|
|
34
|
-
"@livekit/agents-plugin-openai": "1.0.
|
|
35
|
-
"@livekit/agents-plugins-test": "1.0.
|
|
33
|
+
"@livekit/agents": "1.0.49",
|
|
34
|
+
"@livekit/agents-plugin-openai": "1.0.49",
|
|
35
|
+
"@livekit/agents-plugins-test": "1.0.49"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"ws": "^8.16.0"
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@livekit/rtc-node": "^0.13.24",
|
|
42
42
|
"zod": "^3.25.76 || ^4.1.8",
|
|
43
|
-
"@livekit/agents": "1.0.
|
|
43
|
+
"@livekit/agents": "1.0.49"
|
|
44
44
|
},
|
|
45
45
|
"scripts": {
|
|
46
46
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/models.ts
CHANGED
|
@@ -2,11 +2,20 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
// Ref: python livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/models.py - 11 lines
|
|
6
|
+
/**
 * Cartesia TTS model identifiers known to this plugin.
 *
 * Each member is the literal model id string; `isSonic3` treats any id that
 * begins with `sonic-3` as part of the sonic-3 family.
 */
export type TTSModels =
  | 'sonic'
  | 'sonic-2'
  | 'sonic-3'
  | 'sonic-lite'
  | 'sonic-preview'
  | 'sonic-turbo';
|
|
6
13
|
|
|
7
14
|
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
8
15
|
|
|
9
|
-
export const TTSDefaultVoiceId = '
|
|
16
|
+
export const TTSDefaultVoiceId = 'f786b574-daa5-4673-aa0c-cbe3e8534c02';
|
|
17
|
+
|
|
18
|
+
/**
 * Reports whether a model identifier belongs to the sonic-3 family.
 *
 * This is a plain prefix match: any id beginning with `sonic-3` (including
 * suffixed ids) is accepted, everything else is rejected.
 *
 * @param model - Model identifier to inspect.
 * @returns `true` when `model` begins with `sonic-3`.
 */
export const isSonic3 = (model: string): boolean => {
  const familyPrefix = 'sonic-3';
  return model.slice(0, familyPrefix.length) === familyPrefix;
};
|
|
10
19
|
|
|
11
20
|
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
12
21
|
|
package/src/tts.ts
CHANGED
|
@@ -24,6 +24,7 @@ import {
|
|
|
24
24
|
type TTSModels,
|
|
25
25
|
type TTSVoiceEmotion,
|
|
26
26
|
type TTSVoiceSpeed,
|
|
27
|
+
isSonic3,
|
|
27
28
|
} from './models.js';
|
|
28
29
|
import {
|
|
29
30
|
type CartesiaServerMessage,
|
|
@@ -36,7 +37,9 @@ import {
|
|
|
36
37
|
|
|
37
38
|
const AUTHORIZATION_HEADER = 'X-API-Key';
|
|
38
39
|
const VERSION_HEADER = 'Cartesia-Version';
|
|
39
|
-
const
|
|
40
|
+
const API_VERSION = '2025-04-16';
|
|
41
|
+
const API_VERSION_WITH_EXPERIMENTAL_CONTROLS = '2024-11-13';
|
|
42
|
+
const MODEL_WITH_EXPERIMENTAL_CONTROLS = 'sonic-2-2025-03-07';
|
|
40
43
|
const NUM_CHANNELS = 1;
|
|
41
44
|
const BUFFERED_WORDS_COUNT = 8;
|
|
42
45
|
|
|
@@ -47,9 +50,15 @@ export interface TTSOptions {
|
|
|
47
50
|
voice: string | number[];
|
|
48
51
|
speed?: TTSVoiceSpeed | number;
|
|
49
52
|
emotion?: (TTSVoiceEmotion | string)[];
|
|
53
|
+
/**
|
|
54
|
+
* Volume of the speech. For sonic-3, the value is valid between 0.5 and 2.0.
|
|
55
|
+
* @see https://docs.cartesia.ai/api-reference/tts/bytes#body-generation-config-volume
|
|
56
|
+
*/
|
|
57
|
+
volume?: number;
|
|
50
58
|
apiKey?: string;
|
|
51
59
|
language: string;
|
|
52
60
|
baseUrl: string;
|
|
61
|
+
apiVersion: string;
|
|
53
62
|
|
|
54
63
|
/**
|
|
55
64
|
* The timeout for the next chunk to be received from the Cartesia API.
|
|
@@ -67,17 +76,50 @@ export interface TTSOptions {
|
|
|
67
76
|
}
|
|
68
77
|
|
|
69
78
|
const defaultTTSOptions: TTSOptions = {
|
|
70
|
-
model: 'sonic-
|
|
79
|
+
model: 'sonic-3',
|
|
71
80
|
encoding: 'pcm_s16le',
|
|
72
81
|
sampleRate: 24000,
|
|
73
82
|
voice: TTSDefaultVoiceId,
|
|
74
83
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
75
84
|
language: 'en',
|
|
76
85
|
baseUrl: 'https://api.cartesia.ai',
|
|
86
|
+
apiVersion: API_VERSION,
|
|
77
87
|
chunkTimeout: 5000,
|
|
78
88
|
wordTimestamps: true,
|
|
79
89
|
};
|
|
80
90
|
|
|
91
|
+
/**
 * Logs warnings for generation controls that the configured model / API
 * version is not expected to honour. It only reads `opts` and only logs; the
 * options are never modified and nothing is rejected.
 *
 * - sonic-3 models: warns when a numeric `speed` is outside 0.6–2.0 or when
 *   `volume` is outside 0.5–2.0.
 * - other models: warns when `speed` / `emotion` are set without the
 *   experimental-controls API version and model, and when `volume` is set at
 *   all.
 * - any non-sonic-3 model: warns when `pronunciationDictId` is set.
 *
 * @param opts - Effective TTS options to validate.
 */
const checkGenerationConfig = (opts: TTSOptions) => {
  const logger = log();
  const sonic3 = isSonic3(opts.model);

  if (sonic3) {
    // Named speeds ('fast', 'slow', ...) are strings and are not range-checked.
    if (typeof opts.speed === 'number' && (opts.speed < 0.6 || opts.speed > 2.0)) {
      logger.warn('speed must be between 0.6 and 2.0 for sonic-3');
    }
    if (opts.volume !== undefined && (opts.volume < 0.5 || opts.volume > 2.0)) {
      logger.warn('volume must be between 0.5 and 2.0 for sonic-3');
    }
  } else {
    const hasExperimentalControls =
      opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS &&
      opts.model === MODEL_WITH_EXPERIMENTAL_CONTROLS;

    if (!hasExperimentalControls && (opts.speed || opts.emotion)) {
      logger.warn(
        { model: opts.model, speed: opts.speed, emotion: opts.emotion },
        `speed and emotion controls are only supported for model '${MODEL_WITH_EXPERIMENTAL_CONTROLS}' ` +
          `or sonic-3 models, see https://docs.cartesia.ai/developer-tools/changelog for details`,
      );
    }

    // Callers run this check when `volume` is set, yet it previously produced no
    // diagnostic here. NOTE(review): the request builder appears to send volume
    // only inside `generation_config` for sonic-3 models — confirm against
    // toCartesiaOptions.
    if (opts.volume !== undefined) {
      logger.warn(
        { model: opts.model, volume: opts.volume },
        'volume is only supported for sonic-3 models',
      );
    }
  }

  if (opts.pronunciationDictId && !sonic3) {
    logger.warn(
      { model: opts.model, pronunciationDictId: opts.pronunciationDictId },
      'pronunciationDictId is only supported for sonic-3 models',
    );
  }
};
|
|
122
|
+
|
|
81
123
|
export class TTS extends tts.TTS {
|
|
82
124
|
#opts: TTSOptions;
|
|
83
125
|
label = 'cartesia.TTS';
|
|
@@ -101,24 +143,26 @@ export class TTS extends tts.TTS {
|
|
|
101
143
|
);
|
|
102
144
|
}
|
|
103
145
|
|
|
104
|
-
if (
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
146
|
+
if (
|
|
147
|
+
this.#opts.speed ||
|
|
148
|
+
this.#opts.emotion ||
|
|
149
|
+
this.#opts.volume ||
|
|
150
|
+
this.#opts.pronunciationDictId
|
|
151
|
+
) {
|
|
152
|
+
checkGenerationConfig(this.#opts);
|
|
110
153
|
}
|
|
111
154
|
}
|
|
112
155
|
|
|
113
156
|
updateOptions(opts: Partial<TTSOptions>) {
|
|
114
157
|
this.#opts = { ...this.#opts, ...opts };
|
|
115
158
|
|
|
116
|
-
if (
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
159
|
+
if (
|
|
160
|
+
this.#opts.speed ||
|
|
161
|
+
this.#opts.emotion ||
|
|
162
|
+
this.#opts.volume ||
|
|
163
|
+
this.#opts.pronunciationDictId
|
|
164
|
+
) {
|
|
165
|
+
checkGenerationConfig(this.#opts);
|
|
122
166
|
}
|
|
123
167
|
}
|
|
124
168
|
|
|
@@ -170,7 +214,7 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
170
214
|
method: 'POST',
|
|
171
215
|
headers: {
|
|
172
216
|
[AUTHORIZATION_HEADER]: this.#opts.apiKey!,
|
|
173
|
-
[VERSION_HEADER]:
|
|
217
|
+
[VERSION_HEADER]: this.#opts.apiVersion,
|
|
174
218
|
},
|
|
175
219
|
signal: this.abortSignal,
|
|
176
220
|
},
|
|
@@ -242,11 +286,13 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
242
286
|
updateOptions(opts: Partial<TTSOptions>) {
|
|
243
287
|
this.#opts = { ...this.#opts, ...opts };
|
|
244
288
|
|
|
245
|
-
if (
|
|
246
|
-
this.#
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
289
|
+
if (
|
|
290
|
+
this.#opts.speed ||
|
|
291
|
+
this.#opts.emotion ||
|
|
292
|
+
this.#opts.volume ||
|
|
293
|
+
this.#opts.pronunciationDictId
|
|
294
|
+
) {
|
|
295
|
+
checkGenerationConfig(this.#opts);
|
|
250
296
|
}
|
|
251
297
|
}
|
|
252
298
|
|
|
@@ -470,7 +516,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
470
516
|
};
|
|
471
517
|
|
|
472
518
|
const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');
|
|
473
|
-
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${
|
|
519
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${this.#opts.apiVersion}`;
|
|
474
520
|
|
|
475
521
|
let ws: WebSocket | undefined;
|
|
476
522
|
try {
|
|
@@ -651,12 +697,6 @@ const connectCartesiaWebSocket = async ({
|
|
|
651
697
|
}
|
|
652
698
|
};
|
|
653
699
|
|
|
654
|
-
/**
|
|
655
|
-
* Convert TTSOptions to Cartesia API format.
|
|
656
|
-
*
|
|
657
|
-
* @param opts - TTS options
|
|
658
|
-
* @param streaming - Whether this is for streaming (WebSocket) or non-streaming (HTTP)
|
|
659
|
-
*/
|
|
660
700
|
const toCartesiaOptions = (
|
|
661
701
|
opts: TTSOptions,
|
|
662
702
|
streaming: boolean = false,
|
|
@@ -670,16 +710,17 @@ const toCartesiaOptions = (
|
|
|
670
710
|
voice.embedding = opts.voice;
|
|
671
711
|
}
|
|
672
712
|
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
713
|
+
if (opts.apiVersion === API_VERSION_WITH_EXPERIMENTAL_CONTROLS) {
|
|
714
|
+
const voiceControls: { [id: string]: unknown } = {};
|
|
715
|
+
if (opts.speed) {
|
|
716
|
+
voiceControls.speed = opts.speed;
|
|
717
|
+
}
|
|
718
|
+
if (opts.emotion) {
|
|
719
|
+
voiceControls.emotion = opts.emotion;
|
|
720
|
+
}
|
|
721
|
+
if (Object.keys(voiceControls).length) {
|
|
722
|
+
voice.__experimental_controls = voiceControls;
|
|
723
|
+
}
|
|
683
724
|
}
|
|
684
725
|
|
|
685
726
|
const result: { [id: string]: unknown } = {
|
|
@@ -691,9 +732,29 @@ const toCartesiaOptions = (
|
|
|
691
732
|
sample_rate: opts.sampleRate,
|
|
692
733
|
},
|
|
693
734
|
language: opts.language,
|
|
694
|
-
|
|
735
|
+
max_buffer_delay_ms: 0,
|
|
695
736
|
};
|
|
696
737
|
|
|
738
|
+
if (opts.pronunciationDictId) {
|
|
739
|
+
result.pronunciation_dict_id = opts.pronunciationDictId;
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
if (opts.apiVersion > API_VERSION_WITH_EXPERIMENTAL_CONTROLS && isSonic3(opts.model)) {
|
|
743
|
+
const generationConfig: { [id: string]: unknown } = {};
|
|
744
|
+
if (opts.speed) {
|
|
745
|
+
generationConfig.speed = opts.speed;
|
|
746
|
+
}
|
|
747
|
+
if (opts.emotion) {
|
|
748
|
+
generationConfig.emotion = opts.emotion[0];
|
|
749
|
+
}
|
|
750
|
+
if (opts.volume) {
|
|
751
|
+
generationConfig.volume = opts.volume;
|
|
752
|
+
}
|
|
753
|
+
if (Object.keys(generationConfig).length) {
|
|
754
|
+
result.generation_config = generationConfig;
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
|
|
697
758
|
if (streaming && opts.wordTimestamps !== false) {
|
|
698
759
|
result.add_timestamps = true;
|
|
699
760
|
}
|