@livekit/agents-plugin-cartesia 0.1.3 → 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +14 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -1
- package/dist/models.cjs +1 -1
- package/dist/models.cjs.map +1 -1
- package/dist/models.d.cts +7 -0
- package/dist/models.d.ts +2 -2
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +1 -1
- package/dist/models.js.map +1 -1
- package/dist/tts.cjs +89 -42
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +35 -0
- package/dist/tts.d.ts +3 -0
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +90 -43
- package/dist/tts.js.map +1 -1
- package/dist/tts.test.d.cts +2 -0
- package/package.json +15 -11
- package/src/index.ts +14 -1
- package/src/models.ts +2 -2
- package/src/tts.ts +99 -44
package/dist/index.cjs
CHANGED
|
@@ -13,9 +13,20 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
13
13
|
};
|
|
14
14
|
var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
|
|
15
15
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
16
|
-
var
|
|
17
|
-
module.exports = __toCommonJS(
|
|
18
|
-
|
|
16
|
+
var index_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(index_exports);
|
|
18
|
+
var import_agents = require("@livekit/agents");
|
|
19
|
+
__reExport(index_exports, require("./tts.cjs"), module.exports);
|
|
20
|
+
class CartesiaPlugin extends import_agents.Plugin {
|
|
21
|
+
constructor() {
|
|
22
|
+
super({
|
|
23
|
+
title: "cartesia",
|
|
24
|
+
version: "0.1.3",
|
|
25
|
+
package: "@livekit/agents-plugin-cartesia"
|
|
26
|
+
});
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
import_agents.Plugin.registerPlugin(new CartesiaPlugin());
|
|
19
30
|
// Annotate the CommonJS export names for ESM import in node:
|
|
20
31
|
0 && (module.exports = {
|
|
21
32
|
...require("./tts.cjs")
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText:
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass CartesiaPlugin extends Plugin {\n constructor() {\n super({\n title: 'cartesia',\n version: '0.1.3',\n package: '@livekit/agents-plugin-cartesia',\n });\n }\n}\n\nPlugin.registerPlugin(new CartesiaPlugin());\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,oBAAuB;AAEvB,0BAAc,qBALd;AAOA,MAAM,uBAAuB,qBAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
|
package/dist/index.d.cts
ADDED
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,cAAc,UAAU,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,2 +1,13 @@
|
|
|
1
|
+
import { Plugin } from "@livekit/agents";
|
|
1
2
|
export * from "./tts.js";
|
|
3
|
+
class CartesiaPlugin extends Plugin {
|
|
4
|
+
constructor() {
|
|
5
|
+
super({
|
|
6
|
+
title: "cartesia",
|
|
7
|
+
version: "0.1.3",
|
|
8
|
+
package: "@livekit/agents-plugin-cartesia"
|
|
9
|
+
});
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
Plugin.registerPlugin(new CartesiaPlugin());
|
|
2
13
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText:
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass CartesiaPlugin extends Plugin {\n constructor() {\n super({\n title: 'cartesia',\n version: '0.1.3',\n package: '@livekit/agents-plugin-cartesia',\n });\n }\n}\n\nPlugin.registerPlugin(new CartesiaPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,cAAc;AAEd,MAAM,uBAAuB,OAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
|
package/dist/models.cjs
CHANGED
|
@@ -21,7 +21,7 @@ __export(models_exports, {
|
|
|
21
21
|
TTSDefaultVoiceId: () => TTSDefaultVoiceId
|
|
22
22
|
});
|
|
23
23
|
module.exports = __toCommonJS(models_exports);
|
|
24
|
-
const TTSDefaultVoiceId = "
|
|
24
|
+
const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
|
|
25
25
|
// Annotate the CommonJS export names for ESM import in node:
|
|
26
26
|
0 && (module.exports = {
|
|
27
27
|
TTSDefaultVoiceId
|
package/dist/models.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic-
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQO,MAAM,oBAAoB;","names":[]}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
2
|
+
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
3
|
+
export declare const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
|
|
4
|
+
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
5
|
+
export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
|
|
6
|
+
export type TTSEncoding = 'pcm_s16le';
|
|
7
|
+
//# sourceMappingURL=models.d.ts.map
|
package/dist/models.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export type TTSModels = 'sonic-
|
|
1
|
+
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
2
2
|
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
3
|
-
export declare const TTSDefaultVoiceId = "
|
|
3
|
+
export declare const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
|
|
4
4
|
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
5
5
|
export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
|
|
6
6
|
export type TTSEncoding = 'pcm_s16le';
|
package/dist/models.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GAAG,eAAe,GAAG,
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,YAAY,GAAG,eAAe,GAAG,aAAa,CAAC;AAE7F,MAAM,MAAM,YAAY,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAE1E,eAAO,MAAM,iBAAiB,yCAAyC,CAAC;AAExE,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,CAAC;AAE/E,MAAM,MAAM,eAAe,GACvB,cAAc,GACd,WAAW,GACX,OAAO,GACP,YAAY,GACZ,eAAe,GACf,mBAAmB,GACnB,gBAAgB,GAChB,YAAY,GACZ,iBAAiB,GACjB,oBAAoB,GACpB,iBAAiB,GACjB,cAAc,GACd,UAAU,GACV,eAAe,GACf,kBAAkB,GAClB,gBAAgB,GAChB,aAAa,GACb,SAAS,GACT,cAAc,GACd,iBAAiB,GACjB,kBAAkB,GAClB,eAAe,GACf,WAAW,GACX,gBAAgB,GAChB,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GAKrB,WAAW,CAAC"}
|
package/dist/models.js
CHANGED
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic-
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":"AAQO,MAAM,oBAAoB;","names":[]}
|
package/dist/tts.cjs
CHANGED
|
@@ -24,7 +24,6 @@ __export(tts_exports, {
|
|
|
24
24
|
});
|
|
25
25
|
module.exports = __toCommonJS(tts_exports);
|
|
26
26
|
var import_agents = require("@livekit/agents");
|
|
27
|
-
var import_node_crypto = require("node:crypto");
|
|
28
27
|
var import_node_https = require("node:https");
|
|
29
28
|
var import_ws = require("ws");
|
|
30
29
|
var import_models = require("./models.cjs");
|
|
@@ -34,12 +33,13 @@ const VERSION = "2024-06-10";
|
|
|
34
33
|
const NUM_CHANNELS = 1;
|
|
35
34
|
const BUFFERED_WORDS_COUNT = 8;
|
|
36
35
|
const defaultTTSOptions = {
|
|
37
|
-
model: "sonic-
|
|
36
|
+
model: "sonic-2",
|
|
38
37
|
encoding: "pcm_s16le",
|
|
39
38
|
sampleRate: 24e3,
|
|
40
39
|
voice: import_models.TTSDefaultVoiceId,
|
|
41
40
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
42
|
-
language: "en"
|
|
41
|
+
language: "en",
|
|
42
|
+
baseUrl: "https://api.cartesia.ai"
|
|
43
43
|
};
|
|
44
44
|
class TTS extends import_agents.tts.TTS {
|
|
45
45
|
#opts;
|
|
@@ -57,9 +57,23 @@ class TTS extends import_agents.tts.TTS {
|
|
|
57
57
|
"Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
|
|
58
58
|
);
|
|
59
59
|
}
|
|
60
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
61
|
+
const logger = (0, import_agents.log)();
|
|
62
|
+
logger.warn(
|
|
63
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
64
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
65
|
+
);
|
|
66
|
+
}
|
|
60
67
|
}
|
|
61
68
|
updateOptions(opts) {
|
|
62
69
|
this.#opts = { ...this.#opts, ...opts };
|
|
70
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
71
|
+
const logger = (0, import_agents.log)();
|
|
72
|
+
logger.warn(
|
|
73
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
74
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
75
|
+
);
|
|
76
|
+
}
|
|
63
77
|
}
|
|
64
78
|
synthesize(text) {
|
|
65
79
|
return new ChunkedStream(this, text, this.#opts);
|
|
@@ -77,17 +91,17 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
77
91
|
super(text, tts2);
|
|
78
92
|
this.#text = text;
|
|
79
93
|
this.#opts = opts;
|
|
80
|
-
this.#run();
|
|
81
94
|
}
|
|
82
|
-
async
|
|
83
|
-
const requestId = (0,
|
|
95
|
+
async run() {
|
|
96
|
+
const requestId = (0, import_agents.shortuuid)();
|
|
84
97
|
const bstream = new import_agents.AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
85
98
|
const json = toCartesiaOptions(this.#opts);
|
|
86
99
|
json.transcript = this.#text;
|
|
100
|
+
const baseUrl = new URL(this.#opts.baseUrl);
|
|
87
101
|
const req = (0, import_node_https.request)(
|
|
88
102
|
{
|
|
89
|
-
hostname:
|
|
90
|
-
port: 443,
|
|
103
|
+
hostname: baseUrl.hostname,
|
|
104
|
+
port: parseInt(baseUrl.port) || (baseUrl.protocol === "https:" ? 443 : 80),
|
|
91
105
|
path: "/tts/bytes",
|
|
92
106
|
method: "POST",
|
|
93
107
|
headers: {
|
|
@@ -126,18 +140,25 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
126
140
|
class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
127
141
|
#opts;
|
|
128
142
|
#logger = (0, import_agents.log)();
|
|
129
|
-
#tokenizer = new import_agents.tokenize.basic.SentenceTokenizer(
|
|
143
|
+
#tokenizer = new import_agents.tokenize.basic.SentenceTokenizer({
|
|
144
|
+
minSentenceLength: BUFFERED_WORDS_COUNT
|
|
145
|
+
}).stream();
|
|
130
146
|
label = "cartesia.SynthesizeStream";
|
|
131
147
|
constructor(tts2, opts) {
|
|
132
148
|
super(tts2);
|
|
133
149
|
this.#opts = opts;
|
|
134
|
-
this.#run();
|
|
135
150
|
}
|
|
136
151
|
updateOptions(opts) {
|
|
137
152
|
this.#opts = { ...this.#opts, ...opts };
|
|
153
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
154
|
+
this.#logger.warn(
|
|
155
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
156
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
157
|
+
);
|
|
158
|
+
}
|
|
138
159
|
}
|
|
139
|
-
async
|
|
140
|
-
const requestId = (0,
|
|
160
|
+
async run() {
|
|
161
|
+
const requestId = (0, import_agents.shortuuid)();
|
|
141
162
|
let closing = false;
|
|
142
163
|
const sentenceStreamTask = async (ws2) => {
|
|
143
164
|
const packet = toCartesiaOptions(this.#opts);
|
|
@@ -172,45 +193,70 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
172
193
|
this.#tokenizer.close();
|
|
173
194
|
};
|
|
174
195
|
const recvTask = async (ws2) => {
|
|
196
|
+
let finalReceived = false;
|
|
197
|
+
let shouldExit = false;
|
|
175
198
|
const bstream = new import_agents.AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
176
199
|
let lastFrame;
|
|
177
200
|
const sendLastFrame = (segmentId, final) => {
|
|
178
|
-
if (lastFrame) {
|
|
201
|
+
if (lastFrame && !this.queue.closed) {
|
|
179
202
|
this.queue.put({ requestId, segmentId, frame: lastFrame, final });
|
|
180
203
|
lastFrame = void 0;
|
|
181
204
|
}
|
|
182
205
|
};
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
206
|
+
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
207
|
+
try {
|
|
208
|
+
await new Promise((resolve, reject) => {
|
|
209
|
+
ws2.removeAllListeners();
|
|
210
|
+
ws2.on("message", (data) => resolve(data));
|
|
211
|
+
ws2.on("close", (code, reason) => {
|
|
212
|
+
if (!closing) {
|
|
213
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
214
|
+
}
|
|
215
|
+
if (!finalReceived) {
|
|
216
|
+
reject(new Error("WebSocket closed"));
|
|
217
|
+
} else {
|
|
218
|
+
resolve(null);
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
}).then((msg) => {
|
|
222
|
+
if (!msg) return;
|
|
223
|
+
const json = JSON.parse(msg.toString());
|
|
224
|
+
const segmentId = json.context_id;
|
|
225
|
+
if ("data" in json) {
|
|
226
|
+
const data = new Int8Array(Buffer.from(json.data, "base64"));
|
|
227
|
+
for (const frame of bstream.write(data)) {
|
|
228
|
+
sendLastFrame(segmentId, false);
|
|
229
|
+
lastFrame = frame;
|
|
230
|
+
}
|
|
231
|
+
} else if ("done" in json) {
|
|
232
|
+
finalReceived = true;
|
|
233
|
+
for (const frame of bstream.flush()) {
|
|
234
|
+
sendLastFrame(segmentId, false);
|
|
235
|
+
lastFrame = frame;
|
|
236
|
+
}
|
|
237
|
+
sendLastFrame(segmentId, true);
|
|
238
|
+
if (!this.queue.closed) {
|
|
239
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
240
|
+
}
|
|
241
|
+
if (segmentId === requestId) {
|
|
242
|
+
closing = true;
|
|
243
|
+
shouldExit = true;
|
|
244
|
+
this.#logger.info("Cartesia WebSocket close event sent");
|
|
245
|
+
ws2.close();
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
});
|
|
249
|
+
} catch (err) {
|
|
250
|
+
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
251
|
+
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
203
252
|
}
|
|
253
|
+
break;
|
|
204
254
|
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
if (!closing) {
|
|
208
|
-
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
209
|
-
}
|
|
210
|
-
ws2.removeAllListeners();
|
|
211
|
-
});
|
|
255
|
+
}
|
|
256
|
+
this.#logger.info("Cartesia WebSocket closed");
|
|
212
257
|
};
|
|
213
|
-
const
|
|
258
|
+
const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
|
|
259
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
|
|
214
260
|
const ws = new import_ws.WebSocket(url);
|
|
215
261
|
try {
|
|
216
262
|
await new Promise((resolve, reject) => {
|
|
@@ -219,6 +265,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
|
|
|
219
265
|
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
220
266
|
});
|
|
221
267
|
await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
|
|
268
|
+
this.#logger.info("Cartesia run completed");
|
|
222
269
|
} catch (e) {
|
|
223
270
|
throw new Error(`failed to connect to Cartesia: ${e}`);
|
|
224
271
|
}
|
|
@@ -240,7 +287,7 @@ const toCartesiaOptions = (opts) => {
|
|
|
240
287
|
if (opts.emotion) {
|
|
241
288
|
voiceControls.emotion = opts.emotion;
|
|
242
289
|
}
|
|
243
|
-
if (Object.keys(
|
|
290
|
+
if (Object.keys(voiceControls).length) {
|
|
244
291
|
voice.__experimental_controls = voiceControls;
|
|
245
292
|
}
|
|
246
293
|
return {
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { request } from 'node:https';\nimport { WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-english',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n this.#run();\n }\n\n async #run() {\n const requestId = randomUUID();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const req = request(\n {\n hostname: 'api.cartesia.ai',\n port: 443,\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer(undefined, BUFFERED_WORDS_COUNT).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.#run();\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n async #run() {\n const requestId = randomUUID();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n closing = true;\n ws.close();\n return;\n }\n }\n });\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n ws.removeAllListeners();\n });\n };\n\n const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys({}).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAoD;AAEpD,yBAA2B;AAC3B,wBAAwB;AACxB,gBAA0B;AAC1B,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAa7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,gBAAY,+BAAW;AAC7B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,MAAM;AAAA,QACN,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB,QAAW,oBAAoB,EAAE,OAAO;AAAA,EAC1F,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,gBAAY,+BAAW;AAC7B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,MAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,cAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,cAAM,YAAY,KAAK;AACvB,YAAI,UAAU,MAAM;AAClB,gBAAMC,QAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,qBAAW,SAAS,QAAQ,MAAMA,KAAI,GAAG;AACvC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,WAAW,UAAU,MAAM;AACzB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AACA,wBAAc,WAAW,IAAI;AAC7B,eAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,cAAI,cAAc,WAAW;AAC3B,sBAAU;AACV,YAAAD,IAAG,MAAM;AACT;AAAA,UACF;AAAA,QACF;AAAA,MACF,CAAC;AACD,MAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,QACpE;AACA,QAAAA,IAAG,mBAAmB;AAAA,MACxB,CAAC;AAAA,IACH;AAEA,UAAM,MAAM,+CAA+C,KAAK,MAAM,MAAM,qBAAqB,OAAO;AACxG,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,CAAC,CAAC,EAAE,QAAQ;AAC1B,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws","data"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n this.#logger.info('Cartesia WebSocket close event sent');\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n\n this.#logger.info('Cartesia WebSocket closed');\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n this.#logger.info('Cartesia run completed');\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qBAAK,QAAQ,KAAK,qCAAqC;AACvD,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAEA,WAAK,QAAQ,KAAK,2BAA2B;AAAA,IAC/C;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AACrE,WAAK,QAAQ,KAAK,wBAAwB;AAAA,IAC5C,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/dist/tts.d.cts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { tts } from '@livekit/agents';
|
|
2
|
+
import { type TTSEncoding, type TTSModels, type TTSVoiceEmotion, type TTSVoiceSpeed } from './models.js';
|
|
3
|
+
export interface TTSOptions {
|
|
4
|
+
model: TTSModels | string;
|
|
5
|
+
encoding: TTSEncoding;
|
|
6
|
+
sampleRate: number;
|
|
7
|
+
voice: string | number[];
|
|
8
|
+
speed?: TTSVoiceSpeed | number;
|
|
9
|
+
emotion?: (TTSVoiceEmotion | string)[];
|
|
10
|
+
apiKey?: string;
|
|
11
|
+
language: string;
|
|
12
|
+
baseUrl: string;
|
|
13
|
+
}
|
|
14
|
+
export declare class TTS extends tts.TTS {
|
|
15
|
+
#private;
|
|
16
|
+
label: string;
|
|
17
|
+
constructor(opts?: Partial<TTSOptions>);
|
|
18
|
+
updateOptions(opts: Partial<TTSOptions>): void;
|
|
19
|
+
synthesize(text: string): tts.ChunkedStream;
|
|
20
|
+
stream(): SynthesizeStream;
|
|
21
|
+
}
|
|
22
|
+
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
23
|
+
#private;
|
|
24
|
+
label: string;
|
|
25
|
+
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
26
|
+
protected run(): Promise<void>;
|
|
27
|
+
}
|
|
28
|
+
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
|
29
|
+
#private;
|
|
30
|
+
label: string;
|
|
31
|
+
constructor(tts: TTS, opts: TTSOptions);
|
|
32
|
+
updateOptions(opts: Partial<TTSOptions>): void;
|
|
33
|
+
protected run(): Promise<void>;
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=tts.d.ts.map
|
package/dist/tts.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export interface TTSOptions {
|
|
|
9
9
|
emotion?: (TTSVoiceEmotion | string)[];
|
|
10
10
|
apiKey?: string;
|
|
11
11
|
language: string;
|
|
12
|
+
baseUrl: string;
|
|
12
13
|
}
|
|
13
14
|
export declare class TTS extends tts.TTS {
|
|
14
15
|
#private;
|
|
@@ -22,11 +23,13 @@ export declare class ChunkedStream extends tts.ChunkedStream {
|
|
|
22
23
|
#private;
|
|
23
24
|
label: string;
|
|
24
25
|
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
26
|
+
protected run(): Promise<void>;
|
|
25
27
|
}
|
|
26
28
|
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
|
27
29
|
#private;
|
|
28
30
|
label: string;
|
|
29
31
|
constructor(tts: TTS, opts: TTSOptions);
|
|
30
32
|
updateOptions(opts: Partial<TTSOptions>): void;
|
|
33
|
+
protected run(): Promise<void>;
|
|
31
34
|
}
|
|
32
35
|
//# sourceMappingURL=tts.d.ts.map
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAYD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CA8CpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CA+HpB"}
|
package/dist/tts.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import { AudioByteStream, log, tokenize, tts } from "@livekit/agents";
|
|
2
|
-
import { randomUUID } from "node:crypto";
|
|
1
|
+
import { AudioByteStream, log, shortuuid, tokenize, tts } from "@livekit/agents";
|
|
3
2
|
import { request } from "node:https";
|
|
4
3
|
import { WebSocket } from "ws";
|
|
5
4
|
import {
|
|
@@ -11,12 +10,13 @@ const VERSION = "2024-06-10";
|
|
|
11
10
|
const NUM_CHANNELS = 1;
|
|
12
11
|
const BUFFERED_WORDS_COUNT = 8;
|
|
13
12
|
const defaultTTSOptions = {
|
|
14
|
-
model: "sonic-
|
|
13
|
+
model: "sonic-2",
|
|
15
14
|
encoding: "pcm_s16le",
|
|
16
15
|
sampleRate: 24e3,
|
|
17
16
|
voice: TTSDefaultVoiceId,
|
|
18
17
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
19
|
-
language: "en"
|
|
18
|
+
language: "en",
|
|
19
|
+
baseUrl: "https://api.cartesia.ai"
|
|
20
20
|
};
|
|
21
21
|
class TTS extends tts.TTS {
|
|
22
22
|
#opts;
|
|
@@ -34,9 +34,23 @@ class TTS extends tts.TTS {
|
|
|
34
34
|
"Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
|
|
35
35
|
);
|
|
36
36
|
}
|
|
37
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
38
|
+
const logger = log();
|
|
39
|
+
logger.warn(
|
|
40
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
41
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
42
|
+
);
|
|
43
|
+
}
|
|
37
44
|
}
|
|
38
45
|
updateOptions(opts) {
|
|
39
46
|
this.#opts = { ...this.#opts, ...opts };
|
|
47
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
48
|
+
const logger = log();
|
|
49
|
+
logger.warn(
|
|
50
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
51
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
52
|
+
);
|
|
53
|
+
}
|
|
40
54
|
}
|
|
41
55
|
synthesize(text) {
|
|
42
56
|
return new ChunkedStream(this, text, this.#opts);
|
|
@@ -54,17 +68,17 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
54
68
|
super(text, tts2);
|
|
55
69
|
this.#text = text;
|
|
56
70
|
this.#opts = opts;
|
|
57
|
-
this.#run();
|
|
58
71
|
}
|
|
59
|
-
async
|
|
60
|
-
const requestId =
|
|
72
|
+
async run() {
|
|
73
|
+
const requestId = shortuuid();
|
|
61
74
|
const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
62
75
|
const json = toCartesiaOptions(this.#opts);
|
|
63
76
|
json.transcript = this.#text;
|
|
77
|
+
const baseUrl = new URL(this.#opts.baseUrl);
|
|
64
78
|
const req = request(
|
|
65
79
|
{
|
|
66
|
-
hostname:
|
|
67
|
-
port: 443,
|
|
80
|
+
hostname: baseUrl.hostname,
|
|
81
|
+
port: parseInt(baseUrl.port) || (baseUrl.protocol === "https:" ? 443 : 80),
|
|
68
82
|
path: "/tts/bytes",
|
|
69
83
|
method: "POST",
|
|
70
84
|
headers: {
|
|
@@ -103,18 +117,25 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
103
117
|
class SynthesizeStream extends tts.SynthesizeStream {
|
|
104
118
|
#opts;
|
|
105
119
|
#logger = log();
|
|
106
|
-
#tokenizer = new tokenize.basic.SentenceTokenizer(
|
|
120
|
+
#tokenizer = new tokenize.basic.SentenceTokenizer({
|
|
121
|
+
minSentenceLength: BUFFERED_WORDS_COUNT
|
|
122
|
+
}).stream();
|
|
107
123
|
label = "cartesia.SynthesizeStream";
|
|
108
124
|
constructor(tts2, opts) {
|
|
109
125
|
super(tts2);
|
|
110
126
|
this.#opts = opts;
|
|
111
|
-
this.#run();
|
|
112
127
|
}
|
|
113
128
|
updateOptions(opts) {
|
|
114
129
|
this.#opts = { ...this.#opts, ...opts };
|
|
130
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
|
|
131
|
+
this.#logger.warn(
|
|
132
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
133
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
|
|
134
|
+
);
|
|
135
|
+
}
|
|
115
136
|
}
|
|
116
|
-
async
|
|
117
|
-
const requestId =
|
|
137
|
+
async run() {
|
|
138
|
+
const requestId = shortuuid();
|
|
118
139
|
let closing = false;
|
|
119
140
|
const sentenceStreamTask = async (ws2) => {
|
|
120
141
|
const packet = toCartesiaOptions(this.#opts);
|
|
@@ -149,45 +170,70 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
149
170
|
this.#tokenizer.close();
|
|
150
171
|
};
|
|
151
172
|
const recvTask = async (ws2) => {
|
|
173
|
+
let finalReceived = false;
|
|
174
|
+
let shouldExit = false;
|
|
152
175
|
const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
153
176
|
let lastFrame;
|
|
154
177
|
const sendLastFrame = (segmentId, final) => {
|
|
155
|
-
if (lastFrame) {
|
|
178
|
+
if (lastFrame && !this.queue.closed) {
|
|
156
179
|
this.queue.put({ requestId, segmentId, frame: lastFrame, final });
|
|
157
180
|
lastFrame = void 0;
|
|
158
181
|
}
|
|
159
182
|
};
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
183
|
+
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
184
|
+
try {
|
|
185
|
+
await new Promise((resolve, reject) => {
|
|
186
|
+
ws2.removeAllListeners();
|
|
187
|
+
ws2.on("message", (data) => resolve(data));
|
|
188
|
+
ws2.on("close", (code, reason) => {
|
|
189
|
+
if (!closing) {
|
|
190
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
191
|
+
}
|
|
192
|
+
if (!finalReceived) {
|
|
193
|
+
reject(new Error("WebSocket closed"));
|
|
194
|
+
} else {
|
|
195
|
+
resolve(null);
|
|
196
|
+
}
|
|
197
|
+
});
|
|
198
|
+
}).then((msg) => {
|
|
199
|
+
if (!msg) return;
|
|
200
|
+
const json = JSON.parse(msg.toString());
|
|
201
|
+
const segmentId = json.context_id;
|
|
202
|
+
if ("data" in json) {
|
|
203
|
+
const data = new Int8Array(Buffer.from(json.data, "base64"));
|
|
204
|
+
for (const frame of bstream.write(data)) {
|
|
205
|
+
sendLastFrame(segmentId, false);
|
|
206
|
+
lastFrame = frame;
|
|
207
|
+
}
|
|
208
|
+
} else if ("done" in json) {
|
|
209
|
+
finalReceived = true;
|
|
210
|
+
for (const frame of bstream.flush()) {
|
|
211
|
+
sendLastFrame(segmentId, false);
|
|
212
|
+
lastFrame = frame;
|
|
213
|
+
}
|
|
214
|
+
sendLastFrame(segmentId, true);
|
|
215
|
+
if (!this.queue.closed) {
|
|
216
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
217
|
+
}
|
|
218
|
+
if (segmentId === requestId) {
|
|
219
|
+
closing = true;
|
|
220
|
+
shouldExit = true;
|
|
221
|
+
this.#logger.info("Cartesia WebSocket close event sent");
|
|
222
|
+
ws2.close();
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
});
|
|
226
|
+
} catch (err) {
|
|
227
|
+
if (err instanceof Error && !err.message.includes("WebSocket closed")) {
|
|
228
|
+
this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
|
|
180
229
|
}
|
|
230
|
+
break;
|
|
181
231
|
}
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
if (!closing) {
|
|
185
|
-
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
186
|
-
}
|
|
187
|
-
ws2.removeAllListeners();
|
|
188
|
-
});
|
|
232
|
+
}
|
|
233
|
+
this.#logger.info("Cartesia WebSocket closed");
|
|
189
234
|
};
|
|
190
|
-
const
|
|
235
|
+
const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
|
|
236
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
|
|
191
237
|
const ws = new WebSocket(url);
|
|
192
238
|
try {
|
|
193
239
|
await new Promise((resolve, reject) => {
|
|
@@ -196,6 +242,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
196
242
|
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
197
243
|
});
|
|
198
244
|
await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
|
|
245
|
+
this.#logger.info("Cartesia run completed");
|
|
199
246
|
} catch (e) {
|
|
200
247
|
throw new Error(`failed to connect to Cartesia: ${e}`);
|
|
201
248
|
}
|
|
@@ -217,7 +264,7 @@ const toCartesiaOptions = (opts) => {
|
|
|
217
264
|
if (opts.emotion) {
|
|
218
265
|
voiceControls.emotion = opts.emotion;
|
|
219
266
|
}
|
|
220
|
-
if (Object.keys(
|
|
267
|
+
if (Object.keys(voiceControls).length) {
|
|
221
268
|
voice.__experimental_controls = voiceControls;
|
|
222
269
|
}
|
|
223
270
|
return {
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { request } from 'node:https';\nimport { WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-english',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n this.#run();\n }\n\n async #run() {\n const requestId = randomUUID();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const req = request(\n {\n hostname: 'api.cartesia.ai',\n port: 443,\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer(undefined, BUFFERED_WORDS_COUNT).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.#run();\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n async #run() {\n const requestId = randomUUID();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n closing = true;\n ws.close();\n return;\n }\n }\n });\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n ws.removeAllListeners();\n });\n };\n\n const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys({}).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,UAAU,WAAW;AAEpD,SAAS,kBAAkB;AAC3B,SAAS,eAAe;AACxB,SAAS,iBAAiB;AAC1B;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAa7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,YAAY,WAAW;AAC7B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,MAAM;AAAA,QACN,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB,QAAW,oBAAoB,EAAE,OAAO;AAAA,EAC1F,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,YAAY,WAAW;AAC7B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,MAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,cAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,cAAM,YAAY,KAAK;AACvB,YAAI,UAAU,MAAM;AAClB,gBAAMC,QAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,qBAAW,SAAS,QAAQ,MAAMA,KAAI,GAAG;AACvC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,WAAW,UAAU,MAAM;AACzB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AACA,wBAAc,WAAW,IAAI;AAC7B,eAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,cAAI,cAAc,WAAW;AAC3B,sBAAU;AACV,YAAAD,IAAG,MAAM;AACT;AAAA,UACF;AAAA,QACF;AAAA,MACF,CAAC;AACD,MAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,QACpE;AACA,QAAAA,IAAG,mBAAmB;AAAA,MACxB,CAAC;AAAA,IACH;AAEA,UAAM,MAAM,+CAA+C,KAAK,MAAM,MAAM,qBAAqB,OAAO;AACxG,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,CAAC,CAAC,EAAE,QAAQ;AAC1B,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws","data"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n this.#logger.info('Cartesia WebSocket close event sent');\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && !err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n\n this.#logger.info('Cartesia WebSocket closed');\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n this.#logger.info('Cartesia run completed');\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qBAAK,QAAQ,KAAK,qCAAqC;AACvD,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAEA,WAAK,QAAQ,KAAK,2BAA2B;AAAA,IAC/C;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AACrE,WAAK,QAAQ,KAAK,wBAAwB;AAAA,IAC5C,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
|
package/package.json
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-cartesia",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "1.0.0-next.0",
|
|
4
4
|
"description": "Cartesia plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
8
8
|
"exports": {
|
|
9
|
-
"
|
|
9
|
+
"import": {
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
|
-
"
|
|
12
|
-
|
|
11
|
+
"default": "./dist/index.js"
|
|
12
|
+
},
|
|
13
|
+
"require": {
|
|
14
|
+
"types": "./dist/index.d.cts",
|
|
15
|
+
"default": "./dist/index.cjs"
|
|
13
16
|
}
|
|
14
17
|
},
|
|
15
18
|
"author": "LiveKit",
|
|
@@ -22,10 +25,10 @@
|
|
|
22
25
|
"README.md"
|
|
23
26
|
],
|
|
24
27
|
"devDependencies": {
|
|
25
|
-
"@livekit/agents": "^
|
|
26
|
-
"@livekit/agents-plugin-openai": "^
|
|
27
|
-
"@livekit/agents-plugins-test": "^
|
|
28
|
-
"@livekit/rtc-node": "^0.13.
|
|
28
|
+
"@livekit/agents": "^1.0.0-next.0",
|
|
29
|
+
"@livekit/agents-plugin-openai": "^1.0.0-next.0",
|
|
30
|
+
"@livekit/agents-plugins-test": "^1.0.0-next.0",
|
|
31
|
+
"@livekit/rtc-node": "^0.13.12",
|
|
29
32
|
"@microsoft/api-extractor": "^7.35.0",
|
|
30
33
|
"@types/ws": "^8.5.10",
|
|
31
34
|
"tsup": "^8.3.5",
|
|
@@ -35,11 +38,12 @@
|
|
|
35
38
|
"ws": "^8.16.0"
|
|
36
39
|
},
|
|
37
40
|
"peerDependencies": {
|
|
38
|
-
"@livekit/rtc-node": "^0.13.
|
|
39
|
-
"@livekit/agents": "^0.
|
|
41
|
+
"@livekit/rtc-node": "^0.13.12",
|
|
42
|
+
"@livekit/agents": "^1.0.0-next.01.0.0-next.0"
|
|
40
43
|
},
|
|
41
44
|
"scripts": {
|
|
42
|
-
"build": "tsup --onSuccess \"
|
|
45
|
+
"build": "tsup --onSuccess \"pnpm build:types\"",
|
|
46
|
+
"build:types": "tsc --declaration --emitDeclarationOnly && node ../../scripts/copyDeclarationOutput.js",
|
|
43
47
|
"clean": "rm -rf dist",
|
|
44
48
|
"clean:build": "pnpm clean && pnpm build",
|
|
45
49
|
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
|
-
// SPDX-FileCopyrightText:
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { Plugin } from '@livekit/agents';
|
|
4
5
|
|
|
5
6
|
export * from './tts.js';
|
|
7
|
+
|
|
8
|
+
class CartesiaPlugin extends Plugin {
|
|
9
|
+
constructor() {
|
|
10
|
+
super({
|
|
11
|
+
title: 'cartesia',
|
|
12
|
+
version: '0.1.3',
|
|
13
|
+
package: '@livekit/agents-plugin-cartesia',
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
Plugin.registerPlugin(new CartesiaPlugin());
|
package/src/models.ts
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
export type TTSModels = 'sonic-
|
|
5
|
+
export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
|
|
6
6
|
|
|
7
7
|
export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
|
|
8
8
|
|
|
9
|
-
export const TTSDefaultVoiceId = '
|
|
9
|
+
export const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';
|
|
10
10
|
|
|
11
11
|
export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
|
|
12
12
|
|
package/src/tts.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { AudioByteStream, log, tokenize, tts } from '@livekit/agents';
|
|
4
|
+
import { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';
|
|
5
5
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
-
import { randomUUID } from 'node:crypto';
|
|
7
6
|
import { request } from 'node:https';
|
|
8
|
-
import { WebSocket } from 'ws';
|
|
7
|
+
import { type RawData, WebSocket } from 'ws';
|
|
9
8
|
import {
|
|
10
9
|
TTSDefaultVoiceId,
|
|
11
10
|
type TTSEncoding,
|
|
@@ -29,15 +28,17 @@ export interface TTSOptions {
|
|
|
29
28
|
emotion?: (TTSVoiceEmotion | string)[];
|
|
30
29
|
apiKey?: string;
|
|
31
30
|
language: string;
|
|
31
|
+
baseUrl: string;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
const defaultTTSOptions: TTSOptions = {
|
|
35
|
-
model: 'sonic-
|
|
35
|
+
model: 'sonic-2',
|
|
36
36
|
encoding: 'pcm_s16le',
|
|
37
37
|
sampleRate: 24000,
|
|
38
38
|
voice: TTSDefaultVoiceId,
|
|
39
39
|
apiKey: process.env.CARTESIA_API_KEY,
|
|
40
40
|
language: 'en',
|
|
41
|
+
baseUrl: 'https://api.cartesia.ai',
|
|
41
42
|
};
|
|
42
43
|
|
|
43
44
|
export class TTS extends tts.TTS {
|
|
@@ -59,10 +60,26 @@ export class TTS extends tts.TTS {
|
|
|
59
60
|
'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',
|
|
60
61
|
);
|
|
61
62
|
}
|
|
63
|
+
|
|
64
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
|
|
65
|
+
const logger = log();
|
|
66
|
+
logger.warn(
|
|
67
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
68
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
|
|
69
|
+
);
|
|
70
|
+
}
|
|
62
71
|
}
|
|
63
72
|
|
|
64
73
|
updateOptions(opts: Partial<TTSOptions>) {
|
|
65
74
|
this.#opts = { ...this.#opts, ...opts };
|
|
75
|
+
|
|
76
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
|
|
77
|
+
const logger = log();
|
|
78
|
+
logger.warn(
|
|
79
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
80
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
|
|
81
|
+
);
|
|
82
|
+
}
|
|
66
83
|
}
|
|
67
84
|
|
|
68
85
|
synthesize(text: string): tts.ChunkedStream {
|
|
@@ -84,19 +101,19 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
84
101
|
super(text, tts);
|
|
85
102
|
this.#text = text;
|
|
86
103
|
this.#opts = opts;
|
|
87
|
-
this.#run();
|
|
88
104
|
}
|
|
89
105
|
|
|
90
|
-
async
|
|
91
|
-
const requestId =
|
|
106
|
+
protected async run() {
|
|
107
|
+
const requestId = shortuuid();
|
|
92
108
|
const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
93
109
|
const json = toCartesiaOptions(this.#opts);
|
|
94
110
|
json.transcript = this.#text;
|
|
95
111
|
|
|
112
|
+
const baseUrl = new URL(this.#opts.baseUrl);
|
|
96
113
|
const req = request(
|
|
97
114
|
{
|
|
98
|
-
hostname:
|
|
99
|
-
port: 443,
|
|
115
|
+
hostname: baseUrl.hostname,
|
|
116
|
+
port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),
|
|
100
117
|
path: '/tts/bytes',
|
|
101
118
|
method: 'POST',
|
|
102
119
|
headers: {
|
|
@@ -137,21 +154,29 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
137
154
|
export class SynthesizeStream extends tts.SynthesizeStream {
|
|
138
155
|
#opts: TTSOptions;
|
|
139
156
|
#logger = log();
|
|
140
|
-
#tokenizer = new tokenize.basic.SentenceTokenizer(
|
|
157
|
+
#tokenizer = new tokenize.basic.SentenceTokenizer({
|
|
158
|
+
minSentenceLength: BUFFERED_WORDS_COUNT,
|
|
159
|
+
}).stream();
|
|
141
160
|
label = 'cartesia.SynthesizeStream';
|
|
142
161
|
|
|
143
162
|
constructor(tts: TTS, opts: TTSOptions) {
|
|
144
163
|
super(tts);
|
|
145
164
|
this.#opts = opts;
|
|
146
|
-
this.#run();
|
|
147
165
|
}
|
|
148
166
|
|
|
149
167
|
updateOptions(opts: Partial<TTSOptions>) {
|
|
150
168
|
this.#opts = { ...this.#opts, ...opts };
|
|
169
|
+
|
|
170
|
+
if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
|
|
171
|
+
this.#logger.warn(
|
|
172
|
+
{ model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
|
|
173
|
+
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
|
|
174
|
+
);
|
|
175
|
+
}
|
|
151
176
|
}
|
|
152
177
|
|
|
153
|
-
async
|
|
154
|
-
const requestId =
|
|
178
|
+
protected async run() {
|
|
179
|
+
const requestId = shortuuid();
|
|
155
180
|
let closing = false;
|
|
156
181
|
|
|
157
182
|
const sentenceStreamTask = async (ws: WebSocket) => {
|
|
@@ -190,49 +215,78 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
190
215
|
};
|
|
191
216
|
|
|
192
217
|
const recvTask = async (ws: WebSocket) => {
|
|
218
|
+
let finalReceived = false;
|
|
219
|
+
let shouldExit = false;
|
|
193
220
|
const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
|
|
194
221
|
|
|
195
222
|
let lastFrame: AudioFrame | undefined;
|
|
196
223
|
const sendLastFrame = (segmentId: string, final: boolean) => {
|
|
197
|
-
if (lastFrame) {
|
|
224
|
+
if (lastFrame && !this.queue.closed) {
|
|
198
225
|
this.queue.put({ requestId, segmentId, frame: lastFrame, final });
|
|
199
226
|
lastFrame = undefined;
|
|
200
227
|
}
|
|
201
228
|
};
|
|
202
229
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
230
|
+
while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
|
|
231
|
+
try {
|
|
232
|
+
await new Promise<RawData | null>((resolve, reject) => {
|
|
233
|
+
ws.removeAllListeners();
|
|
234
|
+
ws.on('message', (data) => resolve(data));
|
|
235
|
+
ws.on('close', (code, reason) => {
|
|
236
|
+
if (!closing) {
|
|
237
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
238
|
+
}
|
|
239
|
+
if (!finalReceived) {
|
|
240
|
+
reject(new Error('WebSocket closed'));
|
|
241
|
+
} else {
|
|
242
|
+
// If we've received the final message, resolve with empty to exit gracefully
|
|
243
|
+
resolve(null);
|
|
244
|
+
}
|
|
245
|
+
});
|
|
246
|
+
}).then((msg) => {
|
|
247
|
+
if (!msg) return;
|
|
248
|
+
|
|
249
|
+
const json = JSON.parse(msg.toString());
|
|
250
|
+
const segmentId = json.context_id;
|
|
251
|
+
if ('data' in json) {
|
|
252
|
+
const data = new Int8Array(Buffer.from(json.data, 'base64'));
|
|
253
|
+
for (const frame of bstream.write(data)) {
|
|
254
|
+
sendLastFrame(segmentId, false);
|
|
255
|
+
lastFrame = frame;
|
|
256
|
+
}
|
|
257
|
+
} else if ('done' in json) {
|
|
258
|
+
finalReceived = true;
|
|
259
|
+
for (const frame of bstream.flush()) {
|
|
260
|
+
sendLastFrame(segmentId, false);
|
|
261
|
+
lastFrame = frame;
|
|
262
|
+
}
|
|
263
|
+
sendLastFrame(segmentId, true);
|
|
264
|
+
if (!this.queue.closed) {
|
|
265
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if (segmentId === requestId) {
|
|
269
|
+
closing = true;
|
|
270
|
+
shouldExit = true;
|
|
271
|
+
this.#logger.info('Cartesia WebSocket close event sent');
|
|
272
|
+
ws.close();
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
});
|
|
276
|
+
} catch (err) {
|
|
277
|
+
// skip log error for normal websocket close
|
|
278
|
+
if (err instanceof Error && !err.message.includes('WebSocket closed')) {
|
|
279
|
+
this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
|
|
224
280
|
}
|
|
281
|
+
break;
|
|
225
282
|
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
230
|
-
}
|
|
231
|
-
ws.removeAllListeners();
|
|
232
|
-
});
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
this.#logger.info('Cartesia WebSocket closed');
|
|
233
286
|
};
|
|
234
287
|
|
|
235
|
-
const
|
|
288
|
+
const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');
|
|
289
|
+
const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
|
|
236
290
|
const ws = new WebSocket(url);
|
|
237
291
|
|
|
238
292
|
try {
|
|
@@ -243,6 +297,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
|
|
|
243
297
|
});
|
|
244
298
|
|
|
245
299
|
await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
|
|
300
|
+
this.#logger.info('Cartesia run completed');
|
|
246
301
|
} catch (e) {
|
|
247
302
|
throw new Error(`failed to connect to Cartesia: ${e}`);
|
|
248
303
|
}
|
|
@@ -267,7 +322,7 @@ const toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {
|
|
|
267
322
|
voiceControls.emotion = opts.emotion;
|
|
268
323
|
}
|
|
269
324
|
|
|
270
|
-
if (Object.keys(
|
|
325
|
+
if (Object.keys(voiceControls).length) {
|
|
271
326
|
voice.__experimental_controls = voiceControls;
|
|
272
327
|
}
|
|
273
328
|
|