@livekit/agents-plugin-cartesia 0.1.4 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -13,9 +13,20 @@ var __copyProps = (to, from, except, desc) => {
13
13
  };
14
14
  var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
15
15
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
16
- var src_exports = {};
17
- module.exports = __toCommonJS(src_exports);
18
- __reExport(src_exports, require("./tts.cjs"), module.exports);
16
+ var index_exports = {};
17
+ module.exports = __toCommonJS(index_exports);
18
+ var import_agents = require("@livekit/agents");
19
+ __reExport(index_exports, require("./tts.cjs"), module.exports);
20
+ class CartesiaPlugin extends import_agents.Plugin {
21
+ constructor() {
22
+ super({
23
+ title: "cartesia",
24
+ version: "0.1.3",
25
+ package: "@livekit/agents-plugin-cartesia"
26
+ });
27
+ }
28
+ }
29
+ import_agents.Plugin.registerPlugin(new CartesiaPlugin());
19
30
  // Annotate the CommonJS export names for ESM import in node:
20
31
  0 && (module.exports = {
21
32
  ...require("./tts.cjs")
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass CartesiaPlugin extends Plugin {\n constructor() {\n super({\n title: 'cartesia',\n version: '0.1.3',\n package: '@livekit/agents-plugin-cartesia',\n });\n }\n}\n\nPlugin.registerPlugin(new CartesiaPlugin());\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,oBAAuB;AAEvB,0BAAc,qBALd;AAOA,MAAM,uBAAuB,qBAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,UAAU,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,cAAc,UAAU,CAAC"}
package/dist/index.js CHANGED
@@ -1,2 +1,13 @@
1
+ import { Plugin } from "@livekit/agents";
1
2
  export * from "./tts.js";
3
+ class CartesiaPlugin extends Plugin {
4
+ constructor() {
5
+ super({
6
+ title: "cartesia",
7
+ version: "0.1.3",
8
+ package: "@livekit/agents-plugin-cartesia"
9
+ });
10
+ }
11
+ }
12
+ Plugin.registerPlugin(new CartesiaPlugin());
2
13
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './tts.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './tts.js';\n\nclass CartesiaPlugin extends Plugin {\n constructor() {\n super({\n title: 'cartesia',\n version: '0.1.3',\n package: '@livekit/agents-plugin-cartesia',\n });\n }\n}\n\nPlugin.registerPlugin(new CartesiaPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,cAAc;AAEd,MAAM,uBAAuB,OAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
package/dist/models.cjs CHANGED
@@ -21,7 +21,7 @@ __export(models_exports, {
21
21
  TTSDefaultVoiceId: () => TTSDefaultVoiceId
22
22
  });
23
23
  module.exports = __toCommonJS(models_exports);
24
- const TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0";
24
+ const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
25
25
  // Annotate the CommonJS export names for ESM import in node:
26
26
  0 && (module.exports = {
27
27
  TTSDefaultVoiceId
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic-english' | 'sonic-multilingual';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = 'c2ac25f9-ecc4-4f56-9095-651354df60c0';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQO,MAAM,oBAAoB;","names":[]}
1
+ {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAQO,MAAM,oBAAoB;","names":[]}
package/dist/models.d.cts CHANGED
@@ -1,6 +1,6 @@
1
- export type TTSModels = 'sonic-english' | 'sonic-multilingual';
1
+ export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
2
2
  export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
3
- export declare const TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0";
3
+ export declare const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
4
4
  export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
5
5
  export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
6
6
  export type TTSEncoding = 'pcm_s16le';
package/dist/models.d.ts CHANGED
@@ -1,6 +1,6 @@
1
- export type TTSModels = 'sonic-english' | 'sonic-multilingual';
1
+ export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
2
2
  export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
3
- export declare const TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0";
3
+ export declare const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
4
4
  export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
5
5
  export type TTSVoiceEmotion = 'anger:lowest' | 'anger:low' | 'anger' | 'anger:high' | 'anger:highest' | 'positivity:lowest' | 'positivity:low' | 'positivity' | 'positivity:high' | 'positivity:highest' | 'surprise:lowest' | 'surprise:low' | 'surprise' | 'surprise:high' | 'surprise:highest' | 'sadness:lowest' | 'sadness:low' | 'sadness' | 'sadness:high' | 'sadness:highest' | 'curiosity:lowest' | 'curiosity:low' | 'curiosity' | 'curiosity:high' | 'curiosity:highest';
6
6
  export type TTSEncoding = 'pcm_s16le';
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GAAG,eAAe,GAAG,oBAAoB,CAAC;AAE/D,MAAM,MAAM,YAAY,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAE1E,eAAO,MAAM,iBAAiB,yCAAyC,CAAC;AAExE,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,CAAC;AAE/E,MAAM,MAAM,eAAe,GACvB,cAAc,GACd,WAAW,GACX,OAAO,GACP,YAAY,GACZ,eAAe,GACf,mBAAmB,GACnB,gBAAgB,GAChB,YAAY,GACZ,iBAAiB,GACjB,oBAAoB,GACpB,iBAAiB,GACjB,cAAc,GACd,UAAU,GACV,eAAe,GACf,kBAAkB,GAClB,gBAAgB,GAChB,aAAa,GACb,SAAS,GACT,cAAc,GACd,iBAAiB,GACjB,kBAAkB,GAClB,eAAe,GACf,WAAW,GACX,gBAAgB,GAChB,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GAKrB,WAAW,CAAC"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,YAAY,GAAG,eAAe,GAAG,aAAa,CAAC;AAE7F,MAAM,MAAM,YAAY,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAE1E,eAAO,MAAM,iBAAiB,yCAAyC,CAAC;AAExE,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,CAAC;AAE/E,MAAM,MAAM,eAAe,GACvB,cAAc,GACd,WAAW,GACX,OAAO,GACP,YAAY,GACZ,eAAe,GACf,mBAAmB,GACnB,gBAAgB,GAChB,YAAY,GACZ,iBAAiB,GACjB,oBAAoB,GACpB,iBAAiB,GACjB,cAAc,GACd,UAAU,GACV,eAAe,GACf,kBAAkB,GAClB,gBAAgB,GAChB,aAAa,GACb,SAAS,GACT,cAAc,GACd,iBAAiB,GACjB,kBAAkB,GAClB,eAAe,GACf,WAAW,GACX,gBAAgB,GAChB,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GAKrB,WAAW,CAAC"}
package/dist/models.js CHANGED
@@ -1,4 +1,4 @@
1
- const TTSDefaultVoiceId = "c2ac25f9-ecc4-4f56-9095-651354df60c0";
1
+ const TTSDefaultVoiceId = "794f9389-aac1-45b6-b726-9d9369183238";
2
2
  export {
3
3
  TTSDefaultVoiceId
4
4
  };
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic-english' | 'sonic-multilingual';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = 'c2ac25f9-ecc4-4f56-9095-651354df60c0';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":"AAQO,MAAM,oBAAoB;","names":[]}
1
+ {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';\n\nexport type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';\n\nexport const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';\n\nexport type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';\n\nexport type TTSVoiceEmotion =\n | 'anger:lowest'\n | 'anger:low'\n | 'anger'\n | 'anger:high'\n | 'anger:highest'\n | 'positivity:lowest'\n | 'positivity:low'\n | 'positivity'\n | 'positivity:high'\n | 'positivity:highest'\n | 'surprise:lowest'\n | 'surprise:low'\n | 'surprise'\n | 'surprise:high'\n | 'surprise:highest'\n | 'sadness:lowest'\n | 'sadness:low'\n | 'sadness'\n | 'sadness:high'\n | 'sadness:highest'\n | 'curiosity:lowest'\n | 'curiosity:low'\n | 'curiosity'\n | 'curiosity:high'\n | 'curiosity:highest';\n\nexport type TTSEncoding =\n // XXX(nbsp): not yet supported\n // | 'pcm_f32le'\n // | 'pcm_mulaw'\n // | 'pcm_alaw'\n 'pcm_s16le';\n"],"mappings":"AAQO,MAAM,oBAAoB;","names":[]}
package/dist/tts.cjs CHANGED
@@ -24,7 +24,6 @@ __export(tts_exports, {
24
24
  });
25
25
  module.exports = __toCommonJS(tts_exports);
26
26
  var import_agents = require("@livekit/agents");
27
- var import_node_crypto = require("node:crypto");
28
27
  var import_node_https = require("node:https");
29
28
  var import_ws = require("ws");
30
29
  var import_models = require("./models.cjs");
@@ -34,12 +33,13 @@ const VERSION = "2024-06-10";
34
33
  const NUM_CHANNELS = 1;
35
34
  const BUFFERED_WORDS_COUNT = 8;
36
35
  const defaultTTSOptions = {
37
- model: "sonic-english",
36
+ model: "sonic-2",
38
37
  encoding: "pcm_s16le",
39
38
  sampleRate: 24e3,
40
39
  voice: import_models.TTSDefaultVoiceId,
41
40
  apiKey: process.env.CARTESIA_API_KEY,
42
- language: "en"
41
+ language: "en",
42
+ baseUrl: "https://api.cartesia.ai"
43
43
  };
44
44
  class TTS extends import_agents.tts.TTS {
45
45
  #opts;
@@ -57,9 +57,23 @@ class TTS extends import_agents.tts.TTS {
57
57
  "Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
58
58
  );
59
59
  }
60
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
61
+ const logger = (0, import_agents.log)();
62
+ logger.warn(
63
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
64
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
65
+ );
66
+ }
60
67
  }
61
68
  updateOptions(opts) {
62
69
  this.#opts = { ...this.#opts, ...opts };
70
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
71
+ const logger = (0, import_agents.log)();
72
+ logger.warn(
73
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
74
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
75
+ );
76
+ }
63
77
  }
64
78
  synthesize(text) {
65
79
  return new ChunkedStream(this, text, this.#opts);
@@ -77,17 +91,17 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
77
91
  super(text, tts2);
78
92
  this.#text = text;
79
93
  this.#opts = opts;
80
- this.#run();
81
94
  }
82
- async #run() {
83
- const requestId = (0, import_node_crypto.randomUUID)();
95
+ async run() {
96
+ const requestId = (0, import_agents.shortuuid)();
84
97
  const bstream = new import_agents.AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
85
98
  const json = toCartesiaOptions(this.#opts);
86
99
  json.transcript = this.#text;
100
+ const baseUrl = new URL(this.#opts.baseUrl);
87
101
  const req = (0, import_node_https.request)(
88
102
  {
89
- hostname: "api.cartesia.ai",
90
- port: 443,
103
+ hostname: baseUrl.hostname,
104
+ port: parseInt(baseUrl.port) || (baseUrl.protocol === "https:" ? 443 : 80),
91
105
  path: "/tts/bytes",
92
106
  method: "POST",
93
107
  headers: {
@@ -126,18 +140,25 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
126
140
  class SynthesizeStream extends import_agents.tts.SynthesizeStream {
127
141
  #opts;
128
142
  #logger = (0, import_agents.log)();
129
- #tokenizer = new import_agents.tokenize.basic.SentenceTokenizer(void 0, BUFFERED_WORDS_COUNT).stream();
143
+ #tokenizer = new import_agents.tokenize.basic.SentenceTokenizer({
144
+ minSentenceLength: BUFFERED_WORDS_COUNT
145
+ }).stream();
130
146
  label = "cartesia.SynthesizeStream";
131
147
  constructor(tts2, opts) {
132
148
  super(tts2);
133
149
  this.#opts = opts;
134
- this.#run();
135
150
  }
136
151
  updateOptions(opts) {
137
152
  this.#opts = { ...this.#opts, ...opts };
153
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
154
+ this.#logger.warn(
155
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
156
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
157
+ );
158
+ }
138
159
  }
139
- async #run() {
140
- const requestId = (0, import_node_crypto.randomUUID)();
160
+ async run() {
161
+ const requestId = (0, import_agents.shortuuid)();
141
162
  let closing = false;
142
163
  const sentenceStreamTask = async (ws2) => {
143
164
  const packet = toCartesiaOptions(this.#opts);
@@ -172,45 +193,70 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
172
193
  this.#tokenizer.close();
173
194
  };
174
195
  const recvTask = async (ws2) => {
196
+ let finalReceived = false;
197
+ let shouldExit = false;
175
198
  const bstream = new import_agents.AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
176
199
  let lastFrame;
177
200
  const sendLastFrame = (segmentId, final) => {
178
- if (lastFrame) {
201
+ if (lastFrame && !this.queue.closed) {
179
202
  this.queue.put({ requestId, segmentId, frame: lastFrame, final });
180
203
  lastFrame = void 0;
181
204
  }
182
205
  };
183
- ws2.on("message", (data) => {
184
- const json = JSON.parse(data.toString());
185
- const segmentId = json.context_id;
186
- if ("data" in json) {
187
- const data2 = new Int8Array(Buffer.from(json.data, "base64"));
188
- for (const frame of bstream.write(data2)) {
189
- sendLastFrame(segmentId, false);
190
- lastFrame = frame;
191
- }
192
- } else if ("done" in json) {
193
- for (const frame of bstream.flush()) {
194
- sendLastFrame(segmentId, false);
195
- lastFrame = frame;
196
- }
197
- sendLastFrame(segmentId, true);
198
- this.queue.put(SynthesizeStream.END_OF_STREAM);
199
- if (segmentId === requestId) {
200
- closing = true;
201
- ws2.close();
202
- return;
206
+ while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
207
+ try {
208
+ await new Promise((resolve, reject) => {
209
+ ws2.removeAllListeners();
210
+ ws2.on("message", (data) => resolve(data));
211
+ ws2.on("close", (code, reason) => {
212
+ if (!closing) {
213
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
214
+ }
215
+ if (!finalReceived) {
216
+ reject(new Error("WebSocket closed"));
217
+ } else {
218
+ resolve(null);
219
+ }
220
+ });
221
+ }).then((msg) => {
222
+ if (!msg) return;
223
+ const json = JSON.parse(msg.toString());
224
+ const segmentId = json.context_id;
225
+ if ("data" in json) {
226
+ const data = new Int8Array(Buffer.from(json.data, "base64"));
227
+ for (const frame of bstream.write(data)) {
228
+ sendLastFrame(segmentId, false);
229
+ lastFrame = frame;
230
+ }
231
+ } else if ("done" in json) {
232
+ finalReceived = true;
233
+ for (const frame of bstream.flush()) {
234
+ sendLastFrame(segmentId, false);
235
+ lastFrame = frame;
236
+ }
237
+ sendLastFrame(segmentId, true);
238
+ if (!this.queue.closed) {
239
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
240
+ }
241
+ if (segmentId === requestId) {
242
+ closing = true;
243
+ shouldExit = true;
244
+ this.#logger.info("Cartesia WebSocket close event sent");
245
+ ws2.close();
246
+ }
247
+ }
248
+ });
249
+ } catch (err) {
250
+ if (err instanceof Error && !err.message.includes("WebSocket closed")) {
251
+ this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
203
252
  }
253
+ break;
204
254
  }
205
- });
206
- ws2.on("close", (code, reason) => {
207
- if (!closing) {
208
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
209
- }
210
- ws2.removeAllListeners();
211
- });
255
+ }
256
+ this.#logger.info("Cartesia WebSocket closed");
212
257
  };
213
- const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
258
+ const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
259
+ const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
214
260
  const ws = new import_ws.WebSocket(url);
215
261
  try {
216
262
  await new Promise((resolve, reject) => {
@@ -219,6 +265,7 @@ class SynthesizeStream extends import_agents.tts.SynthesizeStream {
219
265
  ws.on("close", (code) => reject(`WebSocket returned ${code}`));
220
266
  });
221
267
  await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
268
+ this.#logger.info("Cartesia run completed");
222
269
  } catch (e) {
223
270
  throw new Error(`failed to connect to Cartesia: ${e}`);
224
271
  }
@@ -240,7 +287,7 @@ const toCartesiaOptions = (opts) => {
240
287
  if (opts.emotion) {
241
288
  voiceControls.emotion = opts.emotion;
242
289
  }
243
- if (Object.keys({}).length) {
290
+ if (Object.keys(voiceControls).length) {
244
291
  voice.__experimental_controls = voiceControls;
245
292
  }
246
293
  return {
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { request } from 'node:https';\nimport { WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-english',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 
'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n this.#run();\n }\n\n async #run() {\n const requestId = randomUUID();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const req = request(\n {\n hostname: 'api.cartesia.ai',\n port: 443,\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer(undefined, BUFFERED_WORDS_COUNT).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.#run();\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n async #run() {\n const requestId = randomUUID();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: 
requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n closing = true;\n ws.close();\n return;\n }\n }\n });\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n ws.removeAllListeners();\n });\n };\n\n const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions 
= (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys({}).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAoD;AAEpD,yBAA2B;AAC3B,wBAAwB;AACxB,gBAA0B;AAC1B,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAa7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,gBAAY,+BAAW;AAC7B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,MAAM;AAAA,QACN,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,
UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB,QAAW,oBAAoB,EAAE,OAAO;AAAA,EAC1F,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,gBAAY,+BAAW;AAC7B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,MAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,cAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,cAAM,YAAY,KAAK;AACvB,YAAI,UAAU,MAAM;AAClB,gBAAMC,QAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,qBAAW,SAAS,QAAQ,MAAMA,KAAI,GAAG;AACvC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,WAAW,UAAU,MAAM;AACzB,qBAAW,SA
AS,QAAQ,MAAM,GAAG;AACnC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AACA,wBAAc,WAAW,IAAI;AAC7B,eAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,cAAI,cAAc,WAAW;AAC3B,sBAAU;AACV,YAAAD,IAAG,MAAM;AACT;AAAA,UACF;AAAA,QACF;AAAA,MACF,CAAC;AACD,MAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,QACpE;AACA,QAAAA,IAAG,mBAAmB;AAAA,MACxB,CAAC;AAAA,IACH;AAEA,UAAM,MAAM,+CAA+C,KAAK,MAAM,MAAM,qBAAqB,OAAO;AACxG,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,CAAC,CAAC,EAAE,QAAQ;AAC1B,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws","data"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see 
https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === 
SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n this.#logger.info('Cartesia WebSocket close event sent');\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && 
!err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n\n this.#logger.info('Cartesia WebSocket closed');\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n this.#logger.info('Cartesia run completed');\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n 
};\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+D;AAE/D,wBAAwB;AACxB,gBAAwC;AACxC,oBAMO;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,aAAS,mBAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,UAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cA
CP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,aAAa,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,8BAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAA
I,KAAK,MAAM,EAAE;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qBAAK,QAAQ,KAAK,qCAAqC;AACvD,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAEA,WAAK,QAAQ,KAAK,2BAA2B;AAAA,IAC/C;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,oBAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AACrE,WAAK,QAAQ,KAAK,wBAAwB;AAAA,IAC5C,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names
":["tts","ws"]}
package/dist/tts.d.cts CHANGED
@@ -9,6 +9,7 @@ export interface TTSOptions {
9
9
  emotion?: (TTSVoiceEmotion | string)[];
10
10
  apiKey?: string;
11
11
  language: string;
12
+ baseUrl: string;
12
13
  }
13
14
  export declare class TTS extends tts.TTS {
14
15
  #private;
@@ -22,11 +23,13 @@ export declare class ChunkedStream extends tts.ChunkedStream {
22
23
  #private;
23
24
  label: string;
24
25
  constructor(tts: TTS, text: string, opts: TTSOptions);
26
+ protected run(): Promise<void>;
25
27
  }
26
28
  export declare class SynthesizeStream extends tts.SynthesizeStream {
27
29
  #private;
28
30
  label: string;
29
31
  constructor(tts: TTS, opts: TTSOptions);
30
32
  updateOptions(opts: Partial<TTSOptions>): void;
33
+ protected run(): Promise<void>;
31
34
  }
32
35
  //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts CHANGED
@@ -9,6 +9,7 @@ export interface TTSOptions {
9
9
  emotion?: (TTSVoiceEmotion | string)[];
10
10
  apiKey?: string;
11
11
  language: string;
12
+ baseUrl: string;
12
13
  }
13
14
  export declare class TTS extends tts.TTS {
14
15
  #private;
@@ -22,11 +23,13 @@ export declare class ChunkedStream extends tts.ChunkedStream {
22
23
  #private;
23
24
  label: string;
24
25
  constructor(tts: TTS, text: string, opts: TTSOptions);
26
+ protected run(): Promise<void>;
25
27
  }
26
28
  export declare class SynthesizeStream extends tts.SynthesizeStream {
27
29
  #private;
28
30
  label: string;
29
31
  constructor(tts: TTS, opts: TTSOptions);
30
32
  updateOptions(opts: Partial<TTSOptions>): void;
33
+ protected run(): Promise<void>;
31
34
  }
32
35
  //# sourceMappingURL=tts.d.ts.map
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAAkC,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAKtE,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAWD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;CAoDrD;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAIxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAMtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;CAqGxC"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA6C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAIjF,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,aAAa,EACnB,MAAM,aAAa,CAAC;AAQrB,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,eAAe,GAAG,MAAM,CAAC,EAAE,CAAC;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAYD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAE9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAyB1C,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAYvC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,gBAAgB;CAG3B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;gBAKrB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CA8CpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;;IAMxD,KAAK,SAA+B;gBAExB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IAKtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;cAWvB,GAAG;CA+HpB"}
package/dist/tts.js CHANGED
@@ -1,5 +1,4 @@
1
- import { AudioByteStream, log, tokenize, tts } from "@livekit/agents";
2
- import { randomUUID } from "node:crypto";
1
+ import { AudioByteStream, log, shortuuid, tokenize, tts } from "@livekit/agents";
3
2
  import { request } from "node:https";
4
3
  import { WebSocket } from "ws";
5
4
  import {
@@ -11,12 +10,13 @@ const VERSION = "2024-06-10";
11
10
  const NUM_CHANNELS = 1;
12
11
  const BUFFERED_WORDS_COUNT = 8;
13
12
  const defaultTTSOptions = {
14
- model: "sonic-english",
13
+ model: "sonic-2",
15
14
  encoding: "pcm_s16le",
16
15
  sampleRate: 24e3,
17
16
  voice: TTSDefaultVoiceId,
18
17
  apiKey: process.env.CARTESIA_API_KEY,
19
- language: "en"
18
+ language: "en",
19
+ baseUrl: "https://api.cartesia.ai"
20
20
  };
21
21
  class TTS extends tts.TTS {
22
22
  #opts;
@@ -34,9 +34,23 @@ class TTS extends tts.TTS {
34
34
  "Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY"
35
35
  );
36
36
  }
37
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
38
+ const logger = log();
39
+ logger.warn(
40
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
41
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
42
+ );
43
+ }
37
44
  }
38
45
  updateOptions(opts) {
39
46
  this.#opts = { ...this.#opts, ...opts };
47
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
48
+ const logger = log();
49
+ logger.warn(
50
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
51
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
52
+ );
53
+ }
40
54
  }
41
55
  synthesize(text) {
42
56
  return new ChunkedStream(this, text, this.#opts);
@@ -54,17 +68,17 @@ class ChunkedStream extends tts.ChunkedStream {
54
68
  super(text, tts2);
55
69
  this.#text = text;
56
70
  this.#opts = opts;
57
- this.#run();
58
71
  }
59
- async #run() {
60
- const requestId = randomUUID();
72
+ async run() {
73
+ const requestId = shortuuid();
61
74
  const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
62
75
  const json = toCartesiaOptions(this.#opts);
63
76
  json.transcript = this.#text;
77
+ const baseUrl = new URL(this.#opts.baseUrl);
64
78
  const req = request(
65
79
  {
66
- hostname: "api.cartesia.ai",
67
- port: 443,
80
+ hostname: baseUrl.hostname,
81
+ port: parseInt(baseUrl.port) || (baseUrl.protocol === "https:" ? 443 : 80),
68
82
  path: "/tts/bytes",
69
83
  method: "POST",
70
84
  headers: {
@@ -103,18 +117,25 @@ class ChunkedStream extends tts.ChunkedStream {
103
117
  class SynthesizeStream extends tts.SynthesizeStream {
104
118
  #opts;
105
119
  #logger = log();
106
- #tokenizer = new tokenize.basic.SentenceTokenizer(void 0, BUFFERED_WORDS_COUNT).stream();
120
+ #tokenizer = new tokenize.basic.SentenceTokenizer({
121
+ minSentenceLength: BUFFERED_WORDS_COUNT
122
+ }).stream();
107
123
  label = "cartesia.SynthesizeStream";
108
124
  constructor(tts2, opts) {
109
125
  super(tts2);
110
126
  this.#opts = opts;
111
- this.#run();
112
127
  }
113
128
  updateOptions(opts) {
114
129
  this.#opts = { ...this.#opts, ...opts };
130
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== "sonic-2-2025-03-07") {
131
+ this.#logger.warn(
132
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
133
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details"
134
+ );
135
+ }
115
136
  }
116
- async #run() {
117
- const requestId = randomUUID();
137
+ async run() {
138
+ const requestId = shortuuid();
118
139
  let closing = false;
119
140
  const sentenceStreamTask = async (ws2) => {
120
141
  const packet = toCartesiaOptions(this.#opts);
@@ -149,45 +170,70 @@ class SynthesizeStream extends tts.SynthesizeStream {
149
170
  this.#tokenizer.close();
150
171
  };
151
172
  const recvTask = async (ws2) => {
173
+ let finalReceived = false;
174
+ let shouldExit = false;
152
175
  const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
153
176
  let lastFrame;
154
177
  const sendLastFrame = (segmentId, final) => {
155
- if (lastFrame) {
178
+ if (lastFrame && !this.queue.closed) {
156
179
  this.queue.put({ requestId, segmentId, frame: lastFrame, final });
157
180
  lastFrame = void 0;
158
181
  }
159
182
  };
160
- ws2.on("message", (data) => {
161
- const json = JSON.parse(data.toString());
162
- const segmentId = json.context_id;
163
- if ("data" in json) {
164
- const data2 = new Int8Array(Buffer.from(json.data, "base64"));
165
- for (const frame of bstream.write(data2)) {
166
- sendLastFrame(segmentId, false);
167
- lastFrame = frame;
168
- }
169
- } else if ("done" in json) {
170
- for (const frame of bstream.flush()) {
171
- sendLastFrame(segmentId, false);
172
- lastFrame = frame;
173
- }
174
- sendLastFrame(segmentId, true);
175
- this.queue.put(SynthesizeStream.END_OF_STREAM);
176
- if (segmentId === requestId) {
177
- closing = true;
178
- ws2.close();
179
- return;
183
+ while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
184
+ try {
185
+ await new Promise((resolve, reject) => {
186
+ ws2.removeAllListeners();
187
+ ws2.on("message", (data) => resolve(data));
188
+ ws2.on("close", (code, reason) => {
189
+ if (!closing) {
190
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
191
+ }
192
+ if (!finalReceived) {
193
+ reject(new Error("WebSocket closed"));
194
+ } else {
195
+ resolve(null);
196
+ }
197
+ });
198
+ }).then((msg) => {
199
+ if (!msg) return;
200
+ const json = JSON.parse(msg.toString());
201
+ const segmentId = json.context_id;
202
+ if ("data" in json) {
203
+ const data = new Int8Array(Buffer.from(json.data, "base64"));
204
+ for (const frame of bstream.write(data)) {
205
+ sendLastFrame(segmentId, false);
206
+ lastFrame = frame;
207
+ }
208
+ } else if ("done" in json) {
209
+ finalReceived = true;
210
+ for (const frame of bstream.flush()) {
211
+ sendLastFrame(segmentId, false);
212
+ lastFrame = frame;
213
+ }
214
+ sendLastFrame(segmentId, true);
215
+ if (!this.queue.closed) {
216
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
217
+ }
218
+ if (segmentId === requestId) {
219
+ closing = true;
220
+ shouldExit = true;
221
+ this.#logger.info("Cartesia WebSocket close event sent");
222
+ ws2.close();
223
+ }
224
+ }
225
+ });
226
+ } catch (err) {
227
+ if (err instanceof Error && !err.message.includes("WebSocket closed")) {
228
+ this.#logger.error({ err }, "Error in recvTask from Cartesia WebSocket");
180
229
  }
230
+ break;
181
231
  }
182
- });
183
- ws2.on("close", (code, reason) => {
184
- if (!closing) {
185
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
186
- }
187
- ws2.removeAllListeners();
188
- });
232
+ }
233
+ this.#logger.info("Cartesia WebSocket closed");
189
234
  };
190
- const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
235
+ const wsUrl = this.#opts.baseUrl.replace(/^http/, "ws");
236
+ const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
191
237
  const ws = new WebSocket(url);
192
238
  try {
193
239
  await new Promise((resolve, reject) => {
@@ -196,6 +242,7 @@ class SynthesizeStream extends tts.SynthesizeStream {
196
242
  ws.on("close", (code) => reject(`WebSocket returned ${code}`));
197
243
  });
198
244
  await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
245
+ this.#logger.info("Cartesia run completed");
199
246
  } catch (e) {
200
247
  throw new Error(`failed to connect to Cartesia: ${e}`);
201
248
  }
@@ -217,7 +264,7 @@ const toCartesiaOptions = (opts) => {
217
264
  if (opts.emotion) {
218
265
  voiceControls.emotion = opts.emotion;
219
266
  }
220
- if (Object.keys({}).length) {
267
+ if (Object.keys(voiceControls).length) {
221
268
  voice.__experimental_controls = voiceControls;
222
269
  }
223
270
  return {
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { randomUUID } from 'node:crypto';\nimport { request } from 'node:https';\nimport { WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-english',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 
'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n this.#run();\n }\n\n async #run() {\n const requestId = randomUUID();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const req = request(\n {\n hostname: 'api.cartesia.ai',\n port: 443,\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer(undefined, BUFFERED_WORDS_COUNT).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n this.#run();\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n async #run() {\n const requestId = randomUUID();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: 
requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n\n if (segmentId === requestId) {\n closing = true;\n ws.close();\n return;\n }\n }\n });\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n ws.removeAllListeners();\n });\n };\n\n const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions 
= (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys({}).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n };\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,UAAU,WAAW;AAEpD,SAAS,kBAAkB;AAC3B,SAAS,eAAe;AACxB,SAAS,iBAAiB;AAC1B;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAa7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,YAAY,WAAW;AAC7B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,MAAM;AAAA,QACN,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,
oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB,QAAW,oBAAoB,EAAE,OAAO;AAAA,EAC1F,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,YAAY,WAAW;AAC7B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,WAAW;AACb,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,MAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,cAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,cAAM,YAAY,KAAK;AACvB,YAAI,UAAU,MAAM;AAClB,gBAAMC,QAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,qBAAW,SAAS,QAAQ,MAAMA,KAAI,GAAG;AACvC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,WAAW,UAAU,MAAM;AACzB,qBAAW,SAAS,QAAQ,MAAM,GA
AG;AACnC,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AACA,wBAAc,WAAW,IAAI;AAC7B,eAAK,MAAM,IAAI,iBAAiB,aAAa;AAE7C,cAAI,cAAc,WAAW;AAC3B,sBAAU;AACV,YAAAD,IAAG,MAAM;AACT;AAAA,UACF;AAAA,QACF;AAAA,MACF,CAAC;AACD,MAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAAA,QACpE;AACA,QAAAA,IAAG,mBAAmB;AAAA,MACxB,CAAC;AAAA,IACH;AAEA,UAAM,MAAM,+CAA+C,KAAK,MAAM,MAAM,qBAAqB,OAAO;AACxG,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AAAA,IACvE,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,CAAC,CAAC,EAAE,QAAQ;AAC1B,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws","data"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport {\n TTSDefaultVoiceId,\n type TTSEncoding,\n type TTSModels,\n type TTSVoiceEmotion,\n type TTSVoiceSpeed,\n} from './models.js';\n\nconst AUTHORIZATION_HEADER = 'X-API-Key';\nconst VERSION_HEADER = 'Cartesia-Version';\nconst VERSION = '2024-06-10';\nconst NUM_CHANNELS = 1;\nconst BUFFERED_WORDS_COUNT = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n voice: string | number[];\n speed?: TTSVoiceSpeed | number;\n emotion?: (TTSVoiceEmotion | string)[];\n apiKey?: string;\n language: string;\n baseUrl: string;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'sonic-2',\n encoding: 'pcm_s16le',\n sampleRate: 24000,\n voice: TTSDefaultVoiceId,\n apiKey: process.env.CARTESIA_API_KEY,\n language: 'en',\n baseUrl: 'https://api.cartesia.ai',\n};\n\nexport class TTS extends tts.TTS {\n #opts: TTSOptions;\n label = 'cartesia.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: true,\n });\n\n this.#opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.#opts.apiKey === undefined) {\n throw new Error(\n 'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',\n );\n }\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see 
https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n const logger = log();\n logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.#opts);\n }\n\n stream(): SynthesizeStream {\n return new SynthesizeStream(this, this.#opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'cartesia.ChunkedStream';\n #opts: TTSOptions;\n #text: string;\n\n // set Promise<T> to any because OpenAI returns an annoying Response type\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.#text = text;\n this.#opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n const json = toCartesiaOptions(this.#opts);\n json.transcript = this.#text;\n\n const baseUrl = new URL(this.#opts.baseUrl);\n const req = request(\n {\n hostname: baseUrl.hostname,\n port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 
443 : 80),\n path: '/tts/bytes',\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: this.#opts.apiKey!,\n [VERSION_HEADER]: VERSION,\n },\n },\n (res) => {\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n });\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n this.queue.close();\n });\n },\n );\n\n req.write(JSON.stringify(json));\n req.end();\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n #opts: TTSOptions;\n #logger = log();\n #tokenizer = new tokenize.basic.SentenceTokenizer({\n minSentenceLength: BUFFERED_WORDS_COUNT,\n }).stream();\n label = 'cartesia.SynthesizeStream';\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.#opts = opts;\n }\n\n updateOptions(opts: Partial<TTSOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n\n if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {\n this.#logger.warn(\n { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },\n \"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details\",\n );\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n let closing = false;\n\n const sentenceStreamTask = async (ws: WebSocket) => {\n const packet = toCartesiaOptions(this.#opts);\n for await (const event of this.#tokenizer) {\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: event.token + ' ',\n continue: true,\n }),\n );\n }\n\n ws.send(\n JSON.stringify({\n ...packet,\n context_id: requestId,\n transcript: ' ',\n continue: false,\n }),\n );\n };\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === 
SynthesizeStream.FLUSH_SENTINEL) {\n this.#tokenizer.flush();\n continue;\n }\n this.#tokenizer.pushText(data);\n }\n this.#tokenizer.endInput();\n this.#tokenizer.close();\n };\n\n const recvTask = async (ws: WebSocket) => {\n let finalReceived = false;\n let shouldExit = false;\n const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);\n\n let lastFrame: AudioFrame | undefined;\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {\n try {\n await new Promise<RawData | null>((resolve, reject) => {\n ws.removeAllListeners();\n ws.on('message', (data) => resolve(data));\n ws.on('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n }\n if (!finalReceived) {\n reject(new Error('WebSocket closed'));\n } else {\n // If we've received the final message, resolve with empty to exit gracefully\n resolve(null);\n }\n });\n }).then((msg) => {\n if (!msg) return;\n\n const json = JSON.parse(msg.toString());\n const segmentId = json.context_id;\n if ('data' in json) {\n const data = new Int8Array(Buffer.from(json.data, 'base64'));\n for (const frame of bstream.write(data)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n } else if ('done' in json) {\n finalReceived = true;\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n\n if (segmentId === requestId) {\n closing = true;\n shouldExit = true;\n this.#logger.info('Cartesia WebSocket close event sent');\n ws.close();\n }\n }\n });\n } catch (err) {\n // skip log error for normal websocket close\n if (err instanceof Error && 
!err.message.includes('WebSocket closed')) {\n this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');\n }\n break;\n }\n }\n\n this.#logger.info('Cartesia WebSocket closed');\n };\n\n const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');\n const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;\n const ws = new WebSocket(url);\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);\n this.#logger.info('Cartesia run completed');\n } catch (e) {\n throw new Error(`failed to connect to Cartesia: ${e}`);\n }\n }\n}\n\nconst toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {\n const voice: { [id: string]: unknown } = {};\n if (typeof opts.voice === 'string') {\n voice.mode = 'id';\n voice.id = opts.voice;\n } else {\n voice.mode = 'embedding';\n voice.embedding = opts.voice;\n }\n\n const voiceControls: { [id: string]: unknown } = {};\n if (opts.speed) {\n voiceControls.speed = opts.speed;\n }\n if (opts.emotion) {\n voiceControls.emotion = opts.emotion;\n }\n\n if (Object.keys(voiceControls).length) {\n voice.__experimental_controls = voiceControls;\n }\n\n return {\n model_id: opts.model,\n voice,\n output_format: {\n container: 'raw',\n encoding: opts.encoding,\n sample_rate: opts.sampleRate,\n },\n language: opts.language,\n 
};\n};\n"],"mappings":"AAGA,SAAS,iBAAiB,KAAK,WAAW,UAAU,WAAW;AAE/D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AACxC;AAAA,EACE;AAAA,OAKK;AAEP,MAAM,uBAAuB;AAC7B,MAAM,iBAAiB;AACvB,MAAM,UAAU;AAChB,MAAM,eAAe;AACrB,MAAM,uBAAuB;AAc7B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,SAAS;AACX;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAC1C,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,WAAW;AAAA,IACb,CAAC;AAED,SAAK,QAAQ;AAAA,MACX,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,MAAM,WAAW,QAAW;AACnC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,YAAM,SAAS,IAAI;AACnB,aAAO;AAAA,QACL,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,iBAAiB,MAAM,KAAK,KAAK;AAAA,EAC9C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR;AAAA,EACA;AAAA;AAAA,EAGA,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,QAAQ;AACb,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AACvE,UAAM,OAAO,kBAAkB,KAAK,KAAK;AACzC,SAAK,aAAa,KAAK;AAEvB,UAAM,UAAU,IAAI,IAAI,KAAK,MAAM,OAAO;AAC1C,UAAM,MAAM;AAAA,MACV;AAAA,QACE,UAAU,QAAQ;AAAA,QAClB,MAAM,SAAS,QAAQ,IAAI,MAAM,QAAQ,aAAa,WAAW,MAAM;AAAA,QACvE,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,CAAC,oBAAoB,GAAG,KAAK,MAAM;AAAA,UACnC,CAAC,cAAc,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,MACA,CAAC,QAAQ;AACP,YAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,qBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAA
A,YACb,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AACD,YAAI,GAAG,SAAS,MAAM;AACpB,qBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,iBAAK,MAAM,IAAI;AAAA,cACb;AAAA,cACA;AAAA,cACA,OAAO;AAAA,cACP,WAAW;AAAA,YACb,CAAC;AAAA,UACH;AACA,eAAK,MAAM,MAAM;AAAA,QACnB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,QAAI,IAAI;AAAA,EACV;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACzD;AAAA,EACA,UAAU,IAAI;AAAA,EACd,aAAa,IAAI,SAAS,MAAM,kBAAkB;AAAA,IAChD,mBAAmB;AAAA,EACrB,CAAC,EAAE,OAAO;AAAA,EACV,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,SAAK,KAAK,MAAM,SAAS,KAAK,MAAM,YAAY,KAAK,MAAM,UAAU,sBAAsB;AACzF,WAAK,QAAQ;AAAA,QACX,EAAE,OAAO,KAAK,MAAM,OAAO,OAAO,KAAK,MAAM,OAAO,SAAS,KAAK,MAAM,QAAQ;AAAA,QAChF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,QAAI,UAAU;AAEd,UAAM,qBAAqB,OAAOC,QAAkB;AAClD,YAAM,SAAS,kBAAkB,KAAK,KAAK;AAC3C,uBAAiB,SAAS,KAAK,YAAY;AACzC,QAAAA,IAAG;AAAA,UACD,KAAK,UAAU;AAAA,YACb,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,YAAY,MAAM,QAAQ;AAAA,YAC1B,UAAU;AAAA,UACZ,CAAC;AAAA,QACH;AAAA,MACF;AAEA,MAAAA,IAAG;AAAA,QACD,KAAK,UAAU;AAAA,UACb,GAAG;AAAA,UACH,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,WAAW,MAAM;AACtB;AAAA,QACF;AACA,aAAK,WAAW,SAAS,IAAI;AAAA,MAC/B;AACA,WAAK,WAAW,SAAS;AACzB,WAAK,WAAW,MAAM;AAAA,IACxB;AAEA,UAAM,WAAW,OAAOA,QAAkB;AACxC,UAAI,gBAAgB;AACpB,UAAI,aAAa;AACjB,YAAM,UAAU,IAAI,gBAAgB,KAAK,MAAM,YAAY,YAAY;AAEvE,UAAI;AACJ,YAAM,gBAAgB,CAAC,WAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,aAAO,CAAC,KAAK,UAAU,CAAC,KAAK,gBAAgB,OAAO,WAAW,CAAC,YAAY;AAC1E,YAAI;AACF,gBAAM,IAAI,QAAwB,CAAC,SAAS,WAAW;AACrD,YAAAA,IAAG,mBAAmB;AACtB,YAAAA,IAAG,GAAG,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AACxC,YAAAA,IAAG,GAAG,SAAS,CAAC,MAAM,WAAW;AAC/B,kBAAI,CAAC,SAAS;AACZ,qBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE
;AAAA,cACpE;AACA,kBAAI,CAAC,eAAe;AAClB,uBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,cACtC,OAAO;AAEL,wBAAQ,IAAI;AAAA,cACd;AAAA,YACF,CAAC;AAAA,UACH,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,gBAAI,CAAC,IAAK;AAEV,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,kBAAM,YAAY,KAAK;AACvB,gBAAI,UAAU,MAAM;AAClB,oBAAM,OAAO,IAAI,UAAU,OAAO,KAAK,KAAK,MAAM,QAAQ,CAAC;AAC3D,yBAAW,SAAS,QAAQ,MAAM,IAAI,GAAG;AACvC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AAAA,YACF,WAAW,UAAU,MAAM;AACzB,8BAAgB;AAChB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAC7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AAEA,kBAAI,cAAc,WAAW;AAC3B,0BAAU;AACV,6BAAa;AACb,qBAAK,QAAQ,KAAK,qCAAqC;AACvD,gBAAAA,IAAG,MAAM;AAAA,cACX;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,KAAK;AAEZ,cAAI,eAAe,SAAS,CAAC,IAAI,QAAQ,SAAS,kBAAkB,GAAG;AACrE,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,2CAA2C;AAAA,UACzE;AACA;AAAA,QACF;AAAA,MACF;AAEA,WAAK,QAAQ,KAAK,2BAA2B;AAAA,IAC/C;AAEA,UAAM,QAAQ,KAAK,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,GAAG,KAAK,0BAA0B,KAAK,MAAM,MAAM,qBAAqB,OAAO;AAC3F,UAAM,KAAK,IAAI,UAAU,GAAG;AAE5B,QAAI;AACF,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,WAAG,GAAG,QAAQ,OAAO;AACrB,WAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,WAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,MAC/D,CAAC;AAED,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,mBAAmB,EAAE,GAAG,SAAS,EAAE,CAAC,CAAC;AACrE,WAAK,QAAQ,KAAK,wBAAwB;AAAA,IAC5C,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,kCAAkC,CAAC,EAAE;AAAA,IACvD;AAAA,EACF;AACF;AAEA,MAAM,oBAAoB,CAAC,SAAgD;AACzE,QAAM,QAAmC,CAAC;AAC1C,MAAI,OAAO,KAAK,UAAU,UAAU;AAClC,UAAM,OAAO;AACb,UAAM,KAAK,KAAK;AAAA,EAClB,OAAO;AACL,UAAM,OAAO;AACb,UAAM,YAAY,KAAK;AAAA,EACzB;AAEA,QAAM,gBAA2C,CAAC;AAClD,MAAI,KAAK,OAAO;AACd,kBAAc,QAAQ,KAAK;AAAA,EAC7B;AACA,MAAI,KAAK,SAAS;AAChB,kBAAc,UAAU,KAAK;AAAA,EAC/B;AAEA,MAAI,OAAO,KAAK,aAAa,EAAE,QAAQ;AACrC,UAAM,0BAA0B;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,UAAU,KAAK;AAAA,IACf;AAAA,IACA,eAAe;AAAA,MACb,WAAW;AAAA,MACX,UAAU,KAAK;AAAA,MACf,aAAa,KAAK;AAAA,IACpB;AAAA,IACA,UAAU,KAAK;AAAA,EACjB;AACF;","names":["tts","ws"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-cartesia",
3
- "version": "0.1.4",
3
+ "version": "1.0.0-next.0",
4
4
  "description": "Cartesia plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -25,10 +25,10 @@
25
25
  "README.md"
26
26
  ],
27
27
  "devDependencies": {
28
- "@livekit/agents": "^x",
29
- "@livekit/agents-plugin-openai": "^x",
30
- "@livekit/agents-plugins-test": "^x",
31
- "@livekit/rtc-node": "^0.13.11",
28
+ "@livekit/agents": "^1.0.0-next.0",
29
+ "@livekit/agents-plugin-openai": "^1.0.0-next.0",
30
+ "@livekit/agents-plugins-test": "^1.0.0-next.0",
31
+ "@livekit/rtc-node": "^0.13.12",
32
32
  "@microsoft/api-extractor": "^7.35.0",
33
33
  "@types/ws": "^8.5.10",
34
34
  "tsup": "^8.3.5",
@@ -38,8 +38,8 @@
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
- "@livekit/rtc-node": "^0.13.11",
42
- "@livekit/agents": "^0.7.7x"
41
+ "@livekit/rtc-node": "^0.13.12",
42
+ "@livekit/agents": "^1.0.0-next.0"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/index.ts CHANGED
@@ -1,5 +1,18 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Plugin } from '@livekit/agents';
4
5
 
5
6
  export * from './tts.js';
7
+
8
+ class CartesiaPlugin extends Plugin {
9
+ constructor() {
10
+ super({
11
+ title: 'cartesia',
12
+ version: '0.1.3',
13
+ package: '@livekit/agents-plugin-cartesia',
14
+ });
15
+ }
16
+ }
17
+
18
+ Plugin.registerPlugin(new CartesiaPlugin());
package/src/models.ts CHANGED
@@ -2,11 +2,11 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
 
5
- export type TTSModels = 'sonic-english' | 'sonic-multilingual';
5
+ export type TTSModels = 'sonic' | 'sonic-2' | 'sonic-lite' | 'sonic-preview' | 'sonic-turbo';
6
6
 
7
7
  export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja';
8
8
 
9
- export const TTSDefaultVoiceId = 'c2ac25f9-ecc4-4f56-9095-651354df60c0';
9
+ export const TTSDefaultVoiceId = '794f9389-aac1-45b6-b726-9d9369183238';
10
10
 
11
11
  export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest';
12
12
 
package/src/tts.ts CHANGED
@@ -1,11 +1,10 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { AudioByteStream, log, tokenize, tts } from '@livekit/agents';
4
+ import { AudioByteStream, log, shortuuid, tokenize, tts } from '@livekit/agents';
5
5
  import type { AudioFrame } from '@livekit/rtc-node';
6
- import { randomUUID } from 'node:crypto';
7
6
  import { request } from 'node:https';
8
- import { WebSocket } from 'ws';
7
+ import { type RawData, WebSocket } from 'ws';
9
8
  import {
10
9
  TTSDefaultVoiceId,
11
10
  type TTSEncoding,
@@ -29,15 +28,17 @@ export interface TTSOptions {
29
28
  emotion?: (TTSVoiceEmotion | string)[];
30
29
  apiKey?: string;
31
30
  language: string;
31
+ baseUrl: string;
32
32
  }
33
33
 
34
34
  const defaultTTSOptions: TTSOptions = {
35
- model: 'sonic-english',
35
+ model: 'sonic-2',
36
36
  encoding: 'pcm_s16le',
37
37
  sampleRate: 24000,
38
38
  voice: TTSDefaultVoiceId,
39
39
  apiKey: process.env.CARTESIA_API_KEY,
40
40
  language: 'en',
41
+ baseUrl: 'https://api.cartesia.ai',
41
42
  };
42
43
 
43
44
  export class TTS extends tts.TTS {
@@ -59,10 +60,26 @@ export class TTS extends tts.TTS {
59
60
  'Cartesia API key is required, whether as an argument or as $CARTESIA_API_KEY',
60
61
  );
61
62
  }
63
+
64
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
65
+ const logger = log();
66
+ logger.warn(
67
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
68
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
69
+ );
70
+ }
62
71
  }
63
72
 
64
73
  updateOptions(opts: Partial<TTSOptions>) {
65
74
  this.#opts = { ...this.#opts, ...opts };
75
+
76
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
77
+ const logger = log();
78
+ logger.warn(
79
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
80
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
81
+ );
82
+ }
66
83
  }
67
84
 
68
85
  synthesize(text: string): tts.ChunkedStream {
@@ -84,19 +101,19 @@ export class ChunkedStream extends tts.ChunkedStream {
84
101
  super(text, tts);
85
102
  this.#text = text;
86
103
  this.#opts = opts;
87
- this.#run();
88
104
  }
89
105
 
90
- async #run() {
91
- const requestId = randomUUID();
106
+ protected async run() {
107
+ const requestId = shortuuid();
92
108
  const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
93
109
  const json = toCartesiaOptions(this.#opts);
94
110
  json.transcript = this.#text;
95
111
 
112
+ const baseUrl = new URL(this.#opts.baseUrl);
96
113
  const req = request(
97
114
  {
98
- hostname: 'api.cartesia.ai',
99
- port: 443,
115
+ hostname: baseUrl.hostname,
116
+ port: parseInt(baseUrl.port) || (baseUrl.protocol === 'https:' ? 443 : 80),
100
117
  path: '/tts/bytes',
101
118
  method: 'POST',
102
119
  headers: {
@@ -137,21 +154,29 @@ export class ChunkedStream extends tts.ChunkedStream {
137
154
  export class SynthesizeStream extends tts.SynthesizeStream {
138
155
  #opts: TTSOptions;
139
156
  #logger = log();
140
- #tokenizer = new tokenize.basic.SentenceTokenizer(undefined, BUFFERED_WORDS_COUNT).stream();
157
+ #tokenizer = new tokenize.basic.SentenceTokenizer({
158
+ minSentenceLength: BUFFERED_WORDS_COUNT,
159
+ }).stream();
141
160
  label = 'cartesia.SynthesizeStream';
142
161
 
143
162
  constructor(tts: TTS, opts: TTSOptions) {
144
163
  super(tts);
145
164
  this.#opts = opts;
146
- this.#run();
147
165
  }
148
166
 
149
167
  updateOptions(opts: Partial<TTSOptions>) {
150
168
  this.#opts = { ...this.#opts, ...opts };
169
+
170
+ if ((this.#opts.speed || this.#opts.emotion) && this.#opts.model !== 'sonic-2-2025-03-07') {
171
+ this.#logger.warn(
172
+ { model: this.#opts.model, speed: this.#opts.speed, emotion: this.#opts.emotion },
173
+ "speed and emotion controls are only supported for model 'sonic-2-2025-03-07', see https://docs.cartesia.ai/developer-tools/changelog for details",
174
+ );
175
+ }
151
176
  }
152
177
 
153
- async #run() {
154
- const requestId = randomUUID();
178
+ protected async run() {
179
+ const requestId = shortuuid();
155
180
  let closing = false;
156
181
 
157
182
  const sentenceStreamTask = async (ws: WebSocket) => {
@@ -190,49 +215,78 @@ export class SynthesizeStream extends tts.SynthesizeStream {
190
215
  };
191
216
 
192
217
  const recvTask = async (ws: WebSocket) => {
218
+ let finalReceived = false;
219
+ let shouldExit = false;
193
220
  const bstream = new AudioByteStream(this.#opts.sampleRate, NUM_CHANNELS);
194
221
 
195
222
  let lastFrame: AudioFrame | undefined;
196
223
  const sendLastFrame = (segmentId: string, final: boolean) => {
197
- if (lastFrame) {
224
+ if (lastFrame && !this.queue.closed) {
198
225
  this.queue.put({ requestId, segmentId, frame: lastFrame, final });
199
226
  lastFrame = undefined;
200
227
  }
201
228
  };
202
229
 
203
- ws.on('message', (data) => {
204
- const json = JSON.parse(data.toString());
205
- const segmentId = json.context_id;
206
- if ('data' in json) {
207
- const data = new Int8Array(Buffer.from(json.data, 'base64'));
208
- for (const frame of bstream.write(data)) {
209
- sendLastFrame(segmentId, false);
210
- lastFrame = frame;
211
- }
212
- } else if ('done' in json) {
213
- for (const frame of bstream.flush()) {
214
- sendLastFrame(segmentId, false);
215
- lastFrame = frame;
216
- }
217
- sendLastFrame(segmentId, true);
218
- this.queue.put(SynthesizeStream.END_OF_STREAM);
219
-
220
- if (segmentId === requestId) {
221
- closing = true;
222
- ws.close();
223
- return;
230
+ while (!this.closed && !this.abortController.signal.aborted && !shouldExit) {
231
+ try {
232
+ await new Promise<RawData | null>((resolve, reject) => {
233
+ ws.removeAllListeners();
234
+ ws.on('message', (data) => resolve(data));
235
+ ws.on('close', (code, reason) => {
236
+ if (!closing) {
237
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
238
+ }
239
+ if (!finalReceived) {
240
+ reject(new Error('WebSocket closed'));
241
+ } else {
242
+ // If we've received the final message, resolve with empty to exit gracefully
243
+ resolve(null);
244
+ }
245
+ });
246
+ }).then((msg) => {
247
+ if (!msg) return;
248
+
249
+ const json = JSON.parse(msg.toString());
250
+ const segmentId = json.context_id;
251
+ if ('data' in json) {
252
+ const data = new Int8Array(Buffer.from(json.data, 'base64'));
253
+ for (const frame of bstream.write(data)) {
254
+ sendLastFrame(segmentId, false);
255
+ lastFrame = frame;
256
+ }
257
+ } else if ('done' in json) {
258
+ finalReceived = true;
259
+ for (const frame of bstream.flush()) {
260
+ sendLastFrame(segmentId, false);
261
+ lastFrame = frame;
262
+ }
263
+ sendLastFrame(segmentId, true);
264
+ if (!this.queue.closed) {
265
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
266
+ }
267
+
268
+ if (segmentId === requestId) {
269
+ closing = true;
270
+ shouldExit = true;
271
+ this.#logger.info('Cartesia WebSocket close event sent');
272
+ ws.close();
273
+ }
274
+ }
275
+ });
276
+ } catch (err) {
277
+ // skip log error for normal websocket close
278
+ if (err instanceof Error && !err.message.includes('WebSocket closed')) {
279
+ this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
224
280
  }
281
+ break;
225
282
  }
226
- });
227
- ws.on('close', (code, reason) => {
228
- if (!closing) {
229
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
230
- }
231
- ws.removeAllListeners();
232
- });
283
+ }
284
+
285
+ this.#logger.info('Cartesia WebSocket closed');
233
286
  };
234
287
 
235
- const url = `wss://api.cartesia.ai/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
288
+ const wsUrl = this.#opts.baseUrl.replace(/^http/, 'ws');
289
+ const url = `${wsUrl}/tts/websocket?api_key=${this.#opts.apiKey}&cartesia_version=${VERSION}`;
236
290
  const ws = new WebSocket(url);
237
291
 
238
292
  try {
@@ -243,6 +297,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
243
297
  });
244
298
 
245
299
  await Promise.all([inputTask(), sentenceStreamTask(ws), recvTask(ws)]);
300
+ this.#logger.info('Cartesia run completed');
246
301
  } catch (e) {
247
302
  throw new Error(`failed to connect to Cartesia: ${e}`);
248
303
  }
@@ -267,7 +322,7 @@ const toCartesiaOptions = (opts: TTSOptions): { [id: string]: unknown } => {
267
322
  voiceControls.emotion = opts.emotion;
268
323
  }
269
324
 
270
- if (Object.keys({}).length) {
325
+ if (Object.keys(voiceControls).length) {
271
326
  voice.__experimental_controls = voiceControls;
272
327
  }
273
328