@cartesia/cartesia-js 0.0.4-alpha.0 → 1.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.turbo/turbo-build.log +64 -46
  2. package/CHANGELOG.md +6 -0
  3. package/README.md +123 -16
  4. package/dist/{chunk-XPIMIAAE.js → chunk-3FL2SNIR.js} +1 -1
  5. package/dist/chunk-3GBZUGUD.js +17 -0
  6. package/dist/chunk-4RMSIQLG.js +25 -0
  7. package/dist/chunk-BCQ63627.js +32 -0
  8. package/dist/chunk-JOHSCOLW.js +106 -0
  9. package/dist/chunk-LYPTISWL.js +75 -0
  10. package/dist/chunk-NDNN326Q.js +207 -0
  11. package/dist/chunk-WBK6LLXX.js +58 -0
  12. package/dist/chunk-WE63M7PJ.js +119 -0
  13. package/dist/{chunk-R4P7LWVZ.js → chunk-WIFMLPT5.js} +31 -6
  14. package/dist/chunk-X7SJMF2R.js +22 -0
  15. package/dist/index.cjs +391 -158
  16. package/dist/index.d.cts +7 -3
  17. package/dist/index.d.ts +7 -3
  18. package/dist/index.js +13 -6
  19. package/dist/lib/client.cjs +46 -0
  20. package/dist/lib/client.d.cts +2 -0
  21. package/dist/lib/client.d.ts +2 -0
  22. package/dist/lib/client.js +3 -3
  23. package/dist/lib/constants.cjs +11 -7
  24. package/dist/lib/constants.d.cts +2 -3
  25. package/dist/lib/constants.d.ts +2 -3
  26. package/dist/lib/constants.js +4 -6
  27. package/dist/lib/index.cjs +276 -163
  28. package/dist/lib/index.d.cts +6 -2
  29. package/dist/lib/index.d.ts +6 -2
  30. package/dist/lib/index.js +9 -6
  31. package/dist/react/index.cjs +524 -275
  32. package/dist/react/index.d.cts +20 -14
  33. package/dist/react/index.d.ts +20 -14
  34. package/dist/react/index.js +142 -98
  35. package/dist/react/utils.js +2 -2
  36. package/dist/tts/index.cjs +470 -0
  37. package/dist/tts/index.d.cts +17 -0
  38. package/dist/tts/index.d.ts +17 -0
  39. package/dist/tts/index.js +12 -0
  40. package/dist/tts/player.cjs +198 -0
  41. package/dist/tts/player.d.cts +43 -0
  42. package/dist/tts/player.d.ts +43 -0
  43. package/dist/tts/player.js +8 -0
  44. package/dist/tts/source.cjs +167 -0
  45. package/dist/tts/source.d.cts +53 -0
  46. package/dist/tts/source.d.ts +53 -0
  47. package/dist/tts/source.js +7 -0
  48. package/dist/{audio → tts}/utils.cjs +12 -53
  49. package/dist/tts/utils.d.cts +67 -0
  50. package/dist/tts/utils.d.ts +67 -0
  51. package/dist/{audio → tts}/utils.js +2 -7
  52. package/dist/{audio/index.cjs → tts/websocket.cjs} +213 -164
  53. package/dist/tts/websocket.d.cts +53 -0
  54. package/dist/tts/websocket.d.ts +53 -0
  55. package/dist/tts/websocket.js +11 -0
  56. package/dist/types/index.d.cts +50 -1
  57. package/dist/types/index.d.ts +50 -1
  58. package/dist/voices/index.cjs +155 -0
  59. package/dist/voices/index.d.cts +12 -0
  60. package/dist/voices/index.d.ts +12 -0
  61. package/dist/voices/index.js +9 -0
  62. package/package.json +2 -1
  63. package/src/index.ts +1 -0
  64. package/src/lib/client.ts +14 -1
  65. package/src/lib/constants.ts +13 -3
  66. package/src/lib/index.ts +6 -3
  67. package/src/react/index.ts +157 -103
  68. package/src/tts/index.ts +17 -0
  69. package/src/tts/player.ts +109 -0
  70. package/src/tts/source.ts +98 -0
  71. package/src/{audio → tts}/utils.ts +19 -97
  72. package/src/tts/websocket.ts +210 -0
  73. package/src/types/index.ts +63 -0
  74. package/src/voices/index.ts +47 -0
  75. package/dist/audio/index.d.cts +0 -5
  76. package/dist/audio/index.d.ts +0 -5
  77. package/dist/audio/index.js +0 -10
  78. package/dist/audio/utils.d.cts +0 -5
  79. package/dist/audio/utils.d.ts +0 -5
  80. package/dist/chunk-4MHF74A7.js +0 -272
  81. package/dist/chunk-5TSWLYOW.js +0 -113
  82. package/dist/chunk-MJIFZWHS.js +0 -18
  83. package/dist/chunk-OVI3W3GG.js +0 -12
  84. package/dist/chunk-S6A27RQL.js +0 -18
  85. package/dist/index-C2_3XFxn.d.cts +0 -163
  86. package/dist/index-DgwnZezj.d.ts +0 -163
  87. package/src/audio/index.ts +0 -297
@@ -1,15 +1,14 @@
1
1
  "use strict";
2
2
  var __create = Object.create;
3
3
  var __defProp = Object.defineProperty;
4
+ var __defProps = Object.defineProperties;
4
5
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
6
+ var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
5
7
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
8
  var __getOwnPropSymbols = Object.getOwnPropertySymbols;
7
9
  var __getProtoOf = Object.getPrototypeOf;
8
10
  var __hasOwnProp = Object.prototype.hasOwnProperty;
9
11
  var __propIsEnum = Object.prototype.propertyIsEnumerable;
10
- var __knownSymbol = (name, symbol) => {
11
- return (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
12
- };
13
12
  var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
14
13
  var __spreadValues = (a, b) => {
15
14
  for (var prop in b || (b = {}))
@@ -22,6 +21,7 @@ var __spreadValues = (a, b) => {
22
21
  }
23
22
  return a;
24
23
  };
24
+ var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));
25
25
  var __export = (target, all) => {
26
26
  for (var name in all)
27
27
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -43,6 +43,28 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
43
43
  mod
44
44
  ));
45
45
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
46
+ var __accessCheck = (obj, member, msg) => {
47
+ if (!member.has(obj))
48
+ throw TypeError("Cannot " + msg);
49
+ };
50
+ var __privateGet = (obj, member, getter) => {
51
+ __accessCheck(obj, member, "read from private field");
52
+ return getter ? getter.call(obj) : member.get(obj);
53
+ };
54
+ var __privateAdd = (obj, member, value) => {
55
+ if (member.has(obj))
56
+ throw TypeError("Cannot add the same private member more than once");
57
+ member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
58
+ };
59
+ var __privateSet = (obj, member, value, setter) => {
60
+ __accessCheck(obj, member, "write to private field");
61
+ setter ? setter.call(obj, value) : member.set(obj, value);
62
+ return value;
63
+ };
64
+ var __privateMethod = (obj, member, method) => {
65
+ __accessCheck(obj, member, "access private method");
66
+ return method;
67
+ };
46
68
  var __async = (__this, __arguments, generator) => {
47
69
  return new Promise((resolve, reject) => {
48
70
  var fulfilled = (value) => {
@@ -63,26 +85,29 @@ var __async = (__this, __arguments, generator) => {
63
85
  step((generator = generator.apply(__this, __arguments)).next());
64
86
  });
65
87
  };
66
- var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")]) ? it.call(obj) : (obj = obj[__knownSymbol("iterator")](), it = {}, method = (key, fn) => (fn = obj[key]) && (it[key] = (arg) => new Promise((yes, no, done) => (arg = fn.call(obj, arg), done = arg.done, Promise.resolve(arg.value).then((value) => yes({ value, done }), no)))), method("next"), method("return"), it);
67
88
 
68
89
  // src/react/index.ts
69
90
  var react_exports = {};
70
91
  __export(react_exports, {
71
- useAudio: () => useAudio
92
+ useTTS: () => useTTS
72
93
  });
73
94
  module.exports = __toCommonJS(react_exports);
74
95
  var import_react = require("react");
75
96
 
76
- // src/audio/index.ts
77
- var import_emittery = __toESM(require("emittery"), 1);
78
- var import_human_id = require("human-id");
79
- var import_partysocket = require("partysocket");
97
+ // src/lib/client.ts
98
+ var import_cross_fetch = __toESM(require("cross-fetch"), 1);
80
99
 
81
100
  // src/lib/constants.ts
82
101
  var BASE_URL = "https://api.cartesia.ai/v0";
83
- var SAMPLE_RATE = 44100;
84
- var constructWebsocketUrl = (baseUrl) => {
85
- return new URL(`${baseUrl.replace(/^http/, "ws")}/audio/websocket`);
102
+ var constructApiUrl = (baseUrl, path, protocol) => {
103
+ const normalizedPath = path.startsWith("/") ? path : `/${path}`;
104
+ if (!protocol) {
105
+ return new URL(`${baseUrl}${normalizedPath}`);
106
+ }
107
+ if (!["http", "ws"].includes(protocol)) {
108
+ throw new Error(`Invalid protocol: ${protocol}`);
109
+ }
110
+ return new URL(`${baseUrl.replace(/^http/, protocol)}${normalizedPath}`);
86
111
  };
87
112
 
88
113
  // src/lib/client.ts
@@ -94,14 +119,119 @@ var Client = class {
94
119
  this.apiKey = options.apiKey || process.env.CARTESIA_API_KEY;
95
120
  this.baseUrl = options.baseUrl || BASE_URL;
96
121
  }
122
+ fetch(path, options = {}) {
123
+ const url = constructApiUrl(this.baseUrl, path);
124
+ return (0, import_cross_fetch.default)(url.toString(), __spreadProps(__spreadValues({}, options), {
125
+ headers: __spreadValues({
126
+ "X-API-KEY": this.apiKey
127
+ }, options.headers)
128
+ }));
129
+ }
130
+ };
131
+
132
+ // src/tts/websocket.ts
133
+ var import_emittery2 = __toESM(require("emittery"), 1);
134
+ var import_human_id = require("human-id");
135
+ var import_partysocket = require("partysocket");
136
+
137
+ // src/tts/source.ts
138
+ var import_emittery = __toESM(require("emittery"), 1);
139
+ var _emitter, _buffer, _readIndex, _closed, _sampleRate;
140
+ var Source = class {
141
+ /**
142
+ * Create a new Source.
143
+ *
144
+ * @param options - Options for the Source.
145
+ * @param options.sampleRate - The sample rate of the audio.
146
+ */
147
+ constructor({ sampleRate }) {
148
+ __privateAdd(this, _emitter, new import_emittery.default());
149
+ __privateAdd(this, _buffer, new Float32Array());
150
+ __privateAdd(this, _readIndex, 0);
151
+ __privateAdd(this, _closed, false);
152
+ __privateAdd(this, _sampleRate, void 0);
153
+ this.on = __privateGet(this, _emitter).on.bind(__privateGet(this, _emitter));
154
+ this.once = __privateGet(this, _emitter).once.bind(__privateGet(this, _emitter));
155
+ this.events = __privateGet(this, _emitter).events.bind(__privateGet(this, _emitter));
156
+ this.off = __privateGet(this, _emitter).off.bind(__privateGet(this, _emitter));
157
+ __privateSet(this, _sampleRate, sampleRate);
158
+ }
159
+ get sampleRate() {
160
+ return __privateGet(this, _sampleRate);
161
+ }
162
+ /**
163
+ * Append audio to the buffer.
164
+ *
165
+ * @param src The audio to append.
166
+ */
167
+ enqueue(src) {
168
+ return __async(this, null, function* () {
169
+ __privateSet(this, _buffer, new Float32Array([...__privateGet(this, _buffer), ...src]));
170
+ yield __privateGet(this, _emitter).emit("enqueue");
171
+ });
172
+ }
173
+ /**
174
+ * Read audio from the buffer.
175
+ *
176
+ * @param dst The buffer to read the audio into.
177
+ * @returns The number of samples read. If the source is closed, this will be
178
+ * less than the length of the provided buffer.
179
+ */
180
+ read(dst) {
181
+ return __async(this, null, function* () {
182
+ const targetReadIndex = __privateGet(this, _readIndex) + dst.length;
183
+ while (!__privateGet(this, _closed) && targetReadIndex > __privateGet(this, _buffer).length) {
184
+ yield __privateGet(this, _emitter).emit("wait");
185
+ yield Promise.race([
186
+ __privateGet(this, _emitter).once("enqueue"),
187
+ __privateGet(this, _emitter).once("close")
188
+ ]);
189
+ yield __privateGet(this, _emitter).emit("read");
190
+ }
191
+ const read = Math.min(dst.length, __privateGet(this, _buffer).length - __privateGet(this, _readIndex));
192
+ dst.set(__privateGet(this, _buffer).slice(__privateGet(this, _readIndex), __privateGet(this, _readIndex) + read));
193
+ __privateSet(this, _readIndex, __privateGet(this, _readIndex) + read);
194
+ return read;
195
+ });
196
+ }
197
+ /**
198
+ * Get the number of samples in a given duration.
199
+ *
200
+ * @param durationSecs The duration in seconds.
201
+ * @returns The number of samples.
202
+ */
203
+ durationToSampleCount(durationSecs) {
204
+ return Math.trunc(durationSecs * __privateGet(this, _sampleRate));
205
+ }
206
+ get buffer() {
207
+ return __privateGet(this, _buffer);
208
+ }
209
+ get readIndex() {
210
+ return __privateGet(this, _readIndex);
211
+ }
212
+ /**
213
+ * Close the source. This signals that no more audio will be enqueued.
214
+ *
215
+ * This will emit a "close" event.
216
+ *
217
+ * @returns A promise that resolves when the source is closed.
218
+ */
219
+ close() {
220
+ return __async(this, null, function* () {
221
+ __privateSet(this, _closed, true);
222
+ yield __privateGet(this, _emitter).emit("close");
223
+ __privateGet(this, _emitter).clearListeners();
224
+ });
225
+ }
97
226
  };
227
+ _emitter = new WeakMap();
228
+ _buffer = new WeakMap();
229
+ _readIndex = new WeakMap();
230
+ _closed = new WeakMap();
231
+ _sampleRate = new WeakMap();
98
232
 
99
- // src/audio/utils.ts
233
+ // src/tts/utils.ts
100
234
  var import_base64_js = __toESM(require("base64-js"), 1);
101
- function getBufferDuration(b64) {
102
- const floats = base64ToArray(b64);
103
- return floats.length / SAMPLE_RATE;
104
- }
105
235
  function base64ToArray(b64) {
106
236
  return filterSentinel(b64).reduce((acc, b) => {
107
237
  const floats = new Float32Array(import_base64_js.default.toByteArray(b).buffer);
@@ -111,20 +241,24 @@ function base64ToArray(b64) {
111
241
  return newAcc;
112
242
  }, new Float32Array(0));
113
243
  }
114
- function playAudioBuffer(b64, context, maybeStartAt = null, onEnded = null) {
115
- const startAt = maybeStartAt != null ? maybeStartAt : context.currentTime;
116
- const floats = base64ToArray(b64);
244
+ function playAudioBuffer(floats, context, startAt, sampleRate) {
117
245
  const source = context.createBufferSource();
118
- const buffer = context.createBuffer(1, floats.length, SAMPLE_RATE);
246
+ const buffer = context.createBuffer(1, floats.length, sampleRate);
119
247
  buffer.getChannelData(0).set(floats);
120
248
  source.buffer = buffer;
121
249
  source.connect(context.destination);
122
250
  source.start(startAt);
123
- source.onended = onEnded;
124
- return buffer.duration;
251
+ return new Promise((resolve) => {
252
+ source.onended = () => {
253
+ resolve();
254
+ };
255
+ });
125
256
  }
126
257
  function createMessageHandlerForContextId(contextId, handler) {
127
258
  return (event) => {
259
+ if (typeof event.data !== "string") {
260
+ return;
261
+ }
128
262
  const message = JSON.parse(event.data);
129
263
  if (message.context_id !== contextId) {
130
264
  return;
@@ -135,7 +269,7 @@ function createMessageHandlerForContextId(contextId, handler) {
135
269
  } else {
136
270
  chunk = message.data;
137
271
  }
138
- handler({ chunk, message });
272
+ handler({ chunk, message: event.data });
139
273
  };
140
274
  }
141
275
  function getSentinel() {
@@ -149,9 +283,6 @@ function filterSentinel(collection) {
149
283
  (x) => !isSentinel(x)
150
284
  );
151
285
  }
152
- function isComplete(chunks) {
153
- return isSentinel(chunks[chunks.length - 1]);
154
- }
155
286
  function getEmitteryCallbacks(emitter) {
156
287
  return {
157
288
  on: emitter.on.bind(emitter),
@@ -160,91 +291,76 @@ function getEmitteryCallbacks(emitter) {
160
291
  events: emitter.events.bind(emitter)
161
292
  };
162
293
  }
163
- function bufferToWav(sampleRate, channelBuffers) {
164
- const totalSamples = channelBuffers[0].length * channelBuffers.length;
165
- const buffer = new ArrayBuffer(44 + totalSamples * 2);
166
- const view = new DataView(buffer);
167
- const writeString = (view2, offset2, string) => {
168
- for (let i = 0; i < string.length; i++) {
169
- view2.setUint8(offset2 + i, string.charCodeAt(i));
170
- }
171
- };
172
- writeString(view, 0, "RIFF");
173
- view.setUint32(4, 36 + totalSamples * 2, true);
174
- writeString(view, 8, "WAVE");
175
- writeString(view, 12, "fmt ");
176
- view.setUint32(16, 16, true);
177
- view.setUint16(20, 1, true);
178
- view.setUint16(22, channelBuffers.length, true);
179
- view.setUint32(24, sampleRate, true);
180
- view.setUint32(28, sampleRate * 4, true);
181
- view.setUint16(32, channelBuffers.length * 2, true);
182
- view.setUint16(34, 16, true);
183
- writeString(view, 36, "data");
184
- view.setUint32(40, totalSamples * 2, true);
185
- let offset = 44;
186
- for (let i = 0; i < channelBuffers[0].length; i++) {
187
- for (let channel = 0; channel < channelBuffers.length; channel++) {
188
- const s = Math.max(-1, Math.min(1, channelBuffers[channel][i]));
189
- view.setInt16(offset, s < 0 ? s * 32768 : s * 32767, true);
190
- offset += 2;
191
- }
192
- }
193
- return buffer;
194
- }
195
294
 
196
- // src/audio/index.ts
197
- var audio_default = class extends Client {
198
- constructor() {
199
- super(...arguments);
200
- this.isConnected = false;
295
+ // src/tts/websocket.ts
296
+ var _isConnected, _sampleRate2, _generateId, generateId_fn;
297
+ var WebSocket = class extends Client {
298
+ /**
299
+ * Create a new WebSocket client.
300
+ *
301
+ * @param args - Arguments to pass to the Client constructor.
302
+ */
303
+ constructor({ sampleRate }, ...args) {
304
+ super(...args);
305
+ /**
306
+ * Generate a unique ID suitable for a streaming context.
307
+ *
308
+ * Not suitable for security purposes or as a primary key, since
309
+ * it lacks the amount of entropy required for those use cases.
310
+ *
311
+ * @returns A unique ID.
312
+ */
313
+ __privateAdd(this, _generateId);
314
+ __privateAdd(this, _isConnected, false);
315
+ __privateAdd(this, _sampleRate2, void 0);
316
+ __privateSet(this, _sampleRate2, sampleRate);
201
317
  }
202
318
  /**
203
- * Stream audio from a model.
319
+ * Send a message over the WebSocket in order to start a stream.
204
320
  *
205
- * @param inputs - Stream options. Includes a `model` key and some `parameters`, which
206
- * are model-specific and can be found in the model's documentation.
321
+ * @param inputs - Stream options.
207
322
  * @param options - Options for the stream.
208
323
  * @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
209
324
  * If `0`, the stream will not time out.
210
- * @returns An object with a method `play` of type `(bufferDuration: number) => Promise<void>`
211
- * that plays the audio as it arrives, with `bufferDuration` seconds of audio buffered before
212
- * starting playback.
325
+ * @returns A Source object that can be passed to a Player to play the audio.
213
326
  */
214
- stream(inputs, { timeout = 0 } = {}) {
327
+ send(inputs, { timeout = 0 } = {}) {
215
328
  var _a, _b, _c, _d;
216
- if (!this.isConnected) {
329
+ if (!__privateGet(this, _isConnected)) {
217
330
  throw new Error("Not connected to WebSocket. Call .connect() first.");
218
331
  }
219
- const contextId = this.generateId();
332
+ const contextId = __privateMethod(this, _generateId, generateId_fn).call(this);
220
333
  (_a = this.socket) == null ? void 0 : _a.send(
221
- JSON.stringify({
222
- data: inputs,
334
+ JSON.stringify(__spreadProps(__spreadValues({
223
335
  context_id: contextId
224
- })
336
+ }, inputs), {
337
+ output_format: {
338
+ container: "raw",
339
+ encoding: "pcm_f32le",
340
+ sample_rate: __privateGet(this, _sampleRate2)
341
+ }
342
+ }))
225
343
  );
344
+ const emitter = new import_emittery2.default();
345
+ const source = new Source({
346
+ sampleRate: __privateGet(this, _sampleRate2)
347
+ });
226
348
  const streamCompleteController = new AbortController();
227
349
  let timeoutId = null;
228
350
  if (timeout > 0) {
229
351
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
230
352
  }
231
- const chunks = [];
232
- const emitter = new import_emittery.default();
233
353
  const handleMessage = createMessageHandlerForContextId(
234
354
  contextId,
235
355
  (_0) => __async(this, [_0], function* ({ chunk, message }) {
236
- chunks.push(chunk);
237
- yield emitter.emit("chunk", {
238
- chunk,
239
- chunks
240
- });
241
- yield emitter.emit("message", message);
356
+ emitter.emit("message", message);
242
357
  if (isSentinel(chunk)) {
243
- yield emitter.emit("streamed", {
244
- chunks
245
- });
358
+ yield source.close();
246
359
  streamCompleteController.abort();
247
- } else if (timeoutId) {
360
+ return;
361
+ }
362
+ yield source.enqueue(base64ToArray([chunk]));
363
+ if (timeoutId) {
248
364
  clearTimeout(timeoutId);
249
365
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
250
366
  }
@@ -272,107 +388,12 @@ var audio_default = class extends Client {
272
388
  }
273
389
  );
274
390
  streamCompleteController.signal.addEventListener("abort", () => {
391
+ source.close();
275
392
  if (timeoutId) {
276
393
  clearTimeout(timeoutId);
277
394
  }
278
- emitter.clearListeners();
279
- });
280
- const play = (_0) => __async(this, [_0], function* ({ bufferDuration }) {
281
- const context = new AudioContext({
282
- sampleRate: SAMPLE_RATE
283
- });
284
- let startNextPlaybackAt = 0;
285
- const playLatestChunk = (chunk) => {
286
- if (isSentinel(chunk)) {
287
- return true;
288
- }
289
- startNextPlaybackAt = playAudioBuffer([chunk], context, startNextPlaybackAt) + Math.max(context.currentTime, startNextPlaybackAt);
290
- return false;
291
- };
292
- const playChunks = (chunks2) => {
293
- startNextPlaybackAt += playAudioBuffer(
294
- chunks2,
295
- context,
296
- startNextPlaybackAt
297
- );
298
- if (isComplete(chunks2)) {
299
- return;
300
- }
301
- };
302
- const tryStart = (chunks2) => __async(this, null, function* () {
303
- startNextPlaybackAt = context.currentTime;
304
- if (isComplete(chunks2) || streamCompleteController.signal.aborted) {
305
- emitter.emit("buffered");
306
- playChunks(chunks2);
307
- return true;
308
- }
309
- if (getBufferDuration(chunks2) > bufferDuration) {
310
- emitter.emit("buffered");
311
- playChunks(chunks2);
312
- try {
313
- for (var iter2 = __forAwait(emitter.events("chunk")), more2, temp2, error2; more2 = !(temp2 = yield iter2.next()).done; more2 = false) {
314
- const { chunk } = temp2.value;
315
- if (playLatestChunk(chunk)) {
316
- break;
317
- }
318
- }
319
- } catch (temp2) {
320
- error2 = [temp2];
321
- } finally {
322
- try {
323
- more2 && (temp2 = iter2.return) && (yield temp2.call(iter2));
324
- } finally {
325
- if (error2)
326
- throw error2[0];
327
- }
328
- }
329
- return true;
330
- }
331
- emitter.emit("buffering");
332
- return false;
333
- });
334
- if (!(yield tryStart(chunks))) {
335
- try {
336
- for (var iter = __forAwait(emitter.events("chunk")), more, temp, error; more = !(temp = yield iter.next()).done; more = false) {
337
- const { chunks: chunks2 } = temp.value;
338
- if (yield tryStart(chunks2)) {
339
- const playbackEndsIn = Math.max(0, startNextPlaybackAt - context.currentTime) * 1e3;
340
- emitter.emit("scheduled", { playbackEndsIn });
341
- break;
342
- }
343
- }
344
- } catch (temp) {
345
- error = [temp];
346
- } finally {
347
- try {
348
- more && (temp = iter.return) && (yield temp.call(iter));
349
- } finally {
350
- if (error)
351
- throw error[0];
352
- }
353
- }
354
- } else {
355
- const playbackEndsIn = Math.max(0, startNextPlaybackAt - context.currentTime) * 1e3;
356
- emitter.emit("scheduled", { playbackEndsIn });
357
- }
358
- });
359
- return __spreadValues({
360
- play
361
- }, getEmitteryCallbacks(emitter));
362
- }
363
- /**
364
- * Generate a unique ID suitable for a streaming context.
365
- *
366
- * Not suitable for security purposes or as a primary key, since
367
- * it lacks the amount of entropy required for those use cases.
368
- *
369
- * @returns A unique ID.
370
- */
371
- generateId() {
372
- return (0, import_human_id.humanId)({
373
- separator: "-",
374
- capitalize: false
375
395
  });
396
+ return __spreadValues({ source }, getEmitteryCallbacks(emitter));
376
397
  }
377
398
  /**
378
399
  * Authenticate and connect to a Cartesia streaming WebSocket.
@@ -381,16 +402,16 @@ var audio_default = class extends Client {
381
402
  * @throws {Error} If the WebSocket fails to connect.
382
403
  */
383
404
  connect() {
384
- const url = constructWebsocketUrl(this.baseUrl);
405
+ const url = constructApiUrl(this.baseUrl, "/tts/websocket", "ws");
385
406
  url.searchParams.set("api_key", this.apiKey);
386
- const emitter = new import_emittery.default();
407
+ const emitter = new import_emittery2.default();
387
408
  this.socket = new import_partysocket.WebSocket(url.toString());
388
409
  this.socket.onopen = () => {
389
- this.isConnected = true;
410
+ __privateSet(this, _isConnected, true);
390
411
  emitter.emit("open");
391
412
  };
392
413
  this.socket.onclose = () => {
393
- this.isConnected = false;
414
+ __privateSet(this, _isConnected, false);
394
415
  emitter.emit("close");
395
416
  };
396
417
  return new Promise(
@@ -437,6 +458,192 @@ var audio_default = class extends Client {
437
458
  (_a = this.socket) == null ? void 0 : _a.close();
438
459
  }
439
460
  };
461
+ _isConnected = new WeakMap();
462
+ _sampleRate2 = new WeakMap();
463
+ _generateId = new WeakSet();
464
+ generateId_fn = function() {
465
+ return (0, import_human_id.humanId)({
466
+ separator: "-",
467
+ capitalize: false
468
+ });
469
+ };
470
+
471
+ // src/tts/index.ts
472
+ var TTS = class extends Client {
473
+ /**
474
+ * Get a WebSocket client for streaming audio from the TTS API.
475
+ *
476
+ * @returns {WebSocket} A Cartesia WebSocket client.
477
+ */
478
+ websocket(options) {
479
+ return new WebSocket(options, {
480
+ apiKey: this.apiKey,
481
+ baseUrl: this.baseUrl
482
+ });
483
+ }
484
+ };
485
+
486
+ // src/voices/index.ts
487
+ var Voices = class extends Client {
488
+ list() {
489
+ return __async(this, null, function* () {
490
+ const response = yield this.fetch("/voices");
491
+ return response.json();
492
+ });
493
+ }
494
+ get(voiceId) {
495
+ return __async(this, null, function* () {
496
+ const response = yield this.fetch(`/voices/${voiceId}`);
497
+ return response.json();
498
+ });
499
+ }
500
+ create(voice) {
501
+ return __async(this, null, function* () {
502
+ const response = yield this.fetch("/voices", {
503
+ method: "POST",
504
+ body: JSON.stringify(voice)
505
+ });
506
+ return response.json();
507
+ });
508
+ }
509
+ clone(options) {
510
+ return __async(this, null, function* () {
511
+ if (options.mode === "url") {
512
+ const response = yield this.fetch(
513
+ `/voices/clone/url?link=${options.link}`,
514
+ {
515
+ method: "POST"
516
+ }
517
+ );
518
+ return response.json();
519
+ }
520
+ if (options.mode === "clip") {
521
+ const formData = new FormData();
522
+ formData.append("clip", options.clip);
523
+ const response = yield this.fetch("/voices/clone/clip", {
524
+ method: "POST",
525
+ body: formData
526
+ });
527
+ return response.json();
528
+ }
529
+ throw new Error("Invalid mode for clone()");
530
+ });
531
+ }
532
+ };
533
+
534
+ // src/lib/index.ts
535
+ var Cartesia = class extends Client {
536
+ constructor(options = {}) {
537
+ super(options);
538
+ this.tts = new TTS(options);
539
+ this.voices = new Voices(options);
540
+ }
541
+ };
542
+
543
+ // src/tts/player.ts
544
+ var import_emittery3 = __toESM(require("emittery"), 1);
545
+ var _context, _startNextPlaybackAt, _bufferDuration, _emitter2, _playBuffer, playBuffer_fn;
546
+ var Player = class {
547
+ /**
548
+ * Create a new Player.
549
+ *
550
+ * @param options - Options for the Player.
551
+ * @param options.bufferDuration - The duration of the audio buffer to play.
552
+ */
553
+ constructor({ bufferDuration }) {
554
+ __privateAdd(this, _playBuffer);
555
+ __privateAdd(this, _context, null);
556
+ __privateAdd(this, _startNextPlaybackAt, 0);
557
+ __privateAdd(this, _bufferDuration, void 0);
558
+ __privateAdd(this, _emitter2, new import_emittery3.default());
559
+ __privateSet(this, _bufferDuration, bufferDuration);
560
+ }
561
+ /**
562
+ * Play audio from a source.
563
+ *
564
+ * @param source The source to play audio from.
565
+ * @returns A promise that resolves when the audio has finished playing.
566
+ */
567
+ play(source) {
568
+ return __async(this, null, function* () {
569
+ __privateSet(this, _startNextPlaybackAt, 0);
570
+ __privateSet(this, _context, new AudioContext({ sampleRate: source.sampleRate }));
571
+ const buffer = new Float32Array(
572
+ source.durationToSampleCount(__privateGet(this, _bufferDuration))
573
+ );
574
+ const plays = [];
575
+ while (true) {
576
+ const read = yield source.read(buffer);
577
+ const playableAudio = buffer.slice(0, read);
578
+ plays.push(__privateMethod(this, _playBuffer, playBuffer_fn).call(this, playableAudio, source.sampleRate));
579
+ if (read < buffer.length) {
580
+ yield __privateGet(this, _emitter2).emit("finish");
581
+ break;
582
+ }
583
+ }
584
+ yield Promise.all(plays);
585
+ });
586
+ }
587
+ /**
588
+ * Pause the audio.
589
+ *
590
+ * @returns A promise that resolves when the audio has been paused.
591
+ */
592
+ pause() {
593
+ return __async(this, null, function* () {
594
+ if (!__privateGet(this, _context)) {
595
+ throw new Error("AudioContext not initialized.");
596
+ }
597
+ yield __privateGet(this, _context).suspend();
598
+ });
599
+ }
600
+ /**
601
+ * Resume the audio.
602
+ *
603
+ * @returns A promise that resolves when the audio has been resumed.
604
+ */
605
+ resume() {
606
+ return __async(this, null, function* () {
607
+ if (!__privateGet(this, _context)) {
608
+ throw new Error("AudioContext not initialized.");
609
+ }
610
+ yield __privateGet(this, _context).resume();
611
+ });
612
+ }
613
+ /**
614
+ * Toggle the audio.
615
+ *
616
+ * @returns A promise that resolves when the audio has been toggled.
617
+ */
618
+ toggle() {
619
+ return __async(this, null, function* () {
620
+ if (!__privateGet(this, _context)) {
621
+ throw new Error("AudioContext not initialized.");
622
+ }
623
+ if (__privateGet(this, _context).state === "running") {
624
+ yield this.pause();
625
+ } else {
626
+ yield this.resume();
627
+ }
628
+ });
629
+ }
630
+ };
631
+ _context = new WeakMap();
632
+ _startNextPlaybackAt = new WeakMap();
633
+ _bufferDuration = new WeakMap();
634
+ _emitter2 = new WeakMap();
635
+ _playBuffer = new WeakSet();
636
+ playBuffer_fn = function(buf, sampleRate) {
637
+ return __async(this, null, function* () {
638
+ if (!__privateGet(this, _context)) {
639
+ throw new Error("AudioContext not initialized.");
640
+ }
641
+ const startAt = __privateGet(this, _startNextPlaybackAt);
642
+ const duration = buf.length / sampleRate;
643
+ __privateSet(this, _startNextPlaybackAt, duration + Math.max(__privateGet(this, _context).currentTime, __privateGet(this, _startNextPlaybackAt)));
644
+ yield playAudioBuffer(buf, __privateGet(this, _context), startAt, sampleRate);
645
+ });
646
+ };
440
647
 
441
648
  // src/react/utils.ts
442
649
  function pingServer(url) {
@@ -449,78 +656,88 @@ function pingServer(url) {
449
656
  }
450
657
 
451
658
  // src/react/index.ts
452
- function useAudio({ apiKey, baseUrl }) {
659
+ var PING_INTERVAL = 5e3;
660
+ var DEFAULT_BUFFER_DURATION = 0.01;
661
+ function useTTS({
662
+ apiKey,
663
+ baseUrl,
664
+ sampleRate
665
+ }) {
666
+ var _a, _b;
453
667
  if (typeof window === "undefined") {
454
668
  return {
455
- stream: () => {
456
- },
669
+ buffer: () => __async(this, null, function* () {
670
+ }),
457
671
  play: () => __async(this, null, function* () {
458
672
  }),
459
- download: () => null,
673
+ pause: () => __async(this, null, function* () {
674
+ }),
675
+ resume: () => __async(this, null, function* () {
676
+ }),
677
+ toggle: () => __async(this, null, function* () {
678
+ }),
679
+ playbackStatus: "inactive",
680
+ bufferStatus: "inactive",
681
+ isWaiting: false,
682
+ source: null,
460
683
  isConnected: false,
461
- isPlaying: false,
462
- isStreamed: false,
463
- isBuffering: false,
464
- chunks: [],
465
- messages: []
684
+ metrics: {
685
+ modelLatency: null
686
+ }
466
687
  };
467
688
  }
468
- const audio = (0, import_react.useMemo)(() => {
689
+ const websocket = (0, import_react.useMemo)(() => {
469
690
  if (!apiKey) {
470
691
  return null;
471
692
  }
472
- const audio2 = new audio_default({ apiKey, baseUrl });
473
- return audio2;
474
- }, [apiKey, baseUrl]);
475
- const streamReturn = (0, import_react.useRef)(null);
476
- const [isStreamed, setIsStreamed] = (0, import_react.useState)(false);
477
- const [isPlaying, setIsPlaying] = (0, import_react.useState)(false);
478
- const [isBuffering, setIsBuffering] = (0, import_react.useState)(false);
693
+ const cartesia = new Cartesia({ apiKey, baseUrl });
694
+ baseUrl = baseUrl != null ? baseUrl : cartesia.baseUrl; // NOTE(review): reassigns the hook parameter from inside the memo callback; later code in this hook reads baseUrl, so the fallback presumably only applies on renders where this callback actually runs - confirm intended
695
+ return cartesia.tts.websocket({ sampleRate });
696
+ }, [apiKey, baseUrl, sampleRate]);
697
+ const websocketReturn = (0, import_react.useRef)(null);
698
+ const player = (0, import_react.useRef)(null);
699
+ const [playbackStatus, setPlaybackStatus] = (0, import_react.useState)("inactive");
700
+ const [bufferStatus, setBufferStatus] = (0, import_react.useState)("inactive");
701
+ const [isWaiting, setIsWaiting] = (0, import_react.useState)(false);
479
702
  const [isConnected, setIsConnected] = (0, import_react.useState)(false);
480
- const [chunks, setChunks] = (0, import_react.useState)([]);
703
+ const [bufferDuration, setBufferDuration] = (0, import_react.useState)(null);
481
704
  const [messages, setMessages] = (0, import_react.useState)([]);
482
- const latencyEndpoint = "https://api.cartesia.ai";
483
- const stream = (0, import_react.useCallback)(
705
+ const buffer = (0, import_react.useCallback)(
484
706
  (options) => __async(this, null, function* () {
485
- var _a;
486
- setIsStreamed(false);
487
- streamReturn.current = (_a = audio == null ? void 0 : audio.stream(options)) != null ? _a : null;
488
- if (!streamReturn.current) {
707
+ var _a2;
708
+ setMessages([]);
709
+ setBufferStatus("buffering");
710
+ websocketReturn.current = (_a2 = websocket == null ? void 0 : websocket.send(options)) != null ? _a2 : null;
711
+ if (!websocketReturn.current) {
489
712
  return;
490
713
  }
491
- setMessages([]);
492
- streamReturn.current.on(
493
- "chunk",
494
- ({ chunks: chunks3 }) => {
495
- setChunks(chunks3);
496
- }
497
- );
498
- streamReturn.current.on(
499
- "message",
500
- (message) => {
501
- setMessages((messages2) => [...messages2, message]);
502
- }
503
- );
504
- const { chunks: chunks2 } = yield streamReturn.current.once("streamed");
505
- setChunks(chunks2);
506
- setIsStreamed(true);
714
+ websocketReturn.current.on("message", (message) => {
715
+ setMessages((messages2) => [...messages2, JSON.parse(message)]);
716
+ });
717
+ yield websocketReturn.current.source.once("close");
718
+ setBufferStatus("buffered");
507
719
  }),
508
- [audio]
720
+ [websocket]
509
721
  );
510
- const download = (0, import_react.useCallback)(() => {
511
- if (!isStreamed) {
512
- return null;
722
+ const metrics = (0, import_react.useMemo)(() => {
723
+ var _a2;
724
+ if (messages.length === 0) {
725
+ return {
726
+ modelLatency: null
727
+ };
513
728
  }
514
- const audio2 = bufferToWav(SAMPLE_RATE, [base64ToArray(chunks)]);
515
- return new Blob([audio2], { type: "audio/wav" });
516
- }, [isStreamed, chunks]);
729
+ const modelLatency = (_a2 = messages[0].step_time) != null ? _a2 : null; // step_time of the first received message, or null when it is absent
730
+ return {
731
+ modelLatency: Math.trunc(modelLatency) // NOTE(review): Math.trunc(null) evaluates to 0, so a missing step_time is reported as 0 here while the empty-messages branch reports null - confirm intended
732
+ };
733
+ }, [messages]);
517
734
  (0, import_react.useEffect)(() => {
518
735
  let cleanup = () => {
519
736
  };
520
737
  function setupConnection() {
521
738
  return __async(this, null, function* () {
522
739
  try {
523
- const connection = yield audio == null ? void 0 : audio.connect();
740
+ const connection = yield websocket == null ? void 0 : websocket.connect();
524
741
  if (!connection) {
525
742
  return;
526
743
  }
@@ -531,9 +748,25 @@ function useAudio({ apiKey, baseUrl }) {
531
748
  const unsubscribe = connection.on("close", () => {
532
749
  setIsConnected(false);
533
750
  });
751
+ const intervalId = setInterval(() => {
752
+ if (baseUrl) {
753
+ pingServer(new URL(baseUrl).origin).then((ping) => {
754
+ let bufferDuration2;
755
+ if (ping < 300) {
756
+ bufferDuration2 = 0.01;
757
+ } else if (ping > 1500) {
758
+ bufferDuration2 = 6;
759
+ } else {
760
+ bufferDuration2 = ping / 1e3 * 4;
761
+ }
762
+ setBufferDuration(bufferDuration2);
763
+ });
764
+ }
765
+ }, PING_INTERVAL);
534
766
  return () => {
535
767
  unsubscribe();
536
- audio == null ? void 0 : audio.disconnect();
768
+ clearInterval(intervalId);
769
+ websocket == null ? void 0 : websocket.disconnect();
537
770
  };
538
771
  } catch (e) {
539
772
  console.error(e);
@@ -544,54 +777,70 @@ function useAudio({ apiKey, baseUrl }) {
544
777
  cleanup = cleanupConnection;
545
778
  });
546
779
  return () => cleanup == null ? void 0 : cleanup();
547
- }, [audio]);
780
+ }, [websocket, baseUrl]);
548
781
  const play = (0, import_react.useCallback)(() => __async(this, null, function* () {
549
- var _a;
550
- if (isPlaying || !streamReturn.current) {
782
+ if (playbackStatus === "playing" || !websocketReturn.current) {
551
783
  return;
552
784
  }
553
- setIsPlaying(true);
554
- const ping = yield pingServer(latencyEndpoint);
555
- let bufferingTimeout;
556
- let bufferDuration;
557
- if (ping < 300) {
558
- bufferDuration = 0;
559
- } else if (ping > 1500) {
560
- bufferDuration = 6;
561
- } else {
562
- bufferDuration = ping / 1e3 * 4;
563
- }
564
- streamReturn.current.once("buffering").then(() => {
565
- bufferingTimeout = setTimeout(() => {
566
- setIsBuffering(true);
567
- }, 250);
785
+ setPlaybackStatus("playing");
786
+ const unsubscribes = [];
787
+ unsubscribes.push(
788
+ websocketReturn.current.source.on("wait", () => {
789
+ setIsWaiting(true);
790
+ })
791
+ );
792
+ unsubscribes.push(
793
+ websocketReturn.current.source.on("read", () => {
794
+ setIsWaiting(false);
795
+ })
796
+ );
797
+ player.current = new Player({
798
+ bufferDuration: bufferDuration != null ? bufferDuration : DEFAULT_BUFFER_DURATION
568
799
  });
569
- streamReturn.current.once("buffered").then(() => {
570
- if (bufferingTimeout) {
571
- clearTimeout(bufferingTimeout);
800
+ yield player.current.play(websocketReturn.current.source);
801
+ for (const unsubscribe of unsubscribes) {
802
+ unsubscribe();
803
+ }
804
+ setPlaybackStatus("finished");
805
+ }), [playbackStatus, bufferDuration]);
806
+ const pause = (0, import_react.useCallback)(() => __async(this, null, function* () {
807
+ var _a2;
808
+ yield (_a2 = player.current) == null ? void 0 : _a2.pause();
809
+ setPlaybackStatus("paused");
810
+ }), []);
811
+ const resume = (0, import_react.useCallback)(() => __async(this, null, function* () {
812
+ var _a2;
813
+ yield (_a2 = player.current) == null ? void 0 : _a2.resume();
814
+ setPlaybackStatus("playing");
815
+ }), []);
816
+ const toggle = (0, import_react.useCallback)(() => __async(this, null, function* () {
817
+ var _a2;
818
+ yield (_a2 = player.current) == null ? void 0 : _a2.toggle();
819
+ setPlaybackStatus((status) => {
820
+ if (status === "playing") {
821
+ return "paused";
572
822
  }
573
- setIsBuffering(false);
574
- });
575
- streamReturn.current.once("scheduled").then((data) => {
576
- setTimeout(() => {
577
- setIsPlaying(false);
578
- }, data.playbackEndsIn);
823
+ if (status === "paused") {
824
+ return "playing";
825
+ }
826
+ return status;
579
827
  });
580
- yield (_a = streamReturn.current) == null ? void 0 : _a.play({ bufferDuration });
581
- }), [isPlaying]);
828
+ }), []);
582
829
  return {
583
- stream,
830
+ buffer,
584
831
  play,
585
- download,
586
- isPlaying,
832
+ pause,
833
+ source: (_b = (_a = websocketReturn.current) == null ? void 0 : _a.source) != null ? _b : null,
834
+ resume,
835
+ toggle,
836
+ playbackStatus,
837
+ bufferStatus,
838
+ isWaiting,
587
839
  isConnected,
588
- isStreamed,
589
- isBuffering,
590
- chunks,
591
- messages
840
+ metrics
592
841
  };
593
842
  }
594
843
  // Annotate the CommonJS export names for ESM import in node:
595
844
  0 && (module.exports = {
596
- useAudio
845
+ useTTS
597
846
  });