@cartesia/cartesia-js 1.0.0-alpha.4 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.turbo/turbo-build.log +49 -49
  2. package/CHANGELOG.md +23 -0
  3. package/LICENSE.md +21 -0
  4. package/README.md +102 -21
  5. package/dist/{chunk-VK7LBMVI.js → chunk-2NA5SEML.js} +2 -2
  6. package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
  7. package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
  8. package/dist/{chunk-IQAXBRHU.js → chunk-ASZKHN7Q.js} +53 -29
  9. package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
  10. package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
  11. package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
  12. package/dist/{chunk-36JBKJUN.js → chunk-LZO6K34D.js} +20 -7
  13. package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
  14. package/dist/{chunk-ISRU7PLL.js → chunk-OFH3ML4L.js} +3 -3
  15. package/dist/index.cjs +129 -39
  16. package/dist/index.d.cts +4 -4
  17. package/dist/index.d.ts +4 -4
  18. package/dist/index.js +15 -9
  19. package/dist/lib/client.js +2 -2
  20. package/dist/lib/constants.js +1 -1
  21. package/dist/lib/index.cjs +106 -33
  22. package/dist/lib/index.js +8 -8
  23. package/dist/react/index.cjs +231 -92
  24. package/dist/react/index.d.cts +4 -3
  25. package/dist/react/index.d.ts +4 -3
  26. package/dist/react/index.js +117 -64
  27. package/dist/react/utils.js +2 -2
  28. package/dist/tts/index.cjs +106 -33
  29. package/dist/tts/index.js +6 -6
  30. package/dist/tts/player.cjs +23 -5
  31. package/dist/tts/player.d.cts +6 -0
  32. package/dist/tts/player.d.ts +6 -0
  33. package/dist/tts/player.js +4 -3
  34. package/dist/tts/source.cjs +50 -4
  35. package/dist/tts/source.d.cts +16 -6
  36. package/dist/tts/source.d.ts +16 -6
  37. package/dist/tts/source.js +4 -2
  38. package/dist/tts/utils.cjs +18 -6
  39. package/dist/tts/utils.d.cts +7 -5
  40. package/dist/tts/utils.d.ts +7 -5
  41. package/dist/tts/utils.js +3 -2
  42. package/dist/tts/websocket.cjs +106 -33
  43. package/dist/tts/websocket.d.cts +20 -10
  44. package/dist/tts/websocket.d.ts +20 -10
  45. package/dist/tts/websocket.js +5 -5
  46. package/dist/types/index.d.cts +60 -4
  47. package/dist/types/index.d.ts +60 -4
  48. package/dist/voices/index.js +3 -3
  49. package/package.json +1 -1
  50. package/src/index.ts +2 -0
  51. package/src/react/index.ts +117 -62
  52. package/src/tts/player.ts +15 -8
  53. package/src/tts/source.ts +53 -7
  54. package/src/tts/utils.ts +26 -12
  55. package/src/tts/websocket.ts +42 -19
  56. package/src/types/index.ts +81 -3
@@ -5,71 +5,71 @@ $ tsup src/ --format cjs,esm --dts
5
5
  CLI Target: es6
6
6
  CJS Build start
7
7
  ESM Build start
8
- ESM dist/index.js 437.00 B
8
+ CJS dist/index.cjs 24.12 KB
9
+ CJS dist/lib/client.cjs 3.34 KB
10
+ CJS dist/react/index.cjs 31.68 KB
11
+ CJS dist/tts/index.cjs 18.49 KB
12
+ CJS dist/types/index.cjs 764.00 B
13
+ CJS dist/voices/index.cjs 5.04 KB
14
+ CJS dist/lib/constants.cjs 1.43 KB
15
+ CJS dist/lib/index.cjs 20.02 KB
16
+ CJS dist/react/utils.cjs 1.80 KB
17
+ CJS dist/tts/player.cjs 6.96 KB
18
+ CJS dist/tts/source.cjs 8.15 KB
19
+ CJS dist/tts/utils.cjs 4.33 KB
20
+ CJS dist/tts/websocket.cjs 18.18 KB
21
+ CJS ⚡️ Build success in 110ms
9
22
  ESM dist/chunk-FXPGR372.js 0 B
23
+ ESM dist/tts/source.js 144.00 B
24
+ ESM dist/tts/utils.js 426.00 B
25
+ ESM dist/tts/websocket.js 242.00 B
26
+ ESM dist/types/index.js 31.00 B
27
+ ESM dist/voices/index.js 174.00 B
28
+ ESM dist/index.js 499.00 B
10
29
  ESM dist/lib/client.js 132.00 B
11
- ESM dist/react/index.js 6.22 KB
12
- ESM dist/tts/index.js 261.00 B
13
30
  ESM dist/lib/constants.js 183.00 B
14
31
  ESM dist/lib/index.js 322.00 B
15
- ESM dist/chunk-ISRU7PLL.js 353.00 B
16
- ESM dist/chunk-SGXUEFII.js 1.35 KB
32
+ ESM dist/react/index.js 7.62 KB
33
+ ESM dist/chunk-OFH3ML4L.js 353.00 B
34
+ ESM dist/chunk-KUSVZXDT.js 1.35 KB
17
35
  ESM dist/react/utils.js 109.00 B
18
- ESM dist/chunk-3FL2SNIR.js 337.00 B
19
- ESM dist/chunk-VK7LBMVI.js 439.00 B
20
- ESM dist/chunk-IQAXBRHU.js 5.84 KB
21
- ESM dist/chunk-PQ6CIPFW.js 4.02 KB
22
- ESM dist/chunk-PQ5EVEEH.js 841.00 B
36
+ ESM dist/chunk-NQVZNVOU.js 337.00 B
37
+ ESM dist/tts/index.js 261.00 B
38
+ ESM dist/chunk-2NA5SEML.js 439.00 B
39
+ ESM dist/chunk-ASZKHN7Q.js 6.89 KB
40
+ ESM dist/chunk-5M33ZF3Y.js 841.00 B
23
41
  ESM dist/chunk-2BFEKY3F.js 366.00 B
24
- ESM dist/tts/player.js 143.00 B
25
- ESM dist/chunk-36JBKJUN.js 3.52 KB
26
- ESM dist/chunk-RO7TY474.js 1.95 KB
27
- ESM dist/chunk-WIFMLPT5.js 2.27 KB
28
- ESM dist/types/index.js 31.00 B
29
- ESM dist/voices/index.js 174.00 B
30
- ESM dist/tts/source.js 112.00 B
31
- ESM dist/tts/utils.js 395.00 B
32
- ESM dist/tts/websocket.js 242.00 B
33
- ESM ⚡️ Build success in 100ms
34
- CJS dist/index.cjs 21.01 KB
35
- CJS dist/lib/client.cjs 3.34 KB
36
- CJS dist/lib/constants.cjs 1.43 KB
37
- CJS dist/lib/index.cjs 17.18 KB
38
- CJS dist/react/index.cjs 27.24 KB
39
- CJS dist/react/utils.cjs 1.80 KB
40
- CJS dist/tts/index.cjs 15.66 KB
41
- CJS dist/tts/player.cjs 6.66 KB
42
- CJS dist/tts/source.cjs 6.63 KB
43
- CJS dist/tts/utils.cjs 3.87 KB
44
- CJS dist/tts/websocket.cjs 15.34 KB
45
- CJS dist/types/index.cjs 764.00 B
46
- CJS dist/voices/index.cjs 5.04 KB
47
- CJS ⚡️ Build success in 105ms
42
+ ESM dist/tts/player.js 174.00 B
43
+ ESM dist/chunk-LZO6K34D.js 3.76 KB
44
+ ESM dist/chunk-BHY7MNGT.js 2.11 KB
45
+ ESM dist/chunk-6YQ6KDIQ.js 5.32 KB
46
+ ESM dist/chunk-GHY2WEOK.js 2.70 KB
47
+ ESM ⚡️ Build success in 112ms
48
48
  DTS Build start
49
- DTS ⚡️ Build success in 8445ms
50
- DTS dist/index.d.cts 509.00 B
49
+ DTS ⚡️ Build success in 8520ms
50
+ DTS dist/index.d.cts 804.00 B
51
51
  DTS dist/lib/constants.d.cts 564.00 B
52
52
  DTS dist/lib/index.d.cts 410.00 B
53
- DTS dist/react/index.d.cts 1018.00 B
53
+ DTS dist/react/index.d.cts 1.07 KB
54
54
  DTS dist/react/utils.d.cts 240.00 B
55
55
  DTS dist/tts/index.d.cts 471.00 B
56
- DTS dist/tts/player.d.cts 1.06 KB
57
- DTS dist/tts/utils.d.cts 2.56 KB
58
- DTS dist/tts/websocket.d.cts 2.39 KB
59
- DTS dist/tts/source.d.cts 2.17 KB
56
+ DTS dist/tts/player.d.cts 1.20 KB
57
+ DTS dist/tts/utils.d.cts 2.68 KB
58
+ DTS dist/tts/websocket.d.cts 2.94 KB
59
+ DTS dist/tts/source.d.cts 2.55 KB
60
60
  DTS dist/voices/index.d.cts 399.00 B
61
61
  DTS dist/lib/client.d.cts 267.00 B
62
- DTS dist/types/index.d.cts 1.28 KB
63
- DTS dist/index.d.ts 501.00 B
62
+ DTS dist/types/index.d.cts 3.07 KB
63
+ DTS dist/index.d.ts 796.00 B
64
64
  DTS dist/lib/constants.d.ts 564.00 B
65
65
  DTS dist/lib/index.d.ts 404.00 B
66
- DTS dist/react/index.d.ts 1016.00 B
66
+ DTS dist/react/index.d.ts 1.07 KB
67
67
  DTS dist/react/utils.d.ts 240.00 B
68
68
  DTS dist/tts/index.d.ts 467.00 B
69
- DTS dist/tts/player.d.ts 1.06 KB
70
- DTS dist/tts/utils.d.ts 2.55 KB
71
- DTS dist/tts/websocket.d.ts 2.39 KB
72
- DTS dist/tts/source.d.ts 2.16 KB
69
+ DTS dist/tts/player.d.ts 1.20 KB
70
+ DTS dist/tts/utils.d.ts 2.68 KB
71
+ DTS dist/tts/websocket.d.ts 2.94 KB
72
+ DTS dist/tts/source.d.ts 2.55 KB
73
73
  DTS dist/voices/index.d.ts 397.00 B
74
74
  DTS dist/lib/client.d.ts 266.00 B
75
- DTS dist/types/index.d.ts 1.28 KB
75
+ DTS dist/types/index.d.ts 3.07 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @cartesia/cartesia-js
2
2
 
3
+ ## 1.0.1
4
+
5
+ ### Patch Changes
6
+
7
+ - cb7adc2: Introduces support for continuations, timestamps, voice control and multiple output formats. Improves typing and error handling for the package.
8
+
9
+ ## 1.0.0
10
+
11
+ ### Major Changes
12
+
13
+ - 3ee5bfc: Initial release of Cartesia client with voices and WebSocket support
14
+
15
+ ### Minor Changes
16
+
17
+ - e49f73a: Stabilize audio playback in the browser to support play/pause functionality.
18
+
19
+ ### Patch Changes
20
+
21
+ - c98a0c7: Fix typo in README
22
+ - 38af01f: Fix how URLs are constructed, solving WebSocket connection failure
23
+ - 8ecf940: Add provisional Node.js support
24
+ - 585d2c9: Makes JS client compatible with the Cartesia Stable API (2024-06-10)
25
+
3
26
  ## 1.0.0-alpha.4
4
27
 
5
28
  ### Patch Changes
package/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Cartesia AI, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,4 +1,24 @@
1
- # JavaScript Client for Cartesia
1
+ # Cartesia JavaScript Client
2
+
3
+ ![NPM Version](https://img.shields.io/npm/v/%40cartesia%2Fcartesia-js?logo=npm)
4
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
5
+
6
+ This client provides convenient access to [Cartesia's TTS models](https://cartesia.ai/). Sonic is the fastest text-to-speech model around—it can generate a second of audio in just 650ms, and it can stream out the first audio chunk in just 135ms. Alongside Sonic, we also offer an extensive prebuilt voice library for a variety of use cases.
7
+
8
+ The JavaScript client is a thin wrapper around the Cartesia API. You can view docs for the API at [docs.cartesia.ai](https://docs.cartesia.ai/).
9
+
10
+ - [Cartesia JavaScript Client](#cartesia-javascript-client)
11
+ - [Installation](#installation)
12
+ - [Usage](#usage)
13
+ - [CRUD on Voices](#crud-on-voices)
14
+ - [TTS over WebSocket](#tts-over-websocket)
15
+ - [Input Streaming with Contexts](#input-streaming-with-contexts)
16
+ - [Timestamps](#timestamps)
17
+ - [Speed and emotion controls \[Alpha\]](#speed-and-emotion-controls-alpha)
18
+ - [Multilingual TTS \[Alpha\]](#multilingual-tts-alpha)
19
+ - [Playing audio in the browser](#playing-audio-in-the-browser)
20
+ - [React](#react)
21
+
2
22
 
3
23
  ## Installation
4
24
 
@@ -40,12 +60,6 @@ const newVoice = await cartesia.voices.create({
40
60
  });
41
61
  console.log(newVoice);
42
62
 
43
- // Clone a voice from a URL.
44
- const clonedVoice = await cartesia.voices.clone({
45
- mode: "url",
46
- url: "https://youtu.be/AdtLxlttrHg?si=07OLmDPg__0IN14f&t=6",
47
- });
48
-
49
63
  // Clone a voice from a file.
50
64
  const clonedVoice = await cartesia.voices.clone({
51
65
  mode: "clip",
@@ -62,8 +76,12 @@ const cartesia = new Cartesia({
62
76
  apiKey: "your-api-key",
63
77
  });
64
78
 
65
- // Initialize the WebSocket. Make sure the sample rate you specify is supported.
66
- const websocket = cartesia.tts.websocket({ sampleRate: 44100 });
79
+ // Initialize the WebSocket. Make sure the output format you specify is supported.
80
+ const websocket = cartesia.tts.websocket({
81
+ container: "raw",
82
+ encoding: "pcm_f32le",
83
+ sampleRate: 44100
84
+ });
67
85
 
68
86
  try {
69
87
  await websocket.connect();
@@ -73,14 +91,13 @@ try {
73
91
 
74
92
  // Create a stream.
75
93
  const response = await websocket.send({
76
- model: "upbeat-moon",
94
+ model_id: "sonic-english",
77
95
  voice: {
78
- mode: "embedding",
79
- embedding: Array(192).fill(1.0),
96
+ mode: "id",
97
+ id: "a0e99841-438c-4a64-b679-ae501e7d6091",
80
98
  },
81
99
  transcript: "Hello, world!"
82
100
  // The WebSocket sets output_format on your behalf.
83
- // The container is "raw" and the encoding is "pcm_f32le".
84
101
  });
85
102
 
86
103
  // Access the raw messages from the WebSocket.
@@ -96,12 +113,76 @@ for await (const message of response.events('message')) {
96
113
  }
97
114
  ```
98
115
 
99
- #### Playing audio in the browser
116
+ #### Input Streaming with Contexts
117
+
118
+ You can perform input streaming with contexts as described in the [docs](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-websocket#input-streaming-with-contexts). The WebSocket's `send` method is just a wrapper around sending a message on the WebSocket, so the request format specified in the docs can be used directly.
119
+
120
+ You should use the return value of the first `send` call on a context to receive outputs and events for the entire context. You can ignore the return values of subsequent `send` calls.
121
+
122
+ #### Timestamps
123
+
124
+ To receive timestamps in responses, set the `add_timestamps` field in the request object to `true`.
125
+
126
+ ```js
127
+ const response = await websocket.send({
128
+ model_id: "sonic-english",
129
+ voice: {
130
+ mode: "id",
131
+ id: "a0e99841-438c-4a64-b679-ae501e7d6091",
132
+ },
133
+ transcript: "Hello, world!",
134
+ add_timestamps: true,
135
+ });
136
+ ```
137
+
138
+ You can then listen for timestamps on the returned response object.
139
+
140
+ ```js
141
+ response.on("timestamps", (timestamps) => {
142
+ console.log("Received timestamps for words:", timestamps.words);
143
+ console.log("Words start at:", timestamps.start);
144
+ console.log("Words end at:", timestamps.end);
145
+ });
146
+
147
+ // You can also access timestamps using a for-await-of loop.
148
+ for await (const timestamps of response.events('timestamps')) {
149
+ console.log("Received timestamps for words:", timestamps.words);
150
+ console.log("Words start at:", timestamps.start);
151
+ console.log("Words end at:", timestamps.end);
152
+ }
153
+ ```
154
+
155
+ #### Speed and emotion controls [Alpha]
156
+
157
+ The API has experimental support for speed and emotion controls that is not subject to semantic versioning and is subject to change without notice. You can control the speed and emotion of the synthesized speech by setting the `speed` and `emotion` fields under `voice.__experimental_controls` in the request object.
158
+
159
+ ```js
160
+ const response = await websocket.send({
161
+ model_id: "sonic-english",
162
+ voice: {
163
+ mode: "id",
164
+ id: "a0e99841-438c-4a64-b679-ae501e7d6091",
165
+ __experimental_controls: {
166
+ speed: "fastest",
167
+ emotion: ["sadness", "surprise:high"],
168
+ },
169
+ },
170
+ transcript: "Hello, world!",
171
+ });
172
+ ```
173
+
174
+ ### Multilingual TTS [Alpha]
175
+
176
+ You can define the language of the text you want to synthesize by setting the `language` field in the request object. Make sure that you are using `model_id: "sonic-multilingual"` in the request object.
177
+
178
+ Supported languages are listed at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
179
+
180
+ ### Playing audio in the browser
100
181
 
101
- (We currently only support playing audio in the browser. Support for other JS environments is coming soon.)
182
+ (The `WebPlayer` class only supports playing audio in the browser, and only the raw PCM format with `pcm_f32le` encoding.)
102
183
 
103
184
  ```js
104
- // If you're using the client in the browser, you can play the audio like this:
185
+ // If you're using the client in the browser, you can control audio playback using our WebPlayer:
105
186
  import { WebPlayer } from "@cartesia/cartesia-js";
106
187
 
107
188
  console.log("Playing stream...");
@@ -118,7 +199,7 @@ console.log("Done playing.");
118
199
 
119
200
  ## React
120
201
 
121
- We export a React hook that simplifies the process of using the TTS API. The hook manages the WebSocket connection and provides a simple interface for buffering and playing audio.
202
+ We export a React hook that simplifies the process of using the TTS API. The hook manages the WebSocket connection and provides a simple interface for buffering, playing, pausing and restarting audio.
122
203
 
123
204
  ```jsx
124
205
  import { useTTS } from '@cartesia/cartesia-js/react';
@@ -134,11 +215,11 @@ function TextToSpeech() {
134
215
  const handlePlay = async () => {
135
216
  // Begin buffering the audio.
136
217
  const response = await tts.buffer({
137
- model_id: "upbeat-moon",
218
+ model_id: "sonic-english",
138
219
  voice: {
139
- mode: "embedding",
140
- embedding: Array(192).fill(1.0),
141
- },
220
+ mode: "id",
221
+ id: "a0e99841-438c-4a64-b679-ae501e7d6091",
222
+ },
142
223
  transcript: text,
143
224
  });
144
225
 
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  WebSocket
3
- } from "./chunk-IQAXBRHU.js";
3
+ } from "./chunk-ASZKHN7Q.js";
4
4
  import {
5
5
  Client
6
- } from "./chunk-PQ5EVEEH.js";
6
+ } from "./chunk-5M33ZF3Y.js";
7
7
 
8
8
  // src/tts/index.ts
9
9
  var TTS = class extends Client {
@@ -6,7 +6,7 @@ import {
6
6
  import {
7
7
  __spreadProps,
8
8
  __spreadValues
9
- } from "./chunk-WIFMLPT5.js";
9
+ } from "./chunk-GHY2WEOK.js";
10
10
 
11
11
  // src/lib/client.ts
12
12
  import fetch from "cross-fetch";
@@ -2,12 +2,19 @@ import {
2
2
  __async,
3
3
  __privateAdd,
4
4
  __privateGet,
5
+ __privateMethod,
5
6
  __privateSet
6
- } from "./chunk-WIFMLPT5.js";
7
+ } from "./chunk-GHY2WEOK.js";
7
8
 
8
9
  // src/tts/source.ts
9
10
  import Emittery from "emittery";
10
- var _emitter, _buffer, _readIndex, _writeIndex, _closed, _sampleRate;
11
+ var ENCODING_MAP = {
12
+ pcm_f32le: { arrayType: Float32Array, bytesPerElement: 4 },
13
+ pcm_s16le: { arrayType: Int16Array, bytesPerElement: 2 },
14
+ pcm_alaw: { arrayType: Uint8Array, bytesPerElement: 1 },
15
+ pcm_mulaw: { arrayType: Uint8Array, bytesPerElement: 1 }
16
+ };
17
+ var _emitter, _buffer, _readIndex, _writeIndex, _closed, _sampleRate, _encoding, _container, _createBuffer, createBuffer_fn;
11
18
  var Source = class {
12
19
  /**
13
20
  * Create a new Source.
@@ -15,23 +22,44 @@ var Source = class {
15
22
  * @param options - Options for the Source.
16
23
  * @param options.sampleRate - The sample rate of the audio.
17
24
  */
18
- constructor({ sampleRate }) {
25
+ constructor({
26
+ sampleRate,
27
+ encoding,
28
+ container
29
+ }) {
30
+ /**
31
+ * Create a new buffer for the source.
32
+ *
33
+ * @param size - The size of the buffer to create.
34
+ * @returns The new buffer as a TypedArray based on the encoding.
35
+ */
36
+ __privateAdd(this, _createBuffer);
19
37
  __privateAdd(this, _emitter, new Emittery());
20
38
  __privateAdd(this, _buffer, void 0);
21
39
  __privateAdd(this, _readIndex, 0);
22
40
  __privateAdd(this, _writeIndex, 0);
23
41
  __privateAdd(this, _closed, false);
24
42
  __privateAdd(this, _sampleRate, void 0);
43
+ __privateAdd(this, _encoding, void 0);
44
+ __privateAdd(this, _container, void 0);
25
45
  this.on = __privateGet(this, _emitter).on.bind(__privateGet(this, _emitter));
26
46
  this.once = __privateGet(this, _emitter).once.bind(__privateGet(this, _emitter));
27
47
  this.events = __privateGet(this, _emitter).events.bind(__privateGet(this, _emitter));
28
48
  this.off = __privateGet(this, _emitter).off.bind(__privateGet(this, _emitter));
29
49
  __privateSet(this, _sampleRate, sampleRate);
30
- __privateSet(this, _buffer, new Float32Array(1024));
50
+ __privateSet(this, _encoding, encoding);
51
+ __privateSet(this, _container, container);
52
+ __privateSet(this, _buffer, __privateMethod(this, _createBuffer, createBuffer_fn).call(this, 1024));
31
53
  }
32
54
  get sampleRate() {
33
55
  return __privateGet(this, _sampleRate);
34
56
  }
57
+ get encoding() {
58
+ return __privateGet(this, _encoding);
59
+ }
60
+ get container() {
61
+ return __privateGet(this, _container);
62
+ }
35
63
  /**
36
64
  * Append audio to the buffer.
37
65
  *
@@ -45,7 +73,7 @@ var Source = class {
45
73
  while (newCapacity < requiredCapacity) {
46
74
  newCapacity *= 2;
47
75
  }
48
- const newBuffer = new Float32Array(newCapacity);
76
+ const newBuffer = __privateMethod(this, _createBuffer, createBuffer_fn).call(this, newCapacity);
49
77
  newBuffer.set(__privateGet(this, _buffer));
50
78
  __privateSet(this, _buffer, newBuffer);
51
79
  }
@@ -93,6 +121,9 @@ var Source = class {
93
121
  get readIndex() {
94
122
  return __privateGet(this, _readIndex);
95
123
  }
124
+ get writeIndex() {
125
+ return __privateGet(this, _writeIndex);
126
+ }
96
127
  /**
97
128
  * Close the source. This signals that no more audio will be enqueued.
98
129
  *
@@ -114,7 +145,15 @@ _readIndex = new WeakMap();
114
145
  _writeIndex = new WeakMap();
115
146
  _closed = new WeakMap();
116
147
  _sampleRate = new WeakMap();
148
+ _encoding = new WeakMap();
149
+ _container = new WeakMap();
150
+ _createBuffer = new WeakSet();
151
+ createBuffer_fn = function(size) {
152
+ const { arrayType: ArrayType } = ENCODING_MAP[__privateGet(this, _encoding)];
153
+ return new ArrayType(size);
154
+ };
117
155
 
118
156
  export {
157
+ ENCODING_MAP,
119
158
  Source
120
159
  };
@@ -1,9 +1,6 @@
1
- import {
2
- Source
3
- } from "./chunk-PQ6CIPFW.js";
4
1
  import {
5
2
  Client
6
- } from "./chunk-PQ5EVEEH.js";
3
+ } from "./chunk-5M33ZF3Y.js";
7
4
  import {
8
5
  CARTESIA_VERSION,
9
6
  constructApiUrl
@@ -13,29 +10,33 @@ import {
13
10
  createMessageHandlerForContextId,
14
11
  getEmitteryCallbacks,
15
12
  isSentinel
16
- } from "./chunk-RO7TY474.js";
13
+ } from "./chunk-BHY7MNGT.js";
14
+ import {
15
+ Source
16
+ } from "./chunk-6YQ6KDIQ.js";
17
17
  import {
18
18
  __async,
19
+ __objRest,
19
20
  __privateAdd,
20
21
  __privateGet,
21
22
  __privateMethod,
22
23
  __privateSet,
23
24
  __spreadProps,
24
25
  __spreadValues
25
- } from "./chunk-WIFMLPT5.js";
26
+ } from "./chunk-GHY2WEOK.js";
26
27
 
27
28
  // src/tts/websocket.ts
28
29
  import Emittery from "emittery";
29
30
  import { humanId } from "human-id";
30
31
  import { WebSocket as PartySocketWebSocket } from "partysocket";
31
- var _isConnected, _sampleRate, _generateId, generateId_fn;
32
+ var _isConnected, _sampleRate, _container, _encoding, _generateId, generateId_fn;
32
33
  var WebSocket = class extends Client {
33
34
  /**
34
35
  * Create a new WebSocket client.
35
36
  *
36
37
  * @param args - Arguments to pass to the Client constructor.
37
38
  */
38
- constructor({ sampleRate }, ...args) {
39
+ constructor({ sampleRate, container, encoding }, ...args) {
39
40
  super(...args);
40
41
  /**
41
42
  * Generate a unique ID suitable for a streaming context.
@@ -48,37 +49,47 @@ var WebSocket = class extends Client {
48
49
  __privateAdd(this, _generateId);
49
50
  __privateAdd(this, _isConnected, false);
50
51
  __privateAdd(this, _sampleRate, void 0);
52
+ __privateAdd(this, _container, void 0);
53
+ __privateAdd(this, _encoding, void 0);
51
54
  __privateSet(this, _sampleRate, sampleRate);
55
+ __privateSet(this, _container, container != null ? container : "raw");
56
+ __privateSet(this, _encoding, encoding != null ? encoding : "pcm_f32le");
52
57
  }
53
58
  /**
54
- * Send a message over the WebSocket in order to start a stream.
59
+ * Send a message over the WebSocket to start a stream.
55
60
  *
56
- * @param inputs - Stream options.
61
+ * @param inputs - Stream options. Defined in the StreamRequest type.
57
62
  * @param options - Options for the stream.
58
63
  * @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
59
- * If `0`, the stream will not time out.
64
+ * If set to `0`, the stream will not time out.
60
65
  * @returns A Source object that can be passed to a Player to play the audio.
66
+ * @returns An Emittery instance that emits messages from the WebSocket.
67
+ * @returns An abort function that can be called to cancel the stream.
61
68
  */
62
- send(inputs, { timeout = 0 } = {}) {
63
- var _a, _b, _c, _d;
69
+ send(_a, { timeout = 0 } = {}) {
70
+ var inputs = __objRest(_a, []);
71
+ var _a2, _b, _c, _d;
64
72
  if (!__privateGet(this, _isConnected)) {
65
73
  throw new Error("Not connected to WebSocket. Call .connect() first.");
66
74
  }
67
- const contextId = __privateMethod(this, _generateId, generateId_fn).call(this);
68
- (_a = this.socket) == null ? void 0 : _a.send(
69
- JSON.stringify(__spreadProps(__spreadValues({
70
- context_id: contextId
71
- }, inputs), {
72
- output_format: {
73
- container: "raw",
74
- encoding: "pcm_f32le",
75
- sample_rate: __privateGet(this, _sampleRate)
76
- }
77
- }))
75
+ if (!inputs.context_id) {
76
+ inputs.context_id = __privateMethod(this, _generateId, generateId_fn).call(this);
77
+ }
78
+ if (!inputs.output_format) {
79
+ inputs.output_format = {
80
+ container: __privateGet(this, _container),
81
+ encoding: __privateGet(this, _encoding),
82
+ sample_rate: __privateGet(this, _sampleRate)
83
+ };
84
+ }
85
+ (_a2 = this.socket) == null ? void 0 : _a2.send(
86
+ JSON.stringify(__spreadValues({}, inputs))
78
87
  );
79
88
  const emitter = new Emittery();
80
89
  const source = new Source({
81
- sampleRate: __privateGet(this, _sampleRate)
90
+ sampleRate: __privateGet(this, _sampleRate),
91
+ encoding: __privateGet(this, _encoding),
92
+ container: __privateGet(this, _container)
82
93
  });
83
94
  const streamCompleteController = new AbortController();
84
95
  let timeoutId = null;
@@ -86,19 +97,26 @@ var WebSocket = class extends Client {
86
97
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
87
98
  }
88
99
  const handleMessage = createMessageHandlerForContextId(
89
- contextId,
90
- (_0) => __async(this, [_0], function* ({ chunk, message }) {
100
+ inputs.context_id,
101
+ (_0) => __async(this, [_0], function* ({ chunk, message, data }) {
91
102
  emitter.emit("message", message);
103
+ if (data.type === "timestamps") {
104
+ emitter.emit("timestamps", data.word_timestamps);
105
+ return;
106
+ }
92
107
  if (isSentinel(chunk)) {
93
108
  yield source.close();
94
109
  streamCompleteController.abort();
95
110
  return;
96
111
  }
97
- yield source.enqueue(base64ToArray([chunk]));
98
112
  if (timeoutId) {
99
113
  clearTimeout(timeoutId);
100
114
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
101
115
  }
116
+ if (!chunk) {
117
+ return;
118
+ }
119
+ yield source.enqueue(base64ToArray([chunk], __privateGet(this, _encoding)));
102
120
  })
103
121
  );
104
122
  (_b = this.socket) == null ? void 0 : _b.addEventListener("message", handleMessage, {
@@ -128,7 +146,11 @@ var WebSocket = class extends Client {
128
146
  clearTimeout(timeoutId);
129
147
  }
130
148
  });
131
- return __spreadValues({ source }, getEmitteryCallbacks(emitter));
149
+ return __spreadProps(__spreadValues({
150
+ source
151
+ }, getEmitteryCallbacks(emitter)), {
152
+ stop: streamCompleteController.abort.bind(streamCompleteController)
153
+ });
132
154
  }
133
155
  /**
134
156
  * Authenticate and connect to a Cartesia streaming WebSocket.
@@ -198,6 +220,8 @@ var WebSocket = class extends Client {
198
220
  };
199
221
  _isConnected = new WeakMap();
200
222
  _sampleRate = new WeakMap();
223
+ _container = new WeakMap();
224
+ _encoding = new WeakMap();
201
225
  _generateId = new WeakSet();
202
226
  generateId_fn = function() {
203
227
  return humanId({