@cartesia/cartesia-js 2.2.4 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/Client.d.ts +3 -0
  2. package/Client.js +11 -6
  3. package/README.md +539 -172
  4. package/api/resources/apiStatus/client/Client.js +1 -1
  5. package/api/resources/auth/client/Client.d.ts +17 -0
  6. package/api/resources/auth/client/Client.js +18 -1
  7. package/api/resources/auth/types/TokenGrant.d.ts +3 -1
  8. package/api/resources/auth/types/TokenRequest.d.ts +2 -2
  9. package/api/resources/index.d.ts +1 -0
  10. package/api/resources/index.js +1 -0
  11. package/api/resources/infill/client/Client.js +1 -1
  12. package/api/resources/infill/client/requests/InfillBytesRequest.d.ts +3 -11
  13. package/api/resources/stt/client/Client.d.ts +43 -0
  14. package/api/resources/stt/client/Client.js +108 -0
  15. package/api/resources/stt/client/index.d.ts +1 -1
  16. package/api/resources/stt/client/index.js +15 -3
  17. package/api/resources/stt/client/requests/TranscriptionRequest.d.ts +147 -0
  18. package/api/resources/stt/client/requests/TranscriptionRequest.js +5 -0
  19. package/api/resources/stt/client/requests/index.d.ts +1 -0
  20. package/api/resources/stt/client/requests/index.js +2 -0
  21. package/api/resources/stt/index.d.ts +1 -0
  22. package/api/resources/stt/index.js +1 -0
  23. package/api/resources/stt/types/SttEncoding.d.ts +6 -6
  24. package/api/resources/stt/types/SttEncoding.js +5 -0
  25. package/api/resources/stt/types/TimestampGranularity.d.ts +12 -0
  26. package/api/resources/stt/types/TimestampGranularity.js +9 -0
  27. package/api/resources/stt/types/TranscriptMessage.d.ts +4 -1
  28. package/api/resources/stt/types/TranscriptionResponse.d.ts +4 -1
  29. package/api/resources/stt/types/TranscriptionWord.d.ts +11 -0
  30. package/api/resources/stt/types/TranscriptionWord.js +5 -0
  31. package/api/resources/stt/types/index.d.ts +2 -0
  32. package/api/resources/stt/types/index.js +2 -0
  33. package/api/resources/tts/client/Client.d.ts +7 -2
  34. package/api/resources/tts/client/Client.js +8 -8
  35. package/api/resources/tts/types/Controls.d.ts +1 -1
  36. package/api/resources/tts/types/Emotion.d.ts +2 -33
  37. package/api/resources/tts/types/Emotion.js +0 -28
  38. package/api/resources/tts/types/EmotionDeprecated.d.ts +38 -0
  39. package/api/resources/tts/types/EmotionDeprecated.js +33 -0
  40. package/api/resources/tts/types/GenerationConfig.d.ts +15 -0
  41. package/api/resources/tts/types/GenerationConfig.js +5 -0
  42. package/api/resources/tts/types/GenerationRequest.d.ts +5 -4
  43. package/api/resources/tts/types/Mp3OutputFormat.d.ts +1 -0
  44. package/api/resources/tts/types/RawOutputFormat.d.ts +1 -0
  45. package/api/resources/tts/types/SseOutputFormat.d.ts +10 -0
  46. package/api/resources/tts/types/SseOutputFormat.js +5 -0
  47. package/api/resources/tts/types/TtsRequest.d.ts +1 -0
  48. package/api/resources/tts/types/TtssseRequest.d.ts +27 -0
  49. package/api/resources/tts/types/TtssseRequest.js +5 -0
  50. package/api/resources/tts/types/WebSocketRawOutputFormat.d.ts +1 -0
  51. package/api/resources/tts/types/WebSocketRequest.d.ts +2 -4
  52. package/api/resources/tts/types/WebSocketTtsRequest.d.ts +4 -1
  53. package/api/resources/tts/types/index.d.ts +5 -1
  54. package/api/resources/tts/types/index.js +5 -1
  55. package/api/resources/voiceChanger/client/Client.d.ts +9 -4
  56. package/api/resources/voiceChanger/client/Client.js +24 -20
  57. package/api/resources/voiceChanger/client/requests/VoiceChangerBytesRequest.d.ts +3 -8
  58. package/api/resources/voiceChanger/client/requests/VoiceChangerSseRequest.d.ts +3 -8
  59. package/api/resources/voices/client/Client.js +8 -8
  60. package/api/resources/voices/client/requests/CloneVoiceRequest.d.ts +6 -24
  61. package/api/resources/voices/types/LocalizeDialect.d.ts +4 -8
  62. package/core/fetcher/Fetcher.d.ts +2 -2
  63. package/core/fetcher/Fetcher.js +4 -3
  64. package/core/fetcher/getResponseBody.js +3 -3
  65. package/dist/Client.d.ts +3 -0
  66. package/dist/Client.js +11 -6
  67. package/dist/api/resources/apiStatus/client/Client.js +1 -1
  68. package/dist/api/resources/auth/client/Client.d.ts +17 -0
  69. package/dist/api/resources/auth/client/Client.js +18 -1
  70. package/dist/api/resources/auth/types/TokenGrant.d.ts +3 -1
  71. package/dist/api/resources/auth/types/TokenRequest.d.ts +2 -2
  72. package/dist/api/resources/index.d.ts +1 -0
  73. package/dist/api/resources/index.js +1 -0
  74. package/dist/api/resources/infill/client/Client.js +1 -1
  75. package/dist/api/resources/infill/client/requests/InfillBytesRequest.d.ts +3 -11
  76. package/dist/api/resources/stt/client/Client.d.ts +43 -0
  77. package/dist/api/resources/stt/client/Client.js +108 -0
  78. package/dist/api/resources/stt/client/index.d.ts +1 -1
  79. package/dist/api/resources/stt/client/index.js +15 -3
  80. package/dist/api/resources/stt/client/requests/TranscriptionRequest.d.ts +147 -0
  81. package/dist/api/resources/stt/client/requests/TranscriptionRequest.js +5 -0
  82. package/dist/api/resources/stt/client/requests/index.d.ts +1 -0
  83. package/dist/api/resources/stt/client/requests/index.js +2 -0
  84. package/dist/api/resources/stt/index.d.ts +1 -0
  85. package/dist/api/resources/stt/index.js +1 -0
  86. package/dist/api/resources/stt/types/SttEncoding.d.ts +6 -6
  87. package/dist/api/resources/stt/types/SttEncoding.js +5 -0
  88. package/dist/api/resources/stt/types/TimestampGranularity.d.ts +12 -0
  89. package/dist/api/resources/stt/types/TimestampGranularity.js +9 -0
  90. package/dist/api/resources/stt/types/TranscriptMessage.d.ts +4 -1
  91. package/dist/api/resources/stt/types/TranscriptionResponse.d.ts +4 -1
  92. package/dist/api/resources/stt/types/TranscriptionWord.d.ts +11 -0
  93. package/dist/api/resources/stt/types/TranscriptionWord.js +5 -0
  94. package/dist/api/resources/stt/types/index.d.ts +2 -0
  95. package/dist/api/resources/stt/types/index.js +2 -0
  96. package/dist/api/resources/tts/client/Client.d.ts +7 -2
  97. package/dist/api/resources/tts/client/Client.js +8 -8
  98. package/dist/api/resources/tts/types/Controls.d.ts +1 -1
  99. package/dist/api/resources/tts/types/Emotion.d.ts +2 -33
  100. package/dist/api/resources/tts/types/Emotion.js +0 -28
  101. package/dist/api/resources/tts/types/EmotionDeprecated.d.ts +38 -0
  102. package/dist/api/resources/tts/types/EmotionDeprecated.js +33 -0
  103. package/dist/api/resources/tts/types/GenerationConfig.d.ts +15 -0
  104. package/dist/api/resources/tts/types/GenerationConfig.js +5 -0
  105. package/dist/api/resources/tts/types/GenerationRequest.d.ts +5 -4
  106. package/dist/api/resources/tts/types/Mp3OutputFormat.d.ts +1 -0
  107. package/dist/api/resources/tts/types/RawOutputFormat.d.ts +1 -0
  108. package/dist/api/resources/tts/types/SseOutputFormat.d.ts +10 -0
  109. package/dist/api/resources/tts/types/SseOutputFormat.js +5 -0
  110. package/dist/api/resources/tts/types/TtsRequest.d.ts +1 -0
  111. package/dist/api/resources/tts/types/TtssseRequest.d.ts +27 -0
  112. package/dist/api/resources/tts/types/TtssseRequest.js +5 -0
  113. package/dist/api/resources/tts/types/WebSocketRawOutputFormat.d.ts +1 -0
  114. package/dist/api/resources/tts/types/WebSocketRequest.d.ts +2 -4
  115. package/dist/api/resources/tts/types/WebSocketTtsRequest.d.ts +4 -1
  116. package/dist/api/resources/tts/types/index.d.ts +5 -1
  117. package/dist/api/resources/tts/types/index.js +5 -1
  118. package/dist/api/resources/voiceChanger/client/Client.d.ts +9 -4
  119. package/dist/api/resources/voiceChanger/client/Client.js +24 -20
  120. package/dist/api/resources/voiceChanger/client/requests/VoiceChangerBytesRequest.d.ts +3 -8
  121. package/dist/api/resources/voiceChanger/client/requests/VoiceChangerSseRequest.d.ts +3 -8
  122. package/dist/api/resources/voices/client/Client.js +8 -8
  123. package/dist/api/resources/voices/client/requests/CloneVoiceRequest.d.ts +6 -24
  124. package/dist/api/resources/voices/types/LocalizeDialect.d.ts +4 -8
  125. package/dist/core/fetcher/Fetcher.d.ts +2 -2
  126. package/dist/core/fetcher/Fetcher.js +4 -3
  127. package/dist/core/fetcher/getResponseBody.js +3 -3
  128. package/dist/index.d.ts +2 -0
  129. package/dist/index.js +8 -1
  130. package/dist/serialization/resources/auth/types/TokenGrant.d.ts +2 -1
  131. package/dist/serialization/resources/auth/types/TokenGrant.js +2 -1
  132. package/dist/serialization/resources/auth/types/TokenRequest.d.ts +1 -1
  133. package/dist/serialization/resources/auth/types/TokenRequest.js +1 -1
  134. package/dist/serialization/resources/stt/types/SttEncoding.d.ts +1 -1
  135. package/dist/serialization/resources/stt/types/SttEncoding.js +1 -1
  136. package/dist/serialization/resources/stt/types/TimestampGranularity.d.ts +10 -0
  137. package/dist/serialization/resources/stt/types/TimestampGranularity.js +41 -0
  138. package/dist/serialization/resources/stt/types/TranscriptMessage.d.ts +2 -0
  139. package/dist/serialization/resources/stt/types/TranscriptMessage.js +2 -0
  140. package/dist/serialization/resources/stt/types/TranscriptionResponse.d.ts +2 -0
  141. package/dist/serialization/resources/stt/types/TranscriptionResponse.js +2 -0
  142. package/dist/serialization/resources/stt/types/TranscriptionWord.d.ts +14 -0
  143. package/dist/serialization/resources/stt/types/TranscriptionWord.js +45 -0
  144. package/dist/serialization/resources/stt/types/index.d.ts +2 -0
  145. package/dist/serialization/resources/stt/types/index.js +2 -0
  146. package/dist/serialization/resources/tts/types/Controls.d.ts +2 -2
  147. package/dist/serialization/resources/tts/types/Controls.js +2 -2
  148. package/dist/serialization/resources/tts/types/Emotion.d.ts +1 -1
  149. package/dist/serialization/resources/tts/types/Emotion.js +1 -27
  150. package/dist/serialization/resources/tts/types/EmotionDeprecated.d.ts +10 -0
  151. package/dist/serialization/resources/tts/types/EmotionDeprecated.js +67 -0
  152. package/dist/serialization/resources/tts/types/GenerationConfig.d.ts +15 -0
  153. package/dist/serialization/resources/tts/types/GenerationConfig.js +46 -0
  154. package/dist/serialization/resources/tts/types/GenerationRequest.d.ts +3 -1
  155. package/dist/serialization/resources/tts/types/GenerationRequest.js +3 -1
  156. package/dist/serialization/resources/tts/types/SseOutputFormat.d.ts +15 -0
  157. package/dist/serialization/resources/tts/types/SseOutputFormat.js +46 -0
  158. package/dist/serialization/resources/tts/types/TtsRequest.d.ts +2 -0
  159. package/dist/serialization/resources/tts/types/TtsRequest.js +2 -0
  160. package/dist/serialization/resources/tts/types/TtssseRequest.d.ts +29 -0
  161. package/dist/serialization/resources/tts/types/TtssseRequest.js +60 -0
  162. package/dist/serialization/resources/tts/types/WebSocketTtsRequest.d.ts +3 -1
  163. package/dist/serialization/resources/tts/types/WebSocketTtsRequest.js +3 -1
  164. package/dist/serialization/resources/tts/types/index.d.ts +5 -1
  165. package/dist/serialization/resources/tts/types/index.js +5 -1
  166. package/dist/version.d.ts +1 -1
  167. package/dist/version.js +1 -1
  168. package/dist/wrapper/StreamingSTTClient.d.ts +22 -2
  169. package/dist/wrapper/StreamingSTTClient.js +124 -1
  170. package/dist/wrapper/SttWebsocket.d.ts +8 -3
  171. package/dist/wrapper/SttWebsocket.js +24 -3
  172. package/dist/wrapper/Websocket.js +1 -1
  173. package/index.d.ts +2 -0
  174. package/index.js +8 -1
  175. package/package.json +1 -1
  176. package/reference.md +89 -1
  177. package/serialization/resources/auth/types/TokenGrant.d.ts +2 -1
  178. package/serialization/resources/auth/types/TokenGrant.js +2 -1
  179. package/serialization/resources/auth/types/TokenRequest.d.ts +1 -1
  180. package/serialization/resources/auth/types/TokenRequest.js +1 -1
  181. package/serialization/resources/stt/types/SttEncoding.d.ts +1 -1
  182. package/serialization/resources/stt/types/SttEncoding.js +1 -1
  183. package/serialization/resources/stt/types/TimestampGranularity.d.ts +10 -0
  184. package/serialization/resources/stt/types/TimestampGranularity.js +41 -0
  185. package/serialization/resources/stt/types/TranscriptMessage.d.ts +2 -0
  186. package/serialization/resources/stt/types/TranscriptMessage.js +2 -0
  187. package/serialization/resources/stt/types/TranscriptionResponse.d.ts +2 -0
  188. package/serialization/resources/stt/types/TranscriptionResponse.js +2 -0
  189. package/serialization/resources/stt/types/TranscriptionWord.d.ts +14 -0
  190. package/serialization/resources/stt/types/TranscriptionWord.js +45 -0
  191. package/serialization/resources/stt/types/index.d.ts +2 -0
  192. package/serialization/resources/stt/types/index.js +2 -0
  193. package/serialization/resources/tts/types/Controls.d.ts +2 -2
  194. package/serialization/resources/tts/types/Controls.js +2 -2
  195. package/serialization/resources/tts/types/Emotion.d.ts +1 -1
  196. package/serialization/resources/tts/types/Emotion.js +1 -27
  197. package/serialization/resources/tts/types/EmotionDeprecated.d.ts +10 -0
  198. package/serialization/resources/tts/types/EmotionDeprecated.js +67 -0
  199. package/serialization/resources/tts/types/GenerationConfig.d.ts +15 -0
  200. package/serialization/resources/tts/types/GenerationConfig.js +46 -0
  201. package/serialization/resources/tts/types/GenerationRequest.d.ts +3 -1
  202. package/serialization/resources/tts/types/GenerationRequest.js +3 -1
  203. package/serialization/resources/tts/types/SseOutputFormat.d.ts +15 -0
  204. package/serialization/resources/tts/types/SseOutputFormat.js +46 -0
  205. package/serialization/resources/tts/types/TtsRequest.d.ts +2 -0
  206. package/serialization/resources/tts/types/TtsRequest.js +2 -0
  207. package/serialization/resources/tts/types/TtssseRequest.d.ts +29 -0
  208. package/serialization/resources/tts/types/TtssseRequest.js +60 -0
  209. package/serialization/resources/tts/types/WebSocketTtsRequest.d.ts +3 -1
  210. package/serialization/resources/tts/types/WebSocketTtsRequest.js +3 -1
  211. package/serialization/resources/tts/types/index.d.ts +5 -1
  212. package/serialization/resources/tts/types/index.js +5 -1
  213. package/version.d.ts +1 -1
  214. package/version.js +1 -1
  215. package/wrapper/StreamingSTTClient.d.ts +22 -2
  216. package/wrapper/StreamingSTTClient.js +124 -1
  217. package/wrapper/SttWebsocket.d.ts +8 -3
  218. package/wrapper/SttWebsocket.js +24 -3
  219. package/wrapper/Websocket.js +1 -1
package/README.md CHANGED
@@ -1,13 +1,9 @@
1
- # Cartesia TypeScript SDK
1
+ # Cartesia TypeScript Library
2
2
 
3
3
  [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Fcartesia-ai%2Fcartesia-js)
4
4
  [![npm shield](https://img.shields.io/npm/v/@cartesia/cartesia-js)](https://www.npmjs.com/package/@cartesia/cartesia-js)
5
- [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
6
5
 
7
- The Cartesia TypeScript library provides convenient access to the Cartesia API from TypeScript/JavaScript, runnable in both Node.js and browsers.
8
-
9
- > [!TIP]
10
- > **[@cartesia-ai/cartesia-nextjs-demo](https://github.com/cartesia-ai/cartesia-nextjs-demo)** is our demo app that shows how to use Cartesia text-to-speech in a browser-based application.
6
+ The Cartesia TypeScript library provides convenient access to the Cartesia APIs from TypeScript.
11
7
 
12
8
  ## Installation
13
9
 
@@ -17,222 +13,589 @@ npm i -s @cartesia/cartesia-js
17
13
 
18
14
  ## Reference
19
15
 
20
- A full reference for this library is available [here](./reference.md).
16
+ A full reference for this library is available [here](https://github.com/cartesia-ai/cartesia-js/blob/HEAD/./reference.md).
21
17
 
22
18
  ## Usage
23
19
 
24
- ### Instantiation
25
-
26
20
  Instantiate and use the client with the following:
27
21
 
28
22
  ```typescript
29
23
  import { CartesiaClient } from "@cartesia/cartesia-js";
30
- import process from "node:process"
31
- import fs from "node:fs"
32
-
33
- // Set up the client.
34
- const client = new CartesiaClient({ apiKey: process.env.CARTESIA_API_KEY });
35
-
36
- // Call the TTS API's bytes endpoint, which returns binary audio data as an ArrayBuffer.
37
- const response = await client.tts.bytes({
38
- modelId: "sonic-2",
39
- transcript: "Hello, world!",
40
- voice: {
41
- mode: "id",
42
- id: "694f9389-aac1-45b6-b726-9d9369183238",
43
- },
44
- language: "en",
45
- outputFormat: {
46
- container: "wav",
47
- sampleRate: 44100,
48
- encoding: "pcm_f32le",
24
+
25
+ const client = new CartesiaClient({ apiKey: "YOUR_API_KEY" });
26
+ await client.auth.accessToken({
27
+ grants: {
28
+ stt: true,
49
29
  },
30
+ expiresIn: 60,
50
31
  });
51
-
52
- // Write the response to a file.
53
- fs.writeFileSync("sonic.wav", new Uint8Array(response));
54
32
  ```
55
33
 
56
- ### TTS over WebSocket
34
+ ## Speech-to-Text (STT)
57
35
 
58
- ```js
36
+ ```typescript
59
37
  import { CartesiaClient } from "@cartesia/cartesia-js";
38
+ import fs from "node:fs";
39
+
40
+ async function streamingSTTExample() {
41
+ const client = new CartesiaClient({
42
+ apiKey: process.env.CARTESIA_API_KEY,
43
+ });
44
+
45
+ // Create websocket connection with endpointing parameters
46
+ const sttWs = client.stt.websocket({
47
+ model: "ink-whisper",
48
+ language: "en", // Language of your audio
49
+ encoding: "pcm_s16le", // Audio encoding format (required)
50
+ sampleRate: 16000, // Audio sample rate (required)
51
+ minVolume: 0.1, // Volume threshold for voice activity detection (0.0-1.0)
52
+ maxSilenceDurationSecs: 2.0, // Maximum silence duration before endpointing
53
+ });
54
+
55
+ // Concurrent audio sending
56
+ async function sendAudio() {
57
+ try {
58
+ const audioBuffer = fs.readFileSync("audio.wav");
59
+ const chunkSize = 3200; // ~100ms chunks (3200 bytes of 16 kHz, 16-bit mono PCM) for more realistic streaming
60
+
61
+ console.log("Starting audio stream...");
62
+
63
+ for (let i = 0; i < audioBuffer.length; i += chunkSize) {
64
+ const chunk = audioBuffer.subarray(i, i + chunkSize);
65
+ const arrayBuffer = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
66
+
67
+ await sttWs.send(arrayBuffer);
68
+ console.log(`Sent chunk ${Math.floor(i / chunkSize) + 1}`);
69
+
70
+ // Simulate real-time audio capture delay
71
+ await new Promise((resolve) => setTimeout(resolve, 100));
72
+ }
73
+
74
+ await sttWs.finalize();
75
+ console.log("Audio streaming completed");
76
+ } catch (error) {
77
+ console.error("Error sending audio:", error);
78
+ }
79
+ }
60
80
 
61
- const cartesia = new CartesiaClient({
62
- apiKey: process.env.CARTESIA_API_KEY,
63
- });
81
+ // Concurrent transcript receiving with word-level timestamps
82
+ async function receiveTranscripts(): Promise<string> {
83
+ return new Promise((resolve) => {
84
+ let fullTranscript = "";
85
+
86
+ sttWs.onMessage((result) => {
87
+ if (result.type === "transcript") {
88
+ const status = result.isFinal ? "FINAL" : "INTERIM";
89
+ console.log(`[${status}] "${result.text}"`);
90
+
91
+ // Handle word-level timestamps if available
92
+ if (result.words && result.words.length > 0) {
93
+ console.log("Word-level timestamps:");
94
+ result.words.forEach((word) => {
95
+ console.log(` "${word.word}": ${word.start.toFixed(2)}s - ${word.end.toFixed(2)}s`);
96
+ });
97
+ }
98
+
99
+ if (result.isFinal) {
100
+ fullTranscript += `${result.text} `;
101
+ }
102
+ } else if (result.type === "flush_done") {
103
+ console.log("Flush completed - sending done command");
104
+ sttWs.done().catch(console.error);
105
+ } else if (result.type === "done") {
106
+ console.log("Transcription completed");
107
+ resolve(fullTranscript.trim());
108
+ } else if (result.type === "error") {
109
+ console.error(`Error: ${result.message}`);
110
+ resolve("");
111
+ }
112
+ });
113
+ });
114
+ }
64
115
 
65
- // Initialize the WebSocket. Make sure the output format you specify is supported.
66
- const websocket = cartesia.tts.websocket({
67
- container: "raw",
68
- encoding: "pcm_f32le",
69
- sampleRate: 44100,
70
- });
116
+ try {
117
+ console.log("Starting STT processing...");
71
118
 
72
- // Create a stream.
73
- const response = await websocket.send({
74
- modelId: "sonic-2",
75
- voice: {
76
- mode: "id",
77
- id: "a0e99841-438c-4a64-b679-ae501e7d6091",
78
- },
79
- transcript: "Hello, world!",
80
- // The WebSocket sets output_format on your behalf.
81
- });
119
+ // Run audio sending and transcript receiving concurrently
120
+ const [, finalTranscript] = await Promise.all([sendAudio(), receiveTranscripts()]);
82
121
 
83
- // Access the raw messages from the WebSocket.
84
- response.on("message", (message) => {
85
- // Raw message.
86
- console.log("Received message:", message);
87
- });
122
+ console.log(`\nFinal transcript: ${finalTranscript}`);
88
123
 
89
- // You can also access messages using a for-await-of loop.
90
- for await (const message of response.events("message")) {
91
- // Raw message.
92
- console.log("Received message:", message);
124
+ // Clean up
125
+ sttWs.disconnect();
126
+
127
+ return finalTranscript;
128
+ } catch (error) {
129
+ console.error("STT processing error:", error);
130
+ sttWs.disconnect();
131
+ throw error;
132
+ }
93
133
  }
134
+
135
+ // Run the example
136
+ streamingSTTExample().catch(console.error);
94
137
  ```
95
138
 
96
- #### Input Streaming with Contexts
139
+ ## Request And Response Types
97
140
 
98
- ```js
99
- const contextOptions = {
100
- contextId: "my-context",
101
- modelId: "sonic-2",
102
- voice: {
103
- mode: "id",
104
- id: "a0e99841-438c-4a64-b679-ae501e7d6091",
105
- },
141
+ The SDK exports all request and response types as TypeScript interfaces. Simply import them with the
142
+ following namespace:
143
+
144
+ ```typescript
145
+ import { Cartesia } from "@cartesia/cartesia-js";
146
+
147
+ const request: Cartesia.InfillBytesRequest = {
148
+ ...
106
149
  };
150
+ ```
107
151
 
108
- // Initial request on the context uses websocket.send().
109
- // This response object will aggregate the results of all the inputs sent on the context.
110
- const response = await websocket.send({
111
- ...contextOptions,
112
- transcript: "Hello, world!",
113
- });
152
+ ## Exception Handling
114
153
 
115
- // Subsequent requests on the same context use websocket.continue().
116
- await websocket.continue({
117
- ...contextOptions,
118
- transcript: " How are you today?",
119
- });
154
+ When the API returns a non-success status code (4xx or 5xx response), a subclass of the following error
155
+ will be thrown.
156
+
157
+ ```typescript
158
+ import { CartesiaError } from "@cartesia/cartesia-js";
159
+
160
+ try {
161
+ await client.auth.accessToken(...);
162
+ } catch (err) {
163
+ if (err instanceof CartesiaError) {
164
+ console.log(err.statusCode);
165
+ console.log(err.message);
166
+ console.log(err.body);
167
+ }
168
+ }
120
169
  ```
121
170
 
122
- See the [input streaming docs](https://docs.cartesia.ai/reference/web-socket/stream-speech/working-with-web-sockets#input-streaming-with-contexts) for more information.
171
+ ## Binary Response
123
172
 
124
- ### Playing audio in the browser
173
+ You can consume binary data from endpoints using the `BinaryResponse` type which lets you choose how to consume the data:
174
+
175
+ ```typescript
176
+ const response = await client.agents.downloadCallAudio(...);
177
+ const stream: ReadableStream<Uint8Array> = response.stream();
178
+ // const arrayBuffer: ArrayBuffer = await response.arrayBuffer();
179
+ // const blob: Blob = await response.blob();
180
+ // const bytes: Uint8Array = await response.bytes();
181
+ // You can only use the response body once, so you must choose one of the above methods.
182
+ // If you want to check if the response body has been used, you can use the following property.
183
+ const bodyUsed = response.bodyUsed;
184
+ ```
125
185
 
126
- (The `WebPlayer` class only supports playing audio in the browser and the raw PCM format with fp32le encoding.)
186
+ <details>
187
+ <summary>Save binary response to a file</summary>
127
188
 
128
- ```js
129
- // If you're using the client in the browser, you can control audio playback using our WebPlayer:
130
- import { WebPlayer } from "@cartesia/cartesia-js";
189
+ <blockquote>
190
+ <details>
191
+ <summary>Node.js</summary>
131
192
 
132
- console.log("Playing stream...");
193
+ <blockquote>
194
+ <details>
195
+ <summary>ReadableStream (most-efficient)</summary>
133
196
 
134
- // Create a Player object.
135
- const player = new WebPlayer();
197
+ ```ts
198
+ import { createWriteStream } from 'fs';
199
+ import { Readable } from 'stream';
200
+ import { pipeline } from 'stream/promises';
136
201
 
137
- // Play the audio. (`response` includes a custom Source object that the Player can play.)
138
- // The call resolves when the audio finishes playing.
139
- await player.play(response.source);
202
+ const response = await client.agents.downloadCallAudio(...);
140
203
 
141
- console.log("Done playing.");
204
+ const stream = response.stream();
205
+ const nodeStream = Readable.fromWeb(stream);
206
+ const writeStream = createWriteStream('path/to/file');
207
+
208
+ await pipeline(nodeStream, writeStream);
142
209
  ```
143
210
 
144
- ## Speech-to-Text (STT)
211
+ </details>
212
+ </blockquote>
145
213
 
146
- ```typescript
147
- import { CartesiaClient } from "@cartesia/cartesia-js";
148
- import fs from "fs";
214
+ <blockquote>
215
+ <details>
216
+ <summary>ArrayBuffer</summary>
149
217
 
150
- const client = new CartesiaClient({
151
- apiKey: process.env.CARTESIA_API_KEY,
152
- });
218
+ ```ts
219
+ import { writeFile } from 'fs/promises';
153
220
 
154
- // Create STT WebSocket connection
155
- const sttWs = client.stt.websocket({
156
- model: "ink-whisper",
157
- language: "en",
158
- encoding: "pcm_s16le",
159
- sampleRate: 16000,
160
- });
221
+ const response = await client.agents.downloadCallAudio(...);
161
222
 
162
- // Set up message handler
163
- await sttWs.onMessage((result) => {
164
- if (result.type === "transcript") {
165
- const status = result.isFinal ? "FINAL" : "INTERIM";
166
- console.log(`[${status}] ${result.text}`);
167
- if (result.duration) {
168
- console.log(`Duration: ${result.duration.toFixed(2)}s`);
169
- }
170
- } else if (result.type === "flush_done") {
171
- console.log("Flush completed");
172
- await sttWs.done(); // Send done command
173
- } else if (result.type === "done") {
174
- console.log("Session complete");
175
- } else if (result.type === "error") {
176
- console.error(`Error: ${result.message}`);
177
- }
178
- });
223
+ const arrayBuffer = await response.arrayBuffer();
224
+ await writeFile('path/to/file', Buffer.from(arrayBuffer));
225
+ ```
179
226
 
180
- // Load and send audio data
181
- const audioBuffer = fs.readFileSync("audio.wav");
182
- const chunkSize = 1600; // ~100ms at 16kHz
183
- const audioChunks = [];
227
+ </details>
228
+ </blockquote>
184
229
 
185
- for (let i = 0; i < audioBuffer.length; i += chunkSize) {
186
- const chunk = audioBuffer.slice(i, i + chunkSize);
187
- audioChunks.push(chunk.buffer);
188
- }
230
+ <blockquote>
231
+ <details>
232
+ <summary>Blob</summary>
233
+
234
+ ```ts
235
+ import { writeFile } from 'fs/promises';
236
+
237
+ const response = await client.agents.downloadCallAudio(...);
238
+
239
+ const blob = await response.blob();
240
+ const arrayBuffer = await blob.arrayBuffer();
241
+ await writeFile('output.bin', Buffer.from(arrayBuffer));
242
+ ```
243
+
244
+ </details>
245
+ </blockquote>
246
+
247
+ <blockquote>
248
+ <details>
249
+ <summary>Bytes (Uint8Array)</summary>
250
+
251
+ ```ts
252
+ import { writeFile } from 'fs/promises';
253
+
254
+ const response = await client.agents.downloadCallAudio(...);
255
+
256
+ const bytes = await response.bytes();
257
+ await writeFile('path/to/file', bytes);
258
+ ```
259
+
260
+ </details>
261
+ </blockquote>
262
+
263
+ </details>
264
+ </blockquote>
265
+
266
+ <blockquote>
267
+ <details>
268
+ <summary>Bun</summary>
269
+
270
+ <blockquote>
271
+ <details>
272
+ <summary>ReadableStream (most-efficient)</summary>
273
+
274
+ ```ts
275
+ const response = await client.agents.downloadCallAudio(...);
276
+
277
+ const stream = response.stream();
278
+ await Bun.write('path/to/file', stream);
279
+ ```
280
+
281
+ </details>
282
+ </blockquote>
283
+
284
+ <blockquote>
285
+ <details>
286
+ <summary>ArrayBuffer</summary>
287
+
288
+ ```ts
289
+ const response = await client.agents.downloadCallAudio(...);
290
+
291
+ const arrayBuffer = await response.arrayBuffer();
292
+ await Bun.write('path/to/file', arrayBuffer);
293
+ ```
294
+
295
+ </details>
296
+ </blockquote>
297
+
298
+ <blockquote>
299
+ <details>
300
+ <summary>Blob</summary>
301
+
302
+ ```ts
303
+ const response = await client.agents.downloadCallAudio(...);
304
+
305
+ const blob = await response.blob();
306
+ await Bun.write('path/to/file', blob);
307
+ ```
308
+
309
+ </details>
310
+ </blockquote>
311
+
312
+ <blockquote>
313
+ <details>
314
+ <summary>Bytes (Uint8Array)</summary>
315
+
316
+ ```ts
317
+ const response = await client.agents.downloadCallAudio(...);
318
+
319
+ const bytes = await response.bytes();
320
+ await Bun.write('path/to/file', bytes);
321
+ ```
322
+
323
+ </details>
324
+ </blockquote>
325
+
326
+ </details>
327
+ </blockquote>
328
+
329
+ <blockquote>
330
+ <details>
331
+ <summary>Deno</summary>
189
332
 
190
- // Send audio chunks
191
- for (const chunk of audioChunks) {
192
- await sttWs.send(chunk);
333
+ <blockquote>
334
+ <details>
335
+ <summary>ReadableStream (most-efficient)</summary>
336
+
337
+ ```ts
338
+ const response = await client.agents.downloadCallAudio(...);
339
+
340
+ const stream = response.stream();
341
+ const file = await Deno.open('path/to/file', { write: true, create: true });
342
+ await stream.pipeTo(file.writable);
343
+ ```
344
+
345
+ </details>
346
+ </blockquote>
347
+
348
+ <blockquote>
349
+ <details>
350
+ <summary>ArrayBuffer</summary>
351
+
352
+ ```ts
353
+ const response = await client.agents.downloadCallAudio(...);
354
+
355
+ const arrayBuffer = await response.arrayBuffer();
356
+ await Deno.writeFile('path/to/file', new Uint8Array(arrayBuffer));
357
+ ```
358
+
359
+ </details>
360
+ </blockquote>
361
+
362
+ <blockquote>
363
+ <details>
364
+ <summary>Blob</summary>
365
+
366
+ ```ts
367
+ const response = await client.agents.downloadCallAudio(...);
368
+
369
+ const blob = await response.blob();
370
+ const arrayBuffer = await blob.arrayBuffer();
371
+ await Deno.writeFile('path/to/file', new Uint8Array(arrayBuffer));
372
+ ```
373
+
374
+ </details>
375
+ </blockquote>
376
+
377
+ <blockquote>
378
+ <details>
379
+ <summary>Bytes (Uint8Array)</summary>
380
+
381
+ ```ts
382
+ const response = await client.agents.downloadCallAudio(...);
383
+
384
+ const bytes = await response.bytes();
385
+ await Deno.writeFile('path/to/file', bytes);
386
+ ```
387
+
388
+ </details>
389
+ </blockquote>
390
+
391
+ </details>
392
+ </blockquote>
393
+
394
+ <blockquote>
395
+ <details>
396
+ <summary>Browser</summary>
397
+
398
+ <blockquote>
399
+ <details>
400
+ <summary>Blob (most-efficient)</summary>
401
+
402
+ ```ts
403
+ const response = await client.agents.downloadCallAudio(...);
404
+
405
+ const blob = await response.blob();
406
+ const url = URL.createObjectURL(blob);
407
+
408
+ // trigger download
409
+ const a = document.createElement('a');
410
+ a.href = url;
411
+ a.download = 'filename';
412
+ a.click();
413
+ URL.revokeObjectURL(url);
414
+ ```
415
+
416
+ </details>
417
+ </blockquote>
418
+
419
+ <blockquote>
420
+ <details>
421
+ <summary>ReadableStream</summary>
422
+
423
+ ```ts
424
+ const response = await client.agents.downloadCallAudio(...);
425
+
426
+ const stream = response.stream();
427
+ const reader = stream.getReader();
428
+ const chunks = [];
429
+
430
+ while (true) {
431
+ const { done, value } = await reader.read();
432
+ if (done) break;
433
+ chunks.push(value);
193
434
  }
194
435
 
195
- // Finalize transcription
196
- await sttWs.finalize();
436
+ const blob = new Blob(chunks);
437
+ const url = URL.createObjectURL(blob);
197
438
 
198
- // Disconnect when done
199
- sttWs.disconnect();
439
+ // trigger download
440
+ const a = document.createElement('a');
441
+ a.href = url;
442
+ a.download = 'filename';
443
+ a.click();
444
+ URL.revokeObjectURL(url);
200
445
  ```
201
446
 
202
- ## Request And Response Types
447
+ </details>
448
+ </blockquote>
203
449
 
204
- The SDK exports all request and response types as TypeScript interfaces. Simply import them with the
205
- following namespace:
450
+ <blockquote>
451
+ <details>
452
+ <summary>ArrayBuffer</summary>
206
453
 
207
- ```typescript
208
- import { Cartesia } from "@cartesia/cartesia-js";
454
+ ```ts
455
+ const response = await client.agents.downloadCallAudio(...);
209
456
 
210
- const request: Cartesia.VoiceChangerBytesRequest = {
211
- ...
212
- };
457
+ const arrayBuffer = await response.arrayBuffer();
458
+ const blob = new Blob([arrayBuffer]);
459
+ const url = URL.createObjectURL(blob);
460
+
461
+ // trigger download
462
+ const a = document.createElement('a');
463
+ a.href = url;
464
+ a.download = 'filename';
465
+ a.click();
466
+ URL.revokeObjectURL(url);
213
467
  ```
214
468
 
215
- ## Exception Handling
469
+ </details>
470
+ </blockquote>
216
471
 
217
- When the API returns a non-success status code (4xx or 5xx response), a subclass of the following error
218
- will be thrown.
472
+ <blockquote>
473
+ <details>
474
+ <summary>Bytes (Uint8Array)</summary>
475
+
476
+ ```ts
477
+ const response = await client.agents.downloadCallAudio(...);
478
+
479
+ const bytes = await response.bytes();
480
+ const blob = new Blob([bytes]);
481
+ const url = URL.createObjectURL(blob);
482
+
483
+ // trigger download
484
+ const a = document.createElement('a');
485
+ a.href = url;
486
+ a.download = 'filename';
487
+ a.click();
488
+ URL.revokeObjectURL(url);
489
+ ```
490
+
491
+ </details>
492
+ </blockquote>
493
+
494
+ </details>
495
+ </blockquote>
496
+
497
+ </details>
498
+ </blockquote>
499
+
500
+ <details>
501
+ <summary>Convert binary response to text</summary>
502
+
503
+ <blockquote>
504
+ <details>
505
+ <summary>ReadableStream</summary>
506
+
507
+ ```ts
508
+ const response = await client.agents.downloadCallAudio(...);
509
+
510
+ const stream = response.stream();
511
+ const text = await new Response(stream).text();
512
+ ```
513
+
514
+ </details>
515
+ </blockquote>
516
+
517
+ <blockquote>
518
+ <details>
519
+ <summary>ArrayBuffer</summary>
520
+
521
+ ```ts
522
+ const response = await client.agents.downloadCallAudio(...);
523
+
524
+ const arrayBuffer = await response.arrayBuffer();
525
+ const text = new TextDecoder().decode(arrayBuffer);
526
+ ```
527
+
528
+ </details>
529
+ </blockquote>
530
+
531
+ <blockquote>
532
+ <details>
533
+ <summary>Blob</summary>
534
+
535
+ ```ts
536
+ const response = await client.agents.downloadCallAudio(...);
537
+
538
+ const blob = await response.blob();
539
+ const text = await blob.text();
540
+ ```
541
+
542
+ </details>
543
+ </blockquote>
544
+
545
+ <blockquote>
546
+ <details>
547
+ <summary>Bytes (Uint8Array)</summary>
548
+
549
+ ```ts
550
+ const response = await client.agents.downloadCallAudio(...);
551
+
552
+ const bytes = await response.bytes();
553
+ const text = new TextDecoder().decode(bytes);
554
+ ```
555
+
556
+ </details>
557
+ </blockquote>
558
+
559
+ </details>
560
+
561
+ ## Pagination
562
+
563
+ List endpoints are paginated. The SDK provides an iterator so that you can simply loop over the items:
219
564
 
220
565
  ```typescript
221
- import { CartesiaError } from "@cartesia/cartesia-js";
566
+ import { CartesiaClient } from "@cartesia/cartesia-js";
222
567
 
223
- try {
224
- await client.tts.bytes(...);
225
- } catch (err) {
226
- if (err instanceof CartesiaError) {
227
- console.log(err.statusCode);
228
- console.log(err.message);
229
- console.log(err.body);
230
- }
568
+ const client = new CartesiaClient({ token: "YOUR_TOKEN" });
569
+ const response = await client.agents.listCalls({
570
+ agentId: "agent_id",
571
+ });
572
+ for await (const item of response) {
573
+ console.log(item);
574
+ }
575
+
576
+ // Or you can manually iterate page-by-page
577
+ let page = await client.agents.listCalls({
578
+ agentId: "agent_id",
579
+ });
580
+ while (page.hasNextPage()) {
581
+ page = await page.getNextPage();
231
582
  }
232
583
  ```
233
584
 
234
585
  ## Advanced
235
586
 
587
+ ### Additional Headers
588
+
589
+ If you would like to send additional headers as part of the request, use the `headers` request option.
590
+
591
+ ```typescript
592
+ const response = await client.auth.accessToken(..., {
593
+ headers: {
594
+ 'X-Custom-Header': 'custom value'
595
+ }
596
+ });
597
+ ```
598
+
236
599
  ### Retries
237
600
 
238
601
  The SDK is instrumented with automatic retries with exponential backoff. A request will be retried as long
@@ -241,14 +604,14 @@ retry limit (default: 2).
241
604
 
242
605
  A request is deemed retriable when any of the following HTTP status codes is returned:
243
606
 
244
- - [408](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/408) (Timeout)
245
- - [429](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) (Too Many Requests)
246
- - [5XX](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500) (Internal Server Errors)
607
+ - [408](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/408) (Timeout)
608
+ - [429](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) (Too Many Requests)
609
+ - [5XX](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500) (Internal Server Errors)
247
610
 
248
611
  Use the `maxRetries` request option to configure this behavior.
249
612
 
250
613
  ```typescript
251
- const response = await client.tts.bytes(..., {
614
+ const response = await client.auth.accessToken(..., {
252
615
  maxRetries: 0 // override maxRetries at the request level
253
616
  });
254
617
  ```
@@ -258,7 +621,7 @@ const response = await client.tts.bytes(..., {
258
621
  The SDK defaults to a 60 second timeout. Use the `timeoutInSeconds` option to configure this behavior.
259
622
 
260
623
  ```typescript
261
- const response = await client.tts.bytes(..., {
624
+ const response = await client.auth.accessToken(..., {
262
625
  timeoutInSeconds: 30 // override timeout to 30s
263
626
  });
264
627
  ```
@@ -269,7 +632,7 @@ The SDK allows users to abort requests at any point by passing in an abort signa
269
632
 
270
633
  ```typescript
271
634
  const controller = new AbortController();
272
- const response = await client.tts.bytes(..., {
635
+ const response = await client.auth.accessToken(..., {
273
636
  abortSignal: controller.signal
274
637
  });
275
638
  controller.abort(); // aborts the request
@@ -280,12 +643,12 @@ controller.abort(); // aborts the request
280
643
  The SDK defaults to `node-fetch` but will use the global fetch client if present. The SDK works in the following
281
644
  runtimes:
282
645
 
283
- - Node.js 18+
284
- - Vercel
285
- - Cloudflare Workers
286
- - Deno v1.25+
287
- - Bun 1.0+
288
- - React Native
646
+ - Node.js 18+
647
+ - Vercel
648
+ - Cloudflare Workers
649
+ - Deno v1.25+
650
+ - Bun 1.0+
651
+ - React Native
289
652
 
290
653
  ### Customizing Fetch Client
291
654
 
@@ -310,3 +673,7 @@ a proof of concept, but know that we will not be able to merge it as-is. We sugg
310
673
  an issue first to discuss with us!
311
674
 
312
675
  On the other hand, contributions to the README are always very welcome!
676
+
677
+ ## Documentation
678
+
679
+ API reference documentation is available [here](https://docs.cartesia.ai/).