@cartesia/cartesia-js 2.2.5 → 2.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/Client.d.ts +3 -0
  2. package/Client.js +11 -6
  3. package/README.md +469 -144
  4. package/api/resources/apiStatus/client/Client.js +1 -1
  5. package/api/resources/auth/client/Client.d.ts +17 -0
  6. package/api/resources/auth/client/Client.js +18 -1
  7. package/api/resources/auth/types/TokenGrant.d.ts +3 -1
  8. package/api/resources/auth/types/TokenRequest.d.ts +2 -2
  9. package/api/resources/index.d.ts +1 -0
  10. package/api/resources/index.js +1 -0
  11. package/api/resources/infill/client/Client.js +1 -1
  12. package/api/resources/infill/client/requests/InfillBytesRequest.d.ts +3 -11
  13. package/api/resources/stt/client/Client.d.ts +43 -0
  14. package/api/resources/stt/client/Client.js +108 -0
  15. package/api/resources/stt/client/index.d.ts +1 -1
  16. package/api/resources/stt/client/index.js +15 -3
  17. package/api/resources/stt/client/requests/TranscriptionRequest.d.ts +147 -0
  18. package/api/resources/stt/client/requests/TranscriptionRequest.js +5 -0
  19. package/api/resources/stt/client/requests/index.d.ts +1 -0
  20. package/api/resources/stt/client/requests/index.js +2 -0
  21. package/api/resources/stt/index.d.ts +1 -0
  22. package/api/resources/stt/index.js +1 -0
  23. package/api/resources/stt/types/TimestampGranularity.d.ts +12 -0
  24. package/api/resources/stt/types/TimestampGranularity.js +9 -0
  25. package/api/resources/stt/types/TranscriptMessage.d.ts +1 -1
  26. package/api/resources/stt/types/TranscriptionResponse.d.ts +2 -2
  27. package/api/resources/stt/types/index.d.ts +1 -0
  28. package/api/resources/stt/types/index.js +1 -0
  29. package/api/resources/tts/client/Client.d.ts +7 -2
  30. package/api/resources/tts/client/Client.js +8 -8
  31. package/api/resources/tts/types/Controls.d.ts +1 -1
  32. package/api/resources/tts/types/Emotion.d.ts +2 -33
  33. package/api/resources/tts/types/Emotion.js +0 -28
  34. package/api/resources/tts/types/EmotionDeprecated.d.ts +38 -0
  35. package/api/resources/tts/types/EmotionDeprecated.js +33 -0
  36. package/api/resources/tts/types/GenerationConfig.d.ts +15 -0
  37. package/api/resources/tts/types/GenerationConfig.js +5 -0
  38. package/api/resources/tts/types/GenerationRequest.d.ts +5 -4
  39. package/api/resources/tts/types/Mp3OutputFormat.d.ts +1 -0
  40. package/api/resources/tts/types/RawOutputFormat.d.ts +1 -0
  41. package/api/resources/tts/types/SseOutputFormat.d.ts +10 -0
  42. package/api/resources/tts/types/SseOutputFormat.js +5 -0
  43. package/api/resources/tts/types/TtsRequest.d.ts +1 -0
  44. package/api/resources/tts/types/TtssseRequest.d.ts +27 -0
  45. package/api/resources/tts/types/TtssseRequest.js +5 -0
  46. package/api/resources/tts/types/WebSocketRawOutputFormat.d.ts +1 -0
  47. package/api/resources/tts/types/WebSocketRequest.d.ts +2 -4
  48. package/api/resources/tts/types/WebSocketTtsRequest.d.ts +4 -1
  49. package/api/resources/tts/types/index.d.ts +5 -1
  50. package/api/resources/tts/types/index.js +5 -1
  51. package/api/resources/voiceChanger/client/Client.d.ts +9 -4
  52. package/api/resources/voiceChanger/client/Client.js +24 -20
  53. package/api/resources/voiceChanger/client/requests/VoiceChangerBytesRequest.d.ts +3 -8
  54. package/api/resources/voiceChanger/client/requests/VoiceChangerSseRequest.d.ts +3 -8
  55. package/api/resources/voices/client/Client.js +8 -8
  56. package/api/resources/voices/client/requests/CloneVoiceRequest.d.ts +6 -24
  57. package/api/resources/voices/types/LocalizeDialect.d.ts +4 -8
  58. package/core/fetcher/Fetcher.d.ts +2 -2
  59. package/core/fetcher/Fetcher.js +4 -3
  60. package/core/fetcher/getResponseBody.js +3 -3
  61. package/dist/Client.d.ts +3 -0
  62. package/dist/Client.js +11 -6
  63. package/dist/api/resources/apiStatus/client/Client.js +1 -1
  64. package/dist/api/resources/auth/client/Client.d.ts +17 -0
  65. package/dist/api/resources/auth/client/Client.js +18 -1
  66. package/dist/api/resources/auth/types/TokenGrant.d.ts +3 -1
  67. package/dist/api/resources/auth/types/TokenRequest.d.ts +2 -2
  68. package/dist/api/resources/index.d.ts +1 -0
  69. package/dist/api/resources/index.js +1 -0
  70. package/dist/api/resources/infill/client/Client.js +1 -1
  71. package/dist/api/resources/infill/client/requests/InfillBytesRequest.d.ts +3 -11
  72. package/dist/api/resources/stt/client/Client.d.ts +43 -0
  73. package/dist/api/resources/stt/client/Client.js +108 -0
  74. package/dist/api/resources/stt/client/index.d.ts +1 -1
  75. package/dist/api/resources/stt/client/index.js +15 -3
  76. package/dist/api/resources/stt/client/requests/TranscriptionRequest.d.ts +147 -0
  77. package/dist/api/resources/stt/client/requests/TranscriptionRequest.js +5 -0
  78. package/dist/api/resources/stt/client/requests/index.d.ts +1 -0
  79. package/dist/api/resources/stt/client/requests/index.js +2 -0
  80. package/dist/api/resources/stt/index.d.ts +1 -0
  81. package/dist/api/resources/stt/index.js +1 -0
  82. package/dist/api/resources/stt/types/TimestampGranularity.d.ts +12 -0
  83. package/dist/api/resources/stt/types/TimestampGranularity.js +9 -0
  84. package/dist/api/resources/stt/types/TranscriptMessage.d.ts +1 -1
  85. package/dist/api/resources/stt/types/TranscriptionResponse.d.ts +2 -2
  86. package/dist/api/resources/stt/types/index.d.ts +1 -0
  87. package/dist/api/resources/stt/types/index.js +1 -0
  88. package/dist/api/resources/tts/client/Client.d.ts +7 -2
  89. package/dist/api/resources/tts/client/Client.js +8 -8
  90. package/dist/api/resources/tts/types/Controls.d.ts +1 -1
  91. package/dist/api/resources/tts/types/Emotion.d.ts +2 -33
  92. package/dist/api/resources/tts/types/Emotion.js +0 -28
  93. package/dist/api/resources/tts/types/EmotionDeprecated.d.ts +38 -0
  94. package/dist/api/resources/tts/types/EmotionDeprecated.js +33 -0
  95. package/dist/api/resources/tts/types/GenerationConfig.d.ts +15 -0
  96. package/dist/api/resources/tts/types/GenerationConfig.js +5 -0
  97. package/dist/api/resources/tts/types/GenerationRequest.d.ts +5 -4
  98. package/dist/api/resources/tts/types/Mp3OutputFormat.d.ts +1 -0
  99. package/dist/api/resources/tts/types/RawOutputFormat.d.ts +1 -0
  100. package/dist/api/resources/tts/types/SseOutputFormat.d.ts +10 -0
  101. package/dist/api/resources/tts/types/SseOutputFormat.js +5 -0
  102. package/dist/api/resources/tts/types/TtsRequest.d.ts +1 -0
  103. package/dist/api/resources/tts/types/TtssseRequest.d.ts +27 -0
  104. package/dist/api/resources/tts/types/TtssseRequest.js +5 -0
  105. package/dist/api/resources/tts/types/WebSocketRawOutputFormat.d.ts +1 -0
  106. package/dist/api/resources/tts/types/WebSocketRequest.d.ts +2 -4
  107. package/dist/api/resources/tts/types/WebSocketTtsRequest.d.ts +4 -1
  108. package/dist/api/resources/tts/types/index.d.ts +5 -1
  109. package/dist/api/resources/tts/types/index.js +5 -1
  110. package/dist/api/resources/voiceChanger/client/Client.d.ts +9 -4
  111. package/dist/api/resources/voiceChanger/client/Client.js +24 -20
  112. package/dist/api/resources/voiceChanger/client/requests/VoiceChangerBytesRequest.d.ts +3 -8
  113. package/dist/api/resources/voiceChanger/client/requests/VoiceChangerSseRequest.d.ts +3 -8
  114. package/dist/api/resources/voices/client/Client.js +8 -8
  115. package/dist/api/resources/voices/client/requests/CloneVoiceRequest.d.ts +6 -24
  116. package/dist/api/resources/voices/types/LocalizeDialect.d.ts +4 -8
  117. package/dist/core/fetcher/Fetcher.d.ts +2 -2
  118. package/dist/core/fetcher/Fetcher.js +4 -3
  119. package/dist/core/fetcher/getResponseBody.js +3 -3
  120. package/dist/index.d.ts +2 -0
  121. package/dist/index.js +8 -1
  122. package/dist/serialization/resources/auth/types/TokenGrant.d.ts +2 -1
  123. package/dist/serialization/resources/auth/types/TokenGrant.js +2 -1
  124. package/dist/serialization/resources/auth/types/TokenRequest.d.ts +1 -1
  125. package/dist/serialization/resources/auth/types/TokenRequest.js +1 -1
  126. package/dist/serialization/resources/stt/types/TimestampGranularity.d.ts +10 -0
  127. package/dist/serialization/resources/stt/types/TimestampGranularity.js +41 -0
  128. package/dist/serialization/resources/stt/types/index.d.ts +1 -0
  129. package/dist/serialization/resources/stt/types/index.js +1 -0
  130. package/dist/serialization/resources/tts/types/Controls.d.ts +2 -2
  131. package/dist/serialization/resources/tts/types/Controls.js +2 -2
  132. package/dist/serialization/resources/tts/types/Emotion.d.ts +1 -1
  133. package/dist/serialization/resources/tts/types/Emotion.js +1 -27
  134. package/dist/serialization/resources/tts/types/EmotionDeprecated.d.ts +10 -0
  135. package/dist/serialization/resources/tts/types/EmotionDeprecated.js +67 -0
  136. package/dist/serialization/resources/tts/types/GenerationConfig.d.ts +15 -0
  137. package/dist/serialization/resources/tts/types/GenerationConfig.js +46 -0
  138. package/dist/serialization/resources/tts/types/GenerationRequest.d.ts +3 -1
  139. package/dist/serialization/resources/tts/types/GenerationRequest.js +3 -1
  140. package/dist/serialization/resources/tts/types/SseOutputFormat.d.ts +15 -0
  141. package/dist/serialization/resources/tts/types/SseOutputFormat.js +46 -0
  142. package/dist/serialization/resources/tts/types/TtsRequest.d.ts +2 -0
  143. package/dist/serialization/resources/tts/types/TtsRequest.js +2 -0
  144. package/dist/serialization/resources/tts/types/TtssseRequest.d.ts +29 -0
  145. package/dist/serialization/resources/tts/types/TtssseRequest.js +60 -0
  146. package/dist/serialization/resources/tts/types/WebSocketTtsRequest.d.ts +3 -1
  147. package/dist/serialization/resources/tts/types/WebSocketTtsRequest.js +3 -1
  148. package/dist/serialization/resources/tts/types/index.d.ts +5 -1
  149. package/dist/serialization/resources/tts/types/index.js +5 -1
  150. package/dist/version.d.ts +1 -1
  151. package/dist/version.js +1 -1
  152. package/dist/wrapper/StreamingSTTClient.d.ts +22 -2
  153. package/dist/wrapper/StreamingSTTClient.js +124 -1
  154. package/dist/wrapper/SttWebsocket.d.ts +8 -3
  155. package/dist/wrapper/SttWebsocket.js +24 -3
  156. package/dist/wrapper/Websocket.js +1 -1
  157. package/index.d.ts +2 -0
  158. package/index.js +8 -1
  159. package/package.json +1 -1
  160. package/reference.md +89 -1
  161. package/serialization/resources/auth/types/TokenGrant.d.ts +2 -1
  162. package/serialization/resources/auth/types/TokenGrant.js +2 -1
  163. package/serialization/resources/auth/types/TokenRequest.d.ts +1 -1
  164. package/serialization/resources/auth/types/TokenRequest.js +1 -1
  165. package/serialization/resources/stt/types/TimestampGranularity.d.ts +10 -0
  166. package/serialization/resources/stt/types/TimestampGranularity.js +41 -0
  167. package/serialization/resources/stt/types/index.d.ts +1 -0
  168. package/serialization/resources/stt/types/index.js +1 -0
  169. package/serialization/resources/tts/types/Controls.d.ts +2 -2
  170. package/serialization/resources/tts/types/Controls.js +2 -2
  171. package/serialization/resources/tts/types/Emotion.d.ts +1 -1
  172. package/serialization/resources/tts/types/Emotion.js +1 -27
  173. package/serialization/resources/tts/types/EmotionDeprecated.d.ts +10 -0
  174. package/serialization/resources/tts/types/EmotionDeprecated.js +67 -0
  175. package/serialization/resources/tts/types/GenerationConfig.d.ts +15 -0
  176. package/serialization/resources/tts/types/GenerationConfig.js +46 -0
  177. package/serialization/resources/tts/types/GenerationRequest.d.ts +3 -1
  178. package/serialization/resources/tts/types/GenerationRequest.js +3 -1
  179. package/serialization/resources/tts/types/SseOutputFormat.d.ts +15 -0
  180. package/serialization/resources/tts/types/SseOutputFormat.js +46 -0
  181. package/serialization/resources/tts/types/TtsRequest.d.ts +2 -0
  182. package/serialization/resources/tts/types/TtsRequest.js +2 -0
  183. package/serialization/resources/tts/types/TtssseRequest.d.ts +29 -0
  184. package/serialization/resources/tts/types/TtssseRequest.js +60 -0
  185. package/serialization/resources/tts/types/WebSocketTtsRequest.d.ts +3 -1
  186. package/serialization/resources/tts/types/WebSocketTtsRequest.js +3 -1
  187. package/serialization/resources/tts/types/index.d.ts +5 -1
  188. package/serialization/resources/tts/types/index.js +5 -1
  189. package/version.d.ts +1 -1
  190. package/version.js +1 -1
  191. package/wrapper/StreamingSTTClient.d.ts +22 -2
  192. package/wrapper/StreamingSTTClient.js +124 -1
  193. package/wrapper/SttWebsocket.d.ts +8 -3
  194. package/wrapper/SttWebsocket.js +24 -3
  195. package/wrapper/Websocket.js +1 -1
@@ -19,9 +19,13 @@ export * from "./WebSocketRawOutputFormat";
19
19
  export * from "./WebSocketRequest";
20
20
  export * from "./WebSocketTtsRequest";
21
21
  export * from "./TtsRequest";
22
+ export * from "./TtssseRequest";
22
23
  export * from "./SupportedLanguage";
24
+ export * from "./GenerationConfig";
25
+ export * from "./Emotion";
23
26
  export * from "./OutputFormat";
24
27
  export * from "./RawOutputFormat";
28
+ export * from "./SseOutputFormat";
25
29
  export * from "./RawEncoding";
26
30
  export * from "./WavOutputFormat";
27
31
  export * from "./Mp3OutputFormat";
@@ -32,4 +36,4 @@ export * from "./Controls";
32
36
  export * from "./Speed";
33
37
  export * from "./NumericalSpecifier";
34
38
  export * from "./NaturalSpecifier";
35
- export * from "./Emotion";
39
+ export * from "./EmotionDeprecated";
@@ -35,9 +35,13 @@ __exportStar(require("./WebSocketRawOutputFormat"), exports);
35
35
  __exportStar(require("./WebSocketRequest"), exports);
36
36
  __exportStar(require("./WebSocketTtsRequest"), exports);
37
37
  __exportStar(require("./TtsRequest"), exports);
38
+ __exportStar(require("./TtssseRequest"), exports);
38
39
  __exportStar(require("./SupportedLanguage"), exports);
40
+ __exportStar(require("./GenerationConfig"), exports);
41
+ __exportStar(require("./Emotion"), exports);
39
42
  __exportStar(require("./OutputFormat"), exports);
40
43
  __exportStar(require("./RawOutputFormat"), exports);
44
+ __exportStar(require("./SseOutputFormat"), exports);
41
45
  __exportStar(require("./RawEncoding"), exports);
42
46
  __exportStar(require("./WavOutputFormat"), exports);
43
47
  __exportStar(require("./Mp3OutputFormat"), exports);
@@ -48,4 +52,4 @@ __exportStar(require("./Controls"), exports);
48
52
  __exportStar(require("./Speed"), exports);
49
53
  __exportStar(require("./NumericalSpecifier"), exports);
50
54
  __exportStar(require("./NaturalSpecifier"), exports);
51
- __exportStar(require("./Emotion"), exports);
55
+ __exportStar(require("./EmotionDeprecated"), exports);
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const SDK_VERSION = "2.2.5";
1
+ export declare const SDK_VERSION = "2.2.8";
package/dist/version.js CHANGED
@@ -1,4 +1,4 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.SDK_VERSION = void 0;
4
- exports.SDK_VERSION = "2.2.5";
4
+ exports.SDK_VERSION = "2.2.8";
@@ -1,6 +1,26 @@
1
1
  import { Stt } from "../api/resources/stt/client/Client";
2
- import SttWebsocket, { SttWebSocketOptions } from "./SttWebsocket";
2
+ import SttWebsocket, { SttWebSocketOptions, TranscriptionResult } from "./SttWebsocket";
3
3
  export declare class StreamingSTTClient extends Stt {
4
4
  constructor(options?: Stt.Options);
5
- websocket(options?: SttWebSocketOptions): SttWebsocket;
5
+ /**
6
+ * Create a WebSocket connection for real-time speech transcription.
7
+ *
8
+ * @param options - Configuration options for the STT WebSocket
9
+ * @param options.model - ID of the model to use for transcription (required)
10
+ * @param options.language - The language of the input audio in ISO-639-1 format (defaults to "en")
11
+ * @param options.encoding - The encoding format of the audio data (required). Must be one of: "pcm_s16le", "pcm_s32le", "pcm_f16le", "pcm_f32le", "pcm_mulaw", "pcm_alaw"
12
+ * @param options.sampleRate - The sample rate of the audio in Hz (required)
13
+ * @param options.minVolume - Volume threshold for voice activity detection (0.0-1.0)
14
+ * @param options.maxSilenceDurationSecs - Maximum duration of silence before endpointing
15
+ * @returns SttWebsocket instance for STT operations
16
+ */
17
+ websocket(options: SttWebSocketOptions): SttWebsocket;
18
+ /**
19
+ * Transcribe audio chunks using WebSocket with automatic connection management.
20
+ *
21
+ * @param audioChunks - Iterator of audio chunks as ArrayBuffer
22
+ * @param options - Configuration options for the STT WebSocket
23
+ * @returns AsyncGenerator yielding transcription results
24
+ */
25
+ transcribeChunks(audioChunks: AsyncIterable<ArrayBuffer>, options: SttWebSocketOptions): AsyncGenerator<TranscriptionResult, void, unknown>;
6
26
  }
@@ -1,4 +1,33 @@
1
1
  "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __await = (this && this.__await) || function (v) { return this instanceof __await ? (this.v = v, this) : new __await(v); }
12
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
13
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
14
+ var m = o[Symbol.asyncIterator], i;
15
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
16
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
17
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
18
+ };
19
+ var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
20
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
21
+ var g = generator.apply(thisArg, _arguments || []), i, q = [];
22
+ return i = Object.create((typeof AsyncIterator === "function" ? AsyncIterator : Object).prototype), verb("next"), verb("throw"), verb("return", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;
23
+ function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }
24
+ function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }
25
+ function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
26
+ function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
27
+ function fulfill(value) { resume("next", value); }
28
+ function reject(value) { resume("throw", value); }
29
+ function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
30
+ };
2
31
  var __importDefault = (this && this.__importDefault) || function (mod) {
3
32
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
33
  };
@@ -10,8 +39,102 @@ class StreamingSTTClient extends Client_1.Stt {
10
39
  constructor(options = {}) {
11
40
  super(options);
12
41
  }
13
- websocket(options = {}) {
42
+ /**
43
+ * Create a WebSocket connection for real-time speech transcription.
44
+ *
45
+ * @param options - Configuration options for the STT WebSocket
46
+ * @param options.model - ID of the model to use for transcription (required)
47
+ * @param options.language - The language of the input audio in ISO-639-1 format (defaults to "en")
48
+ * @param options.encoding - The encoding format of the audio data (required). Must be one of: "pcm_s16le", "pcm_s32le", "pcm_f16le", "pcm_f32le", "pcm_mulaw", "pcm_alaw"
49
+ * @param options.sampleRate - The sample rate of the audio in Hz (required)
50
+ * @param options.minVolume - Volume threshold for voice activity detection (0.0-1.0)
51
+ * @param options.maxSilenceDurationSecs - Maximum duration of silence before endpointing
52
+ * @returns SttWebsocket instance for STT operations
53
+ */
54
+ websocket(options) {
14
55
  return new SttWebsocket_1.default(options, Object.assign({ cartesiaVersion: "2024-06-10" }, this._options));
15
56
  }
57
+ /**
58
+ * Transcribe audio chunks using WebSocket with automatic connection management.
59
+ *
60
+ * @param audioChunks - Iterator of audio chunks as ArrayBuffer
61
+ * @param options - Configuration options for the STT WebSocket
62
+ * @returns AsyncGenerator yielding transcription results
63
+ */
64
+ transcribeChunks(audioChunks, options) {
65
+ return __asyncGenerator(this, arguments, function* transcribeChunks_1() {
66
+ const ws = this.websocket(options);
67
+ try {
68
+ // Set up message handling
69
+ const resultQueue = [];
70
+ let isComplete = false;
71
+ let error = null;
72
+ yield __await(ws.onMessage((result) => {
73
+ if (result.type === "error") {
74
+ error = new Error(result.message || "STT error occurred");
75
+ }
76
+ else if (result.type === "done") {
77
+ isComplete = true;
78
+ }
79
+ resultQueue.push(result);
80
+ }));
81
+ // Send audio chunks
82
+ const sendAudio = () => __awaiter(this, void 0, void 0, function* () {
83
+ var _a, e_1, _b, _c;
84
+ try {
85
+ try {
86
+ for (var _d = true, audioChunks_1 = __asyncValues(audioChunks), audioChunks_1_1; audioChunks_1_1 = yield audioChunks_1.next(), _a = audioChunks_1_1.done, !_a; _d = true) {
87
+ _c = audioChunks_1_1.value;
88
+ _d = false;
89
+ const chunk = _c;
90
+ yield ws.send(chunk);
91
+ }
92
+ }
93
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
94
+ finally {
95
+ try {
96
+ if (!_d && !_a && (_b = audioChunks_1.return)) yield _b.call(audioChunks_1);
97
+ }
98
+ finally { if (e_1) throw e_1.error; }
99
+ }
100
+ // Finalize and close session
101
+ yield ws.finalize();
102
+ yield ws.done();
103
+ }
104
+ catch (e) {
105
+ error = e instanceof Error ? e : new Error(String(e));
106
+ }
107
+ });
108
+ // Start sending audio in background
109
+ const sendPromise = sendAudio();
110
+ // Yield results as they come in
111
+ while (!isComplete && !error) {
112
+ if (resultQueue.length > 0) {
113
+ const result = resultQueue.shift();
114
+ yield yield __await(result);
115
+ if (result.type === "done") {
116
+ break;
117
+ }
118
+ }
119
+ else {
120
+ // Small delay to avoid busy waiting
121
+ yield __await(new Promise(resolve => setTimeout(resolve, 10)));
122
+ }
123
+ }
124
+ // Wait for sending to complete and handle any errors
125
+ yield __await(sendPromise);
126
+ if (error) {
127
+ throw error;
128
+ }
129
+ // Yield any remaining results
130
+ while (resultQueue.length > 0) {
131
+ yield yield __await(resultQueue.shift());
132
+ }
133
+ }
134
+ finally {
135
+ ws.disconnect();
136
+ }
137
+ });
138
+ }
16
139
  }
17
140
  exports.StreamingSTTClient = StreamingSTTClient;
@@ -1,10 +1,14 @@
1
1
  import { ReconnectingWebSocket, Options } from "../core/websocket";
2
2
  import { Stt } from "../api/resources/stt/client/Client";
3
+ import * as Cartesia from "../api/index";
4
+ import { SttEncoding } from "../api/resources/stt/types/SttEncoding";
3
5
  export interface SttWebSocketOptions {
4
6
  model?: string;
5
7
  language?: string;
6
- encoding?: string;
7
- sampleRate?: number;
8
+ encoding: SttEncoding;
9
+ sampleRate: number;
10
+ minVolume?: number;
11
+ maxSilenceDurationSecs?: number;
8
12
  }
9
13
  export interface TranscriptionResult {
10
14
  type: "transcript" | "flush_done" | "done" | "error";
@@ -13,13 +17,14 @@ export interface TranscriptionResult {
13
17
  isFinal?: boolean;
14
18
  duration?: number;
15
19
  language?: string;
20
+ words?: Cartesia.TranscriptionWord[];
16
21
  message?: string;
17
22
  }
18
23
  export default class SttWebsocket {
19
24
  #private;
20
25
  private readonly options;
21
26
  socket?: ReconnectingWebSocket;
22
- constructor({ model, language, encoding, sampleRate, }: SttWebSocketOptions, options: Stt.Options);
27
+ constructor({ model, language, encoding, sampleRate, minVolume, maxSilenceDurationSecs, }: SttWebSocketOptions, options: Stt.Options);
23
28
  send(audioData: ArrayBuffer): Promise<void>;
24
29
  finalize(): Promise<void>;
25
30
  done(): Promise<void>;
@@ -52,13 +52,13 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
52
52
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
53
53
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
54
54
  };
55
- var _SttWebsocket_instances, _SttWebsocket_isConnected, _SttWebsocket_model, _SttWebsocket_language, _SttWebsocket_encoding, _SttWebsocket_sampleRate, _SttWebsocket_connectionPromise, _SttWebsocket_ensureConnected;
55
+ var _SttWebsocket_instances, _SttWebsocket_isConnected, _SttWebsocket_model, _SttWebsocket_language, _SttWebsocket_encoding, _SttWebsocket_sampleRate, _SttWebsocket_minVolume, _SttWebsocket_maxSilenceDurationSecs, _SttWebsocket_connectionPromise, _SttWebsocket_ensureConnected;
56
56
  Object.defineProperty(exports, "__esModule", { value: true });
57
57
  const core = __importStar(require("../core"));
58
58
  const environments = __importStar(require("../environments"));
59
59
  const websocket_1 = require("../core/websocket");
60
60
  class SttWebsocket {
61
- constructor({ model = "ink-whisper", language = "en", encoding = "pcm_s16le", sampleRate = 16000, }, options) {
61
+ constructor({ model = "ink-whisper", language = "en", encoding, sampleRate, minVolume, maxSilenceDurationSecs, }, options) {
62
62
  _SttWebsocket_instances.add(this);
63
63
  this.options = options;
64
64
  _SttWebsocket_isConnected.set(this, false);
@@ -66,11 +66,24 @@ class SttWebsocket {
66
66
  _SttWebsocket_language.set(this, void 0);
67
67
  _SttWebsocket_encoding.set(this, void 0);
68
68
  _SttWebsocket_sampleRate.set(this, void 0);
69
+ _SttWebsocket_minVolume.set(this, void 0);
70
+ _SttWebsocket_maxSilenceDurationSecs.set(this, void 0);
69
71
  _SttWebsocket_connectionPromise.set(this, void 0);
72
+ if (!model) {
73
+ throw new Error("model parameter is required");
74
+ }
75
+ if (!encoding) {
76
+ throw new Error("encoding parameter is required");
77
+ }
78
+ if (!sampleRate) {
79
+ throw new Error("sampleRate parameter is required");
80
+ }
70
81
  __classPrivateFieldSet(this, _SttWebsocket_model, model, "f");
71
82
  __classPrivateFieldSet(this, _SttWebsocket_language, language, "f");
72
83
  __classPrivateFieldSet(this, _SttWebsocket_encoding, encoding, "f");
73
84
  __classPrivateFieldSet(this, _SttWebsocket_sampleRate, sampleRate, "f");
85
+ __classPrivateFieldSet(this, _SttWebsocket_minVolume, minVolume, "f");
86
+ __classPrivateFieldSet(this, _SttWebsocket_maxSilenceDurationSecs, maxSilenceDurationSecs, "f");
74
87
  }
75
88
  send(audioData) {
76
89
  return __awaiter(this, void 0, void 0, function* () {
@@ -112,6 +125,10 @@ class SttWebsocket {
112
125
  result.isFinal = data.is_final || false;
113
126
  result.duration = data.duration;
114
127
  result.language = data.language;
128
+ // Include word-level timestamps if available
129
+ if (data.words) {
130
+ result.words = data.words;
131
+ }
115
132
  }
116
133
  else if (data.type === "flush_done") {
117
134
  // Acknowledgment for finalize command
@@ -152,6 +169,10 @@ class SttWebsocket {
152
169
  };
153
170
  if (__classPrivateFieldGet(this, _SttWebsocket_language, "f"))
154
171
  params.language = __classPrivateFieldGet(this, _SttWebsocket_language, "f");
172
+ if (__classPrivateFieldGet(this, _SttWebsocket_minVolume, "f") !== undefined)
173
+ params.min_volume = __classPrivateFieldGet(this, _SttWebsocket_minVolume, "f").toString();
174
+ if (__classPrivateFieldGet(this, _SttWebsocket_maxSilenceDurationSecs, "f") !== undefined)
175
+ params.max_silence_duration_secs = __classPrivateFieldGet(this, _SttWebsocket_maxSilenceDurationSecs, "f").toString();
155
176
  const apiKey = yield core.Supplier.get(this.options.apiKey);
156
177
  if (apiKey) {
157
178
  params.api_key = apiKey;
@@ -185,7 +206,7 @@ class SttWebsocket {
185
206
  __classPrivateFieldSet(this, _SttWebsocket_isConnected, false, "f");
186
207
  }
187
208
  }
188
- _SttWebsocket_isConnected = new WeakMap(), _SttWebsocket_model = new WeakMap(), _SttWebsocket_language = new WeakMap(), _SttWebsocket_encoding = new WeakMap(), _SttWebsocket_sampleRate = new WeakMap(), _SttWebsocket_connectionPromise = new WeakMap(), _SttWebsocket_instances = new WeakSet(), _SttWebsocket_ensureConnected = function _SttWebsocket_ensureConnected() {
209
+ _SttWebsocket_isConnected = new WeakMap(), _SttWebsocket_model = new WeakMap(), _SttWebsocket_language = new WeakMap(), _SttWebsocket_encoding = new WeakMap(), _SttWebsocket_sampleRate = new WeakMap(), _SttWebsocket_minVolume = new WeakMap(), _SttWebsocket_maxSilenceDurationSecs = new WeakMap(), _SttWebsocket_connectionPromise = new WeakMap(), _SttWebsocket_instances = new WeakSet(), _SttWebsocket_ensureConnected = function _SttWebsocket_ensureConnected() {
189
210
  return __awaiter(this, void 0, void 0, function* () {
190
211
  if (__classPrivateFieldGet(this, _SttWebsocket_isConnected, "f"))
191
212
  return;
@@ -180,7 +180,7 @@ class Websocket {
180
180
  var _a;
181
181
  const baseUrl = ((_a = (yield core.Supplier.get(this.options.environment))) !== null && _a !== void 0 ? _a : environments.CartesiaEnvironment.Production).replace(/^https?:\/\//, "");
182
182
  const params = {
183
- cartesia_version: this.options.cartesiaVersion || "2024-06-10",
183
+ cartesia_version: this.options.cartesiaVersion || "2025-04-16",
184
184
  };
185
185
  const apiKey = yield core.Supplier.get(this.options.apiKey);
186
186
  if (apiKey) {
package/index.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  export * as Cartesia from "./api";
2
2
  export { CartesiaClient } from "./wrapper/Client";
3
3
  export { WebPlayer } from "./wrapper/WebPlayer";
4
+ export { default as SttWebsocket } from "./wrapper/SttWebsocket";
5
+ export { StreamingSTTClient } from "./wrapper/StreamingSTTClient";
4
6
  export { CartesiaEnvironment } from "./environments";
5
7
  export { CartesiaError, CartesiaTimeoutError } from "./errors";
package/index.js CHANGED
@@ -32,13 +32,20 @@ var __importStar = (this && this.__importStar) || (function () {
32
32
  return result;
33
33
  };
34
34
  })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
35
38
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.CartesiaTimeoutError = exports.CartesiaError = exports.CartesiaEnvironment = exports.WebPlayer = exports.CartesiaClient = exports.Cartesia = void 0;
39
+ exports.CartesiaTimeoutError = exports.CartesiaError = exports.CartesiaEnvironment = exports.StreamingSTTClient = exports.SttWebsocket = exports.WebPlayer = exports.CartesiaClient = exports.Cartesia = void 0;
37
40
  exports.Cartesia = __importStar(require("./api"));
38
41
  var Client_1 = require("./wrapper/Client");
39
42
  Object.defineProperty(exports, "CartesiaClient", { enumerable: true, get: function () { return Client_1.CartesiaClient; } });
40
43
  var WebPlayer_1 = require("./wrapper/WebPlayer");
41
44
  Object.defineProperty(exports, "WebPlayer", { enumerable: true, get: function () { return WebPlayer_1.WebPlayer; } });
45
+ var SttWebsocket_1 = require("./wrapper/SttWebsocket");
46
+ Object.defineProperty(exports, "SttWebsocket", { enumerable: true, get: function () { return __importDefault(SttWebsocket_1).default; } });
47
+ var StreamingSTTClient_1 = require("./wrapper/StreamingSTTClient");
48
+ Object.defineProperty(exports, "StreamingSTTClient", { enumerable: true, get: function () { return StreamingSTTClient_1.StreamingSTTClient; } });
42
49
  var environments_1 = require("./environments");
43
50
  Object.defineProperty(exports, "CartesiaEnvironment", { enumerable: true, get: function () { return environments_1.CartesiaEnvironment; } });
44
51
  var errors_1 = require("./errors");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cartesia/cartesia-js",
3
- "version": "2.2.5",
3
+ "version": "2.2.8",
4
4
  "private": false,
5
5
  "repository": "https://github.com/cartesia-ai/cartesia-js",
6
6
  "main": "./index.js",
package/reference.md CHANGED
@@ -75,6 +75,7 @@ Generates a new Access Token for the client. These tokens are short-lived and sh
75
75
  await client.auth.accessToken({
76
76
  grants: {
77
77
  tts: true,
78
+ stt: true,
78
79
  },
79
80
  expiresIn: 60,
80
81
  });
@@ -217,6 +218,93 @@ await client.infill.bytes(fs.createReadStream("/path/to/your/file"), fs.createRe
217
218
  </dl>
218
219
  </details>
219
220
 
221
+ ## Stt
222
+
223
+ <details><summary><code>client.stt.<a href="/src/api/resources/stt/client/Client.ts">transcribe</a>(file, { ...params }) -> Cartesia.TranscriptionResponse</code></summary>
224
+ <dl>
225
+ <dd>
226
+
227
+ #### 📝 Description
228
+
229
+ <dl>
230
+ <dd>
231
+
232
+ <dl>
233
+ <dd>
234
+
235
+ Transcribes audio files into text using Cartesia's Speech-to-Text API.
236
+
237
+ Upload an audio file and receive a complete transcription response. Supports arbitrarily long audio files with automatic intelligent chunking for longer audio.
238
+
239
+ **Supported audio formats:** flac, m4a, mp3, mp4, mpeg, mpga, oga, ogg, wav, webm
240
+
241
+ **Response format:** Returns JSON with transcribed text, duration, and language. Include `timestamp_granularities: ["word"]` to get word-level timestamps.
242
+
243
+ **Pricing:** Batch transcription is priced at **1 credit per 2 seconds** of audio processed.
244
+
245
+ <Note>
246
+ For migrating from the OpenAI SDK, see our [OpenAI Whisper to Cartesia Ink Migration Guide](/api-reference/stt/migrate-from-open-ai).
247
+ </Note>
248
+ </dd>
249
+ </dl>
250
+ </dd>
251
+ </dl>
252
+
253
+ #### 🔌 Usage
254
+
255
+ <dl>
256
+ <dd>
257
+
258
+ <dl>
259
+ <dd>
260
+
261
+ ```typescript
262
+ await client.stt.transcribe(fs.createReadStream("/path/to/your/file"), {
263
+ model: "ink-whisper",
264
+ language: "en",
265
+ });
266
+ ```
267
+
268
+ </dd>
269
+ </dl>
270
+ </dd>
271
+ </dl>
272
+
273
+ #### ⚙️ Parameters
274
+
275
+ <dl>
276
+ <dd>
277
+
278
+ <dl>
279
+ <dd>
280
+
281
+ **file:** `File | fs.ReadStream | Blob`
282
+
283
+ </dd>
284
+ </dl>
285
+
286
+ <dl>
287
+ <dd>
288
+
289
+ **request:** `Cartesia.TranscriptionRequest`
290
+
291
+ </dd>
292
+ </dl>
293
+
294
+ <dl>
295
+ <dd>
296
+
297
+ **requestOptions:** `Stt.RequestOptions`
298
+
299
+ </dd>
300
+ </dl>
301
+ </dd>
302
+ </dl>
303
+
304
+ </dd>
305
+ </dl>
306
+ </details>
307
+
220
308
  ## Tts
221
309
 
222
310
  <details><summary><code>client.tts.<a href="/src/api/resources/tts/client/Client.ts">bytes</a>({ ...params }) -> stream.Readable</code></summary>
@@ -325,7 +413,7 @@ for await (const item of response) {
325
413
  <dl>
326
414
  <dd>
327
415
 
328
- **request:** `Cartesia.TtsRequest`
416
+ **request:** `Cartesia.TtssseRequest`
329
417
 
330
418
  </dd>
331
419
  </dl>
@@ -7,6 +7,7 @@ import * as core from "../../../../core";
7
7
  export declare const TokenGrant: core.serialization.ObjectSchema<serializers.TokenGrant.Raw, Cartesia.TokenGrant>;
8
8
  export declare namespace TokenGrant {
9
9
  interface Raw {
10
- tts: boolean;
10
+ tts?: boolean | null;
11
+ stt?: boolean | null;
11
12
  }
12
13
  }
@@ -39,5 +39,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
39
39
  exports.TokenGrant = void 0;
40
40
  const core = __importStar(require("../../../../core"));
41
41
  exports.TokenGrant = core.serialization.object({
42
- tts: core.serialization.boolean(),
42
+ tts: core.serialization.boolean().optional(),
43
+ stt: core.serialization.boolean().optional(),
43
44
  });
@@ -8,7 +8,7 @@ import { TokenGrant } from "./TokenGrant";
8
8
  export declare const TokenRequest: core.serialization.ObjectSchema<serializers.TokenRequest.Raw, Cartesia.TokenRequest>;
9
9
  export declare namespace TokenRequest {
10
10
  interface Raw {
11
- grants: TokenGrant.Raw;
11
+ grants?: TokenGrant.Raw | null;
12
12
  expires_in?: number | null;
13
13
  }
14
14
  }
@@ -40,6 +40,6 @@ exports.TokenRequest = void 0;
40
40
  const core = __importStar(require("../../../../core"));
41
41
  const TokenGrant_1 = require("./TokenGrant");
42
42
  exports.TokenRequest = core.serialization.object({
43
- grants: TokenGrant_1.TokenGrant,
43
+ grants: TokenGrant_1.TokenGrant.optional(),
44
44
  expiresIn: core.serialization.property("expires_in", core.serialization.number().optional()),
45
45
  });
@@ -0,0 +1,10 @@
1
+ /**
2
+ * This file was auto-generated by Fern from our API Definition.
3
+ */
4
+ import * as serializers from "../../../index";
5
+ import * as Cartesia from "../../../../api/index";
6
+ import * as core from "../../../../core";
7
+ export declare const TimestampGranularity: core.serialization.Schema<serializers.TimestampGranularity.Raw, Cartesia.TimestampGranularity>;
8
+ export declare namespace TimestampGranularity {
9
+ type Raw = "word";
10
+ }
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+ /**
3
+ * This file was auto-generated by Fern from our API Definition.
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.TimestampGranularity = void 0;
40
+ const core = __importStar(require("../../../../core"));
41
+ exports.TimestampGranularity = core.serialization.enum_(["word"]);
@@ -1,3 +1,4 @@
1
+ export * from "./TimestampGranularity";
1
2
  export * from "./TranscriptionWord";
2
3
  export * from "./TranscriptionResponse";
3
4
  export * from "./StreamingTranscriptionResponse";
@@ -14,6 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./TimestampGranularity"), exports);
17
18
  __exportStar(require("./TranscriptionWord"), exports);
18
19
  __exportStar(require("./TranscriptionResponse"), exports);
19
20
  __exportStar(require("./StreamingTranscriptionResponse"), exports);
@@ -5,11 +5,11 @@ import * as serializers from "../../../index";
5
5
  import * as Cartesia from "../../../../api/index";
6
6
  import * as core from "../../../../core";
7
7
  import { Speed } from "./Speed";
8
- import { Emotion } from "./Emotion";
8
+ import { EmotionDeprecated } from "./EmotionDeprecated";
9
9
  export declare const Controls: core.serialization.ObjectSchema<serializers.Controls.Raw, Cartesia.Controls>;
10
10
  export declare namespace Controls {
11
11
  interface Raw {
12
12
  speed: Speed.Raw;
13
- emotion: Emotion.Raw[];
13
+ emotion: EmotionDeprecated.Raw[];
14
14
  }
15
15
  }
@@ -39,8 +39,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
39
39
  exports.Controls = void 0;
40
40
  const core = __importStar(require("../../../../core"));
41
41
  const Speed_1 = require("./Speed");
42
- const Emotion_1 = require("./Emotion");
42
+ const EmotionDeprecated_1 = require("./EmotionDeprecated");
43
43
  exports.Controls = core.serialization.object({
44
44
  speed: Speed_1.Speed,
45
- emotion: core.serialization.list(Emotion_1.Emotion),
45
+ emotion: core.serialization.list(EmotionDeprecated_1.EmotionDeprecated),
46
46
  });
@@ -6,5 +6,5 @@ import * as Cartesia from "../../../../api/index";
6
6
  import * as core from "../../../../core";
7
7
  export declare const Emotion: core.serialization.Schema<serializers.Emotion.Raw, Cartesia.Emotion>;
8
8
  export declare namespace Emotion {
9
- type Raw = "anger:lowest" | "anger:low" | "anger" | "anger:high" | "anger:highest" | "positivity:lowest" | "positivity:low" | "positivity" | "positivity:high" | "positivity:highest" | "surprise:lowest" | "surprise:low" | "surprise" | "surprise:high" | "surprise:highest" | "sadness:lowest" | "sadness:low" | "sadness" | "sadness:high" | "sadness:highest" | "curiosity:lowest" | "curiosity:low" | "curiosity" | "curiosity:high" | "curiosity:highest";
9
+ type Raw = string;
10
10
  }