@cartesia/cartesia-js 3.0.0-b11 → 3.0.0-b13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/CHANGELOG.md +18 -0
  2. package/backcompat/errors.d.mts +1 -1
  3. package/backcompat/errors.d.mts.map +1 -1
  4. package/backcompat/errors.d.ts +1 -1
  5. package/backcompat/errors.d.ts.map +1 -1
  6. package/backcompat/errors.js +1 -1
  7. package/backcompat/errors.js.map +1 -1
  8. package/backcompat/errors.mjs +1 -1
  9. package/backcompat/errors.mjs.map +1 -1
  10. package/backcompat/index.d.mts.map +1 -1
  11. package/backcompat/index.d.ts.map +1 -1
  12. package/backcompat/index.js +5 -5
  13. package/backcompat/index.js.map +1 -1
  14. package/backcompat/index.mjs +5 -5
  15. package/backcompat/index.mjs.map +1 -1
  16. package/backcompat/tts-wrapper.d.mts +33 -10
  17. package/backcompat/tts-wrapper.d.mts.map +1 -1
  18. package/backcompat/tts-wrapper.d.ts +33 -10
  19. package/backcompat/tts-wrapper.d.ts.map +1 -1
  20. package/backcompat/tts-wrapper.js +83 -26
  21. package/backcompat/tts-wrapper.js.map +1 -1
  22. package/backcompat/tts-wrapper.mjs +85 -28
  23. package/backcompat/tts-wrapper.mjs.map +1 -1
  24. package/backcompat/types.d.mts +1 -1
  25. package/backcompat/types.d.mts.map +1 -1
  26. package/backcompat/types.d.ts +1 -1
  27. package/backcompat/types.d.ts.map +1 -1
  28. package/backcompat/voice-changer-wrapper.d.mts +4 -4
  29. package/backcompat/voice-changer-wrapper.d.mts.map +1 -1
  30. package/backcompat/voice-changer-wrapper.d.ts +4 -4
  31. package/backcompat/voice-changer-wrapper.d.ts.map +1 -1
  32. package/backcompat/voice-changer-wrapper.js +6 -6
  33. package/backcompat/voice-changer-wrapper.js.map +1 -1
  34. package/backcompat/voice-changer-wrapper.mjs +7 -7
  35. package/backcompat/voice-changer-wrapper.mjs.map +1 -1
  36. package/backcompat/voices-wrapper.d.mts +18 -5
  37. package/backcompat/voices-wrapper.d.mts.map +1 -1
  38. package/backcompat/voices-wrapper.d.ts +18 -5
  39. package/backcompat/voices-wrapper.d.ts.map +1 -1
  40. package/backcompat/voices-wrapper.js +63 -0
  41. package/backcompat/voices-wrapper.js.map +1 -1
  42. package/backcompat/voices-wrapper.mjs +63 -0
  43. package/backcompat/voices-wrapper.mjs.map +1 -1
  44. package/package.json +1 -1
  45. package/resources/agents/calls.d.mts +1 -1
  46. package/resources/agents/calls.d.mts.map +1 -1
  47. package/resources/agents/calls.d.ts +1 -1
  48. package/resources/agents/calls.d.ts.map +1 -1
  49. package/resources/agents/calls.js +1 -2
  50. package/resources/agents/calls.js.map +1 -1
  51. package/resources/agents/calls.mjs +1 -2
  52. package/resources/agents/calls.mjs.map +1 -1
  53. package/resources/infill.d.mts +1 -1
  54. package/resources/infill.d.mts.map +1 -1
  55. package/resources/infill.d.ts +1 -1
  56. package/resources/infill.d.ts.map +1 -1
  57. package/resources/infill.js +1 -6
  58. package/resources/infill.js.map +1 -1
  59. package/resources/infill.mjs +1 -6
  60. package/resources/infill.mjs.map +1 -1
  61. package/resources/voice-changer.d.mts +1 -1
  62. package/resources/voice-changer.d.mts.map +1 -1
  63. package/resources/voice-changer.d.ts +1 -1
  64. package/resources/voice-changer.d.ts.map +1 -1
  65. package/resources/voice-changer.js +1 -6
  66. package/resources/voice-changer.js.map +1 -1
  67. package/resources/voice-changer.mjs +1 -6
  68. package/resources/voice-changer.mjs.map +1 -1
  69. package/src/backcompat/errors.ts +32 -40
  70. package/src/backcompat/index.ts +64 -67
  71. package/src/backcompat/tts-wrapper.ts +405 -322
  72. package/src/backcompat/types.ts +13 -13
  73. package/src/backcompat/voice-changer-wrapper.ts +58 -56
  74. package/src/backcompat/voices-wrapper.ts +217 -150
  75. package/src/resources/agents/calls.ts +2 -3
  76. package/src/resources/infill.ts +2 -7
  77. package/src/resources/voice-changer.ts +2 -7
  78. package/src/version.ts +1 -1
  79. package/version.d.mts +1 -1
  80. package/version.d.ts +1 -1
  81. package/version.js +1 -1
  82. package/version.mjs +1 -1
@@ -1,351 +1,434 @@
1
- import WebSocket from "ws";
2
- import { Cartesia } from "../client";
3
- import { type RequestOptions as InternalRequestOptions } from "../internal/request-options";
4
- import { BackCompatRequestOptions } from "./types";
5
- import { wrap } from "./errors";
6
- import { Readable } from "stream";
1
+ import WebSocket from 'ws';
2
+ import { Cartesia } from '../client';
3
+ import { BackCompatRequestOptions } from './types';
4
+ import { wrap } from './errors';
5
+ import { Readable } from 'stream';
7
6
 
8
7
  // Define compatible interfaces to match the old SDK types for WebSocket
9
8
  export interface BackCompatWebSocketOptions {
10
- container?: "raw" | "wav" | "mp3";
11
- encoding?: "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
12
- sampleRate: number;
9
+ container?: 'raw' | 'wav' | 'mp3';
10
+ encoding?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_alaw' | 'pcm_mulaw';
11
+ sampleRate: number;
13
12
  }
14
13
 
15
14
  export type BackCompatTtsRequestVoiceSpecifier =
16
- | { mode: "id"; id: string }
17
- | { mode: "embedding"; embedding: number[] };
15
+ | { mode: 'id'; id: string }
16
+ | { mode: 'embedding'; embedding: number[] };
18
17
 
19
18
  export interface BackCompatGenerationConfig {
20
- volume?: number;
21
- speed?: number;
22
- emotion?: string[]; // Simplified from strict union for backcompat flexibility
19
+ volume?: number;
20
+ speed?: number;
21
+ emotion?: string[]; // Simplified from strict union for backcompat flexibility
23
22
  }
24
23
 
25
24
  export interface BackCompatWebSocketTtsRequest {
26
- modelId: string;
27
- transcript: string;
28
- voice: BackCompatTtsRequestVoiceSpecifier;
29
- generationConfig?: BackCompatGenerationConfig;
30
- outputFormat?: {
31
- container?: "raw" | "wav" | "mp3";
32
- encoding?: "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
33
- sampleRate?: number;
34
- bitRate?: number;
35
- };
36
- contextId?: string; // Backcompat might pass this in request?
37
- // Add other fields as needed
38
- continue?: boolean;
39
- duration?: number;
40
- addTimestamps?: boolean;
41
- addPhonemeTimestamps?: boolean;
25
+ modelId: string;
26
+ transcript: string;
27
+ voice: BackCompatTtsRequestVoiceSpecifier;
28
+ generationConfig?: BackCompatGenerationConfig;
29
+ outputFormat?: {
30
+ container?: 'raw' | 'wav' | 'mp3';
31
+ encoding?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_alaw' | 'pcm_mulaw';
32
+ sampleRate?: number;
33
+ bitRate?: number;
34
+ };
35
+ contextId?: string; // Backcompat might pass this in request?
36
+ // Add other fields as needed
37
+ continue?: boolean;
38
+ duration?: number;
39
+ addTimestamps?: boolean;
40
+ addPhonemeTimestamps?: boolean;
42
41
  }
43
42
 
44
43
  export interface BackCompatTtsRequest {
45
- modelId: string;
46
- transcript: string;
47
- voice: BackCompatTtsRequestVoiceSpecifier;
48
- language?: string;
49
- outputFormat: {
50
- container: "raw" | "wav" | "mp3";
51
- encoding?: "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
52
- sampleRate: number;
53
- bitRate?: number;
54
- };
55
- generationConfig?: BackCompatGenerationConfig;
56
- duration?: number;
57
- speed?: "slow" | "normal" | "fast";
58
- pronunciationDictId?: string;
44
+ modelId: string;
45
+ transcript: string;
46
+ voice: BackCompatTtsRequestVoiceSpecifier;
47
+ language?: string;
48
+ outputFormat: {
49
+ container: 'raw' | 'wav' | 'mp3';
50
+ encoding?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_alaw' | 'pcm_mulaw';
51
+ sampleRate: number;
52
+ bitRate?: number;
53
+ };
54
+ generationConfig?: BackCompatGenerationConfig;
55
+ duration?: number;
56
+ speed?: 'slow' | 'normal' | 'fast';
57
+ pronunciationDictId?: string;
59
58
  }
60
59
 
61
60
  // Helper for generating UUIDs. Not cryptographically secure.
62
61
  function uuidv4() {
63
- return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
64
- var r = Math.random() * 16 | 0, v = c === 'x' ? r : (r & 0x3 | 0x8);
65
- return v.toString(16);
66
- });
62
+ return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
63
+ var r = (Math.random() * 16) | 0,
64
+ v = c === 'x' ? r : (r & 0x3) | 0x8;
65
+ return v.toString(16);
66
+ });
67
67
  }
68
68
 
69
69
  class AudioSource {
70
- private buffers: Buffer[] = [];
71
- private waiter: ((val?: any) => void) | null = null;
72
- public isDone = false;
73
-
74
- push(data: Buffer) {
75
- this.buffers.push(data);
76
- if (this.waiter) {
77
- this.waiter();
78
- this.waiter = null;
79
- }
80
- }
81
-
82
- markDone() {
83
- this.isDone = true;
84
- if (this.waiter) {
85
- this.waiter();
86
- this.waiter = null;
87
- }
88
- }
89
-
90
- async read(outBuffer: Float32Array): Promise<number> {
91
- if (this.buffers.length === 0 && !this.isDone) {
92
- await new Promise<void>((resolve) => { this.waiter = resolve; });
93
- }
94
-
95
- if (this.buffers.length === 0 && this.isDone) {
96
- return 0;
97
- }
98
-
99
- let totalFloatsRead = 0;
100
- let outOffset = 0;
101
- const maxFloats = outBuffer.length;
102
-
103
- while (this.buffers.length > 0 && totalFloatsRead < maxFloats) {
104
- const buf = this.buffers[0] as Buffer; // ts not smart enough to check loop condition
105
- const floatsInBuf = buf.length / 4;
106
- const floatsNeeded = maxFloats - totalFloatsRead;
107
-
108
- const floatsToCopy = Math.min(floatsInBuf, floatsNeeded);
109
- const bytesToCopy = floatsToCopy * 4;
110
-
111
- // Copy to outBuffer.
112
- // Create a view on the buffer to read floats.
113
-
114
- // We need to ensure byteOffset is a multiple of 4.
115
- // If not, we must copy the buffer to a new one.
116
- let srcFloats: Float32Array;
117
- if (buf.byteOffset % 4 === 0) {
118
- srcFloats = new Float32Array(buf.buffer, buf.byteOffset, floatsInBuf);
119
- } else {
120
- const alignedBuf = new Uint8Array(buf);
121
- srcFloats = new Float32Array(alignedBuf.buffer, alignedBuf.byteOffset, floatsInBuf);
122
- }
123
-
124
- outBuffer.set(srcFloats.subarray(0, floatsToCopy), outOffset);
125
-
126
- totalFloatsRead += floatsToCopy;
127
- outOffset += floatsToCopy;
128
-
129
- if (floatsToCopy < floatsInBuf) {
130
- // We didn't use the whole buffer. Update it.
131
- this.buffers[0] = buf.subarray(bytesToCopy);
132
- } else {
133
- // We used the whole buffer. Remove it.
134
- this.buffers.shift();
135
- }
136
- }
137
-
138
- return totalFloatsRead;
139
- }
70
+ private buffers: Buffer[] = [];
71
+ private waiter: ((val?: any) => void) | null = null;
72
+ public isDone = false;
73
+
74
+ push(data: Buffer) {
75
+ this.buffers.push(data);
76
+ if (this.waiter) {
77
+ this.waiter();
78
+ this.waiter = null;
79
+ }
80
+ }
81
+
82
+ markDone() {
83
+ this.isDone = true;
84
+ if (this.waiter) {
85
+ this.waiter();
86
+ this.waiter = null;
87
+ }
88
+ }
89
+
90
+ async read(outBuffer: Float32Array): Promise<number> {
91
+ if (this.buffers.length === 0 && !this.isDone) {
92
+ await new Promise<void>((resolve) => {
93
+ this.waiter = resolve;
94
+ });
95
+ }
96
+
97
+ if (this.buffers.length === 0 && this.isDone) {
98
+ return 0;
99
+ }
100
+
101
+ let totalFloatsRead = 0;
102
+ let outOffset = 0;
103
+ const maxFloats = outBuffer.length;
104
+
105
+ while (this.buffers.length > 0 && totalFloatsRead < maxFloats) {
106
+ const buf = this.buffers[0] as Buffer; // ts not smart enough to check loop condition
107
+ const floatsInBuf = buf.length / 4;
108
+ const floatsNeeded = maxFloats - totalFloatsRead;
109
+
110
+ const floatsToCopy = Math.min(floatsInBuf, floatsNeeded);
111
+ const bytesToCopy = floatsToCopy * 4;
112
+
113
+ // Copy to outBuffer.
114
+ // Create a view on the buffer to read floats.
115
+
116
+ // We need to ensure byteOffset is a multiple of 4.
117
+ // If not, we must copy the buffer to a new one.
118
+ let srcFloats: Float32Array;
119
+ if (buf.byteOffset % 4 === 0) {
120
+ srcFloats = new Float32Array(buf.buffer, buf.byteOffset, floatsInBuf);
121
+ } else {
122
+ const alignedBuf = new Uint8Array(buf);
123
+ srcFloats = new Float32Array(alignedBuf.buffer, alignedBuf.byteOffset, floatsInBuf);
124
+ }
125
+
126
+ outBuffer.set(srcFloats.subarray(0, floatsToCopy), outOffset);
127
+
128
+ totalFloatsRead += floatsToCopy;
129
+ outOffset += floatsToCopy;
130
+
131
+ if (floatsToCopy < floatsInBuf) {
132
+ // We didn't use the whole buffer. Update it.
133
+ this.buffers[0] = buf.subarray(bytesToCopy);
134
+ } else {
135
+ // We used the whole buffer. Remove it.
136
+ this.buffers.shift();
137
+ }
138
+ }
139
+
140
+ return totalFloatsRead;
141
+ }
140
142
  }
141
143
 
142
144
  export class WebSocketWrapper {
143
- private client: Cartesia;
144
- private config: BackCompatWebSocketOptions;
145
- private socket: WebSocket | null = null;
146
- private sources: Map<string, AudioSource> = new Map();
147
- // Fallback source for messages without context_id or if we just want to capture everything (legacy behavior?)
148
- // The original test didn't use context_id explicitly in send() but expected a response source.
149
- // We'll map context_id to source.
150
- private defaultSource: AudioSource | null = null;
151
-
152
- constructor(client: Cartesia, config: BackCompatWebSocketOptions) {
153
- this.client = client;
154
- this.config = config;
155
- }
156
-
157
- async connect() {
158
- const baseURL = this.client.baseURL;
159
- // Construct WebSocket URL
160
- // baseURL is like https://api.cartesia.ai
161
- let urlStr = baseURL.replace(/^http/, "ws");
162
- if (!urlStr.includes("/tts/websocket")) {
163
- if (urlStr.endsWith("/")) {
164
- urlStr += "tts/websocket";
165
- } else {
166
- urlStr += "/tts/websocket";
167
- }
168
- }
169
-
170
- const url = new URL(urlStr);
171
-
172
- const headers: any = {
173
- "cartesia-version": "2025-04-16",
174
- };
175
- if (this.client.apiKey) {
176
- headers["Authorization"] = `Bearer ${this.client.apiKey}`;
177
- }
178
-
179
- this.socket = new WebSocket(url.toString(), {
180
- headers: headers,
181
- });
182
-
183
- return new Promise<void>((resolve, reject) => {
184
- this.socket!.on("open", () => {
185
- console.log("WebSocket connected.");
186
- resolve();
187
- });
188
-
189
- this.socket!.on("error", (err) => {
190
- console.error("WebSocket error:", err);
191
- reject(err);
192
- });
193
-
194
- this.socket!.on("message", (data) => {
195
- this.handleMessage(data);
196
- });
197
-
198
- this.socket!.on("close", () => {
199
- console.log("WebSocket closed.");
200
- this.sources.forEach((s) => { s.markDone(); });
201
- if (this.defaultSource) this.defaultSource.markDone();
202
- });
203
- });
204
- }
205
-
206
- private handleMessage(data: WebSocket.Data) {
207
- try {
208
- const str = data.toString();
209
- const msg = JSON.parse(str);
210
-
211
- const contextId = msg.context_id;
212
- let source = contextId ? this.sources.get(contextId) : this.defaultSource;
213
-
214
- // If we received a message for a context we don't know about, and we have a default source, use it
215
- if (!source && this.defaultSource) {
216
- source = this.defaultSource;
217
- }
218
-
219
- if (msg.type === "chunk" && msg.data) {
220
- const audioData = Buffer.from(msg.data, "base64");
221
- if (source) source.push(audioData);
222
- } else if (msg.type === "done") {
223
- if (source) source.markDone();
224
- } else if (msg.type === "error") {
225
- console.error("Server error:", msg);
226
- if (source) source.markDone(); // Fail the stream?
227
- }
228
- } catch (e) {
229
- console.error("Error parsing message:", e);
230
- }
231
- }
232
-
233
- async send(request: BackCompatWebSocketTtsRequest) {
234
- if (!this.socket) {
235
- throw new Error("WebSocket not connected");
236
- }
237
-
238
- // Ensure request has a context_id so we can route the response
239
- const contextId = request.contextId || uuidv4();
240
-
241
- const source = new AudioSource();
242
- this.sources.set(contextId, source);
243
- // Also set as default source if none exists, for compatibility with simple tests
244
- if (!this.defaultSource) {
245
- this.defaultSource = source;
246
- }
247
-
248
- // Construct payload
249
- const payload: any = {
250
- model_id: request.modelId,
251
- transcript: request.transcript,
252
- voice: request.voice,
253
- context_id: contextId,
254
- };
255
-
256
- // Output Format
257
- if (request.outputFormat) {
258
- payload.output_format = {
259
- container: request.outputFormat.container,
260
- encoding: request.outputFormat.encoding,
261
- sample_rate: request.outputFormat.sampleRate,
262
- bit_rate: request.outputFormat.bitRate,
263
- };
264
- } else if (this.config) {
265
- payload.output_format = {
266
- container: this.config.container,
267
- encoding: this.config.encoding,
268
- sample_rate: this.config.sampleRate,
269
- };
270
- }
271
-
272
- // Generation Config
273
- if (request.generationConfig) {
274
- payload.generation_config = request.generationConfig;
275
- }
276
-
277
- // Other fields
278
- if (request.continue !== undefined) payload.continue = request.continue;
279
- if (request.duration !== undefined) payload.duration = request.duration;
280
- if (request.addTimestamps !== undefined) payload.add_timestamps = request.addTimestamps;
281
- if (request.addPhonemeTimestamps !== undefined) payload.add_phoneme_timestamps = request.addPhonemeTimestamps;
282
-
283
- this.socket.send(JSON.stringify(payload));
284
-
285
- return {
286
- source: source
287
- };
288
- }
289
-
290
- disconnect() {
291
- if (this.socket) {
292
- this.socket.close();
293
- }
294
- }
145
+ private client: Cartesia;
146
+ private config: BackCompatWebSocketOptions;
147
+ private socket: WebSocket | null = null;
148
+ private sources: Map<string, AudioSource> = new Map();
149
+ // Fallback source for messages without context_id or if we just want to capture everything (legacy behavior?)
150
+ // The original test didn't use context_id explicitly in send() but expected a response source.
151
+ // We'll map context_id to source.
152
+ private defaultSource: AudioSource | null = null;
153
+
154
+ constructor(client: Cartesia, config: BackCompatWebSocketOptions) {
155
+ this.client = client;
156
+ this.config = config;
157
+ }
158
+
159
+ async connect() {
160
+ const baseURL = this.client.baseURL;
161
+ // Construct WebSocket URL
162
+ // baseURL is like https://api.cartesia.ai
163
+ let urlStr = baseURL.replace(/^http/, 'ws');
164
+ if (!urlStr.includes('/tts/websocket')) {
165
+ if (urlStr.endsWith('/')) {
166
+ urlStr += 'tts/websocket';
167
+ } else {
168
+ urlStr += '/tts/websocket';
169
+ }
170
+ }
171
+
172
+ const url = new URL(urlStr);
173
+
174
+ const headers: any = {
175
+ 'cartesia-version': '2025-04-16',
176
+ };
177
+ if (this.client.apiKey) {
178
+ headers['Authorization'] = `Bearer ${this.client.apiKey}`;
179
+ }
180
+
181
+ this.socket = new WebSocket(url.toString(), {
182
+ headers: headers,
183
+ });
184
+
185
+ return new Promise<void>((resolve, reject) => {
186
+ this.socket!.on('open', () => {
187
+ console.log('WebSocket connected.');
188
+ resolve();
189
+ });
190
+
191
+ this.socket!.on('error', (err) => {
192
+ console.error('WebSocket error:', err);
193
+ reject(err);
194
+ });
195
+
196
+ this.socket!.on('message', (data) => {
197
+ this.handleMessage(data);
198
+ });
199
+
200
+ this.socket!.on('close', () => {
201
+ console.log('WebSocket closed.');
202
+ this.sources.forEach((s) => {
203
+ s.markDone();
204
+ });
205
+ if (this.defaultSource) this.defaultSource.markDone();
206
+ });
207
+ });
208
+ }
209
+
210
+ private handleMessage(data: WebSocket.Data) {
211
+ try {
212
+ const str = data.toString();
213
+ const msg = JSON.parse(str);
214
+
215
+ const contextId = msg.context_id;
216
+ let source = contextId ? this.sources.get(contextId) : this.defaultSource;
217
+
218
+ // If we received a message for a context we don't know about, and we have a default source, use it
219
+ if (!source && this.defaultSource) {
220
+ source = this.defaultSource;
221
+ }
222
+
223
+ if (msg.type === 'chunk' && msg.data) {
224
+ const audioData = Buffer.from(msg.data, 'base64');
225
+ if (source) source.push(audioData);
226
+ } else if (msg.type === 'done') {
227
+ if (source) source.markDone();
228
+ } else if (msg.type === 'error') {
229
+ console.error('Server error:', msg);
230
+ if (source) source.markDone(); // Fail the stream?
231
+ }
232
+ } catch (e) {
233
+ console.error('Error parsing message:', e);
234
+ }
235
+ }
236
+
237
+ async send(request: BackCompatWebSocketTtsRequest) {
238
+ if (!this.socket) {
239
+ throw new Error('WebSocket not connected');
240
+ }
241
+
242
+ // Ensure request has a context_id so we can route the response
243
+ const contextId = request.contextId || uuidv4();
244
+
245
+ const source = new AudioSource();
246
+ this.sources.set(contextId, source);
247
+ // Also set as default source if none exists, for compatibility with simple tests
248
+ if (!this.defaultSource) {
249
+ this.defaultSource = source;
250
+ }
251
+
252
+ // Construct payload
253
+ const payload: any = {
254
+ model_id: request.modelId,
255
+ transcript: request.transcript,
256
+ voice: request.voice,
257
+ context_id: contextId,
258
+ };
259
+
260
+ // Output Format
261
+ if (request.outputFormat) {
262
+ payload.output_format = {
263
+ container: request.outputFormat.container,
264
+ encoding: request.outputFormat.encoding,
265
+ sample_rate: request.outputFormat.sampleRate,
266
+ bit_rate: request.outputFormat.bitRate,
267
+ };
268
+ } else if (this.config) {
269
+ payload.output_format = {
270
+ container: this.config.container,
271
+ encoding: this.config.encoding,
272
+ sample_rate: this.config.sampleRate,
273
+ };
274
+ }
275
+
276
+ // Generation Config
277
+ if (request.generationConfig) {
278
+ payload.generation_config = request.generationConfig;
279
+ }
280
+
281
+ // Other fields
282
+ if (request.continue !== undefined) payload.continue = request.continue;
283
+ if (request.duration !== undefined) payload.duration = request.duration;
284
+ if (request.addTimestamps !== undefined) payload.add_timestamps = request.addTimestamps;
285
+ if (request.addPhonemeTimestamps !== undefined)
286
+ payload.add_phoneme_timestamps = request.addPhonemeTimestamps;
287
+
288
+ this.socket.send(JSON.stringify(payload));
289
+
290
+ return {
291
+ source: source,
292
+ };
293
+ }
294
+
295
+ disconnect() {
296
+ if (this.socket) {
297
+ this.socket.close();
298
+ }
299
+ }
300
+ }
301
+
302
+ export interface BackCompatTtsGenerateOptions {
303
+ modelId?: string;
304
+ outputFormat?: {
305
+ container: 'raw' | 'wav' | 'mp3';
306
+ encoding?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_alaw' | 'pcm_mulaw';
307
+ sampleRate: number;
308
+ bitRate?: number;
309
+ };
310
+ language?: string;
311
+ generationConfig?: BackCompatGenerationConfig;
312
+ speed?: 'slow' | 'normal' | 'fast';
313
+ pronunciationDictId?: string;
295
314
  }
296
315
 
297
316
  /** @deprecated Use the new SDK's tts methods on the {@link Cartesia} instance instead. */
298
317
  export class TTSWrapper {
299
- private client: Cartesia;
300
-
301
- constructor(client: Cartesia) {
302
- this.client = client;
303
- }
304
-
305
- /** @deprecated Use {@link Cartesia.tts.websocket} instead. */
306
- websocket(config: BackCompatWebSocketOptions) {
307
- return new WebSocketWrapper(this.client, config);
308
- }
309
-
310
- /** @deprecated Use {@link Cartesia.tts.generate} instead. */
311
- async bytes(request: BackCompatTtsRequest, requestOptions?: BackCompatRequestOptions): Promise<Readable> {
312
- const params: any = {
313
- model_id: request.modelId,
314
- transcript: request.transcript,
315
- voice: request.voice,
316
- generation_config: request.generationConfig,
317
- duration: request.duration,
318
- language: request.language,
319
- speed: request.speed,
320
- pronunciation_dict_id: request.pronunciationDictId,
321
- };
322
-
323
- if (request.outputFormat) {
324
- params.output_format = {
325
- container: request.outputFormat.container,
326
- encoding: request.outputFormat.encoding,
327
- sample_rate: request.outputFormat.sampleRate,
328
- bit_rate: request.outputFormat.bitRate,
329
- };
330
- }
331
-
332
- const options: any = {};
333
- if (requestOptions) {
334
- if (requestOptions.timeoutInSeconds) {
335
- options.timeout = requestOptions.timeoutInSeconds * 1000;
336
- }
337
- if (requestOptions.maxRetries !== undefined) {
338
- options.maxRetries = requestOptions.maxRetries;
339
- }
340
- options.headers = requestOptions.headers;
341
- options.signal = requestOptions.abortSignal;
342
- }
343
-
344
- const response = await wrap(this.client.tts.generate(params, options));
345
- if (!response.body) {
346
- throw new Error("Response body is null");
347
- }
348
-
349
- return Readable.fromWeb(response.body);
350
- }
318
+ private client: Cartesia;
319
+
320
+ constructor(client: Cartesia) {
321
+ this.client = client;
322
+ }
323
+
324
+ /** @deprecated Use {@link Cartesia.tts.websocket} instead. */
325
+ websocket(config: BackCompatWebSocketOptions) {
326
+ return new WebSocketWrapper(this.client, config);
327
+ }
328
+
329
+ /**
330
+ * Generate speech from text.
331
+ * @param transcript The text to convert to speech
332
+ * @param voiceId The voice ID to use
333
+ * @param options Generation options
334
+ * @param signal Optional abort signal
335
+ * @param _source Optional source identifier (e.g., "playground_tts") - for tracking purposes
336
+ * @deprecated Use {@link Cartesia.tts.generate} instead.
337
+ */
338
+ async generate(
339
+ transcript: string,
340
+ voiceId: string,
341
+ options?: BackCompatTtsGenerateOptions,
342
+ signal?: AbortSignal,
343
+ _source?: string,
344
+ ): Promise<Readable> {
345
+ const params: any = {
346
+ model_id: options?.modelId ?? 'sonic-2',
347
+ transcript,
348
+ voice: { mode: 'id', id: voiceId },
349
+ };
350
+
351
+ if (options?.outputFormat) {
352
+ params.output_format = {
353
+ container: options.outputFormat.container,
354
+ encoding: options.outputFormat.encoding,
355
+ sample_rate: options.outputFormat.sampleRate,
356
+ bit_rate: options.outputFormat.bitRate,
357
+ };
358
+ } else {
359
+ // Default output format
360
+ params.output_format = {
361
+ container: 'wav',
362
+ encoding: 'pcm_s16le',
363
+ sample_rate: 44100,
364
+ };
365
+ }
366
+
367
+ if (options?.language) {
368
+ params.language = options.language;
369
+ }
370
+ if (options?.generationConfig) {
371
+ params.generation_config = options.generationConfig;
372
+ }
373
+ if (options?.speed) {
374
+ params.speed = options.speed;
375
+ }
376
+ if (options?.pronunciationDictId) {
377
+ params.pronunciation_dict_id = options.pronunciationDictId;
378
+ }
379
+
380
+ const requestOptions: any = {};
381
+ if (signal) {
382
+ requestOptions.signal = signal;
383
+ }
384
+
385
+ const response = await wrap(this.client.tts.generate(params, requestOptions));
386
+ if (!response.body) {
387
+ throw new Error('Response body is null');
388
+ }
389
+
390
+ return Readable.fromWeb(response.body);
391
+ }
392
+
393
+ /** @deprecated Use {@link Cartesia.tts.generate} instead. */
394
+ async bytes(request: BackCompatTtsRequest, requestOptions?: BackCompatRequestOptions): Promise<Readable> {
395
+ const params: any = {
396
+ model_id: request.modelId,
397
+ transcript: request.transcript,
398
+ voice: request.voice,
399
+ generation_config: request.generationConfig,
400
+ duration: request.duration,
401
+ language: request.language,
402
+ speed: request.speed,
403
+ pronunciation_dict_id: request.pronunciationDictId,
404
+ };
405
+
406
+ if (request.outputFormat) {
407
+ params.output_format = {
408
+ container: request.outputFormat.container,
409
+ encoding: request.outputFormat.encoding,
410
+ sample_rate: request.outputFormat.sampleRate,
411
+ bit_rate: request.outputFormat.bitRate,
412
+ };
413
+ }
414
+
415
+ const options: any = {};
416
+ if (requestOptions) {
417
+ if (requestOptions.timeoutInSeconds) {
418
+ options.timeout = requestOptions.timeoutInSeconds * 1000;
419
+ }
420
+ if (requestOptions.maxRetries !== undefined) {
421
+ options.maxRetries = requestOptions.maxRetries;
422
+ }
423
+ options.headers = requestOptions.headers;
424
+ options.signal = requestOptions.abortSignal;
425
+ }
426
+
427
+ const response = await wrap(this.client.tts.generate(params, options));
428
+ if (!response.body) {
429
+ throw new Error('Response body is null');
430
+ }
431
+
432
+ return Readable.fromWeb(response.body);
433
+ }
351
434
  }