@cartesia/cartesia-js 3.0.0-b14 → 3.0.0-b16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,159 @@
1
1
  // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  import * as WS from 'ws';
4
+ import { humanId } from 'human-id';
4
5
  import { TTSEmitter, buildURL } from './internal-base';
5
6
  import * as TTSAPI from './tts';
6
- import { Cartesia } from '../../client';
7
+ import type { Cartesia } from '../../client';
8
+
9
/**
 * Shape of the request accepted by the context-level `send()` / `generate()`
 * helpers: a regular `GenerationRequest` with `context_id` removed, because
 * the context fills that field in itself.
 */
export type ContextGenerateRequest = Omit<TTSAPI.GenerationRequest, 'context_id'>;
13
+
14
/**
 * Settings supplied when creating a `TTSWSContext`.
 */
export interface ContextOptions {
  /** Model identifier sent as `model_id` with every `push()` / `done()` request. */
  model_id: string;
  /** Voice sent as `voice` with every `push()` / `done()` request. */
  voice: TTSAPI.VoiceSpecifier;
  /** Output format sent as `output_format` with every `push()` / `done()` request. */
  output_format: TTSAPI.GenerationRequest['output_format'];
  /** Explicit context id; when omitted, `TTSWSContext` generates one. */
  contextId?: string;
}
23
+
24
/**
 * Helper bound to a single TTS WebSocket context.
 *
 * Stores the model, voice and output format once and stamps every outgoing
 * request with this context's `context_id`, so callers can stream transcript
 * chunks and read back only the responses that belong to this context.
 */
export class TTSWSContext {
  private _ws: TTSWS;
  private _options: Omit<ContextOptions, 'contextId'>;
  /** Identifier attached to every request sent through this helper. */
  readonly contextId: string;

  /**
   * @param ws - Connection used to send requests and to listen for events.
   * @param options - Generation settings reused by `push()` and `done()`.
   *   `contextId` is optional; a lowercase, dash-separated human-readable id
   *   is generated when it is omitted.
   */
  constructor(ws: TTSWS, options: ContextOptions) {
    this._ws = ws;
    // Copy only the generation settings; the id is kept on its own field.
    this._options = {
      model_id: options.model_id,
      voice: options.voice,
      output_format: options.output_format,
    };
    this.contextId = options.contextId ?? humanId({ separator: '-', capitalize: false });
  }

  /**
   * Build and send one transcript request for this context.
   * Shared by `push()` and `done()`, which differ only in the transcript text
   * and the `continue` flag.
   */
  private _sendTranscript(transcript: string, shouldContinue: boolean): void {
    this._ws.send({
      model_id: this._options.model_id,
      voice: this._options.voice,
      output_format: this._options.output_format,
      transcript,
      context_id: this.contextId,
      continue: shouldContinue,
    });
  }

  /**
   * Send a transcript chunk with `continue: true`.
   * Call this repeatedly to stream chunks, then call `done()` to finish.
   *
   * @param options - Object carrying the `transcript` text to send.
   */
  async push(options: { transcript: string }) {
    this._sendTranscript(options.transcript, true);
  }

  /**
   * Signal that no more transcript chunks will be sent by sending an empty
   * transcript with `continue: false`.
   */
  async done() {
    this._sendTranscript('', false);
  }

  /**
   * Send a generation request without waiting for responses.
   * Any fields of `request` are forwarded as-is; `context_id` is always set
   * to this context's id.
   *
   * @param request - Generation request without a `context_id`.
   */
  send(request: ContextGenerateRequest) {
    this._ws.send({
      ...request,
      context_id: this.contextId,
    });
  }

  /**
   * Iterate over responses for this context.
   *
   * Events that carry a `context_id` different from this context's id are
   * skipped; events without a `context_id` field are passed through. The
   * iteration ends after yielding a "done" event.
   *
   * The event listener is registered when iteration starts (on the first
   * `next()` call), so events emitted before that point are not observed.
   *
   * @returns An async generator of the responses seen for this context.
   * @throws Error with the JSON-serialized event after an "error" event has
   *   been yielded.
   * @throws Error if the underlying socket closes before a "done" or "error"
   *   event arrives (previously this case waited forever).
   */
  async *receive(): AsyncGenerator<TTSAPI.WebsocketResponse> {
    const socket = this._ws.socket;
    const pending: TTSAPI.WebsocketResponse[] = [];
    // A socket that is already closed will never emit 'close' again.
    let closed = socket.readyState === socket.CLOSED;
    let wake: (() => void) | null = null;

    const notify = () => {
      wake?.();
      wake = null;
    };

    const onEvent = (event: TTSAPI.WebsocketResponse) => {
      // Drop events that explicitly belong to another context.
      if ('context_id' in event && event.context_id !== this.contextId) {
        return;
      }
      pending.push(event);
      notify();
    };

    const onClose = () => {
      closed = true;
      notify();
    };

    this._ws.on('event', onEvent);
    socket.on('close', onClose);

    try {
      while (true) {
        const event = pending.shift();
        if (event === undefined) {
          // Buffered events are drained first; only then is a closed socket fatal.
          if (closed) {
            throw new Error('WebSocket closed before a "done" or "error" event was received');
          }
          await new Promise<void>((r) => {
            wake = r;
          });
          continue;
        }

        yield event;
        if (event.type === 'done') {
          return;
        }
        if (event.type === 'error') {
          // Build the error from the event just yielded rather than from shared state.
          throw new Error(JSON.stringify(event));
        }
      }
    } finally {
      // Runs on normal completion, on throw, and when the consumer stops early.
      this._ws.off('event', onEvent);
      socket.off('close', onClose);
    }
  }

  /**
   * Send a generation request and iterate over the responses.
   * Delegates to the connection's `generate()` with `context_id` set to this
   * context's id.
   *
   * @param request - Generation request without a `context_id`.
   */
  async *generate(request: ContextGenerateRequest): AsyncGenerator<TTSAPI.WebsocketResponse> {
    yield* this._ws.generate({
      ...request,
      context_id: this.contextId,
    });
  }

  /**
   * Cancel this context to stop generating speech.
   */
  cancel() {
    this._ws.cancelContext(this.contextId);
  }
}
7
151
 
8
152
  export class TTSWS extends TTSEmitter {
9
153
  url: URL;
10
154
  socket: WS.WebSocket;
11
155
  private client: Cartesia;
156
+ private _ready: Promise<void>;
12
157
 
13
158
  constructor(client: Cartesia, options?: WS.ClientOptions | undefined) {
14
159
  super();
@@ -23,6 +168,11 @@ export class TTSWS extends TTSEmitter {
23
168
  },
24
169
  });
25
170
 
171
+ this._ready = new Promise((resolve, reject) => {
172
+ this.socket.once('open', () => resolve());
173
+ this.socket.once('error', (err) => reject(err));
174
+ });
175
+
26
176
  this.socket.on('message', (wsEvent) => {
27
177
  const event = (() => {
28
178
  try {
@@ -58,6 +208,71 @@ export class TTSWS extends TTSEmitter {
58
208
  }
59
209
  }
60
210
 
211
/**
 * Send a generation request and iterate over the responses.
 *
 * When the request has a truthy `context_id`, events carrying a different
 * `context_id` are skipped; events without a `context_id` field are always
 * passed through. The iteration ends after yielding a "done" event.
 *
 * @param request - Generation request to send once the listeners are in place.
 * @returns An async generator of the responses received for the request.
 * @throws Error with the JSON-serialized event after an "error" event has
 *   been yielded.
 * @throws Error if the socket closes before a "done" or "error" event
 *   arrives (previously this case waited forever).
 */
async *generate(request: TTSAPI.GenerationRequest): AsyncGenerator<TTSAPI.WebsocketResponse> {
  const contextId = request.context_id;
  const pending: TTSAPI.WebsocketResponse[] = [];
  let closed = false;
  let wake: (() => void) | null = null;

  const notify = () => {
    wake?.();
    wake = null;
  };

  const onEvent = (event: TTSAPI.WebsocketResponse) => {
    // Filter by context_id only when the request specified one.
    if (contextId && 'context_id' in event && event.context_id !== contextId) {
      return;
    }
    pending.push(event);
    notify();
  };

  const onClose = () => {
    closed = true;
    notify();
  };

  // Listen before sending so no response can be missed.
  this.on('event', onEvent);
  this.socket.on('close', onClose);

  try {
    this.send(request);

    while (true) {
      const event = pending.shift();
      if (event === undefined) {
        // Buffered events are drained first; only then is a closed socket fatal.
        if (closed) {
          throw new Error('WebSocket closed before a "done" or "error" event was received');
        }
        await new Promise<void>((r) => {
          wake = r;
        });
        continue;
      }

      yield event;
      if (event.type === 'done') {
        return;
      }
      if (event.type === 'error') {
        // Build the error from the event just yielded rather than from shared state.
        throw new Error(JSON.stringify(event));
      }
    }
  } finally {
    // Runs on normal completion, on throw, and when the consumer stops early.
    this.off('event', onEvent);
    this.socket.off('close', onClose);
  }
}
261
+
262
/**
 * Send a cancel request for the given context to stop generating speech for it.
 *
 * @param contextId - Identifier of the context to cancel.
 */
cancelContext(contextId: string) {
  this.send({
    cancel: true,
    context_id: contextId,
  });
}
268
+
269
/**
 * Create a `TTSWSContext` helper that sends and receives over this connection.
 *
 * @param options - Model, voice, output format and optional context id.
 * @returns A new context bound to this connection.
 */
context(options: ContextOptions): TTSWSContext {
  const boundContext = new TTSWSContext(this, options);
  return boundContext;
}
275
+
61
276
  close(props?: { code: number; reason: string }) {
62
277
  try {
63
278
  this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK');
@@ -66,6 +281,14 @@ export class TTSWS extends TTSEmitter {
66
281
  }
67
282
  }
68
283
 
284
/**
 * Wait for the WebSocket connection to be ready.
 *
 * Settles with the readiness promise, which resolves on the socket's first
 * 'open' event and rejects on its first 'error' event.
 *
 * @returns This instance, so the call can be chained.
 * @throws The error reported by the socket if it failed before opening.
 */
async connect(): Promise<this> {
  return this._ready.then(() => this);
}
291
+
69
292
  private authHeaders(): Record<string, string> {
70
293
  if (this.client.token) {
71
294
  return { Authorization: `Bearer ${this.client.token}` };
@@ -163,7 +163,8 @@ export type SupportedLanguage =
163
163
  | 'kn'
164
164
  | 'ml'
165
165
  | 'mr'
166
- | 'pa';
166
+ | 'pa'
167
+ | (string & {});
167
168
 
168
169
  export interface Voice {
169
170
  /**
package/src/version.ts CHANGED
@@ -1 +1 @@
1
- export const VERSION = '3.0.0-b14'; // x-release-please-version
1
+ export const VERSION = '3.0.0-b16'; // x-release-please-version
package/version.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "3.0.0-b14";
1
+ export declare const VERSION = "3.0.0-b16";
2
2
  //# sourceMappingURL=version.d.mts.map
package/version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "3.0.0-b14";
1
+ export declare const VERSION = "3.0.0-b16";
2
2
  //# sourceMappingURL=version.d.ts.map
package/version.js CHANGED
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
- exports.VERSION = '3.0.0-b14'; // x-release-please-version
4
+ exports.VERSION = '3.0.0-b16'; // x-release-please-version
5
5
  //# sourceMappingURL=version.js.map
package/version.mjs CHANGED
@@ -1,2 +1,2 @@
1
- export const VERSION = '3.0.0-b14'; // x-release-please-version
1
+ export const VERSION = '3.0.0-b16'; // x-release-please-version
2
2
  //# sourceMappingURL=version.mjs.map