@cartesia/cartesia-js 3.0.0-b14 → 3.0.0-b16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/README.md +1 -1
- package/package.json +12 -11
- package/resources/tts/index.d.mts +1 -0
- package/resources/tts/index.d.mts.map +1 -1
- package/resources/tts/index.d.ts +1 -0
- package/resources/tts/index.d.ts.map +1 -1
- package/resources/tts/index.js +4 -1
- package/resources/tts/index.js.map +1 -1
- package/resources/tts/index.mjs +1 -0
- package/resources/tts/index.mjs.map +1 -1
- package/resources/tts/tts.d.mts +7 -0
- package/resources/tts/tts.d.mts.map +1 -1
- package/resources/tts/tts.d.ts +7 -0
- package/resources/tts/tts.d.ts.map +1 -1
- package/resources/tts/tts.js +9 -0
- package/resources/tts/tts.js.map +1 -1
- package/resources/tts/tts.mjs +9 -0
- package/resources/tts/tts.mjs.map +1 -1
- package/resources/tts/ws.d.mts +72 -1
- package/resources/tts/ws.d.mts.map +1 -1
- package/resources/tts/ws.d.ts +72 -1
- package/resources/tts/ws.d.ts.map +1 -1
- package/resources/tts/ws.js +190 -1
- package/resources/tts/ws.js.map +1 -1
- package/resources/tts/ws.mjs +188 -0
- package/resources/tts/ws.mjs.map +1 -1
- package/resources/voices.d.mts +1 -1
- package/resources/voices.d.mts.map +1 -1
- package/resources/voices.d.ts +1 -1
- package/resources/voices.d.ts.map +1 -1
- package/src/resources/tts/index.ts +2 -0
- package/src/resources/tts/tts.ts +11 -0
- package/src/resources/tts/ws.ts +224 -1
- package/src/resources/voices.ts +2 -1
- package/src/version.ts +1 -1
- package/version.d.mts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
package/src/resources/tts/ws.ts
CHANGED
|
@@ -1,14 +1,159 @@
|
|
|
1
1
|
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
3
|
import * as WS from 'ws';
|
|
4
|
+
import { humanId } from 'human-id';
|
|
4
5
|
import { TTSEmitter, buildURL } from './internal-base';
|
|
5
6
|
import * as TTSAPI from './tts';
|
|
6
|
-
import { Cartesia } from '../../client';
|
|
7
|
+
import type { Cartesia } from '../../client';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Request parameters accepted by TTSWSContext.send() and TTSWSContext.generate(): a GenerationRequest without context_id, which the context fills in from its own contextId.
|
|
11
|
+
*/
|
|
12
|
+
export type ContextGenerateRequest = Omit<TTSAPI.GenerationRequest, 'context_id'>;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Options for creating a context: the model, voice, and output format that push() and done() attach to every message, plus an optional explicit context id.
|
|
16
|
+
*/
|
|
17
|
+
export interface ContextOptions {
|
|
18
|
+
model_id: string; // sent as model_id on every push()/done() message
|
|
19
|
+
voice: TTSAPI.VoiceSpecifier; // sent as voice on every push()/done() message
|
|
20
|
+
output_format: TTSAPI.GenerationRequest['output_format']; // sent as output_format on every push()/done() message
|
|
21
|
+
contextId?: string; // when omitted, TTSWSContext generates an id with humanId()
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
 * Helper bound to a single `context_id` on a shared {@link TTSWS} connection.
 *
 * The context remembers the model, voice and output format it was created
 * with and stamps its `contextId` onto every outgoing message, so callers can
 * stream transcript chunks and read back only the responses for this context.
 */
export class TTSWSContext {
  private _ws: TTSWS;
  private _options: Omit<ContextOptions, 'contextId'>;
  /** Value sent as `context_id` on every message issued through this helper. */
  readonly contextId: string;

  /**
   * @param ws - Connection used to send messages and to receive events.
   * @param options - Generation settings. `contextId` is optional; when it is
   *   omitted an id is generated with `humanId` (dash-separated, lower case).
   */
  constructor(ws: TTSWS, options: ContextOptions) {
    this._ws = ws;
    this._options = {
      model_id: options.model_id,
      voice: options.voice,
      output_format: options.output_format,
    };
    this.contextId = options.contextId ?? humanId({ separator: '-', capitalize: false });
  }

  /**
   * Send a transcript chunk with `continue: true`.
   * Call this multiple times to stream transcript chunks, then call done() to finish.
   *
   * @param options - Object carrying the `transcript` text for this chunk.
   */
  async push(options: { transcript: string }) {
    this._sendChunk(options.transcript, true);
  }

  /**
   * Signal that no more transcript chunks will be sent.
   * Sends an empty transcript with `continue: false`.
   */
  async done() {
    this._sendChunk('', false);
  }

  /**
   * Send a generation request without waiting for responses.
   * Use this for streaming multiple transcript chunks.
   * The `context_id` is automatically set to this context's id.
   *
   * @param request - Generation request fields other than `context_id`.
   */
  send(request: ContextGenerateRequest) {
    this._ws.send({
      ...request,
      context_id: this.contextId,
    });
  }

  /**
   * Iterate over responses for this context.
   *
   * Events that carry a `context_id` different from this context's id are
   * skipped; events without a `context_id` field are passed through. The
   * listener is attached when iteration starts and is always removed when the
   * generator finishes, throws, or is abandoned by the consumer.
   *
   * @returns An async generator that completes after yielding a `done` event.
   * @throws Error whose message is the JSON-serialised event, right after an
   *   `error` event has been yielded.
   * @throws Error if the underlying socket closes before a `done` or `error`
   *   event arrives (previously the generator waited forever in that case).
   */
  async *receive(): AsyncGenerator<TTSAPI.WebsocketResponse> {
    const socket = this._ws.socket;
    const pending: TTSAPI.WebsocketResponse[] = [];
    // A socket that is already closed will never emit another 'close' event.
    let closed = socket.readyState === socket.CLOSED;
    let wake: (() => void) | null = null;

    // Release the consumer if it is currently parked on the wait promise.
    const notify = () => {
      wake?.();
      wake = null;
    };

    const onEvent = (event: TTSAPI.WebsocketResponse) => {
      // Filter by context_id
      if ('context_id' in event && event.context_id !== this.contextId) {
        return;
      }
      pending.push(event);
      notify();
    };

    const onClose = () => {
      closed = true;
      notify();
    };

    this._ws.on('event', onEvent);
    socket.once('close', onClose);

    try {
      for (;;) {
        const event = pending.shift();
        if (event !== undefined) {
          yield event;
          if (event.type === 'done') {
            return;
          }
          if (event.type === 'error') {
            // Build the error from the event just yielded so the two always match.
            throw new Error(JSON.stringify(event));
          }
          continue;
        }
        // Buffered events are drained first; only then is a close treated as fatal.
        if (closed) {
          throw new Error(`WebSocket closed before context "${this.contextId}" completed`);
        }
        await new Promise<void>((r) => {
          wake = r;
        });
      }
    } finally {
      this._ws.off('event', onEvent);
      socket.off('close', onClose);
    }
  }

  /**
   * Send a generation request and iterate over the responses.
   * The `context_id` is automatically set to this context's id.
   *
   * @param request - Generation request fields other than `context_id`.
   * @returns The responses produced by `TTSWS.generate` for this request.
   */
  async *generate(request: ContextGenerateRequest): AsyncGenerator<TTSAPI.WebsocketResponse> {
    yield* this._ws.generate({
      ...request,
      context_id: this.contextId,
    });
  }

  /**
   * Cancel this context to stop generating speech.
   */
  cancel() {
    this._ws.cancelContext(this.contextId);
  }

  /** Send one transcript chunk using the settings captured at construction. */
  private _sendChunk(transcript: string, shouldContinue: boolean) {
    this._ws.send({
      model_id: this._options.model_id,
      voice: this._options.voice,
      output_format: this._options.output_format,
      transcript,
      context_id: this.contextId,
      continue: shouldContinue,
    });
  }
}
|
|
7
151
|
|
|
8
152
|
export class TTSWS extends TTSEmitter {
|
|
9
153
|
url: URL;
|
|
10
154
|
socket: WS.WebSocket;
|
|
11
155
|
private client: Cartesia;
|
|
156
|
+
private _ready: Promise<void>;
|
|
12
157
|
|
|
13
158
|
constructor(client: Cartesia, options?: WS.ClientOptions | undefined) {
|
|
14
159
|
super();
|
|
@@ -23,6 +168,11 @@ export class TTSWS extends TTSEmitter {
|
|
|
23
168
|
},
|
|
24
169
|
});
|
|
25
170
|
|
|
171
|
+
this._ready = new Promise((resolve, reject) => {
|
|
172
|
+
this.socket.once('open', () => resolve());
|
|
173
|
+
this.socket.once('error', (err) => reject(err));
|
|
174
|
+
});
|
|
175
|
+
|
|
26
176
|
this.socket.on('message', (wsEvent) => {
|
|
27
177
|
const event = (() => {
|
|
28
178
|
try {
|
|
@@ -58,6 +208,71 @@ export class TTSWS extends TTSEmitter {
|
|
|
58
208
|
}
|
|
59
209
|
}
|
|
60
210
|
|
|
211
|
+
/**
 * Send a generation request and iterate over the responses.
 *
 * The event listener is attached before the request is sent. When the request
 * has a `context_id`, events carrying a different `context_id` are skipped;
 * events without a `context_id` field are always passed through. The listener
 * is removed when the generator finishes, throws, or is abandoned.
 *
 * @param request - Generation request to send over the socket.
 * @returns An async generator that completes after yielding a `done` event.
 * @throws Error whose message is the JSON-serialised event, right after an
 *   `error` event has been yielded.
 * @throws Error if the socket closes before a `done` or `error` event arrives
 *   (previously the generator waited forever in that case).
 */
async *generate(request: TTSAPI.GenerationRequest): AsyncGenerator<TTSAPI.WebsocketResponse> {
  const contextId = request.context_id;
  const socket = this.socket;
  const pending: TTSAPI.WebsocketResponse[] = [];
  // A socket that is already closed will never emit another 'close' event.
  let closed = socket.readyState === socket.CLOSED;
  let wake: (() => void) | null = null;

  // Release the consumer if it is currently parked on the wait promise.
  const notify = () => {
    wake?.();
    wake = null;
  };

  const onEvent = (event: TTSAPI.WebsocketResponse) => {
    // Filter by context_id if specified
    if (contextId && 'context_id' in event && event.context_id !== contextId) {
      return;
    }
    pending.push(event);
    notify();
  };

  const onClose = () => {
    closed = true;
    notify();
  };

  this.on('event', onEvent);
  socket.once('close', onClose);

  try {
    this.send(request);

    for (;;) {
      const event = pending.shift();
      if (event !== undefined) {
        yield event;
        if (event.type === 'done') {
          return;
        }
        if (event.type === 'error') {
          // Build the error from the event just yielded so the two always match.
          throw new Error(JSON.stringify(event));
        }
        continue;
      }
      // Buffered events are drained first; only then is a close treated as fatal.
      if (closed) {
        throw new Error('WebSocket closed before the generation completed');
      }
      await new Promise<void>((r) => {
        wake = r;
      });
    }
  } finally {
    this.off('event', onEvent);
    socket.off('close', onClose);
  }
}
|
|
261
|
+
|
|
262
|
+
/**
 * Ask the server to stop generating speech for one context.
 *
 * Sends a message with `cancel: true` and the given id as `context_id`.
 *
 * @param contextId - Id of the context to cancel.
 */
cancelContext(contextId: string) {
  this.send({
    cancel: true,
    context_id: contextId,
  });
}
|
|
268
|
+
|
|
269
|
+
/**
 * Build a context helper that sends and receives over this connection.
 *
 * @param options - Model, voice, output format and optional context id.
 * @returns A new `TTSWSContext` bound to this instance.
 */
context(options: ContextOptions): TTSWSContext {
  const boundContext = new TTSWSContext(this, options);
  return boundContext;
}
|
|
275
|
+
|
|
61
276
|
close(props?: { code: number; reason: string }) {
|
|
62
277
|
try {
|
|
63
278
|
this.socket.close(props?.code ?? 1000, props?.reason ?? 'OK');
|
|
@@ -66,6 +281,14 @@ export class TTSWS extends TTSEmitter {
|
|
|
66
281
|
}
|
|
67
282
|
}
|
|
68
283
|
|
|
284
|
+
/**
 * Wait until the WebSocket connection is ready.
 *
 * Resolves once the readiness promise settles successfully, and rejects with
 * whatever that promise rejects with.
 *
 * @returns A promise for this instance, so the call can be chained.
 */
connect(): Promise<this> {
  return this._ready.then(() => this);
}
|
|
291
|
+
|
|
69
292
|
private authHeaders(): Record<string, string> {
|
|
70
293
|
if (this.client.token) {
|
|
71
294
|
return { Authorization: `Bearer ${this.client.token}` };
|
package/src/resources/voices.ts
CHANGED
package/src/version.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const VERSION = '3.0.0-b14'; // x-release-please-version
|
|
1
|
+
export const VERSION = '3.0.0-b16'; // x-release-please-version
|
package/version.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "3.0.0-b14";
|
|
1
|
+
export declare const VERSION = "3.0.0-b16";
|
|
2
2
|
//# sourceMappingURL=version.d.mts.map
|
package/version.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const VERSION = "3.0.0-b14";
|
|
1
|
+
export declare const VERSION = "3.0.0-b16";
|
|
2
2
|
//# sourceMappingURL=version.d.ts.map
|
package/version.js
CHANGED
package/version.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export const VERSION = '3.0.0-b14'; // x-release-please-version
|
|
1
|
+
export const VERSION = '3.0.0-b16'; // x-release-please-version
|
|
2
2
|
//# sourceMappingURL=version.mjs.map
|