@cartesia/cartesia-js 1.0.0 → 1.0.1
This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/.turbo/turbo-build.log +47 -47
- package/CHANGELOG.md +6 -0
- package/LICENSE.md +21 -0
- package/README.md +92 -19
- package/dist/{chunk-F4QWVJY3.js → chunk-2NA5SEML.js} +2 -2
- package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
- package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
- package/dist/{chunk-FN7BK4PS.js → chunk-ASZKHN7Q.js} +44 -26
- package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
- package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
- package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
- package/dist/{chunk-JYLAM6VU.js → chunk-LZO6K34D.js} +2 -2
- package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
- package/dist/{chunk-IEN4NCER.js → chunk-OFH3ML4L.js} +3 -3
- package/dist/index.cjs +102 -31
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +15 -9
- package/dist/lib/client.js +2 -2
- package/dist/lib/constants.js +1 -1
- package/dist/lib/index.cjs +97 -30
- package/dist/lib/index.js +8 -8
- package/dist/react/index.cjs +202 -86
- package/dist/react/index.d.cts +4 -3
- package/dist/react/index.d.ts +4 -3
- package/dist/react/index.js +115 -66
- package/dist/react/utils.js +2 -2
- package/dist/tts/index.cjs +97 -30
- package/dist/tts/index.js +6 -6
- package/dist/tts/player.cjs +5 -0
- package/dist/tts/player.js +4 -3
- package/dist/tts/source.cjs +50 -4
- package/dist/tts/source.d.cts +16 -6
- package/dist/tts/source.d.ts +16 -6
- package/dist/tts/source.js +4 -2
- package/dist/tts/utils.cjs +18 -6
- package/dist/tts/utils.d.cts +7 -5
- package/dist/tts/utils.d.ts +7 -5
- package/dist/tts/utils.js +3 -2
- package/dist/tts/websocket.cjs +97 -30
- package/dist/tts/websocket.d.cts +12 -8
- package/dist/tts/websocket.d.ts +12 -8
- package/dist/tts/websocket.js +5 -5
- package/dist/types/index.d.cts +60 -4
- package/dist/types/index.d.ts +60 -4
- package/dist/voices/index.js +3 -3
- package/package.json +1 -1
- package/src/index.ts +2 -0
- package/src/react/index.ts +114 -63
- package/src/tts/source.ts +53 -7
- package/src/tts/utils.ts +26 -12
- package/src/tts/websocket.ts +33 -16
- package/src/types/index.ts +81 -3
package/src/tts/websocket.ts
CHANGED
|
@@ -6,8 +6,10 @@ import { CARTESIA_VERSION, constructApiUrl } from "../lib/constants";
|
|
|
6
6
|
import type {
|
|
7
7
|
ConnectionEventData,
|
|
8
8
|
EmitteryCallbacks,
|
|
9
|
+
StreamOptions,
|
|
9
10
|
StreamRequest,
|
|
10
11
|
WebSocketOptions,
|
|
12
|
+
WordTimestamps,
|
|
11
13
|
} from "../types";
|
|
12
14
|
import Source from "./source";
|
|
13
15
|
import {
|
|
@@ -21,6 +23,8 @@ export default class WebSocket extends Client {
|
|
|
21
23
|
socket?: PartySocketWebSocket;
|
|
22
24
|
#isConnected = false;
|
|
23
25
|
#sampleRate: number;
|
|
26
|
+
#container: string;
|
|
27
|
+
#encoding: string;
|
|
24
28
|
|
|
25
29
|
/**
|
|
26
30
|
* Create a new WebSocket client.
|
|
@@ -28,18 +32,20 @@ export default class WebSocket extends Client {
|
|
|
28
32
|
* @param args - Arguments to pass to the Client constructor.
|
|
29
33
|
*/
|
|
30
34
|
constructor(
|
|
31
|
-
{ sampleRate }: WebSocketOptions,
|
|
35
|
+
{ sampleRate, container, encoding }: WebSocketOptions,
|
|
32
36
|
...args: ConstructorParameters<typeof Client>
|
|
33
37
|
) {
|
|
34
38
|
super(...args);
|
|
35
39
|
|
|
36
40
|
this.#sampleRate = sampleRate;
|
|
41
|
+
this.#container = container ?? "raw"; // Default to raw audio for backwards compatibility.
|
|
42
|
+
this.#encoding = encoding ?? "pcm_f32le"; // Default to 32-bit floating point PCM for backwards compatibility.
|
|
37
43
|
}
|
|
38
44
|
|
|
39
45
|
/**
|
|
40
46
|
* Send a message over the WebSocket to start a stream.
|
|
41
47
|
*
|
|
42
|
-
* @param inputs - Stream options.
|
|
48
|
+
* @param inputs - Stream options. Defined in the StreamRequest type.
|
|
43
49
|
* @param options - Options for the stream.
|
|
44
50
|
* @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
|
|
45
51
|
* If set to `0`, the stream will not time out.
|
|
@@ -47,33 +53,37 @@ export default class WebSocket extends Client {
|
|
|
47
53
|
* @returns An Emittery instance that emits messages from the WebSocket.
|
|
48
54
|
* @returns An abort function that can be called to cancel the stream.
|
|
49
55
|
*/
|
|
50
|
-
send(
|
|
51
|
-
inputs: StreamRequest["inputs"],
|
|
52
|
-
{ timeout = 0 }: StreamRequest["options"] = {},
|
|
53
|
-
) {
|
|
56
|
+
send({ ...inputs }: StreamRequest, { timeout = 0 }: StreamOptions = {}) {
|
|
54
57
|
if (!this.#isConnected) {
|
|
55
58
|
throw new Error("Not connected to WebSocket. Call .connect() first.");
|
|
56
59
|
}
|
|
57
60
|
|
|
61
|
+
if (!inputs.context_id) {
|
|
62
|
+
inputs.context_id = this.#generateId();
|
|
63
|
+
}
|
|
64
|
+
if (!inputs.output_format) {
|
|
65
|
+
inputs.output_format = {
|
|
66
|
+
container: this.#container,
|
|
67
|
+
encoding: this.#encoding,
|
|
68
|
+
sample_rate: this.#sampleRate,
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
58
72
|
// Send audio request.
|
|
59
|
-
const contextId = this.#generateId();
|
|
60
73
|
this.socket?.send(
|
|
61
74
|
JSON.stringify({
|
|
62
|
-
context_id: contextId,
|
|
63
75
|
...inputs,
|
|
64
|
-
output_format: {
|
|
65
|
-
container: "raw",
|
|
66
|
-
encoding: "pcm_f32le",
|
|
67
|
-
sample_rate: this.#sampleRate,
|
|
68
|
-
},
|
|
69
76
|
}),
|
|
70
77
|
);
|
|
71
78
|
|
|
72
79
|
const emitter = new Emittery<{
|
|
73
80
|
message: string;
|
|
81
|
+
timestamps: WordTimestamps;
|
|
74
82
|
}>();
|
|
75
83
|
const source = new Source({
|
|
76
84
|
sampleRate: this.#sampleRate,
|
|
85
|
+
encoding: this.#encoding,
|
|
86
|
+
container: this.#container,
|
|
77
87
|
});
|
|
78
88
|
// Used to signal that the stream is complete, either because the
|
|
79
89
|
// WebSocket has closed, or because the stream has finished.
|
|
@@ -84,19 +94,26 @@ export default class WebSocket extends Client {
|
|
|
84
94
|
timeoutId = setTimeout(streamCompleteController.abort, timeout);
|
|
85
95
|
}
|
|
86
96
|
const handleMessage = createMessageHandlerForContextId(
|
|
87
|
-
|
|
88
|
-
async ({ chunk, message }) => {
|
|
97
|
+
inputs.context_id,
|
|
98
|
+
async ({ chunk, message, data }) => {
|
|
89
99
|
emitter.emit("message", message);
|
|
100
|
+
if (data.type === "timestamps") {
|
|
101
|
+
emitter.emit("timestamps", data.word_timestamps);
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
90
104
|
if (isSentinel(chunk)) {
|
|
91
105
|
await source.close();
|
|
92
106
|
streamCompleteController.abort();
|
|
93
107
|
return;
|
|
94
108
|
}
|
|
95
|
-
await source.enqueue(base64ToArray([chunk]));
|
|
96
109
|
if (timeoutId) {
|
|
97
110
|
clearTimeout(timeoutId);
|
|
98
111
|
timeoutId = setTimeout(streamCompleteController.abort, timeout);
|
|
99
112
|
}
|
|
113
|
+
if (!chunk) {
|
|
114
|
+
return;
|
|
115
|
+
}
|
|
116
|
+
await source.enqueue(base64ToArray([chunk], this.#encoding));
|
|
100
117
|
},
|
|
101
118
|
);
|
|
102
119
|
this.socket?.addEventListener("message", handleMessage, {
|
package/src/types/index.ts
CHANGED
|
@@ -14,13 +14,85 @@ export type ConnectionEventData = {
|
|
|
14
14
|
close: never;
|
|
15
15
|
};
|
|
16
16
|
|
|
17
|
+
export type VoiceSpecifier =
|
|
18
|
+
| {
|
|
19
|
+
mode: "id";
|
|
20
|
+
id: string;
|
|
21
|
+
}
|
|
22
|
+
| {
|
|
23
|
+
mode: "embedding";
|
|
24
|
+
embedding: number[];
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
export type Emotion =
|
|
28
|
+
| "anger"
|
|
29
|
+
| "sadness"
|
|
30
|
+
| "positivity"
|
|
31
|
+
| "curiosity"
|
|
32
|
+
| "surprise";
|
|
33
|
+
export type Intensity = "lowest" | "low" | "high" | "highest";
|
|
34
|
+
export type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
|
|
35
|
+
|
|
36
|
+
export type VoiceOptions = VoiceSpecifier & {
|
|
37
|
+
__experimental_controls?: {
|
|
38
|
+
speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
|
|
39
|
+
emotion?: EmotionControl[];
|
|
40
|
+
};
|
|
41
|
+
};
|
|
42
|
+
|
|
17
43
|
export type StreamRequest = {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
44
|
+
model_id: string;
|
|
45
|
+
transcript: string;
|
|
46
|
+
voice: VoiceOptions;
|
|
47
|
+
output_format?: {
|
|
48
|
+
container: string;
|
|
49
|
+
encoding: string;
|
|
50
|
+
sample_rate: number;
|
|
21
51
|
};
|
|
52
|
+
context_id?: string;
|
|
53
|
+
continue?: boolean;
|
|
54
|
+
duration?: number;
|
|
55
|
+
language?: string;
|
|
56
|
+
add_timestamps?: boolean;
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
export type StreamOptions = {
|
|
60
|
+
timeout?: number;
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
export type WebSocketBaseResponse = {
|
|
64
|
+
context_id: string;
|
|
65
|
+
status_code: number;
|
|
66
|
+
done: boolean;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
export type WordTimestamps = {
|
|
70
|
+
words: string[];
|
|
71
|
+
start: number[];
|
|
72
|
+
end: number[];
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
export type WebSocketTimestampsResponse = WebSocketBaseResponse & {
|
|
76
|
+
type: "timestamps";
|
|
77
|
+
word_timestamps: WordTimestamps;
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
export type WebSocketChunkResponse = WebSocketBaseResponse & {
|
|
81
|
+
type: "chunk";
|
|
82
|
+
data: string;
|
|
83
|
+
step_time: number;
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
export type WebSocketErrorResponse = WebSocketBaseResponse & {
|
|
87
|
+
type: "error";
|
|
88
|
+
error: string;
|
|
22
89
|
};
|
|
23
90
|
|
|
91
|
+
export type WebSocketResponse =
|
|
92
|
+
| WebSocketTimestampsResponse
|
|
93
|
+
| WebSocketChunkResponse
|
|
94
|
+
| WebSocketErrorResponse;
|
|
95
|
+
|
|
24
96
|
export type EmitteryCallbacks<T> = {
|
|
25
97
|
on: Emittery<T>["on"];
|
|
26
98
|
off: Emittery<T>["off"];
|
|
@@ -56,6 +128,8 @@ export type CloneResponse = {
|
|
|
56
128
|
};
|
|
57
129
|
|
|
58
130
|
export type WebSocketOptions = {
|
|
131
|
+
container?: string;
|
|
132
|
+
encoding?: string;
|
|
59
133
|
sampleRate: number;
|
|
60
134
|
};
|
|
61
135
|
|
|
@@ -65,3 +139,7 @@ export type SourceEventData = {
|
|
|
65
139
|
wait: never;
|
|
66
140
|
read: never;
|
|
67
141
|
};
|
|
142
|
+
|
|
143
|
+
export type TypedArray = Float32Array | Int16Array | Uint8Array;
|
|
144
|
+
|
|
145
|
+
export type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
|