@cartesia/cartesia-js 0.0.4-alpha.0 → 1.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/.turbo/turbo-build.log +63 -45
  2. package/CHANGELOG.md +12 -0
  3. package/README.md +123 -16
  4. package/dist/chunk-36JBKJUN.js +119 -0
  5. package/dist/chunk-3F5E46FT.js +212 -0
  6. package/dist/{chunk-XPIMIAAE.js → chunk-3FL2SNIR.js} +1 -1
  7. package/dist/chunk-JGP5BIUV.js +34 -0
  8. package/dist/chunk-KWBSQZTY.js +25 -0
  9. package/dist/chunk-PQ6CIPFW.js +120 -0
  10. package/dist/chunk-RO7TY474.js +81 -0
  11. package/dist/chunk-T3RG6WV4.js +22 -0
  12. package/dist/{chunk-R4P7LWVZ.js → chunk-WIFMLPT5.js} +31 -6
  13. package/dist/chunk-WVTITUXX.js +58 -0
  14. package/dist/chunk-XHTDPLFR.js +19 -0
  15. package/dist/index.cjs +425 -166
  16. package/dist/index.d.cts +7 -3
  17. package/dist/index.d.ts +7 -3
  18. package/dist/index.js +13 -6
  19. package/dist/lib/client.cjs +49 -1
  20. package/dist/lib/client.d.cts +2 -0
  21. package/dist/lib/client.d.ts +2 -0
  22. package/dist/lib/client.js +3 -3
  23. package/dist/lib/constants.cjs +15 -8
  24. package/dist/lib/constants.d.cts +4 -4
  25. package/dist/lib/constants.d.ts +4 -4
  26. package/dist/lib/constants.js +6 -6
  27. package/dist/lib/index.cjs +310 -171
  28. package/dist/lib/index.d.cts +6 -2
  29. package/dist/lib/index.d.ts +6 -2
  30. package/dist/lib/index.js +9 -6
  31. package/dist/react/index.cjs +573 -290
  32. package/dist/react/index.d.cts +20 -14
  33. package/dist/react/index.d.ts +20 -14
  34. package/dist/react/index.js +157 -105
  35. package/dist/react/utils.js +2 -2
  36. package/dist/tts/index.cjs +496 -0
  37. package/dist/tts/index.d.cts +17 -0
  38. package/dist/tts/index.d.ts +17 -0
  39. package/dist/tts/index.js +12 -0
  40. package/dist/tts/player.cjs +198 -0
  41. package/dist/tts/player.d.cts +43 -0
  42. package/dist/tts/player.d.ts +43 -0
  43. package/dist/tts/player.js +8 -0
  44. package/dist/tts/source.cjs +181 -0
  45. package/dist/tts/source.d.cts +53 -0
  46. package/dist/tts/source.d.ts +53 -0
  47. package/dist/tts/source.js +7 -0
  48. package/dist/{audio → tts}/utils.cjs +25 -60
  49. package/dist/tts/utils.d.cts +67 -0
  50. package/dist/tts/utils.d.ts +67 -0
  51. package/dist/{audio → tts}/utils.js +2 -7
  52. package/dist/tts/websocket.cjs +479 -0
  53. package/dist/tts/websocket.d.cts +53 -0
  54. package/dist/tts/websocket.d.ts +53 -0
  55. package/dist/tts/websocket.js +11 -0
  56. package/dist/types/index.d.cts +50 -1
  57. package/dist/types/index.d.ts +50 -1
  58. package/dist/voices/index.cjs +157 -0
  59. package/dist/voices/index.d.cts +12 -0
  60. package/dist/voices/index.d.ts +12 -0
  61. package/dist/voices/index.js +9 -0
  62. package/package.json +2 -1
  63. package/src/index.ts +1 -0
  64. package/src/lib/client.ts +15 -1
  65. package/src/lib/constants.ts +15 -4
  66. package/src/lib/index.ts +6 -3
  67. package/src/react/index.ts +176 -110
  68. package/src/tts/index.ts +17 -0
  69. package/src/tts/player.ts +110 -0
  70. package/src/tts/source.ts +115 -0
  71. package/src/tts/utils.ts +150 -0
  72. package/src/tts/websocket.ts +214 -0
  73. package/src/types/index.ts +63 -0
  74. package/src/voices/index.ts +47 -0
  75. package/dist/audio/index.cjs +0 -404
  76. package/dist/audio/index.d.cts +0 -5
  77. package/dist/audio/index.d.ts +0 -5
  78. package/dist/audio/index.js +0 -10
  79. package/dist/audio/utils.d.cts +0 -5
  80. package/dist/audio/utils.d.ts +0 -5
  81. package/dist/chunk-4MHF74A7.js +0 -272
  82. package/dist/chunk-5TSWLYOW.js +0 -113
  83. package/dist/chunk-MJIFZWHS.js +0 -18
  84. package/dist/chunk-OVI3W3GG.js +0 -12
  85. package/dist/chunk-S6A27RQL.js +0 -18
  86. package/dist/index-C2_3XFxn.d.cts +0 -163
  87. package/dist/index-DgwnZezj.d.ts +0 -163
  88. package/src/audio/index.ts +0 -297
  89. package/src/audio/utils.ts +0 -220
@@ -0,0 +1,150 @@
1
+ import base64 from "base64-js";
2
+ import type Emittery from "emittery";
3
+ import type { Chunk, EmitteryCallbacks, Sentinel } from "../types";
4
+
5
/**
 * Decode base64-encoded audio chunks into one contiguous Float32Array.
 *
 * Sentinel (null) entries are removed before decoding. The bytes of every
 * remaining chunk are reinterpreted as 32-bit floats in the platform's native
 * byte order and concatenated in input order.
 *
 * @param b64 An array of base64-encoded audio buffers; sentinel values are
 * allowed and skipped.
 * @returns The samples of all chunks as a single Float32Array.
 * @throws {RangeError} If a decoded chunk's byte length is not a multiple of
 * the size of a 32-bit float.
 */
export function base64ToArray(b64: Chunk[]): Float32Array {
  const byteArrays = filterSentinel(b64).map((b) => base64.toByteArray(b));

  let totalBytes = 0;
  for (const bytes of byteArrays) {
    if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
      throw new RangeError(
        `Audio chunk byte length (${bytes.byteLength}) is not a multiple of ${Float32Array.BYTES_PER_ELEMENT}.`,
      );
    }
    totalBytes += bytes.byteLength;
  }

  const result = new Float32Array(
    totalBytes / Float32Array.BYTES_PER_ELEMENT,
  );
  // Copy the raw bytes straight into the result's backing store. Unlike
  // building a Float32Array view over `bytes.buffer`, this honours the byte
  // array's own offset and length and has no alignment requirement.
  const resultBytes = new Uint8Array(result.buffer);
  let byteOffset = 0;
  for (const bytes of byteArrays) {
    resultBytes.set(bytes, byteOffset);
    byteOffset += bytes.byteLength;
  }

  return result;
}
29
+
30
/**
 * Schedule an audio buffer to play at a given time in the passed context.
 *
 * The samples are copied into a new single-channel buffer, connected to the
 * context's destination and started at `startAt`.
 *
 * @param floats The audio samples to play.
 * @param context The audio context to play the buffer in.
 * @param startAt The time to start playing the buffer at, passed unchanged
 * to the buffer source's `start()`.
 * @param sampleRate The sample rate of the audio.
 * @returns A promise that resolves when the audio has finished playing.
 * Resolves immediately when `floats` is empty.
 */
export function playAudioBuffer(
  floats: Float32Array,
  context: AudioContext,
  startAt: number,
  sampleRate: number,
): Promise<void> {
  // Web Audio refuses to create a zero-length buffer, so an empty chunk is
  // treated as audio that has already finished playing.
  if (floats.length === 0) {
    return Promise.resolve();
  }

  const source = context.createBufferSource();
  const buffer = context.createBuffer(1, floats.length, sampleRate);
  buffer.getChannelData(0).set(floats);
  source.buffer = buffer;
  source.connect(context.destination);
  source.start(startAt);

  return new Promise<void>((resolve) => {
    source.onended = () => {
      resolve();
    };
  });
}
58
+
59
/**
 * Build a message-event handler that unwraps a chunk of audio data from a
 * message event and calls `handler` with it if the context ID matches.
 *
 * Events are ignored when their data is not a string, is not valid JSON, does
 * not parse to an object, or carries a different `context_id`. A message with
 * a truthy `done` field is converted to the sentinel value; otherwise the
 * message's `data` field is forwarded as the chunk without validation.
 *
 * @param contextId The context ID to listen for.
 * @param handler The handler to call with the chunk of audio data and the raw
 * message text.
 * @returns A message event handler.
 */
export function createMessageHandlerForContextId(
  contextId: string,
  handler: ({
    chunk,
    message,
  }: {
    chunk: Chunk;
    message: string;
  }) => void,
) {
  return (event: MessageEvent) => {
    if (typeof event.data !== "string") {
      return; // Ignore non-string messages.
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      // A frame that is not valid JSON cannot be attributed to any context,
      // so it is dropped rather than throwing inside the event listener.
      return;
    }
    if (typeof parsed !== "object" || parsed === null) {
      return; // Ignore JSON values that cannot carry a context ID.
    }

    const { context_id, done, data } = parsed as {
      context_id?: unknown;
      done?: unknown;
      data?: unknown;
    };
    if (context_id !== contextId) {
      return; // Ignore messages for other contexts.
    }

    // Convert the done message to a sentinel value; otherwise pass the
    // payload through as-is.
    const chunk: Chunk = done ? getSentinel() : (data as Chunk);
    handler({ chunk, message: event.data });
  };
}
95
+
96
/**
 * Produce the marker value used to signal that a stream has ended.
 *
 * @returns The end-of-stream sentinel, which is `null`.
 */
export function getSentinel(): Sentinel {
  const endOfStream: Sentinel = null;
  return endOfStream;
}
103
+
104
/**
 * Type guard that tells whether a value is the end-of-stream sentinel.
 *
 * @param x The value to test.
 * @returns `true` when `x` is strictly equal to the value returned by
 * `getSentinel()`, `false` otherwise.
 */
export function isSentinel(x: unknown): x is Sentinel {
  const sentinel = getSentinel();
  return x === sentinel;
}
113
+
114
/**
 * Return a copy of a collection with every sentinel value removed.
 *
 * @param collection The collection to filter.
 * @returns A new array holding the non-sentinel items in their original
 * order.
 */
export function filterSentinel<T>(collection: T[]): Exclude<T, Sentinel>[] {
  const isNotSentinel = (
    item: T,
  ): item is Exclude<T, ReturnType<typeof getSentinel>> => {
    return !isSentinel(item);
  };

  return collection.filter(isNotSentinel);
}
125
+
126
/**
 * Tell whether a list of chunks is complete, i.e. whether its final element
 * is the sentinel value.
 *
 * @param chunks The chunks received so far.
 * @returns Whether the last chunk is a sentinel value. An empty list is not
 * complete.
 */
export function isComplete(chunks: Chunk[]) {
  const lastChunk = chunks[chunks.length - 1];
  return isSentinel(lastChunk);
}
135
+
136
/**
 * Expose the subscription side of an Emittery instance.
 *
 * Each returned function is the emitter's own method bound to that emitter,
 * so it can be handed out and called on its own.
 *
 * @param emitter The Emittery instance to get callbacks for.
 * @returns The emitter's `on`, `off`, `once` and `events` methods.
 */
export function getEmitteryCallbacks<T>(
  emitter: Emittery<T>,
): EmitteryCallbacks<T> {
  const on = emitter.on.bind(emitter);
  const off = emitter.off.bind(emitter);
  const once = emitter.once.bind(emitter);
  const events = emitter.events.bind(emitter);

  return { on, off, once, events };
}
@@ -0,0 +1,214 @@
1
+ import Emittery from "emittery";
2
+ import { humanId } from "human-id";
3
+ import { WebSocket as PartySocketWebSocket } from "partysocket";
4
+ import { Client } from "../lib/client";
5
+ import { CARTESIA_VERSION, constructApiUrl } from "../lib/constants";
6
+ import type {
7
+ ConnectionEventData,
8
+ EmitteryCallbacks,
9
+ StreamRequest,
10
+ WebSocketOptions,
11
+ } from "../types";
12
+ import Source from "./source";
13
+ import {
14
+ base64ToArray,
15
+ createMessageHandlerForContextId,
16
+ getEmitteryCallbacks,
17
+ isSentinel,
18
+ } from "./utils";
19
+
20
/**
 * Client for streaming audio over the Cartesia `/tts/websocket` endpoint.
 *
 * Call `connect()` to open the socket, then `send()` once per stream.
 */
export default class WebSocket extends Client {
  /** The underlying socket; undefined until `connect()` has been called. */
  socket?: PartySocketWebSocket;
  /** Tracks the socket's open/close state, updated by its event handlers. */
  #isConnected = false;
  /** Sample rate requested from the API and given to every Source. */
  #sampleRate: number;

  /**
   * Create a new WebSocket client.
   *
   * @param options - WebSocket-specific options.
   * @param options.sampleRate - The sample rate to request for streamed audio.
   * @param args - Arguments to pass to the Client constructor.
   */
  constructor(
    { sampleRate }: WebSocketOptions,
    ...args: ConstructorParameters<typeof Client>
  ) {
    super(...args);

    this.#sampleRate = sampleRate;
  }

  /**
   * Send a message over the WebSocket in order to start a stream.
   *
   * @param inputs - Stream options.
   * @param options - Options for the stream.
   * @param options.timeout - The maximum time in milliseconds to wait for a
   * chunk before cancelling the stream. If `0`, the stream will not time out.
   * @returns A Source object that can be passed to a Player to play the audio,
   * together with `on`/`off`/`once`/`events` callbacks for the stream's
   * `message` event.
   * @throws {Error} If the WebSocket is not connected.
   */
  send(
    inputs: StreamRequest["inputs"],
    { timeout = 0 }: StreamRequest["options"] = {},
  ) {
    if (!this.#isConnected) {
      throw new Error("Not connected to WebSocket. Call .connect() first.");
    }

    // Send audio request.
    const contextId = this.#generateId();
    this.socket?.send(
      JSON.stringify({
        context_id: contextId,
        ...inputs,
        output_format: {
          container: "raw",
          encoding: "pcm_f32le",
          sample_rate: this.#sampleRate,
        },
      }),
    );

    const emitter = new Emittery<{
      message: string;
    }>();
    const source = new Source({
      sampleRate: this.#sampleRate,
    });
    // Used to signal that the stream is complete, either because the
    // WebSocket has closed, or because the stream has finished.
    const streamCompleteController = new AbortController();
    // `abort` must be invoked on its controller. Passing the bare method to
    // setTimeout would call it with the wrong `this` and throw when the
    // timer fires, so every caller goes through this wrapper.
    const abortStream = () => {
      streamCompleteController.abort();
    };
    const armTimeout = () => setTimeout(abortStream, timeout);

    // Set a timeout.
    let timeoutId: ReturnType<typeof setTimeout> | null = null;
    if (timeout > 0) {
      timeoutId = armTimeout();
    }

    const handleMessage = createMessageHandlerForContextId(
      contextId,
      async ({ chunk, message }) => {
        emitter.emit("message", message);
        if (isSentinel(chunk)) {
          await source.close();
          abortStream();
          return;
        }
        await source.enqueue(base64ToArray([chunk]));
        // A chunk arrived, so restart the wait-for-next-chunk timer unless
        // the stream was completed while the chunk was being enqueued.
        if (timeoutId && !streamCompleteController.signal.aborted) {
          clearTimeout(timeoutId);
          timeoutId = armTimeout();
        }
      },
    );

    // All per-stream listeners are tied to the completion signal so they are
    // removed from the shared socket as soon as this stream is done.
    this.socket?.addEventListener("message", handleMessage, {
      signal: streamCompleteController.signal,
    });
    this.socket?.addEventListener("close", abortStream, {
      once: true,
      signal: streamCompleteController.signal,
    });
    this.socket?.addEventListener("error", abortStream, {
      once: true,
      signal: streamCompleteController.signal,
    });
    streamCompleteController.signal.addEventListener("abort", () => {
      source.close();
      if (timeoutId) {
        clearTimeout(timeoutId);
      }
    });

    return { source, ...getEmitteryCallbacks(emitter) };
  }

  /**
   * Generate a unique ID suitable for a streaming context.
   *
   * Not suitable for security purposes or as a primary key, since
   * it lacks the amount of entropy required for those use cases.
   *
   * @returns A unique ID.
   */
  #generateId() {
    return humanId({
      separator: "-",
      capitalize: false,
    });
  }

  /**
   * Authenticate and connect to a Cartesia streaming WebSocket.
   *
   * The API key is sent as the `api_key` query parameter of the socket URL.
   *
   * @returns A promise that resolves when the WebSocket is connected, with
   * `on`/`off`/`once`/`events` callbacks for the connection's `open` and
   * `close` events.
   * @throws {Error} If the WebSocket fails to connect.
   */
  connect() {
    const url = constructApiUrl(
      this.baseUrl,
      `/tts/websocket?cartesia_version=${CARTESIA_VERSION}`,
      "ws",
    );
    url.searchParams.set("api_key", this.apiKey);
    const emitter = new Emittery<ConnectionEventData>();
    this.socket = new PartySocketWebSocket(url.toString());
    this.socket.onopen = () => {
      this.#isConnected = true;
      emitter.emit("open");
    };
    this.socket.onclose = () => {
      this.#isConnected = false;
      emitter.emit("close");
    };

    return new Promise<EmitteryCallbacks<ConnectionEventData>>(
      (resolve, reject) => {
        // Whichever of open/error/close fires first settles the promise and
        // aborts this controller, which detaches the other two listeners so
        // none of them stays on the socket after the attempt is decided.
        const aborter = new AbortController();

        this.socket?.addEventListener(
          "open",
          () => {
            aborter.abort();
            resolve(getEmitteryCallbacks(emitter));
          },
          {
            once: true,
            signal: aborter.signal,
          },
        );

        this.socket?.addEventListener(
          "error",
          () => {
            aborter.abort();
            reject(new Error("WebSocket failed to connect."));
          },
          {
            signal: aborter.signal,
          },
        );

        this.socket?.addEventListener(
          "close",
          () => {
            aborter.abort();
            reject(new Error("WebSocket closed before it could connect."));
          },
          {
            signal: aborter.signal,
          },
        );
      },
    );
  }

  /**
   * Disconnect from the Cartesia streaming WebSocket.
   *
   * Does nothing if `connect()` has not been called.
   */
  disconnect() {
    this.socket?.close();
  }
}
@@ -1,4 +1,67 @@
1
+ import type Emittery from "emittery";
2
+
1
3
/** Options accepted when constructing a client; both fields are optional. */
export interface ClientOptions {
  apiKey?: string;
  baseUrl?: string;
}

/** Marker value that stands for the end of a stream. */
export type Sentinel = null;

/** One item of a stream: a string payload or the end-of-stream sentinel. */
export type Chunk = string | Sentinel;

/** Connection events; neither event carries a payload. */
export type ConnectionEventData = {
  open: never;
  close: never;
};

/** Parameter types of a stream request. */
export type StreamRequest = {
  /** Fields that are sent along with the request. */
  inputs: object;
  options: {
    /** Maximum wait for a chunk; optional. */
    timeout?: number;
  };
};

/** The subscription methods of an Emittery instance. */
export type EmitteryCallbacks<T> = {
  on: Emittery<T>["on"];
  off: Emittery<T>["off"];
  once: Emittery<T>["once"];
  events: Emittery<T>["events"];
};

/** Input for cloning a voice, discriminated by `mode`. */
export type CloneOptions =
  | {
      mode: "url";
      link: string;
    }
  | {
      mode: "clip";
      clip: Blob;
    };

/** A voice record. */
export type Voice = {
  id: string;
  name: string;
  description: string;
  embedding: number[];
  is_public: boolean;
  user_id: string;
  created_at: string;
};

/** The Voice fields that must be supplied when creating a voice. */
type RequiredCreateVoiceField = "name" | "description" | "embedding";

/**
 * Payload for creating a voice: `name`, `description` and `embedding` are
 * required, every other Voice field is optional.
 */
export type CreateVoice = Pick<Voice, RequiredCreateVoiceField> &
  Partial<Omit<Voice, RequiredCreateVoiceField>>;

/** Result of a clone request. */
export type CloneResponse = {
  embedding: number[];
};

/** Options specific to the WebSocket client. */
export type WebSocketOptions = {
  sampleRate: number;
};

/** Source events; none of them carries a payload. */
export type SourceEventData = {
  enqueue: never;
  close: never;
  wait: never;
  read: never;
};
@@ -0,0 +1,47 @@
1
+ import { Client } from "../lib/client";
2
+ import type { CloneOptions, CloneResponse, CreateVoice, Voice } from "../types";
3
+
4
/**
 * Client for the `/voices` endpoints.
 *
 * Every method issues its request through the `fetch` method inherited from
 * Client and returns the parsed JSON body of the response.
 */
export default class Voices extends Client {
  /**
   * List voices.
   *
   * @returns The parsed response of `GET /voices`.
   */
  async list(): Promise<Voice[]> {
    const response = await this.fetch("/voices");
    return response.json();
  }

  /**
   * Fetch a single voice.
   *
   * @param voiceId The ID of the voice; it is percent-encoded before being
   * placed in the request path.
   * @returns The parsed response of `GET /voices/{voiceId}`.
   */
  async get(voiceId: string): Promise<Voice> {
    const response = await this.fetch(
      `/voices/${encodeURIComponent(voiceId)}`,
    );
    return response.json();
  }

  /**
   * Create a voice.
   *
   * @param voice The voice to create, sent as the JSON request body.
   * @returns The parsed response of `POST /voices`.
   */
  async create(voice: CreateVoice): Promise<Voice> {
    const response = await this.fetch("/voices", {
      method: "POST",
      body: JSON.stringify(voice),
    });
    return response.json();
  }

  /**
   * Clone a voice from a link or from an audio clip.
   *
   * @param options Either `{ mode: "url", link }` or `{ mode: "clip", clip }`.
   * @returns The parsed response of the clone endpoint for the given mode.
   * @throws {Error} If `options.mode` is neither `"url"` nor `"clip"`.
   */
  async clone(options: CloneOptions): Promise<CloneResponse> {
    if (options.mode === "url") {
      // The link is itself a URL and may contain `&`, `#`, `?` or spaces.
      // It has to be encoded to survive as a single query parameter value.
      const link = encodeURIComponent(options.link);
      const response = await this.fetch(`/voices/clone/url?link=${link}`, {
        method: "POST",
      });
      return response.json();
    }

    if (options.mode === "clip") {
      const formData = new FormData();
      formData.append("clip", options.clip);

      const response = await this.fetch("/voices/clone/clip", {
        method: "POST",
        body: formData,
      });
      return response.json();
    }

    // Reachable only when a caller bypasses the CloneOptions type.
    throw new Error("Invalid mode for clone()");
  }
}