@cartesia/cartesia-js 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.turbo/turbo-build.log +47 -0
  2. package/CHANGELOG.md +7 -0
  3. package/README.md +38 -0
  4. package/dist/audio/index.d.mts +4 -0
  5. package/dist/audio/index.d.ts +4 -0
  6. package/dist/audio/index.js +354 -0
  7. package/dist/audio/index.mjs +9 -0
  8. package/dist/audio/utils.d.mts +4 -0
  9. package/dist/audio/utils.d.ts +4 -0
  10. package/dist/audio/utils.js +113 -0
  11. package/dist/audio/utils.mjs +21 -0
  12. package/dist/chunk-35HX6ML3.mjs +39 -0
  13. package/dist/chunk-5RMUZJV7.mjs +240 -0
  14. package/dist/chunk-BTFHUVNH.mjs +71 -0
  15. package/dist/chunk-ERFCRIWU.mjs +18 -0
  16. package/dist/index-Ds4LDkmk.d.ts +127 -0
  17. package/dist/index-Dt9A_pEb.d.mts +127 -0
  18. package/dist/lib/client.d.mts +9 -0
  19. package/dist/lib/client.d.ts +9 -0
  20. package/dist/lib/client.js +43 -0
  21. package/dist/lib/client.mjs +7 -0
  22. package/dist/lib/constants.d.mts +5 -0
  23. package/dist/lib/constants.d.ts +5 -0
  24. package/dist/lib/constants.js +38 -0
  25. package/dist/lib/constants.mjs +10 -0
  26. package/dist/lib/index.d.mts +11 -0
  27. package/dist/lib/index.d.ts +11 -0
  28. package/dist/lib/index.js +368 -0
  29. package/dist/lib/index.mjs +19 -0
  30. package/dist/react/index.d.mts +22 -0
  31. package/dist/react/index.d.ts +22 -0
  32. package/dist/react/index.js +431 -0
  33. package/dist/react/index.mjs +82 -0
  34. package/dist/types/index.d.mts +6 -0
  35. package/dist/types/index.d.ts +6 -0
  36. package/dist/types/index.js +18 -0
  37. package/dist/types/index.mjs +0 -0
  38. package/index.ts +3 -0
  39. package/package.json +50 -0
  40. package/src/audio/index.ts +242 -0
  41. package/src/audio/utils.ts +138 -0
  42. package/src/lib/client.ts +17 -0
  43. package/src/lib/constants.ts +6 -0
  44. package/src/lib/index.ts +13 -0
  45. package/src/react/index.ts +91 -0
  46. package/src/types/index.ts +4 -0
  47. package/tsconfig.json +3 -0
package/src/audio/index.ts ADDED
@@ -0,0 +1,242 @@
+ import Emittery from "emittery";
+ import { humanId } from "human-id";
+ import { Client } from "../lib/client";
+ import { SAMPLE_RATE, constructWebsocketUrl } from "../lib/constants";
+ import {
+   type Sentinel,
+   createMessageHandlerForContextId,
+   getBufferDuration,
+   isComplete,
+   isSentinel,
+   playAudioBuffer,
+ } from "./utils";
+
+ export type Chunk = string | Sentinel;
+ export type StreamEventData = {
+   chunk: {
+     chunk: Chunk;
+     chunks: Chunk[];
+   };
+   message: unknown;
+ };
+ export default class extends Client {
+   socket?: WebSocket;
+   isConnected = false;
+
+   /**
+    * Stream audio from a model.
+    *
+    * @param inputs - Stream options. Includes a `model` key and some `parameters`, which
+    * are model-specific and can be found in the model's documentation.
+    * @param options - Options for the stream.
+    * @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
+    * If `0`, the stream will not time out.
+    * @returns An object with a method `play` of type `(bufferDuration: number) => Promise<void>`
+    * that plays the audio as it arrives, with `bufferDuration` seconds of audio buffered before
+    * starting playback.
+    */
+   stream(inputs: object, { timeout = 0 }: { timeout?: number } = {}) {
+     if (!this.isConnected) {
+       throw new Error("Not connected to WebSocket. Call .connect() first.");
+     }
+
+     // Send audio request.
+     const contextId = this.generateId();
+     this.socket?.send(
+       JSON.stringify({
+         data: inputs,
+         context_id: contextId,
+       }),
+     );
+
+     // Used to signal that the stream is complete, either because the
+     // WebSocket has closed, or because the stream has finished.
+     const streamCompleteController = new AbortController();
+     // Set a timeout.
+     let timeoutId: ReturnType<typeof setTimeout> | null = null;
+     if (timeout > 0) {
+       timeoutId = setTimeout(streamCompleteController.abort, timeout);
+     }
+     // Array of base64-encoded audio chunks, representing directly sampled
+     // audio data, i.e. floats in the range [-1, 1].
+     const chunks: Chunk[] = [];
+     // Used to dispatch events.
+     const emitter = new Emittery<StreamEventData>();
+     const handleMessage = createMessageHandlerForContextId(
+       contextId,
+       async ({ chunk, message }) => {
+         chunks.push(chunk);
+         await emitter.emit("chunk", {
+           chunk,
+           chunks,
+         });
+         await emitter.emit("message", message);
+         if (isSentinel(chunk)) {
+           streamCompleteController.abort();
+         } else if (timeoutId) {
+           clearTimeout(timeoutId);
+           timeoutId = setTimeout(streamCompleteController.abort, timeout);
+         }
+       },
+     );
+     this.socket?.addEventListener("message", handleMessage, {
+       signal: streamCompleteController.signal,
+     });
+     this.socket?.addEventListener("close", streamCompleteController.abort, {
+       once: true,
+     });
+     this.socket?.addEventListener("error", streamCompleteController.abort, {
+       once: true,
+     });
+     streamCompleteController.signal.addEventListener("abort", () => {
+       emitter.clearListeners();
+     });
+
+     const play = async ({ bufferDuration }: { bufferDuration: number }) => {
+       const context = new AudioContext({
+         sampleRate: SAMPLE_RATE,
+       });
+
+       let startNextPlaybackAt = 0;
+       const playLatestChunk = (chunk: Chunk) => {
+         if (isSentinel(chunk)) {
+           return true; // Indicates that playback has finished.
+         }
+         startNextPlaybackAt =
+           playAudioBuffer([chunk], context, startNextPlaybackAt) +
+           Math.max(context.currentTime, startNextPlaybackAt);
+         return false; // Indicates that playback has not finished.
+       };
+
+       const playChunks = (chunks: Chunk[]) => {
+         startNextPlaybackAt += playAudioBuffer(
+           chunks,
+           context,
+           startNextPlaybackAt,
+         );
+
+         if (isComplete(chunks)) {
+           return;
+         }
+       };
+
+       // tryStart tries to start playback if the buffer duration is
+       // already satisfied or if all the chunks have arrived. If it is
+       // not, it returns false, indicating that the caller should call
+       // it again when more chunks arrive.
+       const tryStart = async (chunks: Chunk[]) => {
+         startNextPlaybackAt = context.currentTime;
+
+         if (isComplete(chunks) || streamCompleteController.signal.aborted) {
+           playChunks(chunks);
+           return true; // Done playing.
+         }
+
+         if (getBufferDuration(chunks) > bufferDuration) {
+           // Play the initial chunks that we already have.
+           playChunks(chunks);
+           // If the stream is not complete, play new chunks as they
+           // arrive.
+           for await (const { chunk } of emitter.events("chunk")) {
+             if (playLatestChunk(chunk)) {
+               break;
+             }
+           }
+           return true; // Done playing.
+         }
+         return false; // Need to buffer more audio.
+       };
+
+       if (!(await tryStart(chunks))) {
+         for await (const { chunks } of emitter.events("chunk")) {
+           if (await tryStart(chunks)) {
+             break;
+           }
+         }
+       }
+     };
+
+     return {
+       play,
+       on: emitter.on.bind(emitter),
+       off: emitter.off.bind(emitter),
+       once: emitter.once.bind(emitter),
+       events: emitter.events.bind(emitter),
+     };
+   }
+
+   /**
+    * Generate a unique ID suitable for a streaming context.
+    *
+    * Not suitable for security purposes or as a primary key, since
+    * it lacks the amount of entropy required for those use cases.
+    *
+    * @returns A unique ID.
+    */
+   generateId() {
+     return humanId({
+       separator: "-",
+       capitalize: false,
+     });
+   }
+
+   /**
+    * Authenticate and connect to a Cartesia streaming WebSocket.
+    *
+    * @returns A promise that resolves when the WebSocket is connected.
+    * @throws {Error} If the WebSocket fails to connect.
+    */
+   connect() {
+     const url = constructWebsocketUrl(this.baseUrl);
+     url.searchParams.set("api_key", this.apiKey);
+     this.socket = new WebSocket(url);
+     this.socket.onopen = () => {
+       this.isConnected = true;
+     };
+     this.socket.onclose = () => {
+       this.isConnected = false;
+     };
+
+     return new Promise<void>((resolve, reject) => {
+       this.socket?.addEventListener(
+         "open",
+         () => {
+           resolve();
+         },
+         {
+           once: true,
+         },
+       );
+
+       const aborter = new AbortController();
+       this.socket?.addEventListener(
+         "error",
+         () => {
+           aborter.abort();
+           reject(new Error("WebSocket failed to connect."));
+         },
+         {
+           signal: aborter.signal,
+         },
+       );
+
+       this.socket?.addEventListener(
+         "close",
+         () => {
+           aborter.abort();
+           reject(new Error("WebSocket closed before it could connect."));
+         },
+         {
+           signal: aborter.signal,
+         },
+       );
+     });
+   }
+
+   /**
+    * Disconnect from the Cartesia streaming WebSocket.
+    */
+   disconnect() {
+     this.socket?.close();
+   }
+ }
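For orientation, here is a minimal usage sketch of the WebSocket client added above. It assumes the class is importable from the source path shown in this diff; the model name, parameters, and text field are illustrative placeholders rather than options documented by this package. Note that play takes an options object, matching the implementation (the JSDoc above describes it as a bare number).

    import CartesiaAudio from "./src/audio";

    // Placeholder credentials and model options; the real values come from the Cartesia docs.
    const audio = new CartesiaAudio({ apiKey: "your-api-key" });

    async function speak() {
      // Open the authenticated WebSocket before calling stream().
      await audio.connect();

      // stream() returns { play, on, off, once, events } bound to an Emittery instance.
      const stream = audio.stream(
        { model: "example-model", parameters: { text: "Hello from Cartesia." } },
        { timeout: 5000 }, // milliseconds between chunks before the stream is cancelled
      );

      stream.on("message", (message) => {
        console.log("raw server message:", message);
      });

      // Buffer one second of audio before playback begins.
      await stream.play({ bufferDuration: 1 });

      audio.disconnect();
    }

    speak().catch(console.error);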
package/src/audio/utils.ts ADDED
@@ -0,0 +1,138 @@
+ import base64 from "base64-js";
+ import type { Chunk, StreamEventData } from ".";
+ import { SAMPLE_RATE } from "../lib/constants";
+
+ /**
+  * Get the duration of base64-encoded audio buffer(s) in seconds.
+  *
+  * @param b64 The base64-encoded audio buffer, or an array of base64-encoded
+  * audio buffers.
+  * @returns The duration of the buffer(s) in seconds.
+  */
+ export function getBufferDuration(b64: Chunk[]) {
+   const floats = base64ToArray(b64);
+   return floats.length / SAMPLE_RATE;
+ }
+
+ /**
+  * Convert base64-encoded audio buffer(s) to a Float32Array.
+  *
+  * @param b64 The base64-encoded audio buffer, or an array of base64-encoded
+  * audio buffers.
+  * @returns The audio buffer(s) as a Float32Array.
+  */
+ export function base64ToArray(b64: Chunk[]): Float32Array {
+   return filterSentinel(b64).reduce((acc, b) => {
+     const floats = new Float32Array(base64.toByteArray(b).buffer);
+     const newAcc = new Float32Array(acc.length + floats.length);
+     newAcc.set(acc, 0);
+     newAcc.set(floats, acc.length);
+     return newAcc;
+   }, new Float32Array(0));
+ }
+
+ /**
+  * Schedule an audio buffer to play at a given time in the passed context.
+  *
+  * @param b64 The base64-encoded audio buffer to play.
+  * @param context The audio context to play the buffer in.
+  * @param maybeStartAt The time to start playing the buffer at, or null to play
+  * immediately.
+  * @param onEnded The callback to call when the buffer has finished playing.
+  * @returns The duration of the buffer in seconds.
+  */
+ export function playAudioBuffer(
+   b64: Chunk[],
+   context: AudioContext,
+   maybeStartAt: number | null = null,
+   onEnded: AudioScheduledSourceNode["onended"] = null,
+ ) {
+   const startAt = maybeStartAt ?? context.currentTime;
+
+   const floats = base64ToArray(b64);
+   const source = context.createBufferSource();
+   const buffer = context.createBuffer(1, floats.length, SAMPLE_RATE);
+   buffer.getChannelData(0).set(floats);
+   source.buffer = buffer;
+   source.connect(context.destination);
+   source.start(startAt);
+   source.onended = onEnded;
+
+   return buffer.duration;
+ }
+
+ /**
+  * Unwraps a chunk of audio data from a message event and calls the
+  * handler with it if the context ID matches.
+  *
+  * @param contextId The context ID to listen for.
+  * @param handler The handler to call with the chunk of audio data.
+  * @returns A message event handler.
+  */
+ export function createMessageHandlerForContextId(
+   contextId: string,
+   handler: ({
+     chunk,
+     message,
+   }: {
+     chunk: Chunk;
+     message: StreamEventData["message"];
+   }) => void,
+ ) {
+   return (event: MessageEvent) => {
+     const message = JSON.parse(event.data);
+     if (message.context_id !== contextId) {
+       return; // Ignore messages for other contexts.
+     }
+     let chunk: Chunk;
+     if (message.done) {
+       // Convert the done message to a sentinel value.
+       chunk = getSentinel();
+     } else {
+       chunk = message.data;
+     }
+     handler({ chunk, message });
+   };
+ }
+
+ export type Sentinel = null;
+
+ /**
+  * Get a sentinel value that indicates the end of a stream.
+  * @returns A sentinel value to indicate the end of a stream.
+  */
+ export function getSentinel(): Sentinel {
+   return null;
+ }
+
+ /**
+  * Check if a chunk is a sentinel value (i.e. null).
+  *
+  * @param chunk
+  * @returns Whether the chunk is a sentinel value.
+  */
+ export function isSentinel(x: unknown): x is Sentinel {
+   return x === getSentinel();
+ }
+
+ /**
+  * Filter out null values from a collection.
+  *
+  * @param collection The collection to filter.
+  * @returns The collection with null values removed.
+  */
+ export function filterSentinel<T>(collection: T[]): Exclude<T, Sentinel>[] {
+   return collection.filter(
+     (x): x is Exclude<T, ReturnType<typeof getSentinel>> => !isSentinel(x),
+   );
+ }
+
+ /**
+  * Check if an array of chunks is complete by testing if the last chunk is a sentinel
+  * value (i.e. null).
+  * @param chunk
+  * @returns Whether the array of chunks is complete.
+  */
+ export function isComplete(chunks: Chunk[]) {
+   return isSentinel(chunks[chunks.length - 1]);
+ }
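A small sketch of the chunk format these helpers assume: each chunk is a base64 string whose decoded bytes are raw 32-bit float PCM at SAMPLE_RATE. The encoding step below is only illustrative (real chunks come from the server over the WebSocket); it demonstrates the round trip through base64ToArray and getBufferDuration.

    import base64 from "base64-js";
    import { base64ToArray, getBufferDuration } from "./src/audio/utils";
    import { SAMPLE_RATE } from "./src/lib/constants";

    // Fabricate one second of silence as Float32 PCM and base64-encode it,
    // mimicking the shape of a chunk received over the WebSocket.
    const silence = new Float32Array(SAMPLE_RATE);
    const chunk = base64.fromByteArray(new Uint8Array(silence.buffer));

    // Decoding concatenates the chunks back into one Float32Array...
    const floats = base64ToArray([chunk, chunk]);
    console.log(floats.length); // 2 * SAMPLE_RATE samples

    // ...and the duration is simply samples / SAMPLE_RATE.
    console.log(getBufferDuration([chunk, chunk])); // 2 (seconds)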
package/src/lib/client.ts ADDED
@@ -0,0 +1,17 @@
+ import type { ClientOptions } from "../types";
+ import { BASE_URL } from "./constants";
+
+ export class Client {
+   apiKey: string;
+   baseUrl: string;
+
+   constructor(options: ClientOptions = {}) {
+     if (!(options.apiKey || process.env.CARTESIA_API_KEY)) {
+       throw new Error("Missing Cartesia API key.");
+     }
+
+     // biome-ignore lint/style/noNonNullAssertion: Guaranteed to be defined by the check above.
+     this.apiKey = (options.apiKey || process.env.CARTESIA_API_KEY)!;
+     this.baseUrl = options.baseUrl || BASE_URL;
+   }
+ }
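To spell out the key-resolution order in the constructor above: an explicit apiKey wins, otherwise the CARTESIA_API_KEY environment variable is used, and construction throws if neither is set. The values below are placeholders.

    import { Client } from "./src/lib/client";

    // Explicit key takes precedence over the environment variable.
    const explicit = new Client({ apiKey: "sk-example" });

    // Falls back to process.env.CARTESIA_API_KEY; throws "Missing Cartesia API key." if unset.
    const fromEnv = new Client();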
package/src/lib/constants.ts ADDED
@@ -0,0 +1,6 @@
+ export const BASE_URL = "https://api.cartesia.ai/v0";
+ export const SAMPLE_RATE = 44100;
+
+ export const constructWebsocketUrl = (baseUrl: string) => {
+   return new URL(`${baseUrl.replace(/^http/, "ws")}/ws`);
+ };
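To illustrate constructWebsocketUrl: the leading http/https scheme is rewritten to ws/wss and /ws is appended, so the default base URL maps to wss://api.cartesia.ai/v0/ws.

    import { BASE_URL, constructWebsocketUrl } from "./src/lib/constants";

    // "https://api.cartesia.ai/v0" -> "wss://api.cartesia.ai/v0/ws"
    console.log(constructWebsocketUrl(BASE_URL).toString());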
package/src/lib/index.ts ADDED
@@ -0,0 +1,13 @@
+ import Audio from "../audio";
+ import type { ClientOptions } from "../types";
+ import { Client } from "./client";
+
+ export class Cartesia extends Client {
+   audio: Audio;
+
+   constructor(options: ClientOptions = {}) {
+     super(options);
+
+     this.audio = new Audio(options);
+   }
+ }
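The Cartesia wrapper simply forwards ClientOptions to both the base Client and the audio module, so the sketch shown after src/audio/index.ts can equally be written against cartesia.audio. The import path below is the source path from this diff, since the package entry point package/index.ts is not shown here.

    import { Cartesia } from "./src/lib";

    const cartesia = new Cartesia({ apiKey: "your-api-key" });
    // cartesia.audio is the same WebSocket client sketched earlier.
    await cartesia.audio.connect();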
package/src/react/index.ts ADDED
@@ -0,0 +1,91 @@
+ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+ import CartesiaAudio, { type Chunk, type StreamEventData } from "../audio";
+
+ interface UseAudioOptions {
+   apiKey: string;
+   baseUrl?: string;
+ }
+
+ interface UseAudioReturn {
+   stream: (options: object) => void;
+   play: (bufferDuration?: number) => Promise<void>;
+   isPlaying: boolean;
+   chunks: Chunk[];
+   messages: StreamEventData["message"][];
+ }
+ /**
+  * React hook to use the Cartesia audio API.
+  */
+ export function useAudio({ apiKey, baseUrl }: UseAudioOptions): UseAudioReturn {
+   if (typeof window === "undefined" || !apiKey) {
+     return {
+       stream: () => {},
+       play: async () => {},
+       isPlaying: false,
+       chunks: [],
+       messages: [],
+     };
+   }
+
+   const audio = useMemo(() => {
+     const audio = new CartesiaAudio({ apiKey, baseUrl });
+     return audio;
+   }, [apiKey, baseUrl]);
+   const streamReturn = useRef<ReturnType<CartesiaAudio["stream"]> | null>(null);
+   const [isPlaying, setIsPlaying] = useState(false);
+   const [chunks, setChunks] = useState<Chunk[]>([]);
+   const [messages, setMessages] = useState<StreamEventData["message"][]>([]);
+
+   const stream = useCallback(
+     (options: object) => {
+       streamReturn.current = audio?.stream(options) ?? null;
+       streamReturn.current.on(
+         "chunk",
+         ({ chunks }: StreamEventData["chunk"]) => {
+           setChunks(chunks);
+         },
+       );
+       streamReturn.current.on(
+         "message",
+         (message: StreamEventData["message"]) => {
+           setMessages((messages) => [...messages, message]);
+         },
+       );
+     },
+     [audio],
+   );
+
+   useEffect(() => {
+     async function initialize() {
+       try {
+         await audio?.connect();
+       } catch (e) {
+         console.error(e);
+       }
+       return () => {
+         audio?.disconnect();
+       };
+     }
+     initialize();
+   }, [audio]);
+
+   const play = useCallback(
+     async (bufferDuration = 0) => {
+       if (isPlaying || !streamReturn.current) {
+         return;
+       }
+       setIsPlaying(true);
+       await streamReturn.current?.play({ bufferDuration });
+       setIsPlaying(false);
+     },
+     [isPlaying],
+   );
+
+   // TODO:
+   // - [] Pause and stop playback.
+   // - [] Access the play and buffer cursors.
+   // - [] Seek to a specific time.
+   // These are probably best implemented by adding event listener
+   // functionality to the base library.
+   return { stream, play, isPlaying, chunks, messages };
+ }
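A minimal component sketch for the useAudio hook above. The /react import path is assumed from the dist/react build output (the package.json exports map is not shown here), and the prompt passed to stream is a placeholder.

    import { useAudio } from "@cartesia/cartesia-js/react";

    export function Speaker({ apiKey }: { apiKey: string }) {
      const { stream, play, isPlaying, messages } = useAudio({ apiKey });

      const handleClick = async () => {
        // Queue a stream, then start playback with one second of audio buffered.
        stream({ model: "example-model", parameters: { text: "Hello!" } });
        await play(1);
      };

      return (
        <div>
          <button onClick={handleClick} disabled={isPlaying}>
            {isPlaying ? "Playing…" : "Speak"}
          </button>
          <pre>{JSON.stringify(messages, null, 2)}</pre>
        </div>
      );
    }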
package/src/types/index.ts ADDED
@@ -0,0 +1,4 @@
+ export interface ClientOptions {
+   apiKey?: string;
+   baseUrl?: string;
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "extends": "@repo/config-typescript/react-library.json"
+ }