@cartesia/cartesia-js 0.0.3 → 1.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/.turbo/turbo-build.log +68 -38
  2. package/CHANGELOG.md +12 -0
  3. package/README.md +123 -16
  4. package/dist/chunk-3FL2SNIR.js +17 -0
  5. package/dist/chunk-3GBZUGUD.js +17 -0
  6. package/dist/chunk-4RMSIQLG.js +25 -0
  7. package/dist/chunk-BCQ63627.js +32 -0
  8. package/dist/chunk-JOHSCOLW.js +106 -0
  9. package/dist/chunk-LYPTISWL.js +75 -0
  10. package/dist/chunk-NDNN326Q.js +207 -0
  11. package/dist/chunk-WBK6LLXX.js +58 -0
  12. package/dist/chunk-WE63M7PJ.js +119 -0
  13. package/dist/{chunk-HNLIBHEN.mjs → chunk-WIFMLPT5.js} +31 -16
  14. package/dist/chunk-X7SJMF2R.js +22 -0
  15. package/dist/index.cjs +652 -0
  16. package/dist/index.d.cts +10 -0
  17. package/dist/index.d.ts +10 -0
  18. package/dist/index.js +20 -0
  19. package/dist/lib/client.cjs +89 -0
  20. package/dist/lib/client.d.cts +11 -0
  21. package/dist/lib/client.d.ts +2 -0
  22. package/dist/lib/client.js +7 -42
  23. package/dist/lib/constants.cjs +42 -0
  24. package/dist/lib/constants.d.cts +4 -0
  25. package/dist/lib/constants.d.ts +2 -3
  26. package/dist/lib/constants.js +8 -37
  27. package/dist/lib/index.cjs +531 -0
  28. package/dist/lib/index.d.cts +16 -0
  29. package/dist/lib/index.d.ts +6 -2
  30. package/dist/lib/index.js +13 -409
  31. package/dist/react/index.cjs +846 -0
  32. package/dist/react/index.d.cts +33 -0
  33. package/dist/react/index.d.ts +20 -13
  34. package/dist/react/index.js +161 -501
  35. package/dist/react/utils.cjs +57 -0
  36. package/dist/react/utils.d.cts +7 -0
  37. package/dist/react/utils.d.ts +7 -0
  38. package/dist/react/utils.js +7 -0
  39. package/dist/tts/index.cjs +470 -0
  40. package/dist/tts/index.d.cts +17 -0
  41. package/dist/tts/index.d.ts +17 -0
  42. package/dist/tts/index.js +12 -0
  43. package/dist/tts/player.cjs +198 -0
  44. package/dist/tts/player.d.cts +43 -0
  45. package/dist/tts/player.d.ts +43 -0
  46. package/dist/tts/player.js +8 -0
  47. package/dist/tts/source.cjs +167 -0
  48. package/dist/tts/source.d.cts +53 -0
  49. package/dist/tts/source.d.ts +53 -0
  50. package/dist/tts/source.js +7 -0
  51. package/dist/{audio/utils.js → tts/utils.cjs} +13 -54
  52. package/dist/tts/utils.d.cts +67 -0
  53. package/dist/tts/utils.d.ts +67 -0
  54. package/dist/{audio/utils.mjs → tts/utils.js} +2 -6
  55. package/dist/tts/websocket.cjs +453 -0
  56. package/dist/tts/websocket.d.cts +53 -0
  57. package/dist/tts/websocket.d.ts +53 -0
  58. package/dist/tts/websocket.js +11 -0
  59. package/dist/types/index.cjs +18 -0
  60. package/dist/types/index.d.cts +55 -0
  61. package/dist/types/index.d.ts +50 -1
  62. package/dist/types/index.js +1 -18
  63. package/dist/voices/index.cjs +155 -0
  64. package/dist/voices/index.d.cts +12 -0
  65. package/dist/voices/index.d.ts +12 -0
  66. package/dist/voices/index.js +9 -0
  67. package/package.json +11 -7
  68. package/src/index.ts +4 -0
  69. package/src/lib/client.ts +14 -1
  70. package/src/lib/constants.ts +13 -3
  71. package/src/lib/index.ts +6 -3
  72. package/src/react/index.ts +167 -75
  73. package/src/react/utils.ts +11 -0
  74. package/src/tts/index.ts +17 -0
  75. package/src/tts/player.ts +109 -0
  76. package/src/tts/source.ts +98 -0
  77. package/src/{audio → tts}/utils.ts +19 -97
  78. package/src/tts/websocket.ts +210 -0
  79. package/src/types/index.ts +63 -0
  80. package/src/voices/index.ts +47 -0
  81. package/dist/audio/index.d.mts +0 -5
  82. package/dist/audio/index.d.ts +0 -5
  83. package/dist/audio/index.js +0 -396
  84. package/dist/audio/index.mjs +0 -9
  85. package/dist/audio/utils.d.mts +0 -5
  86. package/dist/audio/utils.d.ts +0 -5
  87. package/dist/chunk-3CYTAFLF.mjs +0 -262
  88. package/dist/chunk-FRIBQZPN.mjs +0 -113
  89. package/dist/chunk-XSFPHPPG.mjs +0 -18
  90. package/dist/index-DSBmfK9-.d.mts +0 -158
  91. package/dist/index-qwAyxV5I.d.ts +0 -158
  92. package/dist/lib/client.d.mts +0 -9
  93. package/dist/lib/client.mjs +0 -7
  94. package/dist/lib/constants.d.mts +0 -5
  95. package/dist/lib/constants.mjs +0 -10
  96. package/dist/lib/index.d.mts +0 -12
  97. package/dist/lib/index.mjs +0 -19
  98. package/dist/react/index.d.mts +0 -26
  99. package/dist/react/index.mjs +0 -130
  100. package/dist/types/index.d.mts +0 -6
  101. package/index.ts +0 -3
  102. package/src/audio/index.ts +0 -282
  103. package/dist/{types/index.mjs → chunk-FXPGR372.js} +0 -0
@@ -1,19 +1,6 @@
1
1
  import base64 from "base64-js";
2
2
  import type Emittery from "emittery";
3
- import type { Chunk, StreamEventData } from ".";
4
- import { SAMPLE_RATE } from "../lib/constants";
5
-
6
- /**
7
- * Get the duration of base64-encoded audio buffer(s) in seconds.
8
- *
9
- * @param b64 The base64-encoded audio buffer, or an array of base64-encoded
10
- * audio buffers.
11
- * @returns The duration of the buffer(s) in seconds.
12
- */
13
- export function getBufferDuration(b64: Chunk[]) {
14
- const floats = base64ToArray(b64);
15
- return floats.length / SAMPLE_RATE;
16
- }
3
+ import type { Chunk, EmitteryCallbacks, Sentinel } from "../types";
17
4
 
18
5
  /**
19
6
  * Convert base64-encoded audio buffer(s) to a Float32Array.
@@ -35,31 +22,30 @@ export function base64ToArray(b64: Chunk[]): Float32Array {
35
22
  /**
36
23
  * Schedule an audio buffer to play at a given time in the passed context.
37
24
  *
38
- * @param b64 The base64-encoded audio buffer to play.
25
+ * @param floats The audio buffer to play.
39
26
  * @param context The audio context to play the buffer in.
40
- * @param maybeStartAt The time to start playing the buffer at, or null to play
41
- * immediately.
42
- * @param onEnded The callback to call when the buffer has finished playing.
43
- * @returns The duration of the buffer in seconds.
27
+ * @param startAt The time to start playing the buffer at.
28
+ * @param sampleRate The sample rate of the audio.
29
+ * @returns A promise that resolves when the audio has finished playing.
44
30
  */
45
31
  export function playAudioBuffer(
46
- b64: Chunk[],
32
+ floats: Float32Array,
47
33
  context: AudioContext,
48
- maybeStartAt: number | null = null,
49
- onEnded: AudioScheduledSourceNode["onended"] = null,
34
+ startAt: number,
35
+ sampleRate: number,
50
36
  ) {
51
- const startAt = maybeStartAt ?? context.currentTime;
52
-
53
- const floats = base64ToArray(b64);
54
37
  const source = context.createBufferSource();
55
- const buffer = context.createBuffer(1, floats.length, SAMPLE_RATE);
38
+ const buffer = context.createBuffer(1, floats.length, sampleRate);
56
39
  buffer.getChannelData(0).set(floats);
57
40
  source.buffer = buffer;
58
41
  source.connect(context.destination);
59
42
  source.start(startAt);
60
- source.onended = onEnded;
61
43
 
62
- return buffer.duration;
44
+ return new Promise<void>((resolve) => {
45
+ source.onended = () => {
46
+ resolve();
47
+ };
48
+ });
63
49
  }
64
50
 
65
51
  /**
@@ -77,10 +63,13 @@ export function createMessageHandlerForContextId(
77
63
  message,
78
64
  }: {
79
65
  chunk: Chunk;
80
- message: StreamEventData["message"];
66
+ message: string;
81
67
  }) => void,
82
68
  ) {
83
69
  return (event: MessageEvent) => {
70
+ if (typeof event.data !== "string") {
71
+ return; // Ignore non-string messages.
72
+ }
84
73
  const message = JSON.parse(event.data);
85
74
  if (message.context_id !== contextId) {
86
75
  return; // Ignore messages for other contexts.
@@ -92,12 +81,10 @@ export function createMessageHandlerForContextId(
92
81
  } else {
93
82
  chunk = message.data;
94
83
  }
95
- handler({ chunk, message });
84
+ handler({ chunk, message: event.data });
96
85
  };
97
86
  }
98
87
 
99
- export type Sentinel = null;
100
-
101
88
  /**
102
89
  * Get a sentinel value that indicates the end of a stream.
103
90
  * @returns A sentinel value to indicate the end of a stream.
@@ -138,12 +125,6 @@ export function isComplete(chunks: Chunk[]) {
138
125
  return isSentinel(chunks[chunks.length - 1]);
139
126
  }
140
127
 
141
- export type EmitteryCallbacks<T> = {
142
- on: Emittery<T>["on"];
143
- off: Emittery<T>["off"];
144
- once: Emittery<T>["once"];
145
- events: Emittery<T>["events"];
146
- };
147
128
  /**
148
129
  * Get user-facing emitter callbacks for an Emittery instance.
149
130
  * @param emitter The Emittery instance to get callbacks for.
@@ -159,62 +140,3 @@ export function getEmitteryCallbacks<T>(
159
140
  events: emitter.events.bind(emitter),
160
141
  };
161
142
  }
162
-
163
- /**
164
- * Converts a base64-encoded audio buffer to a WAV file.
165
- * Source: https://gist.github.com/Daninet/22edc59cf2aee0b9a90c18e553e49297
166
- * @param b64 The base64-encoded audio buffer to convert to a WAV file.
167
- */
168
- export function bufferToWav(
169
- sampleRate: number,
170
- channelBuffers: Float32Array[],
171
- ) {
172
- const totalSamples = channelBuffers[0].length * channelBuffers.length;
173
-
174
- const buffer = new ArrayBuffer(44 + totalSamples * 2);
175
- const view = new DataView(buffer);
176
-
177
- const writeString = (view: DataView, offset: number, string: string) => {
178
- for (let i = 0; i < string.length; i++) {
179
- view.setUint8(offset + i, string.charCodeAt(i));
180
- }
181
- };
182
-
183
- /* RIFF identifier */
184
- writeString(view, 0, "RIFF");
185
- /* RIFF chunk length */
186
- view.setUint32(4, 36 + totalSamples * 2, true);
187
- /* RIFF type */
188
- writeString(view, 8, "WAVE");
189
- /* format chunk identifier */
190
- writeString(view, 12, "fmt ");
191
- /* format chunk length */
192
- view.setUint32(16, 16, true);
193
- /* sample format (raw) */
194
- view.setUint16(20, 1, true);
195
- /* channel count */
196
- view.setUint16(22, channelBuffers.length, true);
197
- /* sample rate */
198
- view.setUint32(24, sampleRate, true);
199
- /* byte rate (sample rate * block align) */
200
- view.setUint32(28, sampleRate * 4, true);
201
- /* block align (channel count * bytes per sample) */
202
- view.setUint16(32, channelBuffers.length * 2, true);
203
- /* bits per sample */
204
- view.setUint16(34, 16, true);
205
- /* data chunk identifier */
206
- writeString(view, 36, "data");
207
- /* data chunk length */
208
- view.setUint32(40, totalSamples * 2, true);
209
-
210
- let offset = 44;
211
- for (let i = 0; i < channelBuffers[0].length; i++) {
212
- for (let channel = 0; channel < channelBuffers.length; channel++) {
213
- const s = Math.max(-1, Math.min(1, channelBuffers[channel][i]));
214
- view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
215
- offset += 2;
216
- }
217
- }
218
-
219
- return buffer;
220
- }
@@ -0,0 +1,210 @@
1
+ import Emittery from "emittery";
2
+ import { humanId } from "human-id";
3
+ import { WebSocket as PartySocketWebSocket } from "partysocket";
4
+ import { Client } from "../lib/client";
5
+ import { constructApiUrl } from "../lib/constants";
6
+ import type {
7
+ ConnectionEventData,
8
+ EmitteryCallbacks,
9
+ StreamRequest,
10
+ WebSocketOptions,
11
+ } from "../types";
12
+ import Source from "./source";
13
+ import {
14
+ base64ToArray,
15
+ createMessageHandlerForContextId,
16
+ getEmitteryCallbacks,
17
+ isSentinel,
18
+ } from "./utils";
19
+
20
+ export default class WebSocket extends Client {
21
+ socket?: PartySocketWebSocket;
22
+ #isConnected = false;
23
+ #sampleRate: number;
24
+
25
+ /**
26
+ * Create a new WebSocket client.
27
+ *
28
+ * @param args - Arguments to pass to the Client constructor.
29
+ */
30
+ constructor(
31
+ { sampleRate }: WebSocketOptions,
32
+ ...args: ConstructorParameters<typeof Client>
33
+ ) {
34
+ super(...args);
35
+
36
+ this.#sampleRate = sampleRate;
37
+ }
38
+
39
+ /**
40
+ * Send a message over the WebSocket in order to start a stream.
41
+ *
42
+ * @param inputs - Stream options.
43
+ * @param options - Options for the stream.
44
+ * @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
45
+ * If `0`, the stream will not time out.
46
+ * @returns A Source object that can be passed to a Player to play the audio.
47
+ */
48
+ send(
49
+ inputs: StreamRequest["inputs"],
50
+ { timeout = 0 }: StreamRequest["options"] = {},
51
+ ) {
52
+ if (!this.#isConnected) {
53
+ throw new Error("Not connected to WebSocket. Call .connect() first.");
54
+ }
55
+
56
+ // Send audio request.
57
+ const contextId = this.#generateId();
58
+ this.socket?.send(
59
+ JSON.stringify({
60
+ context_id: contextId,
61
+ ...inputs,
62
+ output_format: {
63
+ container: "raw",
64
+ encoding: "pcm_f32le",
65
+ sample_rate: this.#sampleRate,
66
+ },
67
+ }),
68
+ );
69
+
70
+ const emitter = new Emittery<{
71
+ message: string;
72
+ }>();
73
+ const source = new Source({
74
+ sampleRate: this.#sampleRate,
75
+ });
76
+ // Used to signal that the stream is complete, either because the
77
+ // WebSocket has closed, or because the stream has finished.
78
+ const streamCompleteController = new AbortController();
79
+ // Set a timeout.
80
+ let timeoutId: ReturnType<typeof setTimeout> | null = null;
81
+ if (timeout > 0) {
82
+ timeoutId = setTimeout(streamCompleteController.abort, timeout);
83
+ }
84
+ const handleMessage = createMessageHandlerForContextId(
85
+ contextId,
86
+ async ({ chunk, message }) => {
87
+ emitter.emit("message", message);
88
+ if (isSentinel(chunk)) {
89
+ await source.close();
90
+ streamCompleteController.abort();
91
+ return;
92
+ }
93
+ await source.enqueue(base64ToArray([chunk]));
94
+ if (timeoutId) {
95
+ clearTimeout(timeoutId);
96
+ timeoutId = setTimeout(streamCompleteController.abort, timeout);
97
+ }
98
+ },
99
+ );
100
+ this.socket?.addEventListener("message", handleMessage, {
101
+ signal: streamCompleteController.signal,
102
+ });
103
+ this.socket?.addEventListener(
104
+ "close",
105
+ () => {
106
+ streamCompleteController.abort();
107
+ },
108
+ {
109
+ once: true,
110
+ },
111
+ );
112
+ this.socket?.addEventListener(
113
+ "error",
114
+ () => {
115
+ streamCompleteController.abort();
116
+ },
117
+ {
118
+ once: true,
119
+ },
120
+ );
121
+ streamCompleteController.signal.addEventListener("abort", () => {
122
+ source.close();
123
+ if (timeoutId) {
124
+ clearTimeout(timeoutId);
125
+ }
126
+ });
127
+
128
+ return { source, ...getEmitteryCallbacks(emitter) };
129
+ }
130
+
131
+ /**
132
+ * Generate a unique ID suitable for a streaming context.
133
+ *
134
+ * Not suitable for security purposes or as a primary key, since
135
+ * it lacks the amount of entropy required for those use cases.
136
+ *
137
+ * @returns A unique ID.
138
+ */
139
+ #generateId() {
140
+ return humanId({
141
+ separator: "-",
142
+ capitalize: false,
143
+ });
144
+ }
145
+
146
+ /**
147
+ * Authenticate and connect to a Cartesia streaming WebSocket.
148
+ *
149
+ * @returns A promise that resolves when the WebSocket is connected.
150
+ * @throws {Error} If the WebSocket fails to connect.
151
+ */
152
+ connect() {
153
+ const url = constructApiUrl(this.baseUrl, "/tts/websocket", "ws");
154
+ url.searchParams.set("api_key", this.apiKey);
155
+ const emitter = new Emittery<ConnectionEventData>();
156
+ this.socket = new PartySocketWebSocket(url.toString());
157
+ this.socket.onopen = () => {
158
+ this.#isConnected = true;
159
+ emitter.emit("open");
160
+ };
161
+ this.socket.onclose = () => {
162
+ this.#isConnected = false;
163
+ emitter.emit("close");
164
+ };
165
+
166
+ return new Promise<EmitteryCallbacks<ConnectionEventData>>(
167
+ (resolve, reject) => {
168
+ this.socket?.addEventListener(
169
+ "open",
170
+ () => {
171
+ resolve(getEmitteryCallbacks(emitter));
172
+ },
173
+ {
174
+ once: true,
175
+ },
176
+ );
177
+
178
+ const aborter = new AbortController();
179
+ this.socket?.addEventListener(
180
+ "error",
181
+ () => {
182
+ aborter.abort();
183
+ reject(new Error("WebSocket failed to connect."));
184
+ },
185
+ {
186
+ signal: aborter.signal,
187
+ },
188
+ );
189
+
190
+ this.socket?.addEventListener(
191
+ "close",
192
+ () => {
193
+ aborter.abort();
194
+ reject(new Error("WebSocket closed before it could connect."));
195
+ },
196
+ {
197
+ signal: aborter.signal,
198
+ },
199
+ );
200
+ },
201
+ );
202
+ }
203
+
204
+ /**
205
+ * Disconnect from the Cartesia streaming WebSocket.
206
+ */
207
+ disconnect() {
208
+ this.socket?.close();
209
+ }
210
+ }
@@ -1,4 +1,67 @@
1
+ import type Emittery from "emittery";
2
+
1
3
  export interface ClientOptions {
2
4
  apiKey?: string;
3
5
  baseUrl?: string;
4
6
  }
7
+
8
+ export type Sentinel = null;
9
+
10
+ export type Chunk = string | Sentinel;
11
+
12
+ export type ConnectionEventData = {
13
+ open: never;
14
+ close: never;
15
+ };
16
+
17
+ export type StreamRequest = {
18
+ inputs: object;
19
+ options: {
20
+ timeout?: number;
21
+ };
22
+ };
23
+
24
+ export type EmitteryCallbacks<T> = {
25
+ on: Emittery<T>["on"];
26
+ off: Emittery<T>["off"];
27
+ once: Emittery<T>["once"];
28
+ events: Emittery<T>["events"];
29
+ };
30
+
31
+ export type CloneOptions =
32
+ | {
33
+ mode: "url";
34
+ link: string;
35
+ }
36
+ | {
37
+ mode: "clip";
38
+ clip: Blob;
39
+ };
40
+
41
+ export type Voice = {
42
+ id: string;
43
+ name: string;
44
+ description: string;
45
+ embedding: number[];
46
+ is_public: boolean;
47
+ user_id: string;
48
+ created_at: string;
49
+ };
50
+
51
+ export type CreateVoice = Pick<Voice, "name" | "description" | "embedding"> &
52
+ Partial<Omit<Voice, "name" | "description" | "embedding">>;
53
+
54
+ export type CloneResponse = {
55
+ embedding: number[];
56
+ };
57
+
58
+ export type WebSocketOptions = {
59
+ sampleRate: number;
60
+ };
61
+
62
+ export type SourceEventData = {
63
+ enqueue: never;
64
+ close: never;
65
+ wait: never;
66
+ read: never;
67
+ };
@@ -0,0 +1,47 @@
1
+ import { Client } from "../lib/client";
2
+ import type { CloneOptions, CloneResponse, CreateVoice, Voice } from "../types";
3
+
4
+ export default class Voices extends Client {
5
+ async list(): Promise<Voice[]> {
6
+ const response = await this.fetch("/voices");
7
+ return response.json();
8
+ }
9
+
10
+ async get(voiceId: string): Promise<Voice> {
11
+ const response = await this.fetch(`/voices/${voiceId}`);
12
+ return response.json();
13
+ }
14
+
15
+ async create(voice: CreateVoice): Promise<Voice> {
16
+ const response = await this.fetch("/voices", {
17
+ method: "POST",
18
+ body: JSON.stringify(voice),
19
+ });
20
+ return response.json() as Promise<Voice>;
21
+ }
22
+
23
+ async clone(options: CloneOptions): Promise<CloneResponse> {
24
+ if (options.mode === "url") {
25
+ const response = await this.fetch(
26
+ `/voices/clone/url?link=${options.link}`,
27
+ {
28
+ method: "POST",
29
+ },
30
+ );
31
+ return response.json();
32
+ }
33
+
34
+ if (options.mode === "clip") {
35
+ const formData = new FormData();
36
+ formData.append("clip", options.clip);
37
+
38
+ const response = await this.fetch("/voices/clone/clip", {
39
+ method: "POST",
40
+ body: formData,
41
+ });
42
+ return response.json();
43
+ }
44
+
45
+ throw new Error("Invalid mode for clone()");
46
+ }
47
+ }
@@ -1,5 +0,0 @@
1
- import 'emittery';
2
- import 'partysocket';
3
- import '../lib/client.mjs';
4
- export { C as Chunk, l as ConnectionEventData, S as StreamEventData, m as StreamRequest, e as default } from '../index-DSBmfK9-.mjs';
5
- import '../types/index.mjs';
@@ -1,5 +0,0 @@
1
- import 'emittery';
2
- import 'partysocket';
3
- import '../lib/client.js';
4
- export { C as Chunk, l as ConnectionEventData, S as StreamEventData, m as StreamRequest, e as default } from '../index-qwAyxV5I.js';
5
- import '../types/index.js';