@cartesia/cartesia-js 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/.turbo/turbo-build.log +47 -47
  2. package/CHANGELOG.md +6 -0
  3. package/LICENSE.md +21 -0
  4. package/README.md +92 -19
  5. package/dist/{chunk-F4QWVJY3.js → chunk-2NA5SEML.js} +2 -2
  6. package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
  7. package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
  8. package/dist/{chunk-FN7BK4PS.js → chunk-ASZKHN7Q.js} +44 -26
  9. package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
  10. package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
  11. package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
  12. package/dist/{chunk-JYLAM6VU.js → chunk-LZO6K34D.js} +2 -2
  13. package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
  14. package/dist/{chunk-IEN4NCER.js → chunk-OFH3ML4L.js} +3 -3
  15. package/dist/index.cjs +102 -31
  16. package/dist/index.d.cts +4 -4
  17. package/dist/index.d.ts +4 -4
  18. package/dist/index.js +15 -9
  19. package/dist/lib/client.js +2 -2
  20. package/dist/lib/constants.js +1 -1
  21. package/dist/lib/index.cjs +97 -30
  22. package/dist/lib/index.js +8 -8
  23. package/dist/react/index.cjs +202 -86
  24. package/dist/react/index.d.cts +4 -3
  25. package/dist/react/index.d.ts +4 -3
  26. package/dist/react/index.js +115 -66
  27. package/dist/react/utils.js +2 -2
  28. package/dist/tts/index.cjs +97 -30
  29. package/dist/tts/index.js +6 -6
  30. package/dist/tts/player.cjs +5 -0
  31. package/dist/tts/player.js +4 -3
  32. package/dist/tts/source.cjs +50 -4
  33. package/dist/tts/source.d.cts +16 -6
  34. package/dist/tts/source.d.ts +16 -6
  35. package/dist/tts/source.js +4 -2
  36. package/dist/tts/utils.cjs +18 -6
  37. package/dist/tts/utils.d.cts +7 -5
  38. package/dist/tts/utils.d.ts +7 -5
  39. package/dist/tts/utils.js +3 -2
  40. package/dist/tts/websocket.cjs +97 -30
  41. package/dist/tts/websocket.d.cts +12 -8
  42. package/dist/tts/websocket.d.ts +12 -8
  43. package/dist/tts/websocket.js +5 -5
  44. package/dist/types/index.d.cts +60 -4
  45. package/dist/types/index.d.ts +60 -4
  46. package/dist/voices/index.js +3 -3
  47. package/package.json +1 -1
  48. package/src/index.ts +2 -0
  49. package/src/react/index.ts +114 -63
  50. package/src/tts/source.ts +53 -7
  51. package/src/tts/utils.ts +26 -12
  52. package/src/tts/websocket.ts +33 -16
  53. package/src/types/index.ts +81 -3
package/dist/types/index.d.cts CHANGED
@@ -10,12 +10,64 @@ type ConnectionEventData = {
10
10
  open: never;
11
11
  close: never;
12
12
  };
13
+ type VoiceSpecifier = {
14
+ mode: "id";
15
+ id: string;
16
+ } | {
17
+ mode: "embedding";
18
+ embedding: number[];
19
+ };
20
+ type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
21
+ type Intensity = "lowest" | "low" | "high" | "highest";
22
+ type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
23
+ type VoiceOptions = VoiceSpecifier & {
24
+ __experimental_controls?: {
25
+ speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
26
+ emotion?: EmotionControl[];
27
+ };
28
+ };
13
29
  type StreamRequest = {
14
- inputs: object;
15
- options: {
16
- timeout?: number;
30
+ model_id: string;
31
+ transcript: string;
32
+ voice: VoiceOptions;
33
+ output_format?: {
34
+ container: string;
35
+ encoding: string;
36
+ sample_rate: number;
17
37
  };
38
+ context_id?: string;
39
+ continue?: boolean;
40
+ duration?: number;
41
+ language?: string;
42
+ add_timestamps?: boolean;
43
+ };
44
+ type StreamOptions = {
45
+ timeout?: number;
46
+ };
47
+ type WebSocketBaseResponse = {
48
+ context_id: string;
49
+ status_code: number;
50
+ done: boolean;
51
+ };
52
+ type WordTimestamps = {
53
+ words: string[];
54
+ start: number[];
55
+ end: number[];
56
+ };
57
+ type WebSocketTimestampsResponse = WebSocketBaseResponse & {
58
+ type: "timestamps";
59
+ word_timestamps: WordTimestamps;
60
+ };
61
+ type WebSocketChunkResponse = WebSocketBaseResponse & {
62
+ type: "chunk";
63
+ data: string;
64
+ step_time: number;
65
+ };
66
+ type WebSocketErrorResponse = WebSocketBaseResponse & {
67
+ type: "error";
68
+ error: string;
18
69
  };
70
+ type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
19
71
  type EmitteryCallbacks<T> = {
20
72
  on: emittery__default<T>["on"];
21
73
  off: emittery__default<T>["off"];
@@ -43,6 +95,8 @@ type CloneResponse = {
43
95
  embedding: number[];
44
96
  };
45
97
  type WebSocketOptions = {
98
+ container?: string;
99
+ encoding?: string;
46
100
  sampleRate: number;
47
101
  };
48
102
  type SourceEventData = {
@@ -51,5 +105,7 @@ type SourceEventData = {
51
105
  wait: never;
52
106
  read: never;
53
107
  };
108
+ type TypedArray = Float32Array | Int16Array | Uint8Array;
109
+ type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
54
110
 
55
- export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
111
+ export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
package/dist/types/index.d.ts CHANGED
@@ -10,12 +10,64 @@ type ConnectionEventData = {
10
10
  open: never;
11
11
  close: never;
12
12
  };
13
+ type VoiceSpecifier = {
14
+ mode: "id";
15
+ id: string;
16
+ } | {
17
+ mode: "embedding";
18
+ embedding: number[];
19
+ };
20
+ type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
21
+ type Intensity = "lowest" | "low" | "high" | "highest";
22
+ type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
23
+ type VoiceOptions = VoiceSpecifier & {
24
+ __experimental_controls?: {
25
+ speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
26
+ emotion?: EmotionControl[];
27
+ };
28
+ };
13
29
  type StreamRequest = {
14
- inputs: object;
15
- options: {
16
- timeout?: number;
30
+ model_id: string;
31
+ transcript: string;
32
+ voice: VoiceOptions;
33
+ output_format?: {
34
+ container: string;
35
+ encoding: string;
36
+ sample_rate: number;
17
37
  };
38
+ context_id?: string;
39
+ continue?: boolean;
40
+ duration?: number;
41
+ language?: string;
42
+ add_timestamps?: boolean;
43
+ };
44
+ type StreamOptions = {
45
+ timeout?: number;
46
+ };
47
+ type WebSocketBaseResponse = {
48
+ context_id: string;
49
+ status_code: number;
50
+ done: boolean;
51
+ };
52
+ type WordTimestamps = {
53
+ words: string[];
54
+ start: number[];
55
+ end: number[];
56
+ };
57
+ type WebSocketTimestampsResponse = WebSocketBaseResponse & {
58
+ type: "timestamps";
59
+ word_timestamps: WordTimestamps;
60
+ };
61
+ type WebSocketChunkResponse = WebSocketBaseResponse & {
62
+ type: "chunk";
63
+ data: string;
64
+ step_time: number;
65
+ };
66
+ type WebSocketErrorResponse = WebSocketBaseResponse & {
67
+ type: "error";
68
+ error: string;
18
69
  };
70
+ type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
19
71
  type EmitteryCallbacks<T> = {
20
72
  on: emittery__default<T>["on"];
21
73
  off: emittery__default<T>["off"];
@@ -43,6 +95,8 @@ type CloneResponse = {
43
95
  embedding: number[];
44
96
  };
45
97
  type WebSocketOptions = {
98
+ container?: string;
99
+ encoding?: string;
46
100
  sampleRate: number;
47
101
  };
48
102
  type SourceEventData = {
@@ -51,5 +105,7 @@ type SourceEventData = {
51
105
  wait: never;
52
106
  read: never;
53
107
  };
108
+ type TypedArray = Float32Array | Int16Array | Uint8Array;
109
+ type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
54
110
 
55
- export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
111
+ export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
package/dist/voices/index.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  Voices
3
- } from "../chunk-SGXUEFII.js";
4
- import "../chunk-PQ5EVEEH.js";
3
+ } from "../chunk-KUSVZXDT.js";
4
+ import "../chunk-5M33ZF3Y.js";
5
5
  import "../chunk-2BFEKY3F.js";
6
- import "../chunk-WIFMLPT5.js";
6
+ import "../chunk-GHY2WEOK.js";
7
7
  export {
8
8
  Voices as default
9
9
  };
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "name": "Cartesia",
5
5
  "url": "https://cartesia.ai"
6
6
  },
7
- "version": "1.0.0",
7
+ "version": "1.0.1",
8
8
  "description": "Client for the Cartesia API.",
9
9
  "type": "module",
10
10
  "module": "./dist/index.js",
package/src/index.ts CHANGED
@@ -2,3 +2,5 @@ export { Cartesia as default } from "./lib";
2
2
  export * from "./lib";
3
3
  export * from "./types";
4
4
  export { default as WebPlayer } from "./tts/player";
5
+ export { default as Source } from "./tts/source";
6
+ export { default as WebSocket } from "./tts/websocket";
package/src/react/index.ts CHANGED
@@ -4,12 +4,14 @@ import { Cartesia } from "../lib";
4
4
  import Player from "../tts/player";
5
5
  import type Source from "../tts/source";
6
6
  import type WebSocket from "../tts/websocket";
7
+ import type { StreamRequest } from "../types";
7
8
  import { pingServer } from "./utils";
8
9
 
9
10
  export type UseTTSOptions = {
10
11
  apiKey: string | null;
11
12
  baseUrl?: string;
12
13
  sampleRate: number;
14
+ onError?: (error: Error) => void;
13
15
  };
14
16
 
15
17
  export type PlaybackStatus = "inactive" | "playing" | "paused" | "finished";
@@ -20,7 +22,7 @@ export type Metrics = {
20
22
  };
21
23
 
22
24
  export interface UseTTSReturn {
23
- buffer: (options: object) => Promise<void>;
25
+ buffer: (options: StreamRequest) => Promise<void>;
24
26
  play: (bufferDuration?: number) => Promise<void>;
25
27
  pause: () => Promise<void>;
26
28
  resume: () => Promise<void>;
@@ -47,6 +49,7 @@ export function useTTS({
47
49
  apiKey,
48
50
  baseUrl,
49
51
  sampleRate,
52
+ onError,
50
53
  }: UseTTSOptions): UseTTSReturn {
51
54
  if (typeof window === "undefined") {
52
55
  return {
@@ -72,7 +75,11 @@ export function useTTS({
72
75
  }
73
76
  const cartesia = new Cartesia({ apiKey, baseUrl });
74
77
  baseUrl = baseUrl ?? cartesia.baseUrl;
75
- return cartesia.tts.websocket({ sampleRate });
78
+ return cartesia.tts.websocket({
79
+ container: "raw",
80
+ encoding: "pcm_f32le",
81
+ sampleRate,
82
+ });
76
83
  }, [apiKey, baseUrl, sampleRate]);
77
84
  const websocketReturn = useRef<ReturnType<WebSocket["send"]> | null>(null);
78
85
  const player = useRef<Player | null>(null);
@@ -85,23 +92,35 @@ export function useTTS({
85
92
  const [messages, setMessages] = useState<Message[]>([]);
86
93
 
87
94
  const buffer = useCallback(
88
- async (options: object) => {
95
+ async (options: StreamRequest) => {
89
96
  websocketReturn.current?.stop(); // Abort the previous request if it exists.
90
97
 
91
- setMessages([]);
92
- setBufferStatus("buffering");
93
- websocketReturn.current = websocket?.send(options) ?? null;
94
- if (!websocketReturn.current) {
95
- return;
98
+ try {
99
+ setMessages([]);
100
+ setBufferStatus("buffering");
101
+ websocketReturn.current = websocket?.send(options) ?? null;
102
+ if (!websocketReturn.current) {
103
+ return;
104
+ }
105
+ const unsubscribe = websocketReturn.current.on("message", (message) => {
106
+ const parsedMessage = JSON.parse(message);
107
+ setMessages((messages) => [...messages, parsedMessage]);
108
+ if (parsedMessage.error) {
109
+ onError?.(new Error(parsedMessage.error));
110
+ }
111
+ });
112
+ await websocketReturn.current.source.once("close");
113
+ setBufferStatus("buffered");
114
+ unsubscribe();
115
+ } catch (error) {
116
+ if (error instanceof Error) {
117
+ onError?.(error);
118
+ } else {
119
+ console.error(error);
120
+ }
96
121
  }
97
- const unsubscribe = websocketReturn.current.on("message", (message) => {
98
- setMessages((messages) => [...messages, JSON.parse(message)]);
99
- });
100
- await websocketReturn.current.source.once("close");
101
- setBufferStatus("buffered");
102
- unsubscribe();
103
122
  },
104
- [websocket],
123
+ [websocket, onError],
105
124
  );
106
125
 
107
126
  const metrics = useMemo(() => {
@@ -173,64 +192,96 @@ export function useTTS({
173
192
  }, [websocket, baseUrl]);
174
193
 
175
194
  const play = useCallback(async () => {
176
- if (playbackStatus === "playing" || !websocketReturn.current) {
177
- return;
178
- }
179
- if (player.current) {
180
- // Stop the current player if it exists.
181
- await player.current.stop();
182
- }
195
+ try {
196
+ if (playbackStatus === "playing" || !websocketReturn.current) {
197
+ return;
198
+ }
199
+ if (player.current) {
200
+ // Stop the current player if it exists.
201
+ await player.current.stop();
202
+ }
183
203
 
184
- setPlaybackStatus("playing");
185
-
186
- const unsubscribes = [];
187
- unsubscribes.push(
188
- websocketReturn.current.source.on("wait", () => {
189
- setIsWaiting(true);
190
- }),
191
- );
192
- unsubscribes.push(
193
- websocketReturn.current.source.on("read", () => {
194
- setIsWaiting(false);
195
- }),
196
- );
197
-
198
- player.current = new Player({
199
- bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
200
- });
201
- // Wait for the playback to finish before setting isPlaying to false.
202
- await player.current.play(websocketReturn.current.source);
204
+ setPlaybackStatus("playing");
203
205
 
204
- for (const unsubscribe of unsubscribes) {
205
- // Deregister the event listeners (.on()) that we registered above to avoid memory leaks.
206
- unsubscribe();
207
- }
206
+ const unsubscribes = [];
207
+ unsubscribes.push(
208
+ websocketReturn.current.source.on("wait", () => {
209
+ setIsWaiting(true);
210
+ }),
211
+ );
212
+ unsubscribes.push(
213
+ websocketReturn.current.source.on("read", () => {
214
+ setIsWaiting(false);
215
+ }),
216
+ );
208
217
 
209
- setPlaybackStatus("finished");
210
- }, [playbackStatus, bufferDuration]);
218
+ player.current = new Player({
219
+ bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
220
+ });
221
+ // Wait for the playback to finish before setting isPlaying to false.
222
+ await player.current.play(websocketReturn.current.source);
223
+
224
+ for (const unsubscribe of unsubscribes) {
225
+ // Deregister the event listeners (.on()) that we registered above to avoid memory leaks.
226
+ unsubscribe();
227
+ }
228
+
229
+ setPlaybackStatus("finished");
230
+ } catch (error) {
231
+ if (error instanceof Error) {
232
+ onError?.(error);
233
+ } else {
234
+ console.error(error);
235
+ }
236
+ }
237
+ }, [playbackStatus, bufferDuration, onError]);
211
238
 
212
239
  const pause = useCallback(async () => {
213
- await player.current?.pause();
214
- setPlaybackStatus("paused");
215
- }, []);
240
+ try {
241
+ await player.current?.pause();
242
+ setPlaybackStatus("paused");
243
+ } catch (error) {
244
+ if (error instanceof Error) {
245
+ onError?.(error);
246
+ } else {
247
+ console.error(error);
248
+ }
249
+ }
250
+ }, [onError]);
216
251
 
217
252
  const resume = useCallback(async () => {
218
- await player.current?.resume();
219
- setPlaybackStatus("playing");
220
- }, []);
253
+ try {
254
+ await player.current?.resume();
255
+ setPlaybackStatus("playing");
256
+ } catch (error) {
257
+ if (error instanceof Error) {
258
+ onError?.(error);
259
+ } else {
260
+ console.error(error);
261
+ }
262
+ }
263
+ }, [onError]);
221
264
 
222
265
  const toggle = useCallback(async () => {
223
- await player.current?.toggle();
224
- setPlaybackStatus((status) => {
225
- if (status === "playing") {
226
- return "paused";
227
- }
228
- if (status === "paused") {
229
- return "playing";
266
+ try {
267
+ await player.current?.toggle();
268
+ setPlaybackStatus((status) => {
269
+ if (status === "playing") {
270
+ return "paused";
271
+ }
272
+ if (status === "paused") {
273
+ return "playing";
274
+ }
275
+ return status;
276
+ });
277
+ } catch (error) {
278
+ if (error instanceof Error) {
279
+ onError?.(error);
280
+ } else {
281
+ console.error(error);
230
282
  }
231
- return status;
232
- });
233
- }, []);
283
+ }
284
+ }, [onError]);
234
285
 
235
286
  return {
236
287
  buffer,
package/src/tts/source.ts CHANGED
@@ -1,13 +1,30 @@
1
1
  import Emittery from "emittery";
2
- import type { SourceEventData } from "../types";
2
+ import type { Encoding, SourceEventData, TypedArray } from "../types";
3
+
4
+ type EncodingInfo = {
5
+ arrayType:
6
+ | Float32ArrayConstructor
7
+ | Int16ArrayConstructor
8
+ | Uint8ArrayConstructor;
9
+ bytesPerElement: number;
10
+ };
11
+
12
+ export const ENCODING_MAP: Record<Encoding, EncodingInfo> = {
13
+ pcm_f32le: { arrayType: Float32Array, bytesPerElement: 4 },
14
+ pcm_s16le: { arrayType: Int16Array, bytesPerElement: 2 },
15
+ pcm_alaw: { arrayType: Uint8Array, bytesPerElement: 1 },
16
+ pcm_mulaw: { arrayType: Uint8Array, bytesPerElement: 1 },
17
+ };
3
18
 
4
19
  export default class Source {
5
20
  #emitter = new Emittery<SourceEventData>();
6
- #buffer: Float32Array;
21
+ #buffer: TypedArray;
7
22
  #readIndex = 0;
8
23
  #writeIndex = 0;
9
24
  #closed = false;
10
25
  #sampleRate: number;
26
+ #encoding: Encoding;
27
+ #container: string;
11
28
 
12
29
  on = this.#emitter.on.bind(this.#emitter);
13
30
  once = this.#emitter.once.bind(this.#emitter);
@@ -20,21 +37,46 @@ export default class Source {
20
37
  * @param options - Options for the Source.
21
38
  * @param options.sampleRate - The sample rate of the audio.
22
39
  */
23
- constructor({ sampleRate }: { sampleRate: number }) {
40
+ constructor({
41
+ sampleRate,
42
+ encoding,
43
+ container,
44
+ }: { sampleRate: number; encoding: string; container: string }) {
24
45
  this.#sampleRate = sampleRate;
25
- this.#buffer = new Float32Array(1024); // Initial size, can be adjusted
46
+ this.#encoding = encoding as Encoding;
47
+ this.#container = container;
48
+ this.#buffer = this.#createBuffer(1024); // Initial size, can be adjusted
26
49
  }
27
50
 
28
51
  get sampleRate() {
29
52
  return this.#sampleRate;
30
53
  }
31
54
 
55
+ get encoding() {
56
+ return this.#encoding;
57
+ }
58
+
59
+ get container() {
60
+ return this.#container;
61
+ }
62
+
63
+ /**
64
+ * Create a new buffer for the source.
65
+ *
66
+ * @param size - The size of the buffer to create.
67
+ * @returns The new buffer as a TypedArray based on the encoding.
68
+ */
69
+ #createBuffer(size: number): TypedArray {
70
+ const { arrayType: ArrayType } = ENCODING_MAP[this.#encoding];
71
+ return new ArrayType(size);
72
+ }
73
+
32
74
  /**
33
75
  * Append audio to the buffer.
34
76
  *
35
77
  * @param src The audio to append.
36
78
  */
37
- async enqueue(src: Float32Array) {
79
+ async enqueue(src: TypedArray) {
38
80
  const requiredCapacity = this.#writeIndex + src.length;
39
81
 
40
82
  // Resize buffer if necessary
@@ -44,7 +86,7 @@ export default class Source {
44
86
  newCapacity *= 2; // Double the buffer size
45
87
  }
46
88
 
47
- const newBuffer = new Float32Array(newCapacity);
89
+ const newBuffer = this.#createBuffer(newCapacity);
48
90
  newBuffer.set(this.#buffer);
49
91
  this.#buffer = newBuffer;
50
92
  }
@@ -62,7 +104,7 @@ export default class Source {
62
104
  * @returns The number of samples read. If the source is closed, this will be
63
105
  * less than the length of the provided buffer.
64
106
  */
65
- async read(dst: Float32Array): Promise<number> {
107
+ async read(dst: TypedArray): Promise<number> {
66
108
  // Read the buffer into the provided buffer.
67
109
  const targetReadIndex = this.#readIndex + dst.length;
68
110
 
@@ -100,6 +142,10 @@ export default class Source {
100
142
  return this.#readIndex;
101
143
  }
102
144
 
145
+ get writeIndex() {
146
+ return this.#writeIndex;
147
+ }
148
+
103
149
  /**
104
150
  * Close the source. This signals that no more audio will be enqueued.
105
151
  *
package/src/tts/utils.ts CHANGED
@@ -1,25 +1,38 @@
1
1
  import base64 from "base64-js";
2
2
  import type Emittery from "emittery";
3
- import type { Chunk, EmitteryCallbacks, Sentinel } from "../types";
3
+ import type {
4
+ Chunk,
5
+ EmitteryCallbacks,
6
+ Encoding,
7
+ Sentinel,
8
+ TypedArray,
9
+ WebSocketResponse,
10
+ } from "../types";
11
+ import { ENCODING_MAP } from "./source";
4
12
 
5
13
  /**
6
- * Convert base64-encoded audio buffer(s) to a Float32Array.
14
+ * Convert base64-encoded audio buffer(s) to a TypedArray.
7
15
  *
8
16
  * @param b64 The base64-encoded audio buffer, or an array of base64-encoded
9
17
  * audio buffers.
10
- * @returns The audio buffer(s) as a Float32Array.
18
+ * @param encoding The encoding of the audio buffer(s).
19
+ * @returns The audio buffer(s) as a TypedArray.
11
20
  */
12
- export function base64ToArray(b64: Chunk[]): Float32Array {
21
+ export function base64ToArray(b64: Chunk[], encoding: string): TypedArray {
13
22
  const byteArrays = filterSentinel(b64).map((b) => base64.toByteArray(b));
23
+
24
+ const { arrayType: ArrayType, bytesPerElement } =
25
+ ENCODING_MAP[encoding as Encoding];
26
+
14
27
  const totalLength = byteArrays.reduce(
15
- (acc, arr) => acc + arr.length / Float32Array.BYTES_PER_ELEMENT,
28
+ (acc, arr) => acc + arr.length / bytesPerElement,
16
29
  0,
17
30
  );
18
- const result = new Float32Array(totalLength);
31
+ const result = new ArrayType(totalLength);
19
32
 
20
33
  let offset = 0;
21
34
  for (const arr of byteArrays) {
22
- const floats = new Float32Array(arr.buffer);
35
+ const floats = new ArrayType(arr.buffer);
23
36
  result.set(floats, offset);
24
37
  offset += floats.length;
25
38
  }
@@ -70,26 +83,27 @@ export function createMessageHandlerForContextId(
70
83
  chunk,
71
84
  message,
72
85
  }: {
73
- chunk: Chunk;
86
+ chunk?: Chunk;
74
87
  message: string;
88
+ data: WebSocketResponse;
75
89
  }) => void,
76
90
  ) {
77
91
  return (event: MessageEvent) => {
78
92
  if (typeof event.data !== "string") {
79
93
  return; // Ignore non-string messages.
80
94
  }
81
- const message = JSON.parse(event.data);
95
+ const message: WebSocketResponse = JSON.parse(event.data);
82
96
  if (message.context_id !== contextId) {
83
97
  return; // Ignore messages for other contexts.
84
98
  }
85
- let chunk: Chunk;
99
+ let chunk: Chunk | undefined;
86
100
  if (message.done) {
87
101
  // Convert the done message to a sentinel value.
88
102
  chunk = getSentinel();
89
- } else {
103
+ } else if (message.type === "chunk") {
90
104
  chunk = message.data;
91
105
  }
92
- handler({ chunk, message: event.data });
106
+ handler({ chunk, message: event.data, data: message });
93
107
  };
94
108
  }
95
109