@cartesia/cartesia-js 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/.turbo/turbo-build.log +47 -47
  2. package/CHANGELOG.md +6 -0
  3. package/LICENSE.md +21 -0
  4. package/README.md +92 -19
  5. package/dist/{chunk-F4QWVJY3.js → chunk-2NA5SEML.js} +2 -2
  6. package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
  7. package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
  8. package/dist/{chunk-FN7BK4PS.js → chunk-ASZKHN7Q.js} +44 -26
  9. package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
  10. package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
  11. package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
  12. package/dist/{chunk-JYLAM6VU.js → chunk-LZO6K34D.js} +2 -2
  13. package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
  14. package/dist/{chunk-IEN4NCER.js → chunk-OFH3ML4L.js} +3 -3
  15. package/dist/index.cjs +102 -31
  16. package/dist/index.d.cts +4 -4
  17. package/dist/index.d.ts +4 -4
  18. package/dist/index.js +15 -9
  19. package/dist/lib/client.js +2 -2
  20. package/dist/lib/constants.js +1 -1
  21. package/dist/lib/index.cjs +97 -30
  22. package/dist/lib/index.js +8 -8
  23. package/dist/react/index.cjs +202 -86
  24. package/dist/react/index.d.cts +4 -3
  25. package/dist/react/index.d.ts +4 -3
  26. package/dist/react/index.js +115 -66
  27. package/dist/react/utils.js +2 -2
  28. package/dist/tts/index.cjs +97 -30
  29. package/dist/tts/index.js +6 -6
  30. package/dist/tts/player.cjs +5 -0
  31. package/dist/tts/player.js +4 -3
  32. package/dist/tts/source.cjs +50 -4
  33. package/dist/tts/source.d.cts +16 -6
  34. package/dist/tts/source.d.ts +16 -6
  35. package/dist/tts/source.js +4 -2
  36. package/dist/tts/utils.cjs +18 -6
  37. package/dist/tts/utils.d.cts +7 -5
  38. package/dist/tts/utils.d.ts +7 -5
  39. package/dist/tts/utils.js +3 -2
  40. package/dist/tts/websocket.cjs +97 -30
  41. package/dist/tts/websocket.d.cts +12 -8
  42. package/dist/tts/websocket.d.ts +12 -8
  43. package/dist/tts/websocket.js +5 -5
  44. package/dist/types/index.d.cts +60 -4
  45. package/dist/types/index.d.ts +60 -4
  46. package/dist/voices/index.js +3 -3
  47. package/package.json +1 -1
  48. package/src/index.ts +2 -0
  49. package/src/react/index.ts +114 -63
  50. package/src/tts/source.ts +53 -7
  51. package/src/tts/utils.ts +26 -12
  52. package/src/tts/websocket.ts +33 -16
  53. package/src/types/index.ts +81 -3
@@ -6,8 +6,10 @@ import { CARTESIA_VERSION, constructApiUrl } from "../lib/constants";
6
6
  import type {
7
7
  ConnectionEventData,
8
8
  EmitteryCallbacks,
9
+ StreamOptions,
9
10
  StreamRequest,
10
11
  WebSocketOptions,
12
+ WordTimestamps,
11
13
  } from "../types";
12
14
  import Source from "./source";
13
15
  import {
@@ -21,6 +23,8 @@ export default class WebSocket extends Client {
21
23
  socket?: PartySocketWebSocket;
22
24
  #isConnected = false;
23
25
  #sampleRate: number;
26
+ #container: string;
27
+ #encoding: string;
24
28
 
25
29
  /**
26
30
  * Create a new WebSocket client.
@@ -28,18 +32,20 @@ export default class WebSocket extends Client {
28
32
  * @param args - Arguments to pass to the Client constructor.
29
33
  */
30
34
  constructor(
31
- { sampleRate }: WebSocketOptions,
35
+ { sampleRate, container, encoding }: WebSocketOptions,
32
36
  ...args: ConstructorParameters<typeof Client>
33
37
  ) {
34
38
  super(...args);
35
39
 
36
40
  this.#sampleRate = sampleRate;
41
+ this.#container = container ?? "raw"; // Default to raw audio for backwards compatibility.
42
+ this.#encoding = encoding ?? "pcm_f32le"; // Default to 32-bit floating point PCM for backwards compatibility.
37
43
  }
38
44
 
39
45
  /**
40
46
  * Send a message over the WebSocket to start a stream.
41
47
  *
42
- * @param inputs - Stream options.
48
+ * @param inputs - Stream options. Defined in the StreamRequest type.
43
49
  * @param options - Options for the stream.
44
50
  * @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
45
51
  * If set to `0`, the stream will not time out.
@@ -47,33 +53,37 @@ export default class WebSocket extends Client {
47
53
  * @returns An Emittery instance that emits messages from the WebSocket.
48
54
  * @returns An abort function that can be called to cancel the stream.
49
55
  */
50
- send(
51
- inputs: StreamRequest["inputs"],
52
- { timeout = 0 }: StreamRequest["options"] = {},
53
- ) {
56
+ send({ ...inputs }: StreamRequest, { timeout = 0 }: StreamOptions = {}) {
54
57
  if (!this.#isConnected) {
55
58
  throw new Error("Not connected to WebSocket. Call .connect() first.");
56
59
  }
57
60
 
61
+ if (!inputs.context_id) {
62
+ inputs.context_id = this.#generateId();
63
+ }
64
+ if (!inputs.output_format) {
65
+ inputs.output_format = {
66
+ container: this.#container,
67
+ encoding: this.#encoding,
68
+ sample_rate: this.#sampleRate,
69
+ };
70
+ }
71
+
58
72
  // Send audio request.
59
- const contextId = this.#generateId();
60
73
  this.socket?.send(
61
74
  JSON.stringify({
62
- context_id: contextId,
63
75
  ...inputs,
64
- output_format: {
65
- container: "raw",
66
- encoding: "pcm_f32le",
67
- sample_rate: this.#sampleRate,
68
- },
69
76
  }),
70
77
  );
71
78
 
72
79
  const emitter = new Emittery<{
73
80
  message: string;
81
+ timestamps: WordTimestamps;
74
82
  }>();
75
83
  const source = new Source({
76
84
  sampleRate: this.#sampleRate,
85
+ encoding: this.#encoding,
86
+ container: this.#container,
77
87
  });
78
88
  // Used to signal that the stream is complete, either because the
79
89
  // WebSocket has closed, or because the stream has finished.
@@ -84,19 +94,26 @@ export default class WebSocket extends Client {
84
94
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
85
95
  }
86
96
  const handleMessage = createMessageHandlerForContextId(
87
- contextId,
88
- async ({ chunk, message }) => {
97
+ inputs.context_id,
98
+ async ({ chunk, message, data }) => {
89
99
  emitter.emit("message", message);
100
+ if (data.type === "timestamps") {
101
+ emitter.emit("timestamps", data.word_timestamps);
102
+ return;
103
+ }
90
104
  if (isSentinel(chunk)) {
91
105
  await source.close();
92
106
  streamCompleteController.abort();
93
107
  return;
94
108
  }
95
- await source.enqueue(base64ToArray([chunk]));
96
109
  if (timeoutId) {
97
110
  clearTimeout(timeoutId);
98
111
  timeoutId = setTimeout(streamCompleteController.abort, timeout);
99
112
  }
113
+ if (!chunk) {
114
+ return;
115
+ }
116
+ await source.enqueue(base64ToArray([chunk], this.#encoding));
100
117
  },
101
118
  );
102
119
  this.socket?.addEventListener("message", handleMessage, {
@@ -14,13 +14,85 @@ export type ConnectionEventData = {
14
14
  close: never;
15
15
  };
16
16
 
17
+ export type VoiceSpecifier =
18
+ | {
19
+ mode: "id";
20
+ id: string;
21
+ }
22
+ | {
23
+ mode: "embedding";
24
+ embedding: number[];
25
+ };
26
+
27
+ export type Emotion =
28
+ | "anger"
29
+ | "sadness"
30
+ | "positivity"
31
+ | "curiosity"
32
+ | "surprise";
33
+ export type Intensity = "lowest" | "low" | "high" | "highest";
34
+ export type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
35
+
36
+ export type VoiceOptions = VoiceSpecifier & {
37
+ __experimental_controls?: {
38
+ speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
39
+ emotion?: EmotionControl[];
40
+ };
41
+ };
42
+
17
43
  export type StreamRequest = {
18
- inputs: object;
19
- options: {
20
- timeout?: number;
44
+ model_id: string;
45
+ transcript: string;
46
+ voice: VoiceOptions;
47
+ output_format?: {
48
+ container: string;
49
+ encoding: string;
50
+ sample_rate: number;
21
51
  };
52
+ context_id?: string;
53
+ continue?: boolean;
54
+ duration?: number;
55
+ language?: string;
56
+ add_timestamps?: boolean;
57
+ };
58
+
59
+ export type StreamOptions = {
60
+ timeout?: number;
61
+ };
62
+
63
+ export type WebSocketBaseResponse = {
64
+ context_id: string;
65
+ status_code: number;
66
+ done: boolean;
67
+ };
68
+
69
+ export type WordTimestamps = {
70
+ words: string[];
71
+ start: number[];
72
+ end: number[];
73
+ };
74
+
75
+ export type WebSocketTimestampsResponse = WebSocketBaseResponse & {
76
+ type: "timestamps";
77
+ word_timestamps: WordTimestamps;
78
+ };
79
+
80
+ export type WebSocketChunkResponse = WebSocketBaseResponse & {
81
+ type: "chunk";
82
+ data: string;
83
+ step_time: number;
84
+ };
85
+
86
+ export type WebSocketErrorResponse = WebSocketBaseResponse & {
87
+ type: "error";
88
+ error: string;
22
89
  };
23
90
 
91
+ export type WebSocketResponse =
92
+ | WebSocketTimestampsResponse
93
+ | WebSocketChunkResponse
94
+ | WebSocketErrorResponse;
95
+
24
96
  export type EmitteryCallbacks<T> = {
25
97
  on: Emittery<T>["on"];
26
98
  off: Emittery<T>["off"];
@@ -56,6 +128,8 @@ export type CloneResponse = {
56
128
  };
57
129
 
58
130
  export type WebSocketOptions = {
131
+ container?: string;
132
+ encoding?: string;
59
133
  sampleRate: number;
60
134
  };
61
135
 
@@ -65,3 +139,7 @@ export type SourceEventData = {
65
139
  wait: never;
66
140
  read: never;
67
141
  };
142
+
143
+ export type TypedArray = Float32Array | Int16Array | Uint8Array;
144
+
145
+ export type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";