@cartesia/cartesia-js 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +47 -47
- package/CHANGELOG.md +6 -0
- package/LICENSE.md +21 -0
- package/README.md +92 -19
- package/dist/{chunk-F4QWVJY3.js → chunk-2NA5SEML.js} +2 -2
- package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
- package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
- package/dist/{chunk-FN7BK4PS.js → chunk-ASZKHN7Q.js} +44 -26
- package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
- package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
- package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
- package/dist/{chunk-JYLAM6VU.js → chunk-LZO6K34D.js} +2 -2
- package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
- package/dist/{chunk-IEN4NCER.js → chunk-OFH3ML4L.js} +3 -3
- package/dist/index.cjs +102 -31
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +15 -9
- package/dist/lib/client.js +2 -2
- package/dist/lib/constants.js +1 -1
- package/dist/lib/index.cjs +97 -30
- package/dist/lib/index.js +8 -8
- package/dist/react/index.cjs +202 -86
- package/dist/react/index.d.cts +4 -3
- package/dist/react/index.d.ts +4 -3
- package/dist/react/index.js +115 -66
- package/dist/react/utils.js +2 -2
- package/dist/tts/index.cjs +97 -30
- package/dist/tts/index.js +6 -6
- package/dist/tts/player.cjs +5 -0
- package/dist/tts/player.js +4 -3
- package/dist/tts/source.cjs +50 -4
- package/dist/tts/source.d.cts +16 -6
- package/dist/tts/source.d.ts +16 -6
- package/dist/tts/source.js +4 -2
- package/dist/tts/utils.cjs +18 -6
- package/dist/tts/utils.d.cts +7 -5
- package/dist/tts/utils.d.ts +7 -5
- package/dist/tts/utils.js +3 -2
- package/dist/tts/websocket.cjs +97 -30
- package/dist/tts/websocket.d.cts +12 -8
- package/dist/tts/websocket.d.ts +12 -8
- package/dist/tts/websocket.js +5 -5
- package/dist/types/index.d.cts +60 -4
- package/dist/types/index.d.ts +60 -4
- package/dist/voices/index.js +3 -3
- package/package.json +1 -1
- package/src/index.ts +2 -0
- package/src/react/index.ts +114 -63
- package/src/tts/source.ts +53 -7
- package/src/tts/utils.ts +26 -12
- package/src/tts/websocket.ts +33 -16
- package/src/types/index.ts +81 -3
package/dist/types/index.d.cts
CHANGED
|
@@ -10,12 +10,64 @@ type ConnectionEventData = {
|
|
|
10
10
|
open: never;
|
|
11
11
|
close: never;
|
|
12
12
|
};
|
|
13
|
+
type VoiceSpecifier = {
|
|
14
|
+
mode: "id";
|
|
15
|
+
id: string;
|
|
16
|
+
} | {
|
|
17
|
+
mode: "embedding";
|
|
18
|
+
embedding: number[];
|
|
19
|
+
};
|
|
20
|
+
type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
|
|
21
|
+
type Intensity = "lowest" | "low" | "high" | "highest";
|
|
22
|
+
type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
|
|
23
|
+
type VoiceOptions = VoiceSpecifier & {
|
|
24
|
+
__experimental_controls?: {
|
|
25
|
+
speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
|
|
26
|
+
emotion?: EmotionControl[];
|
|
27
|
+
};
|
|
28
|
+
};
|
|
13
29
|
type StreamRequest = {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
30
|
+
model_id: string;
|
|
31
|
+
transcript: string;
|
|
32
|
+
voice: VoiceOptions;
|
|
33
|
+
output_format?: {
|
|
34
|
+
container: string;
|
|
35
|
+
encoding: string;
|
|
36
|
+
sample_rate: number;
|
|
17
37
|
};
|
|
38
|
+
context_id?: string;
|
|
39
|
+
continue?: boolean;
|
|
40
|
+
duration?: number;
|
|
41
|
+
language?: string;
|
|
42
|
+
add_timestamps?: boolean;
|
|
43
|
+
};
|
|
44
|
+
type StreamOptions = {
|
|
45
|
+
timeout?: number;
|
|
46
|
+
};
|
|
47
|
+
type WebSocketBaseResponse = {
|
|
48
|
+
context_id: string;
|
|
49
|
+
status_code: number;
|
|
50
|
+
done: boolean;
|
|
51
|
+
};
|
|
52
|
+
type WordTimestamps = {
|
|
53
|
+
words: string[];
|
|
54
|
+
start: number[];
|
|
55
|
+
end: number[];
|
|
56
|
+
};
|
|
57
|
+
type WebSocketTimestampsResponse = WebSocketBaseResponse & {
|
|
58
|
+
type: "timestamps";
|
|
59
|
+
word_timestamps: WordTimestamps;
|
|
60
|
+
};
|
|
61
|
+
type WebSocketChunkResponse = WebSocketBaseResponse & {
|
|
62
|
+
type: "chunk";
|
|
63
|
+
data: string;
|
|
64
|
+
step_time: number;
|
|
65
|
+
};
|
|
66
|
+
type WebSocketErrorResponse = WebSocketBaseResponse & {
|
|
67
|
+
type: "error";
|
|
68
|
+
error: string;
|
|
18
69
|
};
|
|
70
|
+
type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
|
|
19
71
|
type EmitteryCallbacks<T> = {
|
|
20
72
|
on: emittery__default<T>["on"];
|
|
21
73
|
off: emittery__default<T>["off"];
|
|
@@ -43,6 +95,8 @@ type CloneResponse = {
|
|
|
43
95
|
embedding: number[];
|
|
44
96
|
};
|
|
45
97
|
type WebSocketOptions = {
|
|
98
|
+
container?: string;
|
|
99
|
+
encoding?: string;
|
|
46
100
|
sampleRate: number;
|
|
47
101
|
};
|
|
48
102
|
type SourceEventData = {
|
|
@@ -51,5 +105,7 @@ type SourceEventData = {
|
|
|
51
105
|
wait: never;
|
|
52
106
|
read: never;
|
|
53
107
|
};
|
|
108
|
+
type TypedArray = Float32Array | Int16Array | Uint8Array;
|
|
109
|
+
type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
|
|
54
110
|
|
|
55
|
-
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
|
|
111
|
+
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
|
package/dist/types/index.d.ts
CHANGED
|
@@ -10,12 +10,64 @@ type ConnectionEventData = {
|
|
|
10
10
|
open: never;
|
|
11
11
|
close: never;
|
|
12
12
|
};
|
|
13
|
+
type VoiceSpecifier = {
|
|
14
|
+
mode: "id";
|
|
15
|
+
id: string;
|
|
16
|
+
} | {
|
|
17
|
+
mode: "embedding";
|
|
18
|
+
embedding: number[];
|
|
19
|
+
};
|
|
20
|
+
type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
|
|
21
|
+
type Intensity = "lowest" | "low" | "high" | "highest";
|
|
22
|
+
type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
|
|
23
|
+
type VoiceOptions = VoiceSpecifier & {
|
|
24
|
+
__experimental_controls?: {
|
|
25
|
+
speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
|
|
26
|
+
emotion?: EmotionControl[];
|
|
27
|
+
};
|
|
28
|
+
};
|
|
13
29
|
type StreamRequest = {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
30
|
+
model_id: string;
|
|
31
|
+
transcript: string;
|
|
32
|
+
voice: VoiceOptions;
|
|
33
|
+
output_format?: {
|
|
34
|
+
container: string;
|
|
35
|
+
encoding: string;
|
|
36
|
+
sample_rate: number;
|
|
17
37
|
};
|
|
38
|
+
context_id?: string;
|
|
39
|
+
continue?: boolean;
|
|
40
|
+
duration?: number;
|
|
41
|
+
language?: string;
|
|
42
|
+
add_timestamps?: boolean;
|
|
43
|
+
};
|
|
44
|
+
type StreamOptions = {
|
|
45
|
+
timeout?: number;
|
|
46
|
+
};
|
|
47
|
+
type WebSocketBaseResponse = {
|
|
48
|
+
context_id: string;
|
|
49
|
+
status_code: number;
|
|
50
|
+
done: boolean;
|
|
51
|
+
};
|
|
52
|
+
type WordTimestamps = {
|
|
53
|
+
words: string[];
|
|
54
|
+
start: number[];
|
|
55
|
+
end: number[];
|
|
56
|
+
};
|
|
57
|
+
type WebSocketTimestampsResponse = WebSocketBaseResponse & {
|
|
58
|
+
type: "timestamps";
|
|
59
|
+
word_timestamps: WordTimestamps;
|
|
60
|
+
};
|
|
61
|
+
type WebSocketChunkResponse = WebSocketBaseResponse & {
|
|
62
|
+
type: "chunk";
|
|
63
|
+
data: string;
|
|
64
|
+
step_time: number;
|
|
65
|
+
};
|
|
66
|
+
type WebSocketErrorResponse = WebSocketBaseResponse & {
|
|
67
|
+
type: "error";
|
|
68
|
+
error: string;
|
|
18
69
|
};
|
|
70
|
+
type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
|
|
19
71
|
type EmitteryCallbacks<T> = {
|
|
20
72
|
on: emittery__default<T>["on"];
|
|
21
73
|
off: emittery__default<T>["off"];
|
|
@@ -43,6 +95,8 @@ type CloneResponse = {
|
|
|
43
95
|
embedding: number[];
|
|
44
96
|
};
|
|
45
97
|
type WebSocketOptions = {
|
|
98
|
+
container?: string;
|
|
99
|
+
encoding?: string;
|
|
46
100
|
sampleRate: number;
|
|
47
101
|
};
|
|
48
102
|
type SourceEventData = {
|
|
@@ -51,5 +105,7 @@ type SourceEventData = {
|
|
|
51
105
|
wait: never;
|
|
52
106
|
read: never;
|
|
53
107
|
};
|
|
108
|
+
type TypedArray = Float32Array | Int16Array | Uint8Array;
|
|
109
|
+
type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
|
|
54
110
|
|
|
55
|
-
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
|
|
111
|
+
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
|
package/dist/voices/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
Voices
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-KUSVZXDT.js";
|
|
4
|
+
import "../chunk-5M33ZF3Y.js";
|
|
5
5
|
import "../chunk-2BFEKY3F.js";
|
|
6
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-GHY2WEOK.js";
|
|
7
7
|
export {
|
|
8
8
|
Voices as default
|
|
9
9
|
};
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
package/src/react/index.ts
CHANGED
|
@@ -4,12 +4,14 @@ import { Cartesia } from "../lib";
|
|
|
4
4
|
import Player from "../tts/player";
|
|
5
5
|
import type Source from "../tts/source";
|
|
6
6
|
import type WebSocket from "../tts/websocket";
|
|
7
|
+
import type { StreamRequest } from "../types";
|
|
7
8
|
import { pingServer } from "./utils";
|
|
8
9
|
|
|
9
10
|
export type UseTTSOptions = {
|
|
10
11
|
apiKey: string | null;
|
|
11
12
|
baseUrl?: string;
|
|
12
13
|
sampleRate: number;
|
|
14
|
+
onError?: (error: Error) => void;
|
|
13
15
|
};
|
|
14
16
|
|
|
15
17
|
export type PlaybackStatus = "inactive" | "playing" | "paused" | "finished";
|
|
@@ -20,7 +22,7 @@ export type Metrics = {
|
|
|
20
22
|
};
|
|
21
23
|
|
|
22
24
|
export interface UseTTSReturn {
|
|
23
|
-
buffer: (options:
|
|
25
|
+
buffer: (options: StreamRequest) => Promise<void>;
|
|
24
26
|
play: (bufferDuration?: number) => Promise<void>;
|
|
25
27
|
pause: () => Promise<void>;
|
|
26
28
|
resume: () => Promise<void>;
|
|
@@ -47,6 +49,7 @@ export function useTTS({
|
|
|
47
49
|
apiKey,
|
|
48
50
|
baseUrl,
|
|
49
51
|
sampleRate,
|
|
52
|
+
onError,
|
|
50
53
|
}: UseTTSOptions): UseTTSReturn {
|
|
51
54
|
if (typeof window === "undefined") {
|
|
52
55
|
return {
|
|
@@ -72,7 +75,11 @@ export function useTTS({
|
|
|
72
75
|
}
|
|
73
76
|
const cartesia = new Cartesia({ apiKey, baseUrl });
|
|
74
77
|
baseUrl = baseUrl ?? cartesia.baseUrl;
|
|
75
|
-
return cartesia.tts.websocket({
|
|
78
|
+
return cartesia.tts.websocket({
|
|
79
|
+
container: "raw",
|
|
80
|
+
encoding: "pcm_f32le",
|
|
81
|
+
sampleRate,
|
|
82
|
+
});
|
|
76
83
|
}, [apiKey, baseUrl, sampleRate]);
|
|
77
84
|
const websocketReturn = useRef<ReturnType<WebSocket["send"]> | null>(null);
|
|
78
85
|
const player = useRef<Player | null>(null);
|
|
@@ -85,23 +92,35 @@ export function useTTS({
|
|
|
85
92
|
const [messages, setMessages] = useState<Message[]>([]);
|
|
86
93
|
|
|
87
94
|
const buffer = useCallback(
|
|
88
|
-
async (options:
|
|
95
|
+
async (options: StreamRequest) => {
|
|
89
96
|
websocketReturn.current?.stop(); // Abort the previous request if it exists.
|
|
90
97
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
98
|
+
try {
|
|
99
|
+
setMessages([]);
|
|
100
|
+
setBufferStatus("buffering");
|
|
101
|
+
websocketReturn.current = websocket?.send(options) ?? null;
|
|
102
|
+
if (!websocketReturn.current) {
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
const unsubscribe = websocketReturn.current.on("message", (message) => {
|
|
106
|
+
const parsedMessage = JSON.parse(message);
|
|
107
|
+
setMessages((messages) => [...messages, parsedMessage]);
|
|
108
|
+
if (parsedMessage.error) {
|
|
109
|
+
onError?.(new Error(parsedMessage.error));
|
|
110
|
+
}
|
|
111
|
+
});
|
|
112
|
+
await websocketReturn.current.source.once("close");
|
|
113
|
+
setBufferStatus("buffered");
|
|
114
|
+
unsubscribe();
|
|
115
|
+
} catch (error) {
|
|
116
|
+
if (error instanceof Error) {
|
|
117
|
+
onError?.(error);
|
|
118
|
+
} else {
|
|
119
|
+
console.error(error);
|
|
120
|
+
}
|
|
96
121
|
}
|
|
97
|
-
const unsubscribe = websocketReturn.current.on("message", (message) => {
|
|
98
|
-
setMessages((messages) => [...messages, JSON.parse(message)]);
|
|
99
|
-
});
|
|
100
|
-
await websocketReturn.current.source.once("close");
|
|
101
|
-
setBufferStatus("buffered");
|
|
102
|
-
unsubscribe();
|
|
103
122
|
},
|
|
104
|
-
[websocket],
|
|
123
|
+
[websocket, onError],
|
|
105
124
|
);
|
|
106
125
|
|
|
107
126
|
const metrics = useMemo(() => {
|
|
@@ -173,64 +192,96 @@ export function useTTS({
|
|
|
173
192
|
}, [websocket, baseUrl]);
|
|
174
193
|
|
|
175
194
|
const play = useCallback(async () => {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
195
|
+
try {
|
|
196
|
+
if (playbackStatus === "playing" || !websocketReturn.current) {
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
if (player.current) {
|
|
200
|
+
// Stop the current player if it exists.
|
|
201
|
+
await player.current.stop();
|
|
202
|
+
}
|
|
183
203
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
const unsubscribes = [];
|
|
187
|
-
unsubscribes.push(
|
|
188
|
-
websocketReturn.current.source.on("wait", () => {
|
|
189
|
-
setIsWaiting(true);
|
|
190
|
-
}),
|
|
191
|
-
);
|
|
192
|
-
unsubscribes.push(
|
|
193
|
-
websocketReturn.current.source.on("read", () => {
|
|
194
|
-
setIsWaiting(false);
|
|
195
|
-
}),
|
|
196
|
-
);
|
|
197
|
-
|
|
198
|
-
player.current = new Player({
|
|
199
|
-
bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
|
|
200
|
-
});
|
|
201
|
-
// Wait for the playback to finish before setting isPlaying to false.
|
|
202
|
-
await player.current.play(websocketReturn.current.source);
|
|
204
|
+
setPlaybackStatus("playing");
|
|
203
205
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
206
|
+
const unsubscribes = [];
|
|
207
|
+
unsubscribes.push(
|
|
208
|
+
websocketReturn.current.source.on("wait", () => {
|
|
209
|
+
setIsWaiting(true);
|
|
210
|
+
}),
|
|
211
|
+
);
|
|
212
|
+
unsubscribes.push(
|
|
213
|
+
websocketReturn.current.source.on("read", () => {
|
|
214
|
+
setIsWaiting(false);
|
|
215
|
+
}),
|
|
216
|
+
);
|
|
208
217
|
|
|
209
|
-
|
|
210
|
-
|
|
218
|
+
player.current = new Player({
|
|
219
|
+
bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
|
|
220
|
+
});
|
|
221
|
+
// Wait for the playback to finish before setting isPlaying to false.
|
|
222
|
+
await player.current.play(websocketReturn.current.source);
|
|
223
|
+
|
|
224
|
+
for (const unsubscribe of unsubscribes) {
|
|
225
|
+
// Deregister the event listeners (.on()) that we registered above to avoid memory leaks.
|
|
226
|
+
unsubscribe();
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
setPlaybackStatus("finished");
|
|
230
|
+
} catch (error) {
|
|
231
|
+
if (error instanceof Error) {
|
|
232
|
+
onError?.(error);
|
|
233
|
+
} else {
|
|
234
|
+
console.error(error);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}, [playbackStatus, bufferDuration, onError]);
|
|
211
238
|
|
|
212
239
|
const pause = useCallback(async () => {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
240
|
+
try {
|
|
241
|
+
await player.current?.pause();
|
|
242
|
+
setPlaybackStatus("paused");
|
|
243
|
+
} catch (error) {
|
|
244
|
+
if (error instanceof Error) {
|
|
245
|
+
onError?.(error);
|
|
246
|
+
} else {
|
|
247
|
+
console.error(error);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}, [onError]);
|
|
216
251
|
|
|
217
252
|
const resume = useCallback(async () => {
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
253
|
+
try {
|
|
254
|
+
await player.current?.resume();
|
|
255
|
+
setPlaybackStatus("playing");
|
|
256
|
+
} catch (error) {
|
|
257
|
+
if (error instanceof Error) {
|
|
258
|
+
onError?.(error);
|
|
259
|
+
} else {
|
|
260
|
+
console.error(error);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}, [onError]);
|
|
221
264
|
|
|
222
265
|
const toggle = useCallback(async () => {
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
266
|
+
try {
|
|
267
|
+
await player.current?.toggle();
|
|
268
|
+
setPlaybackStatus((status) => {
|
|
269
|
+
if (status === "playing") {
|
|
270
|
+
return "paused";
|
|
271
|
+
}
|
|
272
|
+
if (status === "paused") {
|
|
273
|
+
return "playing";
|
|
274
|
+
}
|
|
275
|
+
return status;
|
|
276
|
+
});
|
|
277
|
+
} catch (error) {
|
|
278
|
+
if (error instanceof Error) {
|
|
279
|
+
onError?.(error);
|
|
280
|
+
} else {
|
|
281
|
+
console.error(error);
|
|
230
282
|
}
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
}, []);
|
|
283
|
+
}
|
|
284
|
+
}, [onError]);
|
|
234
285
|
|
|
235
286
|
return {
|
|
236
287
|
buffer,
|
package/src/tts/source.ts
CHANGED
|
@@ -1,13 +1,30 @@
|
|
|
1
1
|
import Emittery from "emittery";
|
|
2
|
-
import type { SourceEventData } from "../types";
|
|
2
|
+
import type { Encoding, SourceEventData, TypedArray } from "../types";
|
|
3
|
+
|
|
4
|
+
type EncodingInfo = {
|
|
5
|
+
arrayType:
|
|
6
|
+
| Float32ArrayConstructor
|
|
7
|
+
| Int16ArrayConstructor
|
|
8
|
+
| Uint8ArrayConstructor;
|
|
9
|
+
bytesPerElement: number;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
export const ENCODING_MAP: Record<Encoding, EncodingInfo> = {
|
|
13
|
+
pcm_f32le: { arrayType: Float32Array, bytesPerElement: 4 },
|
|
14
|
+
pcm_s16le: { arrayType: Int16Array, bytesPerElement: 2 },
|
|
15
|
+
pcm_alaw: { arrayType: Uint8Array, bytesPerElement: 1 },
|
|
16
|
+
pcm_mulaw: { arrayType: Uint8Array, bytesPerElement: 1 },
|
|
17
|
+
};
|
|
3
18
|
|
|
4
19
|
export default class Source {
|
|
5
20
|
#emitter = new Emittery<SourceEventData>();
|
|
6
|
-
#buffer:
|
|
21
|
+
#buffer: TypedArray;
|
|
7
22
|
#readIndex = 0;
|
|
8
23
|
#writeIndex = 0;
|
|
9
24
|
#closed = false;
|
|
10
25
|
#sampleRate: number;
|
|
26
|
+
#encoding: Encoding;
|
|
27
|
+
#container: string;
|
|
11
28
|
|
|
12
29
|
on = this.#emitter.on.bind(this.#emitter);
|
|
13
30
|
once = this.#emitter.once.bind(this.#emitter);
|
|
@@ -20,21 +37,46 @@ export default class Source {
|
|
|
20
37
|
* @param options - Options for the Source.
|
|
21
38
|
* @param options.sampleRate - The sample rate of the audio.
|
|
22
39
|
*/
|
|
23
|
-
constructor({
|
|
40
|
+
constructor({
|
|
41
|
+
sampleRate,
|
|
42
|
+
encoding,
|
|
43
|
+
container,
|
|
44
|
+
}: { sampleRate: number; encoding: string; container: string }) {
|
|
24
45
|
this.#sampleRate = sampleRate;
|
|
25
|
-
this.#
|
|
46
|
+
this.#encoding = encoding as Encoding;
|
|
47
|
+
this.#container = container;
|
|
48
|
+
this.#buffer = this.#createBuffer(1024); // Initial size, can be adjusted
|
|
26
49
|
}
|
|
27
50
|
|
|
28
51
|
get sampleRate() {
|
|
29
52
|
return this.#sampleRate;
|
|
30
53
|
}
|
|
31
54
|
|
|
55
|
+
get encoding() {
|
|
56
|
+
return this.#encoding;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
get container() {
|
|
60
|
+
return this.#container;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Create a new buffer for the source.
|
|
65
|
+
*
|
|
66
|
+
* @param size - The size of the buffer to create.
|
|
67
|
+
* @returns The new buffer as a TypedArray based on the encoding.
|
|
68
|
+
*/
|
|
69
|
+
#createBuffer(size: number): TypedArray {
|
|
70
|
+
const { arrayType: ArrayType } = ENCODING_MAP[this.#encoding];
|
|
71
|
+
return new ArrayType(size);
|
|
72
|
+
}
|
|
73
|
+
|
|
32
74
|
/**
|
|
33
75
|
* Append audio to the buffer.
|
|
34
76
|
*
|
|
35
77
|
* @param src The audio to append.
|
|
36
78
|
*/
|
|
37
|
-
async enqueue(src:
|
|
79
|
+
async enqueue(src: TypedArray) {
|
|
38
80
|
const requiredCapacity = this.#writeIndex + src.length;
|
|
39
81
|
|
|
40
82
|
// Resize buffer if necessary
|
|
@@ -44,7 +86,7 @@ export default class Source {
|
|
|
44
86
|
newCapacity *= 2; // Double the buffer size
|
|
45
87
|
}
|
|
46
88
|
|
|
47
|
-
const newBuffer =
|
|
89
|
+
const newBuffer = this.#createBuffer(newCapacity);
|
|
48
90
|
newBuffer.set(this.#buffer);
|
|
49
91
|
this.#buffer = newBuffer;
|
|
50
92
|
}
|
|
@@ -62,7 +104,7 @@ export default class Source {
|
|
|
62
104
|
* @returns The number of samples read. If the source is closed, this will be
|
|
63
105
|
* less than the length of the provided buffer.
|
|
64
106
|
*/
|
|
65
|
-
async read(dst:
|
|
107
|
+
async read(dst: TypedArray): Promise<number> {
|
|
66
108
|
// Read the buffer into the provided buffer.
|
|
67
109
|
const targetReadIndex = this.#readIndex + dst.length;
|
|
68
110
|
|
|
@@ -100,6 +142,10 @@ export default class Source {
|
|
|
100
142
|
return this.#readIndex;
|
|
101
143
|
}
|
|
102
144
|
|
|
145
|
+
get writeIndex() {
|
|
146
|
+
return this.#writeIndex;
|
|
147
|
+
}
|
|
148
|
+
|
|
103
149
|
/**
|
|
104
150
|
* Close the source. This signals that no more audio will be enqueued.
|
|
105
151
|
*
|
package/src/tts/utils.ts
CHANGED
|
@@ -1,25 +1,38 @@
|
|
|
1
1
|
import base64 from "base64-js";
|
|
2
2
|
import type Emittery from "emittery";
|
|
3
|
-
import type {
|
|
3
|
+
import type {
|
|
4
|
+
Chunk,
|
|
5
|
+
EmitteryCallbacks,
|
|
6
|
+
Encoding,
|
|
7
|
+
Sentinel,
|
|
8
|
+
TypedArray,
|
|
9
|
+
WebSocketResponse,
|
|
10
|
+
} from "../types";
|
|
11
|
+
import { ENCODING_MAP } from "./source";
|
|
4
12
|
|
|
5
13
|
/**
|
|
6
|
-
* Convert base64-encoded audio buffer(s) to a
|
|
14
|
+
* Convert base64-encoded audio buffer(s) to a TypedArray.
|
|
7
15
|
*
|
|
8
16
|
* @param b64 The base64-encoded audio buffer, or an array of base64-encoded
|
|
9
17
|
* audio buffers.
|
|
10
|
-
* @
|
|
18
|
+
* @param encoding The encoding of the audio buffer(s).
|
|
19
|
+
* @returns The audio buffer(s) as a TypedArray.
|
|
11
20
|
*/
|
|
12
|
-
export function base64ToArray(b64: Chunk[]):
|
|
21
|
+
export function base64ToArray(b64: Chunk[], encoding: string): TypedArray {
|
|
13
22
|
const byteArrays = filterSentinel(b64).map((b) => base64.toByteArray(b));
|
|
23
|
+
|
|
24
|
+
const { arrayType: ArrayType, bytesPerElement } =
|
|
25
|
+
ENCODING_MAP[encoding as Encoding];
|
|
26
|
+
|
|
14
27
|
const totalLength = byteArrays.reduce(
|
|
15
|
-
(acc, arr) => acc + arr.length /
|
|
28
|
+
(acc, arr) => acc + arr.length / bytesPerElement,
|
|
16
29
|
0,
|
|
17
30
|
);
|
|
18
|
-
const result = new
|
|
31
|
+
const result = new ArrayType(totalLength);
|
|
19
32
|
|
|
20
33
|
let offset = 0;
|
|
21
34
|
for (const arr of byteArrays) {
|
|
22
|
-
const floats = new
|
|
35
|
+
const floats = new ArrayType(arr.buffer);
|
|
23
36
|
result.set(floats, offset);
|
|
24
37
|
offset += floats.length;
|
|
25
38
|
}
|
|
@@ -70,26 +83,27 @@ export function createMessageHandlerForContextId(
|
|
|
70
83
|
chunk,
|
|
71
84
|
message,
|
|
72
85
|
}: {
|
|
73
|
-
chunk
|
|
86
|
+
chunk?: Chunk;
|
|
74
87
|
message: string;
|
|
88
|
+
data: WebSocketResponse;
|
|
75
89
|
}) => void,
|
|
76
90
|
) {
|
|
77
91
|
return (event: MessageEvent) => {
|
|
78
92
|
if (typeof event.data !== "string") {
|
|
79
93
|
return; // Ignore non-string messages.
|
|
80
94
|
}
|
|
81
|
-
const message = JSON.parse(event.data);
|
|
95
|
+
const message: WebSocketResponse = JSON.parse(event.data);
|
|
82
96
|
if (message.context_id !== contextId) {
|
|
83
97
|
return; // Ignore messages for other contexts.
|
|
84
98
|
}
|
|
85
|
-
let chunk: Chunk;
|
|
99
|
+
let chunk: Chunk | undefined;
|
|
86
100
|
if (message.done) {
|
|
87
101
|
// Convert the done message to a sentinel value.
|
|
88
102
|
chunk = getSentinel();
|
|
89
|
-
} else {
|
|
103
|
+
} else if (message.type === "chunk") {
|
|
90
104
|
chunk = message.data;
|
|
91
105
|
}
|
|
92
|
-
handler({ chunk, message: event.data });
|
|
106
|
+
handler({ chunk, message: event.data, data: message });
|
|
93
107
|
};
|
|
94
108
|
}
|
|
95
109
|
|