@cartesia/cartesia-js 1.0.0-alpha.4 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +49 -49
- package/CHANGELOG.md +23 -0
- package/LICENSE.md +21 -0
- package/README.md +102 -21
- package/dist/{chunk-VK7LBMVI.js → chunk-2NA5SEML.js} +2 -2
- package/dist/{chunk-PQ5EVEEH.js → chunk-5M33ZF3Y.js} +1 -1
- package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
- package/dist/{chunk-IQAXBRHU.js → chunk-ASZKHN7Q.js} +53 -29
- package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
- package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
- package/dist/{chunk-SGXUEFII.js → chunk-KUSVZXDT.js} +2 -2
- package/dist/{chunk-36JBKJUN.js → chunk-LZO6K34D.js} +20 -7
- package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
- package/dist/{chunk-ISRU7PLL.js → chunk-OFH3ML4L.js} +3 -3
- package/dist/index.cjs +129 -39
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +15 -9
- package/dist/lib/client.js +2 -2
- package/dist/lib/constants.js +1 -1
- package/dist/lib/index.cjs +106 -33
- package/dist/lib/index.js +8 -8
- package/dist/react/index.cjs +231 -92
- package/dist/react/index.d.cts +4 -3
- package/dist/react/index.d.ts +4 -3
- package/dist/react/index.js +117 -64
- package/dist/react/utils.js +2 -2
- package/dist/tts/index.cjs +106 -33
- package/dist/tts/index.js +6 -6
- package/dist/tts/player.cjs +23 -5
- package/dist/tts/player.d.cts +6 -0
- package/dist/tts/player.d.ts +6 -0
- package/dist/tts/player.js +4 -3
- package/dist/tts/source.cjs +50 -4
- package/dist/tts/source.d.cts +16 -6
- package/dist/tts/source.d.ts +16 -6
- package/dist/tts/source.js +4 -2
- package/dist/tts/utils.cjs +18 -6
- package/dist/tts/utils.d.cts +7 -5
- package/dist/tts/utils.d.ts +7 -5
- package/dist/tts/utils.js +3 -2
- package/dist/tts/websocket.cjs +106 -33
- package/dist/tts/websocket.d.cts +20 -10
- package/dist/tts/websocket.d.ts +20 -10
- package/dist/tts/websocket.js +5 -5
- package/dist/types/index.d.cts +60 -4
- package/dist/types/index.d.ts +60 -4
- package/dist/voices/index.js +3 -3
- package/package.json +1 -1
- package/src/index.ts +2 -0
- package/src/react/index.ts +117 -62
- package/src/tts/player.ts +15 -8
- package/src/tts/source.ts +53 -7
- package/src/tts/utils.ts +26 -12
- package/src/tts/websocket.ts +42 -19
- package/src/types/index.ts +81 -3
package/dist/tts/websocket.d.cts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as emittery from 'emittery';
|
|
2
2
|
import { WebSocket as WebSocket$1 } from 'partysocket';
|
|
3
3
|
import { Client } from '../lib/client.cjs';
|
|
4
|
-
import { WebSocketOptions, StreamRequest, EmitteryCallbacks, ConnectionEventData } from '../types/index.cjs';
|
|
4
|
+
import { WebSocketOptions, StreamRequest, StreamOptions, WordTimestamps, EmitteryCallbacks, ConnectionEventData } from '../types/index.cjs';
|
|
5
5
|
import Source from './source.cjs';
|
|
6
6
|
|
|
7
7
|
declare class WebSocket extends Client {
|
|
@@ -12,28 +12,38 @@ declare class WebSocket extends Client {
|
|
|
12
12
|
*
|
|
13
13
|
* @param args - Arguments to pass to the Client constructor.
|
|
14
14
|
*/
|
|
15
|
-
constructor({ sampleRate }: WebSocketOptions, ...args: ConstructorParameters<typeof Client>);
|
|
15
|
+
constructor({ sampleRate, container, encoding }: WebSocketOptions, ...args: ConstructorParameters<typeof Client>);
|
|
16
16
|
/**
|
|
17
|
-
* Send a message over the WebSocket
|
|
17
|
+
* Send a message over the WebSocket to start a stream.
|
|
18
18
|
*
|
|
19
|
-
* @param inputs - Stream options.
|
|
19
|
+
* @param inputs - Stream options. Defined in the StreamRequest type.
|
|
20
20
|
* @param options - Options for the stream.
|
|
21
21
|
* @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
|
|
22
|
-
*
|
|
22
|
+
* If set to `0`, the stream will not time out.
|
|
23
23
|
* @returns A Source object that can be passed to a Player to play the audio.
|
|
24
|
+
* @returns An Emittery instance that emits messages from the WebSocket.
|
|
25
|
+
* @returns An abort function that can be called to cancel the stream.
|
|
24
26
|
*/
|
|
25
|
-
send(inputs: StreamRequest
|
|
26
|
-
|
|
27
|
+
send({ ...inputs }: StreamRequest, { timeout }?: StreamOptions): {
|
|
28
|
+
stop: {
|
|
29
|
+
(reason?: any): void;
|
|
30
|
+
(reason?: any): void;
|
|
31
|
+
};
|
|
32
|
+
on: <Name extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name | readonly Name[], listener: (eventData: ({
|
|
27
33
|
message: string;
|
|
34
|
+
timestamps: WordTimestamps;
|
|
28
35
|
} & emittery.OmnipresentEventData)[Name]) => void | Promise<void>) => emittery.UnsubscribeFunction;
|
|
29
|
-
off: <Name_1 extends keyof emittery.OmnipresentEventData | "message">(eventName: Name_1 | readonly Name_1[], listener: (eventData: ({
|
|
36
|
+
off: <Name_1 extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name_1 | readonly Name_1[], listener: (eventData: ({
|
|
30
37
|
message: string;
|
|
38
|
+
timestamps: WordTimestamps;
|
|
31
39
|
} & emittery.OmnipresentEventData)[Name_1]) => void | Promise<void>) => void;
|
|
32
|
-
once: <Name_2 extends keyof emittery.OmnipresentEventData | "message">(eventName: Name_2 | readonly Name_2[]) => emittery.EmitteryOncePromise<({
|
|
40
|
+
once: <Name_2 extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name_2 | readonly Name_2[]) => emittery.EmitteryOncePromise<({
|
|
33
41
|
message: string;
|
|
42
|
+
timestamps: WordTimestamps;
|
|
34
43
|
} & emittery.OmnipresentEventData)[Name_2]>;
|
|
35
|
-
events: <Name_3 extends "message">(eventName: Name_3 | readonly Name_3[]) => AsyncIterableIterator<{
|
|
44
|
+
events: <Name_3 extends "timestamps" | "message">(eventName: Name_3 | readonly Name_3[]) => AsyncIterableIterator<{
|
|
36
45
|
message: string;
|
|
46
|
+
timestamps: WordTimestamps;
|
|
37
47
|
}[Name_3]>;
|
|
38
48
|
source: Source;
|
|
39
49
|
};
|
package/dist/tts/websocket.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as emittery from 'emittery';
|
|
2
2
|
import { WebSocket as WebSocket$1 } from 'partysocket';
|
|
3
3
|
import { Client } from '../lib/client.js';
|
|
4
|
-
import { WebSocketOptions, StreamRequest, EmitteryCallbacks, ConnectionEventData } from '../types/index.js';
|
|
4
|
+
import { WebSocketOptions, StreamRequest, StreamOptions, WordTimestamps, EmitteryCallbacks, ConnectionEventData } from '../types/index.js';
|
|
5
5
|
import Source from './source.js';
|
|
6
6
|
|
|
7
7
|
declare class WebSocket extends Client {
|
|
@@ -12,28 +12,38 @@ declare class WebSocket extends Client {
|
|
|
12
12
|
*
|
|
13
13
|
* @param args - Arguments to pass to the Client constructor.
|
|
14
14
|
*/
|
|
15
|
-
constructor({ sampleRate }: WebSocketOptions, ...args: ConstructorParameters<typeof Client>);
|
|
15
|
+
constructor({ sampleRate, container, encoding }: WebSocketOptions, ...args: ConstructorParameters<typeof Client>);
|
|
16
16
|
/**
|
|
17
|
-
* Send a message over the WebSocket
|
|
17
|
+
* Send a message over the WebSocket to start a stream.
|
|
18
18
|
*
|
|
19
|
-
* @param inputs - Stream options.
|
|
19
|
+
* @param inputs - Stream options. Defined in the StreamRequest type.
|
|
20
20
|
* @param options - Options for the stream.
|
|
21
21
|
* @param options.timeout - The maximum time to wait for a chunk before cancelling the stream.
|
|
22
|
-
*
|
|
22
|
+
* If set to `0`, the stream will not time out.
|
|
23
23
|
* @returns A Source object that can be passed to a Player to play the audio.
|
|
24
|
+
* @returns An Emittery instance that emits messages from the WebSocket.
|
|
25
|
+
* @returns An abort function that can be called to cancel the stream.
|
|
24
26
|
*/
|
|
25
|
-
send(inputs: StreamRequest
|
|
26
|
-
|
|
27
|
+
send({ ...inputs }: StreamRequest, { timeout }?: StreamOptions): {
|
|
28
|
+
stop: {
|
|
29
|
+
(reason?: any): void;
|
|
30
|
+
(reason?: any): void;
|
|
31
|
+
};
|
|
32
|
+
on: <Name extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name | readonly Name[], listener: (eventData: ({
|
|
27
33
|
message: string;
|
|
34
|
+
timestamps: WordTimestamps;
|
|
28
35
|
} & emittery.OmnipresentEventData)[Name]) => void | Promise<void>) => emittery.UnsubscribeFunction;
|
|
29
|
-
off: <Name_1 extends keyof emittery.OmnipresentEventData | "message">(eventName: Name_1 | readonly Name_1[], listener: (eventData: ({
|
|
36
|
+
off: <Name_1 extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name_1 | readonly Name_1[], listener: (eventData: ({
|
|
30
37
|
message: string;
|
|
38
|
+
timestamps: WordTimestamps;
|
|
31
39
|
} & emittery.OmnipresentEventData)[Name_1]) => void | Promise<void>) => void;
|
|
32
|
-
once: <Name_2 extends keyof emittery.OmnipresentEventData | "message">(eventName: Name_2 | readonly Name_2[]) => emittery.EmitteryOncePromise<({
|
|
40
|
+
once: <Name_2 extends "timestamps" | keyof emittery.OmnipresentEventData | "message">(eventName: Name_2 | readonly Name_2[]) => emittery.EmitteryOncePromise<({
|
|
33
41
|
message: string;
|
|
42
|
+
timestamps: WordTimestamps;
|
|
34
43
|
} & emittery.OmnipresentEventData)[Name_2]>;
|
|
35
|
-
events: <Name_3 extends "message">(eventName: Name_3 | readonly Name_3[]) => AsyncIterableIterator<{
|
|
44
|
+
events: <Name_3 extends "timestamps" | "message">(eventName: Name_3 | readonly Name_3[]) => AsyncIterableIterator<{
|
|
36
45
|
message: string;
|
|
46
|
+
timestamps: WordTimestamps;
|
|
37
47
|
}[Name_3]>;
|
|
38
48
|
source: Source;
|
|
39
49
|
};
|
package/dist/tts/websocket.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
WebSocket
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
5
|
-
import "../chunk-PQ5EVEEH.js";
|
|
3
|
+
} from "../chunk-ASZKHN7Q.js";
|
|
4
|
+
import "../chunk-5M33ZF3Y.js";
|
|
6
5
|
import "../chunk-2BFEKY3F.js";
|
|
7
|
-
import "../chunk-
|
|
8
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-BHY7MNGT.js";
|
|
7
|
+
import "../chunk-6YQ6KDIQ.js";
|
|
8
|
+
import "../chunk-GHY2WEOK.js";
|
|
9
9
|
export {
|
|
10
10
|
WebSocket as default
|
|
11
11
|
};
|
package/dist/types/index.d.cts
CHANGED
|
@@ -10,12 +10,64 @@ type ConnectionEventData = {
|
|
|
10
10
|
open: never;
|
|
11
11
|
close: never;
|
|
12
12
|
};
|
|
13
|
+
type VoiceSpecifier = {
|
|
14
|
+
mode: "id";
|
|
15
|
+
id: string;
|
|
16
|
+
} | {
|
|
17
|
+
mode: "embedding";
|
|
18
|
+
embedding: number[];
|
|
19
|
+
};
|
|
20
|
+
type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
|
|
21
|
+
type Intensity = "lowest" | "low" | "high" | "highest";
|
|
22
|
+
type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
|
|
23
|
+
type VoiceOptions = VoiceSpecifier & {
|
|
24
|
+
__experimental_controls?: {
|
|
25
|
+
speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
|
|
26
|
+
emotion?: EmotionControl[];
|
|
27
|
+
};
|
|
28
|
+
};
|
|
13
29
|
type StreamRequest = {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
30
|
+
model_id: string;
|
|
31
|
+
transcript: string;
|
|
32
|
+
voice: VoiceOptions;
|
|
33
|
+
output_format?: {
|
|
34
|
+
container: string;
|
|
35
|
+
encoding: string;
|
|
36
|
+
sample_rate: number;
|
|
17
37
|
};
|
|
38
|
+
context_id?: string;
|
|
39
|
+
continue?: boolean;
|
|
40
|
+
duration?: number;
|
|
41
|
+
language?: string;
|
|
42
|
+
add_timestamps?: boolean;
|
|
43
|
+
};
|
|
44
|
+
type StreamOptions = {
|
|
45
|
+
timeout?: number;
|
|
46
|
+
};
|
|
47
|
+
type WebSocketBaseResponse = {
|
|
48
|
+
context_id: string;
|
|
49
|
+
status_code: number;
|
|
50
|
+
done: boolean;
|
|
51
|
+
};
|
|
52
|
+
type WordTimestamps = {
|
|
53
|
+
words: string[];
|
|
54
|
+
start: number[];
|
|
55
|
+
end: number[];
|
|
56
|
+
};
|
|
57
|
+
type WebSocketTimestampsResponse = WebSocketBaseResponse & {
|
|
58
|
+
type: "timestamps";
|
|
59
|
+
word_timestamps: WordTimestamps;
|
|
60
|
+
};
|
|
61
|
+
type WebSocketChunkResponse = WebSocketBaseResponse & {
|
|
62
|
+
type: "chunk";
|
|
63
|
+
data: string;
|
|
64
|
+
step_time: number;
|
|
65
|
+
};
|
|
66
|
+
type WebSocketErrorResponse = WebSocketBaseResponse & {
|
|
67
|
+
type: "error";
|
|
68
|
+
error: string;
|
|
18
69
|
};
|
|
70
|
+
type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
|
|
19
71
|
type EmitteryCallbacks<T> = {
|
|
20
72
|
on: emittery__default<T>["on"];
|
|
21
73
|
off: emittery__default<T>["off"];
|
|
@@ -43,6 +95,8 @@ type CloneResponse = {
|
|
|
43
95
|
embedding: number[];
|
|
44
96
|
};
|
|
45
97
|
type WebSocketOptions = {
|
|
98
|
+
container?: string;
|
|
99
|
+
encoding?: string;
|
|
46
100
|
sampleRate: number;
|
|
47
101
|
};
|
|
48
102
|
type SourceEventData = {
|
|
@@ -51,5 +105,7 @@ type SourceEventData = {
|
|
|
51
105
|
wait: never;
|
|
52
106
|
read: never;
|
|
53
107
|
};
|
|
108
|
+
type TypedArray = Float32Array | Int16Array | Uint8Array;
|
|
109
|
+
type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
|
|
54
110
|
|
|
55
|
-
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
|
|
111
|
+
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
|
package/dist/types/index.d.ts
CHANGED
|
@@ -10,12 +10,64 @@ type ConnectionEventData = {
|
|
|
10
10
|
open: never;
|
|
11
11
|
close: never;
|
|
12
12
|
};
|
|
13
|
+
type VoiceSpecifier = {
|
|
14
|
+
mode: "id";
|
|
15
|
+
id: string;
|
|
16
|
+
} | {
|
|
17
|
+
mode: "embedding";
|
|
18
|
+
embedding: number[];
|
|
19
|
+
};
|
|
20
|
+
type Emotion = "anger" | "sadness" | "positivity" | "curiosity" | "surprise";
|
|
21
|
+
type Intensity = "lowest" | "low" | "high" | "highest";
|
|
22
|
+
type EmotionControl = Emotion | `${Emotion}:${Intensity}`;
|
|
23
|
+
type VoiceOptions = VoiceSpecifier & {
|
|
24
|
+
__experimental_controls?: {
|
|
25
|
+
speed?: "slowest" | "slow" | "normal" | "fast" | "fastest";
|
|
26
|
+
emotion?: EmotionControl[];
|
|
27
|
+
};
|
|
28
|
+
};
|
|
13
29
|
type StreamRequest = {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
30
|
+
model_id: string;
|
|
31
|
+
transcript: string;
|
|
32
|
+
voice: VoiceOptions;
|
|
33
|
+
output_format?: {
|
|
34
|
+
container: string;
|
|
35
|
+
encoding: string;
|
|
36
|
+
sample_rate: number;
|
|
17
37
|
};
|
|
38
|
+
context_id?: string;
|
|
39
|
+
continue?: boolean;
|
|
40
|
+
duration?: number;
|
|
41
|
+
language?: string;
|
|
42
|
+
add_timestamps?: boolean;
|
|
43
|
+
};
|
|
44
|
+
type StreamOptions = {
|
|
45
|
+
timeout?: number;
|
|
46
|
+
};
|
|
47
|
+
type WebSocketBaseResponse = {
|
|
48
|
+
context_id: string;
|
|
49
|
+
status_code: number;
|
|
50
|
+
done: boolean;
|
|
51
|
+
};
|
|
52
|
+
type WordTimestamps = {
|
|
53
|
+
words: string[];
|
|
54
|
+
start: number[];
|
|
55
|
+
end: number[];
|
|
56
|
+
};
|
|
57
|
+
type WebSocketTimestampsResponse = WebSocketBaseResponse & {
|
|
58
|
+
type: "timestamps";
|
|
59
|
+
word_timestamps: WordTimestamps;
|
|
60
|
+
};
|
|
61
|
+
type WebSocketChunkResponse = WebSocketBaseResponse & {
|
|
62
|
+
type: "chunk";
|
|
63
|
+
data: string;
|
|
64
|
+
step_time: number;
|
|
65
|
+
};
|
|
66
|
+
type WebSocketErrorResponse = WebSocketBaseResponse & {
|
|
67
|
+
type: "error";
|
|
68
|
+
error: string;
|
|
18
69
|
};
|
|
70
|
+
type WebSocketResponse = WebSocketTimestampsResponse | WebSocketChunkResponse | WebSocketErrorResponse;
|
|
19
71
|
type EmitteryCallbacks<T> = {
|
|
20
72
|
on: emittery__default<T>["on"];
|
|
21
73
|
off: emittery__default<T>["off"];
|
|
@@ -43,6 +95,8 @@ type CloneResponse = {
|
|
|
43
95
|
embedding: number[];
|
|
44
96
|
};
|
|
45
97
|
type WebSocketOptions = {
|
|
98
|
+
container?: string;
|
|
99
|
+
encoding?: string;
|
|
46
100
|
sampleRate: number;
|
|
47
101
|
};
|
|
48
102
|
type SourceEventData = {
|
|
@@ -51,5 +105,7 @@ type SourceEventData = {
|
|
|
51
105
|
wait: never;
|
|
52
106
|
read: never;
|
|
53
107
|
};
|
|
108
|
+
type TypedArray = Float32Array | Int16Array | Uint8Array;
|
|
109
|
+
type Encoding = "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
|
|
54
110
|
|
|
55
|
-
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Sentinel, SourceEventData, StreamRequest, Voice, WebSocketOptions };
|
|
111
|
+
export type { Chunk, ClientOptions, CloneOptions, CloneResponse, ConnectionEventData, CreateVoice, EmitteryCallbacks, Emotion, EmotionControl, Encoding, Intensity, Sentinel, SourceEventData, StreamOptions, StreamRequest, TypedArray, Voice, VoiceOptions, VoiceSpecifier, WebSocketBaseResponse, WebSocketChunkResponse, WebSocketErrorResponse, WebSocketOptions, WebSocketResponse, WebSocketTimestampsResponse, WordTimestamps };
|
package/dist/voices/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
Voices
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-KUSVZXDT.js";
|
|
4
|
+
import "../chunk-5M33ZF3Y.js";
|
|
5
5
|
import "../chunk-2BFEKY3F.js";
|
|
6
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-GHY2WEOK.js";
|
|
7
7
|
export {
|
|
8
8
|
Voices as default
|
|
9
9
|
};
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
package/src/react/index.ts
CHANGED
|
@@ -4,12 +4,14 @@ import { Cartesia } from "../lib";
|
|
|
4
4
|
import Player from "../tts/player";
|
|
5
5
|
import type Source from "../tts/source";
|
|
6
6
|
import type WebSocket from "../tts/websocket";
|
|
7
|
+
import type { StreamRequest } from "../types";
|
|
7
8
|
import { pingServer } from "./utils";
|
|
8
9
|
|
|
9
10
|
export type UseTTSOptions = {
|
|
10
11
|
apiKey: string | null;
|
|
11
12
|
baseUrl?: string;
|
|
12
13
|
sampleRate: number;
|
|
14
|
+
onError?: (error: Error) => void;
|
|
13
15
|
};
|
|
14
16
|
|
|
15
17
|
export type PlaybackStatus = "inactive" | "playing" | "paused" | "finished";
|
|
@@ -20,7 +22,7 @@ export type Metrics = {
|
|
|
20
22
|
};
|
|
21
23
|
|
|
22
24
|
export interface UseTTSReturn {
|
|
23
|
-
buffer: (options:
|
|
25
|
+
buffer: (options: StreamRequest) => Promise<void>;
|
|
24
26
|
play: (bufferDuration?: number) => Promise<void>;
|
|
25
27
|
pause: () => Promise<void>;
|
|
26
28
|
resume: () => Promise<void>;
|
|
@@ -47,6 +49,7 @@ export function useTTS({
|
|
|
47
49
|
apiKey,
|
|
48
50
|
baseUrl,
|
|
49
51
|
sampleRate,
|
|
52
|
+
onError,
|
|
50
53
|
}: UseTTSOptions): UseTTSReturn {
|
|
51
54
|
if (typeof window === "undefined") {
|
|
52
55
|
return {
|
|
@@ -72,7 +75,11 @@ export function useTTS({
|
|
|
72
75
|
}
|
|
73
76
|
const cartesia = new Cartesia({ apiKey, baseUrl });
|
|
74
77
|
baseUrl = baseUrl ?? cartesia.baseUrl;
|
|
75
|
-
return cartesia.tts.websocket({
|
|
78
|
+
return cartesia.tts.websocket({
|
|
79
|
+
container: "raw",
|
|
80
|
+
encoding: "pcm_f32le",
|
|
81
|
+
sampleRate,
|
|
82
|
+
});
|
|
76
83
|
}, [apiKey, baseUrl, sampleRate]);
|
|
77
84
|
const websocketReturn = useRef<ReturnType<WebSocket["send"]> | null>(null);
|
|
78
85
|
const player = useRef<Player | null>(null);
|
|
@@ -85,21 +92,35 @@ export function useTTS({
|
|
|
85
92
|
const [messages, setMessages] = useState<Message[]>([]);
|
|
86
93
|
|
|
87
94
|
const buffer = useCallback(
|
|
88
|
-
async (options:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
95
|
+
async (options: StreamRequest) => {
|
|
96
|
+
websocketReturn.current?.stop(); // Abort the previous request if it exists.
|
|
97
|
+
|
|
98
|
+
try {
|
|
99
|
+
setMessages([]);
|
|
100
|
+
setBufferStatus("buffering");
|
|
101
|
+
websocketReturn.current = websocket?.send(options) ?? null;
|
|
102
|
+
if (!websocketReturn.current) {
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
const unsubscribe = websocketReturn.current.on("message", (message) => {
|
|
106
|
+
const parsedMessage = JSON.parse(message);
|
|
107
|
+
setMessages((messages) => [...messages, parsedMessage]);
|
|
108
|
+
if (parsedMessage.error) {
|
|
109
|
+
onError?.(new Error(parsedMessage.error));
|
|
110
|
+
}
|
|
111
|
+
});
|
|
112
|
+
await websocketReturn.current.source.once("close");
|
|
113
|
+
setBufferStatus("buffered");
|
|
114
|
+
unsubscribe();
|
|
115
|
+
} catch (error) {
|
|
116
|
+
if (error instanceof Error) {
|
|
117
|
+
onError?.(error);
|
|
118
|
+
} else {
|
|
119
|
+
console.error(error);
|
|
120
|
+
}
|
|
94
121
|
}
|
|
95
|
-
const unsubscribe = websocketReturn.current.on("message", (message) => {
|
|
96
|
-
setMessages((messages) => [...messages, JSON.parse(message)]);
|
|
97
|
-
});
|
|
98
|
-
await websocketReturn.current.source.once("close");
|
|
99
|
-
setBufferStatus("buffered");
|
|
100
|
-
unsubscribe();
|
|
101
122
|
},
|
|
102
|
-
[websocket],
|
|
123
|
+
[websocket, onError],
|
|
103
124
|
);
|
|
104
125
|
|
|
105
126
|
const metrics = useMemo(() => {
|
|
@@ -171,63 +192,97 @@ export function useTTS({
|
|
|
171
192
|
}, [websocket, baseUrl]);
|
|
172
193
|
|
|
173
194
|
const play = useCallback(async () => {
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
setIsWaiting(true);
|
|
183
|
-
}),
|
|
184
|
-
);
|
|
185
|
-
unsubscribes.push(
|
|
186
|
-
websocketReturn.current.source.on("read", () => {
|
|
187
|
-
setIsWaiting(false);
|
|
188
|
-
}),
|
|
189
|
-
);
|
|
190
|
-
|
|
191
|
-
player.current = new Player({
|
|
192
|
-
bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
|
|
193
|
-
});
|
|
194
|
-
// Wait for the playback to finish before setting isPlaying to false.
|
|
195
|
-
await player.current.play(websocketReturn.current.source);
|
|
195
|
+
try {
|
|
196
|
+
if (playbackStatus === "playing" || !websocketReturn.current) {
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
if (player.current) {
|
|
200
|
+
// Stop the current player if it exists.
|
|
201
|
+
await player.current.stop();
|
|
202
|
+
}
|
|
196
203
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
204
|
+
setPlaybackStatus("playing");
|
|
205
|
+
|
|
206
|
+
const unsubscribes = [];
|
|
207
|
+
unsubscribes.push(
|
|
208
|
+
websocketReturn.current.source.on("wait", () => {
|
|
209
|
+
setIsWaiting(true);
|
|
210
|
+
}),
|
|
211
|
+
);
|
|
212
|
+
unsubscribes.push(
|
|
213
|
+
websocketReturn.current.source.on("read", () => {
|
|
214
|
+
setIsWaiting(false);
|
|
215
|
+
}),
|
|
216
|
+
);
|
|
201
217
|
|
|
202
|
-
|
|
203
|
-
|
|
218
|
+
player.current = new Player({
|
|
219
|
+
bufferDuration: bufferDuration ?? DEFAULT_BUFFER_DURATION,
|
|
220
|
+
});
|
|
221
|
+
// Wait for the playback to finish before setting isPlaying to false.
|
|
222
|
+
await player.current.play(websocketReturn.current.source);
|
|
223
|
+
|
|
224
|
+
for (const unsubscribe of unsubscribes) {
|
|
225
|
+
// Deregister the event listeners (.on()) that we registered above to avoid memory leaks.
|
|
226
|
+
unsubscribe();
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
setPlaybackStatus("finished");
|
|
230
|
+
} catch (error) {
|
|
231
|
+
if (error instanceof Error) {
|
|
232
|
+
onError?.(error);
|
|
233
|
+
} else {
|
|
234
|
+
console.error(error);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}, [playbackStatus, bufferDuration, onError]);
|
|
204
238
|
|
|
205
239
|
const pause = useCallback(async () => {
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
240
|
+
try {
|
|
241
|
+
await player.current?.pause();
|
|
242
|
+
setPlaybackStatus("paused");
|
|
243
|
+
} catch (error) {
|
|
244
|
+
if (error instanceof Error) {
|
|
245
|
+
onError?.(error);
|
|
246
|
+
} else {
|
|
247
|
+
console.error(error);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}, [onError]);
|
|
209
251
|
|
|
210
252
|
const resume = useCallback(async () => {
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
253
|
+
try {
|
|
254
|
+
await player.current?.resume();
|
|
255
|
+
setPlaybackStatus("playing");
|
|
256
|
+
} catch (error) {
|
|
257
|
+
if (error instanceof Error) {
|
|
258
|
+
onError?.(error);
|
|
259
|
+
} else {
|
|
260
|
+
console.error(error);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}, [onError]);
|
|
214
264
|
|
|
215
265
|
const toggle = useCallback(async () => {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
266
|
+
try {
|
|
267
|
+
await player.current?.toggle();
|
|
268
|
+
setPlaybackStatus((status) => {
|
|
269
|
+
if (status === "playing") {
|
|
270
|
+
return "paused";
|
|
271
|
+
}
|
|
272
|
+
if (status === "paused") {
|
|
273
|
+
return "playing";
|
|
274
|
+
}
|
|
275
|
+
return status;
|
|
276
|
+
});
|
|
277
|
+
} catch (error) {
|
|
278
|
+
if (error instanceof Error) {
|
|
279
|
+
onError?.(error);
|
|
280
|
+
} else {
|
|
281
|
+
console.error(error);
|
|
223
282
|
}
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
}, []);
|
|
283
|
+
}
|
|
284
|
+
}, [onError]);
|
|
227
285
|
|
|
228
|
-
// TODO:
|
|
229
|
-
// - [] Access the play and buffer cursors.
|
|
230
|
-
// - [] Seek to a specific time.
|
|
231
286
|
return {
|
|
232
287
|
buffer,
|
|
233
288
|
play,
|
package/src/tts/player.ts
CHANGED
|
@@ -1,16 +1,10 @@
|
|
|
1
|
-
import Emittery from "emittery";
|
|
2
1
|
import type Source from "./source";
|
|
3
2
|
import { playAudioBuffer } from "./utils";
|
|
4
3
|
|
|
5
|
-
type PlayEventData = {
|
|
6
|
-
finish: never;
|
|
7
|
-
};
|
|
8
|
-
|
|
9
4
|
export default class Player {
|
|
10
5
|
#context: AudioContext | null = null;
|
|
11
6
|
#startNextPlaybackAt = 0;
|
|
12
7
|
#bufferDuration: number;
|
|
13
|
-
#emitter = new Emittery<PlayEventData>();
|
|
14
8
|
|
|
15
9
|
/**
|
|
16
10
|
* Create a new Player.
|
|
@@ -26,6 +20,9 @@ export default class Player {
|
|
|
26
20
|
if (!this.#context) {
|
|
27
21
|
throw new Error("AudioContext not initialized.");
|
|
28
22
|
}
|
|
23
|
+
if (buf.length === 0) {
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
29
26
|
|
|
30
27
|
const startAt = this.#startNextPlaybackAt;
|
|
31
28
|
const duration = buf.length / sampleRate;
|
|
@@ -60,11 +57,9 @@ export default class Player {
|
|
|
60
57
|
|
|
61
58
|
if (read < buffer.length) {
|
|
62
59
|
// No more audio to read.
|
|
63
|
-
await this.#emitter.emit("finish");
|
|
64
60
|
break;
|
|
65
61
|
}
|
|
66
62
|
}
|
|
67
|
-
|
|
68
63
|
await Promise.all(plays);
|
|
69
64
|
}
|
|
70
65
|
|
|
@@ -107,4 +102,16 @@ export default class Player {
|
|
|
107
102
|
await this.resume();
|
|
108
103
|
}
|
|
109
104
|
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Stop the audio.
|
|
108
|
+
*
|
|
109
|
+
* @returns A promise that resolves when the audio has been stopped.
|
|
110
|
+
*/
|
|
111
|
+
async stop() {
|
|
112
|
+
if (!this.#context) {
|
|
113
|
+
throw new Error("AudioContext not initialized.");
|
|
114
|
+
}
|
|
115
|
+
await this.#context?.close();
|
|
116
|
+
}
|
|
110
117
|
}
|