@cartesia/cartesia-js 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +50 -50
- package/CHANGELOG.md +12 -0
- package/LICENSE.md +21 -0
- package/README.md +92 -19
- package/dist/{chunk-PQ6CIPFW.js → chunk-6YQ6KDIQ.js} +44 -5
- package/dist/{chunk-RO7TY474.js → chunk-BHY7MNGT.js} +11 -6
- package/dist/{chunk-F4QWVJY3.js → chunk-EDAAHENY.js} +2 -2
- package/dist/{chunk-WIFMLPT5.js → chunk-GHY2WEOK.js} +13 -0
- package/dist/{chunk-FN7BK4PS.js → chunk-IZBPLCGW.js} +97 -75
- package/dist/{chunk-JYLAM6VU.js → chunk-LZO6K34D.js} +2 -2
- package/dist/{chunk-3FL2SNIR.js → chunk-NQVZNVOU.js} +1 -1
- package/dist/{chunk-IEN4NCER.js → chunk-NVOCUUOF.js} +3 -3
- package/dist/chunk-PISCPZK4.js +40 -0
- package/dist/{chunk-SGXUEFII.js → chunk-UCYL2SOX.js} +18 -15
- package/dist/index.cjs +186 -103
- package/dist/index.d.cts +4 -4
- package/dist/index.d.ts +4 -4
- package/dist/index.js +15 -9
- package/dist/lib/client.cjs +35 -10
- package/dist/lib/client.d.cts +2 -2
- package/dist/lib/client.d.ts +2 -2
- package/dist/lib/client.js +2 -2
- package/dist/lib/constants.js +1 -1
- package/dist/lib/index.cjs +181 -102
- package/dist/lib/index.js +8 -8
- package/dist/react/index.cjs +286 -158
- package/dist/react/index.d.cts +5 -4
- package/dist/react/index.d.ts +5 -4
- package/dist/react/index.js +115 -66
- package/dist/react/utils.js +2 -2
- package/dist/tts/index.cjs +165 -89
- package/dist/tts/index.js +6 -6
- package/dist/tts/player.cjs +5 -0
- package/dist/tts/player.js +4 -3
- package/dist/tts/source.cjs +50 -4
- package/dist/tts/source.d.cts +16 -6
- package/dist/tts/source.d.ts +16 -6
- package/dist/tts/source.js +4 -2
- package/dist/tts/utils.cjs +18 -6
- package/dist/tts/utils.d.cts +7 -5
- package/dist/tts/utils.d.ts +7 -5
- package/dist/tts/utils.js +3 -2
- package/dist/tts/websocket.cjs +165 -89
- package/dist/tts/websocket.d.cts +12 -8
- package/dist/tts/websocket.d.ts +12 -8
- package/dist/tts/websocket.js +5 -5
- package/dist/types/index.d.cts +65 -5
- package/dist/types/index.d.ts +65 -5
- package/dist/voices/index.cjs +31 -23
- package/dist/voices/index.d.cts +2 -1
- package/dist/voices/index.d.ts +2 -1
- package/dist/voices/index.js +3 -3
- package/package.json +1 -1
- package/src/index.ts +2 -0
- package/src/lib/client.ts +10 -10
- package/src/react/index.ts +115 -64
- package/src/tts/source.ts +53 -7
- package/src/tts/utils.ts +26 -12
- package/src/tts/websocket.ts +42 -23
- package/src/types/index.ts +89 -4
- package/src/voices/index.ts +22 -15
- package/dist/chunk-PQ5EVEEH.js +0 -34
package/.turbo/turbo-build.log
CHANGED
|
@@ -5,71 +5,71 @@ $ tsup src/ --format cjs,esm --dts
|
|
|
5
5
|
[34mCLI[39m Target: es6
|
|
6
6
|
[34mCJS[39m Build start
|
|
7
7
|
[34mESM[39m Build start
|
|
8
|
-
[
|
|
8
|
+
[32mCJS[39m [1mdist/types/index.cjs [22m[32m764.00 B[39m
|
|
9
|
+
[32mCJS[39m [1mdist/voices/index.cjs [22m[32m5.36 KB[39m
|
|
10
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m24.67 KB[39m
|
|
11
|
+
[32mCJS[39m [1mdist/lib/client.cjs [22m[32m4.07 KB[39m
|
|
12
|
+
[32mCJS[39m [1mdist/lib/constants.cjs [22m[32m1.43 KB[39m
|
|
13
|
+
[32mCJS[39m [1mdist/lib/index.cjs [22m[32m20.57 KB[39m
|
|
14
|
+
[32mCJS[39m [1mdist/react/index.cjs [22m[32m32.23 KB[39m
|
|
15
|
+
[32mCJS[39m [1mdist/react/utils.cjs [22m[32m1.80 KB[39m
|
|
16
|
+
[32mCJS[39m [1mdist/tts/index.cjs [22m[32m18.92 KB[39m
|
|
17
|
+
[32mCJS[39m [1mdist/tts/player.cjs [22m[32m6.96 KB[39m
|
|
18
|
+
[32mCJS[39m [1mdist/tts/source.cjs [22m[32m8.15 KB[39m
|
|
19
|
+
[32mCJS[39m [1mdist/tts/utils.cjs [22m[32m4.33 KB[39m
|
|
20
|
+
[32mCJS[39m [1mdist/tts/websocket.cjs [22m[32m18.61 KB[39m
|
|
21
|
+
[32mCJS[39m ⚡️ Build success in 75ms
|
|
22
|
+
[32mESM[39m [1mdist/index.js [22m[32m499.00 B[39m
|
|
23
|
+
[32mESM[39m [1mdist/lib/constants.js [22m[32m183.00 B[39m
|
|
9
24
|
[32mESM[39m [1mdist/chunk-FXPGR372.js [22m[32m0 B[39m
|
|
10
|
-
[32mESM[39m [1mdist/
|
|
11
|
-
[32mESM[39m [1mdist/react/index.js [22m[32m6.35 KB[39m
|
|
25
|
+
[32mESM[39m [1mdist/react/index.js [22m[32m7.62 KB[39m
|
|
12
26
|
[32mESM[39m [1mdist/tts/index.js [22m[32m261.00 B[39m
|
|
13
|
-
[32mESM[39m [1mdist/lib/constants.js [22m[32m183.00 B[39m
|
|
14
27
|
[32mESM[39m [1mdist/lib/index.js [22m[32m322.00 B[39m
|
|
15
|
-
[32mESM[39m [1mdist/chunk-
|
|
16
|
-
[32mESM[39m [1mdist/chunk-
|
|
28
|
+
[32mESM[39m [1mdist/chunk-NVOCUUOF.js [22m[32m353.00 B[39m
|
|
29
|
+
[32mESM[39m [1mdist/chunk-UCYL2SOX.js [22m[32m1.48 KB[39m
|
|
17
30
|
[32mESM[39m [1mdist/react/utils.js [22m[32m109.00 B[39m
|
|
18
|
-
[32mESM[39m [1mdist/chunk-
|
|
19
|
-
[32mESM[39m [1mdist/chunk-
|
|
20
|
-
[32mESM[39m [1mdist/chunk-
|
|
21
|
-
[32mESM[39m [1mdist/chunk-
|
|
22
|
-
[32mESM[39m [1mdist/chunk-PQ5EVEEH.js [22m[32m841.00 B[39m
|
|
31
|
+
[32mESM[39m [1mdist/chunk-NQVZNVOU.js [22m[32m337.00 B[39m
|
|
32
|
+
[32mESM[39m [1mdist/chunk-EDAAHENY.js [22m[32m439.00 B[39m
|
|
33
|
+
[32mESM[39m [1mdist/chunk-IZBPLCGW.js [22m[32m7.13 KB[39m
|
|
34
|
+
[32mESM[39m [1mdist/chunk-PISCPZK4.js [22m[32m1.02 KB[39m
|
|
23
35
|
[32mESM[39m [1mdist/chunk-2BFEKY3F.js [22m[32m366.00 B[39m
|
|
24
|
-
[32mESM[39m [1mdist/tts/player.js [22m[
|
|
25
|
-
[32mESM[39m [1mdist/chunk-
|
|
26
|
-
[32mESM[39m [1mdist/chunk-
|
|
27
|
-
[32mESM[39m [1mdist/chunk-
|
|
36
|
+
[32mESM[39m [1mdist/tts/player.js [22m[32m174.00 B[39m
|
|
37
|
+
[32mESM[39m [1mdist/chunk-LZO6K34D.js [22m[32m3.76 KB[39m
|
|
38
|
+
[32mESM[39m [1mdist/chunk-BHY7MNGT.js [22m[32m2.11 KB[39m
|
|
39
|
+
[32mESM[39m [1mdist/chunk-6YQ6KDIQ.js [22m[32m5.32 KB[39m
|
|
40
|
+
[32mESM[39m [1mdist/chunk-GHY2WEOK.js [22m[32m2.70 KB[39m
|
|
41
|
+
[32mESM[39m [1mdist/tts/source.js [22m[32m144.00 B[39m
|
|
42
|
+
[32mESM[39m [1mdist/tts/utils.js [22m[32m426.00 B[39m
|
|
43
|
+
[32mESM[39m [1mdist/tts/websocket.js [22m[32m242.00 B[39m
|
|
28
44
|
[32mESM[39m [1mdist/types/index.js [22m[32m31.00 B[39m
|
|
29
45
|
[32mESM[39m [1mdist/voices/index.js [22m[32m174.00 B[39m
|
|
30
|
-
[32mESM[39m [1mdist/
|
|
31
|
-
[32mESM[39m
|
|
32
|
-
[32mESM[39m [1mdist/tts/websocket.js [22m[32m242.00 B[39m
|
|
33
|
-
[32mESM[39m ⚡️ Build success in 108ms
|
|
34
|
-
[32mCJS[39m [1mdist/index.cjs [22m[32m21.48 KB[39m
|
|
35
|
-
[32mCJS[39m [1mdist/lib/client.cjs [22m[32m3.34 KB[39m
|
|
36
|
-
[32mCJS[39m [1mdist/lib/constants.cjs [22m[32m1.43 KB[39m
|
|
37
|
-
[32mCJS[39m [1mdist/lib/index.cjs [22m[32m17.45 KB[39m
|
|
38
|
-
[32mCJS[39m [1mdist/react/index.cjs [22m[32m27.85 KB[39m
|
|
39
|
-
[32mCJS[39m [1mdist/react/utils.cjs [22m[32m1.80 KB[39m
|
|
40
|
-
[32mCJS[39m [1mdist/tts/index.cjs [22m[32m15.93 KB[39m
|
|
41
|
-
[32mCJS[39m [1mdist/tts/player.cjs [22m[32m6.87 KB[39m
|
|
42
|
-
[32mCJS[39m [1mdist/tts/source.cjs [22m[32m6.63 KB[39m
|
|
43
|
-
[32mCJS[39m [1mdist/tts/utils.cjs [22m[32m3.87 KB[39m
|
|
44
|
-
[32mCJS[39m [1mdist/tts/websocket.cjs [22m[32m15.62 KB[39m
|
|
45
|
-
[32mCJS[39m [1mdist/types/index.cjs [22m[32m764.00 B[39m
|
|
46
|
-
[32mCJS[39m [1mdist/voices/index.cjs [22m[32m5.04 KB[39m
|
|
47
|
-
[32mCJS[39m ⚡️ Build success in 111ms
|
|
46
|
+
[32mESM[39m [1mdist/lib/client.js [22m[32m132.00 B[39m
|
|
47
|
+
[32mESM[39m ⚡️ Build success in 77ms
|
|
48
48
|
[34mDTS[39m Build start
|
|
49
|
-
[32mDTS[39m ⚡️ Build success in
|
|
50
|
-
[32mDTS[39m [1mdist/index.d.cts [22m[
|
|
49
|
+
[32mDTS[39m ⚡️ Build success in 8516ms
|
|
50
|
+
[32mDTS[39m [1mdist/index.d.cts [22m[32m817.00 B[39m
|
|
51
51
|
[32mDTS[39m [1mdist/lib/constants.d.cts [22m[32m564.00 B[39m
|
|
52
52
|
[32mDTS[39m [1mdist/lib/index.d.cts [22m[32m410.00 B[39m
|
|
53
|
-
[32mDTS[39m [1mdist/react/index.d.cts [22m[
|
|
53
|
+
[32mDTS[39m [1mdist/react/index.d.cts [22m[32m1.09 KB[39m
|
|
54
54
|
[32mDTS[39m [1mdist/react/utils.d.cts [22m[32m240.00 B[39m
|
|
55
55
|
[32mDTS[39m [1mdist/tts/index.d.cts [22m[32m471.00 B[39m
|
|
56
56
|
[32mDTS[39m [1mdist/tts/player.d.cts [22m[32m1.20 KB[39m
|
|
57
|
-
[32mDTS[39m [1mdist/tts/utils.d.cts [22m[32m2.
|
|
58
|
-
[32mDTS[39m [1mdist/tts/websocket.d.cts [22m[32m2.
|
|
59
|
-
[32mDTS[39m [1mdist/tts/source.d.cts [22m[32m2.
|
|
60
|
-
[32mDTS[39m [1mdist/voices/index.d.cts [22m[
|
|
61
|
-
[32mDTS[39m [1mdist/lib/client.d.cts [22m[
|
|
62
|
-
[32mDTS[39m [1mdist/types/index.d.cts [22m[
|
|
63
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[
|
|
57
|
+
[32mDTS[39m [1mdist/tts/utils.d.cts [22m[32m2.68 KB[39m
|
|
58
|
+
[32mDTS[39m [1mdist/tts/websocket.d.cts [22m[32m2.94 KB[39m
|
|
59
|
+
[32mDTS[39m [1mdist/tts/source.d.cts [22m[32m2.55 KB[39m
|
|
60
|
+
[32mDTS[39m [1mdist/voices/index.d.cts [22m[32m472.00 B[39m
|
|
61
|
+
[32mDTS[39m [1mdist/lib/client.d.cts [22m[32m293.00 B[39m
|
|
62
|
+
[32mDTS[39m [1mdist/types/index.d.cts [22m[32m3.25 KB[39m
|
|
63
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m809.00 B[39m
|
|
64
64
|
[32mDTS[39m [1mdist/lib/constants.d.ts [22m[32m564.00 B[39m
|
|
65
65
|
[32mDTS[39m [1mdist/lib/index.d.ts [22m[32m404.00 B[39m
|
|
66
|
-
[32mDTS[39m [1mdist/react/index.d.ts [22m[
|
|
66
|
+
[32mDTS[39m [1mdist/react/index.d.ts [22m[32m1.09 KB[39m
|
|
67
67
|
[32mDTS[39m [1mdist/react/utils.d.ts [22m[32m240.00 B[39m
|
|
68
68
|
[32mDTS[39m [1mdist/tts/index.d.ts [22m[32m467.00 B[39m
|
|
69
69
|
[32mDTS[39m [1mdist/tts/player.d.ts [22m[32m1.20 KB[39m
|
|
70
|
-
[32mDTS[39m [1mdist/tts/utils.d.ts [22m[32m2.
|
|
71
|
-
[32mDTS[39m [1mdist/tts/websocket.d.ts [22m[32m2.
|
|
72
|
-
[32mDTS[39m [1mdist/tts/source.d.ts [22m[32m2.
|
|
73
|
-
[32mDTS[39m [1mdist/voices/index.d.ts [22m[
|
|
74
|
-
[32mDTS[39m [1mdist/lib/client.d.ts [22m[
|
|
75
|
-
[32mDTS[39m [1mdist/types/index.d.ts [22m[
|
|
70
|
+
[32mDTS[39m [1mdist/tts/utils.d.ts [22m[32m2.68 KB[39m
|
|
71
|
+
[32mDTS[39m [1mdist/tts/websocket.d.ts [22m[32m2.94 KB[39m
|
|
72
|
+
[32mDTS[39m [1mdist/tts/source.d.ts [22m[32m2.55 KB[39m
|
|
73
|
+
[32mDTS[39m [1mdist/voices/index.d.ts [22m[32m470.00 B[39m
|
|
74
|
+
[32mDTS[39m [1mdist/lib/client.d.ts [22m[32m292.00 B[39m
|
|
75
|
+
[32mDTS[39m [1mdist/types/index.d.ts [22m[32m3.25 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# @cartesia/cartesia-js
|
|
2
2
|
|
|
3
|
+
## 1.0.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Make voice creation and cloning fully compatible with API.
|
|
8
|
+
|
|
9
|
+
## 1.0.1
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- cb7adc2: Introduces support for continuations, timestamps, voice control and multiple output formats. Improves typing and error handling for the package.
|
|
14
|
+
|
|
3
15
|
## 1.0.0
|
|
4
16
|
|
|
5
17
|
### Major Changes
|
package/LICENSE.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Cartesia AI, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -5,7 +5,19 @@
|
|
|
5
5
|
|
|
6
6
|
This client provides convenient access to [Cartesia's TTS models](https://cartesia.ai/). Sonic is the fastest text-to-speech model around—it can generate a second of audio in just 650ms, and it can stream out the first audio chunk in just 135ms. Alongside Sonic, we also offer an extensive prebuilt voice library for a variety of use cases.
|
|
7
7
|
|
|
8
|
-
The
|
|
8
|
+
The JavaScript client is a thin wrapper around the Cartesia API. You can view docs for the API at [docs.cartesia.ai](https://docs.cartesia.ai/).
|
|
9
|
+
|
|
10
|
+
- [Cartesia JavaScript Client](#cartesia-javascript-client)
|
|
11
|
+
- [Installation](#installation)
|
|
12
|
+
- [Usage](#usage)
|
|
13
|
+
- [CRUD on Voices](#crud-on-voices)
|
|
14
|
+
- [TTS over WebSocket](#tts-over-websocket)
|
|
15
|
+
- [Input Streaming with Contexts](#input-streaming-with-contexts)
|
|
16
|
+
- [Timestamps](#timestamps)
|
|
17
|
+
- [Speed and emotion controls \[Alpha\]](#speed-and-emotion-controls-alpha)
|
|
18
|
+
- [Multilingual TTS \[Alpha\]](#multilingual-tts-alpha)
|
|
19
|
+
- [Playing audio in the browser](#playing-audio-in-the-browser)
|
|
20
|
+
- [React](#react)
|
|
9
21
|
|
|
10
22
|
|
|
11
23
|
## Installation
|
|
@@ -48,12 +60,6 @@ const newVoice = await cartesia.voices.create({
|
|
|
48
60
|
});
|
|
49
61
|
console.log(newVoice);
|
|
50
62
|
|
|
51
|
-
// Clone a voice from a URL.
|
|
52
|
-
const clonedVoice = await cartesia.voices.clone({
|
|
53
|
-
mode: "url",
|
|
54
|
-
url: "https://youtu.be/AdtLxlttrHg?si=07OLmDPg__0IN14f&t=6",
|
|
55
|
-
});
|
|
56
|
-
|
|
57
63
|
// Clone a voice from a file.
|
|
58
64
|
const clonedVoice = await cartesia.voices.clone({
|
|
59
65
|
mode: "clip",
|
|
@@ -70,8 +76,12 @@ const cartesia = new Cartesia({
|
|
|
70
76
|
apiKey: "your-api-key",
|
|
71
77
|
});
|
|
72
78
|
|
|
73
|
-
// Initialize the WebSocket. Make sure the
|
|
74
|
-
const websocket = cartesia.tts.websocket({
|
|
79
|
+
// Initialize the WebSocket. Make sure the output format you specify is supported.
|
|
80
|
+
const websocket = cartesia.tts.websocket({
|
|
81
|
+
container: "raw",
|
|
82
|
+
encoding: "pcm_f32le",
|
|
83
|
+
sampleRate: 44100
|
|
84
|
+
});
|
|
75
85
|
|
|
76
86
|
try {
|
|
77
87
|
await websocket.connect();
|
|
@@ -81,14 +91,13 @@ try {
|
|
|
81
91
|
|
|
82
92
|
// Create a stream.
|
|
83
93
|
const response = await websocket.send({
|
|
84
|
-
|
|
94
|
+
model_id: "sonic-english",
|
|
85
95
|
voice: {
|
|
86
|
-
mode: "
|
|
87
|
-
|
|
96
|
+
mode: "id",
|
|
97
|
+
id: "a0e99841-438c-4a64-b679-ae501e7d6091",
|
|
88
98
|
},
|
|
89
99
|
transcript: "Hello, world!"
|
|
90
100
|
// The WebSocket sets output_format on your behalf.
|
|
91
|
-
// The container is "raw" and the encoding is "pcm_f32le".
|
|
92
101
|
});
|
|
93
102
|
|
|
94
103
|
// Access the raw messages from the WebSocket.
|
|
@@ -104,9 +113,73 @@ for await (const message of response.events('message')) {
|
|
|
104
113
|
}
|
|
105
114
|
```
|
|
106
115
|
|
|
107
|
-
####
|
|
116
|
+
#### Input Streaming with Contexts
|
|
117
|
+
|
|
118
|
+
You can perform input streaming with contexts as described in the [docs](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-websocket#input-streaming-with-contexts). The WebSocket's `send` method is just a wrapper around sending a message on the WebSocket, so the request format specified in the docs can be used directly.
|
|
119
|
+
|
|
120
|
+
You should use the return value from the first `send` call on a context to receive outputs and events for the entire context. You can ignore the return values of subsequent `send` calls.
|
|
121
|
+
|
|
122
|
+
#### Timestamps
|
|
123
|
+
|
|
124
|
+
To receive timestamps in responses, set the `add_timestamps` field in the request object to `true`.
|
|
125
|
+
|
|
126
|
+
```js
|
|
127
|
+
const response = await websocket.send({
|
|
128
|
+
model_id: "sonic-english",
|
|
129
|
+
voice: {
|
|
130
|
+
mode: "id",
|
|
131
|
+
id: "a0e99841-438c-4a64-b679-ae501e7d6091",
|
|
132
|
+
},
|
|
133
|
+
transcript: "Hello, world!",
|
|
134
|
+
add_timestamps: true,
|
|
135
|
+
});
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
You can then listen for timestamps on the returned response object.
|
|
139
|
+
|
|
140
|
+
```js
|
|
141
|
+
response.on("timestamps", (timestamps) => {
|
|
142
|
+
console.log("Received timestamps for words:", timestamps.words);
|
|
143
|
+
console.log("Words start at:", timestamps.start);
|
|
144
|
+
console.log("Words end at:", timestamps.end);
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// You can also access timestamps using a for-await-of loop.
|
|
148
|
+
for await (const timestamps of response.events('timestamps')) {
|
|
149
|
+
console.log("Received timestamps for words:", timestamps.words);
|
|
150
|
+
console.log("Words start at:", timestamps.start);
|
|
151
|
+
console.log("Words end at:", timestamps.end);
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### Speed and emotion controls [Alpha]
|
|
156
|
+
|
|
157
|
+
The API has experimental support for speed and emotion controls that is not subject to semantic versioning and is subject to change without notice. You can control the speed and emotion of the synthesized speech by setting the `speed` and `emotion` fields under `voice.__experimental_controls` in the request object.
|
|
158
|
+
|
|
159
|
+
```js
|
|
160
|
+
const response = await websocket.send({
|
|
161
|
+
model_id: "sonic-english",
|
|
162
|
+
voice: {
|
|
163
|
+
mode: "id",
|
|
164
|
+
id: "a0e99841-438c-4a64-b679-ae501e7d6091",
|
|
165
|
+
__experimental_controls: {
|
|
166
|
+
speed: "fastest",
|
|
167
|
+
emotion: ["sadness", "surprise:high"],
|
|
168
|
+
},
|
|
169
|
+
},
|
|
170
|
+
transcript: "Hello, world!",
|
|
171
|
+
});
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Multilingual TTS [Alpha]
|
|
175
|
+
|
|
176
|
+
You can define the language of the text you want to synthesize by setting the `language` field in the request object. Make sure that you are using `model_id: "sonic-multilingual"` in the request object.
|
|
177
|
+
|
|
178
|
+
Supported languages are listed at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
|
|
179
|
+
|
|
180
|
+
### Playing audio in the browser
|
|
108
181
|
|
|
109
|
-
(The `WebPlayer` class only supports playing audio in the browser.)
|
|
182
|
+
(The `WebPlayer` class only supports playing audio in the browser, and only in the raw PCM format with `pcm_f32le` encoding.)
|
|
110
183
|
|
|
111
184
|
```js
|
|
112
185
|
// If you're using the client in the browser, you can control audio playback using our WebPlayer:
|
|
@@ -142,11 +215,11 @@ function TextToSpeech() {
|
|
|
142
215
|
const handlePlay = async () => {
|
|
143
216
|
// Begin buffering the audio.
|
|
144
217
|
const response = await tts.buffer({
|
|
145
|
-
model_id: "
|
|
218
|
+
model_id: "sonic-english",
|
|
146
219
|
voice: {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
220
|
+
mode: "id",
|
|
221
|
+
id: "a0e99841-438c-4a64-b679-ae501e7d6091",
|
|
222
|
+
},
|
|
150
223
|
transcript: text,
|
|
151
224
|
});
|
|
152
225
|
|
|
@@ -2,12 +2,19 @@ import {
|
|
|
2
2
|
__async,
|
|
3
3
|
__privateAdd,
|
|
4
4
|
__privateGet,
|
|
5
|
+
__privateMethod,
|
|
5
6
|
__privateSet
|
|
6
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-GHY2WEOK.js";
|
|
7
8
|
|
|
8
9
|
// src/tts/source.ts
|
|
9
10
|
import Emittery from "emittery";
|
|
10
|
-
var
|
|
11
|
+
var ENCODING_MAP = {
|
|
12
|
+
pcm_f32le: { arrayType: Float32Array, bytesPerElement: 4 },
|
|
13
|
+
pcm_s16le: { arrayType: Int16Array, bytesPerElement: 2 },
|
|
14
|
+
pcm_alaw: { arrayType: Uint8Array, bytesPerElement: 1 },
|
|
15
|
+
pcm_mulaw: { arrayType: Uint8Array, bytesPerElement: 1 }
|
|
16
|
+
};
|
|
17
|
+
var _emitter, _buffer, _readIndex, _writeIndex, _closed, _sampleRate, _encoding, _container, _createBuffer, createBuffer_fn;
|
|
11
18
|
var Source = class {
|
|
12
19
|
/**
|
|
13
20
|
* Create a new Source.
|
|
@@ -15,23 +22,44 @@ var Source = class {
|
|
|
15
22
|
* @param options - Options for the Source.
|
|
16
23
|
* @param options.sampleRate - The sample rate of the audio.
|
|
17
24
|
*/
|
|
18
|
-
constructor({
|
|
25
|
+
constructor({
|
|
26
|
+
sampleRate,
|
|
27
|
+
encoding,
|
|
28
|
+
container
|
|
29
|
+
}) {
|
|
30
|
+
/**
|
|
31
|
+
* Create a new buffer for the source.
|
|
32
|
+
*
|
|
33
|
+
* @param size - The size of the buffer to create.
|
|
34
|
+
* @returns The new buffer as a TypedArray based on the encoding.
|
|
35
|
+
*/
|
|
36
|
+
__privateAdd(this, _createBuffer);
|
|
19
37
|
__privateAdd(this, _emitter, new Emittery());
|
|
20
38
|
__privateAdd(this, _buffer, void 0);
|
|
21
39
|
__privateAdd(this, _readIndex, 0);
|
|
22
40
|
__privateAdd(this, _writeIndex, 0);
|
|
23
41
|
__privateAdd(this, _closed, false);
|
|
24
42
|
__privateAdd(this, _sampleRate, void 0);
|
|
43
|
+
__privateAdd(this, _encoding, void 0);
|
|
44
|
+
__privateAdd(this, _container, void 0);
|
|
25
45
|
this.on = __privateGet(this, _emitter).on.bind(__privateGet(this, _emitter));
|
|
26
46
|
this.once = __privateGet(this, _emitter).once.bind(__privateGet(this, _emitter));
|
|
27
47
|
this.events = __privateGet(this, _emitter).events.bind(__privateGet(this, _emitter));
|
|
28
48
|
this.off = __privateGet(this, _emitter).off.bind(__privateGet(this, _emitter));
|
|
29
49
|
__privateSet(this, _sampleRate, sampleRate);
|
|
30
|
-
__privateSet(this,
|
|
50
|
+
__privateSet(this, _encoding, encoding);
|
|
51
|
+
__privateSet(this, _container, container);
|
|
52
|
+
__privateSet(this, _buffer, __privateMethod(this, _createBuffer, createBuffer_fn).call(this, 1024));
|
|
31
53
|
}
|
|
32
54
|
get sampleRate() {
|
|
33
55
|
return __privateGet(this, _sampleRate);
|
|
34
56
|
}
|
|
57
|
+
get encoding() {
|
|
58
|
+
return __privateGet(this, _encoding);
|
|
59
|
+
}
|
|
60
|
+
get container() {
|
|
61
|
+
return __privateGet(this, _container);
|
|
62
|
+
}
|
|
35
63
|
/**
|
|
36
64
|
* Append audio to the buffer.
|
|
37
65
|
*
|
|
@@ -45,7 +73,7 @@ var Source = class {
|
|
|
45
73
|
while (newCapacity < requiredCapacity) {
|
|
46
74
|
newCapacity *= 2;
|
|
47
75
|
}
|
|
48
|
-
const newBuffer =
|
|
76
|
+
const newBuffer = __privateMethod(this, _createBuffer, createBuffer_fn).call(this, newCapacity);
|
|
49
77
|
newBuffer.set(__privateGet(this, _buffer));
|
|
50
78
|
__privateSet(this, _buffer, newBuffer);
|
|
51
79
|
}
|
|
@@ -93,6 +121,9 @@ var Source = class {
|
|
|
93
121
|
get readIndex() {
|
|
94
122
|
return __privateGet(this, _readIndex);
|
|
95
123
|
}
|
|
124
|
+
get writeIndex() {
|
|
125
|
+
return __privateGet(this, _writeIndex);
|
|
126
|
+
}
|
|
96
127
|
/**
|
|
97
128
|
* Close the source. This signals that no more audio will be enqueued.
|
|
98
129
|
*
|
|
@@ -114,7 +145,15 @@ _readIndex = new WeakMap();
|
|
|
114
145
|
_writeIndex = new WeakMap();
|
|
115
146
|
_closed = new WeakMap();
|
|
116
147
|
_sampleRate = new WeakMap();
|
|
148
|
+
_encoding = new WeakMap();
|
|
149
|
+
_container = new WeakMap();
|
|
150
|
+
_createBuffer = new WeakSet();
|
|
151
|
+
createBuffer_fn = function(size) {
|
|
152
|
+
const { arrayType: ArrayType } = ENCODING_MAP[__privateGet(this, _encoding)];
|
|
153
|
+
return new ArrayType(size);
|
|
154
|
+
};
|
|
117
155
|
|
|
118
156
|
export {
|
|
157
|
+
ENCODING_MAP,
|
|
119
158
|
Source
|
|
120
159
|
};
|
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ENCODING_MAP
|
|
3
|
+
} from "./chunk-6YQ6KDIQ.js";
|
|
4
|
+
|
|
1
5
|
// src/tts/utils.ts
|
|
2
6
|
import base64 from "base64-js";
|
|
3
|
-
function base64ToArray(b64) {
|
|
7
|
+
function base64ToArray(b64, encoding) {
|
|
4
8
|
const byteArrays = filterSentinel(b64).map((b) => base64.toByteArray(b));
|
|
9
|
+
const { arrayType: ArrayType, bytesPerElement } = ENCODING_MAP[encoding];
|
|
5
10
|
const totalLength = byteArrays.reduce(
|
|
6
|
-
(acc, arr) => acc + arr.length /
|
|
11
|
+
(acc, arr) => acc + arr.length / bytesPerElement,
|
|
7
12
|
0
|
|
8
13
|
);
|
|
9
|
-
const result = new
|
|
14
|
+
const result = new ArrayType(totalLength);
|
|
10
15
|
let offset = 0;
|
|
11
16
|
for (const arr of byteArrays) {
|
|
12
|
-
const floats = new
|
|
17
|
+
const floats = new ArrayType(arr.buffer);
|
|
13
18
|
result.set(floats, offset);
|
|
14
19
|
offset += floats.length;
|
|
15
20
|
}
|
|
@@ -40,10 +45,10 @@ function createMessageHandlerForContextId(contextId, handler) {
|
|
|
40
45
|
let chunk;
|
|
41
46
|
if (message.done) {
|
|
42
47
|
chunk = getSentinel();
|
|
43
|
-
} else {
|
|
48
|
+
} else if (message.type === "chunk") {
|
|
44
49
|
chunk = message.data;
|
|
45
50
|
}
|
|
46
|
-
handler({ chunk, message: event.data });
|
|
51
|
+
handler({ chunk, message: event.data, data: message });
|
|
47
52
|
};
|
|
48
53
|
}
|
|
49
54
|
function getSentinel() {
|
|
@@ -17,6 +17,18 @@ var __spreadValues = (a, b) => {
|
|
|
17
17
|
return a;
|
|
18
18
|
};
|
|
19
19
|
var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));
|
|
20
|
+
var __objRest = (source, exclude) => {
|
|
21
|
+
var target = {};
|
|
22
|
+
for (var prop in source)
|
|
23
|
+
if (__hasOwnProp.call(source, prop) && exclude.indexOf(prop) < 0)
|
|
24
|
+
target[prop] = source[prop];
|
|
25
|
+
if (source != null && __getOwnPropSymbols)
|
|
26
|
+
for (var prop of __getOwnPropSymbols(source)) {
|
|
27
|
+
if (exclude.indexOf(prop) < 0 && __propIsEnum.call(source, prop))
|
|
28
|
+
target[prop] = source[prop];
|
|
29
|
+
}
|
|
30
|
+
return target;
|
|
31
|
+
};
|
|
20
32
|
var __accessCheck = (obj, member, msg) => {
|
|
21
33
|
if (!member.has(obj))
|
|
22
34
|
throw TypeError("Cannot " + msg);
|
|
@@ -63,6 +75,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
63
75
|
export {
|
|
64
76
|
__spreadValues,
|
|
65
77
|
__spreadProps,
|
|
78
|
+
__objRest,
|
|
66
79
|
__privateGet,
|
|
67
80
|
__privateAdd,
|
|
68
81
|
__privateSet,
|