cartesia 1.0.13__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cartesia-1.0.13/cartesia.egg-info → cartesia-1.1.0}/PKG-INFO +41 -24
- cartesia-1.0.13/PKG-INFO → cartesia-1.1.0/README.md +34 -30
- cartesia-1.1.0/cartesia/__init__.py +4 -0
- cartesia-1.1.0/cartesia/_async_sse.py +95 -0
- cartesia-1.1.0/cartesia/_async_websocket.py +313 -0
- cartesia-1.1.0/cartesia/_constants.py +10 -0
- cartesia-1.1.0/cartesia/_logger.py +3 -0
- cartesia-1.1.0/cartesia/_sse.py +143 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/_types.py +3 -2
- cartesia-1.1.0/cartesia/_websocket.py +355 -0
- cartesia-1.1.0/cartesia/async_client.py +82 -0
- cartesia-1.1.0/cartesia/async_tts.py +63 -0
- cartesia-1.1.0/cartesia/client.py +69 -0
- cartesia-1.1.0/cartesia/resource.py +44 -0
- cartesia-1.1.0/cartesia/tts.py +146 -0
- cartesia-1.1.0/cartesia/utils/tts.py +74 -0
- cartesia-1.1.0/cartesia/version.py +1 -0
- cartesia-1.1.0/cartesia/voices.py +170 -0
- cartesia-1.0.13/README.md → cartesia-1.1.0/cartesia.egg-info/PKG-INFO +47 -13
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/SOURCES.txt +12 -1
- cartesia-1.1.0/cartesia.egg-info/requires.txt +5 -0
- cartesia-1.1.0/pyproject.toml +84 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/tests/test_tts.py +129 -30
- cartesia-1.0.13/cartesia/__init__.py +0 -3
- cartesia-1.0.13/cartesia/client.py +0 -1393
- cartesia-1.0.13/cartesia/version.py +0 -1
- cartesia-1.0.13/cartesia.egg-info/requires.txt +0 -22
- cartesia-1.0.13/pyproject.toml +0 -56
- cartesia-1.0.13/setup.py +0 -292
- {cartesia-1.0.13 → cartesia-1.1.0}/LICENSE.md +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/__init__.py +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/deprecated.py +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/retry.py +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/dependency_links.txt +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/top_level.txt +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/setup.cfg +0 -0
- {cartesia-1.0.13 → cartesia-1.1.0}/tests/test_deprecated.py +0 -0
@@ -1,35 +1,32 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cartesia
|
3
|
-
Version: 1.0.13
|
3
|
+
Version: 1.1.0
|
4
4
|
Summary: The official Python library for the Cartesia API.
|
5
|
-
|
6
|
-
Author: Cartesia, Inc.
|
7
|
-
Author-email: support@cartesia.ai
|
8
|
-
Classifier: Programming Language :: Python
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
10
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
11
|
-
Requires-Python: >=3.8.0
|
5
|
+
Requires-Python: >=3.9
|
12
6
|
Description-Content-Type: text/markdown
|
13
|
-
Provides-Extra: dev
|
14
|
-
Provides-Extra: all
|
15
7
|
License-File: LICENSE.md
|
16
|
-
|
8
|
+
Requires-Dist: aiohttp>=3.10.10
|
9
|
+
Requires-Dist: httpx>=0.27.2
|
10
|
+
Requires-Dist: iterators>=0.2.0
|
11
|
+
Requires-Dist: requests>=2.32.3
|
12
|
+
Requires-Dist: websockets>=13.1
|
17
13
|
|
18
14
|
# Cartesia Python API Library
|
19
15
|
|
20
16
|

|
21
|
-
[](https://discord.gg/
|
17
|
+
[](https://discord.gg/cartesia)
|
22
18
|
|
23
19
|
The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
|
24
20
|
|
25
21
|
> [!IMPORTANT]
|
26
|
-
> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/
|
22
|
+
> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
|
27
23
|
|
28
24
|
- [Cartesia Python API Library](#cartesia-python-api-library)
|
29
25
|
- [Documentation](#documentation)
|
30
26
|
- [Installation](#installation)
|
31
27
|
- [Voices](#voices)
|
32
28
|
- [Text-to-Speech](#text-to-speech)
|
29
|
+
- [Bytes](#bytes)
|
33
30
|
- [Server-Sent Events (SSE)](#server-sent-events-sse)
|
34
31
|
- [WebSocket](#websocket)
|
35
32
|
- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
|
@@ -88,6 +85,30 @@ new_voice = client.voices.create(
|
|
88
85
|
|
89
86
|
## Text-to-Speech
|
90
87
|
|
88
|
+
### Bytes
|
89
|
+
|
90
|
+
```python
|
91
|
+
from cartesia import Cartesia
|
92
|
+
import os
|
93
|
+
|
94
|
+
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
95
|
+
|
96
|
+
data = client.tts.bytes(
|
97
|
+
model_id="sonic-english",
|
98
|
+
transcript="Hello, world! I'm generating audio on Cartesia.",
|
99
|
+
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
100
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
|
101
|
+
output_format={
|
102
|
+
"container": "wav",
|
103
|
+
"encoding": "pcm_f32le",
|
104
|
+
"sample_rate": 44100,
|
105
|
+
},
|
106
|
+
)
|
107
|
+
|
108
|
+
with open("output.wav", "wb") as f:
|
109
|
+
f.write(data)
|
110
|
+
```
|
111
|
+
|
91
112
|
### Server-Sent Events (SSE)
|
92
113
|
|
93
114
|
```python
|
@@ -96,7 +117,6 @@ import pyaudio
|
|
96
117
|
import os
|
97
118
|
|
98
119
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
99
|
-
voice_name = "Barbershop Man"
|
100
120
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
101
121
|
voice = client.voices.get(id=voice_id)
|
102
122
|
|
@@ -105,7 +125,7 @@ transcript = "Hello! Welcome to Cartesia"
|
|
105
125
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
106
126
|
model_id = "sonic-english"
|
107
127
|
|
108
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
128
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
109
129
|
output_format = {
|
110
130
|
"container": "raw",
|
111
131
|
"encoding": "pcm_f32le",
|
@@ -149,14 +169,13 @@ import os
|
|
149
169
|
|
150
170
|
async def write_stream():
|
151
171
|
client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
152
|
-
voice_name = "Barbershop Man"
|
153
172
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
154
173
|
voice = client.voices.get(id=voice_id)
|
155
174
|
transcript = "Hello! Welcome to Cartesia"
|
156
175
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
157
176
|
model_id = "sonic-english"
|
158
177
|
|
159
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
178
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
160
179
|
output_format = {
|
161
180
|
"container": "raw",
|
162
181
|
"encoding": "pcm_f32le",
|
@@ -203,7 +222,6 @@ import pyaudio
|
|
203
222
|
import os
|
204
223
|
|
205
224
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
206
|
-
voice_name = "Barbershop Man"
|
207
225
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
208
226
|
voice = client.voices.get(id=voice_id)
|
209
227
|
transcript = "Hello! Welcome to Cartesia"
|
@@ -211,7 +229,7 @@ transcript = "Hello! Welcome to Cartesia"
|
|
211
229
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
212
230
|
model_id = "sonic-english"
|
213
231
|
|
214
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
232
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
215
233
|
output_format = {
|
216
234
|
"container": "raw",
|
217
235
|
"encoding": "pcm_f32le",
|
@@ -272,7 +290,7 @@ async def send_transcripts(ctx):
|
|
272
290
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
273
291
|
model_id = "sonic-english"
|
274
292
|
|
275
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
293
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
276
294
|
output_format = {
|
277
295
|
"container": "raw",
|
278
296
|
"encoding": "pcm_f32le",
|
@@ -380,7 +398,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
|
|
380
398
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
381
399
|
model_id = "sonic-english"
|
382
400
|
|
383
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
401
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
384
402
|
output_format = {
|
385
403
|
"container": "raw",
|
386
404
|
"encoding": "pcm_f32le",
|
@@ -460,7 +478,6 @@ import pyaudio
|
|
460
478
|
import os
|
461
479
|
|
462
480
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
463
|
-
voice_name = "Barbershop Man"
|
464
481
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
465
482
|
voice = client.voices.get(id=voice_id)
|
466
483
|
|
@@ -470,7 +487,7 @@ language = "es" # Language code corresponding to the language of the transcript
|
|
470
487
|
# Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
|
471
488
|
model_id = "sonic-multilingual"
|
472
489
|
|
473
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
490
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
474
491
|
output_format = {
|
475
492
|
"container": "raw",
|
476
493
|
"encoding": "pcm_f32le",
|
@@ -623,7 +640,7 @@ display(audio)
|
|
623
640
|
|
624
641
|
#### Output Formats
|
625
642
|
|
626
|
-
You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/
|
643
|
+
You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
|
627
644
|
|
628
645
|
The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
|
629
646
|
|
@@ -1,35 +1,19 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: cartesia
|
3
|
-
Version: 1.0.13
|
4
|
-
Summary: The official Python library for the Cartesia API.
|
5
|
-
Home-page:
|
6
|
-
Author: Cartesia, Inc.
|
7
|
-
Author-email: support@cartesia.ai
|
8
|
-
Classifier: Programming Language :: Python
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
10
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
11
|
-
Requires-Python: >=3.8.0
|
12
|
-
Description-Content-Type: text/markdown
|
13
|
-
Provides-Extra: dev
|
14
|
-
Provides-Extra: all
|
15
|
-
License-File: LICENSE.md
|
16
|
-
|
17
|
-
|
18
1
|
# Cartesia Python API Library
|
19
2
|
|
20
3
|

|
21
|
-
[](https://discord.gg/
|
4
|
+
[](https://discord.gg/cartesia)
|
22
5
|
|
23
6
|
The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
|
24
7
|
|
25
8
|
> [!IMPORTANT]
|
26
|
-
> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/
|
9
|
+
> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
|
27
10
|
|
28
11
|
- [Cartesia Python API Library](#cartesia-python-api-library)
|
29
12
|
- [Documentation](#documentation)
|
30
13
|
- [Installation](#installation)
|
31
14
|
- [Voices](#voices)
|
32
15
|
- [Text-to-Speech](#text-to-speech)
|
16
|
+
- [Bytes](#bytes)
|
33
17
|
- [Server-Sent Events (SSE)](#server-sent-events-sse)
|
34
18
|
- [WebSocket](#websocket)
|
35
19
|
- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
|
@@ -88,6 +72,30 @@ new_voice = client.voices.create(
|
|
88
72
|
|
89
73
|
## Text-to-Speech
|
90
74
|
|
75
|
+
### Bytes
|
76
|
+
|
77
|
+
```python
|
78
|
+
from cartesia import Cartesia
|
79
|
+
import os
|
80
|
+
|
81
|
+
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
82
|
+
|
83
|
+
data = client.tts.bytes(
|
84
|
+
model_id="sonic-english",
|
85
|
+
transcript="Hello, world! I'm generating audio on Cartesia.",
|
86
|
+
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
87
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
|
88
|
+
output_format={
|
89
|
+
"container": "wav",
|
90
|
+
"encoding": "pcm_f32le",
|
91
|
+
"sample_rate": 44100,
|
92
|
+
},
|
93
|
+
)
|
94
|
+
|
95
|
+
with open("output.wav", "wb") as f:
|
96
|
+
f.write(data)
|
97
|
+
```
|
98
|
+
|
91
99
|
### Server-Sent Events (SSE)
|
92
100
|
|
93
101
|
```python
|
@@ -96,7 +104,6 @@ import pyaudio
|
|
96
104
|
import os
|
97
105
|
|
98
106
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
99
|
-
voice_name = "Barbershop Man"
|
100
107
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
101
108
|
voice = client.voices.get(id=voice_id)
|
102
109
|
|
@@ -105,7 +112,7 @@ transcript = "Hello! Welcome to Cartesia"
|
|
105
112
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
106
113
|
model_id = "sonic-english"
|
107
114
|
|
108
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
115
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
109
116
|
output_format = {
|
110
117
|
"container": "raw",
|
111
118
|
"encoding": "pcm_f32le",
|
@@ -149,14 +156,13 @@ import os
|
|
149
156
|
|
150
157
|
async def write_stream():
|
151
158
|
client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
152
|
-
voice_name = "Barbershop Man"
|
153
159
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
154
160
|
voice = client.voices.get(id=voice_id)
|
155
161
|
transcript = "Hello! Welcome to Cartesia"
|
156
162
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
157
163
|
model_id = "sonic-english"
|
158
164
|
|
159
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
165
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
160
166
|
output_format = {
|
161
167
|
"container": "raw",
|
162
168
|
"encoding": "pcm_f32le",
|
@@ -203,7 +209,6 @@ import pyaudio
|
|
203
209
|
import os
|
204
210
|
|
205
211
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
206
|
-
voice_name = "Barbershop Man"
|
207
212
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
208
213
|
voice = client.voices.get(id=voice_id)
|
209
214
|
transcript = "Hello! Welcome to Cartesia"
|
@@ -211,7 +216,7 @@ transcript = "Hello! Welcome to Cartesia"
|
|
211
216
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
212
217
|
model_id = "sonic-english"
|
213
218
|
|
214
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
219
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
215
220
|
output_format = {
|
216
221
|
"container": "raw",
|
217
222
|
"encoding": "pcm_f32le",
|
@@ -272,7 +277,7 @@ async def send_transcripts(ctx):
|
|
272
277
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
273
278
|
model_id = "sonic-english"
|
274
279
|
|
275
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
280
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
276
281
|
output_format = {
|
277
282
|
"container": "raw",
|
278
283
|
"encoding": "pcm_f32le",
|
@@ -380,7 +385,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
|
|
380
385
|
# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
|
381
386
|
model_id = "sonic-english"
|
382
387
|
|
383
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
388
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
384
389
|
output_format = {
|
385
390
|
"container": "raw",
|
386
391
|
"encoding": "pcm_f32le",
|
@@ -460,7 +465,6 @@ import pyaudio
|
|
460
465
|
import os
|
461
466
|
|
462
467
|
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
|
463
|
-
voice_name = "Barbershop Man"
|
464
468
|
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
465
469
|
voice = client.voices.get(id=voice_id)
|
466
470
|
|
@@ -470,7 +474,7 @@ language = "es" # Language code corresponding to the language of the transcript
|
|
470
474
|
# Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
|
471
475
|
model_id = "sonic-multilingual"
|
472
476
|
|
473
|
-
# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/
|
477
|
+
# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
|
474
478
|
output_format = {
|
475
479
|
"container": "raw",
|
476
480
|
"encoding": "pcm_f32le",
|
@@ -623,7 +627,7 @@ display(audio)
|
|
623
627
|
|
624
628
|
#### Output Formats
|
625
629
|
|
626
|
-
You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/
|
630
|
+
You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
|
627
631
|
|
628
632
|
The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
|
629
633
|
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import base64
|
2
|
+
import json
|
3
|
+
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
|
4
|
+
|
5
|
+
import aiohttp
|
6
|
+
|
7
|
+
from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
|
8
|
+
from cartesia._logger import logger
|
9
|
+
from cartesia._sse import _SSE
|
10
|
+
from cartesia._types import OutputFormat, VoiceControls
|
11
|
+
from cartesia.utils.retry import retry_on_connection_error_async
|
12
|
+
from cartesia.utils.tts import _construct_tts_request
|
13
|
+
|
14
|
+
|
15
|
+
class _AsyncSSE(_SSE):
    """Generate audio over Server-Sent Events using an asynchronous HTTP session.

    The request is POSTed to ``{http_url}/tts/sse`` and the streamed response is
    turned into ``{"audio": bytes}`` chunks.
    """

    def __init__(
        self,
        http_url: str,
        headers: Dict[str, str],
        timeout: float,
        get_session: Callable[[], Optional[aiohttp.ClientSession]],
    ):
        """Store the session factory and delegate the rest to ``_SSE``.

        Args:
            http_url: Base URL; ``/tts/sse`` is appended when a request is sent.
            headers: Headers sent with every request.
            timeout: Passed through to ``_SSE.__init__``.
            get_session: Callable used to obtain the ``aiohttp`` session.
        """
        super().__init__(http_url, headers, timeout)
        # NOTE(review): the result of ``get_session()`` is awaited in
        # ``_sse_generator``, so the callable must return an awaitable (e.g. be
        # a coroutine function) even though the annotation says otherwise.
        self._get_session = get_session

    async def send(
        self,
        model_id: str,
        transcript: str,
        output_format: OutputFormat,
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        duration: Optional[int] = None,
        language: Optional[str] = None,
        stream: bool = True,
        _experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> Union[Dict[str, bytes], AsyncGenerator[Dict[str, bytes], None]]:
        """Send a text-to-speech request over SSE.

        All arguments except ``stream`` are forwarded unchanged to
        ``_construct_tts_request`` to build the request body.

        Args:
            stream: When true (the default) the chunk generator is returned
                without being consumed; otherwise every chunk is collected
                and the audio is concatenated.

        Returns:
            An async generator of ``{"audio": bytes}`` chunks when ``stream``
            is true, otherwise a single ``{"audio": bytes}`` dict holding the
            joined audio of all chunks.
        """
        request_body = _construct_tts_request(
            model_id=model_id,
            transcript=transcript,
            output_format=output_format,
            voice_id=voice_id,
            voice_embedding=voice_embedding,
            duration=duration,
            language=language,
            _experimental_voice_controls=_experimental_voice_controls,
        )

        generator = self._sse_generator_wrapper(request_body)

        if stream:
            return generator

        # Non-streaming mode: drain the generator and stitch the audio together.
        chunks = [chunk["audio"] async for chunk in generator]
        return {"audio": b"".join(chunks)}

    @retry_on_connection_error_async(
        max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
    )
    async def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
        """Need to wrap the sse generator in a function for the retry decorator to work.

        Raises:
            RuntimeError: If the underlying generator raises any exception; the
                original exception is attached as ``__cause__``.
        """
        try:
            async for chunk in self._sse_generator(request_body):
                yield chunk
        except Exception as e:
            # Chain explicitly so the traceback shows the real failure as the cause.
            raise RuntimeError(f"Error generating audio. {e}") from e

    async def _sse_generator(self, request_body: Dict[str, Any]):
        """POST the request and yield audio chunks parsed from the response stream.

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        session = await self._get_session()
        async with session.post(
            f"{self.http_url}/tts/sse",
            data=json.dumps(request_body),
            headers=self.headers,
        ) as response:
            if not response.ok:
                raise ValueError(f"Failed to generate audio. {await response.text()}")

            buffer = ""
            async for chunk_bytes in response.content.iter_any():
                # ``_update_buffer`` is not defined in this class (presumably
                # inherited from ``_SSE``); it returns the new buffer and the
                # outputs that are ready to be yielded.
                buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
                for output in outputs:
                    yield output

            if buffer:
                # Best effort: try to parse whatever is left once the stream ends.
                try:
                    chunk_json = json.loads(buffer)
                    audio = base64.b64decode(chunk_json["data"])
                    yield {"audio": audio}
                except json.JSONDecodeError:
                    # Leftover data that is not valid JSON is still dropped,
                    # but leave a trace instead of discarding it silently.
                    logger.debug(
                        "Discarding %d trailing characters of SSE data that are not valid JSON.",
                        len(buffer),
                    )
|