cartesia 1.0.13__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {cartesia-1.0.13/cartesia.egg-info → cartesia-1.1.0}/PKG-INFO +41 -24
  2. cartesia-1.0.13/PKG-INFO → cartesia-1.1.0/README.md +34 -30
  3. cartesia-1.1.0/cartesia/__init__.py +4 -0
  4. cartesia-1.1.0/cartesia/_async_sse.py +95 -0
  5. cartesia-1.1.0/cartesia/_async_websocket.py +313 -0
  6. cartesia-1.1.0/cartesia/_constants.py +10 -0
  7. cartesia-1.1.0/cartesia/_logger.py +3 -0
  8. cartesia-1.1.0/cartesia/_sse.py +143 -0
  9. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/_types.py +3 -2
  10. cartesia-1.1.0/cartesia/_websocket.py +355 -0
  11. cartesia-1.1.0/cartesia/async_client.py +82 -0
  12. cartesia-1.1.0/cartesia/async_tts.py +63 -0
  13. cartesia-1.1.0/cartesia/client.py +69 -0
  14. cartesia-1.1.0/cartesia/resource.py +44 -0
  15. cartesia-1.1.0/cartesia/tts.py +146 -0
  16. cartesia-1.1.0/cartesia/utils/tts.py +74 -0
  17. cartesia-1.1.0/cartesia/version.py +1 -0
  18. cartesia-1.1.0/cartesia/voices.py +170 -0
  19. cartesia-1.0.13/README.md → cartesia-1.1.0/cartesia.egg-info/PKG-INFO +47 -13
  20. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/SOURCES.txt +12 -1
  21. cartesia-1.1.0/cartesia.egg-info/requires.txt +5 -0
  22. cartesia-1.1.0/pyproject.toml +84 -0
  23. {cartesia-1.0.13 → cartesia-1.1.0}/tests/test_tts.py +129 -30
  24. cartesia-1.0.13/cartesia/__init__.py +0 -3
  25. cartesia-1.0.13/cartesia/client.py +0 -1393
  26. cartesia-1.0.13/cartesia/version.py +0 -1
  27. cartesia-1.0.13/cartesia.egg-info/requires.txt +0 -22
  28. cartesia-1.0.13/pyproject.toml +0 -56
  29. cartesia-1.0.13/setup.py +0 -292
  30. {cartesia-1.0.13 → cartesia-1.1.0}/LICENSE.md +0 -0
  31. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/__init__.py +0 -0
  32. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/deprecated.py +0 -0
  33. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/retry.py +0 -0
  34. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/dependency_links.txt +0 -0
  35. {cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/top_level.txt +0 -0
  36. {cartesia-1.0.13 → cartesia-1.1.0}/setup.cfg +0 -0
  37. {cartesia-1.0.13 → cartesia-1.1.0}/tests/test_deprecated.py +0 -0
@@ -1,35 +1,32 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.13
3
+ Version: 1.1.0
4
4
  Summary: The official Python library for the Cartesia API.
5
- Home-page:
6
- Author: Cartesia, Inc.
7
- Author-email: support@cartesia.ai
8
- Classifier: Programming Language :: Python
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
- Requires-Python: >=3.8.0
5
+ Requires-Python: >=3.9
12
6
  Description-Content-Type: text/markdown
13
- Provides-Extra: dev
14
- Provides-Extra: all
15
7
  License-File: LICENSE.md
16
-
8
+ Requires-Dist: aiohttp>=3.10.10
9
+ Requires-Dist: httpx>=0.27.2
10
+ Requires-Dist: iterators>=0.2.0
11
+ Requires-Dist: requests>=2.32.3
12
+ Requires-Dist: websockets>=13.1
17
13
 
18
14
  # Cartesia Python API Library
19
15
 
20
16
  ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
21
- [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
17
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
22
18
 
23
19
  The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
24
20
 
25
21
  > [!IMPORTANT]
26
- > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
22
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
27
23
 
28
24
  - [Cartesia Python API Library](#cartesia-python-api-library)
29
25
  - [Documentation](#documentation)
30
26
  - [Installation](#installation)
31
27
  - [Voices](#voices)
32
28
  - [Text-to-Speech](#text-to-speech)
29
+ - [Bytes](#bytes)
33
30
  - [Server-Sent Events (SSE)](#server-sent-events-sse)
34
31
  - [WebSocket](#websocket)
35
32
  - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
@@ -88,6 +85,30 @@ new_voice = client.voices.create(
88
85
 
89
86
  ## Text-to-Speech
90
87
 
88
+ ### Bytes
89
+
90
+ ```python
91
+ from cartesia import Cartesia
92
+ import os
93
+
94
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
95
+
96
+ data = client.tts.bytes(
97
+ model_id="sonic-english",
98
+ transcript="Hello, world! I'm generating audio on Cartesia.",
99
+ voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
100
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
101
+ output_format={
102
+ "container": "wav",
103
+ "encoding": "pcm_f32le",
104
+ "sample_rate": 44100,
105
+ },
106
+ )
107
+
108
+ with open("output.wav", "wb") as f:
109
+ f.write(data)
110
+ ```
111
+
91
112
  ### Server-Sent Events (SSE)
92
113
 
93
114
  ```python
@@ -96,7 +117,6 @@ import pyaudio
96
117
  import os
97
118
 
98
119
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
99
- voice_name = "Barbershop Man"
100
120
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
101
121
  voice = client.voices.get(id=voice_id)
102
122
 
@@ -105,7 +125,7 @@ transcript = "Hello! Welcome to Cartesia"
105
125
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
106
126
  model_id = "sonic-english"
107
127
 
108
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
128
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
109
129
  output_format = {
110
130
  "container": "raw",
111
131
  "encoding": "pcm_f32le",
@@ -149,14 +169,13 @@ import os
149
169
 
150
170
  async def write_stream():
151
171
  client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
152
- voice_name = "Barbershop Man"
153
172
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
154
173
  voice = client.voices.get(id=voice_id)
155
174
  transcript = "Hello! Welcome to Cartesia"
156
175
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
157
176
  model_id = "sonic-english"
158
177
 
159
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
178
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
160
179
  output_format = {
161
180
  "container": "raw",
162
181
  "encoding": "pcm_f32le",
@@ -203,7 +222,6 @@ import pyaudio
203
222
  import os
204
223
 
205
224
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
206
- voice_name = "Barbershop Man"
207
225
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
208
226
  voice = client.voices.get(id=voice_id)
209
227
  transcript = "Hello! Welcome to Cartesia"
@@ -211,7 +229,7 @@ transcript = "Hello! Welcome to Cartesia"
211
229
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
212
230
  model_id = "sonic-english"
213
231
 
214
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
232
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
215
233
  output_format = {
216
234
  "container": "raw",
217
235
  "encoding": "pcm_f32le",
@@ -272,7 +290,7 @@ async def send_transcripts(ctx):
272
290
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
273
291
  model_id = "sonic-english"
274
292
 
275
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
293
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
276
294
  output_format = {
277
295
  "container": "raw",
278
296
  "encoding": "pcm_f32le",
@@ -380,7 +398,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
380
398
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
381
399
  model_id = "sonic-english"
382
400
 
383
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
401
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
384
402
  output_format = {
385
403
  "container": "raw",
386
404
  "encoding": "pcm_f32le",
@@ -460,7 +478,6 @@ import pyaudio
460
478
  import os
461
479
 
462
480
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
463
- voice_name = "Barbershop Man"
464
481
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
465
482
  voice = client.voices.get(id=voice_id)
466
483
 
@@ -470,7 +487,7 @@ language = "es" # Language code corresponding to the language of the transcript
470
487
  # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
471
488
  model_id = "sonic-multilingual"
472
489
 
473
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
490
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
474
491
  output_format = {
475
492
  "container": "raw",
476
493
  "encoding": "pcm_f32le",
@@ -623,7 +640,7 @@ display(audio)
623
640
 
624
641
  #### Output Formats
625
642
 
626
- You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
643
+ You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
627
644
 
628
645
  The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
629
646
 
@@ -1,35 +1,19 @@
1
- Metadata-Version: 2.1
2
- Name: cartesia
3
- Version: 1.0.13
4
- Summary: The official Python library for the Cartesia API.
5
- Home-page:
6
- Author: Cartesia, Inc.
7
- Author-email: support@cartesia.ai
8
- Classifier: Programming Language :: Python
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
- Requires-Python: >=3.8.0
12
- Description-Content-Type: text/markdown
13
- Provides-Extra: dev
14
- Provides-Extra: all
15
- License-File: LICENSE.md
16
-
17
-
18
1
  # Cartesia Python API Library
19
2
 
20
3
  ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
21
- [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
4
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
22
5
 
23
6
  The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
24
7
 
25
8
  > [!IMPORTANT]
26
- > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
9
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
27
10
 
28
11
  - [Cartesia Python API Library](#cartesia-python-api-library)
29
12
  - [Documentation](#documentation)
30
13
  - [Installation](#installation)
31
14
  - [Voices](#voices)
32
15
  - [Text-to-Speech](#text-to-speech)
16
+ - [Bytes](#bytes)
33
17
  - [Server-Sent Events (SSE)](#server-sent-events-sse)
34
18
  - [WebSocket](#websocket)
35
19
  - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
@@ -88,6 +72,30 @@ new_voice = client.voices.create(
88
72
 
89
73
  ## Text-to-Speech
90
74
 
75
+ ### Bytes
76
+
77
+ ```python
78
+ from cartesia import Cartesia
79
+ import os
80
+
81
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
82
+
83
+ data = client.tts.bytes(
84
+ model_id="sonic-english",
85
+ transcript="Hello, world! I'm generating audio on Cartesia.",
86
+ voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
87
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
88
+ output_format={
89
+ "container": "wav",
90
+ "encoding": "pcm_f32le",
91
+ "sample_rate": 44100,
92
+ },
93
+ )
94
+
95
+ with open("output.wav", "wb") as f:
96
+ f.write(data)
97
+ ```
98
+
91
99
  ### Server-Sent Events (SSE)
92
100
 
93
101
  ```python
@@ -96,7 +104,6 @@ import pyaudio
96
104
  import os
97
105
 
98
106
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
99
- voice_name = "Barbershop Man"
100
107
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
101
108
  voice = client.voices.get(id=voice_id)
102
109
 
@@ -105,7 +112,7 @@ transcript = "Hello! Welcome to Cartesia"
105
112
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
106
113
  model_id = "sonic-english"
107
114
 
108
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
115
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
109
116
  output_format = {
110
117
  "container": "raw",
111
118
  "encoding": "pcm_f32le",
@@ -149,14 +156,13 @@ import os
149
156
 
150
157
  async def write_stream():
151
158
  client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
152
- voice_name = "Barbershop Man"
153
159
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
154
160
  voice = client.voices.get(id=voice_id)
155
161
  transcript = "Hello! Welcome to Cartesia"
156
162
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
157
163
  model_id = "sonic-english"
158
164
 
159
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
165
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
160
166
  output_format = {
161
167
  "container": "raw",
162
168
  "encoding": "pcm_f32le",
@@ -203,7 +209,6 @@ import pyaudio
203
209
  import os
204
210
 
205
211
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
206
- voice_name = "Barbershop Man"
207
212
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
208
213
  voice = client.voices.get(id=voice_id)
209
214
  transcript = "Hello! Welcome to Cartesia"
@@ -211,7 +216,7 @@ transcript = "Hello! Welcome to Cartesia"
211
216
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
212
217
  model_id = "sonic-english"
213
218
 
214
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
219
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
215
220
  output_format = {
216
221
  "container": "raw",
217
222
  "encoding": "pcm_f32le",
@@ -272,7 +277,7 @@ async def send_transcripts(ctx):
272
277
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
273
278
  model_id = "sonic-english"
274
279
 
275
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
280
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
276
281
  output_format = {
277
282
  "container": "raw",
278
283
  "encoding": "pcm_f32le",
@@ -380,7 +385,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
380
385
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
381
386
  model_id = "sonic-english"
382
387
 
383
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
388
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
384
389
  output_format = {
385
390
  "container": "raw",
386
391
  "encoding": "pcm_f32le",
@@ -460,7 +465,6 @@ import pyaudio
460
465
  import os
461
466
 
462
467
  client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
463
- voice_name = "Barbershop Man"
464
468
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
465
469
  voice = client.voices.get(id=voice_id)
466
470
 
@@ -470,7 +474,7 @@ language = "es" # Language code corresponding to the language of the transcript
470
474
  # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
471
475
  model_id = "sonic-multilingual"
472
476
 
473
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
477
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
474
478
  output_format = {
475
479
  "container": "raw",
476
480
  "encoding": "pcm_f32le",
@@ -623,7 +627,7 @@ display(audio)
623
627
 
624
628
  #### Output Formats
625
629
 
626
- You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
630
+ You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
627
631
 
628
632
  The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
629
633
 
@@ -0,0 +1,4 @@
1
+ from cartesia.async_client import AsyncCartesia
2
+ from cartesia.client import Cartesia
3
+
4
+ __all__ = ["Cartesia", "AsyncCartesia"]
@@ -0,0 +1,95 @@
1
+ import base64
2
+ import json
3
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
4
+
5
+ import aiohttp
6
+
7
+ from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
8
+ from cartesia._logger import logger
9
+ from cartesia._sse import _SSE
10
+ from cartesia._types import OutputFormat, VoiceControls
11
+ from cartesia.utils.retry import retry_on_connection_error_async
12
+ from cartesia.utils.tts import _construct_tts_request
13
+
14
+
15
+ class _AsyncSSE(_SSE):
16
+ """This class contains methods to generate audio using Server-Sent Events asynchronously."""
17
+
18
+ def __init__(
19
+ self,
20
+ http_url: str,
21
+ headers: Dict[str, str],
22
+ timeout: float,
23
+ get_session: Callable[[], Optional[aiohttp.ClientSession]],
24
+ ):
25
+ super().__init__(http_url, headers, timeout)
26
+ self._get_session = get_session
27
+
28
+ async def send(
29
+ self,
30
+ model_id: str,
31
+ transcript: str,
32
+ output_format: OutputFormat,
33
+ voice_id: Optional[str] = None,
34
+ voice_embedding: Optional[List[float]] = None,
35
+ duration: Optional[int] = None,
36
+ language: Optional[str] = None,
37
+ stream: bool = True,
38
+ _experimental_voice_controls: Optional[VoiceControls] = None,
39
+ ) -> Union[bytes, AsyncGenerator[bytes, None]]:
40
+ request_body = _construct_tts_request(
41
+ model_id=model_id,
42
+ transcript=transcript,
43
+ output_format=output_format,
44
+ voice_id=voice_id,
45
+ voice_embedding=voice_embedding,
46
+ duration=duration,
47
+ language=language,
48
+ _experimental_voice_controls=_experimental_voice_controls,
49
+ )
50
+
51
+ generator = self._sse_generator_wrapper(request_body)
52
+
53
+ if stream:
54
+ return generator
55
+
56
+ chunks = []
57
+ async for chunk in generator:
58
+ chunks.append(chunk["audio"])
59
+
60
+ return {"audio": b"".join(chunks)}
61
+
62
+ @retry_on_connection_error_async(
63
+ max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
64
+ )
65
+ async def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
66
+ """Need to wrap the sse generator in a function for the retry decorator to work."""
67
+ try:
68
+ async for chunk in self._sse_generator(request_body):
69
+ yield chunk
70
+ except Exception as e:
71
+ raise RuntimeError(f"Error generating audio. {e}")
72
+
73
+ async def _sse_generator(self, request_body: Dict[str, Any]):
74
+ session = await self._get_session()
75
+ async with session.post(
76
+ f"{self.http_url}/tts/sse",
77
+ data=json.dumps(request_body),
78
+ headers=self.headers,
79
+ ) as response:
80
+ if not response.ok:
81
+ raise ValueError(f"Failed to generate audio. {await response.text()}")
82
+
83
+ buffer = ""
84
+ async for chunk_bytes in response.content.iter_any():
85
+ buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
86
+ for output in outputs:
87
+ yield output
88
+
89
+ if buffer:
90
+ try:
91
+ chunk_json = json.loads(buffer)
92
+ audio = base64.b64decode(chunk_json["data"])
93
+ yield {"audio": audio}
94
+ except json.JSONDecodeError:
95
+ pass