cartesia 0.0.5__tar.gz → 0.0.6__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {cartesia-0.0.5 → cartesia-0.0.6}/PKG-INFO +1 -1
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia/tts.py +19 -11
- cartesia-0.0.6/cartesia/version.py +1 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia.egg-info/PKG-INFO +1 -1
- {cartesia-0.0.5 → cartesia-0.0.6}/tests/test_tts.py +1 -1
- cartesia-0.0.5/cartesia/version.py +0 -1
- {cartesia-0.0.5 → cartesia-0.0.6}/README.md +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia/__init__.py +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia/utils.py +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia.egg-info/SOURCES.txt +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia.egg-info/dependency_links.txt +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia.egg-info/requires.txt +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/cartesia.egg-info/top_level.txt +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/pyproject.toml +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/setup.cfg +0 -0
- {cartesia-0.0.5 → cartesia-0.0.6}/setup.py +0 -0
@@ -14,7 +14,7 @@ from websockets.sync.client import connect
|
|
14
14
|
|
15
15
|
from cartesia.utils import retry_on_connection_error, retry_on_connection_error_async
|
16
16
|
|
17
|
-
DEFAULT_MODEL_ID = "
|
17
|
+
DEFAULT_MODEL_ID = ""
|
18
18
|
DEFAULT_BASE_URL = "api.cartesia.ai"
|
19
19
|
DEFAULT_API_VERSION = "v0"
|
20
20
|
DEFAULT_TIMEOUT = 30 # seconds
|
@@ -160,9 +160,8 @@ class CartesiaTTS:
|
|
160
160
|
raise ValueError(f"Failed to get voices. Error: {response.text}")
|
161
161
|
|
162
162
|
voices = response.json()
|
163
|
-
|
164
|
-
|
165
|
-
for voice in voices:
|
163
|
+
for voice in voices:
|
164
|
+
if "embedding" in voice and isinstance(voice["embedding"], str):
|
166
165
|
voice["embedding"] = json.loads(voice["embedding"])
|
167
166
|
return {voice["name"]: voice for voice in voices}
|
168
167
|
|
@@ -210,9 +209,10 @@ class CartesiaTTS:
|
|
210
209
|
|
211
210
|
# Handle successful response
|
212
211
|
out = response.json()
|
213
|
-
|
214
|
-
|
215
|
-
|
212
|
+
embedding = out["embedding"]
|
213
|
+
if isinstance(embedding, str):
|
214
|
+
embedding = json.loads(embedding)
|
215
|
+
return embedding
|
216
216
|
|
217
217
|
def refresh_websocket(self):
|
218
218
|
"""Refresh the websocket connection.
|
@@ -249,6 +249,7 @@ class CartesiaTTS:
|
|
249
249
|
transcript: str,
|
250
250
|
voice: Embedding,
|
251
251
|
model_id: str,
|
252
|
+
output_format: str,
|
252
253
|
duration: int = None,
|
253
254
|
chunk_time: float = None,
|
254
255
|
) -> Dict[str, Any]:
|
@@ -262,6 +263,7 @@ class CartesiaTTS:
|
|
262
263
|
optional_body = dict(
|
263
264
|
duration=duration,
|
264
265
|
chunk_time=chunk_time,
|
266
|
+
output_format=output_format,
|
265
267
|
)
|
266
268
|
body.update({k: v for k, v in optional_body.items() if v is not None})
|
267
269
|
|
@@ -277,6 +279,7 @@ class CartesiaTTS:
|
|
277
279
|
chunk_time: float = None,
|
278
280
|
stream: bool = False,
|
279
281
|
websocket: bool = True,
|
282
|
+
output_format: str = "fp32",
|
280
283
|
) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
|
281
284
|
"""Generate audio from a transcript.
|
282
285
|
|
@@ -304,7 +307,8 @@ class CartesiaTTS:
|
|
304
307
|
voice=voice,
|
305
308
|
model_id=model_id,
|
306
309
|
duration=duration,
|
307
|
-
chunk_time=chunk_time
|
310
|
+
chunk_time=chunk_time,
|
311
|
+
output_format=output_format,
|
308
312
|
)
|
309
313
|
|
310
314
|
if websocket:
|
@@ -336,7 +340,7 @@ class CartesiaTTS:
|
|
336
340
|
|
337
341
|
def _generate_http(self, body: Dict[str, Any]):
|
338
342
|
response = requests.post(
|
339
|
-
f"{self._http_url()}/audio/
|
343
|
+
f"{self._http_url()}/audio/sse",
|
340
344
|
stream=True,
|
341
345
|
data=json.dumps(body),
|
342
346
|
headers=self.headers,
|
@@ -379,6 +383,8 @@ class CartesiaTTS:
|
|
379
383
|
try:
|
380
384
|
while True:
|
381
385
|
response = json.loads(self.websocket.recv())
|
386
|
+
if "error" in response:
|
387
|
+
raise RuntimeError(f"Error generating audio:\n{response['error']}")
|
382
388
|
if response["done"]:
|
383
389
|
break
|
384
390
|
|
@@ -515,6 +521,7 @@ class AsyncCartesiaTTS(CartesiaTTS):
|
|
515
521
|
chunk_time: float = None,
|
516
522
|
stream: bool = False,
|
517
523
|
websocket: bool = True,
|
524
|
+
output_format: str = "fp32"
|
518
525
|
) -> Union[AudioOutput, AsyncGenerator[AudioOutput, None]]:
|
519
526
|
"""Asynchronously generate audio from a transcript.
|
520
527
|
NOTE: This overrides the non-asynchronous generate method from the base class.
|
@@ -543,7 +550,8 @@ class AsyncCartesiaTTS(CartesiaTTS):
|
|
543
550
|
voice=voice,
|
544
551
|
model_id=model_id,
|
545
552
|
duration=duration,
|
546
|
-
chunk_time=chunk_time
|
553
|
+
chunk_time=chunk_time,
|
554
|
+
output_format=output_format,
|
547
555
|
)
|
548
556
|
|
549
557
|
if websocket:
|
@@ -576,7 +584,7 @@ class AsyncCartesiaTTS(CartesiaTTS):
|
|
576
584
|
async def _generate_http(self, body: Dict[str, Any]):
|
577
585
|
session = await self._get_session()
|
578
586
|
async with session.post(
|
579
|
-
f"{self._http_url()}/audio/
|
587
|
+
f"{self._http_url()}/audio/sse", data=json.dumps(body), headers=self.headers
|
580
588
|
) as response:
|
581
589
|
if not response.ok:
|
582
590
|
raise ValueError(f"Failed to generate audio. {await response.text()}")
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.0.6"
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "0.0.5"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|