cartesia 0.0.5__tar.gz → 0.0.6__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -14,7 +14,7 @@ from websockets.sync.client import connect
14
14
 
15
15
  from cartesia.utils import retry_on_connection_error, retry_on_connection_error_async
16
16
 
17
- DEFAULT_MODEL_ID = "genial-planet-1346"
17
+ DEFAULT_MODEL_ID = ""
18
18
  DEFAULT_BASE_URL = "api.cartesia.ai"
19
19
  DEFAULT_API_VERSION = "v0"
20
20
  DEFAULT_TIMEOUT = 30 # seconds
@@ -160,9 +160,8 @@ class CartesiaTTS:
160
160
  raise ValueError(f"Failed to get voices. Error: {response.text}")
161
161
 
162
162
  voices = response.json()
163
- # TODO: Update the API to return the embedding as a list of floats rather than string.
164
- if not skip_embeddings:
165
- for voice in voices:
163
+ for voice in voices:
164
+ if "embedding" in voice and isinstance(voice["embedding"], str):
166
165
  voice["embedding"] = json.loads(voice["embedding"])
167
166
  return {voice["name"]: voice for voice in voices}
168
167
 
@@ -210,9 +209,10 @@ class CartesiaTTS:
210
209
 
211
210
  # Handle successful response
212
211
  out = response.json()
213
- if isinstance(out["embedding"], str):
214
- out["embedding"] = json.loads(out["embedding"])
215
- return out["embedding"]
212
+ embedding = out["embedding"]
213
+ if isinstance(embedding, str):
214
+ embedding = json.loads(embedding)
215
+ return embedding
216
216
 
217
217
  def refresh_websocket(self):
218
218
  """Refresh the websocket connection.
@@ -249,6 +249,7 @@ class CartesiaTTS:
249
249
  transcript: str,
250
250
  voice: Embedding,
251
251
  model_id: str,
252
+ output_format: str,
252
253
  duration: int = None,
253
254
  chunk_time: float = None,
254
255
  ) -> Dict[str, Any]:
@@ -262,6 +263,7 @@ class CartesiaTTS:
262
263
  optional_body = dict(
263
264
  duration=duration,
264
265
  chunk_time=chunk_time,
266
+ output_format=output_format,
265
267
  )
266
268
  body.update({k: v for k, v in optional_body.items() if v is not None})
267
269
 
@@ -277,6 +279,7 @@ class CartesiaTTS:
277
279
  chunk_time: float = None,
278
280
  stream: bool = False,
279
281
  websocket: bool = True,
282
+ output_format: str = "fp32",
280
283
  ) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
281
284
  """Generate audio from a transcript.
282
285
 
@@ -304,7 +307,8 @@ class CartesiaTTS:
304
307
  voice=voice,
305
308
  model_id=model_id,
306
309
  duration=duration,
307
- chunk_time=chunk_time
310
+ chunk_time=chunk_time,
311
+ output_format=output_format,
308
312
  )
309
313
 
310
314
  if websocket:
@@ -336,7 +340,7 @@ class CartesiaTTS:
336
340
 
337
341
  def _generate_http(self, body: Dict[str, Any]):
338
342
  response = requests.post(
339
- f"{self._http_url()}/audio/stream",
343
+ f"{self._http_url()}/audio/sse",
340
344
  stream=True,
341
345
  data=json.dumps(body),
342
346
  headers=self.headers,
@@ -379,6 +383,8 @@ class CartesiaTTS:
379
383
  try:
380
384
  while True:
381
385
  response = json.loads(self.websocket.recv())
386
+ if "error" in response:
387
+ raise RuntimeError(f"Error generating audio:\n{response['error']}")
382
388
  if response["done"]:
383
389
  break
384
390
 
@@ -515,6 +521,7 @@ class AsyncCartesiaTTS(CartesiaTTS):
515
521
  chunk_time: float = None,
516
522
  stream: bool = False,
517
523
  websocket: bool = True,
524
+ output_format: str = "fp32"
518
525
  ) -> Union[AudioOutput, AsyncGenerator[AudioOutput, None]]:
519
526
  """Asynchronously generate audio from a transcript.
520
527
  NOTE: This overrides the non-asynchronous generate method from the base class.
@@ -543,7 +550,8 @@ class AsyncCartesiaTTS(CartesiaTTS):
543
550
  voice=voice,
544
551
  model_id=model_id,
545
552
  duration=duration,
546
- chunk_time=chunk_time
553
+ chunk_time=chunk_time,
554
+ output_format=output_format,
547
555
  )
548
556
 
549
557
  if websocket:
@@ -576,7 +584,7 @@ class AsyncCartesiaTTS(CartesiaTTS):
576
584
  async def _generate_http(self, body: Dict[str, Any]):
577
585
  session = await self._get_session()
578
586
  async with session.post(
579
- f"{self._http_url()}/audio/stream", data=json.dumps(body), headers=self.headers
587
+ f"{self._http_url()}/audio/sse", data=json.dumps(body), headers=self.headers
580
588
  ) as response:
581
589
  if not response.ok:
582
590
  raise ValueError(f"Failed to generate audio. {await response.text()}")
@@ -0,0 +1 @@
1
+ __version__ = "0.0.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -16,7 +16,7 @@ import pytest
16
16
  THISDIR = os.path.dirname(__file__)
17
17
  sys.path.insert(0, os.path.dirname(THISDIR))
18
18
 
19
- SAMPLE_VOICE = "Milo"
19
+ SAMPLE_VOICE = "Samantha"
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
@@ -1 +0,0 @@
1
- __version__ = "0.0.5"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes