PyPI - cartesia - Versions diffs - 0.0.5rc1__py2.py3-none-any.whl → 0.0.6__py2.py3-none-any.whl - Mend

cartesia 0.0.5rc1-py2.py3-none-any.whl → 0.0.6-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

cartesia/__init__.py +2 -2
cartesia/tts.py +249 -93
cartesia/utils.py +65 -0
cartesia/version.py +1 -1
{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/METADATA +75 -38
cartesia-0.0.6.dist-info/RECORD +8 -0
{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/WHEEL +1 -1
cartesia-0.0.5rc1.dist-info/RECORD +0 -7
{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/top_level.txt +0 -0

cartesia/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from cartesia.tts import CartesiaTTS
+from cartesia.tts import AsyncCartesiaTTS, CartesiaTTS
-__all__ = ["CartesiaTTS"]
+__all__ = ["CartesiaTTS", "AsyncCartesiaTTS"]

cartesia/tts.py CHANGED Viewed

@@ -3,19 +3,27 @@ import base64
 import json
 import os
 import uuid
+from types import TracebackType
 from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Tuple, TypedDict, Union
 import aiohttp
 import httpx
+import logging
 import requests
 from websockets.sync.client import connect
-DEFAULT_MODEL_ID = "genial-planet-1346"
+from cartesia.utils import retry_on_connection_error, retry_on_connection_error_async
+DEFAULT_MODEL_ID = ""
 DEFAULT_BASE_URL = "api.cartesia.ai"
 DEFAULT_API_VERSION = "v0"
-DEFAULT_TIMEOUT = 60  # seconds
+DEFAULT_TIMEOUT = 30  # seconds
 DEFAULT_NUM_CONNECTIONS = 10  # connections per client
+BACKOFF_FACTOR = 1
+MAX_RETRIES = 3
+logger = logging.getLogger(__name__)
 class AudioOutput(TypedDict):
     audio: bytes
@@ -74,7 +82,6 @@ class CartesiaTTS:
     To enable interrupt handling along the websocket, set `experimental_ws_handle_interrupts=True`.
     Examples:
         >>> client = CartesiaTTS()
         # Load available voices and their metadata (excluding the embeddings).
@@ -96,14 +103,13 @@ class CartesiaTTS:
     """
     def __init__(self, *, api_key: str = None, experimental_ws_handle_interrupts: bool = False):
-        """
-        Args:
-            api_key: The API key to use for authorization.
-                If not specified, the API key will be read from the environment variable
-                `CARTESIA_API_KEY`.
-            experimental_ws_handle_interrupts: Whether to handle interrupts when generating
-                audio using the websocket. This is an experimental feature and may have bugs
-                or be deprecated in the future.
+        """Args:
+        api_key: The API key to use for authorization.
+            If not specified, the API key will be read from the environment variable
+            `CARTESIA_API_KEY`.
+        experimental_ws_handle_interrupts: Whether to handle interrupts when generating
+            audio using the websocket. This is an experimental feature and may have bugs
+            or be deprecated in the future.
         """
         self.base_url = os.environ.get("CARTESIA_BASE_URL", DEFAULT_BASE_URL)
         self.api_key = api_key or os.environ.get("CARTESIA_API_KEY")
@@ -111,7 +117,6 @@ class CartesiaTTS:
         self.headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"}
         self.websocket = None
         self.experimental_ws_handle_interrupts = experimental_ws_handle_interrupts
-        self.refresh_websocket()
     def get_voices(self, skip_embeddings: bool = True) -> Dict[str, VoiceMetadata]:
         """Returns a mapping from voice name -> voice metadata.
@@ -144,18 +149,23 @@ class CartesiaTTS:
             >>> audio = client.generate(transcript="Hello world!", voice=embedding)
         """
         params = {"select": "id, name, description"} if skip_embeddings else None
-        response = httpx.get(f"{self._http_url()}/voices", headers=self.headers, params=params)
+        response = httpx.get(
+            f"{self._http_url()}/voices",
+            headers=self.headers,
+            params=params,
+            timeout=DEFAULT_TIMEOUT,
+        )
         if not response.is_success:
             raise ValueError(f"Failed to get voices. Error: {response.text}")
         voices = response.json()
-        # TODO: Update the API to return the embedding as a list of floats rather than string.
-        if not skip_embeddings:
-            for voice in voices:
+        for voice in voices:
+            if "embedding" in voice and isinstance(voice["embedding"], str):
                 voice["embedding"] = json.loads(voice["embedding"])
         return {voice["name"]: voice for voice in voices}
+    @retry_on_connection_error(max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger)
     def get_voice_embedding(
         self, *, voice_id: str = None, filepath: str = None, link: str = None
     ) -> Embedding:
@@ -178,18 +188,18 @@ class CartesiaTTS:
         if voice_id:
             url = f"{self._http_url()}/voices/embedding/{voice_id}"
-            response = httpx.get(url, headers=self.headers)
+            response = httpx.get(url, headers=self.headers, timeout=DEFAULT_TIMEOUT)
         elif filepath:
             url = f"{self._http_url()}/voices/clone/clip"
             files = {"clip": open(filepath, "rb")}
             headers = self.headers.copy()
             # The default content type of JSON is incorrect for file uploads
             headers.pop("Content-Type")
-            response = httpx.post(url, headers=headers, files=files)
+            response = httpx.post(url, headers=headers, files=files, timeout=DEFAULT_TIMEOUT)
         elif link:
             url = f"{self._http_url()}/voices/clone/url"
             params = {"link": link}
-            response = httpx.post(url, headers=self.headers, params=params)
+            response = httpx.post(url, headers=self.headers, params=params, timeout=DEFAULT_TIMEOUT)
         if not response.is_success:
             raise ValueError(
@@ -199,9 +209,10 @@ class CartesiaTTS:
         # Handle successful response
         out = response.json()
-        if isinstance(out["embedding"], str):
-            out["embedding"] = json.loads(out["embedding"])
-        return out["embedding"]
+        embedding = out["embedding"]
+        if isinstance(embedding, str):
+            embedding = json.loads(embedding)
+        return embedding
     def refresh_websocket(self):
         """Refresh the websocket connection.
@@ -209,15 +220,11 @@ class CartesiaTTS:
         Note:
             The connection is synchronous.
         """
-        if self.websocket and not self._is_websocket_closed():
-            self.websocket.close()
-        route = "audio/websocket"
-        if self.experimental_ws_handle_interrupts:
-            route = f"experimental/{route}"
-        self.websocket = connect(
-            f"{self._ws_url()}/{route}?api_key={self.api_key}",
-            close_timeout=None,
-        )
+        if self.websocket is None or self._is_websocket_closed():
+            route = "audio/websocket"
+            if self.experimental_ws_handle_interrupts:
+                route = f"experimental/{route}"
+            self.websocket = connect(f"{self._ws_url()}/{route}?api_key={self.api_key}")
     def _is_websocket_closed(self):
         return self.websocket.socket.fileno() == -1
@@ -240,20 +247,23 @@ class CartesiaTTS:
         self,
         *,
         transcript: str,
+        voice: Embedding,
+        model_id: str,
+        output_format: str,
         duration: int = None,
         chunk_time: float = None,
-        voice: Embedding = None,
     ) -> Dict[str, Any]:
+        """Create the request body for a stream request.
+        Note that anything that's not provided will use a default if available or be
+        filtered out otherwise.
         """
-        Create the request body for a stream request.
-        Note that anything that's not provided will use a default if available or be filtered out otherwise.
-        """
-        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID, voice=voice)
+        body = dict(transcript=transcript, model_id=model_id, voice=voice)
         optional_body = dict(
             duration=duration,
             chunk_time=chunk_time,
-            voice=voice,
+            output_format=output_format,
         )
         body.update({k: v for k, v in optional_body.items() if v is not None})
@@ -263,25 +273,26 @@ class CartesiaTTS:
         self,
         *,
         transcript: str,
+        voice: Embedding,
+        model_id: str = DEFAULT_MODEL_ID,
         duration: int = None,
         chunk_time: float = None,
-        voice: Embedding = None,
         stream: bool = False,
         websocket: bool = True,
+        output_format: str = "fp32",
     ) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
         """Generate audio from a transcript.
         Args:
-            transcript: The text to generate audio for.
-            duration: The maximum duration of the audio in seconds.
-            chunk_time: How long each audio segment should be in seconds.
+            transcript (str): The text to generate audio for.
+            voice (Embedding (List[float])): The voice to use for generating audio.
+            duration (int, optional): The maximum duration of the audio in seconds.
+            chunk_time (float, optional): How long each audio segment should be in seconds.
                 This should not need to be adjusted.
-            voice: The voice to use for generating audio.
-                This can either be a voice id (string) or an embedding vector (List[float]).
-            stream: Whether to stream the audio or not.
-                If ``True`` this function returns a generator.
-            websocket: Whether to use a websocket for streaming audio.
-                Using the websocket reduces latency by pre-poning the handshake.
+            stream (bool, optional): Whether to stream the audio or not.
+                If True this function returns a generator. False by default.
+            websocket (bool, optional): Whether to use a websocket for streaming audio.
+                Using the websocket reduces latency by pre-poning the handshake. True by default.
         Returns:
             A generator if `stream` is True, otherwise a dictionary.
@@ -292,13 +303,18 @@ class CartesiaTTS:
         self._check_inputs(transcript, duration, chunk_time)
         body = self._generate_request_body(
-            transcript=transcript, duration=duration, chunk_time=chunk_time, voice=voice
+            transcript=transcript,
+            voice=voice,
+            model_id=model_id,
+            duration=duration,
+            chunk_time=chunk_time,
+            output_format=output_format,
         )
         if websocket:
             generator = self._generate_ws(body)
         else:
-            generator = self._generate_http(body)
+            generator = self._generate_http_wrapper(body)
         if stream:
             return generator
@@ -312,12 +328,23 @@ class CartesiaTTS:
         return {"audio": b"".join(chunks), "sampling_rate": sampling_rate}
+    @retry_on_connection_error(max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger)
+    def _generate_http_wrapper(self, body: Dict[str, Any]):
+        """Need to wrap the http generator in a function for the retry decorator to work."""
+        try:
+            for chunk in self._generate_http(body):
+                yield chunk
+        except Exception as e:
+            logger.error(f"Failed to generate audio. {e}")
+            raise e
     def _generate_http(self, body: Dict[str, Any]):
         response = requests.post(
-            f"{self._http_url()}/audio/stream",
+            f"{self._http_url()}/audio/sse",
             stream=True,
             data=json.dumps(body),
             headers=self.headers,
+            timeout=(DEFAULT_TIMEOUT, DEFAULT_TIMEOUT),
         )
         if not response.ok:
             raise ValueError(f"Failed to generate audio. {response.text}")
@@ -356,6 +383,8 @@ class CartesiaTTS:
         try:
             while True:
                 response = json.loads(self.websocket.recv())
+                if "error" in response:
+                    raise RuntimeError(f"Error generating audio:\n{response['error']}")
                 if response["done"]:
                     break
@@ -370,7 +399,43 @@ class CartesiaTTS:
             if self.experimental_ws_handle_interrupts:
                 self.websocket.send(json.dumps({"context_id": context_id, "action": "cancel"}))
         except Exception as e:
+            # Close the websocket connection if an error occurs.
+            if self.websocket and not self._is_websocket_closed():
+                self.websocket.close()
             raise RuntimeError(f"Failed to generate audio. {response}") from e
+        finally:
+            # Ensure the websocket is ultimately closed.
+            if self.websocket and not self._is_websocket_closed():
+                self.websocket.close()
+    @retry_on_connection_error(max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger)
+    def transcribe(self, raw_audio: Union[bytes, str]) -> str:
+        raw_audio_bytes, headers = self.prepare_audio_and_headers(raw_audio)
+        response = httpx.post(
+            f"{self._http_url()}/audio/transcriptions",
+            headers=headers,
+            files={"clip": ("input.wav", raw_audio_bytes)},
+            timeout=DEFAULT_TIMEOUT,
+        )
+        if not response.is_success:
+            raise ValueError(f"Failed to transcribe audio. Error: {response.text()}")
+        transcript = response.json()
+        return transcript["text"]
+    def prepare_audio_and_headers(
+        self, raw_audio: Union[bytes, str]
+    ) -> Tuple[bytes, Dict[str, Any]]:
+        if isinstance(raw_audio, str):
+            with open(raw_audio, "rb") as f:
+                raw_audio_bytes = f.read()
+        else:
+            raw_audio_bytes = raw_audio
+        # application/json is not the right content type for this request
+        headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
+        return raw_audio_bytes, headers
     def _http_url(self):
         prefix = "http" if "localhost" in self.base_url else "https"
@@ -380,63 +445,119 @@ class CartesiaTTS:
         prefix = "ws" if "localhost" in self.base_url else "wss"
         return f"{prefix}://{self.base_url}/{self.api_version}"
-    def __del__(self):
-        if self.websocket.socket.fileno() > -1:
+    def close(self):
+        if self.websocket and not self._is_websocket_closed():
             self.websocket.close()
+    def __del__(self):
+        self.close()
+    def __enter__(self):
+        self.refresh_websocket()
+        return self
+    def __exit__(
+        self,
+        exc_type: Union[type, None],
+        exc: Union[BaseException, None],
+        exc_tb: Union[TracebackType, None],
+    ):
+        self.close()
 class AsyncCartesiaTTS(CartesiaTTS):
     def __init__(self, *, api_key: str = None, experimental_ws_handle_interrupts: bool = False):
-        self.timeout = aiohttp.ClientTimeout(total=DEFAULT_TIMEOUT)
-        self.connector = aiohttp.TCPConnector(limit=DEFAULT_NUM_CONNECTIONS)
-        self._session = aiohttp.ClientSession(timeout=self.timeout, connector=self.connector)
+        self._session = None
+        self._loop = None
         super().__init__(
             api_key=api_key, experimental_ws_handle_interrupts=experimental_ws_handle_interrupts
         )
-    def refresh_websocket(self):
-        pass  # do not load the websocket for the client until asynchronously when it is needed
-    async def _async_refresh_websocket(self):
+    async def _get_session(self):
+        current_loop = asyncio.get_event_loop()
+        if self._loop is not current_loop:
+            # If the loop has changed, close the session and create a new one.
+            await self.close()
+        if self._session is None or self._session.closed:
+            timeout = aiohttp.ClientTimeout(total=DEFAULT_TIMEOUT)
+            connector = aiohttp.TCPConnector(limit=DEFAULT_NUM_CONNECTIONS)
+            self._session = aiohttp.ClientSession(
+                timeout=timeout, connector=connector
+            )
+            self._loop = current_loop
+        return self._session
+    async def refresh_websocket(self):
         """Refresh the websocket connection."""
-        if self.websocket and not self._is_websocket_closed():
-            self.websocket.close()
-        route = "audio/websocket"
-        if self.experimental_ws_handle_interrupts:
-            route = f"experimental/{route}"
-        self.websocket = await self._session.ws_connect(
-            f"{self._ws_url()}/{route}?api_key={self.api_key}"
-        )
+        if self.websocket is None or self._is_websocket_closed():
+            route = "audio/websocket"
+            if self.experimental_ws_handle_interrupts:
+                route = f"experimental/{route}"
+            session = await self._get_session()
+            self.websocket = await session.ws_connect(
+                f"{self._ws_url()}/{route}?api_key={self.api_key}"
+            )
+    def _is_websocket_closed(self):
+        return self.websocket.closed
+    async def close(self):
+        """This method closes the websocket and the session.
+        It is *strongly* recommended to call this method when you are done using the client.
+        """
+        if self.websocket is not None and not self._is_websocket_closed():
+            await self.websocket.close()
+        if self._session is not None and not self._session.closed:
+            await self._session.close()
     async def generate(
         self,
         *,
         transcript: str,
+        voice: Embedding,
+        model_id: str = DEFAULT_MODEL_ID,
         duration: int = None,
         chunk_time: float = None,
-        voice: Embedding = None,
         stream: bool = False,
         websocket: bool = True,
+        output_format: str = "fp32"
     ) -> Union[AudioOutput, AsyncGenerator[AudioOutput, None]]:
         """Asynchronously generate audio from a transcript.
         NOTE: This overrides the non-asynchronous generate method from the base class.
         Args:
-            transcript: The text to generate audio for.
-            voice: The embedding to use for generating audio.
-            options: The options to use for generating audio. See :class:`GenerateOptions`.
+            transcript (str): The text to generate audio for.
+            voice (Embedding (List[float])): The voice to use for generating audio.
+            duration (int, optional): The maximum duration of the audio in seconds.
+            chunk_time (float, optional): How long each audio segment should be in seconds.
+                This should not need to be adjusted.
+            stream (bool, optional): Whether to stream the audio or not.
+                If True this function returns a generator. False by default.
+            websocket (bool, optional): Whether to use a websocket for streaming audio.
+                Using the websocket reduces latency by pre-poning the handshake. True by default.
         Returns:
-            A dictionary containing the following:
-                * "audio": The audio as a 1D numpy array.
+            A generator if `stream` is True, otherwise a dictionary.
+            Dictionary from both generator and non-generator return types have the following keys:
+                * "audio": The audio as a bytes buffer.
                 * "sampling_rate": The sampling rate of the audio.
         """
+        self._check_inputs(transcript, duration, chunk_time)
         body = self._generate_request_body(
-            transcript=transcript, duration=duration, chunk_time=chunk_time, voice=voice
+            transcript=transcript,
+            voice=voice,
+            model_id=model_id,
+            duration=duration,
+            chunk_time=chunk_time,
+            output_format=output_format,
         )
         if websocket:
             generator = self._generate_ws(body)
         else:
-            generator = self._generate_http(body)
+            generator = self._generate_http_wrapper(body)
         if stream:
             return generator
@@ -450,12 +571,23 @@ class AsyncCartesiaTTS(CartesiaTTS):
         return {"audio": b"".join(chunks), "sampling_rate": sampling_rate}
+    @retry_on_connection_error_async(max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger)
+    async def _generate_http_wrapper(self, body: Dict[str, Any]):
+        """Need to wrap the http generator in a function for the retry decorator to work."""
+        try:
+          async for chunk in self._generate_http(body):
+              yield chunk
+        except Exception as e:
+            logger.error(f"Failed to generate audio. {e}")
+            raise e
     async def _generate_http(self, body: Dict[str, Any]):
-        async with self._session.post(
-            f"{self._http_url()}/audio/stream", data=json.dumps(body), headers=self.headers
+        session = await self._get_session()
+        async with session.post(
+            f"{self._http_url()}/audio/sse", data=json.dumps(body), headers=self.headers
         ) as response:
-            if response.status < 200 or response.status >= 300:
-                raise ValueError(f"Failed to generate audio. {response.text}")
+            if not response.ok:
+                raise ValueError(f"Failed to generate audio. {await response.text()}")
             buffer = ""
             async for chunk_bytes in response.content.iter_any():
@@ -478,7 +610,7 @@ class AsyncCartesiaTTS(CartesiaTTS):
             route = f"experimental/{route}"
         if not self.websocket or self._is_websocket_closed():
-            await self._async_refresh_websocket()
+            await self.refresh_websocket()
         ws = self.websocket
         if context_id is None:
@@ -502,17 +634,29 @@ class AsyncCartesiaTTS(CartesiaTTS):
             if self.experimental_ws_handle_interrupts:
                 await ws.send_json({"context_id": context_id, "action": "cancel"})
         except Exception as e:
-            raise RuntimeError(f"Failed to generate audio. {response}") from e
-    def _is_websocket_closed(self):
-        return self.websocket.closed
-    async def cleanup(self):
-        if self.websocket is not None and not self._is_websocket_closed():
-            await self.websocket.close()
-        if not self._session.closed:
-            await self._session.close()
+            if self.websocket and not self._is_websocket_closed():
+                await self.websocket.close()
+            raise RuntimeError(f"Failed to generate audio. {await response.text()}") from e
+        finally:
+            # Ensure the websocket is ultimately closed.
+            if self.websocket and not self._is_websocket_closed():
+                await self.websocket.close()
+    async def transcribe(self, raw_audio: Union[bytes, str]) -> str:
+        raw_audio_bytes, headers = self.prepare_audio_and_headers(raw_audio)
+        data = aiohttp.FormData()
+        data.add_field("clip", raw_audio_bytes, filename="input.wav", content_type="audio/wav")
+        session = await self._get_session()
+        async with session.post(
+            f"{self._http_url()}/audio/transcriptions", headers=headers, data=data
+        ) as response:
+            if not response.ok:
+                raise ValueError(f"Failed to transcribe audio. Error: {await response.text()}")
+            transcript = await response.json()
+            return transcript["text"]
     def __del__(self):
         try:
             loop = asyncio.get_running_loop()
@@ -520,6 +664,18 @@ class AsyncCartesiaTTS(CartesiaTTS):
             loop = None
         if loop is None:
-            asyncio.run(self.cleanup())
+            asyncio.run(self.close())
         else:
-            loop.create_task(self.cleanup())
+            loop.create_task(self.close())
+    async def __aenter__(self):
+        await self.refresh_websocket()
+        return self
+    async def __aexit__(
+        self,
+        exc_type: Union[type, None],
+        exc: Union[BaseException, None],
+        exc_tb: Union[TracebackType, None],
+    ):
+        await self.close()

cartesia/utils.py ADDED Viewed

@@ -0,0 +1,65 @@
+import time
+from aiohttp.client_exceptions import ServerDisconnectedError
+import asyncio
+from functools import wraps
+from http.client import RemoteDisconnected
+from httpx import TimeoutException
+from requests.exceptions import ConnectionError
+def retry_on_connection_error(max_retries=3, backoff_factor=1, logger=None):
+    """Retry a function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.
+    Args:
+        max_retries (int): The maximum number of retries.
+        backoff_factor (int): The factor to increase the delay between retries.
+        logger (logging.Logger): The logger to use for logging.
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            retry_count = 0
+            while retry_count < max_retries:
+                try:
+                    return func(*args, **kwargs)
+                except (ConnectionError, RemoteDisconnected, ServerDisconnectedError, TimeoutException) as e:
+                    logger.info(f"Retrying after exception: {e}")
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        delay = backoff_factor * (2 ** (retry_count - 1))
+                        logger.warn(f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds...")
+                        time.sleep(delay)
+                    else:
+                        raise Exception(f"Exception occurred after {max_retries} tries.") from e
+        return wrapper
+    return decorator
+def retry_on_connection_error_async(max_retries=3, backoff_factor=1, logger=None):
+    """Retry an asynchronous function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.
+    Args:
+        max_retries (int): The maximum number of retries.
+        backoff_factor (int): The factor to increase the delay between retries.
+        logger (logging.Logger): The logger to use for logging.
+    """
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            retry_count = 0
+            while retry_count < max_retries:
+                try:
+                    async for chunk in func(*args, **kwargs):
+                        yield chunk
+                    # If the function completes without raising an exception return
+                    return
+                except (ConnectionError, RemoteDisconnected, ServerDisconnectedError, TimeoutException) as e:
+                    logger.info(f"Retrying after exception: {e}")
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        delay = backoff_factor * (2 ** (retry_count - 1))
+                        logger.warn(f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds...")
+                        await asyncio.sleep(delay)
+                    else:
+                        raise Exception(f"Exception occurred after {max_retries} tries.") from e
+        return wrapper
+    return decorator

cartesia/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.0.5rc1"
+__version__ = "0.0.6"

{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 0.0.5rc1
+Version: 0.0.6
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -16,25 +16,17 @@ Requires-Dist: pytest-asyncio
 Requires-Dist: requests
 Requires-Dist: websockets
 Provides-Extra: all
-Requires-Dist: pre-commit ; extra == 'all'
-Requires-Dist: docformatter ; extra == 'all'
-Requires-Dist: black ==24.1.1 ; extra == 'all'
-Requires-Dist: isort ==5.13.2 ; extra == 'all'
-Requires-Dist: flake8 ==7.0.0 ; extra == 'all'
-Requires-Dist: flake8-bugbear ==24.2.6 ; extra == 'all'
 Requires-Dist: pytest >=8.0.2 ; extra == 'all'
 Requires-Dist: pytest-cov >=4.1.0 ; extra == 'all'
 Requires-Dist: twine ; extra == 'all'
+Requires-Dist: setuptools ; extra == 'all'
+Requires-Dist: wheel ; extra == 'all'
 Provides-Extra: dev
-Requires-Dist: pre-commit ; extra == 'dev'
-Requires-Dist: docformatter ; extra == 'dev'
-Requires-Dist: black ==24.1.1 ; extra == 'dev'
-Requires-Dist: isort ==5.13.2 ; extra == 'dev'
-Requires-Dist: flake8 ==7.0.0 ; extra == 'dev'
-Requires-Dist: flake8-bugbear ==24.2.6 ; extra == 'dev'
 Requires-Dist: pytest >=8.0.2 ; extra == 'dev'
 Requires-Dist: pytest-cov >=4.1.0 ; extra == 'dev'
 Requires-Dist: twine ; extra == 'dev'
+Requires-Dist: setuptools ; extra == 'dev'
+Requires-Dist: wheel ; extra == 'dev'
 # Cartesia Python API Library
@@ -60,13 +52,14 @@ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
 voices = client.get_voices()
 voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
 transcript = "Hello! Welcome to Cartesia"
+model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
 p = pyaudio.PyAudio()
 stream = None
 # Generate and stream audio
-for output in client.generate(transcript=transcript, voice=voice, stream=True):
+for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
     buffer = output["audio"]
     rate = output["sampling_rate"]
@@ -84,26 +77,68 @@ stream.close()
 p.terminate()
 ```
-If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
+You can also use the async client if you want to make asynchronous API calls:
+```python
+from cartesia.tts import AsyncCartesiaTTS
+import asyncio
+import pyaudio
+import os
+async def write_stream():
+    client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+    voices = client.get_voices()
+    voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+    transcript = "Hello! Welcome to Cartesia"
+    model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
+    p = pyaudio.PyAudio()
+    stream = None
+    # Generate and stream audio
+    async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
+        buffer = output["audio"]
+        rate = output["sampling_rate"]
+        if not stream:
+            stream = p.open(format=pyaudio.paFloat32,
+                            channels=1,
+                            rate=rate,
+                            output=True)
+        # Write the audio data to the stream
+        stream.write(buffer)
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+asyncio.run(write_stream())
+```
+If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook.
+Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
 ```python
-from cartesia.tts import CartesiaTTS
 from IPython.display import Audio
 import io
 import os
+import numpy as np
-client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
-voices = client.get_voices()
-voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
-transcript = "Hello! Welcome to Cartesia"
+from cartesia.tts import CartesiaTTS
-# Create a BytesIO object to store the audio data
-audio_data = io.BytesIO()
+with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+    voices = client.get_voices()
+    voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+    transcript = "Hello! Welcome to Cartesia"
-# Generate and stream audio
-for output in client.generate(transcript=transcript, voice=voice, stream=True):
-    buffer = output["audio"]
-    audio_data.write(buffer)
+    # Create a BytesIO object to store the audio data
+    audio_data = io.BytesIO()
+    # Generate and stream audio
+    for output in client.generate(transcript=transcript, voice=voice, stream=True):
+        buffer = output["audio"]
+        audio_data.write(buffer)
 # Set the cursor position to the beginning of the BytesIO object
 audio_data.seek(0)
@@ -115,25 +150,27 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["s
 display(audio)
 ```
-You can also use the async client if you want to make asynchronous API calls. The usage is very similar:
+Below is the same example using the async client:
 ```python
-from cartesia.tts import AsyncCartesiaTTS
 from IPython.display import Audio
 import io
 import os
+import numpy as np
-client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
-voices = client.get_voices()
-voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
-transcript = "Hello! Welcome to Cartesia"
+from cartesia.tts import AsyncCartesiaTTS
-# Create a BytesIO object to store the audio data
-audio_data = io.BytesIO()
+async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+    voices = client.get_voices()
+    voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+    transcript = "Hello! Welcome to Cartesia"
-# Generate and stream audio
-async for output in client.generate(transcript=transcript, voice=voice, stream=True):
-    buffer = output["audio"]
-    audio_data.write(buffer)
+    # Create a BytesIO object to store the audio data
+    audio_data = io.BytesIO()
+    # Generate and stream audio
+    async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
+        buffer = output["audio"]
+        audio_data.write(buffer)
 # Set the cursor position to the beginning of the BytesIO object
 audio_data.seek(0)

cartesia-0.0.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+cartesia/__init__.py,sha256=uIc9xGNPs8_A6eAvbTUY1geazunYoEZVWFKhCwC9TRA,102
+cartesia/tts.py,sha256=YjOW8mlvvPbHblhcMUY71RsKn77K_WQi8ySok3ifeJg,26734
+cartesia/utils.py,sha256=GoTJe8LZ3WpS4hXkwoZauPYjo7Mbx7BvbBjAX5vEbwg,3024
+cartesia/version.py,sha256=QiiYsv0kcJaB8wCWyT-FnI2b6be87HA-CrrIUn8LQhg,22
+cartesia-0.0.6.dist-info/METADATA,sha256=yhq7LSvLrboBPI3IOcLTvaneisqhq-v1VMQ0sKBq8kk,5974
+cartesia-0.0.6.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
+cartesia-0.0.6.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
+cartesia-0.0.6.dist-info/RECORD,,

{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py2-none-any
 Tag: py3-none-any

cartesia-0.0.5rc1.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-cartesia/__init__.py,sha256=m8BX-qLjsMoI_JZtgf3jNi8R3cBZqYy-z4oEhYeJLdI,64
-cartesia/tts.py,sha256=yPLz41AR0oAYPUNW48mqmwEEbLBHCnbaK_wPT0iFBVk,20543
-cartesia/version.py,sha256=VkI5lk2CFatZR200RqGd8cBjTnMDmhtZW7DI6mPe6n4,25
-cartesia-0.0.5rc1.dist-info/METADATA,sha256=632D6iZ2IU3MLySAnMtwV2zQA38XkQv1rfFF4iRdAco,4893
-cartesia-0.0.5rc1.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
-cartesia-0.0.5rc1.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
-cartesia-0.0.5rc1.dist-info/RECORD,,

{cartesia-0.0.5rc1.dist-info → cartesia-0.0.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

cartesia 0.0.5rc1__py2.py3-none-any.whl → 0.0.6__py2.py3-none-any.whl

cartesia 0.0.5rc1__py2.py3-none-any.whl → 0.0.6__py2.py3-none-any.whl