cartesia 1.0.12__tar.gz → 1.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {cartesia-1.0.12 → cartesia-1.0.14}/PKG-INFO +10 -10
  2. {cartesia-1.0.12 → cartesia-1.0.14}/README.md +9 -9
  3. cartesia-1.0.14/cartesia/__init__.py +4 -0
  4. cartesia-1.0.14/cartesia/_async_sse.py +105 -0
  5. cartesia-1.0.14/cartesia/_async_websocket.py +323 -0
  6. cartesia-1.0.14/cartesia/_constants.py +10 -0
  7. cartesia-1.0.14/cartesia/_logger.py +3 -0
  8. cartesia-1.0.14/cartesia/_sse.py +152 -0
  9. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia/_types.py +3 -2
  10. cartesia-1.0.14/cartesia/_websocket.py +374 -0
  11. cartesia-1.0.14/cartesia/async_client.py +82 -0
  12. cartesia-1.0.14/cartesia/async_tts.py +22 -0
  13. cartesia-1.0.14/cartesia/client.py +69 -0
  14. cartesia-1.0.14/cartesia/resource.py +44 -0
  15. cartesia-1.0.14/cartesia/tts.py +109 -0
  16. cartesia-1.0.14/cartesia/utils/tts.py +25 -0
  17. cartesia-1.0.14/cartesia/version.py +1 -0
  18. cartesia-1.0.14/cartesia/voices.py +170 -0
  19. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia.egg-info/PKG-INFO +10 -10
  20. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia.egg-info/SOURCES.txt +12 -0
  21. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia.egg-info/requires.txt +4 -0
  22. {cartesia-1.0.12 → cartesia-1.0.14}/tests/test_tts.py +635 -285
  23. cartesia-1.0.12/cartesia/__init__.py +0 -3
  24. cartesia-1.0.12/cartesia/client.py +0 -1390
  25. cartesia-1.0.12/cartesia/version.py +0 -1
  26. {cartesia-1.0.12 → cartesia-1.0.14}/LICENSE.md +0 -0
  27. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia/utils/__init__.py +0 -0
  28. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia/utils/deprecated.py +0 -0
  29. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia/utils/retry.py +0 -0
  30. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia.egg-info/dependency_links.txt +0 -0
  31. {cartesia-1.0.12 → cartesia-1.0.14}/cartesia.egg-info/top_level.txt +0 -0
  32. {cartesia-1.0.12 → cartesia-1.0.14}/pyproject.toml +0 -0
  33. {cartesia-1.0.12 → cartesia-1.0.14}/setup.cfg +0 -0
  34. {cartesia-1.0.12 → cartesia-1.0.14}/setup.py +0 -0
  35. {cartesia-1.0.12 → cartesia-1.0.14}/tests/test_deprecated.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.12
3
+ Version: 1.0.14
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -18,12 +18,12 @@ License-File: LICENSE.md
18
18
  # Cartesia Python API Library
19
19
 
20
20
  ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
21
- [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
21
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
22
22
 
23
23
  The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
24
24
 
25
25
  > [!IMPORTANT]
26
- > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
26
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
27
27
 
28
28
  - [Cartesia Python API Library](#cartesia-python-api-library)
29
29
  - [Documentation](#documentation)
@@ -105,7 +105,7 @@ transcript = "Hello! Welcome to Cartesia"
105
105
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
106
106
  model_id = "sonic-english"
107
107
 
108
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
108
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
109
109
  output_format = {
110
110
  "container": "raw",
111
111
  "encoding": "pcm_f32le",
@@ -156,7 +156,7 @@ async def write_stream():
156
156
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
157
157
  model_id = "sonic-english"
158
158
 
159
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
159
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
160
160
  output_format = {
161
161
  "container": "raw",
162
162
  "encoding": "pcm_f32le",
@@ -211,7 +211,7 @@ transcript = "Hello! Welcome to Cartesia"
211
211
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
212
212
  model_id = "sonic-english"
213
213
 
214
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
214
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
215
215
  output_format = {
216
216
  "container": "raw",
217
217
  "encoding": "pcm_f32le",
@@ -272,7 +272,7 @@ async def send_transcripts(ctx):
272
272
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
273
273
  model_id = "sonic-english"
274
274
 
275
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
275
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
276
276
  output_format = {
277
277
  "container": "raw",
278
278
  "encoding": "pcm_f32le",
@@ -380,7 +380,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
380
380
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
381
381
  model_id = "sonic-english"
382
382
 
383
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
383
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
384
384
  output_format = {
385
385
  "container": "raw",
386
386
  "encoding": "pcm_f32le",
@@ -470,7 +470,7 @@ language = "es" # Language code corresponding to the language of the transcript
470
470
  # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
471
471
  model_id = "sonic-multilingual"
472
472
 
473
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
473
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
474
474
  output_format = {
475
475
  "container": "raw",
476
476
  "encoding": "pcm_f32le",
@@ -623,7 +623,7 @@ display(audio)
623
623
 
624
624
  #### Output Formats
625
625
 
626
- You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
626
+ You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
627
627
 
628
628
  The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
629
629
 
@@ -1,12 +1,12 @@
1
1
  # Cartesia Python API Library
2
2
 
3
3
  ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
4
- [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
4
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
5
5
 
6
6
  The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
7
7
 
8
8
  > [!IMPORTANT]
9
- > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
9
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
10
10
 
11
11
  - [Cartesia Python API Library](#cartesia-python-api-library)
12
12
  - [Documentation](#documentation)
@@ -88,7 +88,7 @@ transcript = "Hello! Welcome to Cartesia"
88
88
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
89
89
  model_id = "sonic-english"
90
90
 
91
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
91
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
92
92
  output_format = {
93
93
  "container": "raw",
94
94
  "encoding": "pcm_f32le",
@@ -139,7 +139,7 @@ async def write_stream():
139
139
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
140
140
  model_id = "sonic-english"
141
141
 
142
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
142
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
143
143
  output_format = {
144
144
  "container": "raw",
145
145
  "encoding": "pcm_f32le",
@@ -194,7 +194,7 @@ transcript = "Hello! Welcome to Cartesia"
194
194
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
195
195
  model_id = "sonic-english"
196
196
 
197
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
197
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
198
198
  output_format = {
199
199
  "container": "raw",
200
200
  "encoding": "pcm_f32le",
@@ -255,7 +255,7 @@ async def send_transcripts(ctx):
255
255
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
256
256
  model_id = "sonic-english"
257
257
 
258
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
258
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
259
259
  output_format = {
260
260
  "container": "raw",
261
261
  "encoding": "pcm_f32le",
@@ -363,7 +363,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
363
363
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
364
364
  model_id = "sonic-english"
365
365
 
366
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
366
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
367
367
  output_format = {
368
368
  "container": "raw",
369
369
  "encoding": "pcm_f32le",
@@ -453,7 +453,7 @@ language = "es" # Language code corresponding to the language of the transcript
453
453
  # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
454
454
  model_id = "sonic-multilingual"
455
455
 
456
- # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
456
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
457
457
  output_format = {
458
458
  "container": "raw",
459
459
  "encoding": "pcm_f32le",
@@ -606,7 +606,7 @@ display(audio)
606
606
 
607
607
  #### Output Formats
608
608
 
609
- You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
609
+ You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
610
610
 
611
611
  The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
612
612
 
@@ -0,0 +1,4 @@
1
+ from cartesia.async_client import AsyncCartesia
2
+ from cartesia.client import Cartesia
3
+
4
+ __all__ = ["Cartesia", "AsyncCartesia"]
@@ -0,0 +1,105 @@
1
+ import base64
2
+ import json
3
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
4
+
5
+ import aiohttp
6
+
7
+ from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
8
+ from cartesia._logger import logger
9
+ from cartesia._sse import _SSE
10
+ from cartesia._types import OutputFormat, VoiceControls
11
+ from cartesia.tts import TTS
12
+ from cartesia.utils.retry import retry_on_connection_error_async
13
+
14
+
15
class _AsyncSSE(_SSE):
    """Generate audio via Server-Sent Events (SSE) asynchronously.

    Mirrors the synchronous :class:`_SSE` interface but performs the HTTP
    request and chunk parsing with ``aiohttp`` on a shared client session.
    """

    def __init__(
        self,
        http_url: str,
        headers: Dict[str, str],
        timeout: float,
        get_session: Callable[[], Optional[aiohttp.ClientSession]],
    ):
        """
        Args:
            http_url: Base HTTP URL for the Cartesia API.
            headers: Headers sent with every request (authorization etc.).
            timeout: Request timeout in seconds.
            get_session: Awaitable factory returning the shared aiohttp session.
        """
        super().__init__(http_url, headers, timeout)
        self._get_session = get_session

    async def send(
        self,
        model_id: str,
        transcript: str,
        output_format: OutputFormat,
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        duration: Optional[int] = None,
        language: Optional[str] = None,
        stream: bool = True,
        _experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> Union[Dict[str, Any], AsyncGenerator[Dict[str, Any], None]]:
        """Send a TTS request over SSE.

        Args:
            model_id: ID of the model used to generate audio.
            transcript: Text to synthesize.
            output_format: Dict with ``container``, ``encoding`` and ``sample_rate``.
            voice_id: ID of the voice to use.
            voice_embedding: Raw voice embedding used instead of a voice ID.
            duration: Optional target duration of the audio in seconds.
            language: Optional language code (multilingual models only).
            stream: If True, return an async generator of chunks; otherwise
                collect everything and return one ``{"audio": bytes}`` dict.
            _experimental_voice_controls: Experimental speed/emotion controls.
                Note: may change rapidly in future releases.

        Returns:
            An async generator yielding ``{"audio": bytes}`` chunks when
            ``stream`` is True, else a single dict with the concatenated audio.
        """
        voice = TTS._validate_and_construct_voice(
            voice_id,
            voice_embedding=voice_embedding,
            experimental_voice_controls=_experimental_voice_controls,
        )

        request_body = {
            "model_id": model_id,
            "transcript": transcript,
            "voice": voice,
            "output_format": {
                "container": output_format["container"],
                "encoding": output_format["encoding"],
                "sample_rate": output_format["sample_rate"],
            },
            # Sent even when None; the server treats null as "use default".
            "language": language,
        }

        if duration is not None:
            request_body["duration"] = duration

        generator = self._sse_generator_wrapper(request_body)

        if stream:
            return generator

        chunks = []
        async for chunk in generator:
            chunks.append(chunk["audio"])

        return {"audio": b"".join(chunks)}

    @retry_on_connection_error_async(
        max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
    )
    async def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
        """Wrap the SSE generator in a function so the retry decorator can re-invoke it."""
        try:
            async for chunk in self._sse_generator(request_body):
                yield chunk
        except Exception as e:
            # Chain the original exception so the root cause is preserved
            # for debugging (previously it was discarded).
            raise RuntimeError(f"Error generating audio. {e}") from e

    async def _sse_generator(self, request_body: Dict[str, Any]):
        """POST the request and yield decoded audio chunks as they arrive."""
        session = await self._get_session()
        async with session.post(
            f"{self.http_url}/tts/sse",
            data=json.dumps(request_body),
            headers=self.headers,
        ) as response:
            if not response.ok:
                raise ValueError(f"Failed to generate audio. {await response.text()}")

            buffer = ""
            async for chunk_bytes in response.content.iter_any():
                # _update_buffer (inherited from _SSE) splits the raw byte
                # stream into complete SSE events and returns the leftover tail.
                buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
                for output in outputs:
                    yield output

            # Flush a trailing event left in the buffer, if it parses cleanly.
            if buffer:
                try:
                    chunk_json = json.loads(buffer)
                    audio = base64.b64decode(chunk_json["data"])
                    yield {"audio": audio}
                except json.JSONDecodeError:
                    pass
@@ -0,0 +1,323 @@
1
+ import asyncio
2
+ import uuid
3
+ from collections import defaultdict
4
+ from types import TracebackType
5
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
6
+
7
+ import aiohttp
8
+
9
+ from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
10
+ from cartesia._types import OutputFormat, VoiceControls
11
+ from cartesia._websocket import _WebSocket
12
+ from cartesia.tts import TTS
13
+
14
+
15
+ class _AsyncTTSContext:
16
+ """Manage a single context over an AsyncWebSocket.
17
+
18
+ This class separates sending requests and receiving responses into two separate methods.
19
+ This can be used for sending multiple requests without awaiting the response.
20
+ Then you can listen to the responses in the order they were sent. See README for usage.
21
+
22
+ Each AsyncTTSContext will close automatically when a done message is received for that context.
23
+ This happens when the no_more_inputs method is called (equivalent to sending a request with `continue_ = False`),
24
+ or if no requests have been sent for 5 seconds on the same context. It also closes if there is an error.
25
+
26
+ """
27
+
28
+ def __init__(self, context_id: str, websocket: "_AsyncWebSocket", timeout: float):
29
+ self._context_id = context_id
30
+ self._websocket = websocket
31
+ self.timeout = timeout
32
+ self._error = None
33
+
34
+ @property
35
+ def context_id(self) -> str:
36
+ return self._context_id
37
+
38
+ async def send(
39
+ self,
40
+ model_id: str,
41
+ transcript: str,
42
+ output_format: OutputFormat,
43
+ voice_id: Optional[str] = None,
44
+ voice_embedding: Optional[List[float]] = None,
45
+ context_id: Optional[str] = None,
46
+ continue_: bool = False,
47
+ duration: Optional[int] = None,
48
+ language: Optional[str] = None,
49
+ add_timestamps: bool = False,
50
+ _experimental_voice_controls: Optional[VoiceControls] = None,
51
+ ) -> None:
52
+ """Send audio generation requests to the WebSocket. The response can be received using the `receive` method.
53
+
54
+ Args:
55
+ model_id: The ID of the model to use for generating audio.
56
+ transcript: The text to convert to speech.
57
+ output_format: A dictionary containing the details of the output format.
58
+ voice_id: The ID of the voice to use for generating audio.
59
+ voice_embedding: The embedding of the voice to use for generating audio.
60
+ context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
61
+ continue_: Whether to continue the audio generation from the previous transcript or not.
62
+ duration: The duration of the audio in seconds.
63
+ language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
64
+ add_timestamps: Whether to return word-level timestamps.
65
+ _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
66
+ Note: This is an experimental feature and may change rapidly in future releases.
67
+
68
+ Returns:
69
+ None.
70
+ """
71
+ if context_id is not None and context_id != self._context_id:
72
+ raise ValueError("Context ID does not match the context ID of the current context.")
73
+ if continue_ and transcript == "":
74
+ raise ValueError("Transcript cannot be empty when continue_ is True.")
75
+
76
+ await self._websocket.connect()
77
+
78
+ voice = TTS._validate_and_construct_voice(
79
+ voice_id,
80
+ voice_embedding,
81
+ experimental_voice_controls=_experimental_voice_controls,
82
+ )
83
+
84
+ request_body = {
85
+ "model_id": model_id,
86
+ "transcript": transcript,
87
+ "voice": voice,
88
+ "output_format": {
89
+ "container": output_format["container"],
90
+ "encoding": output_format["encoding"],
91
+ "sample_rate": output_format["sample_rate"],
92
+ },
93
+ "context_id": self._context_id,
94
+ "continue": continue_,
95
+ "language": language,
96
+ "add_timestamps": add_timestamps,
97
+ }
98
+
99
+ if duration is not None:
100
+ request_body["duration"] = duration
101
+
102
+ await self._websocket.websocket.send_json(request_body)
103
+
104
+ # Start listening for responses on the WebSocket
105
+ self._websocket._dispatch_listener()
106
+
107
+ async def no_more_inputs(self) -> None:
108
+ """Send a request to the WebSocket to indicate that no more requests will be sent."""
109
+ await self.send(
110
+ model_id=DEFAULT_MODEL_ID,
111
+ transcript="",
112
+ output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
113
+ voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
114
+ context_id=self._context_id,
115
+ continue_=False,
116
+ )
117
+
118
+ async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
119
+ """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
120
+
121
+ Returns:
122
+ An async generator that yields audio chunks. Each chunk is a dictionary containing the audio as bytes.
123
+ """
124
+ try:
125
+ while True:
126
+ response = await self._websocket._get_message(
127
+ self._context_id, timeout=self.timeout
128
+ )
129
+ if "error" in response:
130
+ raise RuntimeError(f"Error generating audio:\n{response['error']}")
131
+ if response["done"]:
132
+ break
133
+ yield self._websocket._convert_response(response, include_context_id=True)
134
+ except Exception as e:
135
+ if isinstance(e, asyncio.TimeoutError):
136
+ raise RuntimeError("Timeout while waiting for audio chunk")
137
+ raise RuntimeError(f"Failed to generate audio:\n{e}")
138
+ finally:
139
+ self._close()
140
+
141
+ def _close(self) -> None:
142
+ """Closes the context. Automatically called when a done message is received for this context."""
143
+ self._websocket._remove_context(self._context_id)
144
+
145
+ def is_closed(self):
146
+ """Check if the context is closed or not. Returns True if closed."""
147
+ return self._context_id not in self._websocket._context_queues
148
+
149
+ async def __aenter__(self):
150
+ return self
151
+
152
+ async def __aexit__(
153
+ self,
154
+ exc_type: Union[type, None],
155
+ exc: Union[BaseException, None],
156
+ exc_tb: Union[TracebackType, None],
157
+ ):
158
+ self._close()
159
+
160
+ def __del__(self):
161
+ self._close()
162
+
163
+
164
class _AsyncWebSocket(_WebSocket):
    """This class contains methods to generate audio using WebSocket asynchronously."""

    def __init__(
        self,
        ws_url: str,
        api_key: str,
        cartesia_version: str,
        timeout: float,
        get_session: Callable[[], Optional[aiohttp.ClientSession]],
    ):
        """
        Args:
            ws_url: The WebSocket URL for the Cartesia API.
            api_key: The API key to use for authorization.
            cartesia_version: The version of the Cartesia API to use.
            timeout: The timeout for responses on the WebSocket in seconds.
            get_session: A function that returns an aiohttp.ClientSession object.
        """
        super().__init__(ws_url, api_key, cartesia_version)
        self.timeout = timeout
        self._get_session = get_session
        self.websocket = None
        self._context_queues: Dict[str, asyncio.Queue] = {}
        self._processing_task: Optional[asyncio.Task] = None
        # Fix: _process_responses assigns self._error on failure, but the
        # attribute was never initialized, so reading it before any error
        # raised AttributeError.
        self._error: Optional[Exception] = None

    def __del__(self):
        # Best-effort cleanup: close on the running loop if there is one,
        # otherwise spin up a temporary loop just to close.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop is None:
            asyncio.run(self.close())
        elif loop.is_running():
            loop.create_task(self.close())

    async def connect(self):
        """Open the WebSocket connection if it is not already open."""
        if self.websocket is None or self._is_websocket_closed():
            route = "tts/websocket"
            session = await self._get_session()
            try:
                self.websocket = await session.ws_connect(
                    f"{self.ws_url}/{route}?api_key={self.api_key}&cartesia_version={self.cartesia_version}"
                )
            except Exception as e:
                # Chain the original exception so the root cause is preserved.
                raise RuntimeError(f"Failed to connect to WebSocket. {e}") from e

    def _is_websocket_closed(self):
        return self.websocket.closed

    async def close(self):
        """This method closes the websocket connection. *Highly* recommended to call this method when done."""
        if self.websocket is not None and not self._is_websocket_closed():
            await self.websocket.close()
        if self._processing_task:
            self._processing_task.cancel()
            try:
                # Fix: await the cancelled task so cancellation is actually
                # processed. The previous code wrapped a plain attribute
                # assignment in this try, leaving both handlers unreachable.
                await self._processing_task
            except asyncio.CancelledError:
                pass
            except TypeError as e:
                # Ignore the error if the task is already cancelled.
                # For some reason we are getting None responses
                # TODO: This needs to be fixed - we need to think about why we are getting None responses.
                if "Received message 256:None" not in str(e):
                    raise e
            self._processing_task = None

        for context_id in list(self._context_queues.keys()):
            self._remove_context(context_id)

        self._context_queues.clear()
        self._processing_task = None
        self.websocket = None

    async def send(
        self,
        model_id: str,
        transcript: str,
        output_format: OutputFormat,
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        context_id: Optional[str] = None,
        duration: Optional[int] = None,
        language: Optional[str] = None,
        stream: bool = True,
        add_timestamps: bool = False,
        _experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> Union[Dict[str, Any], AsyncGenerator[Dict[str, Any], None]]:
        """See :meth:`_WebSocket.send` for details.

        Returns:
            An async generator of response dicts when ``stream`` is True,
            otherwise one dict with the concatenated audio, the context ID,
            and (optionally) merged word timestamps.
        """
        if context_id is None:
            context_id = str(uuid.uuid4())

        ctx = self.context(context_id)

        await ctx.send(
            model_id=model_id,
            transcript=transcript,
            output_format=output_format,
            voice_id=voice_id,
            voice_embedding=voice_embedding,
            context_id=context_id,
            duration=duration,
            language=language,
            continue_=False,
            add_timestamps=add_timestamps,
            _experimental_voice_controls=_experimental_voice_controls,
        )

        generator = ctx.receive()

        if stream:
            return generator

        # Non-streaming: drain the generator, concatenating audio and merging
        # per-chunk word timestamps into one mapping of lists.
        chunks = []
        word_timestamps = defaultdict(list)
        async for chunk in generator:
            if "audio" in chunk:
                chunks.append(chunk["audio"])
            if add_timestamps and "word_timestamps" in chunk:
                for k, v in chunk["word_timestamps"].items():
                    word_timestamps[k].extend(v)
        out = {"audio": b"".join(chunks), "context_id": context_id}
        if add_timestamps:
            out["word_timestamps"] = word_timestamps
        return out

    async def _process_responses(self):
        """Listener task: route every incoming message to its context's queue."""
        try:
            while True:
                response = await self.websocket.receive_json()
                if response["context_id"]:
                    context_id = response["context_id"]
                if context_id in self._context_queues:
                    await self._context_queues[context_id].put(response)
        except Exception as e:
            # Record the failure so it can be surfaced to waiting readers.
            self._error = e
            raise e

    async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
        """Wait up to ``timeout`` seconds for the next message on ``context_id``."""
        if context_id not in self._context_queues:
            raise ValueError(f"Context ID {context_id} not found.")
        return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)

    def _remove_context(self, context_id: str):
        """Drop the queue for ``context_id``; no-op if already removed."""
        if context_id in self._context_queues:
            del self._context_queues[context_id]

    def _dispatch_listener(self):
        """Start the response-listener task if none is running."""
        if self._processing_task is None or self._processing_task.done():
            self._processing_task = asyncio.create_task(self._process_responses())

    def context(self, context_id: Optional[str] = None) -> _AsyncTTSContext:
        """Create a new context (with a fresh queue) on this WebSocket.

        Raises:
            ValueError: If a context with ``context_id`` already exists.
        """
        if context_id in self._context_queues:
            raise ValueError(f"AsyncContext for context ID {context_id} already exists.")
        if context_id is None:
            context_id = str(uuid.uuid4())
        if context_id not in self._context_queues:
            self._context_queues[context_id] = asyncio.Queue()
        return _AsyncTTSContext(context_id, self, self.timeout)
@@ -0,0 +1,10 @@
1
# Model identifiers.
DEFAULT_MODEL_ID = "sonic-english"  # latest default model
MULTILINGUAL_MODEL_ID = "sonic-multilingual"  # latest multilingual model

# API endpoint and versioning.
DEFAULT_BASE_URL = "api.cartesia.ai"
DEFAULT_CARTESIA_VERSION = "2024-06-10"  # latest version

# Client behavior defaults.
DEFAULT_TIMEOUT = 30  # seconds
DEFAULT_NUM_CONNECTIONS = 10  # connections per client
DEFAULT_VOICE_EMBEDDING = [1.0] * 192  # placeholder embedding (required input)

# Retry policy for transient connection errors.
BACKOFF_FACTOR = 1
MAX_RETRIES = 3
@@ -0,0 +1,3 @@
1
import logging

# Package-wide logger; configuration (handlers, level) is left to the
# consuming application, per standard library convention.
logger = logging.getLogger(__name__)