PyPI - cartesia - Versions diffs - 1.0.13__tar.gz → 1.1.0__tar.gz - Mend

cartesia 1.0.13.tar.gz → 1.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

{cartesia-1.0.13/cartesia.egg-info → cartesia-1.1.0}/PKG-INFO +41 -24
cartesia-1.0.13/PKG-INFO → cartesia-1.1.0/README.md +34 -30
cartesia-1.1.0/cartesia/__init__.py +4 -0
cartesia-1.1.0/cartesia/_async_sse.py +95 -0
cartesia-1.1.0/cartesia/_async_websocket.py +313 -0
cartesia-1.1.0/cartesia/_constants.py +10 -0
cartesia-1.1.0/cartesia/_logger.py +3 -0
cartesia-1.1.0/cartesia/_sse.py +143 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia/_types.py +3 -2
cartesia-1.1.0/cartesia/_websocket.py +355 -0
cartesia-1.1.0/cartesia/async_client.py +82 -0
cartesia-1.1.0/cartesia/async_tts.py +63 -0
cartesia-1.1.0/cartesia/client.py +69 -0
cartesia-1.1.0/cartesia/resource.py +44 -0
cartesia-1.1.0/cartesia/tts.py +146 -0
cartesia-1.1.0/cartesia/utils/tts.py +74 -0
cartesia-1.1.0/cartesia/version.py +1 -0
cartesia-1.1.0/cartesia/voices.py +170 -0
cartesia-1.0.13/README.md → cartesia-1.1.0/cartesia.egg-info/PKG-INFO +47 -13
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/SOURCES.txt +12 -1
cartesia-1.1.0/cartesia.egg-info/requires.txt +5 -0
cartesia-1.1.0/pyproject.toml +84 -0
{cartesia-1.0.13 → cartesia-1.1.0}/tests/test_tts.py +129 -30
cartesia-1.0.13/cartesia/__init__.py +0 -3
cartesia-1.0.13/cartesia/client.py +0 -1393
cartesia-1.0.13/cartesia/version.py +0 -1
cartesia-1.0.13/cartesia.egg-info/requires.txt +0 -22
cartesia-1.0.13/pyproject.toml +0 -56
cartesia-1.0.13/setup.py +0 -292
{cartesia-1.0.13 → cartesia-1.1.0}/LICENSE.md +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/__init__.py +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/deprecated.py +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia/utils/retry.py +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/dependency_links.txt +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/cartesia.egg-info/top_level.txt +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/setup.cfg +0 -0
{cartesia-1.0.13 → cartesia-1.1.0}/tests/test_deprecated.py +0 -0

{cartesia-1.0.13/cartesia.egg-info → cartesia-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,35 +1,32 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.13
+Version: 1.1.0
 Summary: The official Python library for the Cartesia API.
-Home-page:
-Author: Cartesia, Inc.
-Author-email: support@cartesia.ai
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: >=3.8.0
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-Provides-Extra: dev
-Provides-Extra: all
 License-File: LICENSE.md
+Requires-Dist: aiohttp>=3.10.10
+Requires-Dist: httpx>=0.27.2
+Requires-Dist: iterators>=0.2.0
+Requires-Dist: requests>=2.32.3
+Requires-Dist: websockets>=13.1
 # Cartesia Python API Library
 ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
-[![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
+[![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
 The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
 > [!IMPORTANT]
-> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
+> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
 - [Cartesia Python API Library](#cartesia-python-api-library)
   - [Documentation](#documentation)
   - [Installation](#installation)
   - [Voices](#voices)
   - [Text-to-Speech](#text-to-speech)
+    - [Bytes](#bytes)
     - [Server-Sent Events (SSE)](#server-sent-events-sse)
     - [WebSocket](#websocket)
       - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
@@ -88,6 +85,30 @@ new_voice = client.voices.create(
 ## Text-to-Speech
+### Bytes
+```python
+from cartesia import Cartesia
+import os
+client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+data = client.tts.bytes(
+    model_id="sonic-english",
+    transcript="Hello, world! I'm generating audio on Cartesia.",
+    voice_id="a0e99841-438c-4a64-b679-ae501e7d6091",  # Barbershop Man
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
+    output_format={
+        "container": "wav",
+        "encoding": "pcm_f32le",
+        "sample_rate": 44100,
+    },
+)
+with open("output.wav", "wb") as f:
+    f.write(data)
+```
 ### Server-Sent Events (SSE)
 ```python
@@ -96,7 +117,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
@@ -105,7 +125,7 @@ transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -149,14 +169,13 @@ import os
 async def write_stream():
     client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-    voice_name = "Barbershop Man"
     voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
     voice = client.voices.get(id=voice_id)
     transcript = "Hello! Welcome to Cartesia"
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
         "encoding": "pcm_f32le",
@@ -203,7 +222,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
 transcript = "Hello! Welcome to Cartesia"
@@ -211,7 +229,7 @@ transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -272,7 +290,7 @@ async def send_transcripts(ctx):
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
         "encoding": "pcm_f32le",
@@ -380,7 +398,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -460,7 +478,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
@@ -470,7 +487,7 @@ language = "es"  # Language code corresponding to the language of the transcript
 # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-multilingual"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -623,7 +640,7 @@ display(audio)
 #### Output Formats
-You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
+You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
 The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.

cartesia-1.0.13/PKG-INFO → cartesia-1.1.0/README.md RENAMED Viewed

@@ -1,35 +1,19 @@
-Metadata-Version: 2.1
-Name: cartesia
-Version: 1.0.13
-Summary: The official Python library for the Cartesia API.
-Home-page:
-Author: Cartesia, Inc.
-Author-email: support@cartesia.ai
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: >=3.8.0
-Description-Content-Type: text/markdown
-Provides-Extra: dev
-Provides-Extra: all
-License-File: LICENSE.md
 # Cartesia Python API Library
 ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
-[![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
+[![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
 The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
 > [!IMPORTANT]
-> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
+> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) for any support requests!
 - [Cartesia Python API Library](#cartesia-python-api-library)
   - [Documentation](#documentation)
   - [Installation](#installation)
   - [Voices](#voices)
   - [Text-to-Speech](#text-to-speech)
+    - [Bytes](#bytes)
     - [Server-Sent Events (SSE)](#server-sent-events-sse)
     - [WebSocket](#websocket)
       - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
@@ -88,6 +72,30 @@ new_voice = client.voices.create(
 ## Text-to-Speech
+### Bytes
+```python
+from cartesia import Cartesia
+import os
+client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+data = client.tts.bytes(
+    model_id="sonic-english",
+    transcript="Hello, world! I'm generating audio on Cartesia.",
+    voice_id="a0e99841-438c-4a64-b679-ae501e7d6091",  # Barbershop Man
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
+    output_format={
+        "container": "wav",
+        "encoding": "pcm_f32le",
+        "sample_rate": 44100,
+    },
+)
+with open("output.wav", "wb") as f:
+    f.write(data)
+```
 ### Server-Sent Events (SSE)
 ```python
@@ -96,7 +104,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
@@ -105,7 +112,7 @@ transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -149,14 +156,13 @@ import os
 async def write_stream():
     client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-    voice_name = "Barbershop Man"
     voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
     voice = client.voices.get(id=voice_id)
     transcript = "Hello! Welcome to Cartesia"
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
         "encoding": "pcm_f32le",
@@ -203,7 +209,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
 transcript = "Hello! Welcome to Cartesia"
@@ -211,7 +216,7 @@ transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -272,7 +277,7 @@ async def send_transcripts(ctx):
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-    # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+    # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
         "encoding": "pcm_f32le",
@@ -380,7 +385,7 @@ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-english"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -460,7 +465,6 @@ import pyaudio
 import os
 client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
-voice_name = "Barbershop Man"
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 voice = client.voices.get(id=voice_id)
@@ -470,7 +474,7 @@ language = "es"  # Language code corresponding to the language of the transcript
 # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
 model_id = "sonic-multilingual"
-# You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
+# You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
 output_format = {
     "container": "raw",
     "encoding": "pcm_f32le",
@@ -623,7 +627,7 @@ display(audio)
 #### Output Formats
-You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
+You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
 The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.

cartesia-1.1.0/cartesia/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from cartesia.async_client import AsyncCartesia
+from cartesia.client import Cartesia
+__all__ = ["Cartesia", "AsyncCartesia"]

cartesia-1.1.0/cartesia/_async_sse.py ADDED Viewed

@@ -0,0 +1,95 @@
+import base64
+import json
+from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
+import aiohttp
+from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
+from cartesia._logger import logger
+from cartesia._sse import _SSE
+from cartesia._types import OutputFormat, VoiceControls
+from cartesia.utils.retry import retry_on_connection_error_async
+from cartesia.utils.tts import _construct_tts_request
+class _AsyncSSE(_SSE):
+    """This class contains methods to generate audio using Server-Sent Events asynchronously."""
+    def __init__(
+        self,
+        http_url: str,
+        headers: Dict[str, str],
+        timeout: float,
+        get_session: Callable[[], Optional[aiohttp.ClientSession]],
+    ):
+        super().__init__(http_url, headers, timeout)
+        self._get_session = get_session
+    async def send(
+        self,
+        model_id: str,
+        transcript: str,
+        output_format: OutputFormat,
+        voice_id: Optional[str] = None,
+        voice_embedding: Optional[List[float]] = None,
+        duration: Optional[int] = None,
+        language: Optional[str] = None,
+        stream: bool = True,
+        _experimental_voice_controls: Optional[VoiceControls] = None,
+    ) -> Union[bytes, AsyncGenerator[bytes, None]]:
+        request_body = _construct_tts_request(
+            model_id=model_id,
+            transcript=transcript,
+            output_format=output_format,
+            voice_id=voice_id,
+            voice_embedding=voice_embedding,
+            duration=duration,
+            language=language,
+            _experimental_voice_controls=_experimental_voice_controls,
+        )
+        generator = self._sse_generator_wrapper(request_body)
+        if stream:
+            return generator
+        chunks = []
+        async for chunk in generator:
+            chunks.append(chunk["audio"])
+        return {"audio": b"".join(chunks)}
+    @retry_on_connection_error_async(
+        max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
+    )
+    async def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
+        """Need to wrap the sse generator in a function for the retry decorator to work."""
+        try:
+            async for chunk in self._sse_generator(request_body):
+                yield chunk
+        except Exception as e:
+            raise RuntimeError(f"Error generating audio. {e}")
+    async def _sse_generator(self, request_body: Dict[str, Any]):
+        session = await self._get_session()
+        async with session.post(
+            f"{self.http_url}/tts/sse",
+            data=json.dumps(request_body),
+            headers=self.headers,
+        ) as response:
+            if not response.ok:
+                raise ValueError(f"Failed to generate audio. {await response.text()}")
+            buffer = ""
+            async for chunk_bytes in response.content.iter_any():
+                buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
+                for output in outputs:
+                    yield output
+            if buffer:
+                try:
+                    chunk_json = json.loads(buffer)
+                    audio = base64.b64decode(chunk_json["data"])
+                    yield {"audio": audio}
+                except json.JSONDecodeError:
+                    pass

cartesia 1.0.13__tar.gz → 1.1.0__tar.gz

cartesia 1.0.13.tar.gz → 1.1.0.tar.gz