cartesia 1.0.0__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.0
+Version: 1.0.1
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -21,10 +21,8 @@ Provides-Extra: all
 
 The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
 
-**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
-
 > [!IMPORTANT]
-> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/discussions/44) here and reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) if you have any questions!
+> The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
 ## Documentation
 
@@ -59,7 +57,11 @@ print("The embedding for", voice["name"], "is", voice["embedding"])
 cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
 
 # Create a new voice
-new_voice = client.voices.create(name="New Voice", description="A clone of my own voice", embedding=cloned_voice_embedding)
+new_voice = client.voices.create(
+    name="New Voice",
+    description="A clone of my own voice",
+    embedding=cloned_voice_embedding,
+)
 ```
 
 ## Text-to-Speech
@@ -94,14 +96,17 @@ rate = 44100
 stream = None
 
 # Generate and stream audio
-for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
+for output in client.tts.sse(
+    model_id=model_id,
+    transcript=transcript,
+    voice_embedding=voice["embedding"],
+    stream=True,
+    output_format=output_format,
+):
     buffer = output["audio"]
 
     if not stream:
-        stream = p.open(format=pyaudio.paFloat32,
-                        channels=1,
-                        rate=rate,
-                        output=True)
+        stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
 
     # Write the audio data to the stream
    stream.write(buffer)
@@ -119,6 +124,7 @@ import asyncio
 import pyaudio
 import os
 
+
 async def write_stream():
     client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
     voice_name = "Barbershop Man"
@@ -141,15 +147,19 @@ async def write_stream():
     stream = None
 
     # Generate and stream audio
-    async for output in await client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
+    async for output in await client.tts.sse(
+        model_id=model_id,
+        transcript=transcript,
+        voice_embedding=voice["embedding"],
+        stream=True,
+        output_format=output_format,
     ):
         buffer = output["audio"]
 
         if not stream:
-            stream = p.open(format=pyaudio.paFloat32,
-                            channels=1,
-                            rate=rate,
-                            output=True)
+            stream = p.open(
+                format=pyaudio.paFloat32, channels=1, rate=rate, output=True
+            )
 
         # Write the audio data to the stream
         stream.write(buffer)
@@ -157,6 +167,8 @@ async def write_stream():
     stream.stop_stream()
     stream.close()
     p.terminate()
+    await client.close()
+
 
 asyncio.run(write_stream())
 ```
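
The `await client.close()` call added above releases the async client's underlying session explicitly. An equivalent pattern, and the one the README's own batch example further down already uses, is the async context manager form. A minimal sketch, assuming the same environment variable as the README:

```python
import os

from cartesia import AsyncCartesia


async def main():
    # The context manager closes the client on exit, making an explicit
    # `await client.close()` unnecessary.
    async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
        ...  # stream TTS as in the example above
```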
@@ -193,14 +205,17 @@ stream = None
 ws = client.tts.websocket()
 
 # Generate and stream audio using the websocket
-for output in ws.send(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
+for output in ws.send(
+    model_id=model_id,
+    transcript=transcript,
+    voice_embedding=voice["embedding"],
+    stream=True,
+    output_format=output_format,
+):
     buffer = output["audio"]
 
     if not stream:
-        stream = p.open(format=pyaudio.paFloat32,
-                        channels=1,
-                        rate=rate,
-                        output=True)
+        stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
 
     # Write the audio data to the stream
     stream.write(buffer)
@@ -209,7 +224,7 @@ stream.stop_stream()
 stream.close()
 p.terminate()
 
-ws.close() # Close the websocket connection
+ws.close()  # Close the websocket connection
 ```
 
 ### Multilingual Text-to-Speech [Alpha]
@@ -245,14 +260,18 @@ rate = 44100
 stream = None
 
 # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
-for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format, language=language):
+for output in client.tts.sse(
+    model_id=model_id,
+    transcript=transcript,
+    voice_embedding=voice["embedding"],
+    stream=True,
+    output_format=output_format,
+    language=language,
+):
     buffer = output["audio"]
 
     if not stream:
-        stream = p.open(format=pyaudio.paFloat32,
-                        channels=1,
-                        rate=rate,
-                        output=True)
+        stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
 
     stream.write(buffer)
 
@@ -287,7 +306,12 @@ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
     audio_data = io.BytesIO()
 
     # Generate and stream audio
-    for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
+    for output in client.tts.sse(
+        model_id="sonic-english",
+        transcript=transcript,
+        voice_embedding=voice["embedding"],
+        stream=True,
+        output_format=output_format,
     ):
         buffer = output["audio"]
         audio_data.write(buffer)
@@ -326,7 +350,12 @@ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
     audio_data = io.BytesIO()
 
     # Generate and stream audio
-    async for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_id=voice_id, stream=True, output_format=output_format
+    async for output in client.tts.sse(
+        model_id="sonic-english",
+        transcript=transcript,
+        voice_id=voice_id,
+        stream=True,
+        output_format=output_format,
    ):
         buffer = output["audio"]
         audio_data.write(buffer)
@@ -341,6 +370,28 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
 display(audio)
 ```
 
+### Utility methods
+
+#### Output Formats
+
+You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
+
+The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
+
+```python
+# Get the output format dictionary from string name
+output_format = client.tts.get_output_format("raw_pcm_f32le_44100")
+
+# Pass in the output format dictionary to generate and stream audio
+generator = client.tts.sse(
+    model_id=model,
+    transcript=transcript,
+    voice_id=SAMPLE_VOICE_ID,
+    stream=True,
+    output_format=output_format,
+)
+```
+
 To avoid storing your API key in the source code, we recommend doing one of the following:
 
 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
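
The new Output Formats helpers pair naturally with the PyAudio setup used throughout the README. A short sketch under the same assumptions as the examples above (`client.tts.get_sample_rate` appears in the `cartesia/client.py` changes further down):

```python
import os

import pyaudio
from cartesia import Cartesia

client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))

# Resolve a new-style name into the dictionary expected by `output_format=`.
output_format = client.tts.get_output_format("raw_pcm_f32le_44100")

# Look up the sample rate for the same name, keeping the playback stream
# configuration in sync with the requested format.
rate = client.tts.get_sample_rate("raw_pcm_f32le_44100")

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
```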
@@ -0,0 +1,75 @@
+from typing import List, TypedDict
+from cartesia.utils.deprecated import deprecated
+
+
+class OutputFormatMapping:
+    _format_mapping = {
+        "raw_pcm_f32le_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+        "raw_pcm_s16le_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+        "raw_pcm_f32le_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
+        "raw_pcm_s16le_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
+        "raw_pcm_f32le_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
+        "raw_pcm_s16le_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
+        "raw_pcm_f32le_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
+        "raw_pcm_s16le_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
+        "raw_pcm_f32le_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
+        "raw_pcm_s16le_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
+        "raw_pcm_mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
+        "raw_pcm_alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
+    }
+
+    @classmethod
+    def get_format(cls, format_name):
+        if format_name in cls._format_mapping:
+            return cls._format_mapping[format_name]
+        else:
+            raise ValueError(f"Unsupported format: {format_name}")
+
+
+class DeprecatedOutputFormatMapping:
+    """Deprecated formats as of v1.0.1. These will be removed in v1.2.0. Use :class:`OutputFormatMapping` instead."""
+
+    _format_mapping = {
+        "fp32": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+        "pcm": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+        "fp32_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
+        "fp32_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
+        "fp32_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
+        "fp32_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
+        "fp32_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+        "pcm_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
+        "pcm_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
+        "pcm_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
+        "pcm_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
+        "pcm_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+        "mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
+        "alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
+    }
+
+    @deprecated(
+        vdeprecated="1.0.1",
+        vremove="1.2.0",
+        reason="Old output format names are being deprecated in favor of names aligned with the Cartesia API. Use names from `OutputFormatMapping` instead.",
+    )
+    def get_format_deprecated(self, format_name):
+        if format_name in self._format_mapping:
+            return self._format_mapping[format_name]
+        else:
+            raise ValueError(f"Unsupported format: {format_name}")
+
+
+class VoiceMetadata(TypedDict):
+    id: str
+    name: str
+    description: str
+    embedding: List[float]
+    is_public: bool
+    user_id: str
+    created_at: str
+    language: str
+
+
+class OutputFormat(TypedDict):
+    container: str
+    encoding: str
+    sample_rate: int
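
A standalone sketch of how the pieces of the new `cartesia/_types.py` fit together, using only names defined in the file above (the unsupported name in the error path is made up):

```python
from cartesia._types import OutputFormat, OutputFormatMapping

# Supported names resolve to plain dictionaries.
fmt = OutputFormatMapping.get_format("raw_pcm_s16le_16000")

# The same shape as the OutputFormat TypedDict that client.py passes around.
output_format: OutputFormat = {
    "container": fmt["container"],      # "raw"
    "encoding": fmt["encoding"],        # "pcm_s16le"
    "sample_rate": fmt["sample_rate"],  # 16000
}

# Unknown names raise ValueError.
try:
    OutputFormatMapping.get_format("raw_pcm_f32le_96000")
except ValueError as err:
    print(err)  # Unsupported format: raw_pcm_f32le_96000
```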
@@ -12,10 +12,12 @@ import logging
 import requests
 from websockets.sync.client import connect
 
-from cartesia.utils import retry_on_connection_error, retry_on_connection_error_async
+from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
+from cartesia.utils.deprecated import deprecated
 from cartesia._types import (
     OutputFormat,
     OutputFormatMapping,
+    DeprecatedOutputFormatMapping,
     VoiceMetadata,
 )
 
@@ -131,14 +133,7 @@ class Voices(Resource):
         """List all voices in your voice library.
 
         Returns:
-            This method returns a list of VoiceMetadata objects with the following keys:
-            - id: The ID of the voice.
-            - name: The name of the voice.
-            - description: The description of the voice.
-            - embedding: The embedding of the voice.
-            - is_public: Whether the voice is public.
-            - user_id: The ID of the user who created the voice.
-            - created_at: The timestamp (str) when the voice was created.
+            This method returns a list of VoiceMetadata objects.
         """
         response = httpx.get(
             f"{self._http_url()}/voices",
@@ -159,14 +154,7 @@ class Voices(Resource):
             id: The ID of the voice.
 
         Returns:
-            A dictionary containing the voice metadata with the following keys:
-            - id: The ID of the voice.
-            - name: The name of the voice.
-            - description: The description of the voice.
-            - embedding: The embedding of the voice as a list of floats.
-            - is_public: Whether the voice is public.
-            - user_id: The ID of the user who created the voice.
-            - created_at: The timestamp when the voice was created.
+            A VoiceMetadata object containing the voice metadata.
         """
         url = f"{self._http_url()}/voices/{id}"
         response = httpx.get(url, headers=self.headers, timeout=self.timeout)
@@ -344,8 +332,11 @@ class _WebSocket:
             stream: Whether to stream the audio or not. (Default is True)
 
         Returns:
-            If `stream` is True, the method returns a generator that yields chunks of audio as bytes.
-            If `stream` is False, the method returns a dictionary containing the concatenated audio as bytes and the context ID.
+            If `stream` is True, the method returns a generator that yields chunks. Each chunk is a dictionary.
+            If `stream` is False, the method returns a dictionary.
+            Both the generator and the dictionary contain the following key(s):
+            - audio: The audio as bytes.
+            - context_id: The context ID for the request.
         """
         self.connect()
 
@@ -490,8 +481,10 @@ class _SSE:
             stream: Whether to stream the audio or not.
 
         Returns:
-            If `stream` is True, the method returns a generator that yields chunks. Each chunk is a dictionary containing the audio as bytes.
-            If `stream` is False, the method returns a dictionary containing the audio as bytes.
+            If `stream` is True, the method returns a generator that yields chunks. Each chunk is a dictionary.
+            If `stream` is False, the method returns a dictionary.
+            Both the generator and the dictionary contain the following key(s):
+            - audio: The audio as bytes.
         """
         voice = self._validate_and_construct_voice(voice_id, voice_embedding)
 
@@ -581,15 +574,26 @@ class TTS(Resource):
         return ws
 
     def get_output_format(self, output_format_name: str) -> OutputFormat:
-        """Convenience method to get the output_format object from a given output format name.
+        """Convenience method to get the output_format dictionary from a given output format name.
 
         Args:
             output_format_name (str): The name of the output format.
 
         Returns:
             OutputFormat: A dictionary containing the details of the output format to be passed into tts.sse() or tts.websocket().send()
+
+        Raises:
+            ValueError: If the output_format name is not supported
         """
-        output_format_obj = OutputFormatMapping.get_format(output_format_name)
+        if output_format_name in OutputFormatMapping._format_mapping:
+            output_format_obj = OutputFormatMapping.get_format(output_format_name)
+        elif output_format_name in DeprecatedOutputFormatMapping._format_mapping:
+            output_format_obj = DeprecatedOutputFormatMapping.get_format_deprecated(
+                output_format_name
+            )
+        else:
+            raise ValueError(f"Unsupported format: {output_format_name}")
+
         return OutputFormat(
             container=output_format_obj["container"],
             encoding=output_format_obj["encoding"],
@@ -604,8 +608,19 @@ class TTS(Resource):
 
         Returns:
             int: The sample rate for the output format.
+
+        Raises:
+            ValueError: If the output_format name is not supported
         """
-        output_format_obj = OutputFormatMapping.get_format(output_format_name)
+        if output_format_name in OutputFormatMapping._format_mapping:
+            output_format_obj = OutputFormatMapping.get_format(output_format_name)
+        elif output_format_name in DeprecatedOutputFormatMapping._format_mapping:
+            output_format_obj = DeprecatedOutputFormatMapping.get_format_deprecated(
+                output_format_name
+            )
+        else:
+            raise ValueError(f"Unsupported format: {output_format_name}")
+
         return output_format_obj["sample_rate"]
 
 
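
The docstring changes above make the chunk shape explicit for both transports: SSE chunks carry an `audio` key, and websocket chunks additionally carry `context_id`. A minimal consumer sketch built from calls shown elsewhere in this diff (the voice choice and transcript are placeholders):

```python
import os

from cartesia import Cartesia

client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
voice = client.voices.list()[0]  # any voice with an embedding
transcript = "Hello, world!"
output_format = client.tts.get_output_format("raw_pcm_f32le_44100")

# SSE: each chunk is a dictionary with the raw audio bytes under "audio".
for output in client.tts.sse(
    model_id="sonic-english",
    transcript=transcript,
    voice_embedding=voice["embedding"],
    stream=True,
    output_format=output_format,
):
    audio_bytes = output["audio"]

# Websocket: chunks also carry the context ID for the request.
ws = client.tts.websocket()
for output in ws.send(
    model_id="sonic-english",
    transcript=transcript,
    voice_embedding=voice["embedding"],
    stream=True,
    output_format=output_format,
):
    audio_bytes, context_id = output["audio"], output["context_id"]
ws.close()
```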
@@ -0,0 +1 @@
+__version__ = "1.0.1"
@@ -4,11 +4,11 @@ setup.py
 cartesia/__init__.py
 cartesia/_types.py
 cartesia/client.py
-cartesia/utils.py
 cartesia/version.py
 cartesia.egg-info/PKG-INFO
 cartesia.egg-info/SOURCES.txt
 cartesia.egg-info/dependency_links.txt
 cartesia.egg-info/requires.txt
 cartesia.egg-info/top_level.txt
+tests/test_deprecated.py
 tests/test_tts.py
@@ -0,0 +1,21 @@
+from packaging.version import Version
+
+import cartesia as Cartesia
+from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
+import cartesia.version as version
+
+
+def test_deprecated_to_remove_by_version():
+    """
+    Test that all deprecated functions that are listed to be
+    removed by the current version are removed.
+    """
+    versions_to_remove = [x["vremove"] for x in _DEPRECATED_FUNCTION_STATS]
+    versions_to_remove = [Version(x) for x in versions_to_remove if x is not None]
+
+    curr_version = Version(version.__version__)
+
+    assert all(v > curr_version for v in versions_to_remove)
+
+# This test is taken from the following source:
+# https://github.com/ad12/meddlr/blob/main/tests/utils/test_deprecated.py
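
`cartesia/utils/deprecated.py` itself is not shown in this diff. The sketch below is a hypothetical reconstruction inferred only from the `@deprecated(vdeprecated=..., vremove=..., reason=...)` usage in `cartesia/_types.py` and from the `_DEPRECATED_FUNCTION_STATS` entries (with their `"vremove"` key) that the test above reads:

```python
# Hypothetical sketch of cartesia/utils/deprecated.py (not part of this diff).
import functools
import warnings

# Each entry records a deprecation schedule; the test above asserts that
# nothing outlives its "vremove" version.
_DEPRECATED_FUNCTION_STATS = []


def deprecated(vdeprecated=None, vremove=None, reason=None):
    def decorator(func):
        _DEPRECATED_FUNCTION_STATS.append(
            {"name": func.__qualname__, "vdeprecated": vdeprecated, "vremove": vremove}
        )

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            msg = f"{func.__qualname__} is deprecated since v{vdeprecated}"
            if vremove is not None:
                msg += f" and will be removed in v{vremove}"
            if reason is not None:
                msg += f". {reason}"
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator
```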
@@ -1,37 +0,0 @@
-from typing import List, TypedDict
-
-class OutputFormatMapping:
-    _format_mapping = {
-        "fp32": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
-        "pcm": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
-        "fp32_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
-        "fp32_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
-        "fp32_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
-        "pcm_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
-        "pcm_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
-        "pcm_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
-        "mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
-        "alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
-    }
-
-    @classmethod
-    def get_format(cls, format_name):
-        if format_name in cls._format_mapping:
-            return cls._format_mapping[format_name]
-        else:
-            raise ValueError(f"Unsupported format: {format_name}")
-
-class VoiceMetadata(TypedDict):
-    id: str
-    name: str
-    description: str
-    embedding: List[float]
-    is_public: bool
-    user_id: str
-    created_at: str
-    language: str
-
-class OutputFormat(TypedDict):
-    container: str
-    encoding: str
-    sample_rate: int
@@ -1,87 +0,0 @@
-import time
-
-from aiohttp.client_exceptions import ServerDisconnectedError
-import asyncio
-from functools import wraps
-from http.client import RemoteDisconnected
-from httpx import TimeoutException
-from requests.exceptions import ConnectionError
-
-
-def retry_on_connection_error(max_retries=3, backoff_factor=1, logger=None):
-    """Retry a function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.
-
-    Args:
-        max_retries (int): The maximum number of retries.
-        backoff_factor (int): The factor to increase the delay between retries.
-        logger (logging.Logger): The logger to use for logging.
-    """
-
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            retry_count = 0
-            while retry_count < max_retries:
-                try:
-                    return func(*args, **kwargs)
-                except (
-                    ConnectionError,
-                    RemoteDisconnected,
-                    ServerDisconnectedError,
-                    TimeoutException,
-                ) as e:
-                    logger.info(f"Retrying after exception: {e}")
-                    retry_count += 1
-                    if retry_count < max_retries:
-                        delay = backoff_factor * (2 ** (retry_count - 1))
-                        logger.warn(
-                            f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds..."
-                        )
-                        time.sleep(delay)
-                    else:
-                        raise Exception(f"Exception occurred after {max_retries} tries.") from e
-
-        return wrapper
-
-    return decorator
-
-
-def retry_on_connection_error_async(max_retries=3, backoff_factor=1, logger=None):
-    """Retry an asynchronous function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.
-
-    Args:
-        max_retries (int): The maximum number of retries.
-        backoff_factor (int): The factor to increase the delay between retries.
-        logger (logging.Logger): The logger to use for logging.
-    """
-
-    def decorator(func):
-        @wraps(func)
-        async def wrapper(*args, **kwargs):
-            retry_count = 0
-            while retry_count < max_retries:
-                try:
-                    async for chunk in func(*args, **kwargs):
-                        yield chunk
-                    # If the function completes without raising an exception return
-                    return
-                except (
-                    ConnectionError,
-                    RemoteDisconnected,
-                    ServerDisconnectedError,
-                    TimeoutException,
-                ) as e:
-                    logger.info(f"Retrying after exception: {e}")
-                    retry_count += 1
-                    if retry_count < max_retries:
-                        delay = backoff_factor * (2 ** (retry_count - 1))
-                        logger.warn(
-                            f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds..."
-                        )
-                        await asyncio.sleep(delay)
-                    else:
-                        raise Exception(f"Exception occurred after {max_retries} tries.") from e
-
-        return wrapper
-
-    return decorator
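
This module is deleted here because, per the import change in `cartesia/client.py` above, the retry helpers now live in `cartesia.utils.retry`. A usage sketch under that assumption (the decorated function is a made-up example; note the decorator logs on every retry, so a logger must be supplied or its internal `logger.info` call would fail):

```python
import logging

import requests

from cartesia.utils.retry import retry_on_connection_error

logger = logging.getLogger(__name__)


# Retries up to 3 attempts with exponential backoff (1 s, then 2 s) on the
# connection-related exceptions listed in the module above.
@retry_on_connection_error(max_retries=3, backoff_factor=1, logger=logger)
def fetch_status(url: str) -> int:
    return requests.get(url, timeout=5).status_code
```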
@@ -1 +0,0 @@
-__version__ = "1.0.0"