cartesia 1.0.7__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.7
+Version: 1.0.9
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
    model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -8,6 +8,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -233,7 +249,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -249,7 +265,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -261,7 +277,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -367,7 +383,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -0,0 +1,3 @@
+from cartesia.client import AsyncCartesia, Cartesia
+
+__all__ = ["Cartesia", "AsyncCartesia"]
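The new top-level `__init__.py` re-exports both clients, so they can be imported straight from the package root. A minimal sketch of what that enables; the `api_key` argument and the `CARTESIA_API_KEY` environment variable are assumptions taken from the project README, not from this diff:

```python
import os

# Both names are re-exported by cartesia/__init__.py, per the __all__ above.
from cartesia import AsyncCartesia, Cartesia

# Assumption: the clients accept an `api_key` argument, as documented in the Cartesia README.
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
async_client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
```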
@@ -1,4 +1,5 @@
 from typing import List, TypedDict
+
 from cartesia.utils.deprecated import deprecated
 
 
@@ -1,41 +1,47 @@
 import asyncio
 import base64
-from collections import defaultdict
 import json
+import logging
 import os
 import uuid
+from collections import defaultdict
 from types import TracebackType
 from typing import (
     Any,
     AsyncGenerator,
-    Iterator,
+    Callable,
     Dict,
     Generator,
+    Iterator,
     List,
     Optional,
+    Set,
     Tuple,
     Union,
-    Callable,
-    Set,
 )
 
 import aiohttp
 import httpx
-import logging
 import requests
-from websockets.sync.client import connect
-from iterators import TimeoutIterator
 
-from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
+try:
+    from websockets.sync.client import connect
+
+    IS_WEBSOCKET_SYNC_AVAILABLE = True
+except ImportError:
+    IS_WEBSOCKET_SYNC_AVAILABLE = False
+
 from cartesia._types import (
+    DeprecatedOutputFormatMapping,
     EventType,
     OutputFormat,
     OutputFormatMapping,
-    DeprecatedOutputFormatMapping,
     VoiceControls,
     VoiceMetadata,
 )
-
+from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
+from iterators import TimeoutIterator
+from websockets.sync.client import connect
 
 DEFAULT_MODEL_ID = "sonic-english" # latest default model
 MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
@@ -207,38 +213,27 @@ class Voices(Resource):
 
         return response.json()
 
-    def clone(self, filepath: Optional[str] = None, link: Optional[str] = None) -> List[float]:
-        """Clone a voice from a clip or a URL.
+    def clone(self, filepath: Optional[str] = None, enhance: str = True) -> List[float]:
+        """Clone a voice from a clip.
 
         Args:
             filepath: The path to the clip file.
-            link: The URL to the clip
+            enhance: Whether to enhance the clip before cloning the voice (highly recommended). Defaults to True.
 
         Returns:
             The embedding of the cloned voice as a list of floats.
         """
-        # TODO: Python has a bytes object, use that instead of a filepath
-        if not filepath and not link:
-            raise ValueError("At least one of 'filepath' or 'link' must be specified.")
-        if filepath and link:
-            raise ValueError("Only one of 'filepath' or 'link' should be specified.")
-        if filepath:
-            url = f"{self._http_url()}/voices/clone/clip"
-            with open(filepath, "rb") as file:
-                files = {"clip": file}
-                headers = self.headers.copy()
-                headers.pop("Content-Type", None)
-                response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
-                if not response.is_success:
-                    raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
-        elif link:
-            url = f"{self._http_url()}/voices/clone/url"
-            params = {"link": link}
+        if not filepath:
+            raise ValueError("Filepath must be specified.")
+        url = f"{self._http_url()}/voices/clone/clip"
+        with open(filepath, "rb") as file:
+            files = {"clip": file}
+            files["enhance"] = str(enhance).lower()
             headers = self.headers.copy()
-            headers.pop("Content-Type")  # The content type header is not required for URLs
-            response = httpx.post(url, headers=self.headers, params=params, timeout=self.timeout)
+            headers.pop("Content-Type", None)
+            response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
             if not response.is_success:
-                raise ValueError(f"Failed to clone voice from URL. Error: {response.text}")
+                raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
 
         return response.json()["embedding"]
 
@@ -469,6 +464,10 @@ class _WebSocket:
         Raises:
             RuntimeError: If the connection to the WebSocket fails.
         """
+        if not IS_WEBSOCKET_SYNC_AVAILABLE:
+            raise ImportError(
+                "The synchronous WebSocket client is not available. Please ensure that you have 'websockets>=12.0' or compatible version installed."
+            )
         if self.websocket is None or self._is_websocket_closed():
             route = "tts/websocket"
             try:
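The sync WebSocket import in `client.py` is now optional: a module-level flag records whether `websockets.sync.client` could be imported, and the error is raised only when a synchronous WebSocket connection is actually requested. A minimal standalone sketch of that pattern, assuming nothing beyond what the hunks above show (the `_connect_sync` helper is hypothetical, for illustration only):

```python
# Optional-dependency guard mirroring the change above: the import failure is
# recorded at module load time and surfaced only when the feature is used.
try:
    from websockets.sync.client import connect  # provided by websockets>=12.0

    IS_WEBSOCKET_SYNC_AVAILABLE = True
except ImportError:
    IS_WEBSOCKET_SYNC_AVAILABLE = False


def _connect_sync(url: str):
    """Hypothetical helper showing where the deferred ImportError surfaces."""
    if not IS_WEBSOCKET_SYNC_AVAILABLE:
        raise ImportError(
            "The synchronous WebSocket client is not available. "
            "Please ensure that you have 'websockets>=12.0' or a compatible version installed."
        )
    return connect(url)
```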
@@ -0,0 +1 @@
+__version__ = "1.0.9"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.7
+Version: 1.0.9
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -1,6 +1,6 @@
 [tool.ruff]
 # Add more rule codes as needed
-extend-select = [
+lint.extend-select = [
     "D", # pydocstyle - to replace docformatter
 ]
 
@@ -43,14 +43,14 @@ line-length = 100
 # Enable the count of violations
 output-format = "full"
 
-[tool.ruff.isort]
-force-wrap-aliases = true
-combine-as-imports = true
-force-sort-within-sections = true
-known-first-party = []
-known-third-party = []
-known-local-folder = []
-lines-after-imports = 2
-
-[tool.ruff.pydocstyle]
+[tool.ruff.lint.pydocstyle]
 convention = "google"
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+line_length = 100
@@ -1,8 +1,7 @@
-from packaging.version import Version
-
 import cartesia as Cartesia
-from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
 import cartesia.version as version
+from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
+from packaging.version import Version
 
 
 def test_deprecated_to_remove_by_version():
@@ -5,17 +5,18 @@ different results. Therefore, we cannot test for complete correctness but rather
 general correctness.
 """
 
+import asyncio
 import logging
 import os
 import sys
-from cartesia import AsyncCartesia, Cartesia
-from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
-from cartesia._types import VoiceControls, VoiceMetadata
+import uuid
 from typing import AsyncGenerator, Generator, List
+
 import numpy as np
 import pytest
-import uuid
-import asyncio
+from cartesia import AsyncCartesia, Cartesia
+from cartesia._types import VoiceControls, VoiceMetadata
+from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
 
 THISDIR = os.path.dirname(__file__)
 sys.path.insert(0, os.path.dirname(THISDIR))
@@ -79,19 +80,17 @@ def test_get_voice_from_id(client: Cartesia):
     voices = client.voices.list()
     assert voice in voices
 
-# Does not work currently, LB issue
-# def test_clone_voice_with_link(client: Cartesia):
-#     url = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
-#     logger.info("Testing voices.clone with link")
-#     cloned_voice_embedding = client.voices.clone(link=url)
-#     assert isinstance(cloned_voice_embedding, list)
-#     assert len(cloned_voice_embedding) == 192
-
 def test_clone_voice_with_file(client: Cartesia):
     logger.info("Testing voices.clone with file")
     output = client.voices.clone(filepath=os.path.join(RESOURCES_DIR, "sample-speech-4s.wav"))
     assert isinstance(output, list)
 
+@pytest.mark.parametrize("enhance", [True, False])
+def test_clone_voice_with_file_enhance(client: Cartesia, enhance: bool):
+    logger.info("Testing voices.clone with file")
+    output = client.voices.clone(filepath=os.path.join(RESOURCES_DIR, "sample-speech-4s.wav"), enhance=enhance)
+    assert isinstance(output, list)
+
 def test_create_voice(client: Cartesia):
     logger.info("Testing voices.create")
     embedding = np.ones(192).tolist()
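Outside the test harness, the new `enhance` flag on `voices.clone()` is passed the same way. A minimal usage sketch; only `voices.clone`, `filepath`, and `enhance` come from this diff, while the `api_key` handling is an assumption based on the project README:

```python
import os

from cartesia import Cartesia

# Assumption: the API key is read from the environment, as in the Cartesia README.
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))

# `enhance` defaults to True in 1.0.9; pass False to clone the clip unmodified.
embedding = client.voices.clone(filepath="sample-speech-4s.wav", enhance=False)
assert isinstance(embedding, list)  # the embedding is returned as a list of floats
```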
@@ -1,3 +0,0 @@
-from cartesia.client import Cartesia, AsyncCartesia
-
-__all__ = ["Cartesia", "AsyncCartesia"]
@@ -1 +0,0 @@
-__version__ = "1.0.7"
@@ -1,9 +1,9 @@
-import time
-
-from aiohttp.client_exceptions import ServerDisconnectedError
 import asyncio
+import time
 from functools import wraps
 from http.client import RemoteDisconnected
+
+from aiohttp.client_exceptions import ServerDisconnectedError
 from httpx import TimeoutException
 from requests.exceptions import ConnectionError
 