PyPI - cartesia - Versions diffs - 0.0.2__tar.gz → 0.0.4__tar.gz - Mend

cartesia 0.0.2.tar.gz → 0.0.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{cartesia-0.0.2 → cartesia-0.0.4}/PKG-INFO +57 -12
cartesia-0.0.4/README.md +81 -0
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia/tts.py +74 -29
cartesia-0.0.4/cartesia/version.py +1 -0
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/PKG-INFO +57 -12
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/requires.txt +0 -1
{cartesia-0.0.2 → cartesia-0.0.4}/setup.py +45 -17
cartesia-0.0.4/tests/test_tts.py +180 -0
cartesia-0.0.2/README.md +0 -35
cartesia-0.0.2/cartesia/version.py +0 -1
cartesia-0.0.2/tests/test_tts.py +0 -96
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia/__init__.py +0 -0
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/SOURCES.txt +0 -0
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/dependency_links.txt +0 -0
{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/top_level.txt +0 -0
{cartesia-0.0.2 → cartesia-0.0.4}/pyproject.toml +0 -0
{cartesia-0.0.2 → cartesia-0.0.4}/setup.cfg +0 -0

{cartesia-0.0.2 → cartesia-0.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 0.0.2
+Version: 0.0.4
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -12,7 +12,6 @@ Requires-Python: >=3.8.0
 Description-Content-Type: text/markdown
 Requires-Dist: websockets
 Requires-Dist: requests
-Requires-Dist: numpy
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
 Requires-Dist: docformatter; extra == "dev"
@@ -49,22 +48,68 @@ pip install -e '.[dev]'
 ## Usage
 ```python
 from cartesia.tts import CartesiaTTS
-from IPython.display import Audio
+import pyaudio
+import os
 client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
 voices = client.get_voices()
-embedding = voices["Milo"]["embedding"]
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
 transcript = "Hello! Welcome to Cartesia"
-# No streaming
-output = client.generate(transcript=transcript, voice=embedding)
-Audio(output["audio"], rate=output["sampling_rate"])
+p = pyaudio.PyAudio()
+stream = None
-# Streaming
-for output in client.generate(transcript=transcript, voice=embedding, stream=True):
-    arr = output["audio"]  # a numpy array
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
     rate = output["sampling_rate"]
+    if not stream:
+        stream = p.open(format=pyaudio.paFloat32,
+                        channels=1,
+                        rate=rate,
+                        output=True)
+    # Write the audio data to the stream
+    stream.write(buffer)
+stream.stop_stream()
+stream.close()
+p.terminate()
+```
+If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
+```python
+from cartesia.tts import CartesiaTTS
+from IPython.display import Audio
+import io
+import os
+client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+voices = client.get_voices()
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+transcript = "Hello! Welcome to Cartesia"
+# Create a BytesIO object to store the audio data
+audio_data = io.BytesIO()
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
+    audio_data.write(buffer)
+# Set the cursor position to the beginning of the BytesIO object
+audio_data.seek(0)
+# Create an Audio object from the BytesIO data
+audio = Audio(audio_data, rate=output["sampling_rate"])
+# Display the Audio object
+display(audio)
 ```
-We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
+To avoid storing your API key in the source code, we recommend doing one of the following:
+1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
+1. Set the `CARTESIA_API_KEY` environment variable, preferably to a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)

cartesia-0.0.4/README.md ADDED Viewed

@@ -0,0 +1,81 @@
+# Cartesia Python API Library
+The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
+**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
+## Installation
+```bash
+pip install cartesia
+# pip install in editable mode w/ dev dependencies
+pip install -e '.[dev]'
+```
+## Usage
+```python
+from cartesia.tts import CartesiaTTS
+import pyaudio
+import os
+client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+voices = client.get_voices()
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+transcript = "Hello! Welcome to Cartesia"
+p = pyaudio.PyAudio()
+stream = None
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
+    rate = output["sampling_rate"]
+    if not stream:
+        stream = p.open(format=pyaudio.paFloat32,
+                        channels=1,
+                        rate=rate,
+                        output=True)
+    # Write the audio data to the stream
+    stream.write(buffer)
+stream.stop_stream()
+stream.close()
+p.terminate()
+```
+If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
+```python
+from cartesia.tts import CartesiaTTS
+from IPython.display import Audio
+import io
+import os
+client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+voices = client.get_voices()
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+transcript = "Hello! Welcome to Cartesia"
+# Create a BytesIO object to store the audio data
+audio_data = io.BytesIO()
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
+    audio_data.write(buffer)
+# Set the cursor position to the beginning of the BytesIO object
+audio_data.seek(0)
+# Create an Audio object from the BytesIO data
+audio = Audio(audio_data, rate=output["sampling_rate"])
+# Display the Audio object
+display(audio)
+```
+To avoid storing your API key in the source code, we recommend doing one of the following:
+1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
+1. Set the `CARTESIA_API_KEY` environment variable, preferably to a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia/tts.py RENAMED Viewed

@@ -4,7 +4,6 @@ import os
 import uuid
 from typing import Any, Dict, Generator, List, Optional, TypedDict, Union
-import numpy as np
 import requests
 from websockets.sync.client import connect
@@ -14,7 +13,7 @@ DEFAULT_API_VERSION = "v0"
 class AudioOutput(TypedDict):
-    audio: np.ndarray
+    audio: bytes
     sampling_rate: int
@@ -32,7 +31,11 @@ class CartesiaTTS:
     """The client for Cartesia's text-to-speech library.
     This client contains methods to interact with the Cartesia text-to-speech API.
-    The API offers
+    The client can be used to retrieve available voices, compute new voice embeddings,
+    and generate speech from text.
+    The client also supports generating audio using a websocket for lower latency.
+    To enable interrupt handling along the websocket, set `experimental_ws_handle_interrupts=True`.
     Examples:
@@ -56,18 +59,22 @@ class CartesiaTTS:
         ...     audio, sr = audio_chunk["audio"], audio_chunk["sampling_rate"]
     """
-    def __init__(self, *, api_key: str = None):
+    def __init__(self, *, api_key: str = None, experimental_ws_handle_interrupts: bool = False):
         """
         Args:
             api_key: The API key to use for authorization.
                 If not specified, the API key will be read from the environment variable
                 `CARTESIA_API_KEY`.
+            experimental_ws_handle_interrupts: Whether to handle interrupts when generating
+                audio using the websocket. This is an experimental feature and may have bugs
+                or be deprecated in the future.
         """
         self.base_url = os.environ.get("CARTESIA_BASE_URL", DEFAULT_BASE_URL)
         self.api_key = api_key or os.environ.get("CARTESIA_API_KEY")
         self.api_version = os.environ.get("CARTESIA_API_VERSION", DEFAULT_API_VERSION)
         self.headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"}
         self.websocket = None
+        self.experimental_ws_handle_interrupts = experimental_ws_handle_interrupts
         self.refresh_websocket()
     def get_voices(self, skip_embeddings: bool = True) -> Dict[str, VoiceMetadata]:
@@ -168,21 +175,37 @@ class CartesiaTTS:
         """
         if self.websocket and not self._is_websocket_closed():
             self.websocket.close()
+        route = "audio/websocket"
+        if self.experimental_ws_handle_interrupts:
+            route = f"experimental/{route}"
         self.websocket = connect(
-            f"{self._ws_url()}/audio/websocket?api_key={self.api_key}",
+            f"{self._ws_url()}/{route}?api_key={self.api_key}",
             close_timeout=None,
         )
     def _is_websocket_closed(self):
         return self.websocket.socket.fileno() == -1
+    def _check_inputs(
+        self, transcript: str, duration: Optional[float], chunk_time: Optional[float]
+    ):
+        if chunk_time is not None:
+            if chunk_time < 0.1 or chunk_time > 0.5:
+                raise ValueError("`chunk_time` must be between 0.1 and 0.5")
+        if chunk_time is not None and duration is not None:
+            if duration < chunk_time:
+                raise ValueError("`duration` must be greater than chunk_time")
+        if transcript.strip() == "":
+            raise ValueError("`transcript` must be non empty")
     def generate(
         self,
         *,
         transcript: str,
         duration: int = None,
         chunk_time: float = None,
-        lookahead: int = None,
         voice: Embedding = None,
         stream: bool = False,
         websocket: bool = True,
@@ -194,8 +217,6 @@ class CartesiaTTS:
             duration: The maximum duration of the audio in seconds.
             chunk_time: How long each audio segment should be in seconds.
                 This should not need to be adjusted.
-            lookahead: The number of seconds to look ahead for each chunk.
-                This should not need to be adjusted.
             voice: The voice to use for generating audio.
                 This can either be a voice id (string) or an embedding vector (List[float]).
             stream: Whether to stream the audio or not.
@@ -206,18 +227,16 @@ class CartesiaTTS:
         Returns:
             A generator if `stream` is True, otherwise a dictionary.
             Dictionary from both generator and non-generator return types have the following keys:
-                * "audio": The audio as a 1D numpy array.
+                * "audio": The audio as a bytes buffer.
                 * "sampling_rate": The sampling rate of the audio.
         """
-        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID)
+        self._check_inputs(transcript, duration, chunk_time)
-        if isinstance(voice, str):
-            voice = self._voices[voice]
+        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID)
         optional_body = dict(
             duration=duration,
             chunk_time=chunk_time,
-            lookahead=lookahead,
             voice=voice,
         )
         body.update({k: v for k, v in optional_body.items() if v is not None})
@@ -237,7 +256,7 @@ class CartesiaTTS:
                 sampling_rate = chunk["sampling_rate"]
             chunks.append(chunk["audio"])
-        return {"audio": np.concatenate(chunks), "sampling_rate": sampling_rate}
+        return {"audio": b"".join(chunks), "sampling_rate": sampling_rate}
     def _generate_http(self, body: Dict[str, Any]):
         response = requests.post(
@@ -258,8 +277,7 @@ class CartesiaTTS:
                 if start_index != -1 and end_index != -1:
                     try:
                         chunk_json = json.loads(buffer[start_index : end_index + 1])
-                        data = base64.b64decode(chunk_json["data"])
-                        audio = np.frombuffer(data, dtype=np.float32)
+                        audio = base64.b64decode(chunk_json["data"])
                         yield {"audio": audio, "sampling_rate": chunk_json["sampling_rate"]}
                         buffer = buffer[end_index + 1 :]
                     except json.JSONDecodeError:
@@ -268,28 +286,55 @@ class CartesiaTTS:
         if buffer:
             try:
                 chunk_json = json.loads(buffer)
-                data = base64.b64decode(chunk_json["data"])
-                audio = np.frombuffer(data, dtype=np.float32)
+                audio = base64.b64decode(chunk_json["data"])
                 yield {"audio": audio, "sampling_rate": chunk_json["sampling_rate"]}
             except json.JSONDecodeError:
                 pass
-    def _generate_ws(self, body: Dict[str, Any]):
+    def _generate_ws(self, body: Dict[str, Any], *, context_id: str = None):
+        """Generate audio using the websocket connection.
+        Args:
+            body: The request body.
+            context_id: The context id for the request.
+                The context id must be globally unique for the duration this client exists.
+                If this is provided, the context id that is in the response will
+                also be returned as part of the dict. This is helpful for testing.
+        """
         if not self.websocket or self._is_websocket_closed():
             self.refresh_websocket()
-        self.websocket.send(json.dumps({"data": body, "context_id": uuid.uuid4().hex}))
+        include_context_id = bool(context_id)
+        if context_id is None:
+            context_id = uuid.uuid4().hex
+        self.websocket.send(json.dumps({"data": body, "context_id": context_id}))
         try:
-            response = json.loads(self.websocket.recv())
-            while not response["done"]:
-                data = base64.b64decode(response["data"])
-                audio = np.frombuffer(data, dtype=np.float32)
-                # print("timing", time.perf_counter() - start)
-                yield {"audio": audio, "sampling_rate": response["sampling_rate"]}
+            while True:
                 response = json.loads(self.websocket.recv())
-        except Exception:
-            raise RuntimeError(f"Failed to generate audio. {response}")
+                if response["done"]:
+                    break
+                audio = base64.b64decode(response["data"])
+                optional_kwargs = {}
+                if include_context_id:
+                    optional_kwargs["context_id"] = response["context_id"]
+                yield {
+                    "audio": audio,
+                    "sampling_rate": response["sampling_rate"],
+                    **optional_kwargs,
+                }
+                if self.experimental_ws_handle_interrupts:
+                    self.websocket.send(json.dumps({"context_id": context_id}))
+        except GeneratorExit:
+            # The exit is only called when the generator is garbage collected.
+            # It may not be called directly after a break statement.
+            # However, the generator will be automatically cancelled on the next request.
+            if self.experimental_ws_handle_interrupts:
+                self.websocket.send(json.dumps({"context_id": context_id, "action": "cancel"}))
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate audio. {response}") from e
     def _http_url(self):
         prefix = "http" if "localhost" in self.base_url else "https"

cartesia-0.0.4/cartesia/version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.0.4"

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 0.0.2
+Version: 0.0.4
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -12,7 +12,6 @@ Requires-Python: >=3.8.0
 Description-Content-Type: text/markdown
 Requires-Dist: websockets
 Requires-Dist: requests
-Requires-Dist: numpy
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
 Requires-Dist: docformatter; extra == "dev"
@@ -49,22 +48,68 @@ pip install -e '.[dev]'
 ## Usage
 ```python
 from cartesia.tts import CartesiaTTS
-from IPython.display import Audio
+import pyaudio
+import os
 client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
 voices = client.get_voices()
-embedding = voices["Milo"]["embedding"]
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
 transcript = "Hello! Welcome to Cartesia"
-# No streaming
-output = client.generate(transcript=transcript, voice=embedding)
-Audio(output["audio"], rate=output["sampling_rate"])
+p = pyaudio.PyAudio()
+stream = None
-# Streaming
-for output in client.generate(transcript=transcript, voice=embedding, stream=True):
-    arr = output["audio"]  # a numpy array
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
     rate = output["sampling_rate"]
+    if not stream:
+        stream = p.open(format=pyaudio.paFloat32,
+                        channels=1,
+                        rate=rate,
+                        output=True)
+    # Write the audio data to the stream
+    stream.write(buffer)
+stream.stop_stream()
+stream.close()
+p.terminate()
+```
+If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
+```python
+from cartesia.tts import CartesiaTTS
+from IPython.display import Audio
+import io
+import os
+client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+voices = client.get_voices()
+voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+transcript = "Hello! Welcome to Cartesia"
+# Create a BytesIO object to store the audio data
+audio_data = io.BytesIO()
+# Generate and stream audio
+for output in client.generate(transcript=transcript, voice=voice, stream=True):
+    buffer = output["audio"]
+    audio_data.write(buffer)
+# Set the cursor position to the beginning of the BytesIO object
+audio_data.seek(0)
+# Create an Audio object from the BytesIO data
+audio = Audio(audio_data, rate=output["sampling_rate"])
+# Display the Audio object
+display(audio)
 ```
-We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
+To avoid storing your API key in the source code, we recommend doing one of the following:
+1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
+1. Set the `CARTESIA_API_KEY` environment variable, preferably to a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,5 @@
 websockets
 requests
-numpy
 [all]
 pre-commit

{cartesia-0.0.2 → cartesia-0.0.4}/setup.py RENAMED Viewed

@@ -78,7 +78,8 @@ class UploadCommand(Command):
     """Support setup.py upload."""
     description = "Build and publish the package."
-    user_options = []
+    user_options = [("skip-upload", "u", "skip git tagging and pypi upload")]
+    boolean_options = ["skip-upload"]
     @staticmethod
     def status(s):
@@ -86,21 +87,26 @@ class UploadCommand(Command):
         print("\033[1m{0}\033[0m".format(s))
     def initialize_options(self):
-        pass
+        self.skip_upload = False
     def finalize_options(self):
-        pass
+        self.skip_upload = bool(self.skip_upload)
     def run(self):
         try:
             self.status("Removing previous builds…")
             rmtree(os.path.join(here, "dist"))
+            rmtree(os.path.join(here, "build"))
         except OSError:
             pass
         self.status("Building Source and Wheel (universal) distribution…")
         os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
+        if self.skip_upload:
+            self.status("Skipping git tagging and pypi upload")
+            sys.exit()
         self.status("Uploading the package to PyPI via Twine…")
         os.system("twine upload dist/*")
@@ -116,6 +122,9 @@ class BumpVersionCommand(Command):
     To use: python setup.py bumpversion -v <version>
     This command will push the new version directly and tag it.
+    Usage:
+        python setup.py bumpversion --version=1.0.1
     """
     description = "Installs the foo."
@@ -130,6 +139,11 @@ class BumpVersionCommand(Command):
     def initialize_options(self):
         self.version = None
+        self.base_branch = None
+        self.version_branch = None
+        self.updated_files = [
+            "cartesia/version.py",
+        ]
     def finalize_options(self):
         # This package cannot be imported at top level because it
@@ -147,14 +161,18 @@ class BumpVersionCommand(Command):
             )
     def _undo(self):
-        os.system(f"git restore --staged {PACKAGE_DIR}/__init__.py")
-        os.system(f"git checkout -- {PACKAGE_DIR}/__init__.py")
+        os.system(f"git restore --staged {' '.join(self.updated_files)}")
+        os.system(f"git checkout -- {' '.join(self.updated_files)}")
+        # Return to the original branch
+        os.system(f"git checkout {self.base_branch}")
+        os.system(f"git branch -D {self.version_branch}")
     def run(self):
         current_version = about["__version__"]
         self.status("Checking current branch is 'main'")
-        current_branch = get_git_branch()
+        self.base_branch = current_branch = get_git_branch()
         if current_branch != "main":
             raise RuntimeError(
                 "You can only bump the version from the 'main' branch. "
@@ -174,18 +192,25 @@ class BumpVersionCommand(Command):
         # TODO: Add check to see if all tests are passing on main.
+        # Checkout new branch
+        self.version_branch = f"bumpversion/v{self.version}"
+        self.status(f"Create branch '{self.version_branch}'")
+        err_code = os.system(f"git checkout -b {self.version_branch}")
+        if err_code != 0:
+            raise RuntimeError("Failed to create branch.")
         # Change the version in __init__.py
         self.status(f"Updating version {current_version} -> {self.version}")
         update_version(self.version)
-        if current_version != self.version:
-            self._undo()
-            raise RuntimeError("Failed to update version.")
+        # if current_version != self.version:
+        #     self._undo()
+        #     raise RuntimeError("Failed to update version.")
-        self.status(f"Adding {PACKAGE_DIR}/__init__.py to git")
-        err_code = os.system(f"git add {PACKAGE_DIR}/__init__.py")
+        self.status(f"Adding {', '.join(self.updated_files)} to git")
+        err_code = os.system(f"git add {' '.join(self.updated_files)}")
         if err_code != 0:
             self._undo()
-            raise RuntimeError("Failed to add file to git.")
+            raise RuntimeError("Failed to add files to git.")
         # Commit the file with a message '[bumpversion] v<version>'.
         self.status(f"Commit with message '[bumpversion] v{self.version}'")
@@ -195,12 +220,15 @@ class BumpVersionCommand(Command):
             raise RuntimeError("Failed to commit file to git.")
         # Push the commit to origin.
-        # self.status("Pushing commit to origin")
-        # err_code = os.system("git push")
-        # if err_code != 0:
-        #     # TODO: undo the commit automatically.
-        #     raise RuntimeError("Failed to push commit to origin.")
+        self.status(f"Pushing commit to origin/{self.version_branch}")
+        err_code = os.system(f"git push --force --set-upstream origin {self.version_branch}")
+        if err_code != 0:
+            # TODO: undo the commit automatically.
+            self._undo()
+            raise RuntimeError("Failed to push commit to origin.")
+        os.system(f"git checkout {self.base_branch}")
+        os.system(f"git branch -D {self.version_branch}")
         sys.exit()

cartesia-0.0.4/tests/test_tts.py ADDED Viewed

@@ -0,0 +1,180 @@
+"""Test against the production Cartesia TTS API.
+This test suite tries to be as general as possible because different keys
+will lead to different results. Therefore, we cannot test for complete correctness
+but rather for general correctness.
+"""
+import os
+import uuid
+from typing import Dict, Generator, List
+import pytest
+from cartesia.tts import DEFAULT_MODEL_ID, CartesiaTTS, VoiceMetadata
+SAMPLE_VOICE = "Milo"
+class _Resources:
+    def __init__(self, *, client: CartesiaTTS, voices: Dict[str, VoiceMetadata]):
+        self.client = client
+        self.voices = voices
+@pytest.fixture(scope="session")
+def client():
+    return CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+@pytest.fixture(scope="session")
+def client_with_ws_interrupt():
+    return CartesiaTTS(
+        api_key=os.environ.get("CARTESIA_API_KEY"), experimental_ws_handle_interrupts=True
+    )
+@pytest.fixture(scope="session")
+def resources(client: CartesiaTTS):
+    voices = client.get_voices()
+    voice_id = voices[SAMPLE_VOICE]["id"]
+    voices[SAMPLE_VOICE]["embedding"] = client.get_voice_embedding(voice_id=voice_id)
+    return _Resources(
+        client=client,
+        voices=voices,
+    )
+def test_get_voices(client: CartesiaTTS):
+    voices = client.get_voices()
+    assert isinstance(voices, dict)
+    assert all(isinstance(key, str) for key in voices.keys())
+    ids = [voice["id"] for voice in voices.values()]
+    assert len(ids) == len(set(ids)), "All ids must be unique"
+    assert all(
+        key == voice["name"] for key, voice in voices.items()
+    ), "The key must be the same as the name"
+def test_get_voice_embedding_from_id(client: CartesiaTTS):
+    voices = client.get_voices()
+    voice_id = voices[SAMPLE_VOICE]["id"]
+    client.get_voice_embedding(voice_id=voice_id)
+def test_get_voice_embedding_from_url(client: CartesiaTTS):
+    url = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
+    _ = client.get_voice_embedding(link=url)
+@pytest.mark.parametrize("websocket", [True, False])
+def test_generate(resources: _Resources, websocket: bool):
+    client = resources.client
+    voices = resources.voices
+    embedding = voices[SAMPLE_VOICE]["embedding"]
+    transcript = "Hello, world!"
+    output = client.generate(transcript=transcript, voice=embedding, websocket=websocket)
+    assert output.keys() == {"audio", "sampling_rate"}
+    assert isinstance(output["audio"], bytes)
+    assert isinstance(output["sampling_rate"], int)
+@pytest.mark.parametrize("websocket", [True, False])
+def test_generate_stream(resources: _Resources, websocket: bool):
+    client = resources.client
+    voices = resources.voices
+    embedding = voices[SAMPLE_VOICE]["embedding"]
+    transcript = "Hello, world!"
+    generator = client.generate(
+        transcript=transcript, voice=embedding, websocket=websocket, stream=True
+    )
+    assert isinstance(generator, Generator)
+    for output in generator:
+        assert output.keys() == {"audio", "sampling_rate"}
+        assert isinstance(output["audio"], bytes)
+        assert isinstance(output["sampling_rate"], int)
+@pytest.mark.parametrize(
+    "actions",
+    [
+        ["cancel-5", None],
+        ["cancel-5", "cancel-1", None],
+        [None, "cancel-3", None],
+        [None, "cancel-1", "cancel-2"],
+    ],
+)
+def test_generate_stream_interrupt(
+    client_with_ws_interrupt: CartesiaTTS, resources: _Resources, actions: List[str]
+):
+    client = client_with_ws_interrupt
+    voices = resources.voices
+    embedding = voices[SAMPLE_VOICE]["embedding"]
+    transcript = "Hello, world!"
+    context_ids = [f"test-{uuid.uuid4().hex[:6]}" for _ in range(len(actions))]
+    for context_id, action in zip(context_ids, actions):
+        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID, voice=embedding)
+        # Parse actions to see what we should expect.
+        if action is None:
+            num_turns = None
+        elif "cancel" in action:
+            num_turns = int(action.split("-")[1])
+        generator = client._generate_ws(body, context_id=context_id)
+        for idx, response in enumerate(generator):
+            assert response.keys() == {"audio", "sampling_rate", "context_id"}
+            assert response["context_id"] == context_id, (
+                f"Context ID from response ({response['context_id']}) does not match "
+                f"the expected context ID ({context_id})"
+            )
+            if idx + 1 == num_turns:
+                break
+@pytest.mark.parametrize("chunk_time", [0.05, 0.6])
+def test_check_inputs_invalid_chunk_time(client: CartesiaTTS, chunk_time):
+    with pytest.raises(ValueError, match="`chunk_time` must be between 0.1 and 0.5"):
+        client._check_inputs("Test", None, chunk_time)
+@pytest.mark.parametrize("chunk_time", [0.1, 0.3, 0.5])
+def test_check_inputs_valid_chunk_time(client, chunk_time):
+    try:
+        client._check_inputs("Test", None, chunk_time)
+    except ValueError:
+        pytest.fail("Unexpected ValueError raised")
+def test_check_inputs_duration_less_than_chunk_time(client: CartesiaTTS):
+    with pytest.raises(ValueError, match="`duration` must be greater than chunk_time"):
+        client._check_inputs("Test", 0.2, 0.3)
+@pytest.mark.parametrize("duration,chunk_time", [(0.5, 0.2), (1.0, 0.5), (2.0, 0.1)])
+def test_check_inputs_valid_duration_and_chunk_time(client: CartesiaTTS, duration, chunk_time):
+    try:
+        client._check_inputs("Test", duration, chunk_time)
+    except ValueError:
+        pytest.fail("Unexpected ValueError raised")
+def test_check_inputs_empty_transcript(client: CartesiaTTS):
+    with pytest.raises(ValueError, match="`transcript` must be non empty"):
+        client._check_inputs("", None, None)
+@pytest.mark.parametrize("transcript", ["Hello", "Test transcript", "Lorem ipsum dolor sit amet"])
+def test_check_inputs_valid_transcript(client: CartesiaTTS, transcript):
+    try:
+        client._check_inputs(transcript, None, None)
+    except ValueError:
+        pytest.fail("Unexpected ValueError raised")

cartesia-0.0.2/README.md DELETED Viewed

@@ -1,35 +0,0 @@
-# Cartesia Python API Library
-The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
-**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
-## Installation
-```bash
-pip install cartesia
-# pip install in editable mode w/ dev dependencies
-pip install -e '.[dev]'
-```
-## Usage
-```python
-from cartesia.tts import CartesiaTTS
-from IPython.display import Audio
-client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
-voices = client.get_voices()
-embedding = voices["Milo"]["embedding"]
-transcript = "Hello! Welcome to Cartesia"
-# No streaming
-output = client.generate(transcript=transcript, voice=embedding)
-Audio(output["audio"], rate=output["sampling_rate"])
-# Streaming
-for output in client.generate(transcript=transcript, voice=embedding, stream=True):
-    arr = output["audio"]  # a numpy array
-    rate = output["sampling_rate"]
-```
-We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.

cartesia-0.0.2/cartesia/version.py DELETED Viewed

@@ -1 +0,0 @@
-__version__ = "0.0.2"

cartesia-0.0.2/tests/test_tts.py DELETED Viewed

@@ -1,96 +0,0 @@
-"""Test against the production Cartesia TTS API.
-This test suite tries to be as general as possible because different keys
-will lead to different results. Therefore, we cannot test for complete correctness
-but rather for general correctness.
-"""
-import os
-from typing import Dict, Generator
-import numpy as np
-import pytest
-from cartesia.tts import CartesiaTTS, VoiceMetadata
-SAMPLE_VOICE = "Milo"
-class _Resources:
-    def __init__(self, *, client: CartesiaTTS, voices: Dict[str, VoiceMetadata]):
-        self.client = client
-        self.voices = voices
-@pytest.fixture(scope="session")
-def client():
-    return CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
-@pytest.fixture(scope="session")
-def resources(client: CartesiaTTS):
-    voices = client.get_voices()
-    voice_id = voices[SAMPLE_VOICE]["id"]
-    voices[SAMPLE_VOICE]["embedding"] = client.get_voice_embedding(voice_id=voice_id)
-    return _Resources(
-        client=client,
-        voices=voices,
-    )
-def test_get_voices(client: CartesiaTTS):
-    voices = client.get_voices()
-    assert isinstance(voices, dict)
-    assert all(isinstance(key, str) for key in voices.keys())
-    ids = [voice["id"] for voice in voices.values()]
-    assert len(ids) == len(set(ids)), "All ids must be unique"
-    assert all(
-        key == voice["name"] for key, voice in voices.items()
-    ), "The key must be the same as the name"
-def test_get_voice_embedding_from_id(client: CartesiaTTS):
-    voices = client.get_voices()
-    voice_id = voices[SAMPLE_VOICE]["id"]
-    client.get_voice_embedding(voice_id=voice_id)
-def test_get_voice_embedding_from_url(client: CartesiaTTS):
-    url = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
-    _ = client.get_voice_embedding(link=url)
-@pytest.mark.parametrize("websocket", [True, False])
-def test_generate(resources: _Resources, websocket: bool):
-    client = resources.client
-    voices = resources.voices
-    embedding = voices[SAMPLE_VOICE]["embedding"]
-    transcript = "Hello, world!"
-    output = client.generate(transcript=transcript, voice=embedding, websocket=websocket)
-    assert output.keys() == {"audio", "sampling_rate"}
-    assert isinstance(output["audio"], np.ndarray)
-    assert output["audio"].dtype == np.float32
-    assert isinstance(output["sampling_rate"], int)
-@pytest.mark.parametrize("websocket", [True, False])
-def test_generate_stream(resources: _Resources, websocket: bool):
-    client = resources.client
-    voices = resources.voices
-    embedding = voices[SAMPLE_VOICE]["embedding"]
-    transcript = "Hello, world!"
-    generator = client.generate(
-        transcript=transcript, voice=embedding, websocket=websocket, stream=True
-    )
-    assert isinstance(generator, Generator)
-    for output in generator:
-        assert output.keys() == {"audio", "sampling_rate"}
-        assert isinstance(output["audio"], np.ndarray)
-        assert output["audio"].dtype == np.float32
-        assert isinstance(output["sampling_rate"], int)

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia/__init__.py RENAMED Viewed

File without changes

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{cartesia-0.0.2 → cartesia-0.0.4}/cartesia.egg-info/top_level.txt RENAMED Viewed

File without changes

{cartesia-0.0.2 → cartesia-0.0.4}/pyproject.toml RENAMED Viewed

File without changes

{cartesia-0.0.2 → cartesia-0.0.4}/setup.cfg RENAMED Viewed

File without changes

cartesia 0.0.2__tar.gz → 0.0.4__tar.gz

cartesia 0.0.2.tar.gz → 0.0.4.tar.gz