PyPI - cartesia - Versions diffs - 1.2.0__tar.gz → 1.3.0__tar.gz - Mend

cartesia 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

cartesia-1.3.0/.github/workflows/ci.yaml ADDED Viewed

@@ -0,0 +1,64 @@
+name: CI
+on:
+  push:
+  pull_request:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  # Static checks: syncs the project and runs `make lint`.
+  Linting:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+      - name: Set up Python
+        run: uv python install
+      - name: Install the project
+        run: uv sync --all-extras --dev
+      - name: Lint with ruff
+        run: |
+          make lint
+  # Test suite: syncs the project and runs `make test` with the testing API key.
+  Build:
+    runs-on: ubuntu-latest
+    # There is an issue with infinitely running tests when something fails due to failure to close the WebSocket, so we set a timeout.
+    timeout-minutes: 5
+    strategy:
+      matrix:
+        # If we test it against too many versions, we are making unnecessary
+        # requests to the production server.
+        # The lowest entry must satisfy the project's `requires-python`
+        # (">=3.9"); an older interpreter cannot be used to sync the project.
+        python-version: ["3.9", "3.10"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+      - name: Install the project
+        run: uv sync --all-extras --dev
+      - name: Test
+        env:
+          CARTESIA_API_KEY: ${{ secrets.TESTING_CARTESIA_API_KEY }}
+          CARTESIA_TEST_DEPRECATED: "true"
+        run: |
+          make test

cartesia-1.3.0/.github/workflows/publish.yaml ADDED Viewed

@@ -0,0 +1,58 @@
+name: Publish
+on:
+  push:
+    branches: [main]
+    paths:
+      # if the version.py changes, we should re-publish
+      # this also allows us to run the workflow manually without skipping.
+      - "cartesia/version.py"
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/cartesia
+    # Least privilege instead of `write-all`:
+    #   contents: write  - the tag step below pushes a git tag with GITHUB_TOKEN.
+    #   id-token: write  - `uv publish` is run without an explicit token, so it
+    #                      needs an OIDC token for trusted publishing to PyPI.
+    permissions:
+      contents: write
+      id-token: write
+    env:
+      commitmsg: ${{ github.event.head_commit.message }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+      - name: Set up Python
+        run: uv python install
+      - name: Install the project
+        run: uv sync --all-extras --dev
+      - name: Build the project
+        run: uv build
+      - name: Publish to PyPI
+        run: uv publish
+      - name: Get version
+        # Extracts the quoted string from cartesia/version.py and exports it
+        # to later steps through the GITHUB_ENV file.
+        run: |
+          export CARTESIA_PYTHON_VERSION=$(grep -o '".*"' cartesia/version.py | sed 's/"//g')
+          echo "CARTESIA_PYTHON_VERSION=${CARTESIA_PYTHON_VERSION}" >> "$GITHUB_ENV"
+      - name: Verify version
+        run: echo ${{ env.CARTESIA_PYTHON_VERSION }}
+      - name: Bump version and push tag
+        id: tag_version
+        uses: mathieudutour/github-tag-action@v6.1
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          custom_tag: ${{ env.CARTESIA_PYTHON_VERSION }}

cartesia-1.3.0/.gitignore ADDED Viewed

@@ -0,0 +1,60 @@
+data.tar.gz
+*.pth
+*.tsf
+*.ckpt
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+*.lprof
+.coverage
+.DS_Store
+.idea/
+.vscode/
+outputs/
+outputs
+# logs can either be a directory or symlinked to a directory
+# ignore both
+logs/
+logs
+data
+# Created by https://www.gitignore.io/api/python
+# Edit at https://www.gitignore.io/?templates=python
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# pyenv
+.python-version
+# scratch
+scratch/
+# End of https://www.gitignore.io/api/python

cartesia-1.3.0/Makefile ADDED Viewed

@@ -0,0 +1,13 @@
+# Developer task runner. These targets are commands, not files, so they are
+# declared phony: they always run, even if a file or directory with the same
+# name (e.g. `test`) exists in the repository root.
+.PHONY: format lint test bump
+# Auto-fix lint findings, then reformat the tree in place.
+format:
+	uvx ruff check --fix .
+	uvx ruff format .
+# Read-only counterpart of `format`: report findings and formatting drift.
+lint:
+	uvx ruff check .
+	uvx ruff format --check .
+# Run the test suite with coverage for the cartesia package.
+test:
+	uv run pytest -ra tests/ -sv --cov=cartesia/ --log-cli-level=INFO
+bump:  # Use as `make bump version=<version>`
+	uv run -m bumpversion $(version)

{cartesia-1.2.0 → cartesia-1.3.0}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,14 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: cartesia
-Version: 1.2.0
+Version: 1.3.0
 Summary: The official Python library for the Cartesia API.
 Requires-Python: >=3.9
-Description-Content-Type: text/markdown
-License-File: LICENSE.md
 Requires-Dist: aiohttp>=3.10.10
 Requires-Dist: httpx>=0.27.2
 Requires-Dist: iterators>=0.2.0
 Requires-Dist: requests>=2.32.3
-Requires-Dist: websockets>=13.1
+Requires-Dist: websockets>=10.4
+Description-Content-Type: text/markdown
 # Cartesia Python API Library

cartesia-1.3.0/bumpversion.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Bump the version of the package.
+Usage: bumpversion.py <version>
+<version> must be in the format of <major>.<minor>.<patch>[-<prelabel><preversion>]
+"""
+import re
+import tomlkit
+import sys
+from cartesia.version import __version__
+VERSION_REGEX = r"""(?x)
+    (?P<major>0|[1-9]\d*)\.
+    (?P<minor>0|[1-9]\d*)\.
+    (?P<patch>0|[1-9]\d*)
+    (?:
+        -                             # dash separator for pre-release section
+        (?P<prelabel>[a-zA-Z-]+)         # pre-release label
+        (?P<preversion>0|[1-9]\d*)        # pre-release version number
+    )?                                # pre-release section is optional
+"""  # Source: https://github.com/callowayproject/bump-my-version
+def main(version: str):
+    """Write ``version`` into pyproject.toml and cartesia/version.py.
+    Args:
+        version: The new version string. The whole string must match
+            VERSION_REGEX (<major>.<minor>.<patch>[-<prelabel><preversion>]).
+    Raises:
+        AssertionError: If ``version`` is not a well-formed version string.
+    """
+    # fullmatch, not match: match only anchors at the start, so inputs such as
+    # "1.2.3garbage" or "1.2.3-rc" would pass on their "1.2.3" prefix and the
+    # malformed string would then be written into both files.
+    assert re.fullmatch(VERSION_REGEX, version), "Invalid version format"
+    # Update [project].version; tomlkit is used for both the load and the dump.
+    with open("pyproject.toml", "r") as f:
+        pyproject = tomlkit.load(f)
+    pyproject["project"]["version"] = version
+    with open("pyproject.toml", "w") as f:
+        tomlkit.dump(pyproject, f)
+    # Rewrite the version module so it carries the same version string.
+    with open("cartesia/version.py", "w") as f:
+        f.write(f'__version__ = "{version}"\n')
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        # No version argument: show the usage text from the module docstring
+        # and exit non-zero instead of failing with an IndexError.
+        sys.exit(__doc__)
+    main(sys.argv[1])

{cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_async_websocket.py RENAMED Viewed

@@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
 import aiohttp
-from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
+from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_OUTPUT_FORMAT, DEFAULT_VOICE_EMBEDDING
 from cartesia._types import OutputFormat, VoiceControls
 from cartesia._websocket import _WebSocket
 from cartesia.tts import TTS
@@ -45,6 +45,7 @@ class _AsyncTTSContext:
         voice_embedding: Optional[List[float]] = None,
         context_id: Optional[str] = None,
         continue_: bool = False,
+        flush: bool = False,
         duration: Optional[int] = None,
         language: Optional[str] = None,
         add_timestamps: bool = False,
@@ -60,6 +61,7 @@ class _AsyncTTSContext:
             voice_embedding: The embedding of the voice to use for generating audio.
             context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
             continue_: Whether to continue the audio generation from the previous transcript or not.
+            flush: Whether to trigger a manual flush for the current context's generation.
             duration: The duration of the audio in seconds.
             language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
             add_timestamps: Whether to return word-level timestamps.
@@ -71,7 +73,7 @@ class _AsyncTTSContext:
         """
         if context_id is not None and context_id != self._context_id:
             raise ValueError("Context ID does not match the context ID of the current context.")
-        if continue_ and transcript == "":
+        if continue_ and transcript == "" and not flush:
             raise ValueError("Transcript cannot be empty when continue_ is True.")
         await self._websocket.connect()
@@ -87,6 +89,7 @@ class _AsyncTTSContext:
             context_id=self._context_id,
             add_timestamps=add_timestamps,
             continue_=continue_,
+            flush=flush,
             _experimental_voice_controls=_experimental_voice_controls,
         )
@@ -100,12 +103,49 @@ class _AsyncTTSContext:
         await self.send(
             model_id=DEFAULT_MODEL_ID,
             transcript="",
-            output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
+            output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
             voice_embedding=DEFAULT_VOICE_EMBEDDING,  # Default voice embedding since it's a required input for now.
             context_id=self._context_id,
             continue_=False,
         )
+    async def flush(self) -> Callable[[], AsyncGenerator[Dict[str, Any], None]]:
+        """Trigger a manual flush for the current context's generation.
+        Sends an empty-transcript request with ``continue_=True`` and ``flush=True``,
+        then opens a new response queue for this context.
+        Returns:
+            The async generator *function* (not a started generator). Calling it yields
+            the converted responses read from the queue that was current before the
+            flush, and stops at the first response with ``flush_done`` or ``done`` set.
+        Raises:
+            RuntimeError: Raised while iterating the returned generator, on an error
+                response, a timeout, or any other failure while reading messages.
+        """
+        await self.send(
+            model_id=DEFAULT_MODEL_ID,
+            transcript="",
+            output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
+            voice_embedding=DEFAULT_VOICE_EMBEDDING,  # Default voice embedding since it's a required input for now.
+            context_id=self._context_id,
+            continue_=True,
+            flush=True,
+        )
+        # Save the old flush ID: the index of the last queue that exists right now.
+        flush_id = len(self._websocket._context_queues[self._context_id]) - 1
+        # Create a new Async Queue to store the responses for the new flush ID
+        self._websocket._context_queues[self._context_id].append(asyncio.Queue())
+        # Return the generator for the old flush ID
+        async def generator():
+            try:
+                while True:
+                    # Read only from the queue captured in flush_id above.
+                    response = await self._websocket._get_message(
+                        self._context_id, timeout=self.timeout, flush_id=flush_id
+                    )
+                    if "error" in response:
+                        raise RuntimeError(f"Error generating audio:\n{response['error']}")
+                    # NOTE(review): response["done"] is indexed directly, so a response without
+                    # a "done" key and without a truthy "flush_done" raises KeyError here.
+                    if response.get("flush_done") or response["done"]:
+                        break
+                    yield self._websocket._convert_response(response, include_context_id=True)
+            except Exception as e:
+                # Every failure leaves the generator as a RuntimeError. This handler also
+                # catches the RuntimeError raised above for error responses and re-wraps it.
+                if isinstance(e, asyncio.TimeoutError):
+                    raise RuntimeError("Timeout while waiting for audio chunk")
+                raise RuntimeError(f"Failed to generate audio:\n{e}")
+        return generator
     async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
         """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
@@ -175,7 +215,7 @@ class _AsyncWebSocket(_WebSocket):
         self.timeout = timeout
         self._get_session = get_session
         self.websocket = None
-        self._context_queues: Dict[str, asyncio.Queue] = {}
+        self._context_queues: Dict[str, List[asyncio.Queue]] = {}
         self._processing_task: asyncio.Task = None
     def __del__(self):
@@ -213,7 +253,7 @@ class _AsyncWebSocket(_WebSocket):
             except asyncio.CancelledError:
                 pass
             except TypeError as e:
-                # Ignore the error if the task is already cancelled
+                # Ignore the error if the task is already canceled.
                 # For some reason we are getting None responses
                 # TODO: This needs to be fixed - we need to think about why we are getting None responses.
                 if "Received message 256:None" not in str(e):
@@ -284,16 +324,23 @@ class _AsyncWebSocket(_WebSocket):
                 response = await self.websocket.receive_json()
                 if response["context_id"]:
                     context_id = response["context_id"]
+                flush_id = response.get("flush_id", -1)
                 if context_id in self._context_queues:
-                    await self._context_queues[context_id].put(response)
+                    await self._context_queues[context_id][flush_id].put(response)
         except Exception as e:
             self._error = e
             raise e
-    async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
+    async def _get_message(
+        self, context_id: str, timeout: float, flush_id: Optional[int] = -1
+    ) -> Dict[str, Any]:
         if context_id not in self._context_queues:
             raise ValueError(f"Context ID {context_id} not found.")
-        return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)
+        if len(self._context_queues[context_id]) <= flush_id:
+            raise ValueError(f"Flush ID {flush_id} not found for context ID {context_id}.")
+        return await asyncio.wait_for(
+            self._context_queues[context_id][flush_id].get(), timeout=timeout
+        )
     def _remove_context(self, context_id: str):
         if context_id in self._context_queues:
@@ -309,5 +356,5 @@ class _AsyncWebSocket(_WebSocket):
         if context_id is None:
             context_id = str(uuid.uuid4())
         if context_id not in self._context_queues:
-            self._context_queues[context_id] = asyncio.Queue()
+            self._context_queues[context_id] = [asyncio.Queue()]
         return _AsyncTTSContext(context_id, self, self.timeout)

{cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_constants.py RENAMED Viewed

@@ -2,6 +2,7 @@ DEFAULT_MODEL_ID = "sonic-english"  # latest default model
 MULTILINGUAL_MODEL_ID = "sonic-multilingual"  # latest multilingual model
 DEFAULT_BASE_URL = "api.cartesia.ai"
 DEFAULT_CARTESIA_VERSION = "2024-06-10"  # latest version
+DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
 DEFAULT_TIMEOUT = 30  # seconds
 DEFAULT_NUM_CONNECTIONS = 10  # connections per client
 DEFAULT_VOICE_EMBEDDING = [1.0] * 192

{cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_websocket.py RENAMED Viewed

@@ -239,7 +239,7 @@ class _WebSocket:
             self._contexts.clear()
     def _convert_response(
-        self, response: Dict[str, any], include_context_id: bool
+        self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
     ) -> Dict[str, Any]:
         out = {}
         if response["type"] == EventType.AUDIO:
@@ -250,6 +250,9 @@ class _WebSocket:
         if include_context_id:
             out["context_id"] = response["context_id"]
+        if include_flush_id and "flush_id" in response:
+            out["flush_id"] = response["flush_id"]
         return out
     def send(

{cartesia-1.2.0 → cartesia-1.3.0}/cartesia/utils/tts.py RENAMED Viewed

@@ -37,6 +37,7 @@ def _construct_tts_request(
     add_timestamps: bool = False,
     context_id: Optional[str] = None,
     continue_: bool = False,
+    flush: bool = False,
     _experimental_voice_controls: Optional[VoiceControls] = None,
 ):
     tts_request = {
@@ -71,4 +72,7 @@ def _construct_tts_request(
     if continue_:
         tts_request["continue"] = continue_
+    if flush:
+        tts_request["flush"] = flush
     return tts_request

cartesia-1.3.0/cartesia/version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "1.3.0"

{cartesia-1.2.0 → cartesia-1.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cartesia"
-version = "1.2.0"
+version = "1.3.0"
 description = "The official Python library for the Cartesia API."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -9,9 +9,13 @@ dependencies = [
     "httpx>=0.27.2",
     "iterators>=0.2.0",
     "requests>=2.32.3",
-    "websockets>=13.1",
+    "websockets>=10.4",
 ]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 [tool.uv]
 dev-dependencies = [
     "isort>=5.13.2",

cartesia-1.3.0/tests/__init__.py ADDED Viewed

File without changes

cartesia-1.3.0/tests/resources/sample-speech-4s.wav ADDED Viewed

Binary file

cartesia 1.2.0__tar.gz → 1.3.0__tar.gz

cartesia 1.2.0__tar.gz → 1.3.0__tar.gz