PyPI - cartesia - Versions diffs - 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

cartesia 1.2.0 (py3-none-any.whl) → 1.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

cartesia/_async_websocket.py +56 -9
cartesia/_constants.py +1 -0
cartesia/_websocket.py +4 -1
cartesia/utils/tts.py +4 -0
cartesia/version.py +1 -1
{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/METADATA +7 -8
{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/RECORD +9 -10
{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/WHEEL +1 -2
cartesia-1.2.0.dist-info/top_level.txt +0 -1
{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info/licenses}/LICENSE.md +0 -0

cartesia/_async_websocket.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
 import aiohttp
-from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
+from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_OUTPUT_FORMAT, DEFAULT_VOICE_EMBEDDING
 from cartesia._types import OutputFormat, VoiceControls
 from cartesia._websocket import _WebSocket
 from cartesia.tts import TTS
@@ -45,6 +45,7 @@ class _AsyncTTSContext:
         voice_embedding: Optional[List[float]] = None,
         context_id: Optional[str] = None,
         continue_: bool = False,
+        flush: bool = False,
         duration: Optional[int] = None,
         language: Optional[str] = None,
         add_timestamps: bool = False,
@@ -60,6 +61,7 @@ class _AsyncTTSContext:
             voice_embedding: The embedding of the voice to use for generating audio.
             context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
             continue_: Whether to continue the audio generation from the previous transcript or not.
+            flush: Whether to trigger a manual flush for the current context's generation.
             duration: The duration of the audio in seconds.
             language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
             add_timestamps: Whether to return word-level timestamps.
@@ -71,7 +73,7 @@ class _AsyncTTSContext:
         """
         if context_id is not None and context_id != self._context_id:
             raise ValueError("Context ID does not match the context ID of the current context.")
-        if continue_ and transcript == "":
+        if continue_ and transcript == "" and not flush:
             raise ValueError("Transcript cannot be empty when continue_ is True.")
         await self._websocket.connect()
@@ -87,6 +89,7 @@ class _AsyncTTSContext:
             context_id=self._context_id,
             add_timestamps=add_timestamps,
             continue_=continue_,
+            flush=flush,
             _experimental_voice_controls=_experimental_voice_controls,
         )
@@ -100,12 +103,49 @@ class _AsyncTTSContext:
         await self.send(
             model_id=DEFAULT_MODEL_ID,
             transcript="",
-            output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
+            output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
             voice_embedding=DEFAULT_VOICE_EMBEDDING,  # Default voice embedding since it's a required input for now.
             context_id=self._context_id,
             continue_=False,
         )
+    async def flush(self) -> Callable[[], AsyncGenerator[Dict[str, Any], None]]:
+        """Trigger a manual flush for the current context's generation. This method returns a generator that yields the audio prior to the flush."""
+        await self.send(
+            model_id=DEFAULT_MODEL_ID,
+            transcript="",
+            output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
+            voice_embedding=DEFAULT_VOICE_EMBEDDING,  # Default voice embedding since it's a required input for now.
+            context_id=self._context_id,
+            continue_=True,
+            flush=True,
+        )
+        # Save the old flush ID
+        flush_id = len(self._websocket._context_queues[self._context_id]) - 1
+        # Create a new Async Queue to store the responses for the new flush ID
+        self._websocket._context_queues[self._context_id].append(asyncio.Queue())
+        # Return the generator for the old flush ID
+        async def generator():
+            try:
+                while True:
+                    response = await self._websocket._get_message(
+                        self._context_id, timeout=self.timeout, flush_id=flush_id
+                    )
+                    if "error" in response:
+                        raise RuntimeError(f"Error generating audio:\n{response['error']}")
+                    if response.get("flush_done") or response["done"]:
+                        break
+                    yield self._websocket._convert_response(response, include_context_id=True)
+            except Exception as e:
+                if isinstance(e, asyncio.TimeoutError):
+                    raise RuntimeError("Timeout while waiting for audio chunk")
+                raise RuntimeError(f"Failed to generate audio:\n{e}")
+        return generator
     async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
         """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
@@ -175,7 +215,7 @@ class _AsyncWebSocket(_WebSocket):
         self.timeout = timeout
         self._get_session = get_session
         self.websocket = None
-        self._context_queues: Dict[str, asyncio.Queue] = {}
+        self._context_queues: Dict[str, List[asyncio.Queue]] = {}
         self._processing_task: asyncio.Task = None
     def __del__(self):
@@ -213,7 +253,7 @@ class _AsyncWebSocket(_WebSocket):
             except asyncio.CancelledError:
                 pass
             except TypeError as e:
-                # Ignore the error if the task is already cancelled
+                # Ignore the error if the task is already canceled.
                 # For some reason we are getting None responses
                 # TODO: This needs to be fixed - we need to think about why we are getting None responses.
                 if "Received message 256:None" not in str(e):
@@ -284,16 +324,23 @@ class _AsyncWebSocket(_WebSocket):
                 response = await self.websocket.receive_json()
                 if response["context_id"]:
                     context_id = response["context_id"]
+                flush_id = response.get("flush_id", -1)
                 if context_id in self._context_queues:
-                    await self._context_queues[context_id].put(response)
+                    await self._context_queues[context_id][flush_id].put(response)
         except Exception as e:
             self._error = e
             raise e
-    async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
+    async def _get_message(
+        self, context_id: str, timeout: float, flush_id: Optional[int] = -1
+    ) -> Dict[str, Any]:
         if context_id not in self._context_queues:
             raise ValueError(f"Context ID {context_id} not found.")
-        return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)
+        if len(self._context_queues[context_id]) <= flush_id:
+            raise ValueError(f"Flush ID {flush_id} not found for context ID {context_id}.")
+        return await asyncio.wait_for(
+            self._context_queues[context_id][flush_id].get(), timeout=timeout
+        )
     def _remove_context(self, context_id: str):
         if context_id in self._context_queues:
@@ -309,5 +356,5 @@ class _AsyncWebSocket(_WebSocket):
         if context_id is None:
             context_id = str(uuid.uuid4())
         if context_id not in self._context_queues:
-            self._context_queues[context_id] = asyncio.Queue()
+            self._context_queues[context_id] = [asyncio.Queue()]
         return _AsyncTTSContext(context_id, self, self.timeout)

cartesia/_constants.py CHANGED Viewed

@@ -2,6 +2,7 @@ DEFAULT_MODEL_ID = "sonic-english"  # latest default model
 MULTILINGUAL_MODEL_ID = "sonic-multilingual"  # latest multilingual model
 DEFAULT_BASE_URL = "api.cartesia.ai"
 DEFAULT_CARTESIA_VERSION = "2024-06-10"  # latest version
+DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
 DEFAULT_TIMEOUT = 30  # seconds
 DEFAULT_NUM_CONNECTIONS = 10  # connections per client
 DEFAULT_VOICE_EMBEDDING = [1.0] * 192

cartesia/_websocket.py CHANGED Viewed

@@ -239,7 +239,7 @@ class _WebSocket:
             self._contexts.clear()
     def _convert_response(
-        self, response: Dict[str, any], include_context_id: bool
+        self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
     ) -> Dict[str, Any]:
         out = {}
         if response["type"] == EventType.AUDIO:
@@ -250,6 +250,9 @@ class _WebSocket:
         if include_context_id:
             out["context_id"] = response["context_id"]
+        if include_flush_id and "flush_id" in response:
+            out["flush_id"] = response["flush_id"]
         return out
     def send(

cartesia/utils/tts.py CHANGED Viewed

@@ -37,6 +37,7 @@ def _construct_tts_request(
     add_timestamps: bool = False,
     context_id: Optional[str] = None,
     continue_: bool = False,
+    flush: bool = False,
     _experimental_voice_controls: Optional[VoiceControls] = None,
 ):
     tts_request = {
@@ -71,4 +72,7 @@ def _construct_tts_request(
     if continue_:
         tts_request["continue"] = continue_
+    if flush:
+        tts_request["flush"] = flush
     return tts_request

cartesia/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "1.2.0"
+__version__ = "1.3.0"

{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,15 +1,14 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: cartesia
-Version: 1.2.0
+Version: 1.3.0
 Summary: The official Python library for the Cartesia API.
 Requires-Python: >=3.9
+Requires-Dist: aiohttp>=3.10.10
+Requires-Dist: httpx>=0.27.2
+Requires-Dist: iterators>=0.2.0
+Requires-Dist: requests>=2.32.3
+Requires-Dist: websockets>=10.4
 Description-Content-Type: text/markdown
-License-File: LICENSE.md
-Requires-Dist: aiohttp >=3.10.10
-Requires-Dist: httpx >=0.27.2
-Requires-Dist: iterators >=0.2.0
-Requires-Dist: requests >=2.32.3
-Requires-Dist: websockets >=13.1
 # Cartesia Python API Library

{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/RECORD RENAMED Viewed

@@ -1,24 +1,23 @@
 cartesia/__init__.py,sha256=rS7jIg4iqT0VgnwjzYK25JXxnF5hjZGE_-PGynAqHFo,126
 cartesia/_async_sse.py,sha256=76oIvstzVcWZCbcD8Ps419k1FEHF6lOB5qoHwawvj9k,3327
-cartesia/_async_websocket.py,sha256=Gy0nK3g2HKIBwh-PP1AunEBj83kgFpTGCvrq6tnwg9c,12515
-cartesia/_constants.py,sha256=lquaYIg7IThdmC1fCklnWC8EM7stbSeVCDwRqCzPq-U,389
+cartesia/_async_websocket.py,sha256=y9YL9fU8eLENZZECJUwRBVTfEx4ZMl96Y5zHaRY2BiI,14787
+cartesia/_constants.py,sha256=khGNVpiQVDmv1oZU7pKTd9C1AHjiaM8zQ2He9d5zI_c,435
 cartesia/_logger.py,sha256=vU7QiGSy_AJuJFmClUocqIJ-Ltku_8C24ZU8L6fLJR0,53
 cartesia/_sse.py,sha256=CugabGUAUM-N2BruxNFxDB20HyxDlRdbN-J_yAzvBMY,5667
 cartesia/_types.py,sha256=gixQbKbX-H8xbD7jxHmc02KXLyjEaup19lh_57_YBl8,2570
-cartesia/_websocket.py,sha256=CpqkShdl4qBjCGMR8s6dEBHK0LJxkrG-FjbPLhjOP-U,14735
+cartesia/_websocket.py,sha256=nRCq9xB0T9yYHoLqtn0GsJmcap-OAlJdSIrzTl40qMI,14875
 cartesia/async_client.py,sha256=y_K_Yuv0weA4k9ZYD0M9bNM3x3frsq07tqkg7R9h0-o,2714
 cartesia/async_tts.py,sha256=IbWVRKklNClXASR6ylHaukcMRR304LUguqc4yMopbDU,2076
 cartesia/client.py,sha256=OS1ORUSlR8Jg-em1imeTAFfwkC85AQFnw8PYtTdUuC8,2364
 cartesia/resource.py,sha256=wpnB3IPcTdxYSp0vxSkpntp4NSvqvnwUWF-0ZpgWV9o,1585
 cartesia/tts.py,sha256=kWvqce9K3gZ4QrWD-ciYdK29n49SNkxhd2A7ueTOwMY,4878
-cartesia/version.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
+cartesia/version.py,sha256=F5mW07pSyGrqDNY2Ehr-UpDzpBtN-FsYU0QGZWf6PJE,22
 cartesia/voices.py,sha256=bDYbs0KoikAROJlmbnLdo4TrW0YwzjMvp70uKG6Alp0,7180
 cartesia/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cartesia/utils/deprecated.py,sha256=2cXvGtrxhPeUZA5LWy2n_U5OFLDv7SHeFtzqhjSJGyk,1674
 cartesia/utils/retry.py,sha256=O6fyVWpH9Su8c0Fwupl57xMt6JrwJ52txBwP3faUL7k,3339
-cartesia/utils/tts.py,sha256=7tJmdyOYwe2QIav5d1UZxhpbcHaYqf7A77bBOlb4U_g,2100
-cartesia-1.2.0.dist-info/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
-cartesia-1.2.0.dist-info/METADATA,sha256=XkVlNno4gSjSecAC0fBIqcvRP_YUAYs6D6dzIdk-c7w,21006
-cartesia-1.2.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
-cartesia-1.2.0.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
-cartesia-1.2.0.dist-info/RECORD,,
+cartesia/utils/tts.py,sha256=TbvBZqHR6LxPim6s5RyGiURi4hIfqWt3KUk5QYOOhfc,2177
+cartesia-1.3.0.dist-info/METADATA,sha256=eedG5B4V6MxvDuPUMdYwp6UHrX6yQ6dJTMRRZxq1-UA,20976
+cartesia-1.3.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+cartesia-1.3.0.dist-info/licenses/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
+cartesia-1.3.0.dist-info/RECORD,,

{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.5.0)
+Generator: hatchling 1.26.3
 Root-Is-Purelib: true
 Tag: py3-none-any

cartesia-1.2.0.dist-info/top_level.txt DELETED Viewed

@@ -1 +0,0 @@
-cartesia

{cartesia-1.2.0.dist-info → cartesia-1.3.0.dist-info/licenses}/LICENSE.md RENAMED Viewed

File without changes

cartesia 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

cartesia 1.2.0 (py3-none-any.whl) → 1.3.0 (py3-none-any.whl)