PyPI - cartesia - Version diff - 1.0.9.tar.gz → 1.0.11.tar.gz - Mend

cartesia 1.0.9.tar.gz → 1.0.11.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{cartesia-1.0.9 → cartesia-1.0.11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.9
+Version: 1.0.11
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.

{cartesia-1.0.9 → cartesia-1.0.11}/cartesia/_types.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, TypedDict
+from typing import List, TypedDict, Union
 from cartesia.utils.deprecated import deprecated
@@ -86,7 +86,7 @@ class VoiceControls(TypedDict):
         This is an experimental class and is subject to rapid change in future versions.
     """
-    speed: str = ""
+    speed: Union[str, float] = ""
     emotion: List[str] = []

{cartesia-1.0.9 → cartesia-1.0.11}/cartesia/client.py RENAMED Viewed

@@ -31,6 +31,9 @@ try:
 except ImportError:
     IS_WEBSOCKET_SYNC_AVAILABLE = False
+from iterators import TimeoutIterator
+from websockets.sync.client import connect
 from cartesia._types import (
     DeprecatedOutputFormatMapping,
     EventType,
@@ -40,8 +43,6 @@ from cartesia._types import (
     VoiceMetadata,
 )
 from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
-from iterators import TimeoutIterator
-from websockets.sync.client import connect
 DEFAULT_MODEL_ID = "sonic-english"  # latest default model
 MULTILINGUAL_MODEL_ID = "sonic-multilingual"  # latest multilingual model
@@ -293,6 +294,7 @@ class _TTSContext:
         context_id: Optional[str] = None,
         duration: Optional[int] = None,
         language: Optional[str] = None,
+        add_timestamps: bool = False,
         _experimental_voice_controls: Optional[VoiceControls] = None,
     ) -> Generator[bytes, None, None]:
         """Send audio generation requests to the WebSocket and yield responses.
@@ -306,6 +308,7 @@ class _TTSContext:
             context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
             duration: The duration of the audio in seconds.
             language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
+            add_timestamps: Whether to return word-level timestamps.
             _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
                 Note: This is an experimental feature and may change rapidly in future releases.
@@ -340,6 +343,7 @@ class _TTSContext:
             },
             "context_id": self._context_id,
             "language": language,
+            "add_timestamps": add_timestamps,
         }
         if duration is not None:

cartesia-1.0.11/cartesia/version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "1.0.11"

{cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.9
+Version: 1.0.11
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.

{cartesia-1.0.9 → cartesia-1.0.11}/tests/test_deprecated.py RENAMED Viewed

@@ -1,7 +1,7 @@
-import cartesia as Cartesia
+from packaging.version import Version
 import cartesia.version as version
 from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
-from packaging.version import Version
 def test_deprecated_to_remove_by_version():

{cartesia-1.0.9 → cartesia-1.0.11}/tests/test_tts.py RENAMED Viewed

@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Generator, List
 import numpy as np
 import pytest
 from cartesia import AsyncCartesia, Cartesia
 from cartesia._types import VoiceControls, VoiceMetadata
 from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
@@ -25,6 +26,7 @@ RESOURCES_DIR = os.path.join(THISDIR, "resources")
 SAMPLE_VOICE = "Newsman"
 SAMPLE_VOICE_ID = "d46abd1d-2d02-43e8-819f-51fb652c1c61"
 EXPERIMENTAL_VOICE_CONTROLS = {"emotion": ["anger:high", "positivity:low"], "speed": "fastest"}
+EXPERIMENTAL_VOICE_CONTROLS_2 = {"speed": 0.4}
 logger = logging.getLogger(__name__)
@@ -102,7 +104,7 @@ def test_create_voice(client: Cartesia):
     assert voice in voices
 @pytest.mark.parametrize("stream", [True, False])
-@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
+@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
 def test_sse_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
     logger.info("Testing SSE send")
     client = resources.client
@@ -139,7 +141,7 @@ def test_sse_send_with_model_id(resources: _Resources, stream: bool):
         assert isinstance(out["audio"], bytes)
 @pytest.mark.parametrize("stream", [True, False])
-@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
+@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
 def test_websocket_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
     logger.info("Testing WebSocket send")
     client = resources.client
@@ -188,8 +190,7 @@ def test_websocket_send_timestamps(resources: _Resources, stream: bool):
     ws.close()
-@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
+@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
 def test_sse_send_context_manager(resources: _Resources, _experimental_voice_controls: VoiceControls):
     logger.info("Testing SSE send context manager")
     transcript = "Hello, world! I'm generating audio on Cartesia."
@@ -255,7 +256,7 @@ def test_websocket_send_context_manage_err(resources: _Resources):
         pass
 @pytest.mark.asyncio
-@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
+@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
 async def test_async_sse_send( resources: _Resources, _experimental_voice_controls: VoiceControls):
     logger.info("Testing async SSE send")
     transcript = "Hello, world! I'm generating audio on Cartesia."
@@ -276,7 +277,7 @@ async def test_async_sse_send( resources: _Resources, _experimental_voice_contro
         await async_client.close()
 @pytest.mark.asyncio
-@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
+@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
 async def test_async_websocket_send(resources: _Resources,  _experimental_voice_controls: VoiceControls):
     logger.info("Testing async WebSocket send")
     transcript = "Hello, world! I'm generating audio on Cartesia."
@@ -454,6 +455,28 @@ def test_sync_continuation_websocket_context_send():
             assert isinstance(out["audio"], bytes)
     finally:
         ws.close()
+def test_sync_context_send_timestamps(resources: _Resources):
+    logger.info("Testing WebSocket send")
+    client = resources.client
+    transcripts = ["Hello, world!", "I'm generating audio on Cartesia."]
+    ws = client.tts.websocket()
+    ctx = ws.context()
+    output_generate = ctx.send(transcript=chunk_generator(transcripts), voice_id=SAMPLE_VOICE_ID, output_format={
+        "container": "raw",
+        "encoding": "pcm_f32le",
+        "sample_rate": 44100
+    }, model_id=DEFAULT_MODEL_ID, add_timestamps=True)
+    has_wordtimestamps = False
+    for out in output_generate:
+        has_wordtimestamps |= "word_timestamps" in out
+        _validate_schema(out)
+    assert has_wordtimestamps, "No word timestamps found"
+    ws.close()
 @pytest.mark.asyncio
 async def test_continuation_websocket_context_send():