cartesia 1.0.9__tar.gz → 1.0.11__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -1,4 +1,4 @@
1
- from typing import List, TypedDict
1
+ from typing import List, TypedDict, Union
2
2
 
3
3
  from cartesia.utils.deprecated import deprecated
4
4
 
@@ -86,7 +86,7 @@ class VoiceControls(TypedDict):
86
86
  This is an experimental class and is subject to rapid change in future versions.
87
87
  """
88
88
 
89
- speed: str = ""
89
+ speed: Union[str, float] = ""
90
90
  emotion: List[str] = []
91
91
 
92
92
 
@@ -31,6 +31,9 @@ try:
31
31
  except ImportError:
32
32
  IS_WEBSOCKET_SYNC_AVAILABLE = False
33
33
 
34
+ from iterators import TimeoutIterator
35
+ from websockets.sync.client import connect
36
+
34
37
  from cartesia._types import (
35
38
  DeprecatedOutputFormatMapping,
36
39
  EventType,
@@ -40,8 +43,6 @@ from cartesia._types import (
40
43
  VoiceMetadata,
41
44
  )
42
45
  from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
43
- from iterators import TimeoutIterator
44
- from websockets.sync.client import connect
45
46
 
46
47
  DEFAULT_MODEL_ID = "sonic-english" # latest default model
47
48
  MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
@@ -293,6 +294,7 @@ class _TTSContext:
293
294
  context_id: Optional[str] = None,
294
295
  duration: Optional[int] = None,
295
296
  language: Optional[str] = None,
297
+ add_timestamps: bool = False,
296
298
  _experimental_voice_controls: Optional[VoiceControls] = None,
297
299
  ) -> Generator[bytes, None, None]:
298
300
  """Send audio generation requests to the WebSocket and yield responses.
@@ -306,6 +308,7 @@ class _TTSContext:
306
308
  context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
307
309
  duration: The duration of the audio in seconds.
308
310
  language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
311
+ add_timestamps: Whether to return word-level timestamps.
309
312
  _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
310
313
  Note: This is an experimental feature and may change rapidly in future releases.
311
314
 
@@ -340,6 +343,7 @@ class _TTSContext:
340
343
  },
341
344
  "context_id": self._context_id,
342
345
  "language": language,
346
+ "add_timestamps": add_timestamps,
343
347
  }
344
348
 
345
349
  if duration is not None:
@@ -0,0 +1 @@
1
+ __version__ = "1.0.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -1,7 +1,7 @@
1
- import cartesia as Cartesia
1
+ from packaging.version import Version
2
+
2
3
  import cartesia.version as version
3
4
  from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
4
- from packaging.version import Version
5
5
 
6
6
 
7
7
  def test_deprecated_to_remove_by_version():
@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Generator, List
14
14
 
15
15
  import numpy as np
16
16
  import pytest
17
+
17
18
  from cartesia import AsyncCartesia, Cartesia
18
19
  from cartesia._types import VoiceControls, VoiceMetadata
19
20
  from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
@@ -25,6 +26,7 @@ RESOURCES_DIR = os.path.join(THISDIR, "resources")
25
26
  SAMPLE_VOICE = "Newsman"
26
27
  SAMPLE_VOICE_ID = "d46abd1d-2d02-43e8-819f-51fb652c1c61"
27
28
  EXPERIMENTAL_VOICE_CONTROLS = {"emotion": ["anger:high", "positivity:low"], "speed": "fastest"}
29
+ EXPERIMENTAL_VOICE_CONTROLS_2 = {"speed": 0.4}
28
30
 
29
31
  logger = logging.getLogger(__name__)
30
32
 
@@ -102,7 +104,7 @@ def test_create_voice(client: Cartesia):
102
104
  assert voice in voices
103
105
 
104
106
  @pytest.mark.parametrize("stream", [True, False])
105
- @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
107
+ @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
106
108
  def test_sse_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
107
109
  logger.info("Testing SSE send")
108
110
  client = resources.client
@@ -139,7 +141,7 @@ def test_sse_send_with_model_id(resources: _Resources, stream: bool):
139
141
  assert isinstance(out["audio"], bytes)
140
142
 
141
143
  @pytest.mark.parametrize("stream", [True, False])
142
- @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
144
+ @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
143
145
  def test_websocket_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
144
146
  logger.info("Testing WebSocket send")
145
147
  client = resources.client
@@ -188,8 +190,7 @@ def test_websocket_send_timestamps(resources: _Resources, stream: bool):
188
190
 
189
191
  ws.close()
190
192
 
191
-
192
- @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
193
+ @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
193
194
  def test_sse_send_context_manager(resources: _Resources, _experimental_voice_controls: VoiceControls):
194
195
  logger.info("Testing SSE send context manager")
195
196
  transcript = "Hello, world! I'\''m generating audio on Cartesia."
@@ -255,7 +256,7 @@ def test_websocket_send_context_manage_err(resources: _Resources):
255
256
  pass
256
257
 
257
258
  @pytest.mark.asyncio
258
- @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
259
+ @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
259
260
  async def test_async_sse_send( resources: _Resources, _experimental_voice_controls: VoiceControls):
260
261
  logger.info("Testing async SSE send")
261
262
  transcript = "Hello, world! I'\''m generating audio on Cartesia."
@@ -276,7 +277,7 @@ async def test_async_sse_send( resources: _Resources, _experimental_voice_contro
276
277
  await async_client.close()
277
278
 
278
279
  @pytest.mark.asyncio
279
- @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
280
+ @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
280
281
  async def test_async_websocket_send(resources: _Resources, _experimental_voice_controls: VoiceControls):
281
282
  logger.info("Testing async WebSocket send")
282
283
  transcript = "Hello, world! I'\''m generating audio on Cartesia."
@@ -454,6 +455,28 @@ def test_sync_continuation_websocket_context_send():
454
455
  assert isinstance(out["audio"], bytes)
455
456
  finally:
456
457
  ws.close()
458
+
459
+ def test_sync_context_send_timestamps(resources: _Resources):
460
+ logger.info("Testing WebSocket send")
461
+ client = resources.client
462
+ transcripts = ["Hello, world!", "I'\''m generating audio on Cartesia."]
463
+
464
+ ws = client.tts.websocket()
465
+ ctx = ws.context()
466
+ output_generate = ctx.send(transcript=chunk_generator(transcripts), voice_id=SAMPLE_VOICE_ID, output_format={
467
+ "container": "raw",
468
+ "encoding": "pcm_f32le",
469
+ "sample_rate": 44100
470
+ }, model_id=DEFAULT_MODEL_ID, add_timestamps=True)
471
+
472
+ has_wordtimestamps = False
473
+ for out in output_generate:
474
+ has_wordtimestamps |= "word_timestamps" in out
475
+ _validate_schema(out)
476
+
477
+ assert has_wordtimestamps, "No word timestamps found"
478
+
479
+ ws.close()
457
480
 
458
481
  @pytest.mark.asyncio
459
482
  async def test_continuation_websocket_context_send():
@@ -1 +0,0 @@
1
- __version__ = "1.0.9"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes