cartesia 1.0.10__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -294,6 +294,7 @@ class _TTSContext:
294
294
  context_id: Optional[str] = None,
295
295
  duration: Optional[int] = None,
296
296
  language: Optional[str] = None,
297
+ add_timestamps: bool = False,
297
298
  _experimental_voice_controls: Optional[VoiceControls] = None,
298
299
  ) -> Generator[bytes, None, None]:
299
300
  """Send audio generation requests to the WebSocket and yield responses.
@@ -307,6 +308,7 @@ class _TTSContext:
307
308
  context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
308
309
  duration: The duration of the audio in seconds.
309
310
  language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
311
+ add_timestamps: Whether to return word-level timestamps.
310
312
  _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
311
313
  Note: This is an experimental feature and may change rapidly in future releases.
312
314
 
@@ -341,6 +343,7 @@ class _TTSContext:
341
343
  },
342
344
  "context_id": self._context_id,
343
345
  "language": language,
346
+ "add_timestamps": add_timestamps,
344
347
  }
345
348
 
346
349
  if duration is not None:
@@ -0,0 +1 @@
1
+ __version__ = "1.0.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -190,7 +190,6 @@ def test_websocket_send_timestamps(resources: _Resources, stream: bool):
190
190
 
191
191
  ws.close()
192
192
 
193
-
194
193
  @pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
195
194
  def test_sse_send_context_manager(resources: _Resources, _experimental_voice_controls: VoiceControls):
196
195
  logger.info("Testing SSE send context manager")
@@ -456,6 +455,28 @@ def test_sync_continuation_websocket_context_send():
456
455
  assert isinstance(out["audio"], bytes)
457
456
  finally:
458
457
  ws.close()
458
+
459
+ def test_sync_context_send_timestamps(resources: _Resources):
460
+ logger.info("Testing WebSocket send")
461
+ client = resources.client
462
+ transcripts = ["Hello, world!", "I'\''m generating audio on Cartesia."]
463
+
464
+ ws = client.tts.websocket()
465
+ ctx = ws.context()
466
+ output_generate = ctx.send(transcript=chunk_generator(transcripts), voice_id=SAMPLE_VOICE_ID, output_format={
467
+ "container": "raw",
468
+ "encoding": "pcm_f32le",
469
+ "sample_rate": 44100
470
+ }, model_id=DEFAULT_MODEL_ID, add_timestamps=True)
471
+
472
+ has_wordtimestamps = False
473
+ for out in output_generate:
474
+ has_wordtimestamps |= "word_timestamps" in out
475
+ _validate_schema(out)
476
+
477
+ assert has_wordtimestamps, "No word timestamps found"
478
+
479
+ ws.close()
459
480
 
460
481
  @pytest.mark.asyncio
461
482
  async def test_continuation_websocket_context_send():
@@ -1 +0,0 @@
1
- __version__ = "1.0.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes