cartesia 1.0.9__tar.gz → 1.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cartesia-1.0.9 → cartesia-1.0.11}/PKG-INFO +1 -1
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/_types.py +2 -2
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/client.py +6 -2
- cartesia-1.0.11/cartesia/version.py +1 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/PKG-INFO +1 -1
- {cartesia-1.0.9 → cartesia-1.0.11}/tests/test_deprecated.py +2 -2
- {cartesia-1.0.9 → cartesia-1.0.11}/tests/test_tts.py +29 -6
- cartesia-1.0.9/cartesia/version.py +0 -1
- {cartesia-1.0.9 → cartesia-1.0.11}/LICENSE.md +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/README.md +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/__init__.py +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/utils/__init__.py +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/utils/deprecated.py +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia/utils/retry.py +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/SOURCES.txt +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/dependency_links.txt +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/requires.txt +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/cartesia.egg-info/top_level.txt +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/pyproject.toml +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/setup.cfg +0 -0
- {cartesia-1.0.9 → cartesia-1.0.11}/setup.py +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List, TypedDict
|
1
|
+
from typing import List, TypedDict, Union
|
2
2
|
|
3
3
|
from cartesia.utils.deprecated import deprecated
|
4
4
|
|
@@ -86,7 +86,7 @@ class VoiceControls(TypedDict):
|
|
86
86
|
This is an experimental class and is subject to rapid change in future versions.
|
87
87
|
"""
|
88
88
|
|
89
|
-
speed: str = ""
|
89
|
+
speed: Union[str, float] = ""
|
90
90
|
emotion: List[str] = []
|
91
91
|
|
92
92
|
|
@@ -31,6 +31,9 @@ try:
|
|
31
31
|
except ImportError:
|
32
32
|
IS_WEBSOCKET_SYNC_AVAILABLE = False
|
33
33
|
|
34
|
+
from iterators import TimeoutIterator
|
35
|
+
from websockets.sync.client import connect
|
36
|
+
|
34
37
|
from cartesia._types import (
|
35
38
|
DeprecatedOutputFormatMapping,
|
36
39
|
EventType,
|
@@ -40,8 +43,6 @@ from cartesia._types import (
|
|
40
43
|
VoiceMetadata,
|
41
44
|
)
|
42
45
|
from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
|
43
|
-
from iterators import TimeoutIterator
|
44
|
-
from websockets.sync.client import connect
|
45
46
|
|
46
47
|
DEFAULT_MODEL_ID = "sonic-english" # latest default model
|
47
48
|
MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
|
@@ -293,6 +294,7 @@ class _TTSContext:
|
|
293
294
|
context_id: Optional[str] = None,
|
294
295
|
duration: Optional[int] = None,
|
295
296
|
language: Optional[str] = None,
|
297
|
+
add_timestamps: bool = False,
|
296
298
|
_experimental_voice_controls: Optional[VoiceControls] = None,
|
297
299
|
) -> Generator[bytes, None, None]:
|
298
300
|
"""Send audio generation requests to the WebSocket and yield responses.
|
@@ -306,6 +308,7 @@ class _TTSContext:
|
|
306
308
|
context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
|
307
309
|
duration: The duration of the audio in seconds.
|
308
310
|
language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
|
311
|
+
add_timestamps: Whether to return word-level timestamps.
|
309
312
|
_experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
|
310
313
|
Note: This is an experimental feature and may change rapidly in future releases.
|
311
314
|
|
@@ -340,6 +343,7 @@ class _TTSContext:
|
|
340
343
|
},
|
341
344
|
"context_id": self._context_id,
|
342
345
|
"language": language,
|
346
|
+
"add_timestamps": add_timestamps,
|
343
347
|
}
|
344
348
|
|
345
349
|
if duration is not None:
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.0.11"
|
@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Generator, List
|
|
14
14
|
|
15
15
|
import numpy as np
|
16
16
|
import pytest
|
17
|
+
|
17
18
|
from cartesia import AsyncCartesia, Cartesia
|
18
19
|
from cartesia._types import VoiceControls, VoiceMetadata
|
19
20
|
from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
|
@@ -25,6 +26,7 @@ RESOURCES_DIR = os.path.join(THISDIR, "resources")
|
|
25
26
|
SAMPLE_VOICE = "Newsman"
|
26
27
|
SAMPLE_VOICE_ID = "d46abd1d-2d02-43e8-819f-51fb652c1c61"
|
27
28
|
EXPERIMENTAL_VOICE_CONTROLS = {"emotion": ["anger:high", "positivity:low"], "speed": "fastest"}
|
29
|
+
EXPERIMENTAL_VOICE_CONTROLS_2 = {"speed": 0.4}
|
28
30
|
|
29
31
|
logger = logging.getLogger(__name__)
|
30
32
|
|
@@ -102,7 +104,7 @@ def test_create_voice(client: Cartesia):
|
|
102
104
|
assert voice in voices
|
103
105
|
|
104
106
|
@pytest.mark.parametrize("stream", [True, False])
|
105
|
-
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
|
107
|
+
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
|
106
108
|
def test_sse_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
|
107
109
|
logger.info("Testing SSE send")
|
108
110
|
client = resources.client
|
@@ -139,7 +141,7 @@ def test_sse_send_with_model_id(resources: _Resources, stream: bool):
|
|
139
141
|
assert isinstance(out["audio"], bytes)
|
140
142
|
|
141
143
|
@pytest.mark.parametrize("stream", [True, False])
|
142
|
-
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
|
144
|
+
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
|
143
145
|
def test_websocket_send(resources: _Resources, stream: bool, _experimental_voice_controls: VoiceControls):
|
144
146
|
logger.info("Testing WebSocket send")
|
145
147
|
client = resources.client
|
@@ -188,8 +190,7 @@ def test_websocket_send_timestamps(resources: _Resources, stream: bool):
|
|
188
190
|
|
189
191
|
ws.close()
|
190
192
|
|
191
|
-
|
192
|
-
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
|
193
|
+
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
|
193
194
|
def test_sse_send_context_manager(resources: _Resources, _experimental_voice_controls: VoiceControls):
|
194
195
|
logger.info("Testing SSE send context manager")
|
195
196
|
transcript = "Hello, world! I'\''m generating audio on Cartesia."
|
@@ -255,7 +256,7 @@ def test_websocket_send_context_manage_err(resources: _Resources):
|
|
255
256
|
pass
|
256
257
|
|
257
258
|
@pytest.mark.asyncio
|
258
|
-
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
|
259
|
+
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
|
259
260
|
async def test_async_sse_send( resources: _Resources, _experimental_voice_controls: VoiceControls):
|
260
261
|
logger.info("Testing async SSE send")
|
261
262
|
transcript = "Hello, world! I'\''m generating audio on Cartesia."
|
@@ -276,7 +277,7 @@ async def test_async_sse_send( resources: _Resources, _experimental_voice_contro
|
|
276
277
|
await async_client.close()
|
277
278
|
|
278
279
|
@pytest.mark.asyncio
|
279
|
-
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS])
|
280
|
+
@pytest.mark.parametrize("_experimental_voice_controls", [None, EXPERIMENTAL_VOICE_CONTROLS, EXPERIMENTAL_VOICE_CONTROLS_2])
|
280
281
|
async def test_async_websocket_send(resources: _Resources, _experimental_voice_controls: VoiceControls):
|
281
282
|
logger.info("Testing async WebSocket send")
|
282
283
|
transcript = "Hello, world! I'\''m generating audio on Cartesia."
|
@@ -454,6 +455,28 @@ def test_sync_continuation_websocket_context_send():
|
|
454
455
|
assert isinstance(out["audio"], bytes)
|
455
456
|
finally:
|
456
457
|
ws.close()
|
458
|
+
|
459
|
+
def test_sync_context_send_timestamps(resources: _Resources):
|
460
|
+
logger.info("Testing WebSocket send")
|
461
|
+
client = resources.client
|
462
|
+
transcripts = ["Hello, world!", "I'\''m generating audio on Cartesia."]
|
463
|
+
|
464
|
+
ws = client.tts.websocket()
|
465
|
+
ctx = ws.context()
|
466
|
+
output_generate = ctx.send(transcript=chunk_generator(transcripts), voice_id=SAMPLE_VOICE_ID, output_format={
|
467
|
+
"container": "raw",
|
468
|
+
"encoding": "pcm_f32le",
|
469
|
+
"sample_rate": 44100
|
470
|
+
}, model_id=DEFAULT_MODEL_ID, add_timestamps=True)
|
471
|
+
|
472
|
+
has_wordtimestamps = False
|
473
|
+
for out in output_generate:
|
474
|
+
has_wordtimestamps |= "word_timestamps" in out
|
475
|
+
_validate_schema(out)
|
476
|
+
|
477
|
+
assert has_wordtimestamps, "No word timestamps found"
|
478
|
+
|
479
|
+
ws.close()
|
457
480
|
|
458
481
|
@pytest.mark.asyncio
|
459
482
|
async def test_continuation_websocket_context_send():
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.0.9"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|