cartesia 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/_async_websocket.py +56 -9
- cartesia/_constants.py +1 -0
- cartesia/_websocket.py +4 -1
- cartesia/utils/tts.py +4 -0
- cartesia/version.py +1 -1
- {cartesia-1.2.0.dist-info → cartesia-1.3.1.dist-info}/METADATA +7 -8
- {cartesia-1.2.0.dist-info → cartesia-1.3.1.dist-info}/RECORD +9 -10
- {cartesia-1.2.0.dist-info → cartesia-1.3.1.dist-info}/WHEEL +1 -2
- cartesia-1.2.0.dist-info/top_level.txt +0 -1
- {cartesia-1.2.0.dist-info → cartesia-1.3.1.dist-info/licenses}/LICENSE.md +0 -0
cartesia/_async_websocket.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
|
|
6
6
|
|
7
7
|
import aiohttp
|
8
8
|
|
9
|
-
from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
|
9
|
+
from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_OUTPUT_FORMAT, DEFAULT_VOICE_EMBEDDING
|
10
10
|
from cartesia._types import OutputFormat, VoiceControls
|
11
11
|
from cartesia._websocket import _WebSocket
|
12
12
|
from cartesia.tts import TTS
|
@@ -45,6 +45,7 @@ class _AsyncTTSContext:
|
|
45
45
|
voice_embedding: Optional[List[float]] = None,
|
46
46
|
context_id: Optional[str] = None,
|
47
47
|
continue_: bool = False,
|
48
|
+
flush: bool = False,
|
48
49
|
duration: Optional[int] = None,
|
49
50
|
language: Optional[str] = None,
|
50
51
|
add_timestamps: bool = False,
|
@@ -60,6 +61,7 @@ class _AsyncTTSContext:
|
|
60
61
|
voice_embedding: The embedding of the voice to use for generating audio.
|
61
62
|
context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
|
62
63
|
continue_: Whether to continue the audio generation from the previous transcript or not.
|
64
|
+
flush: Whether to trigger a manual flush for the current context's generation.
|
63
65
|
duration: The duration of the audio in seconds.
|
64
66
|
language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
|
65
67
|
add_timestamps: Whether to return word-level timestamps.
|
@@ -71,7 +73,7 @@ class _AsyncTTSContext:
|
|
71
73
|
"""
|
72
74
|
if context_id is not None and context_id != self._context_id:
|
73
75
|
raise ValueError("Context ID does not match the context ID of the current context.")
|
74
|
-
if continue_ and transcript == "":
|
76
|
+
if continue_ and transcript == "" and not flush:
|
75
77
|
raise ValueError("Transcript cannot be empty when continue_ is True.")
|
76
78
|
|
77
79
|
await self._websocket.connect()
|
@@ -87,6 +89,7 @@ class _AsyncTTSContext:
|
|
87
89
|
context_id=self._context_id,
|
88
90
|
add_timestamps=add_timestamps,
|
89
91
|
continue_=continue_,
|
92
|
+
flush=flush,
|
90
93
|
_experimental_voice_controls=_experimental_voice_controls,
|
91
94
|
)
|
92
95
|
|
@@ -100,12 +103,49 @@ class _AsyncTTSContext:
|
|
100
103
|
await self.send(
|
101
104
|
model_id=DEFAULT_MODEL_ID,
|
102
105
|
transcript="",
|
103
|
-
output_format=TTS.get_output_format(
|
106
|
+
output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
|
104
107
|
voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
|
105
108
|
context_id=self._context_id,
|
106
109
|
continue_=False,
|
107
110
|
)
|
108
111
|
|
112
|
+
async def flush(self) -> Callable[[], AsyncGenerator[Dict[str, Any], None]]:
|
113
|
+
"""Trigger a manual flush for the current context's generation. This method returns a generator that yields the audio prior to the flush."""
|
114
|
+
await self.send(
|
115
|
+
model_id=DEFAULT_MODEL_ID,
|
116
|
+
transcript="",
|
117
|
+
output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
|
118
|
+
voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
|
119
|
+
context_id=self._context_id,
|
120
|
+
continue_=True,
|
121
|
+
flush=True,
|
122
|
+
)
|
123
|
+
|
124
|
+
# Save the old flush ID
|
125
|
+
flush_id = len(self._websocket._context_queues[self._context_id]) - 1
|
126
|
+
|
127
|
+
# Create a new Async Queue to store the responses for the new flush ID
|
128
|
+
self._websocket._context_queues[self._context_id].append(asyncio.Queue())
|
129
|
+
|
130
|
+
# Return the generator for the old flush ID
|
131
|
+
async def generator():
|
132
|
+
try:
|
133
|
+
while True:
|
134
|
+
response = await self._websocket._get_message(
|
135
|
+
self._context_id, timeout=self.timeout, flush_id=flush_id
|
136
|
+
)
|
137
|
+
if "error" in response:
|
138
|
+
raise RuntimeError(f"Error generating audio:\n{response['error']}")
|
139
|
+
if response.get("flush_done") or response["done"]:
|
140
|
+
break
|
141
|
+
yield self._websocket._convert_response(response, include_context_id=True)
|
142
|
+
except Exception as e:
|
143
|
+
if isinstance(e, asyncio.TimeoutError):
|
144
|
+
raise RuntimeError("Timeout while waiting for audio chunk")
|
145
|
+
raise RuntimeError(f"Failed to generate audio:\n{e}")
|
146
|
+
|
147
|
+
return generator
|
148
|
+
|
109
149
|
async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
|
110
150
|
"""Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
|
111
151
|
|
@@ -175,7 +215,7 @@ class _AsyncWebSocket(_WebSocket):
|
|
175
215
|
self.timeout = timeout
|
176
216
|
self._get_session = get_session
|
177
217
|
self.websocket = None
|
178
|
-
self._context_queues: Dict[str, asyncio.Queue] = {}
|
218
|
+
self._context_queues: Dict[str, List[asyncio.Queue]] = {}
|
179
219
|
self._processing_task: asyncio.Task = None
|
180
220
|
|
181
221
|
def __del__(self):
|
@@ -213,7 +253,7 @@ class _AsyncWebSocket(_WebSocket):
|
|
213
253
|
except asyncio.CancelledError:
|
214
254
|
pass
|
215
255
|
except TypeError as e:
|
216
|
-
# Ignore the error if the task is already
|
256
|
+
# Ignore the error if the task is already canceled.
|
217
257
|
# For some reason we are getting None responses
|
218
258
|
# TODO: This needs to be fixed - we need to think about why we are getting None responses.
|
219
259
|
if "Received message 256:None" not in str(e):
|
@@ -284,16 +324,23 @@ class _AsyncWebSocket(_WebSocket):
|
|
284
324
|
response = await self.websocket.receive_json()
|
285
325
|
if response["context_id"]:
|
286
326
|
context_id = response["context_id"]
|
327
|
+
flush_id = response.get("flush_id", -1)
|
287
328
|
if context_id in self._context_queues:
|
288
|
-
await self._context_queues[context_id].put(response)
|
329
|
+
await self._context_queues[context_id][flush_id].put(response)
|
289
330
|
except Exception as e:
|
290
331
|
self._error = e
|
291
332
|
raise e
|
292
333
|
|
293
|
-
async def _get_message(
|
334
|
+
async def _get_message(
|
335
|
+
self, context_id: str, timeout: float, flush_id: Optional[int] = -1
|
336
|
+
) -> Dict[str, Any]:
|
294
337
|
if context_id not in self._context_queues:
|
295
338
|
raise ValueError(f"Context ID {context_id} not found.")
|
296
|
-
|
339
|
+
if len(self._context_queues[context_id]) <= flush_id:
|
340
|
+
raise ValueError(f"Flush ID {flush_id} not found for context ID {context_id}.")
|
341
|
+
return await asyncio.wait_for(
|
342
|
+
self._context_queues[context_id][flush_id].get(), timeout=timeout
|
343
|
+
)
|
297
344
|
|
298
345
|
def _remove_context(self, context_id: str):
|
299
346
|
if context_id in self._context_queues:
|
@@ -309,5 +356,5 @@ class _AsyncWebSocket(_WebSocket):
|
|
309
356
|
if context_id is None:
|
310
357
|
context_id = str(uuid.uuid4())
|
311
358
|
if context_id not in self._context_queues:
|
312
|
-
self._context_queues[context_id] = asyncio.Queue()
|
359
|
+
self._context_queues[context_id] = [asyncio.Queue()]
|
313
360
|
return _AsyncTTSContext(context_id, self, self.timeout)
|
cartesia/_constants.py
CHANGED
@@ -2,6 +2,7 @@ DEFAULT_MODEL_ID = "sonic-english" # latest default model
|
|
2
2
|
MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
|
3
3
|
DEFAULT_BASE_URL = "api.cartesia.ai"
|
4
4
|
DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
|
5
|
+
DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
|
5
6
|
DEFAULT_TIMEOUT = 30 # seconds
|
6
7
|
DEFAULT_NUM_CONNECTIONS = 10 # connections per client
|
7
8
|
DEFAULT_VOICE_EMBEDDING = [1.0] * 192
|
cartesia/_websocket.py
CHANGED
@@ -239,7 +239,7 @@ class _WebSocket:
|
|
239
239
|
self._contexts.clear()
|
240
240
|
|
241
241
|
def _convert_response(
|
242
|
-
self, response: Dict[str, any], include_context_id: bool
|
242
|
+
self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
|
243
243
|
) -> Dict[str, Any]:
|
244
244
|
out = {}
|
245
245
|
if response["type"] == EventType.AUDIO:
|
@@ -250,6 +250,9 @@ class _WebSocket:
|
|
250
250
|
if include_context_id:
|
251
251
|
out["context_id"] = response["context_id"]
|
252
252
|
|
253
|
+
if include_flush_id and "flush_id" in response:
|
254
|
+
out["flush_id"] = response["flush_id"]
|
255
|
+
|
253
256
|
return out
|
254
257
|
|
255
258
|
def send(
|
cartesia/utils/tts.py
CHANGED
@@ -37,6 +37,7 @@ def _construct_tts_request(
|
|
37
37
|
add_timestamps: bool = False,
|
38
38
|
context_id: Optional[str] = None,
|
39
39
|
continue_: bool = False,
|
40
|
+
flush: bool = False,
|
40
41
|
_experimental_voice_controls: Optional[VoiceControls] = None,
|
41
42
|
):
|
42
43
|
tts_request = {
|
@@ -71,4 +72,7 @@ def _construct_tts_request(
|
|
71
72
|
if continue_:
|
72
73
|
tts_request["continue"] = continue_
|
73
74
|
|
75
|
+
if flush:
|
76
|
+
tts_request["flush"] = flush
|
77
|
+
|
74
78
|
return tts_request
|
cartesia/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.0"
|
1
|
+
__version__ = "1.3.1"
|
@@ -1,15 +1,14 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: cartesia
|
3
|
-
Version: 1.2.0
|
3
|
+
Version: 1.3.1
|
4
4
|
Summary: The official Python library for the Cartesia API.
|
5
5
|
Requires-Python: >=3.9
|
6
|
+
Requires-Dist: aiohttp>=3.10.10
|
7
|
+
Requires-Dist: httpx>=0.27.0
|
8
|
+
Requires-Dist: iterators>=0.2.0
|
9
|
+
Requires-Dist: requests>=2.31.0
|
10
|
+
Requires-Dist: websockets>=10.4
|
6
11
|
Description-Content-Type: text/markdown
|
7
|
-
License-File: LICENSE.md
|
8
|
-
Requires-Dist: aiohttp >=3.10.10
|
9
|
-
Requires-Dist: httpx >=0.27.2
|
10
|
-
Requires-Dist: iterators >=0.2.0
|
11
|
-
Requires-Dist: requests >=2.32.3
|
12
|
-
Requires-Dist: websockets >=13.1
|
13
12
|
|
14
13
|
# Cartesia Python API Library
|
15
14
|
|
@@ -1,24 +1,23 @@
|
|
1
1
|
cartesia/__init__.py,sha256=rS7jIg4iqT0VgnwjzYK25JXxnF5hjZGE_-PGynAqHFo,126
|
2
2
|
cartesia/_async_sse.py,sha256=76oIvstzVcWZCbcD8Ps419k1FEHF6lOB5qoHwawvj9k,3327
|
3
|
-
cartesia/_async_websocket.py,sha256=
|
4
|
-
cartesia/_constants.py,sha256=
|
3
|
+
cartesia/_async_websocket.py,sha256=y9YL9fU8eLENZZECJUwRBVTfEx4ZMl96Y5zHaRY2BiI,14787
|
4
|
+
cartesia/_constants.py,sha256=khGNVpiQVDmv1oZU7pKTd9C1AHjiaM8zQ2He9d5zI_c,435
|
5
5
|
cartesia/_logger.py,sha256=vU7QiGSy_AJuJFmClUocqIJ-Ltku_8C24ZU8L6fLJR0,53
|
6
6
|
cartesia/_sse.py,sha256=CugabGUAUM-N2BruxNFxDB20HyxDlRdbN-J_yAzvBMY,5667
|
7
7
|
cartesia/_types.py,sha256=gixQbKbX-H8xbD7jxHmc02KXLyjEaup19lh_57_YBl8,2570
|
8
|
-
cartesia/_websocket.py,sha256=
|
8
|
+
cartesia/_websocket.py,sha256=nRCq9xB0T9yYHoLqtn0GsJmcap-OAlJdSIrzTl40qMI,14875
|
9
9
|
cartesia/async_client.py,sha256=y_K_Yuv0weA4k9ZYD0M9bNM3x3frsq07tqkg7R9h0-o,2714
|
10
10
|
cartesia/async_tts.py,sha256=IbWVRKklNClXASR6ylHaukcMRR304LUguqc4yMopbDU,2076
|
11
11
|
cartesia/client.py,sha256=OS1ORUSlR8Jg-em1imeTAFfwkC85AQFnw8PYtTdUuC8,2364
|
12
12
|
cartesia/resource.py,sha256=wpnB3IPcTdxYSp0vxSkpntp4NSvqvnwUWF-0ZpgWV9o,1585
|
13
13
|
cartesia/tts.py,sha256=kWvqce9K3gZ4QrWD-ciYdK29n49SNkxhd2A7ueTOwMY,4878
|
14
|
-
cartesia/version.py,sha256
|
14
|
+
cartesia/version.py,sha256=-ypEJktJToAL9by62JJKWEzDo_KPCQtmE5kwFgX24z4,22
|
15
15
|
cartesia/voices.py,sha256=bDYbs0KoikAROJlmbnLdo4TrW0YwzjMvp70uKG6Alp0,7180
|
16
16
|
cartesia/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
cartesia/utils/deprecated.py,sha256=2cXvGtrxhPeUZA5LWy2n_U5OFLDv7SHeFtzqhjSJGyk,1674
|
18
18
|
cartesia/utils/retry.py,sha256=O6fyVWpH9Su8c0Fwupl57xMt6JrwJ52txBwP3faUL7k,3339
|
19
|
-
cartesia/utils/tts.py,sha256=
|
20
|
-
cartesia-1.
|
21
|
-
cartesia-1.
|
22
|
-
cartesia-1.
|
23
|
-
cartesia-1.
|
24
|
-
cartesia-1.2.0.dist-info/RECORD,,
|
19
|
+
cartesia/utils/tts.py,sha256=TbvBZqHR6LxPim6s5RyGiURi4hIfqWt3KUk5QYOOhfc,2177
|
20
|
+
cartesia-1.3.1.dist-info/METADATA,sha256=EZPPEiwa164rpgnLs4YERYGj47lXKYX1X8bKsuKQ1nc,20976
|
21
|
+
cartesia-1.3.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
22
|
+
cartesia-1.3.1.dist-info/licenses/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
|
23
|
+
cartesia-1.3.1.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
cartesia
|
File without changes
|