cartesia 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
6
6
 
7
7
  import aiohttp
8
8
 
9
- from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
9
+ from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_OUTPUT_FORMAT, DEFAULT_VOICE_EMBEDDING
10
10
  from cartesia._types import OutputFormat, VoiceControls
11
11
  from cartesia._websocket import _WebSocket
12
12
  from cartesia.tts import TTS
@@ -45,6 +45,7 @@ class _AsyncTTSContext:
45
45
  voice_embedding: Optional[List[float]] = None,
46
46
  context_id: Optional[str] = None,
47
47
  continue_: bool = False,
48
+ flush: bool = False,
48
49
  duration: Optional[int] = None,
49
50
  language: Optional[str] = None,
50
51
  add_timestamps: bool = False,
@@ -60,6 +61,7 @@ class _AsyncTTSContext:
60
61
  voice_embedding: The embedding of the voice to use for generating audio.
61
62
  context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
62
63
  continue_: Whether to continue the audio generation from the previous transcript or not.
64
+ flush: Whether to trigger a manual flush for the current context's generation.
63
65
  duration: The duration of the audio in seconds.
64
66
  language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
65
67
  add_timestamps: Whether to return word-level timestamps.
@@ -71,7 +73,7 @@ class _AsyncTTSContext:
71
73
  """
72
74
  if context_id is not None and context_id != self._context_id:
73
75
  raise ValueError("Context ID does not match the context ID of the current context.")
74
- if continue_ and transcript == "":
76
+ if continue_ and transcript == "" and not flush:
75
77
  raise ValueError("Transcript cannot be empty when continue_ is True.")
76
78
 
77
79
  await self._websocket.connect()
@@ -87,6 +89,7 @@ class _AsyncTTSContext:
87
89
  context_id=self._context_id,
88
90
  add_timestamps=add_timestamps,
89
91
  continue_=continue_,
92
+ flush=flush,
90
93
  _experimental_voice_controls=_experimental_voice_controls,
91
94
  )
92
95
 
@@ -100,12 +103,49 @@ class _AsyncTTSContext:
100
103
  await self.send(
101
104
  model_id=DEFAULT_MODEL_ID,
102
105
  transcript="",
103
- output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
106
+ output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
104
107
  voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
105
108
  context_id=self._context_id,
106
109
  continue_=False,
107
110
  )
108
111
 
112
+ async def flush(self) -> Callable[[], AsyncGenerator[Dict[str, Any], None]]:
113
+ """Trigger a manual flush for the current context's generation. This method returns a generator that yields the audio prior to the flush."""
114
+ await self.send(
115
+ model_id=DEFAULT_MODEL_ID,
116
+ transcript="",
117
+ output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
118
+ voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
119
+ context_id=self._context_id,
120
+ continue_=True,
121
+ flush=True,
122
+ )
123
+
124
+ # Save the old flush ID
125
+ flush_id = len(self._websocket._context_queues[self._context_id]) - 1
126
+
127
+ # Create a new Async Queue to store the responses for the new flush ID
128
+ self._websocket._context_queues[self._context_id].append(asyncio.Queue())
129
+
130
+ # Return the generator for the old flush ID
131
+ async def generator():
132
+ try:
133
+ while True:
134
+ response = await self._websocket._get_message(
135
+ self._context_id, timeout=self.timeout, flush_id=flush_id
136
+ )
137
+ if "error" in response:
138
+ raise RuntimeError(f"Error generating audio:\n{response['error']}")
139
+ if response.get("flush_done") or response["done"]:
140
+ break
141
+ yield self._websocket._convert_response(response, include_context_id=True)
142
+ except Exception as e:
143
+ if isinstance(e, asyncio.TimeoutError):
144
+ raise RuntimeError("Timeout while waiting for audio chunk")
145
+ raise RuntimeError(f"Failed to generate audio:\n{e}")
146
+
147
+ return generator
148
+
109
149
  async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
110
150
  """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
111
151
 
@@ -175,7 +215,7 @@ class _AsyncWebSocket(_WebSocket):
175
215
  self.timeout = timeout
176
216
  self._get_session = get_session
177
217
  self.websocket = None
178
- self._context_queues: Dict[str, asyncio.Queue] = {}
218
+ self._context_queues: Dict[str, List[asyncio.Queue]] = {}
179
219
  self._processing_task: asyncio.Task = None
180
220
 
181
221
  def __del__(self):
@@ -213,7 +253,7 @@ class _AsyncWebSocket(_WebSocket):
213
253
  except asyncio.CancelledError:
214
254
  pass
215
255
  except TypeError as e:
216
- # Ignore the error if the task is already cancelled
256
+ # Ignore the error if the task is already canceled.
217
257
  # For some reason we are getting None responses
218
258
  # TODO: This needs to be fixed - we need to think about why we are getting None responses.
219
259
  if "Received message 256:None" not in str(e):
@@ -284,16 +324,23 @@ class _AsyncWebSocket(_WebSocket):
284
324
  response = await self.websocket.receive_json()
285
325
  if response["context_id"]:
286
326
  context_id = response["context_id"]
327
+ flush_id = response.get("flush_id", -1)
287
328
  if context_id in self._context_queues:
288
- await self._context_queues[context_id].put(response)
329
+ await self._context_queues[context_id][flush_id].put(response)
289
330
  except Exception as e:
290
331
  self._error = e
291
332
  raise e
292
333
 
293
- async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
334
+ async def _get_message(
335
+ self, context_id: str, timeout: float, flush_id: Optional[int] = -1
336
+ ) -> Dict[str, Any]:
294
337
  if context_id not in self._context_queues:
295
338
  raise ValueError(f"Context ID {context_id} not found.")
296
- return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)
339
+ if len(self._context_queues[context_id]) <= flush_id:
340
+ raise ValueError(f"Flush ID {flush_id} not found for context ID {context_id}.")
341
+ return await asyncio.wait_for(
342
+ self._context_queues[context_id][flush_id].get(), timeout=timeout
343
+ )
297
344
 
298
345
  def _remove_context(self, context_id: str):
299
346
  if context_id in self._context_queues:
@@ -309,5 +356,5 @@ class _AsyncWebSocket(_WebSocket):
309
356
  if context_id is None:
310
357
  context_id = str(uuid.uuid4())
311
358
  if context_id not in self._context_queues:
312
- self._context_queues[context_id] = asyncio.Queue()
359
+ self._context_queues[context_id] = [asyncio.Queue()]
313
360
  return _AsyncTTSContext(context_id, self, self.timeout)
cartesia/_constants.py CHANGED
@@ -2,6 +2,7 @@ DEFAULT_MODEL_ID = "sonic-english" # latest default model
2
2
  MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
3
3
  DEFAULT_BASE_URL = "api.cartesia.ai"
4
4
  DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
5
+ DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
5
6
  DEFAULT_TIMEOUT = 30 # seconds
6
7
  DEFAULT_NUM_CONNECTIONS = 10 # connections per client
7
8
  DEFAULT_VOICE_EMBEDDING = [1.0] * 192
cartesia/_websocket.py CHANGED
@@ -239,7 +239,7 @@ class _WebSocket:
239
239
  self._contexts.clear()
240
240
 
241
241
  def _convert_response(
242
- self, response: Dict[str, any], include_context_id: bool
242
+ self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
243
243
  ) -> Dict[str, Any]:
244
244
  out = {}
245
245
  if response["type"] == EventType.AUDIO:
@@ -250,6 +250,9 @@ class _WebSocket:
250
250
  if include_context_id:
251
251
  out["context_id"] = response["context_id"]
252
252
 
253
+ if include_flush_id and "flush_id" in response:
254
+ out["flush_id"] = response["flush_id"]
255
+
253
256
  return out
254
257
 
255
258
  def send(
cartesia/utils/tts.py CHANGED
@@ -37,6 +37,7 @@ def _construct_tts_request(
37
37
  add_timestamps: bool = False,
38
38
  context_id: Optional[str] = None,
39
39
  continue_: bool = False,
40
+ flush: bool = False,
40
41
  _experimental_voice_controls: Optional[VoiceControls] = None,
41
42
  ):
42
43
  tts_request = {
@@ -71,4 +72,7 @@ def _construct_tts_request(
71
72
  if continue_:
72
73
  tts_request["continue"] = continue_
73
74
 
75
+ if flush:
76
+ tts_request["flush"] = flush
77
+
74
78
  return tts_request
cartesia/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.0"
1
+ __version__ = "1.3.0"
@@ -1,15 +1,14 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: cartesia
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Requires-Python: >=3.9
6
+ Requires-Dist: aiohttp>=3.10.10
7
+ Requires-Dist: httpx>=0.27.2
8
+ Requires-Dist: iterators>=0.2.0
9
+ Requires-Dist: requests>=2.32.3
10
+ Requires-Dist: websockets>=10.4
6
11
  Description-Content-Type: text/markdown
7
- License-File: LICENSE.md
8
- Requires-Dist: aiohttp >=3.10.10
9
- Requires-Dist: httpx >=0.27.2
10
- Requires-Dist: iterators >=0.2.0
11
- Requires-Dist: requests >=2.32.3
12
- Requires-Dist: websockets >=13.1
13
12
 
14
13
  # Cartesia Python API Library
15
14
 
@@ -1,24 +1,23 @@
1
1
  cartesia/__init__.py,sha256=rS7jIg4iqT0VgnwjzYK25JXxnF5hjZGE_-PGynAqHFo,126
2
2
  cartesia/_async_sse.py,sha256=76oIvstzVcWZCbcD8Ps419k1FEHF6lOB5qoHwawvj9k,3327
3
- cartesia/_async_websocket.py,sha256=Gy0nK3g2HKIBwh-PP1AunEBj83kgFpTGCvrq6tnwg9c,12515
4
- cartesia/_constants.py,sha256=lquaYIg7IThdmC1fCklnWC8EM7stbSeVCDwRqCzPq-U,389
3
+ cartesia/_async_websocket.py,sha256=y9YL9fU8eLENZZECJUwRBVTfEx4ZMl96Y5zHaRY2BiI,14787
4
+ cartesia/_constants.py,sha256=khGNVpiQVDmv1oZU7pKTd9C1AHjiaM8zQ2He9d5zI_c,435
5
5
  cartesia/_logger.py,sha256=vU7QiGSy_AJuJFmClUocqIJ-Ltku_8C24ZU8L6fLJR0,53
6
6
  cartesia/_sse.py,sha256=CugabGUAUM-N2BruxNFxDB20HyxDlRdbN-J_yAzvBMY,5667
7
7
  cartesia/_types.py,sha256=gixQbKbX-H8xbD7jxHmc02KXLyjEaup19lh_57_YBl8,2570
8
- cartesia/_websocket.py,sha256=CpqkShdl4qBjCGMR8s6dEBHK0LJxkrG-FjbPLhjOP-U,14735
8
+ cartesia/_websocket.py,sha256=nRCq9xB0T9yYHoLqtn0GsJmcap-OAlJdSIrzTl40qMI,14875
9
9
  cartesia/async_client.py,sha256=y_K_Yuv0weA4k9ZYD0M9bNM3x3frsq07tqkg7R9h0-o,2714
10
10
  cartesia/async_tts.py,sha256=IbWVRKklNClXASR6ylHaukcMRR304LUguqc4yMopbDU,2076
11
11
  cartesia/client.py,sha256=OS1ORUSlR8Jg-em1imeTAFfwkC85AQFnw8PYtTdUuC8,2364
12
12
  cartesia/resource.py,sha256=wpnB3IPcTdxYSp0vxSkpntp4NSvqvnwUWF-0ZpgWV9o,1585
13
13
  cartesia/tts.py,sha256=kWvqce9K3gZ4QrWD-ciYdK29n49SNkxhd2A7ueTOwMY,4878
14
- cartesia/version.py,sha256=MpAT5hgNoHnTtG1XRD_GV_A7QrHVU6vJjGSw_8qMGA4,22
14
+ cartesia/version.py,sha256=F5mW07pSyGrqDNY2Ehr-UpDzpBtN-FsYU0QGZWf6PJE,22
15
15
  cartesia/voices.py,sha256=bDYbs0KoikAROJlmbnLdo4TrW0YwzjMvp70uKG6Alp0,7180
16
16
  cartesia/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cartesia/utils/deprecated.py,sha256=2cXvGtrxhPeUZA5LWy2n_U5OFLDv7SHeFtzqhjSJGyk,1674
18
18
  cartesia/utils/retry.py,sha256=O6fyVWpH9Su8c0Fwupl57xMt6JrwJ52txBwP3faUL7k,3339
19
- cartesia/utils/tts.py,sha256=7tJmdyOYwe2QIav5d1UZxhpbcHaYqf7A77bBOlb4U_g,2100
20
- cartesia-1.2.0.dist-info/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
21
- cartesia-1.2.0.dist-info/METADATA,sha256=XkVlNno4gSjSecAC0fBIqcvRP_YUAYs6D6dzIdk-c7w,21006
22
- cartesia-1.2.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
23
- cartesia-1.2.0.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
24
- cartesia-1.2.0.dist-info/RECORD,,
19
+ cartesia/utils/tts.py,sha256=TbvBZqHR6LxPim6s5RyGiURi4hIfqWt3KUk5QYOOhfc,2177
20
+ cartesia-1.3.0.dist-info/METADATA,sha256=eedG5B4V6MxvDuPUMdYwp6UHrX6yQ6dJTMRRZxq1-UA,20976
21
+ cartesia-1.3.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
22
+ cartesia-1.3.0.dist-info/licenses/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
23
+ cartesia-1.3.0.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: hatchling 1.26.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1 +0,0 @@
1
- cartesia