cartesia 1.3.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. cartesia/__init__.py +302 -3
  2. cartesia/api_status/__init__.py +6 -0
  3. cartesia/api_status/client.py +104 -0
  4. cartesia/api_status/requests/__init__.py +5 -0
  5. cartesia/api_status/requests/api_info.py +8 -0
  6. cartesia/api_status/types/__init__.py +5 -0
  7. cartesia/api_status/types/api_info.py +20 -0
  8. cartesia/base_client.py +156 -0
  9. cartesia/client.py +163 -40
  10. cartesia/core/__init__.py +50 -0
  11. cartesia/core/api_error.py +15 -0
  12. cartesia/core/client_wrapper.py +55 -0
  13. cartesia/core/datetime_utils.py +28 -0
  14. cartesia/core/file.py +67 -0
  15. cartesia/core/http_client.py +499 -0
  16. cartesia/core/jsonable_encoder.py +101 -0
  17. cartesia/core/pagination.py +88 -0
  18. cartesia/core/pydantic_utilities.py +296 -0
  19. cartesia/core/query_encoder.py +58 -0
  20. cartesia/core/remove_none_from_dict.py +11 -0
  21. cartesia/core/request_options.py +35 -0
  22. cartesia/core/serialization.py +272 -0
  23. cartesia/datasets/__init__.py +24 -0
  24. cartesia/datasets/requests/__init__.py +15 -0
  25. cartesia/datasets/requests/create_dataset_request.py +7 -0
  26. cartesia/datasets/requests/dataset.py +9 -0
  27. cartesia/datasets/requests/dataset_file.py +9 -0
  28. cartesia/datasets/requests/paginated_dataset_files.py +10 -0
  29. cartesia/datasets/requests/paginated_datasets.py +10 -0
  30. cartesia/datasets/types/__init__.py +17 -0
  31. cartesia/datasets/types/create_dataset_request.py +19 -0
  32. cartesia/datasets/types/dataset.py +21 -0
  33. cartesia/datasets/types/dataset_file.py +21 -0
  34. cartesia/datasets/types/file_purpose.py +5 -0
  35. cartesia/datasets/types/paginated_dataset_files.py +21 -0
  36. cartesia/datasets/types/paginated_datasets.py +21 -0
  37. cartesia/embedding/__init__.py +5 -0
  38. cartesia/embedding/types/__init__.py +5 -0
  39. cartesia/embedding/types/embedding.py +201 -0
  40. cartesia/environment.py +7 -0
  41. cartesia/infill/__init__.py +2 -0
  42. cartesia/infill/client.py +318 -0
  43. cartesia/tts/__init__.py +167 -0
  44. cartesia/{_async_websocket.py → tts/_async_websocket.py} +212 -85
  45. cartesia/tts/_websocket.py +479 -0
  46. cartesia/tts/client.py +407 -0
  47. cartesia/tts/requests/__init__.py +76 -0
  48. cartesia/tts/requests/cancel_context_request.py +17 -0
  49. cartesia/tts/requests/controls.py +11 -0
  50. cartesia/tts/requests/generation_request.py +58 -0
  51. cartesia/tts/requests/mp_3_output_format.py +11 -0
  52. cartesia/tts/requests/output_format.py +30 -0
  53. cartesia/tts/requests/phoneme_timestamps.py +10 -0
  54. cartesia/tts/requests/raw_output_format.py +11 -0
  55. cartesia/tts/requests/speed.py +7 -0
  56. cartesia/tts/requests/tts_request.py +24 -0
  57. cartesia/tts/requests/tts_request_embedding_specifier.py +16 -0
  58. cartesia/tts/requests/tts_request_id_specifier.py +16 -0
  59. cartesia/tts/requests/tts_request_voice_specifier.py +7 -0
  60. cartesia/tts/requests/wav_output_format.py +7 -0
  61. cartesia/tts/requests/web_socket_base_response.py +11 -0
  62. cartesia/tts/requests/web_socket_chunk_response.py +11 -0
  63. cartesia/tts/requests/web_socket_done_response.py +7 -0
  64. cartesia/tts/requests/web_socket_error_response.py +7 -0
  65. cartesia/tts/requests/web_socket_flush_done_response.py +9 -0
  66. cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +9 -0
  67. cartesia/tts/requests/web_socket_raw_output_format.py +11 -0
  68. cartesia/tts/requests/web_socket_request.py +7 -0
  69. cartesia/tts/requests/web_socket_response.py +70 -0
  70. cartesia/tts/requests/web_socket_stream_options.py +8 -0
  71. cartesia/tts/requests/web_socket_timestamps_response.py +9 -0
  72. cartesia/tts/requests/web_socket_tts_output.py +18 -0
  73. cartesia/tts/requests/web_socket_tts_request.py +25 -0
  74. cartesia/tts/requests/word_timestamps.py +10 -0
  75. cartesia/tts/socket_client.py +302 -0
  76. cartesia/tts/types/__init__.py +90 -0
  77. cartesia/tts/types/cancel_context_request.py +28 -0
  78. cartesia/tts/types/context_id.py +3 -0
  79. cartesia/tts/types/controls.py +22 -0
  80. cartesia/tts/types/emotion.py +34 -0
  81. cartesia/tts/types/flush_id.py +3 -0
  82. cartesia/tts/types/generation_request.py +71 -0
  83. cartesia/tts/types/mp_3_output_format.py +23 -0
  84. cartesia/tts/types/natural_specifier.py +5 -0
  85. cartesia/tts/types/numerical_specifier.py +3 -0
  86. cartesia/tts/types/output_format.py +58 -0
  87. cartesia/tts/types/phoneme_timestamps.py +21 -0
  88. cartesia/tts/types/raw_encoding.py +5 -0
  89. cartesia/tts/types/raw_output_format.py +22 -0
  90. cartesia/tts/types/speed.py +7 -0
  91. cartesia/tts/types/supported_language.py +7 -0
  92. cartesia/tts/types/tts_request.py +35 -0
  93. cartesia/tts/types/tts_request_embedding_specifier.py +27 -0
  94. cartesia/tts/types/tts_request_id_specifier.py +27 -0
  95. cartesia/tts/types/tts_request_voice_specifier.py +7 -0
  96. cartesia/tts/types/wav_output_format.py +17 -0
  97. cartesia/tts/types/web_socket_base_response.py +22 -0
  98. cartesia/tts/types/web_socket_chunk_response.py +22 -0
  99. cartesia/tts/types/web_socket_done_response.py +17 -0
  100. cartesia/tts/types/web_socket_error_response.py +19 -0
  101. cartesia/tts/types/web_socket_flush_done_response.py +21 -0
  102. cartesia/tts/types/web_socket_phoneme_timestamps_response.py +20 -0
  103. cartesia/tts/types/web_socket_raw_output_format.py +22 -0
  104. cartesia/tts/types/web_socket_request.py +7 -0
  105. cartesia/tts/types/web_socket_response.py +125 -0
  106. cartesia/tts/types/web_socket_stream_options.py +19 -0
  107. cartesia/tts/types/web_socket_timestamps_response.py +20 -0
  108. cartesia/tts/types/web_socket_tts_output.py +29 -0
  109. cartesia/tts/types/web_socket_tts_request.py +37 -0
  110. cartesia/tts/types/word_timestamps.py +21 -0
  111. cartesia/{_constants.py → tts/utils/constants.py} +2 -2
  112. cartesia/tts/utils/tts.py +64 -0
  113. cartesia/tts/utils/types.py +70 -0
  114. cartesia/version.py +3 -1
  115. cartesia/voice_changer/__init__.py +27 -0
  116. cartesia/voice_changer/client.py +395 -0
  117. cartesia/voice_changer/requests/__init__.py +15 -0
  118. cartesia/voice_changer/requests/streaming_response.py +38 -0
  119. cartesia/voice_changer/types/__init__.py +17 -0
  120. cartesia/voice_changer/types/output_format_container.py +5 -0
  121. cartesia/voice_changer/types/streaming_response.py +64 -0
  122. cartesia/voices/__init__.py +81 -0
  123. cartesia/voices/client.py +1218 -0
  124. cartesia/voices/requests/__init__.py +29 -0
  125. cartesia/voices/requests/create_voice_request.py +23 -0
  126. cartesia/voices/requests/embedding_response.py +8 -0
  127. cartesia/voices/requests/embedding_specifier.py +10 -0
  128. cartesia/voices/requests/get_voices_response.py +24 -0
  129. cartesia/voices/requests/id_specifier.py +10 -0
  130. cartesia/voices/requests/localize_dialect.py +11 -0
  131. cartesia/voices/requests/localize_voice_request.py +28 -0
  132. cartesia/voices/requests/mix_voice_specifier.py +7 -0
  133. cartesia/voices/requests/mix_voices_request.py +9 -0
  134. cartesia/voices/requests/update_voice_request.py +15 -0
  135. cartesia/voices/requests/voice.py +43 -0
  136. cartesia/voices/requests/voice_metadata.py +36 -0
  137. cartesia/voices/types/__init__.py +53 -0
  138. cartesia/voices/types/base_voice_id.py +5 -0
  139. cartesia/voices/types/clone_mode.py +5 -0
  140. cartesia/voices/types/create_voice_request.py +34 -0
  141. cartesia/voices/types/embedding_response.py +20 -0
  142. cartesia/voices/types/embedding_specifier.py +22 -0
  143. cartesia/voices/types/gender.py +5 -0
  144. cartesia/voices/types/gender_presentation.py +5 -0
  145. cartesia/voices/types/get_voices_response.py +34 -0
  146. cartesia/voices/types/id_specifier.py +22 -0
  147. cartesia/voices/types/localize_dialect.py +11 -0
  148. cartesia/voices/types/localize_english_dialect.py +5 -0
  149. cartesia/voices/types/localize_french_dialect.py +5 -0
  150. cartesia/voices/types/localize_portuguese_dialect.py +5 -0
  151. cartesia/voices/types/localize_spanish_dialect.py +5 -0
  152. cartesia/voices/types/localize_target_language.py +7 -0
  153. cartesia/voices/types/localize_voice_request.py +39 -0
  154. cartesia/voices/types/mix_voice_specifier.py +7 -0
  155. cartesia/voices/types/mix_voices_request.py +20 -0
  156. cartesia/voices/types/update_voice_request.py +27 -0
  157. cartesia/voices/types/voice.py +54 -0
  158. cartesia/voices/types/voice_expand_options.py +5 -0
  159. cartesia/voices/types/voice_id.py +3 -0
  160. cartesia/voices/types/voice_metadata.py +48 -0
  161. cartesia/voices/types/weight.py +3 -0
  162. cartesia-2.0.0.dist-info/METADATA +414 -0
  163. cartesia-2.0.0.dist-info/RECORD +165 -0
  164. {cartesia-1.3.1.dist-info → cartesia-2.0.0.dist-info}/WHEEL +1 -1
  165. cartesia/_async_sse.py +0 -95
  166. cartesia/_logger.py +0 -3
  167. cartesia/_sse.py +0 -143
  168. cartesia/_types.py +0 -70
  169. cartesia/_websocket.py +0 -358
  170. cartesia/async_client.py +0 -82
  171. cartesia/async_tts.py +0 -63
  172. cartesia/resource.py +0 -44
  173. cartesia/tts.py +0 -137
  174. cartesia/utils/deprecated.py +0 -55
  175. cartesia/utils/retry.py +0 -87
  176. cartesia/utils/tts.py +0 -78
  177. cartesia/voices.py +0 -208
  178. cartesia-1.3.1.dist-info/METADATA +0 -661
  179. cartesia-1.3.1.dist-info/RECORD +0 -23
  180. cartesia-1.3.1.dist-info/licenses/LICENSE.md +0 -21
  181. /cartesia/{utils/__init__.py → py.typed} +0 -0
cartesia/_websocket.py DELETED
@@ -1,358 +0,0 @@
1
- import base64
2
- import json
3
- import uuid
4
- from collections import defaultdict
5
- from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union
6
-
7
- try:
8
- from websockets.sync.client import connect
9
-
10
- IS_WEBSOCKET_SYNC_AVAILABLE = True
11
- except ImportError:
12
- IS_WEBSOCKET_SYNC_AVAILABLE = False
13
-
14
- from iterators import TimeoutIterator
15
-
16
- from cartesia._types import EventType, OutputFormat, VoiceControls
17
- from cartesia.utils.tts import _construct_tts_request
18
-
19
-
20
- class _TTSContext:
21
- """Manage a single context over a WebSocket.
22
-
23
- This class can be used to stream inputs, as they become available, to a specific `context_id`. See README for usage.
24
-
25
- See :class:`_AsyncTTSContext` for asynchronous use cases.
26
-
27
- Each TTSContext will close automatically when a done message is received for that context. It also closes if there is an error.
28
- """
29
-
30
- def __init__(self, context_id: str, websocket: "_WebSocket"):
31
- self._context_id = context_id
32
- self._websocket = websocket
33
- self._error = None
34
-
35
- def __del__(self):
36
- self._close()
37
-
38
- @property
39
- def context_id(self) -> str:
40
- return self._context_id
41
-
42
- def send(
43
- self,
44
- model_id: str,
45
- transcript: Iterator[str],
46
- output_format: OutputFormat,
47
- voice_id: Optional[str] = None,
48
- voice_embedding: Optional[List[float]] = None,
49
- context_id: Optional[str] = None,
50
- duration: Optional[int] = None,
51
- language: Optional[str] = None,
52
- add_timestamps: bool = False,
53
- _experimental_voice_controls: Optional[VoiceControls] = None,
54
- ) -> Generator[bytes, None, None]:
55
- """Send audio generation requests to the WebSocket and yield responses.
56
-
57
- Args:
58
- model_id: The ID of the model to use for generating audio.
59
- transcript: Iterator over text chunks with <1s latency.
60
- output_format: A dictionary containing the details of the output format.
61
- voice_id: The ID of the voice to use for generating audio.
62
- voice_embedding: The embedding of the voice to use for generating audio.
63
- context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
64
- duration: The duration of the audio in seconds.
65
- language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
66
- add_timestamps: Whether to return word-level timestamps.
67
- _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
68
- Note: This is an experimental feature and may change rapidly in future releases.
69
-
70
- Yields:
71
- Dictionary containing the following key(s):
72
- - audio: The audio as bytes.
73
- - context_id: The context ID for the request.
74
-
75
- Raises:
76
- ValueError: If provided context_id doesn't match the current context.
77
- RuntimeError: If there's an error generating audio.
78
- """
79
- if context_id is not None and context_id != self._context_id:
80
- raise ValueError("Context ID does not match the context ID of the current context.")
81
-
82
- self._websocket.connect()
83
-
84
- # Create the initial request body
85
- request_body = _construct_tts_request(
86
- model_id=model_id,
87
- transcript=transcript,
88
- output_format=output_format,
89
- voice_id=voice_id,
90
- voice_embedding=voice_embedding,
91
- duration=duration,
92
- language=language,
93
- context_id=self._context_id,
94
- add_timestamps=add_timestamps,
95
- _experimental_voice_controls=_experimental_voice_controls,
96
- )
97
-
98
- try:
99
- # Create an iterator with a timeout to get text chunks
100
- text_iterator = TimeoutIterator(
101
- transcript, timeout=0.001
102
- ) # 1ms timeout for nearly non-blocking receive
103
- next_chunk = next(text_iterator, None)
104
-
105
- while True:
106
- # Send the next text chunk to the WebSocket if available
107
- if next_chunk is not None and next_chunk != text_iterator.get_sentinel():
108
- request_body["transcript"] = next_chunk
109
- request_body["continue"] = True
110
- self._websocket.websocket.send(json.dumps(request_body))
111
- next_chunk = next(text_iterator, None)
112
-
113
- try:
114
- # Receive responses from the WebSocket with a small timeout
115
- response = json.loads(
116
- self._websocket.websocket.recv(timeout=0.001)
117
- ) # 1ms timeout for nearly non-blocking receive
118
- if response["context_id"] != self._context_id:
119
- pass
120
- if "error" in response:
121
- raise RuntimeError(f"Error generating audio:\n{response['error']}")
122
- if response["done"]:
123
- break
124
- if response["data"]:
125
- yield self._websocket._convert_response(
126
- response=response, include_context_id=True
127
- )
128
- except TimeoutError:
129
- pass
130
-
131
- # Continuously receive from WebSocket until the next text chunk is available
132
- while next_chunk == text_iterator.get_sentinel():
133
- try:
134
- response = json.loads(self._websocket.websocket.recv(timeout=0.001))
135
- if response["context_id"] != self._context_id:
136
- continue
137
- if "error" in response:
138
- raise RuntimeError(f"Error generating audio:\n{response['error']}")
139
- if response["done"]:
140
- break
141
- if response["data"]:
142
- yield self._websocket._convert_response(
143
- response=response, include_context_id=True
144
- )
145
- except TimeoutError:
146
- pass
147
- next_chunk = next(text_iterator, None)
148
-
149
- # Send final message if all input text chunks are exhausted
150
- if next_chunk is None:
151
- request_body["transcript"] = ""
152
- request_body["continue"] = False
153
- self._websocket.websocket.send(json.dumps(request_body))
154
- break
155
-
156
- # Receive remaining messages from the WebSocket until "done" is received
157
- while True:
158
- response = json.loads(self._websocket.websocket.recv())
159
- if response["context_id"] != self._context_id:
160
- continue
161
- if "error" in response:
162
- raise RuntimeError(f"Error generating audio:\n{response['error']}")
163
- if response["done"]:
164
- break
165
- yield self._websocket._convert_response(response=response, include_context_id=True)
166
-
167
- except Exception as e:
168
- self._websocket.close()
169
- raise RuntimeError(f"Failed to generate audio. {e}")
170
-
171
- def _close(self):
172
- """Closes the context. Automatically called when a done message is received for this context."""
173
- self._websocket._remove_context(self._context_id)
174
-
175
- def is_closed(self):
176
- """Check if the context is closed or not. Returns True if closed."""
177
- return self._context_id not in self._websocket._contexts
178
-
179
-
180
- class _WebSocket:
181
- """This class contains methods to generate audio using WebSocket. Ideal for low-latency audio generation.
182
-
183
- Usage:
184
- >>> ws = client.tts.websocket()
185
- >>> for audio_chunk in ws.send(
186
- ... model_id="sonic-english", transcript="Hello world!", voice_embedding=embedding,
187
- ... output_format={"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
188
- ... context_id=context_id, stream=True
189
- ... ):
190
- ... audio = audio_chunk["audio"]
191
- """
192
-
193
- def __init__(
194
- self,
195
- ws_url: str,
196
- api_key: str,
197
- cartesia_version: str,
198
- ):
199
- self.ws_url = ws_url
200
- self.api_key = api_key
201
- self.cartesia_version = cartesia_version
202
- self.websocket = None
203
- self._contexts: Set[str] = set()
204
-
205
- def __del__(self):
206
- try:
207
- self.close()
208
- except Exception as e:
209
- raise RuntimeError("Failed to close WebSocket: ", e)
210
-
211
- def connect(self):
212
- """This method connects to the WebSocket if it is not already connected.
213
-
214
- Raises:
215
- RuntimeError: If the connection to the WebSocket fails.
216
- """
217
- if not IS_WEBSOCKET_SYNC_AVAILABLE:
218
- raise ImportError(
219
- "The synchronous WebSocket client is not available. Please ensure that you have 'websockets>=12.0' or compatible version installed."
220
- )
221
- if self.websocket is None or self._is_websocket_closed():
222
- route = "tts/websocket"
223
- try:
224
- self.websocket = connect(
225
- f"{self.ws_url}/{route}?api_key={self.api_key}&cartesia_version={self.cartesia_version}"
226
- )
227
- except Exception as e:
228
- raise RuntimeError(f"Failed to connect to WebSocket. {e}")
229
-
230
- def _is_websocket_closed(self):
231
- return self.websocket.socket.fileno() == -1
232
-
233
- def close(self):
234
- """This method closes the WebSocket connection. *Highly* recommended to call this method when done using the WebSocket."""
235
- if self.websocket and not self._is_websocket_closed():
236
- self.websocket.close()
237
-
238
- if self._contexts:
239
- self._contexts.clear()
240
-
241
- def _convert_response(
242
- self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
243
- ) -> Dict[str, Any]:
244
- out = {}
245
- if response["type"] == EventType.AUDIO:
246
- out["audio"] = base64.b64decode(response["data"])
247
- elif response["type"] == EventType.TIMESTAMPS:
248
- out["word_timestamps"] = response["word_timestamps"]
249
-
250
- if include_context_id:
251
- out["context_id"] = response["context_id"]
252
-
253
- if include_flush_id and "flush_id" in response:
254
- out["flush_id"] = response["flush_id"]
255
-
256
- return out
257
-
258
- def send(
259
- self,
260
- model_id: str,
261
- transcript: str,
262
- output_format: dict,
263
- voice_id: Optional[str] = None,
264
- voice_embedding: Optional[List[float]] = None,
265
- context_id: Optional[str] = None,
266
- duration: Optional[int] = None,
267
- language: Optional[str] = None,
268
- stream: bool = True,
269
- add_timestamps: bool = False,
270
- _experimental_voice_controls: Optional[VoiceControls] = None,
271
- ) -> Union[bytes, Generator[bytes, None, None]]:
272
- """Send a request to the WebSocket to generate audio.
273
-
274
- Args:
275
- model_id: The ID of the model to use for generating audio.
276
- transcript: The text to convert to speech.
277
- output_format: A dictionary containing the details of the output format.
278
- voice_id: The ID of the voice to use for generating audio.
279
- voice_embedding: The embedding of the voice to use for generating audio.
280
- context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
281
- duration: The duration of the audio in seconds.
282
- language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`
283
- stream: Whether to stream the audio or not.
284
- add_timestamps: Whether to return word-level timestamps.
285
- _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
286
- Note: This is an experimental feature and may change rapidly in future releases.
287
-
288
- Returns:
289
- If `stream` is True, the method returns a generator that yields chunks. Each chunk is a dictionary.
290
- If `stream` is False, the method returns a dictionary.
291
- Both the generator and the dictionary contain the following key(s):
292
- - audio: The audio as bytes.
293
- - context_id: The context ID for the request.
294
- """
295
- self.connect()
296
-
297
- if context_id is None:
298
- context_id = str(uuid.uuid4())
299
-
300
- request_body = _construct_tts_request(
301
- model_id=model_id,
302
- transcript=transcript,
303
- output_format=output_format,
304
- voice_id=voice_id,
305
- voice_embedding=voice_embedding,
306
- context_id=context_id,
307
- duration=duration,
308
- language=language,
309
- add_timestamps=add_timestamps,
310
- _experimental_voice_controls=_experimental_voice_controls,
311
- )
312
-
313
- generator = self._websocket_generator(request_body)
314
-
315
- if stream:
316
- return generator
317
-
318
- chunks = []
319
- word_timestamps = defaultdict(list)
320
- for chunk in generator:
321
- if "audio" in chunk:
322
- chunks.append(chunk["audio"])
323
- if add_timestamps and "word_timestamps" in chunk:
324
- for k, v in chunk["word_timestamps"].items():
325
- word_timestamps[k].extend(v)
326
- out = {"audio": b"".join(chunks), "context_id": context_id}
327
- if add_timestamps:
328
- out["word_timestamps"] = word_timestamps
329
- return out
330
-
331
- def _websocket_generator(self, request_body: Dict[str, Any]):
332
- self.websocket.send(json.dumps(request_body))
333
-
334
- try:
335
- while True:
336
- response = json.loads(self.websocket.recv())
337
- if "error" in response:
338
- raise RuntimeError(f"Error generating audio:\n{response['error']}")
339
- if response["done"]:
340
- break
341
- yield self._convert_response(response=response, include_context_id=True)
342
- except Exception as e:
343
- # Close the websocket connection if an error occurs.
344
- self.close()
345
- raise RuntimeError(f"Failed to generate audio. {response}") from e
346
-
347
- def _remove_context(self, context_id: str):
348
- if context_id in self._contexts:
349
- self._contexts.remove(context_id)
350
-
351
- def context(self, context_id: Optional[str] = None) -> _TTSContext:
352
- if context_id in self._contexts:
353
- raise ValueError(f"Context for context ID {context_id} already exists.")
354
- if context_id is None:
355
- context_id = str(uuid.uuid4())
356
- if context_id not in self._contexts:
357
- self._contexts.add(context_id)
358
- return _TTSContext(context_id, self)
cartesia/async_client.py DELETED
@@ -1,82 +0,0 @@
1
- import asyncio
2
- from types import TracebackType
3
- from typing import Optional, Union
4
-
5
- import aiohttp
6
-
7
- from cartesia._constants import DEFAULT_NUM_CONNECTIONS, DEFAULT_TIMEOUT
8
- from cartesia.async_tts import AsyncTTS
9
- from cartesia.client import Cartesia
10
-
11
-
12
- class AsyncCartesia(Cartesia):
13
- """The asynchronous version of the Cartesia client."""
14
-
15
- def __init__(
16
- self,
17
- *,
18
- api_key: Optional[str] = None,
19
- base_url: Optional[str] = None,
20
- timeout: float = DEFAULT_TIMEOUT,
21
- max_num_connections: int = DEFAULT_NUM_CONNECTIONS,
22
- ):
23
- """
24
- Args:
25
- api_key: See :class:`Cartesia`.
26
- base_url: See :class:`Cartesia`.
27
- timeout: See :class:`Cartesia`.
28
- max_num_connections: The maximum number of concurrent connections to use for the client.
29
- This is used to limit the number of connections that can be made to the server.
30
- """
31
- self._session = None
32
- self._loop = None
33
- super().__init__(api_key=api_key, base_url=base_url, timeout=timeout)
34
- self.max_num_connections = max_num_connections
35
- self.tts = AsyncTTS(
36
- api_key=self.api_key,
37
- base_url=self._base_url,
38
- timeout=self.timeout,
39
- get_session=self._get_session,
40
- )
41
-
42
- async def _get_session(self):
43
- current_loop = asyncio.get_event_loop()
44
- if self._loop is not current_loop:
45
- # If the loop has changed, close the session and create a new one.
46
- await self.close()
47
- if self._session is None or self._session.closed:
48
- timeout = aiohttp.ClientTimeout(total=self.timeout)
49
- connector = aiohttp.TCPConnector(limit=self.max_num_connections)
50
- self._session = aiohttp.ClientSession(timeout=timeout, connector=connector)
51
- self._loop = current_loop
52
- return self._session
53
-
54
- async def close(self):
55
- """This method closes the session.
56
-
57
- It is *strongly* recommended to call this method when you are done using the client.
58
- """
59
- if self._session is not None and not self._session.closed:
60
- await self._session.close()
61
-
62
- def __del__(self):
63
- try:
64
- loop = asyncio.get_running_loop()
65
- except RuntimeError:
66
- loop = None
67
-
68
- if loop is None:
69
- asyncio.run(self.close())
70
- elif loop.is_running():
71
- loop.create_task(self.close())
72
-
73
- async def __aenter__(self):
74
- return self
75
-
76
- async def __aexit__(
77
- self,
78
- exc_type: Union[type, None],
79
- exc: Union[BaseException, None],
80
- exc_tb: Union[TracebackType, None],
81
- ):
82
- await self.close()
cartesia/async_tts.py DELETED
@@ -1,63 +0,0 @@
1
- from typing import Iterator, List, Optional
2
-
3
- import httpx
4
- from cartesia._async_sse import _AsyncSSE
5
- from cartesia._async_websocket import _AsyncWebSocket
6
- from cartesia._types import OutputFormat, VoiceControls
7
- from cartesia.tts import TTS
8
- from cartesia.utils.tts import _construct_tts_request
9
-
10
-
11
- class AsyncTTS(TTS):
12
- def __init__(self, api_key, base_url, timeout, get_session):
13
- super().__init__(api_key, base_url, timeout)
14
- self._get_session = get_session
15
- self._sse_class = _AsyncSSE(self._http_url(), self.headers, self.timeout, get_session)
16
- self.sse = self._sse_class.send
17
-
18
- async def websocket(self) -> _AsyncWebSocket:
19
- ws = _AsyncWebSocket(
20
- self._ws_url(),
21
- self.api_key,
22
- self.cartesia_version,
23
- self.timeout,
24
- self._get_session,
25
- )
26
- await ws.connect()
27
- return ws
28
-
29
- async def bytes(
30
- self,
31
- *,
32
- model_id: str,
33
- transcript: str,
34
- output_format: OutputFormat,
35
- voice_id: Optional[str] = None,
36
- voice_embedding: Optional[List[float]] = None,
37
- duration: Optional[int] = None,
38
- language: Optional[str] = None,
39
- _experimental_voice_controls: Optional[VoiceControls] = None,
40
- ) -> bytes:
41
- request_body = _construct_tts_request(
42
- model_id=model_id,
43
- transcript=transcript,
44
- output_format=output_format,
45
- voice_id=voice_id,
46
- voice_embedding=voice_embedding,
47
- duration=duration,
48
- language=language,
49
- _experimental_voice_controls=_experimental_voice_controls,
50
- )
51
-
52
- async with httpx.AsyncClient() as client:
53
- response = await client.post(
54
- f"{self._http_url()}/tts/bytes",
55
- headers=self.headers,
56
- timeout=self.timeout,
57
- json=request_body,
58
- )
59
-
60
- if not response.is_success:
61
- raise ValueError(f"Failed to generate audio. Error: {response.text}")
62
-
63
- return response.content
cartesia/resource.py DELETED
@@ -1,44 +0,0 @@
1
- from cartesia._constants import DEFAULT_CARTESIA_VERSION
2
-
3
-
4
- class Resource:
5
- def __init__(
6
- self,
7
- api_key: str,
8
- base_url: str,
9
- timeout: float,
10
- ):
11
- """Constructor for the Resource class. Used by the Voices and TTS classes."""
12
- self.api_key = api_key
13
- self.timeout = timeout
14
- self._base_url = base_url
15
- self.cartesia_version = DEFAULT_CARTESIA_VERSION
16
- self.headers = {
17
- "X-API-Key": self.api_key,
18
- "Cartesia-Version": self.cartesia_version,
19
- "Content-Type": "application/json",
20
- }
21
-
22
- @property
23
- def base_url(self):
24
- return self._base_url
25
-
26
- def _http_url(self):
27
- """Returns the HTTP URL for the Cartesia API.
28
- If the base URL is localhost, the URL will start with 'http'. Otherwise, it will start with 'https'.
29
- """
30
- if self._base_url.startswith("http://") or self._base_url.startswith("https://"):
31
- return self._base_url
32
- else:
33
- prefix = "http" if "localhost" in self._base_url else "https"
34
- return f"{prefix}://{self._base_url}"
35
-
36
- def _ws_url(self):
37
- """Returns the WebSocket URL for the Cartesia API.
38
- If the base URL is localhost, the URL will start with 'ws'. Otherwise, it will start with 'wss'.
39
- """
40
- if self._base_url.startswith("ws://") or self._base_url.startswith("wss://"):
41
- return self._base_url
42
- else:
43
- prefix = "ws" if "localhost" in self._base_url else "wss"
44
- return f"{prefix}://{self._base_url}"
cartesia/tts.py DELETED
@@ -1,137 +0,0 @@
1
- from typing import Iterator, List, Optional
2
-
3
- import httpx
4
-
5
- from cartesia._sse import _SSE
6
- from cartesia._types import (
7
- OutputFormat,
8
- OutputFormatMapping,
9
- VoiceControls,
10
- )
11
- from cartesia._websocket import _WebSocket
12
- from cartesia.resource import Resource
13
- from cartesia.utils.tts import _construct_tts_request, _validate_and_construct_voice
14
-
15
-
16
- class TTS(Resource):
17
- """This resource contains methods to generate audio using Cartesia's text-to-speech API."""
18
-
19
- def __init__(self, api_key: str, base_url: str, timeout: float):
20
- super().__init__(
21
- api_key=api_key,
22
- base_url=base_url,
23
- timeout=timeout,
24
- )
25
- self._sse_class = _SSE(self._http_url(), self.headers, self.timeout)
26
- self.sse = self._sse_class.send
27
-
28
- def websocket(self) -> _WebSocket:
29
- """This method returns a WebSocket object that can be used to generate audio using WebSocket.
30
-
31
- Returns:
32
- _WebSocket: A WebSocket object that can be used to generate audio using WebSocket.
33
- """
34
- ws = _WebSocket(self._ws_url(), self.api_key, self.cartesia_version)
35
- ws.connect()
36
- return ws
37
-
38
- def bytes(
39
- self,
40
- *,
41
- model_id: str,
42
- transcript: str,
43
- output_format: OutputFormat,
44
- voice_id: Optional[str] = None,
45
- voice_embedding: Optional[List[float]] = None,
46
- duration: Optional[int] = None,
47
- language: Optional[str] = None,
48
- _experimental_voice_controls: Optional[VoiceControls] = None,
49
- ) -> bytes:
50
- request_body = _construct_tts_request(
51
- model_id=model_id,
52
- transcript=transcript,
53
- output_format=output_format,
54
- voice_id=voice_id,
55
- voice_embedding=voice_embedding,
56
- duration=duration,
57
- language=language,
58
- _experimental_voice_controls=_experimental_voice_controls,
59
- )
60
-
61
- response = httpx.post(
62
- f"{self._http_url()}/tts/bytes",
63
- headers=self.headers,
64
- timeout=self.timeout,
65
- json=request_body,
66
- )
67
-
68
- if not response.is_success:
69
- raise ValueError(f"Failed to generate audio. Error: {response.text}")
70
-
71
- return response.content
72
-
73
- @staticmethod
74
- def get_output_format(output_format_name: str) -> OutputFormat:
75
- """Convenience method to get the output_format dictionary from a given output format name.
76
-
77
- Args:
78
- output_format_name (str): The name of the output format.
79
-
80
- Returns:
81
- OutputFormat: A dictionary containing the details of the output format to be passed into tts.sse() or tts.websocket().send()
82
-
83
- Raises:
84
- ValueError: If the output_format name is not supported
85
- """
86
- if output_format_name in OutputFormatMapping._format_mapping:
87
- output_format_obj = OutputFormatMapping.get_format(output_format_name)
88
- else:
89
- raise ValueError(f"Unsupported format: {output_format_name}")
90
-
91
- return OutputFormat(
92
- container=output_format_obj["container"],
93
- encoding=output_format_obj["encoding"],
94
- sample_rate=output_format_obj["sample_rate"],
95
- )
96
-
97
- @staticmethod
98
- def get_sample_rate(output_format_name: str) -> int:
99
- """Convenience method to get the sample rate for a given output format.
100
-
101
- Args:
102
- output_format_name (str): The name of the output format.
103
-
104
- Returns:
105
- int: The sample rate for the output format.
106
-
107
- Raises:
108
- ValueError: If the output_format name is not supported
109
- """
110
- if output_format_name in OutputFormatMapping._format_mapping:
111
- output_format_obj = OutputFormatMapping.get_format(output_format_name)
112
- else:
113
- raise ValueError(f"Unsupported format: {output_format_name}")
114
-
115
- return output_format_obj["sample_rate"]
116
-
117
- @staticmethod
118
- def _validate_and_construct_voice(
119
- voice_id: Optional[str] = None,
120
- voice_embedding: Optional[List[float]] = None,
121
- experimental_voice_controls: Optional[VoiceControls] = None,
122
- ) -> dict:
123
- """Validate and construct the voice dictionary for the request.
124
-
125
- Args:
126
- voice_id: The ID of the voice to use for generating audio.
127
- voice_embedding: The embedding of the voice to use for generating audio.
128
- experimental_voice_controls: Voice controls for emotion and speed.
129
- Note: This is an experimental feature and may rapidly change in the future.
130
-
131
- Returns:
132
- A dictionary representing the voice configuration.
133
-
134
- Raises:
135
- ValueError: If neither or both voice_id and voice_embedding are specified.
136
- """
137
- return _validate_and_construct_voice(voice_id, voice_embedding, experimental_voice_controls)