cartesia 1.0.7__tar.gz → 1.0.8__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
25
25
  > [!IMPORTANT]
26
26
  > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
27
27
 
28
+ - [Cartesia Python API Library](#cartesia-python-api-library)
29
+ - [Documentation](#documentation)
30
+ - [Installation](#installation)
31
+ - [Voices](#voices)
32
+ - [Text-to-Speech](#text-to-speech)
33
+ - [Server-Sent Events (SSE)](#server-sent-events-sse)
34
+ - [WebSocket](#websocket)
35
+ - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
36
+ - [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
37
+ - [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
38
+ - [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
39
+ - [Jupyter Notebook Usage](#jupyter-notebook-usage)
40
+ - [Utility methods](#utility-methods)
41
+ - [Output Formats](#output-formats)
42
+
43
+
28
44
  ## Documentation
29
45
 
30
46
  Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
250
266
 
251
267
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
252
268
  model_id = "sonic-english"
253
-
269
+
254
270
  # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
255
271
  output_format = {
256
272
  "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
266
282
  "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
267
283
  "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
268
284
  ]
269
-
285
+
270
286
  for transcript in transcripts:
271
287
  # Send text inputs as they become available
272
288
  await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
278
294
  )
279
295
 
280
296
  # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
281
- await ctx.no_more_inputs()
297
+ await ctx.no_more_inputs()
282
298
 
283
299
  async def receive_and_play_audio(ctx):
284
300
  p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
384
400
  voice_id=voice_id,
385
401
  output_format=output_format,
386
402
  )
387
-
403
+
388
404
  for output in output_stream:
389
405
  buffer = output["audio"]
390
406
 
@@ -8,6 +8,22 @@ The official Cartesia Python library which provides convenient access to the Car
8
8
  > [!IMPORTANT]
9
9
  > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
10
10
 
11
+ - [Cartesia Python API Library](#cartesia-python-api-library)
12
+ - [Documentation](#documentation)
13
+ - [Installation](#installation)
14
+ - [Voices](#voices)
15
+ - [Text-to-Speech](#text-to-speech)
16
+ - [Server-Sent Events (SSE)](#server-sent-events-sse)
17
+ - [WebSocket](#websocket)
18
+ - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
19
+ - [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
20
+ - [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
21
+ - [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
22
+ - [Jupyter Notebook Usage](#jupyter-notebook-usage)
23
+ - [Utility methods](#utility-methods)
24
+ - [Output Formats](#output-formats)
25
+
26
+
11
27
  ## Documentation
12
28
 
13
29
  Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -233,7 +249,7 @@ async def send_transcripts(ctx):
233
249
 
234
250
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
235
251
  model_id = "sonic-english"
236
-
252
+
237
253
  # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
238
254
  output_format = {
239
255
  "container": "raw",
@@ -249,7 +265,7 @@ async def send_transcripts(ctx):
249
265
  "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
250
266
  "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
251
267
  ]
252
-
268
+
253
269
  for transcript in transcripts:
254
270
  # Send text inputs as they become available
255
271
  await ctx.send(
@@ -261,7 +277,7 @@ async def send_transcripts(ctx):
261
277
  )
262
278
 
263
279
  # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
264
- await ctx.no_more_inputs()
280
+ await ctx.no_more_inputs()
265
281
 
266
282
  async def receive_and_play_audio(ctx):
267
283
  p = pyaudio.PyAudio()
@@ -367,7 +383,7 @@ output_stream = ctx.send(
367
383
  voice_id=voice_id,
368
384
  output_format=output_format,
369
385
  )
370
-
386
+
371
387
  for output in output_stream:
372
388
  buffer = output["audio"]
373
389
 
@@ -23,7 +23,12 @@ import aiohttp
23
23
  import httpx
24
24
  import logging
25
25
  import requests
26
- from websockets.sync.client import connect
26
+ try:
27
+ from websockets.sync.client import connect
28
+ IS_WEBSOCKET_SYNC_AVAILABLE = True
29
+ except ImportError:
30
+ IS_WEBSOCKET_SYNC_AVAILABLE = False
31
+
27
32
  from iterators import TimeoutIterator
28
33
 
29
34
  from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
@@ -208,37 +213,25 @@ class Voices(Resource):
208
213
  return response.json()
209
214
 
210
215
  def clone(self, filepath: Optional[str] = None, link: Optional[str] = None) -> List[float]:
211
- """Clone a voice from a clip or a URL.
216
+ """Clone a voice from a clip.
212
217
 
213
218
  Args:
214
219
  filepath: The path to the clip file.
215
- link: The URL to the clip
216
220
 
217
221
  Returns:
218
222
  The embedding of the cloned voice as a list of floats.
219
223
  """
220
224
  # TODO: Python has a bytes object, use that instead of a filepath
221
- if not filepath and not link:
222
- raise ValueError("At least one of 'filepath' or 'link' must be specified.")
223
- if filepath and link:
224
- raise ValueError("Only one of 'filepath' or 'link' should be specified.")
225
- if filepath:
226
- url = f"{self._http_url()}/voices/clone/clip"
227
- with open(filepath, "rb") as file:
228
- files = {"clip": file}
229
- headers = self.headers.copy()
230
- headers.pop("Content-Type", None)
231
- response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
232
- if not response.is_success:
233
- raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
234
- elif link:
235
- url = f"{self._http_url()}/voices/clone/url"
236
- params = {"link": link}
225
+ if not filepath:
226
+ raise ValueError("Filepath must be specified.")
227
+ url = f"{self._http_url()}/voices/clone/clip"
228
+ with open(filepath, "rb") as file:
229
+ files = {"clip": file}
237
230
  headers = self.headers.copy()
238
- headers.pop("Content-Type") # The content type header is not required for URLs
239
- response = httpx.post(url, headers=self.headers, params=params, timeout=self.timeout)
231
+ headers.pop("Content-Type", None)
232
+ response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
240
233
  if not response.is_success:
241
- raise ValueError(f"Failed to clone voice from URL. Error: {response.text}")
234
+ raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
242
235
 
243
236
  return response.json()["embedding"]
244
237
 
@@ -469,6 +462,10 @@ class _WebSocket:
469
462
  Raises:
470
463
  RuntimeError: If the connection to the WebSocket fails.
471
464
  """
465
+ if not IS_WEBSOCKET_SYNC_AVAILABLE:
466
+ raise ImportError(
467
+ "The synchronous WebSocket client is not available. Please ensure that you have 'websockets>=12.0' or compatible version installed."
468
+ )
472
469
  if self.websocket is None or self._is_websocket_closed():
473
470
  route = "tts/websocket"
474
471
  try:
@@ -0,0 +1 @@
1
+ __version__ = "1.0.8"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
25
25
  > [!IMPORTANT]
26
26
  > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
27
27
 
28
+ - [Cartesia Python API Library](#cartesia-python-api-library)
29
+ - [Documentation](#documentation)
30
+ - [Installation](#installation)
31
+ - [Voices](#voices)
32
+ - [Text-to-Speech](#text-to-speech)
33
+ - [Server-Sent Events (SSE)](#server-sent-events-sse)
34
+ - [WebSocket](#websocket)
35
+ - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
36
+ - [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
37
+ - [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
38
+ - [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
39
+ - [Jupyter Notebook Usage](#jupyter-notebook-usage)
40
+ - [Utility methods](#utility-methods)
41
+ - [Output Formats](#output-formats)
42
+
43
+
28
44
  ## Documentation
29
45
 
30
46
  Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
250
266
 
251
267
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
252
268
  model_id = "sonic-english"
253
-
269
+
254
270
  # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
255
271
  output_format = {
256
272
  "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
266
282
  "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
267
283
  "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
268
284
  ]
269
-
285
+
270
286
  for transcript in transcripts:
271
287
  # Send text inputs as they become available
272
288
  await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
278
294
  )
279
295
 
280
296
  # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
281
- await ctx.no_more_inputs()
297
+ await ctx.no_more_inputs()
282
298
 
283
299
  async def receive_and_play_audio(ctx):
284
300
  p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
384
400
  voice_id=voice_id,
385
401
  output_format=output_format,
386
402
  )
387
-
403
+
388
404
  for output in output_stream:
389
405
  buffer = output["audio"]
390
406
 
@@ -79,14 +79,6 @@ def test_get_voice_from_id(client: Cartesia):
79
79
  voices = client.voices.list()
80
80
  assert voice in voices
81
81
 
82
- # Does not work currently, LB issue
83
- # def test_clone_voice_with_link(client: Cartesia):
84
- # url = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
85
- # logger.info("Testing voices.clone with link")
86
- # cloned_voice_embedding = client.voices.clone(link=url)
87
- # assert isinstance(cloned_voice_embedding, list)
88
- # assert len(cloned_voice_embedding) == 192
89
-
90
82
  def test_clone_voice_with_file(client: Cartesia):
91
83
  logger.info("Testing voices.clone with file")
92
84
  output = client.voices.clone(filepath=os.path.join(RESOURCES_DIR, "sample-speech-4s.wav"))
@@ -1 +0,0 @@
1
- __version__ = "1.0.7"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes