cartesia 1.0.7__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.7
+Version: 1.0.9
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
    model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -8,6 +8,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -233,7 +249,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -249,7 +265,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -261,7 +277,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -367,7 +383,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -0,0 +1,3 @@
+from cartesia.client import AsyncCartesia, Cartesia
+
+__all__ = ["Cartesia", "AsyncCartesia"]
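The new top-level `__init__.py` re-exports both clients, so they can be imported straight from the package root. A minimal sketch of what that enables; the `api_key` argument and the `CARTESIA_API_KEY` environment variable are assumptions taken from the project README, not from this diff:

```python
import os

# Both names are re-exported by cartesia/__init__.py, per the __all__ above.
from cartesia import AsyncCartesia, Cartesia

# Assumption: the clients accept an `api_key` argument, as documented in the Cartesia README.
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
async_client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
```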
@@ -1,4 +1,5 @@
 from typing import List, TypedDict
+
 from cartesia.utils.deprecated import deprecated
 
 
@@ -1,41 +1,47 @@
 import asyncio
 import base64
-from collections import defaultdict
 import json
+import logging
 import os
 import uuid
+from collections import defaultdict
 from types import TracebackType
 from typing import (
     Any,
     AsyncGenerator,
-    Iterator,
+    Callable,
     Dict,
     Generator,
+    Iterator,
     List,
     Optional,
+    Set,
     Tuple,
     Union,
-    Callable,
-    Set,
 )
 
 import aiohttp
 import httpx
-import logging
 import requests
-from websockets.sync.client import connect
-from iterators import TimeoutIterator
 
-from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
+try:
+    from websockets.sync.client import connect
+
+    IS_WEBSOCKET_SYNC_AVAILABLE = True
+except ImportError:
+    IS_WEBSOCKET_SYNC_AVAILABLE = False
+
 from cartesia._types import (
+    DeprecatedOutputFormatMapping,
     EventType,
     OutputFormat,
     OutputFormatMapping,
-    DeprecatedOutputFormatMapping,
     VoiceControls,
     VoiceMetadata,
 )
-
+from cartesia.utils.retry import retry_on_connection_error, retry_on_connection_error_async
+from iterators import TimeoutIterator
+from websockets.sync.client import connect
 
 DEFAULT_MODEL_ID = "sonic-english" # latest default model
 MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
@@ -207,38 +213,27 @@ class Voices(Resource):
 
         return response.json()
 
-    def clone(self, filepath: Optional[str] = None, link: Optional[str] = None) -> List[float]:
-        """Clone a voice from a clip or a URL.
+    def clone(self, filepath: Optional[str] = None, enhance: str = True) -> List[float]:
+        """Clone a voice from a clip.
 
         Args:
             filepath: The path to the clip file.
-            link: The URL to the clip
+            enhance: Whether to enhance the clip before cloning the voice (highly recommended). Defaults to True.
 
         Returns:
             The embedding of the cloned voice as a list of floats.
         """
-        # TODO: Python has a bytes object, use that instead of a filepath
-        if not filepath and not link:
-            raise ValueError("At least one of 'filepath' or 'link' must be specified.")
-        if filepath and link:
-            raise ValueError("Only one of 'filepath' or 'link' should be specified.")
-        if filepath:
-            url = f"{self._http_url()}/voices/clone/clip"
-            with open(filepath, "rb") as file:
-                files = {"clip": file}
-                headers = self.headers.copy()
-                headers.pop("Content-Type", None)
-                response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
-                if not response.is_success:
-                    raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
-        elif link:
-            url = f"{self._http_url()}/voices/clone/url"
-            params = {"link": link}
+        if not filepath:
+            raise ValueError("Filepath must be specified.")
+        url = f"{self._http_url()}/voices/clone/clip"
+        with open(filepath, "rb") as file:
+            files = {"clip": file}
+            files["enhance"] = str(enhance).lower()
             headers = self.headers.copy()
-            headers.pop("Content-Type")  # The content type header is not required for URLs
-            response = httpx.post(url, headers=self.headers, params=params, timeout=self.timeout)
+            headers.pop("Content-Type", None)
+            response = httpx.post(url, headers=headers, files=files, timeout=self.timeout)
             if not response.is_success:
-                raise ValueError(f"Failed to clone voice from URL. Error: {response.text}")
+                raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
 
         return response.json()["embedding"]
 
@@ -469,6 +464,10 @@ class _WebSocket:
         Raises:
             RuntimeError: If the connection to the WebSocket fails.
         """
+        if not IS_WEBSOCKET_SYNC_AVAILABLE:
+            raise ImportError(
+                "The synchronous WebSocket client is not available. Please ensure that you have 'websockets>=12.0' or compatible version installed."
+            )
         if self.websocket is None or self._is_websocket_closed():
             route = "tts/websocket"
             try:
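The sync WebSocket import in `client.py` is now optional: a module-level flag records whether `websockets.sync.client` could be imported, and the error is raised only when a synchronous WebSocket connection is actually requested. A minimal standalone sketch of that pattern, assuming nothing beyond what the hunks above show (the `_connect_sync` helper is hypothetical, for illustration only):

```python
# Optional-dependency guard mirroring the change above: the import failure is
# recorded at module load time and surfaced only when the feature is used.
try:
    from websockets.sync.client import connect  # provided by websockets>=12.0

    IS_WEBSOCKET_SYNC_AVAILABLE = True
except ImportError:
    IS_WEBSOCKET_SYNC_AVAILABLE = False


def _connect_sync(url: str):
    """Hypothetical helper showing where the deferred ImportError surfaces."""
    if not IS_WEBSOCKET_SYNC_AVAILABLE:
        raise ImportError(
            "The synchronous WebSocket client is not available. "
            "Please ensure that you have 'websockets>=12.0' or a compatible version installed."
        )
    return connect(url)
```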
@@ -0,0 +1 @@
+__version__ = "1.0.9"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 1.0.7
+Version: 1.0.9
 Summary: The official Python library for the Cartesia API.
 Home-page:
 Author: Cartesia, Inc.
@@ -25,6 +25,22 @@ The official Cartesia Python library which provides convenient access to the Car
 > [!IMPORTANT]
 > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
 
+- [Cartesia Python API Library](#cartesia-python-api-library)
+- [Documentation](#documentation)
+- [Installation](#installation)
+- [Voices](#voices)
+- [Text-to-Speech](#text-to-speech)
+- [Server-Sent Events (SSE)](#server-sent-events-sse)
+- [WebSocket](#websocket)
+- [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+- [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+- [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+- [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+- [Jupyter Notebook Usage](#jupyter-notebook-usage)
+- [Utility methods](#utility-methods)
+- [Output Formats](#output-formats)
+
+
 ## Documentation
 
 Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
@@ -250,7 +266,7 @@ async def send_transcripts(ctx):
 
     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
     model_id = "sonic-english"
-
+
     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events
     output_format = {
         "container": "raw",
@@ -266,7 +282,7 @@ async def send_transcripts(ctx):
         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
     ]
-
+
     for transcript in transcripts:
         # Send text inputs as they become available
         await ctx.send(
@@ -278,7 +294,7 @@ async def send_transcripts(ctx):
         )
 
     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
-    await ctx.no_more_inputs()
+    await ctx.no_more_inputs()
 
 async def receive_and_play_audio(ctx):
     p = pyaudio.PyAudio()
@@ -384,7 +400,7 @@ output_stream = ctx.send(
     voice_id=voice_id,
     output_format=output_format,
 )
-
+
 for output in output_stream:
     buffer = output["audio"]
 
@@ -1,6 +1,6 @@
 [tool.ruff]
 # Add more rule codes as needed
-extend-select = [
+lint.extend-select = [
     "D", # pydocstyle - to replace docformatter
 ]
 
@@ -43,14 +43,14 @@ line-length = 100
 # Enable the count of violations
 output-format = "full"
 
-[tool.ruff.isort]
-force-wrap-aliases = true
-combine-as-imports = true
-force-sort-within-sections = true
-known-first-party = []
-known-third-party = []
-known-local-folder = []
-lines-after-imports = 2
-
-[tool.ruff.pydocstyle]
+[tool.ruff.lint.pydocstyle]
 convention = "google"
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+line_length = 100
@@ -1,8 +1,7 @@
-from packaging.version import Version
-
 import cartesia as Cartesia
-from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
 import cartesia.version as version
+from cartesia.utils.deprecated import _DEPRECATED_FUNCTION_STATS
+from packaging.version import Version
 
 
 def test_deprecated_to_remove_by_version():
@@ -5,17 +5,18 @@ different results. Therefore, we cannot test for complete correctness but rather
 general correctness.
 """
 
+import asyncio
 import logging
 import os
 import sys
-from cartesia import AsyncCartesia, Cartesia
-from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
-from cartesia._types import VoiceControls, VoiceMetadata
+import uuid
 from typing import AsyncGenerator, Generator, List
+
 import numpy as np
 import pytest
-import uuid
-import asyncio
+from cartesia import AsyncCartesia, Cartesia
+from cartesia._types import VoiceControls, VoiceMetadata
+from cartesia.client import DEFAULT_MODEL_ID, MULTILINGUAL_MODEL_ID
 
 THISDIR = os.path.dirname(__file__)
 sys.path.insert(0, os.path.dirname(THISDIR))
@@ -79,19 +80,17 @@ def test_get_voice_from_id(client: Cartesia):
     voices = client.voices.list()
     assert voice in voices
 
-# Does not work currently, LB issue
-# def test_clone_voice_with_link(client: Cartesia):
-#     url = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
-#     logger.info("Testing voices.clone with link")
-#     cloned_voice_embedding = client.voices.clone(link=url)
-#     assert isinstance(cloned_voice_embedding, list)
-#     assert len(cloned_voice_embedding) == 192
-
 def test_clone_voice_with_file(client: Cartesia):
     logger.info("Testing voices.clone with file")
     output = client.voices.clone(filepath=os.path.join(RESOURCES_DIR, "sample-speech-4s.wav"))
     assert isinstance(output, list)
 
+@pytest.mark.parametrize("enhance", [True, False])
+def test_clone_voice_with_file_enhance(client: Cartesia, enhance: bool):
+    logger.info("Testing voices.clone with file")
+    output = client.voices.clone(filepath=os.path.join(RESOURCES_DIR, "sample-speech-4s.wav"), enhance=enhance)
+    assert isinstance(output, list)
+
 def test_create_voice(client: Cartesia):
     logger.info("Testing voices.create")
     embedding = np.ones(192).tolist()
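Outside the test harness, the new `enhance` flag on `voices.clone()` is passed the same way. A minimal usage sketch; only `voices.clone`, `filepath`, and `enhance` come from this diff, while the `api_key` handling is an assumption based on the project README:

```python
import os

from cartesia import Cartesia

# Assumption: the API key is read from the environment, as in the Cartesia README.
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))

# `enhance` defaults to True in 1.0.9; pass False to clone the clip unmodified.
embedding = client.voices.clone(filepath="sample-speech-4s.wav", enhance=False)
assert isinstance(embedding, list)  # the embedding is returned as a list of floats
```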
@@ -1,3 +0,0 @@
-from cartesia.client import Cartesia, AsyncCartesia
-
-__all__ = ["Cartesia", "AsyncCartesia"]
@@ -1 +0,0 @@
-__version__ = "1.0.7"
@@ -1,9 +1,9 @@
-import time
-
-from aiohttp.client_exceptions import ServerDisconnectedError
 import asyncio
+import time
 from functools import wraps
 from http.client import RemoteDisconnected
+
+from aiohttp.client_exceptions import ServerDisconnectedError
 from httpx import TimeoutException
 from requests.exceptions import ConnectionError
 