cartesia 1.0.11__py2.py3-none-any.whl → 1.0.13__py2.py3-none-any.whl

This diff compares two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
cartesia/client.py CHANGED
@@ -32,7 +32,6 @@ except ImportError:
32
32
  IS_WEBSOCKET_SYNC_AVAILABLE = False
33
33
 
34
34
  from iterators import TimeoutIterator
35
- from websockets.sync.client import connect
36
35
 
37
36
  from cartesia._types import (
38
37
  DeprecatedOutputFormatMapping,
@@ -50,6 +49,7 @@ DEFAULT_BASE_URL = "api.cartesia.ai"
50
49
  DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
51
50
  DEFAULT_TIMEOUT = 30 # seconds
52
51
  DEFAULT_NUM_CONNECTIONS = 10 # connections per client
52
+ DEFAULT_VOICE_EMBEDDING = [1.0] * 192 # Default voice embedding is a 192 sized float array
53
53
 
54
54
  BACKOFF_FACTOR = 1
55
55
  MAX_RETRIES = 3
@@ -261,6 +261,40 @@ class Voices(Resource):
261
261
 
262
262
  return response.json()
263
263
 
264
+ def mix(self, voices: List[Dict[str, Union[str, float]]]) -> List[float]:
265
+ """Mix multiple voices together.
266
+
267
+ Args:
268
+ voices: A list of dictionaries, each containing either:
269
+ - 'id': The ID of an existing voice
270
+ - 'embedding': A voice embedding
271
+ AND
272
+ - 'weight': The weight of the voice in the mix (0.0 to 1.0)
273
+
274
+ Returns:
275
+ The embedding of the mixed voice as a list of floats.
276
+
277
+ Raises:
278
+ ValueError: If the request fails or if the input is invalid.
279
+ """
280
+ url = f"{self._http_url()}/voices/mix"
281
+
282
+ if not voices or not isinstance(voices, list):
283
+ raise ValueError("voices must be a non-empty list")
284
+
285
+ response = httpx.post(
286
+ url,
287
+ headers=self.headers,
288
+ json={"voices": voices},
289
+ timeout=self.timeout,
290
+ )
291
+
292
+ if not response.is_success:
293
+ raise ValueError(f"Failed to mix voices. Error: {response.text}")
294
+
295
+ result = response.json()
296
+ return result["embedding"]
297
+
264
298
 
265
299
  class _TTSContext:
266
300
  """Manage a single context over a WebSocket.
@@ -857,15 +891,17 @@ class TTS(Resource):
857
891
  if voice_id is None and voice_embedding is None:
858
892
  raise ValueError("Either voice_id or voice_embedding must be specified.")
859
893
 
860
- if voice_id is not None and voice_embedding is not None:
861
- raise ValueError("Only one of voice_id or voice_embedding should be specified.")
894
+ voice = {}
895
+
896
+ if voice_id is not None:
897
+ voice["id"] = voice_id
898
+
899
+ if voice_embedding is not None:
900
+ voice["embedding"] = voice_embedding
862
901
 
863
- if voice_id:
864
- voice = {"mode": "id", "id": voice_id}
865
- else:
866
- voice = {"mode": "embedding", "embedding": voice_embedding}
867
902
  if experimental_voice_controls is not None:
868
903
  voice["__experimental_controls"] = experimental_voice_controls
904
+
869
905
  return voice
870
906
 
871
907
 
@@ -1129,7 +1165,7 @@ class _AsyncTTSContext:
1129
1165
  model_id=DEFAULT_MODEL_ID,
1130
1166
  transcript="",
1131
1167
  output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
1132
- voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Default voice ID since it's a required input for now
1168
+ voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
1133
1169
  context_id=self._context_id,
1134
1170
  continue_=False,
1135
1171
  )
cartesia/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.0.11"
1
+ __version__ = "1.0.13"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 1.0.11
3
+ Version: 1.0.13
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Home-page:
6
6
  Author: Cartesia, Inc.
@@ -91,6 +91,11 @@ print("The embedding for", voice["name"], "is", voice["embedding"])
91
91
  # Clone a voice using filepath
92
92
  cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
93
93
 
94
+ # Mix voices together
95
+ mixed_voice_embedding = client.voices.mix(
96
+ [{ "id": "voice_id_1", "weight": 0.5 }, { "id": "voice_id_2", "weight": 0.25 }, { "id": "voice_id_3", "weight": 0.25 }]
97
+ )
98
+
94
99
  # Create a new voice
95
100
  new_voice = client.voices.create(
96
101
  name="New Voice",
@@ -522,6 +527,7 @@ You can enhance the voice output by adjusting the `speed` and `emotion` paramete
522
527
 
523
528
  Speed Options:
524
529
  - `slowest`, `slow`, `normal`, `fast`, `fastest`
530
+ - Float values between -1.0 and 1.0, where -1.0 is the slowest speed and 1.0 is the fastest speed.
525
531
 
526
532
  Emotion Options:
527
533
  Use a list of tags in the format `emotion_name:level` where:
@@ -1,12 +1,12 @@
1
1
  cartesia/__init__.py,sha256=E4w7psbAwx8X6Iri3W8jGeo11gIlhr3mSU33zChipmI,93
2
2
  cartesia/_types.py,sha256=pkFJmsO-OWAJNtqxV80-YcR8KWWLhIwLFejzDjBewbw,4428
3
- cartesia/client.py,sha256=d5yhh1AvKO0yr6jPZaFoXvOW7DySNFXnn65W0lB92kA,51767
4
- cartesia/version.py,sha256=wygrEW3brUgbks4JvwNjcujOADEl2PWkdIF9d8vyM3c,23
3
+ cartesia/client.py,sha256=nO7WISbutrBo2icYllkEdYh0G553CCBMf_9OoixfYT0,52808
4
+ cartesia/version.py,sha256=wNoGY0qdTAFUvE0bIbXr21KwpofxY8Hj9hqYUIvspQA,23
5
5
  cartesia/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  cartesia/utils/deprecated.py,sha256=2cXvGtrxhPeUZA5LWy2n_U5OFLDv7SHeFtzqhjSJGyk,1674
7
7
  cartesia/utils/retry.py,sha256=O6fyVWpH9Su8c0Fwupl57xMt6JrwJ52txBwP3faUL7k,3339
8
- cartesia-1.0.11.dist-info/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
9
- cartesia-1.0.11.dist-info/METADATA,sha256=o6SMA344ywRFJqbMOawCgZCrb8ntqDBZdJ7flp9TcI8,21122
10
- cartesia-1.0.11.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
11
- cartesia-1.0.11.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
12
- cartesia-1.0.11.dist-info/RECORD,,
8
+ cartesia-1.0.13.dist-info/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
9
+ cartesia-1.0.13.dist-info/METADATA,sha256=YlCxkOMjR4wnylJHMRA6q7JJeDgtfS5uIlf7XyNsB6k,21413
10
+ cartesia-1.0.13.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
11
+ cartesia-1.0.13.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
12
+ cartesia-1.0.13.dist-info/RECORD,,