cartesia 2.0.0b7__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cartesia/__init__.py CHANGED
@@ -129,6 +129,9 @@ from .voices import (
129
129
  LocalizeDialect,
130
130
  LocalizeDialectParams,
131
131
  LocalizeEnglishDialect,
132
+ LocalizeFrenchDialect,
133
+ LocalizePortugueseDialect,
134
+ LocalizeSpanishDialect,
132
135
  LocalizeTargetLanguage,
133
136
  LocalizeVoiceRequest,
134
137
  LocalizeVoiceRequestParams,
@@ -187,6 +190,9 @@ __all__ = [
187
190
  "LocalizeDialect",
188
191
  "LocalizeDialectParams",
189
192
  "LocalizeEnglishDialect",
193
+ "LocalizeFrenchDialect",
194
+ "LocalizePortugueseDialect",
195
+ "LocalizeSpanishDialect",
190
196
  "LocalizeTargetLanguage",
191
197
  "LocalizeVoiceRequest",
192
198
  "LocalizeVoiceRequestParams",
@@ -16,7 +16,7 @@ class BaseClientWrapper:
16
16
  headers: typing.Dict[str, str] = {
17
17
  "X-Fern-Language": "Python",
18
18
  "X-Fern-SDK-Name": "cartesia",
19
- "X-Fern-SDK-Version": "2.0.0b7",
19
+ "X-Fern-SDK-Version": "2.0.0b8",
20
20
  }
21
21
  headers["X-API-Key"] = self.api_key
22
22
  headers["Cartesia-Version"] = "2024-11-13"
@@ -69,6 +69,7 @@ class _AsyncTTSContext:
69
69
  stream: bool = True,
70
70
  add_timestamps: bool = False,
71
71
  add_phoneme_timestamps: bool = False,
72
+ use_original_timestamps: bool = False,
72
73
  continue_: bool = False,
73
74
  flush: bool = False,
74
75
  ) -> None:
@@ -106,6 +107,8 @@ class _AsyncTTSContext:
106
107
  request_body["add_timestamps"] = add_timestamps
107
108
  if add_phoneme_timestamps:
108
109
  request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
110
+ if use_original_timestamps:
111
+ request_body["use_original_timestamps"] = use_original_timestamps
109
112
  if continue_:
110
113
  request_body["continue"] = continue_
111
114
  if flush:
@@ -367,6 +370,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
367
370
  stream: bool = True,
368
371
  add_timestamps: bool = False,
369
372
  add_phoneme_timestamps: bool = False,
373
+ use_original_timestamps: bool = False,
370
374
  ):
371
375
  """See :meth:`_WebSocket.send` for details."""
372
376
  if context_id is None:
@@ -385,6 +389,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
385
389
  continue_=False,
386
390
  add_timestamps=add_timestamps,
387
391
  add_phoneme_timestamps=add_phoneme_timestamps,
392
+ use_original_timestamps=use_original_timestamps,
388
393
  )
389
394
 
390
395
  generator = ctx.receive()
@@ -67,6 +67,8 @@ class _TTSContext:
67
67
  language: Optional[str] = None,
68
68
  stream: bool = True,
69
69
  add_timestamps: bool = False,
70
+ add_phoneme_timestamps: bool = False,
71
+ use_original_timestamps: bool = False,
70
72
  ) -> Generator[bytes, None, None]:
71
73
  """Send audio generation requests to the WebSocket and yield responses.
72
74
 
@@ -102,6 +104,10 @@ class _TTSContext:
102
104
  request_body["stream"] = stream
103
105
  if add_timestamps:
104
106
  request_body["add_timestamps"] = add_timestamps
107
+ if add_phoneme_timestamps:
108
+ request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
109
+ if use_original_timestamps:
110
+ request_body["use_original_timestamps"] = use_original_timestamps
105
111
 
106
112
  if (
107
113
  "context_id" in request_body
@@ -354,6 +360,7 @@ class TtsWebsocket:
354
360
  stream: bool = True,
355
361
  add_timestamps: bool = False,
356
362
  add_phoneme_timestamps: bool = False,
363
+ use_original_timestamps: bool = False,
357
364
  ):
358
365
  """Send a request to the WebSocket to generate audio.
359
366
 
@@ -384,6 +391,7 @@ class TtsWebsocket:
384
391
  "stream": stream,
385
392
  "add_timestamps": add_timestamps,
386
393
  "add_phoneme_timestamps": add_phoneme_timestamps,
394
+ "use_original_timestamps": use_original_timestamps,
387
395
  }
388
396
  generator = self._websocket_generator(request_body)
389
397
 
@@ -12,6 +12,9 @@ from .types import (
12
12
  IdSpecifier,
13
13
  LocalizeDialect,
14
14
  LocalizeEnglishDialect,
15
+ LocalizeFrenchDialect,
16
+ LocalizePortugueseDialect,
17
+ LocalizeSpanishDialect,
15
18
  LocalizeTargetLanguage,
16
19
  LocalizeVoiceRequest,
17
20
  MixVoiceSpecifier,
@@ -56,6 +59,9 @@ __all__ = [
56
59
  "LocalizeDialect",
57
60
  "LocalizeDialectParams",
58
61
  "LocalizeEnglishDialect",
62
+ "LocalizeFrenchDialect",
63
+ "LocalizePortugueseDialect",
64
+ "LocalizeSpanishDialect",
59
65
  "LocalizeTargetLanguage",
60
66
  "LocalizeVoiceRequest",
61
67
  "LocalizeVoiceRequestParams",
cartesia/voices/client.py CHANGED
@@ -11,19 +11,20 @@ from .types.get_voices_response import GetVoicesResponse
11
11
  from ..core.pydantic_utilities import parse_obj_as
12
12
  from json.decoder import JSONDecodeError
13
13
  from ..core.api_error import ApiError
14
- from ..embedding.types.embedding import Embedding
14
+ from .. import core
15
15
  from ..tts.types.supported_language import SupportedLanguage
16
+ from .types.clone_mode import CloneMode
17
+ from .types.voice_metadata import VoiceMetadata
16
18
  from .types.voice_id import VoiceId
17
19
  from ..core.jsonable_encoder import jsonable_encoder
18
20
  from .types.localize_target_language import LocalizeTargetLanguage
19
21
  from .types.gender import Gender
20
22
  from .requests.localize_dialect import LocalizeDialectParams
21
- from .types.embedding_response import EmbeddingResponse
22
23
  from ..core.serialization import convert_and_respect_annotation_metadata
23
24
  from .requests.mix_voice_specifier import MixVoiceSpecifierParams
24
- from .. import core
25
- from .types.clone_mode import CloneMode
26
- from .types.voice_metadata import VoiceMetadata
25
+ from .types.embedding_response import EmbeddingResponse
26
+ from ..embedding.types.embedding import Embedding
27
+ from .types.base_voice_id import BaseVoiceId
27
28
  from ..core.client_wrapper import AsyncClientWrapper
28
29
  from ..core.pagination import AsyncPager
29
30
 
@@ -140,34 +141,60 @@ class VoicesClient:
140
141
  raise ApiError(status_code=_response.status_code, body=_response.text)
141
142
  raise ApiError(status_code=_response.status_code, body=_response_json)
142
143
 
143
- def create(
144
+ def clone(
144
145
  self,
145
146
  *,
147
+ clip: core.File,
146
148
  name: str,
147
- description: str,
148
- embedding: Embedding,
149
- language: typing.Optional[SupportedLanguage] = OMIT,
149
+ language: SupportedLanguage,
150
+ mode: CloneMode,
151
+ enhance: bool,
152
+ description: typing.Optional[str] = OMIT,
153
+ transcript: typing.Optional[str] = OMIT,
150
154
  request_options: typing.Optional[RequestOptions] = None,
151
- ) -> Voice:
155
+ ) -> VoiceMetadata:
152
156
  """
157
+ Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
158
+
159
+ Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
160
+
161
+ Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
162
+
153
163
  Parameters
154
164
  ----------
165
+ clip : core.File
166
+ See core.File for more documentation
167
+
155
168
  name : str
156
169
  The name of the voice.
157
170
 
158
- description : str
159
- The description of the voice.
160
171
 
161
- embedding : Embedding
172
+ language : SupportedLanguage
173
+ The language of the voice.
174
+
175
+
176
+ mode : CloneMode
177
+ Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
178
+
179
+
180
+ enhance : bool
181
+ Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
182
+
183
+
184
+ description : typing.Optional[str]
185
+ A description for the voice.
186
+
187
+
188
+ transcript : typing.Optional[str]
189
+ Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
162
190
 
163
- language : typing.Optional[SupportedLanguage]
164
191
 
165
192
  request_options : typing.Optional[RequestOptions]
166
193
  Request-specific configuration.
167
194
 
168
195
  Returns
169
196
  -------
170
- Voice
197
+ VoiceMetadata
171
198
 
172
199
  Examples
173
200
  --------
@@ -176,20 +203,27 @@ class VoicesClient:
176
203
  client = Cartesia(
177
204
  api_key="YOUR_API_KEY",
178
205
  )
179
- client.voices.create(
180
- name="name",
181
- description="description",
182
- embedding=[1.1, 1.1],
206
+ client.voices.clone(
207
+ name="A high-stability cloned voice",
208
+ description="Copied from Cartesia docs",
209
+ mode="stability",
210
+ language="en",
211
+ enhance=True,
183
212
  )
184
213
  """
185
214
  _response = self._client_wrapper.httpx_client.request(
186
- "voices/",
215
+ "voices/clone",
187
216
  method="POST",
188
- json={
217
+ data={
189
218
  "name": name,
190
219
  "description": description,
191
- "embedding": embedding,
192
220
  "language": language,
221
+ "mode": mode,
222
+ "enhance": enhance,
223
+ "transcript": transcript,
224
+ },
225
+ files={
226
+ "clip": clip,
193
227
  },
194
228
  request_options=request_options,
195
229
  omit=OMIT,
@@ -197,9 +231,9 @@ class VoicesClient:
197
231
  try:
198
232
  if 200 <= _response.status_code < 300:
199
233
  return typing.cast(
200
- Voice,
234
+ VoiceMetadata,
201
235
  parse_obj_as(
202
- type_=Voice, # type: ignore
236
+ type_=VoiceMetadata, # type: ignore
203
237
  object_=_response.json(),
204
238
  ),
205
239
  )
@@ -349,16 +383,27 @@ class VoicesClient:
349
383
  def localize(
350
384
  self,
351
385
  *,
352
- embedding: Embedding,
386
+ voice_id: str,
387
+ name: str,
388
+ description: str,
353
389
  language: LocalizeTargetLanguage,
354
390
  original_speaker_gender: Gender,
355
391
  dialect: typing.Optional[LocalizeDialectParams] = OMIT,
356
392
  request_options: typing.Optional[RequestOptions] = None,
357
- ) -> EmbeddingResponse:
393
+ ) -> VoiceMetadata:
358
394
  """
395
+ Create a new voice from an existing voice localized to a new language and dialect.
396
+
359
397
  Parameters
360
398
  ----------
361
- embedding : Embedding
399
+ voice_id : str
400
+ The ID of the voice to localize.
401
+
402
+ name : str
403
+ The name of the new localized voice.
404
+
405
+ description : str
406
+ The description of the new localized voice.
362
407
 
363
408
  language : LocalizeTargetLanguage
364
409
 
@@ -371,7 +416,7 @@ class VoicesClient:
371
416
 
372
417
  Returns
373
418
  -------
374
- EmbeddingResponse
419
+ VoiceMetadata
375
420
 
376
421
  Examples
377
422
  --------
@@ -381,16 +426,21 @@ class VoicesClient:
381
426
  api_key="YOUR_API_KEY",
382
427
  )
383
428
  client.voices.localize(
384
- embedding=[1.1, 1.1],
385
- language="en",
386
- original_speaker_gender="male",
429
+ voice_id="694f9389-aac1-45b6-b726-9d9369183238",
430
+ name="Sarah Peninsular Spanish",
431
+ description="Sarah Voice in Peninsular Spanish",
432
+ language="es",
433
+ original_speaker_gender="female",
434
+ dialect="pe",
387
435
  )
388
436
  """
389
437
  _response = self._client_wrapper.httpx_client.request(
390
438
  "voices/localize",
391
439
  method="POST",
392
440
  json={
393
- "embedding": embedding,
441
+ "voice_id": voice_id,
442
+ "name": name,
443
+ "description": description,
394
444
  "language": language,
395
445
  "original_speaker_gender": original_speaker_gender,
396
446
  "dialect": convert_and_respect_annotation_metadata(
@@ -403,9 +453,9 @@ class VoicesClient:
403
453
  try:
404
454
  if 200 <= _response.status_code < 300:
405
455
  return typing.cast(
406
- EmbeddingResponse,
456
+ VoiceMetadata,
407
457
  parse_obj_as(
408
- type_=EmbeddingResponse, # type: ignore
458
+ type_=VoiceMetadata, # type: ignore
409
459
  object_=_response.json(),
410
460
  ),
411
461
  )
@@ -468,58 +518,39 @@ class VoicesClient:
468
518
  raise ApiError(status_code=_response.status_code, body=_response.text)
469
519
  raise ApiError(status_code=_response.status_code, body=_response_json)
470
520
 
471
- def clone(
521
+ def create(
472
522
  self,
473
523
  *,
474
- clip: core.File,
475
524
  name: str,
476
- language: SupportedLanguage,
477
- mode: CloneMode,
478
- enhance: bool,
479
- description: typing.Optional[str] = OMIT,
480
- transcript: typing.Optional[str] = OMIT,
525
+ description: str,
526
+ embedding: Embedding,
527
+ language: typing.Optional[SupportedLanguage] = OMIT,
528
+ base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
481
529
  request_options: typing.Optional[RequestOptions] = None,
482
- ) -> VoiceMetadata:
530
+ ) -> Voice:
483
531
  """
484
- Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
485
- Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
486
- Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
532
+ Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
487
533
 
488
534
  Parameters
489
535
  ----------
490
- clip : core.File
491
- See core.File for more documentation
492
-
493
536
  name : str
494
537
  The name of the voice.
495
538
 
539
+ description : str
540
+ The description of the voice.
496
541
 
497
- language : SupportedLanguage
498
- The language of the voice.
499
-
500
-
501
- mode : CloneMode
502
- Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
503
-
504
-
505
- enhance : bool
506
- Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
507
-
508
-
509
- description : typing.Optional[str]
510
- A description for the voice.
511
-
542
+ embedding : Embedding
512
543
 
513
- transcript : typing.Optional[str]
514
- Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
544
+ language : typing.Optional[SupportedLanguage]
515
545
 
546
+ base_voice_id : typing.Optional[BaseVoiceId]
516
547
 
517
548
  request_options : typing.Optional[RequestOptions]
518
549
  Request-specific configuration.
519
550
 
520
551
  Returns
521
552
  -------
522
- VoiceMetadata
553
+ Voice
523
554
 
524
555
  Examples
525
556
  --------
@@ -528,27 +559,21 @@ class VoicesClient:
528
559
  client = Cartesia(
529
560
  api_key="YOUR_API_KEY",
530
561
  )
531
- client.voices.clone(
532
- name="A high-stability cloned voice",
533
- description="Copied from Cartesia docs",
534
- mode="stability",
535
- language="en",
536
- enhance=True,
562
+ client.voices.create(
563
+ name="name",
564
+ description="description",
565
+ embedding=[1.1, 1.1],
537
566
  )
538
567
  """
539
568
  _response = self._client_wrapper.httpx_client.request(
540
- "voices/clone",
569
+ "voices/",
541
570
  method="POST",
542
- data={
571
+ json={
543
572
  "name": name,
544
573
  "description": description,
574
+ "embedding": embedding,
545
575
  "language": language,
546
- "mode": mode,
547
- "enhance": enhance,
548
- "transcript": transcript,
549
- },
550
- files={
551
- "clip": clip,
576
+ "base_voice_id": base_voice_id,
552
577
  },
553
578
  request_options=request_options,
554
579
  omit=OMIT,
@@ -556,9 +581,9 @@ class VoicesClient:
556
581
  try:
557
582
  if 200 <= _response.status_code < 300:
558
583
  return typing.cast(
559
- VoiceMetadata,
584
+ Voice,
560
585
  parse_obj_as(
561
- type_=VoiceMetadata, # type: ignore
586
+ type_=Voice, # type: ignore
562
587
  object_=_response.json(),
563
588
  ),
564
589
  )
@@ -685,34 +710,60 @@ class AsyncVoicesClient:
685
710
  raise ApiError(status_code=_response.status_code, body=_response.text)
686
711
  raise ApiError(status_code=_response.status_code, body=_response_json)
687
712
 
688
- async def create(
713
+ async def clone(
689
714
  self,
690
715
  *,
716
+ clip: core.File,
691
717
  name: str,
692
- description: str,
693
- embedding: Embedding,
694
- language: typing.Optional[SupportedLanguage] = OMIT,
718
+ language: SupportedLanguage,
719
+ mode: CloneMode,
720
+ enhance: bool,
721
+ description: typing.Optional[str] = OMIT,
722
+ transcript: typing.Optional[str] = OMIT,
695
723
  request_options: typing.Optional[RequestOptions] = None,
696
- ) -> Voice:
724
+ ) -> VoiceMetadata:
697
725
  """
726
+ Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
727
+
728
+ Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
729
+
730
+ Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
731
+
698
732
  Parameters
699
733
  ----------
734
+ clip : core.File
735
+ See core.File for more documentation
736
+
700
737
  name : str
701
738
  The name of the voice.
702
739
 
703
- description : str
704
- The description of the voice.
705
740
 
706
- embedding : Embedding
741
+ language : SupportedLanguage
742
+ The language of the voice.
743
+
744
+
745
+ mode : CloneMode
746
+ Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
747
+
748
+
749
+ enhance : bool
750
+ Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
751
+
752
+
753
+ description : typing.Optional[str]
754
+ A description for the voice.
755
+
756
+
757
+ transcript : typing.Optional[str]
758
+ Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
707
759
 
708
- language : typing.Optional[SupportedLanguage]
709
760
 
710
761
  request_options : typing.Optional[RequestOptions]
711
762
  Request-specific configuration.
712
763
 
713
764
  Returns
714
765
  -------
715
- Voice
766
+ VoiceMetadata
716
767
 
717
768
  Examples
718
769
  --------
@@ -726,23 +777,30 @@ class AsyncVoicesClient:
726
777
 
727
778
 
728
779
  async def main() -> None:
729
- await client.voices.create(
730
- name="name",
731
- description="description",
732
- embedding=[1.1, 1.1],
780
+ await client.voices.clone(
781
+ name="A high-stability cloned voice",
782
+ description="Copied from Cartesia docs",
783
+ mode="stability",
784
+ language="en",
785
+ enhance=True,
733
786
  )
734
787
 
735
788
 
736
789
  asyncio.run(main())
737
790
  """
738
791
  _response = await self._client_wrapper.httpx_client.request(
739
- "voices/",
792
+ "voices/clone",
740
793
  method="POST",
741
- json={
794
+ data={
742
795
  "name": name,
743
796
  "description": description,
744
- "embedding": embedding,
745
797
  "language": language,
798
+ "mode": mode,
799
+ "enhance": enhance,
800
+ "transcript": transcript,
801
+ },
802
+ files={
803
+ "clip": clip,
746
804
  },
747
805
  request_options=request_options,
748
806
  omit=OMIT,
@@ -750,9 +808,9 @@ class AsyncVoicesClient:
750
808
  try:
751
809
  if 200 <= _response.status_code < 300:
752
810
  return typing.cast(
753
- Voice,
811
+ VoiceMetadata,
754
812
  parse_obj_as(
755
- type_=Voice, # type: ignore
813
+ type_=VoiceMetadata, # type: ignore
756
814
  object_=_response.json(),
757
815
  ),
758
816
  )
@@ -926,16 +984,27 @@ class AsyncVoicesClient:
926
984
  async def localize(
927
985
  self,
928
986
  *,
929
- embedding: Embedding,
987
+ voice_id: str,
988
+ name: str,
989
+ description: str,
930
990
  language: LocalizeTargetLanguage,
931
991
  original_speaker_gender: Gender,
932
992
  dialect: typing.Optional[LocalizeDialectParams] = OMIT,
933
993
  request_options: typing.Optional[RequestOptions] = None,
934
- ) -> EmbeddingResponse:
994
+ ) -> VoiceMetadata:
935
995
  """
996
+ Create a new voice from an existing voice localized to a new language and dialect.
997
+
936
998
  Parameters
937
999
  ----------
938
- embedding : Embedding
1000
+ voice_id : str
1001
+ The ID of the voice to localize.
1002
+
1003
+ name : str
1004
+ The name of the new localized voice.
1005
+
1006
+ description : str
1007
+ The description of the new localized voice.
939
1008
 
940
1009
  language : LocalizeTargetLanguage
941
1010
 
@@ -948,7 +1017,7 @@ class AsyncVoicesClient:
948
1017
 
949
1018
  Returns
950
1019
  -------
951
- EmbeddingResponse
1020
+ VoiceMetadata
952
1021
 
953
1022
  Examples
954
1023
  --------
@@ -963,9 +1032,12 @@ class AsyncVoicesClient:
963
1032
 
964
1033
  async def main() -> None:
965
1034
  await client.voices.localize(
966
- embedding=[1.1, 1.1],
967
- language="en",
968
- original_speaker_gender="male",
1035
+ voice_id="694f9389-aac1-45b6-b726-9d9369183238",
1036
+ name="Sarah Peninsular Spanish",
1037
+ description="Sarah Voice in Peninsular Spanish",
1038
+ language="es",
1039
+ original_speaker_gender="female",
1040
+ dialect="pe",
969
1041
  )
970
1042
 
971
1043
 
@@ -975,7 +1047,9 @@ class AsyncVoicesClient:
975
1047
  "voices/localize",
976
1048
  method="POST",
977
1049
  json={
978
- "embedding": embedding,
1050
+ "voice_id": voice_id,
1051
+ "name": name,
1052
+ "description": description,
979
1053
  "language": language,
980
1054
  "original_speaker_gender": original_speaker_gender,
981
1055
  "dialect": convert_and_respect_annotation_metadata(
@@ -988,9 +1062,9 @@ class AsyncVoicesClient:
988
1062
  try:
989
1063
  if 200 <= _response.status_code < 300:
990
1064
  return typing.cast(
991
- EmbeddingResponse,
1065
+ VoiceMetadata,
992
1066
  parse_obj_as(
993
- type_=EmbeddingResponse, # type: ignore
1067
+ type_=VoiceMetadata, # type: ignore
994
1068
  object_=_response.json(),
995
1069
  ),
996
1070
  )
@@ -1061,58 +1135,39 @@ class AsyncVoicesClient:
1061
1135
  raise ApiError(status_code=_response.status_code, body=_response.text)
1062
1136
  raise ApiError(status_code=_response.status_code, body=_response_json)
1063
1137
 
1064
- async def clone(
1138
+ async def create(
1065
1139
  self,
1066
1140
  *,
1067
- clip: core.File,
1068
1141
  name: str,
1069
- language: SupportedLanguage,
1070
- mode: CloneMode,
1071
- enhance: bool,
1072
- description: typing.Optional[str] = OMIT,
1073
- transcript: typing.Optional[str] = OMIT,
1142
+ description: str,
1143
+ embedding: Embedding,
1144
+ language: typing.Optional[SupportedLanguage] = OMIT,
1145
+ base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
1074
1146
  request_options: typing.Optional[RequestOptions] = None,
1075
- ) -> VoiceMetadata:
1147
+ ) -> Voice:
1076
1148
  """
1077
- Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
1078
- Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
1079
- Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
1149
+ Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
1080
1150
 
1081
1151
  Parameters
1082
1152
  ----------
1083
- clip : core.File
1084
- See core.File for more documentation
1085
-
1086
1153
  name : str
1087
1154
  The name of the voice.
1088
1155
 
1156
+ description : str
1157
+ The description of the voice.
1089
1158
 
1090
- language : SupportedLanguage
1091
- The language of the voice.
1092
-
1093
-
1094
- mode : CloneMode
1095
- Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
1096
-
1097
-
1098
- enhance : bool
1099
- Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
1100
-
1101
-
1102
- description : typing.Optional[str]
1103
- A description for the voice.
1104
-
1159
+ embedding : Embedding
1105
1160
 
1106
- transcript : typing.Optional[str]
1107
- Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
1161
+ language : typing.Optional[SupportedLanguage]
1108
1162
 
1163
+ base_voice_id : typing.Optional[BaseVoiceId]
1109
1164
 
1110
1165
  request_options : typing.Optional[RequestOptions]
1111
1166
  Request-specific configuration.
1112
1167
 
1113
1168
  Returns
1114
1169
  -------
1115
- VoiceMetadata
1170
+ Voice
1116
1171
 
1117
1172
  Examples
1118
1173
  --------
@@ -1126,30 +1181,24 @@ class AsyncVoicesClient:
1126
1181
 
1127
1182
 
1128
1183
  async def main() -> None:
1129
- await client.voices.clone(
1130
- name="A high-stability cloned voice",
1131
- description="Copied from Cartesia docs",
1132
- mode="stability",
1133
- language="en",
1134
- enhance=True,
1184
+ await client.voices.create(
1185
+ name="name",
1186
+ description="description",
1187
+ embedding=[1.1, 1.1],
1135
1188
  )
1136
1189
 
1137
1190
 
1138
1191
  asyncio.run(main())
1139
1192
  """
1140
1193
  _response = await self._client_wrapper.httpx_client.request(
1141
- "voices/clone",
1194
+ "voices/",
1142
1195
  method="POST",
1143
- data={
1196
+ json={
1144
1197
  "name": name,
1145
1198
  "description": description,
1199
+ "embedding": embedding,
1146
1200
  "language": language,
1147
- "mode": mode,
1148
- "enhance": enhance,
1149
- "transcript": transcript,
1150
- },
1151
- files={
1152
- "clip": clip,
1201
+ "base_voice_id": base_voice_id,
1153
1202
  },
1154
1203
  request_options=request_options,
1155
1204
  omit=OMIT,
@@ -1157,9 +1206,9 @@ class AsyncVoicesClient:
1157
1206
  try:
1158
1207
  if 200 <= _response.status_code < 300:
1159
1208
  return typing.cast(
1160
- VoiceMetadata,
1209
+ Voice,
1161
1210
  parse_obj_as(
1162
- type_=VoiceMetadata, # type: ignore
1211
+ type_=Voice, # type: ignore
1163
1212
  object_=_response.json(),
1164
1213
  ),
1165
1214
  )
@@ -4,6 +4,7 @@ import typing_extensions
4
4
  from ...embedding.types.embedding import Embedding
5
5
  import typing_extensions
6
6
  from ...tts.types.supported_language import SupportedLanguage
7
+ from ..types.base_voice_id import BaseVoiceId
7
8
 
8
9
 
9
10
  class CreateVoiceRequestParams(typing_extensions.TypedDict):
@@ -19,3 +20,4 @@ class CreateVoiceRequestParams(typing_extensions.TypedDict):
19
20
 
20
21
  embedding: Embedding
21
22
  language: typing_extensions.NotRequired[SupportedLanguage]
23
+ base_voice_id: typing_extensions.NotRequired[BaseVoiceId]
@@ -2,5 +2,10 @@
2
2
 
3
3
  import typing
4
4
  from ..types.localize_english_dialect import LocalizeEnglishDialect
5
+ from ..types.localize_spanish_dialect import LocalizeSpanishDialect
6
+ from ..types.localize_portuguese_dialect import LocalizePortugueseDialect
7
+ from ..types.localize_french_dialect import LocalizeFrenchDialect
5
8
 
6
- LocalizeDialectParams = typing.Union[LocalizeEnglishDialect]
9
+ LocalizeDialectParams = typing.Union[
10
+ LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect, LocalizeFrenchDialect
11
+ ]
@@ -1,7 +1,6 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  import typing_extensions
4
- from ...embedding.types.embedding import Embedding
5
4
  from ..types.localize_target_language import LocalizeTargetLanguage
6
5
  from ..types.gender import Gender
7
6
  import typing_extensions
@@ -9,7 +8,21 @@ from .localize_dialect import LocalizeDialectParams
9
8
 
10
9
 
11
10
  class LocalizeVoiceRequestParams(typing_extensions.TypedDict):
12
- embedding: Embedding
11
+ voice_id: str
12
+ """
13
+ The ID of the voice to localize.
14
+ """
15
+
16
+ name: str
17
+ """
18
+ The name of the new localized voice.
19
+ """
20
+
21
+ description: str
22
+ """
23
+ The description of the new localized voice.
24
+ """
25
+
13
26
  language: LocalizeTargetLanguage
14
27
  original_speaker_gender: Gender
15
28
  dialect: typing_extensions.NotRequired[LocalizeDialectParams]
@@ -11,6 +11,9 @@ from .get_voices_response import GetVoicesResponse
11
11
  from .id_specifier import IdSpecifier
12
12
  from .localize_dialect import LocalizeDialect
13
13
  from .localize_english_dialect import LocalizeEnglishDialect
14
+ from .localize_french_dialect import LocalizeFrenchDialect
15
+ from .localize_portuguese_dialect import LocalizePortugueseDialect
16
+ from .localize_spanish_dialect import LocalizeSpanishDialect
14
17
  from .localize_target_language import LocalizeTargetLanguage
15
18
  from .localize_voice_request import LocalizeVoiceRequest
16
19
  from .mix_voice_specifier import MixVoiceSpecifier
@@ -34,6 +37,9 @@ __all__ = [
34
37
  "IdSpecifier",
35
38
  "LocalizeDialect",
36
39
  "LocalizeEnglishDialect",
40
+ "LocalizeFrenchDialect",
41
+ "LocalizePortugueseDialect",
42
+ "LocalizeSpanishDialect",
37
43
  "LocalizeTargetLanguage",
38
44
  "LocalizeVoiceRequest",
39
45
  "MixVoiceSpecifier",
@@ -5,6 +5,7 @@ import pydantic
5
5
  from ...embedding.types.embedding import Embedding
6
6
  import typing
7
7
  from ...tts.types.supported_language import SupportedLanguage
8
+ from .base_voice_id import BaseVoiceId
8
9
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
9
10
 
10
11
 
@@ -21,6 +22,7 @@ class CreateVoiceRequest(UniversalBaseModel):
21
22
 
22
23
  embedding: Embedding
23
24
  language: typing.Optional[SupportedLanguage] = None
25
+ base_voice_id: typing.Optional[BaseVoiceId] = None
24
26
 
25
27
  if IS_PYDANTIC_V2:
26
28
  model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
@@ -2,5 +2,10 @@
2
2
 
3
3
  import typing
4
4
  from .localize_english_dialect import LocalizeEnglishDialect
5
+ from .localize_spanish_dialect import LocalizeSpanishDialect
6
+ from .localize_portuguese_dialect import LocalizePortugueseDialect
7
+ from .localize_french_dialect import LocalizeFrenchDialect
5
8
 
6
- LocalizeDialect = typing.Union[LocalizeEnglishDialect]
9
+ LocalizeDialect = typing.Union[
10
+ LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect, LocalizeFrenchDialect
11
+ ]
@@ -0,0 +1,5 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ LocalizeFrenchDialect = typing.Union[typing.Literal["eu", "ca"], typing.Any]
@@ -0,0 +1,5 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ LocalizePortugueseDialect = typing.Union[typing.Literal["br", "eu"], typing.Any]
@@ -0,0 +1,5 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ LocalizeSpanishDialect = typing.Union[typing.Literal["mx", "pe"], typing.Any]
@@ -1,17 +1,30 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
3
  from ...core.pydantic_utilities import UniversalBaseModel
4
- from ...embedding.types.embedding import Embedding
4
+ import pydantic
5
5
  from .localize_target_language import LocalizeTargetLanguage
6
6
  from .gender import Gender
7
7
  import typing
8
8
  from .localize_dialect import LocalizeDialect
9
9
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
10
- import pydantic
11
10
 
12
11
 
13
12
  class LocalizeVoiceRequest(UniversalBaseModel):
14
- embedding: Embedding
13
+ voice_id: str = pydantic.Field()
14
+ """
15
+ The ID of the voice to localize.
16
+ """
17
+
18
+ name: str = pydantic.Field()
19
+ """
20
+ The name of the new localized voice.
21
+ """
22
+
23
+ description: str = pydantic.Field()
24
+ """
25
+ The description of the new localized voice.
26
+ """
27
+
15
28
  language: LocalizeTargetLanguage
16
29
  original_speaker_gender: Gender
17
30
  dialect: typing.Optional[LocalizeDialect] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 2.0.0b7
3
+ Version: 2.0.0b8
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -47,53 +47,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
47
47
  pip install cartesia
48
48
  ```
49
49
 
50
- ## Reference
51
-
52
- A full reference for this library is available [here](./reference.md).
53
-
54
- ## Voices
55
-
56
- ```python
57
- from cartesia import Cartesia
58
- import os
59
-
60
- client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
61
-
62
- # Get all available voices
63
- voices = client.voices.list()
64
- print(voices)
65
-
66
- # Get a specific voice
67
- voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
68
- print("The embedding for", voice.name, "is", voice.embedding)
69
-
70
- # Clone a voice using file data
71
- cloned_voice = client.voices.clone(
72
- clip=open("path/to/voice.wav", "rb"),
73
- name="Test cloned voice",
74
- language="en",
75
- mode="similarity", # or "stability"
76
- enhance=False, # use enhance=True to clean and denoise the cloning audio
77
- description="Test voice description"
78
- )
79
-
80
- # Mix voices together
81
- mixed_voice = client.voices.mix(
82
- voices=[
83
- {"id": "voice_id_1", "weight": 0.25},
84
- {"id": "voice_id_2", "weight": 0.75}
85
- ]
86
- )
87
-
88
- # Create a new voice from embedding
89
- new_voice = client.voices.create(
90
- name="Test Voice",
91
- description="Test voice description",
92
- embedding=[...], # List[float] with 192 dimensions
93
- language="en"
94
- )
95
- ```
96
-
97
50
  ## Usage
98
51
 
99
52
  Instantiate and use the client with the following:
@@ -112,10 +65,6 @@ client.tts.bytes(
112
65
  voice={
113
66
  "mode": "id",
114
67
  "id": "694f9389-aac1-45b6-b726-9d9369183238",
115
- "experimental_controls": {
116
- "speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
117
- "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
118
- }
119
68
  },
120
69
  language="en",
121
70
  output_format={
@@ -176,7 +125,7 @@ except ApiError as e:
176
125
 
177
126
  ## Streaming
178
127
 
179
- The SDK supports streaming responses, as well, the response will be a generator that you can loop over.
128
+ The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
180
129
 
181
130
  ```python
182
131
  from cartesia import Cartesia
@@ -215,7 +164,9 @@ for chunk in chunks:
215
164
  print(f"Received chunk of size: {len(chunk.data)}")
216
165
  ```
217
166
 
218
- ## WebSocket
167
+ ## WebSockets
168
+
169
+ For the lowest latency in advanced use cases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our WebSocket client:
219
170
 
220
171
  ```python
221
172
  from cartesia import Cartesia
@@ -223,15 +174,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
223
174
  import pyaudio
224
175
  import os
225
176
 
226
- client = Cartesia(
227
- api_key=os.getenv("CARTESIA_API_KEY"),
228
- )
177
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
229
178
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
230
179
  transcript = "Hello! Welcome to Cartesia"
231
180
 
232
- # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
233
- model_id = "sonic-2"
234
-
235
181
  p = pyaudio.PyAudio()
236
182
  rate = 22050
237
183
 
@@ -242,14 +188,14 @@ ws = client.tts.websocket()
242
188
 
243
189
  # Generate and stream audio using the websocket
244
190
  for output in ws.send(
245
- model_id=model_id,
191
+ model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
246
192
  transcript=transcript,
247
193
  voice={"id": voice_id},
248
194
  stream=True,
249
195
  output_format={
250
196
  "container": "raw",
251
197
  "encoding": "pcm_f32le",
252
- "sample_rate": 22050
198
+ "sample_rate": rate
253
199
  },
254
200
  ):
255
201
  buffer = output.audio
@@ -267,6 +213,40 @@ p.terminate()
267
213
  ws.close() # Close the websocket connection
268
214
  ```
269
215
 
216
+ ## Voices
217
+
218
+ List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
219
+
220
+ ```python
221
+ from cartesia import Cartesia
222
+ import os
223
+
224
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
225
+
226
+ # Get all available Voices
227
+ voices = client.voices.list()
228
+ for voice in voices:
229
+ print(voice)
230
+ ```
231
+
232
+ You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
233
+
234
+ ```python
235
+ # Get a specific Voice
236
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
237
+ print("The embedding for", voice.name, "is", voice.embedding)
238
+
239
+ # Clone a Voice using file data
240
+ cloned_voice = client.voices.clone(
241
+ clip=open("path/to/voice.wav", "rb"),
242
+ name="Test cloned voice",
243
+ language="en",
244
+ mode="similarity", # or "stability"
245
+ enhance=False, # use enhance=True to clean and denoise the cloning audio
246
+ description="Test voice description"
247
+ )
248
+ ```
249
+
270
250
  ## Requesting Timestamps
271
251
 
272
252
  ```python
@@ -290,7 +270,8 @@ async def main():
290
270
  "encoding": "pcm_f32le",
291
271
  "sample_rate": 44100
292
272
  },
293
- add_timestamps=True, # Enable word-level timestamps
273
+ add_timestamps=True, # Enable word-level timestamps
274
+ add_phoneme_timestamps=True, # Enable phoneme-level timestamps
294
275
  stream=True
295
276
  )
296
277
 
@@ -358,6 +339,26 @@ client.tts.bytes(..., request_options={
358
339
  })
359
340
  ```
360
341
 
342
+ ### Mixing voices and creating from embeddings
343
+
344
+ ```python
345
+ # Mix voices together
346
+ mixed_voice = client.voices.mix(
347
+ voices=[
348
+ {"id": "voice_id_1", "weight": 0.25},
349
+ {"id": "voice_id_2", "weight": 0.75}
350
+ ]
351
+ )
352
+
353
+ # Create a new voice from embedding
354
+ new_voice = client.voices.create(
355
+ name="Test Voice",
356
+ description="Test voice description",
357
+ embedding=[...], # List[float] with 192 dimensions
358
+ language="en"
359
+ )
360
+ ```
361
+
361
362
  ### Custom Client
362
363
 
363
364
  You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
@@ -375,6 +376,10 @@ client = Cartesia(
375
376
  )
376
377
  ```
377
378
 
379
+ ## Reference
380
+
381
+ A full reference for this library is available [here](./reference.md).
382
+
378
383
  ## Contributing
379
384
 
380
385
  Note that most of this library is generated programmatically from
@@ -1,4 +1,4 @@
1
- cartesia/__init__.py,sha256=r67mE_XxfYDeojBqnpfBgpAo1FnUESbX7Qm-7Vjes_Q,7965
1
+ cartesia/__init__.py,sha256=k-YMKYUtzKObkF9Zn0TuHTC2_Z07mH6CTnZmn1my7po,8143
2
2
  cartesia/api_status/__init__.py,sha256=_dHNLdknrBjxHtU2PvLumttJM-JTQhJQqhhAQkLqt_U,168
3
3
  cartesia/api_status/client.py,sha256=GJ9Dq8iCn3hn8vCIqc6k1fCGEhSz0T0kaPGcdFnbMDY,3146
4
4
  cartesia/api_status/requests/__init__.py,sha256=ilEMzEy1JEw484CuL92bX5lHGOznc62pjiDMgiZ0tKM,130
@@ -9,7 +9,7 @@ cartesia/base_client.py,sha256=EIfMrSkJgMCgzYWJ5GN2RxsWikxcH0kMmcb3WYqfQ_g,6321
9
9
  cartesia/client.py,sha256=sPAYQLt9W2E_2F17ooocvvJImuNyLrL8xUypgf6dZeI,6238
10
10
  cartesia/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
11
11
  cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
12
- cartesia/core/client_wrapper.py,sha256=tTxN1WEjVJuMSKTZ4kVKQykuql_lcQuiUfDU89z-f0A,1856
12
+ cartesia/core/client_wrapper.py,sha256=BEIOireABuSTdCAcsHeQKtZ1D3sIi-CVQv5YFHmfi3Y,1856
13
13
  cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
14
14
  cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
15
15
  cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
@@ -42,8 +42,8 @@ cartesia/infill/__init__.py,sha256=FTtvy8EDg9nNNg9WCatVgKTRYV8-_v1roeGPAKoa_pw,6
42
42
  cartesia/infill/client.py,sha256=PWE5Ak-wsaBM_8g52oDl9PYx76PkW6f900mnxvZf4Bk,12571
43
43
  cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  cartesia/tts/__init__.py,sha256=G0wcYlPrr7hmu5DQgCG7bDTQq36fpP3iBM5164Z0-Js,4701
45
- cartesia/tts/_async_websocket.py,sha256=mPykrS40FJee58T8NtGUnQ7AurQy04Qz6ICzjCnr7Fg,18383
46
- cartesia/tts/_websocket.py,sha256=Gzd2GvTPUKn59u7quVHn53cGe44H_fCv1jr-opSHRZk,18689
45
+ cartesia/tts/_async_websocket.py,sha256=U7ySTJqb3V0RDSKPcFfzpBa0pqui05k5BTqiIpSBth0,18652
46
+ cartesia/tts/_websocket.py,sha256=roMJ7oDSjr5U5sTHM8EcGu-EtzbIVUH4HmOY1yI2JL4,19118
47
47
  cartesia/tts/client.py,sha256=KMhDaW0gG_uwkSq1EzoC-bCx1G0TLB4K4Gm57L4xDSs,14832
48
48
  cartesia/tts/requests/__init__.py,sha256=0rcfMLHNbUhkRI1xS09UE4p-WT1BCqrcblFtPxcATOI,3261
49
49
  cartesia/tts/requests/cancel_context_request.py,sha256=Wl8g-o5vwl9ENm-H1wsLx441FkIR_4Wt5UYtuWce2Yw,431
@@ -120,35 +120,38 @@ cartesia/voice_changer/requests/streaming_response.py,sha256=cV6L9mMY0w2JpJ0xKoF
120
120
  cartesia/voice_changer/types/__init__.py,sha256=qAiHsdRpnFeS0lBkYp_NRrhSJiRXCg5-uFibqDWzYVU,430
121
121
  cartesia/voice_changer/types/output_format_container.py,sha256=RqLDELdgeOjYqNTJX1Le62qjiFiJGxf0cYnol88-LLM,166
122
122
  cartesia/voice_changer/types/streaming_response.py,sha256=rQ4ZehtOHsCBKijyULz_ahGQYNj1yus6AM6u2wgcBsI,1963
123
- cartesia/voices/__init__.py,sha256=ipS0rBobAU31yoJEbZ-2LcENhmmpzjxfzc_h5v3R0zk,1713
124
- cartesia/voices/client.py,sha256=nOmRRJevMyBtmuTNa6aDFWpQXu1GFkjNdfzFrMMwl5k,37160
123
+ cartesia/voices/__init__.py,sha256=2D58Bir45LvcvP08QMnPlFE8DD8BONTjPLkIDdKs7vg,1891
124
+ cartesia/voices/client.py,sha256=8zQZAtaCAJi79puMxVhzR5OWCDjows53k4oTvSgcdJM,38867
125
125
  cartesia/voices/requests/__init__.py,sha256=XiBJbSYeQCgFMtwywKvQ0Nmp7Zf_0WskzRhgr9c8h38,1072
126
- cartesia/voices/requests/create_voice_request.py,sha256=HvxxWBwR5RMMMmxEU5Tj5jsDSXnlT0cS-C6AGlMPlr0,509
126
+ cartesia/voices/requests/create_voice_request.py,sha256=r6dKb9ga0ZsAi_6PXuE43u2lLgfQg2DIYjk2Neng7pI,617
127
127
  cartesia/voices/requests/embedding_response.py,sha256=PGZkBD8UBcv2MYQbBXyD4T6lzaE9oSGGwXx-MoXCp0M,228
128
128
  cartesia/voices/requests/embedding_specifier.py,sha256=PAHdGsVmLLeJC2b1fWHWI_OlhogO1WnJdzoX9pj5N8c,282
129
129
  cartesia/voices/requests/get_voices_response.py,sha256=g-ZCaCaLOlZSitcKVhdCtfdKQQz8N3W6E7_wZUNOi5M,747
130
130
  cartesia/voices/requests/id_specifier.py,sha256=UTtoXBEEYaGvg-Dn2QxUDACNB3Vm1O1XbrPtBA3rGzU,252
131
- cartesia/voices/requests/localize_dialect.py,sha256=9mmLHOFbBvWZoU2PyjXozG6hoDpE0uueymXHi0k_VtE,209
132
- cartesia/voices/requests/localize_voice_request.py,sha256=AkY4cvx31MF3_gkqMpUzibGIOh9cNF5cOCf3Yqnm7Vc,549
131
+ cartesia/voices/requests/localize_dialect.py,sha256=OHAInU6IP0LBzIY3VYSiU9bRLjXfr1pGXunsLgv1QHs,497
132
+ cartesia/voices/requests/localize_voice_request.py,sha256=oh828eqYkiticD_lerc8WemN3bW13mLZpfRDiKbG75g,703
133
133
  cartesia/voices/requests/mix_voice_specifier.py,sha256=YjOJ2Qt3nqMQzHsYbF1DnZgmZS9zZepLXpji6V9mfgs,266
134
134
  cartesia/voices/requests/mix_voices_request.py,sha256=6JCzFmWKIS1_t-uSoO1m-FQbLWB1zaykTcGV-1s-RqM,275
135
135
  cartesia/voices/requests/update_voice_request.py,sha256=XxJ6TKO4M2s1kXQAZRj8uA4okIABvmWiFhAHJv4BS0Q,282
136
136
  cartesia/voices/requests/voice.py,sha256=M-4lf4W57fx84_JFOy55b9mWcqO4LfzpY-G_Ekv-2Bo,1031
137
137
  cartesia/voices/requests/voice_metadata.py,sha256=S0jPQtBpEb2WSnYDLQTS7pcbNJpc0d01uWravHaqzso,697
138
- cartesia/voices/types/__init__.py,sha256=fsPgm1Ma1E_iBIKUMseIie9QrcGD-p31_KeMvPMb_KA,1503
138
+ cartesia/voices/types/__init__.py,sha256=yjxMWjoBpwAZ5UJ2iRSC_kKgZvGmqVd09kQxgcTnMac,1782
139
139
  cartesia/voices/types/base_voice_id.py,sha256=nWRC0rvLpjeMpRbLSmUTPziWo1ZrbPxw22l4gEBWp8Q,118
140
140
  cartesia/voices/types/clone_mode.py,sha256=3sR6wdxym4xDVsoHppp3-V9mpDwP9F9fDfMUQKG24xw,160
141
- cartesia/voices/types/create_voice_request.py,sha256=8vfKu6cD_VYFb3GN5gVpxlRUIZALYE-449NbDSnXaDg,911
141
+ cartesia/voices/types/create_voice_request.py,sha256=_q0d8QojmQrpU-Puzd_YvWmiC7cBp_lrbKmTLuknYqQ,1005
142
142
  cartesia/voices/types/embedding_response.py,sha256=B7MJ79HIAnxtiP6OT0tt27KBDYTZ3VU0MLuQfb5qVOg,624
143
143
  cartesia/voices/types/embedding_specifier.py,sha256=cf6JfVnISyrvjWup3oAg-RFdMVRxytem6HLwZgKl3gA,671
144
144
  cartesia/voices/types/gender.py,sha256=OrbTO__3HVNculvkcb5Pz-Yoa-Xv8N_rNMrFoy2DoaA,148
145
145
  cartesia/voices/types/gender_presentation.py,sha256=rM8pSurYCSH0AGgLsVpVAPp7uz7TQMM1nPa7-Vus7gw,185
146
146
  cartesia/voices/types/get_voices_response.py,sha256=c6KMkmJepTUmT7I6tAVOGrPst2kkXxDCXLIf1AnR9NE,1136
147
147
  cartesia/voices/types/id_specifier.py,sha256=yAY-uc9hRJkHXdsSfRZWkE8ga2Sb-KVipOTSXa8Wmp0,634
148
- cartesia/voices/types/localize_dialect.py,sha256=tRckNEq4EsdYPondF1rrjOrYRZUSL6WW_3627cFwG1I,196
148
+ cartesia/voices/types/localize_dialect.py,sha256=6JpJKeQvtDjCT2n-5yaGOe3D-4nYqUoYrvcCSE2Zxik,463
149
149
  cartesia/voices/types/localize_english_dialect.py,sha256=0PjZNjQv5ll2wWZxGveQIYCUGLtGDVELK9FBWFe7SNc,176
150
+ cartesia/voices/types/localize_french_dialect.py,sha256=aMhqLi_5goAaSGZguZIFOwQ9Yqh5ApL6gS3cDI315lQ,157
151
+ cartesia/voices/types/localize_portuguese_dialect.py,sha256=6dcThK1qWyS3c-W--3Zz7HK5ixS0qslEWrVQmKSrl9E,161
152
+ cartesia/voices/types/localize_spanish_dialect.py,sha256=h-H52vk0MBOvJqlzPVPgajfQU6oxpTzHoQAKmSDyaC4,158
150
153
  cartesia/voices/types/localize_target_language.py,sha256=ttngtFVpMvuWAKQztJu_pCaf7V62DzmNq9zthPCb2LI,242
151
- cartesia/voices/types/localize_voice_request.py,sha256=roZkcA7LiYs_L1R9FgTCTIgmHv9TUfXZMgLEnrajJ3I,887
154
+ cartesia/voices/types/localize_voice_request.py,sha256=gvjg292kMgji0L9TNO3VqDS0pHO1vGJUcf0l_vEW_5Y,1098
152
155
  cartesia/voices/types/mix_voice_specifier.py,sha256=B0FE6UREGk1TxlN0GOPwyCuqJbMkWVUs0EFqiJuQfZ8,236
153
156
  cartesia/voices/types/mix_voices_request.py,sha256=R_8bmUmE1br4wmfH1Qu6EnL9uC-V1z5BV3_B7u51EOw,641
154
157
  cartesia/voices/types/update_voice_request.py,sha256=_CEH8nuSZn2qZa9xZlANZXOhJd49XLel3dRy2dfOvr8,716
@@ -157,6 +160,6 @@ cartesia/voices/types/voice_expand_options.py,sha256=e4FroWdlxEE-LXQfT1RWlGHtswl
157
160
  cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
158
161
  cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
159
162
  cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
160
- cartesia-2.0.0b7.dist-info/METADATA,sha256=8sWG16O3-gGLZWHd8FrRQRru7cKLH4RiHCr0uEWzqd0,10895
161
- cartesia-2.0.0b7.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
162
- cartesia-2.0.0b7.dist-info/RECORD,,
163
+ cartesia-2.0.0b8.dist-info/METADATA,sha256=ynQsxGb1v5ZHMnXkeqYceRFrC-bxwuRaopOPyuBbCsk,11208
164
+ cartesia-2.0.0b8.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
165
+ cartesia-2.0.0b8.dist-info/RECORD,,