cartesia-2.0.0b7-py3-none-any.whl → cartesia-2.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. cartesia/__init__.py +15 -1
  2. cartesia/auth/__init__.py +13 -0
  3. cartesia/auth/client.py +159 -0
  4. cartesia/auth/requests/__init__.py +7 -0
  5. cartesia/auth/requests/token_grant.py +10 -0
  6. cartesia/auth/requests/token_request.py +17 -0
  7. cartesia/auth/requests/token_response.py +10 -0
  8. cartesia/auth/types/__init__.py +7 -0
  9. cartesia/auth/types/token_grant.py +22 -0
  10. cartesia/auth/types/token_request.py +28 -0
  11. cartesia/auth/types/token_response.py +22 -0
  12. cartesia/base_client.py +4 -0
  13. cartesia/core/client_wrapper.py +1 -1
  14. cartesia/tts/_async_websocket.py +8 -0
  15. cartesia/tts/_websocket.py +11 -0
  16. cartesia/tts/client.py +40 -4
  17. cartesia/tts/requests/generation_request.py +19 -1
  18. cartesia/tts/requests/tts_request.py +10 -1
  19. cartesia/tts/requests/web_socket_tts_request.py +3 -1
  20. cartesia/tts/types/generation_request.py +19 -1
  21. cartesia/tts/types/tts_request.py +10 -1
  22. cartesia/tts/types/web_socket_tts_request.py +3 -1
  23. cartesia/voices/__init__.py +6 -0
  24. cartesia/voices/client.py +208 -159
  25. cartesia/voices/requests/create_voice_request.py +2 -0
  26. cartesia/voices/requests/localize_dialect.py +6 -1
  27. cartesia/voices/requests/localize_voice_request.py +15 -2
  28. cartesia/voices/types/__init__.py +6 -0
  29. cartesia/voices/types/create_voice_request.py +2 -0
  30. cartesia/voices/types/localize_dialect.py +6 -1
  31. cartesia/voices/types/localize_french_dialect.py +5 -0
  32. cartesia/voices/types/localize_portuguese_dialect.py +5 -0
  33. cartesia/voices/types/localize_spanish_dialect.py +5 -0
  34. cartesia/voices/types/localize_voice_request.py +16 -3
  35. {cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/METADATA +68 -63
  36. {cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/RECORD +37 -24
  37. {cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/WHEEL +0 -0
cartesia/voices/client.py CHANGED
@@ -11,19 +11,20 @@ from .types.get_voices_response import GetVoicesResponse
11
11
  from ..core.pydantic_utilities import parse_obj_as
12
12
  from json.decoder import JSONDecodeError
13
13
  from ..core.api_error import ApiError
14
- from ..embedding.types.embedding import Embedding
14
+ from .. import core
15
15
  from ..tts.types.supported_language import SupportedLanguage
16
+ from .types.clone_mode import CloneMode
17
+ from .types.voice_metadata import VoiceMetadata
16
18
  from .types.voice_id import VoiceId
17
19
  from ..core.jsonable_encoder import jsonable_encoder
18
20
  from .types.localize_target_language import LocalizeTargetLanguage
19
21
  from .types.gender import Gender
20
22
  from .requests.localize_dialect import LocalizeDialectParams
21
- from .types.embedding_response import EmbeddingResponse
22
23
  from ..core.serialization import convert_and_respect_annotation_metadata
23
24
  from .requests.mix_voice_specifier import MixVoiceSpecifierParams
24
- from .. import core
25
- from .types.clone_mode import CloneMode
26
- from .types.voice_metadata import VoiceMetadata
25
+ from .types.embedding_response import EmbeddingResponse
26
+ from ..embedding.types.embedding import Embedding
27
+ from .types.base_voice_id import BaseVoiceId
27
28
  from ..core.client_wrapper import AsyncClientWrapper
28
29
  from ..core.pagination import AsyncPager
29
30
 
@@ -140,34 +141,60 @@ class VoicesClient:
140
141
  raise ApiError(status_code=_response.status_code, body=_response.text)
141
142
  raise ApiError(status_code=_response.status_code, body=_response_json)
142
143
 
143
- def create(
144
+ def clone(
144
145
  self,
145
146
  *,
147
+ clip: core.File,
146
148
  name: str,
147
- description: str,
148
- embedding: Embedding,
149
- language: typing.Optional[SupportedLanguage] = OMIT,
149
+ language: SupportedLanguage,
150
+ mode: CloneMode,
151
+ description: typing.Optional[str] = OMIT,
152
+ enhance: typing.Optional[bool] = OMIT,
153
+ transcript: typing.Optional[str] = OMIT,
150
154
  request_options: typing.Optional[RequestOptions] = None,
151
- ) -> Voice:
155
+ ) -> VoiceMetadata:
152
156
  """
157
+ Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
158
+
159
+ Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
160
+
161
+ Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
162
+
153
163
  Parameters
154
164
  ----------
165
+ clip : core.File
166
+ See core.File for more documentation
167
+
155
168
  name : str
156
169
  The name of the voice.
157
170
 
158
- description : str
159
- The description of the voice.
160
171
 
161
- embedding : Embedding
172
+ language : SupportedLanguage
173
+ The language of the voice.
174
+
175
+
176
+ mode : CloneMode
177
+ Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
178
+
179
+
180
+ description : typing.Optional[str]
181
+ A description for the voice.
182
+
183
+
184
+ enhance : typing.Optional[bool]
185
+ Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
186
+
187
+
188
+ transcript : typing.Optional[str]
189
+ Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
162
190
 
163
- language : typing.Optional[SupportedLanguage]
164
191
 
165
192
  request_options : typing.Optional[RequestOptions]
166
193
  Request-specific configuration.
167
194
 
168
195
  Returns
169
196
  -------
170
- Voice
197
+ VoiceMetadata
171
198
 
172
199
  Examples
173
200
  --------
@@ -176,20 +203,27 @@ class VoicesClient:
176
203
  client = Cartesia(
177
204
  api_key="YOUR_API_KEY",
178
205
  )
179
- client.voices.create(
180
- name="name",
181
- description="description",
182
- embedding=[1.1, 1.1],
206
+ client.voices.clone(
207
+ name="A high-stability cloned voice",
208
+ description="Copied from Cartesia docs",
209
+ mode="stability",
210
+ language="en",
211
+ enhance=True,
183
212
  )
184
213
  """
185
214
  _response = self._client_wrapper.httpx_client.request(
186
- "voices/",
215
+ "voices/clone",
187
216
  method="POST",
188
- json={
217
+ data={
189
218
  "name": name,
190
219
  "description": description,
191
- "embedding": embedding,
192
220
  "language": language,
221
+ "mode": mode,
222
+ "enhance": enhance,
223
+ "transcript": transcript,
224
+ },
225
+ files={
226
+ "clip": clip,
193
227
  },
194
228
  request_options=request_options,
195
229
  omit=OMIT,
@@ -197,9 +231,9 @@ class VoicesClient:
197
231
  try:
198
232
  if 200 <= _response.status_code < 300:
199
233
  return typing.cast(
200
- Voice,
234
+ VoiceMetadata,
201
235
  parse_obj_as(
202
- type_=Voice, # type: ignore
236
+ type_=VoiceMetadata, # type: ignore
203
237
  object_=_response.json(),
204
238
  ),
205
239
  )
@@ -349,16 +383,27 @@ class VoicesClient:
349
383
  def localize(
350
384
  self,
351
385
  *,
352
- embedding: Embedding,
386
+ voice_id: str,
387
+ name: str,
388
+ description: str,
353
389
  language: LocalizeTargetLanguage,
354
390
  original_speaker_gender: Gender,
355
391
  dialect: typing.Optional[LocalizeDialectParams] = OMIT,
356
392
  request_options: typing.Optional[RequestOptions] = None,
357
- ) -> EmbeddingResponse:
393
+ ) -> VoiceMetadata:
358
394
  """
395
+ Create a new voice from an existing voice localized to a new language and dialect.
396
+
359
397
  Parameters
360
398
  ----------
361
- embedding : Embedding
399
+ voice_id : str
400
+ The ID of the voice to localize.
401
+
402
+ name : str
403
+ The name of the new localized voice.
404
+
405
+ description : str
406
+ The description of the new localized voice.
362
407
 
363
408
  language : LocalizeTargetLanguage
364
409
 
@@ -371,7 +416,7 @@ class VoicesClient:
371
416
 
372
417
  Returns
373
418
  -------
374
- EmbeddingResponse
419
+ VoiceMetadata
375
420
 
376
421
  Examples
377
422
  --------
@@ -381,16 +426,21 @@ class VoicesClient:
381
426
  api_key="YOUR_API_KEY",
382
427
  )
383
428
  client.voices.localize(
384
- embedding=[1.1, 1.1],
385
- language="en",
386
- original_speaker_gender="male",
429
+ voice_id="694f9389-aac1-45b6-b726-9d9369183238",
430
+ name="Sarah Peninsular Spanish",
431
+ description="Sarah Voice in Peninsular Spanish",
432
+ language="es",
433
+ original_speaker_gender="female",
434
+ dialect="pe",
387
435
  )
388
436
  """
389
437
  _response = self._client_wrapper.httpx_client.request(
390
438
  "voices/localize",
391
439
  method="POST",
392
440
  json={
393
- "embedding": embedding,
441
+ "voice_id": voice_id,
442
+ "name": name,
443
+ "description": description,
394
444
  "language": language,
395
445
  "original_speaker_gender": original_speaker_gender,
396
446
  "dialect": convert_and_respect_annotation_metadata(
@@ -403,9 +453,9 @@ class VoicesClient:
403
453
  try:
404
454
  if 200 <= _response.status_code < 300:
405
455
  return typing.cast(
406
- EmbeddingResponse,
456
+ VoiceMetadata,
407
457
  parse_obj_as(
408
- type_=EmbeddingResponse, # type: ignore
458
+ type_=VoiceMetadata, # type: ignore
409
459
  object_=_response.json(),
410
460
  ),
411
461
  )
@@ -468,58 +518,39 @@ class VoicesClient:
468
518
  raise ApiError(status_code=_response.status_code, body=_response.text)
469
519
  raise ApiError(status_code=_response.status_code, body=_response_json)
470
520
 
471
- def clone(
521
+ def create(
472
522
  self,
473
523
  *,
474
- clip: core.File,
475
524
  name: str,
476
- language: SupportedLanguage,
477
- mode: CloneMode,
478
- enhance: bool,
479
- description: typing.Optional[str] = OMIT,
480
- transcript: typing.Optional[str] = OMIT,
525
+ description: str,
526
+ embedding: Embedding,
527
+ language: typing.Optional[SupportedLanguage] = OMIT,
528
+ base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
481
529
  request_options: typing.Optional[RequestOptions] = None,
482
- ) -> VoiceMetadata:
530
+ ) -> Voice:
483
531
  """
484
- Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
485
- Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
486
- Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
532
+ Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
487
533
 
488
534
  Parameters
489
535
  ----------
490
- clip : core.File
491
- See core.File for more documentation
492
-
493
536
  name : str
494
537
  The name of the voice.
495
538
 
539
+ description : str
540
+ The description of the voice.
496
541
 
497
- language : SupportedLanguage
498
- The language of the voice.
499
-
500
-
501
- mode : CloneMode
502
- Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
503
-
504
-
505
- enhance : bool
506
- Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
507
-
508
-
509
- description : typing.Optional[str]
510
- A description for the voice.
511
-
542
+ embedding : Embedding
512
543
 
513
- transcript : typing.Optional[str]
514
- Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
544
+ language : typing.Optional[SupportedLanguage]
515
545
 
546
+ base_voice_id : typing.Optional[BaseVoiceId]
516
547
 
517
548
  request_options : typing.Optional[RequestOptions]
518
549
  Request-specific configuration.
519
550
 
520
551
  Returns
521
552
  -------
522
- VoiceMetadata
553
+ Voice
523
554
 
524
555
  Examples
525
556
  --------
@@ -528,27 +559,21 @@ class VoicesClient:
528
559
  client = Cartesia(
529
560
  api_key="YOUR_API_KEY",
530
561
  )
531
- client.voices.clone(
532
- name="A high-stability cloned voice",
533
- description="Copied from Cartesia docs",
534
- mode="stability",
535
- language="en",
536
- enhance=True,
562
+ client.voices.create(
563
+ name="name",
564
+ description="description",
565
+ embedding=[1.1, 1.1],
537
566
  )
538
567
  """
539
568
  _response = self._client_wrapper.httpx_client.request(
540
- "voices/clone",
569
+ "voices/",
541
570
  method="POST",
542
- data={
571
+ json={
543
572
  "name": name,
544
573
  "description": description,
574
+ "embedding": embedding,
545
575
  "language": language,
546
- "mode": mode,
547
- "enhance": enhance,
548
- "transcript": transcript,
549
- },
550
- files={
551
- "clip": clip,
576
+ "base_voice_id": base_voice_id,
552
577
  },
553
578
  request_options=request_options,
554
579
  omit=OMIT,
@@ -556,9 +581,9 @@ class VoicesClient:
556
581
  try:
557
582
  if 200 <= _response.status_code < 300:
558
583
  return typing.cast(
559
- VoiceMetadata,
584
+ Voice,
560
585
  parse_obj_as(
561
- type_=VoiceMetadata, # type: ignore
586
+ type_=Voice, # type: ignore
562
587
  object_=_response.json(),
563
588
  ),
564
589
  )
@@ -685,34 +710,60 @@ class AsyncVoicesClient:
685
710
  raise ApiError(status_code=_response.status_code, body=_response.text)
686
711
  raise ApiError(status_code=_response.status_code, body=_response_json)
687
712
 
688
- async def create(
713
+ async def clone(
689
714
  self,
690
715
  *,
716
+ clip: core.File,
691
717
  name: str,
692
- description: str,
693
- embedding: Embedding,
694
- language: typing.Optional[SupportedLanguage] = OMIT,
718
+ language: SupportedLanguage,
719
+ mode: CloneMode,
720
+ description: typing.Optional[str] = OMIT,
721
+ enhance: typing.Optional[bool] = OMIT,
722
+ transcript: typing.Optional[str] = OMIT,
695
723
  request_options: typing.Optional[RequestOptions] = None,
696
- ) -> Voice:
724
+ ) -> VoiceMetadata:
697
725
  """
726
+ Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
727
+
728
+ Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
729
+
730
+ Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
731
+
698
732
  Parameters
699
733
  ----------
734
+ clip : core.File
735
+ See core.File for more documentation
736
+
700
737
  name : str
701
738
  The name of the voice.
702
739
 
703
- description : str
704
- The description of the voice.
705
740
 
706
- embedding : Embedding
741
+ language : SupportedLanguage
742
+ The language of the voice.
743
+
744
+
745
+ mode : CloneMode
746
+ Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
747
+
748
+
749
+ description : typing.Optional[str]
750
+ A description for the voice.
751
+
752
+
753
+ enhance : typing.Optional[bool]
754
+ Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
755
+
756
+
757
+ transcript : typing.Optional[str]
758
+ Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
707
759
 
708
- language : typing.Optional[SupportedLanguage]
709
760
 
710
761
  request_options : typing.Optional[RequestOptions]
711
762
  Request-specific configuration.
712
763
 
713
764
  Returns
714
765
  -------
715
- Voice
766
+ VoiceMetadata
716
767
 
717
768
  Examples
718
769
  --------
@@ -726,23 +777,30 @@ class AsyncVoicesClient:
726
777
 
727
778
 
728
779
  async def main() -> None:
729
- await client.voices.create(
730
- name="name",
731
- description="description",
732
- embedding=[1.1, 1.1],
780
+ await client.voices.clone(
781
+ name="A high-stability cloned voice",
782
+ description="Copied from Cartesia docs",
783
+ mode="stability",
784
+ language="en",
785
+ enhance=True,
733
786
  )
734
787
 
735
788
 
736
789
  asyncio.run(main())
737
790
  """
738
791
  _response = await self._client_wrapper.httpx_client.request(
739
- "voices/",
792
+ "voices/clone",
740
793
  method="POST",
741
- json={
794
+ data={
742
795
  "name": name,
743
796
  "description": description,
744
- "embedding": embedding,
745
797
  "language": language,
798
+ "mode": mode,
799
+ "enhance": enhance,
800
+ "transcript": transcript,
801
+ },
802
+ files={
803
+ "clip": clip,
746
804
  },
747
805
  request_options=request_options,
748
806
  omit=OMIT,
@@ -750,9 +808,9 @@ class AsyncVoicesClient:
750
808
  try:
751
809
  if 200 <= _response.status_code < 300:
752
810
  return typing.cast(
753
- Voice,
811
+ VoiceMetadata,
754
812
  parse_obj_as(
755
- type_=Voice, # type: ignore
813
+ type_=VoiceMetadata, # type: ignore
756
814
  object_=_response.json(),
757
815
  ),
758
816
  )
@@ -926,16 +984,27 @@ class AsyncVoicesClient:
926
984
  async def localize(
927
985
  self,
928
986
  *,
929
- embedding: Embedding,
987
+ voice_id: str,
988
+ name: str,
989
+ description: str,
930
990
  language: LocalizeTargetLanguage,
931
991
  original_speaker_gender: Gender,
932
992
  dialect: typing.Optional[LocalizeDialectParams] = OMIT,
933
993
  request_options: typing.Optional[RequestOptions] = None,
934
- ) -> EmbeddingResponse:
994
+ ) -> VoiceMetadata:
935
995
  """
996
+ Create a new voice from an existing voice localized to a new language and dialect.
997
+
936
998
  Parameters
937
999
  ----------
938
- embedding : Embedding
1000
+ voice_id : str
1001
+ The ID of the voice to localize.
1002
+
1003
+ name : str
1004
+ The name of the new localized voice.
1005
+
1006
+ description : str
1007
+ The description of the new localized voice.
939
1008
 
940
1009
  language : LocalizeTargetLanguage
941
1010
 
@@ -948,7 +1017,7 @@ class AsyncVoicesClient:
948
1017
 
949
1018
  Returns
950
1019
  -------
951
- EmbeddingResponse
1020
+ VoiceMetadata
952
1021
 
953
1022
  Examples
954
1023
  --------
@@ -963,9 +1032,12 @@ class AsyncVoicesClient:
963
1032
 
964
1033
  async def main() -> None:
965
1034
  await client.voices.localize(
966
- embedding=[1.1, 1.1],
967
- language="en",
968
- original_speaker_gender="male",
1035
+ voice_id="694f9389-aac1-45b6-b726-9d9369183238",
1036
+ name="Sarah Peninsular Spanish",
1037
+ description="Sarah Voice in Peninsular Spanish",
1038
+ language="es",
1039
+ original_speaker_gender="female",
1040
+ dialect="pe",
969
1041
  )
970
1042
 
971
1043
 
@@ -975,7 +1047,9 @@ class AsyncVoicesClient:
975
1047
  "voices/localize",
976
1048
  method="POST",
977
1049
  json={
978
- "embedding": embedding,
1050
+ "voice_id": voice_id,
1051
+ "name": name,
1052
+ "description": description,
979
1053
  "language": language,
980
1054
  "original_speaker_gender": original_speaker_gender,
981
1055
  "dialect": convert_and_respect_annotation_metadata(
@@ -988,9 +1062,9 @@ class AsyncVoicesClient:
988
1062
  try:
989
1063
  if 200 <= _response.status_code < 300:
990
1064
  return typing.cast(
991
- EmbeddingResponse,
1065
+ VoiceMetadata,
992
1066
  parse_obj_as(
993
- type_=EmbeddingResponse, # type: ignore
1067
+ type_=VoiceMetadata, # type: ignore
994
1068
  object_=_response.json(),
995
1069
  ),
996
1070
  )
@@ -1061,58 +1135,39 @@ class AsyncVoicesClient:
1061
1135
  raise ApiError(status_code=_response.status_code, body=_response.text)
1062
1136
  raise ApiError(status_code=_response.status_code, body=_response_json)
1063
1137
 
1064
- async def clone(
1138
+ async def create(
1065
1139
  self,
1066
1140
  *,
1067
- clip: core.File,
1068
1141
  name: str,
1069
- language: SupportedLanguage,
1070
- mode: CloneMode,
1071
- enhance: bool,
1072
- description: typing.Optional[str] = OMIT,
1073
- transcript: typing.Optional[str] = OMIT,
1142
+ description: str,
1143
+ embedding: Embedding,
1144
+ language: typing.Optional[SupportedLanguage] = OMIT,
1145
+ base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
1074
1146
  request_options: typing.Optional[RequestOptions] = None,
1075
- ) -> VoiceMetadata:
1147
+ ) -> Voice:
1076
1148
  """
1077
- Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
1078
- Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
1079
- Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
1149
+ Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
1080
1150
 
1081
1151
  Parameters
1082
1152
  ----------
1083
- clip : core.File
1084
- See core.File for more documentation
1085
-
1086
1153
  name : str
1087
1154
  The name of the voice.
1088
1155
 
1156
+ description : str
1157
+ The description of the voice.
1089
1158
 
1090
- language : SupportedLanguage
1091
- The language of the voice.
1092
-
1093
-
1094
- mode : CloneMode
1095
- Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
1096
-
1097
-
1098
- enhance : bool
1099
- Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
1100
-
1101
-
1102
- description : typing.Optional[str]
1103
- A description for the voice.
1104
-
1159
+ embedding : Embedding
1105
1160
 
1106
- transcript : typing.Optional[str]
1107
- Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
1161
+ language : typing.Optional[SupportedLanguage]
1108
1162
 
1163
+ base_voice_id : typing.Optional[BaseVoiceId]
1109
1164
 
1110
1165
  request_options : typing.Optional[RequestOptions]
1111
1166
  Request-specific configuration.
1112
1167
 
1113
1168
  Returns
1114
1169
  -------
1115
- VoiceMetadata
1170
+ Voice
1116
1171
 
1117
1172
  Examples
1118
1173
  --------
@@ -1126,30 +1181,24 @@ class AsyncVoicesClient:
1126
1181
 
1127
1182
 
1128
1183
  async def main() -> None:
1129
- await client.voices.clone(
1130
- name="A high-stability cloned voice",
1131
- description="Copied from Cartesia docs",
1132
- mode="stability",
1133
- language="en",
1134
- enhance=True,
1184
+ await client.voices.create(
1185
+ name="name",
1186
+ description="description",
1187
+ embedding=[1.1, 1.1],
1135
1188
  )
1136
1189
 
1137
1190
 
1138
1191
  asyncio.run(main())
1139
1192
  """
1140
1193
  _response = await self._client_wrapper.httpx_client.request(
1141
- "voices/clone",
1194
+ "voices/",
1142
1195
  method="POST",
1143
- data={
1196
+ json={
1144
1197
  "name": name,
1145
1198
  "description": description,
1199
+ "embedding": embedding,
1146
1200
  "language": language,
1147
- "mode": mode,
1148
- "enhance": enhance,
1149
- "transcript": transcript,
1150
- },
1151
- files={
1152
- "clip": clip,
1201
+ "base_voice_id": base_voice_id,
1153
1202
  },
1154
1203
  request_options=request_options,
1155
1204
  omit=OMIT,
@@ -1157,9 +1206,9 @@ class AsyncVoicesClient:
1157
1206
  try:
1158
1207
  if 200 <= _response.status_code < 300:
1159
1208
  return typing.cast(
1160
- VoiceMetadata,
1209
+ Voice,
1161
1210
  parse_obj_as(
1162
- type_=VoiceMetadata, # type: ignore
1211
+ type_=Voice, # type: ignore
1163
1212
  object_=_response.json(),
1164
1213
  ),
1165
1214
  )
cartesia/voices/requests/create_voice_request.py CHANGED
@@ -4,6 +4,7 @@ import typing_extensions
4
4
  from ...embedding.types.embedding import Embedding
5
5
  import typing_extensions
6
6
  from ...tts.types.supported_language import SupportedLanguage
7
+ from ..types.base_voice_id import BaseVoiceId
7
8
 
8
9
 
9
10
  class CreateVoiceRequestParams(typing_extensions.TypedDict):
@@ -19,3 +20,4 @@ class CreateVoiceRequestParams(typing_extensions.TypedDict):
19
20
 
20
21
  embedding: Embedding
21
22
  language: typing_extensions.NotRequired[SupportedLanguage]
23
+ base_voice_id: typing_extensions.NotRequired[BaseVoiceId]
cartesia/voices/requests/localize_dialect.py CHANGED
@@ -2,5 +2,10 @@
2
2
 
3
3
  import typing
4
4
  from ..types.localize_english_dialect import LocalizeEnglishDialect
5
+ from ..types.localize_spanish_dialect import LocalizeSpanishDialect
6
+ from ..types.localize_portuguese_dialect import LocalizePortugueseDialect
7
+ from ..types.localize_french_dialect import LocalizeFrenchDialect
5
8
 
6
- LocalizeDialectParams = typing.Union[LocalizeEnglishDialect]
9
+ LocalizeDialectParams = typing.Union[
10
+ LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect, LocalizeFrenchDialect
11
+ ]