cartesia 2.0.0a0__py3-none-any.whl → 2.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +4 -0
- cartesia/core/client_wrapper.py +1 -1
- cartesia/core/http_client.py +2 -2
- cartesia/core/pydantic_utilities.py +2 -2
- cartesia/datasets/client.py +4 -34
- cartesia/infill/client.py +36 -12
- cartesia/tts/client.py +4 -4
- cartesia/voices/__init__.py +4 -0
- cartesia/voices/client.py +169 -928
- cartesia/voices/requests/create_voice_request.py +2 -0
- cartesia/voices/requests/localize_dialect.py +3 -1
- cartesia/voices/types/__init__.py +4 -0
- cartesia/voices/types/create_voice_request.py +2 -0
- cartesia/voices/types/localize_dialect.py +3 -1
- cartesia/voices/types/localize_portuguese_dialect.py +5 -0
- cartesia/voices/types/localize_spanish_dialect.py +5 -0
- {cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/METADATA +2 -1
- {cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/RECORD +19 -17
- {cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/WHEEL +0 -0
cartesia/voices/client.py
CHANGED
@@ -7,19 +7,20 @@ from .types.voice import Voice
|
|
7
7
|
from ..core.pydantic_utilities import parse_obj_as
|
8
8
|
from json.decoder import JSONDecodeError
|
9
9
|
from ..core.api_error import ApiError
|
10
|
-
from ..
|
10
|
+
from .. import core
|
11
11
|
from ..tts.types.supported_language import SupportedLanguage
|
12
|
+
from .types.clone_mode import CloneMode
|
13
|
+
from .types.voice_metadata import VoiceMetadata
|
12
14
|
from .types.voice_id import VoiceId
|
13
15
|
from ..core.jsonable_encoder import jsonable_encoder
|
16
|
+
from ..embedding.types.embedding import Embedding
|
14
17
|
from .types.localize_target_language import LocalizeTargetLanguage
|
15
18
|
from .types.gender import Gender
|
16
19
|
from .requests.localize_dialect import LocalizeDialectParams
|
17
20
|
from .types.embedding_response import EmbeddingResponse
|
18
21
|
from ..core.serialization import convert_and_respect_annotation_metadata
|
19
22
|
from .requests.mix_voice_specifier import MixVoiceSpecifierParams
|
20
|
-
from
|
21
|
-
from .types.clone_mode import CloneMode
|
22
|
-
from .types.voice_metadata import VoiceMetadata
|
23
|
+
from .types.base_voice_id import BaseVoiceId
|
23
24
|
from ..core.client_wrapper import AsyncClientWrapper
|
24
25
|
|
25
26
|
# this is used as the default value for optional parameters
|
@@ -69,34 +70,60 @@ class VoicesClient:
|
|
69
70
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
70
71
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
71
72
|
|
72
|
-
def
|
73
|
+
def clone(
|
73
74
|
self,
|
74
75
|
*,
|
76
|
+
clip: core.File,
|
75
77
|
name: str,
|
76
|
-
|
77
|
-
|
78
|
-
|
78
|
+
language: SupportedLanguage,
|
79
|
+
mode: CloneMode,
|
80
|
+
enhance: bool,
|
81
|
+
description: typing.Optional[str] = OMIT,
|
82
|
+
transcript: typing.Optional[str] = OMIT,
|
79
83
|
request_options: typing.Optional[RequestOptions] = None,
|
80
|
-
) ->
|
84
|
+
) -> VoiceMetadata:
|
81
85
|
"""
|
86
|
+
Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
|
87
|
+
|
88
|
+
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
89
|
+
|
90
|
+
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
91
|
+
|
82
92
|
Parameters
|
83
93
|
----------
|
94
|
+
clip : core.File
|
95
|
+
See core.File for more documentation
|
96
|
+
|
84
97
|
name : str
|
85
98
|
The name of the voice.
|
86
99
|
|
87
|
-
description : str
|
88
|
-
The description of the voice.
|
89
100
|
|
90
|
-
|
101
|
+
language : SupportedLanguage
|
102
|
+
The language of the voice.
|
103
|
+
|
104
|
+
|
105
|
+
mode : CloneMode
|
106
|
+
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
107
|
+
|
108
|
+
|
109
|
+
enhance : bool
|
110
|
+
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
111
|
+
|
112
|
+
|
113
|
+
description : typing.Optional[str]
|
114
|
+
A description for the voice.
|
115
|
+
|
116
|
+
|
117
|
+
transcript : typing.Optional[str]
|
118
|
+
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
91
119
|
|
92
|
-
language : typing.Optional[SupportedLanguage]
|
93
120
|
|
94
121
|
request_options : typing.Optional[RequestOptions]
|
95
122
|
Request-specific configuration.
|
96
123
|
|
97
124
|
Returns
|
98
125
|
-------
|
99
|
-
|
126
|
+
VoiceMetadata
|
100
127
|
|
101
128
|
Examples
|
102
129
|
--------
|
@@ -105,214 +132,27 @@ class VoicesClient:
|
|
105
132
|
client = Cartesia(
|
106
133
|
api_key="YOUR_API_KEY",
|
107
134
|
)
|
108
|
-
client.voices.
|
109
|
-
name="
|
110
|
-
description="
|
111
|
-
|
112
|
-
1.0,
|
113
|
-
1.0,
|
114
|
-
1.0,
|
115
|
-
1.0,
|
116
|
-
1.0,
|
117
|
-
1.0,
|
118
|
-
1.0,
|
119
|
-
1.0,
|
120
|
-
1.0,
|
121
|
-
1.0,
|
122
|
-
1.0,
|
123
|
-
1.0,
|
124
|
-
1.0,
|
125
|
-
1.0,
|
126
|
-
1.0,
|
127
|
-
1.0,
|
128
|
-
1.0,
|
129
|
-
1.0,
|
130
|
-
1.0,
|
131
|
-
1.0,
|
132
|
-
1.0,
|
133
|
-
1.0,
|
134
|
-
1.0,
|
135
|
-
1.0,
|
136
|
-
1.0,
|
137
|
-
1.0,
|
138
|
-
1.0,
|
139
|
-
1.0,
|
140
|
-
1.0,
|
141
|
-
1.0,
|
142
|
-
1.0,
|
143
|
-
1.0,
|
144
|
-
1.0,
|
145
|
-
1.0,
|
146
|
-
1.0,
|
147
|
-
1.0,
|
148
|
-
1.0,
|
149
|
-
1.0,
|
150
|
-
1.0,
|
151
|
-
1.0,
|
152
|
-
1.0,
|
153
|
-
1.0,
|
154
|
-
1.0,
|
155
|
-
1.0,
|
156
|
-
1.0,
|
157
|
-
1.0,
|
158
|
-
1.0,
|
159
|
-
1.0,
|
160
|
-
1.0,
|
161
|
-
1.0,
|
162
|
-
1.0,
|
163
|
-
1.0,
|
164
|
-
1.0,
|
165
|
-
1.0,
|
166
|
-
1.0,
|
167
|
-
1.0,
|
168
|
-
1.0,
|
169
|
-
1.0,
|
170
|
-
1.0,
|
171
|
-
1.0,
|
172
|
-
1.0,
|
173
|
-
1.0,
|
174
|
-
1.0,
|
175
|
-
1.0,
|
176
|
-
1.0,
|
177
|
-
1.0,
|
178
|
-
1.0,
|
179
|
-
1.0,
|
180
|
-
1.0,
|
181
|
-
1.0,
|
182
|
-
1.0,
|
183
|
-
1.0,
|
184
|
-
1.0,
|
185
|
-
1.0,
|
186
|
-
1.0,
|
187
|
-
1.0,
|
188
|
-
1.0,
|
189
|
-
1.0,
|
190
|
-
1.0,
|
191
|
-
1.0,
|
192
|
-
1.0,
|
193
|
-
1.0,
|
194
|
-
1.0,
|
195
|
-
1.0,
|
196
|
-
1.0,
|
197
|
-
1.0,
|
198
|
-
1.0,
|
199
|
-
1.0,
|
200
|
-
1.0,
|
201
|
-
1.0,
|
202
|
-
1.0,
|
203
|
-
1.0,
|
204
|
-
1.0,
|
205
|
-
1.0,
|
206
|
-
1.0,
|
207
|
-
1.0,
|
208
|
-
1.0,
|
209
|
-
1.0,
|
210
|
-
1.0,
|
211
|
-
1.0,
|
212
|
-
1.0,
|
213
|
-
1.0,
|
214
|
-
1.0,
|
215
|
-
1.0,
|
216
|
-
1.0,
|
217
|
-
1.0,
|
218
|
-
1.0,
|
219
|
-
1.0,
|
220
|
-
1.0,
|
221
|
-
1.0,
|
222
|
-
1.0,
|
223
|
-
1.0,
|
224
|
-
1.0,
|
225
|
-
1.0,
|
226
|
-
1.0,
|
227
|
-
1.0,
|
228
|
-
1.0,
|
229
|
-
1.0,
|
230
|
-
1.0,
|
231
|
-
1.0,
|
232
|
-
1.0,
|
233
|
-
1.0,
|
234
|
-
1.0,
|
235
|
-
1.0,
|
236
|
-
1.0,
|
237
|
-
1.0,
|
238
|
-
1.0,
|
239
|
-
1.0,
|
240
|
-
1.0,
|
241
|
-
1.0,
|
242
|
-
1.0,
|
243
|
-
1.0,
|
244
|
-
1.0,
|
245
|
-
1.0,
|
246
|
-
1.0,
|
247
|
-
1.0,
|
248
|
-
1.0,
|
249
|
-
1.0,
|
250
|
-
1.0,
|
251
|
-
1.0,
|
252
|
-
1.0,
|
253
|
-
1.0,
|
254
|
-
1.0,
|
255
|
-
1.0,
|
256
|
-
1.0,
|
257
|
-
1.0,
|
258
|
-
1.0,
|
259
|
-
1.0,
|
260
|
-
1.0,
|
261
|
-
1.0,
|
262
|
-
1.0,
|
263
|
-
1.0,
|
264
|
-
1.0,
|
265
|
-
1.0,
|
266
|
-
1.0,
|
267
|
-
1.0,
|
268
|
-
1.0,
|
269
|
-
1.0,
|
270
|
-
1.0,
|
271
|
-
1.0,
|
272
|
-
1.0,
|
273
|
-
1.0,
|
274
|
-
1.0,
|
275
|
-
1.0,
|
276
|
-
1.0,
|
277
|
-
1.0,
|
278
|
-
1.0,
|
279
|
-
1.0,
|
280
|
-
1.0,
|
281
|
-
1.0,
|
282
|
-
1.0,
|
283
|
-
1.0,
|
284
|
-
1.0,
|
285
|
-
1.0,
|
286
|
-
1.0,
|
287
|
-
1.0,
|
288
|
-
1.0,
|
289
|
-
1.0,
|
290
|
-
1.0,
|
291
|
-
1.0,
|
292
|
-
1.0,
|
293
|
-
1.0,
|
294
|
-
1.0,
|
295
|
-
1.0,
|
296
|
-
1.0,
|
297
|
-
1.0,
|
298
|
-
1.0,
|
299
|
-
1.0,
|
300
|
-
1.0,
|
301
|
-
1.0,
|
302
|
-
1.0,
|
303
|
-
1.0,
|
304
|
-
],
|
135
|
+
client.voices.clone(
|
136
|
+
name="A high-stability cloned voice",
|
137
|
+
description="Copied from Cartesia docs",
|
138
|
+
mode="stability",
|
305
139
|
language="en",
|
140
|
+
enhance=True,
|
306
141
|
)
|
307
142
|
"""
|
308
143
|
_response = self._client_wrapper.httpx_client.request(
|
309
|
-
"voices/",
|
144
|
+
"voices/clone",
|
310
145
|
method="POST",
|
311
|
-
|
146
|
+
data={
|
312
147
|
"name": name,
|
313
148
|
"description": description,
|
314
|
-
"embedding": embedding,
|
315
149
|
"language": language,
|
150
|
+
"mode": mode,
|
151
|
+
"enhance": enhance,
|
152
|
+
"transcript": transcript,
|
153
|
+
},
|
154
|
+
files={
|
155
|
+
"clip": clip,
|
316
156
|
},
|
317
157
|
request_options=request_options,
|
318
158
|
omit=OMIT,
|
@@ -320,9 +160,9 @@ class VoicesClient:
|
|
320
160
|
try:
|
321
161
|
if 200 <= _response.status_code < 300:
|
322
162
|
return typing.cast(
|
323
|
-
|
163
|
+
VoiceMetadata,
|
324
164
|
parse_obj_as(
|
325
|
-
type_=
|
165
|
+
type_=VoiceMetadata, # type: ignore
|
326
166
|
object_=_response.json(),
|
327
167
|
),
|
328
168
|
)
|
@@ -352,7 +192,7 @@ class VoicesClient:
|
|
352
192
|
api_key="YOUR_API_KEY",
|
353
193
|
)
|
354
194
|
client.voices.delete(
|
355
|
-
id="
|
195
|
+
id="id",
|
356
196
|
)
|
357
197
|
"""
|
358
198
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -397,9 +237,9 @@ class VoicesClient:
|
|
397
237
|
api_key="YOUR_API_KEY",
|
398
238
|
)
|
399
239
|
client.voices.update(
|
400
|
-
id="
|
401
|
-
name="
|
402
|
-
description="
|
240
|
+
id="id",
|
241
|
+
name="name",
|
242
|
+
description="description",
|
403
243
|
)
|
404
244
|
"""
|
405
245
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -447,7 +287,7 @@ class VoicesClient:
|
|
447
287
|
api_key="YOUR_API_KEY",
|
448
288
|
)
|
449
289
|
client.voices.get(
|
450
|
-
id="
|
290
|
+
id="id",
|
451
291
|
)
|
452
292
|
"""
|
453
293
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -504,203 +344,9 @@ class VoicesClient:
|
|
504
344
|
api_key="YOUR_API_KEY",
|
505
345
|
)
|
506
346
|
client.voices.localize(
|
507
|
-
embedding=[
|
508
|
-
1.0,
|
509
|
-
1.0,
|
510
|
-
1.0,
|
511
|
-
1.0,
|
512
|
-
1.0,
|
513
|
-
1.0,
|
514
|
-
1.0,
|
515
|
-
1.0,
|
516
|
-
1.0,
|
517
|
-
1.0,
|
518
|
-
1.0,
|
519
|
-
1.0,
|
520
|
-
1.0,
|
521
|
-
1.0,
|
522
|
-
1.0,
|
523
|
-
1.0,
|
524
|
-
1.0,
|
525
|
-
1.0,
|
526
|
-
1.0,
|
527
|
-
1.0,
|
528
|
-
1.0,
|
529
|
-
1.0,
|
530
|
-
1.0,
|
531
|
-
1.0,
|
532
|
-
1.0,
|
533
|
-
1.0,
|
534
|
-
1.0,
|
535
|
-
1.0,
|
536
|
-
1.0,
|
537
|
-
1.0,
|
538
|
-
1.0,
|
539
|
-
1.0,
|
540
|
-
1.0,
|
541
|
-
1.0,
|
542
|
-
1.0,
|
543
|
-
1.0,
|
544
|
-
1.0,
|
545
|
-
1.0,
|
546
|
-
1.0,
|
547
|
-
1.0,
|
548
|
-
1.0,
|
549
|
-
1.0,
|
550
|
-
1.0,
|
551
|
-
1.0,
|
552
|
-
1.0,
|
553
|
-
1.0,
|
554
|
-
1.0,
|
555
|
-
1.0,
|
556
|
-
1.0,
|
557
|
-
1.0,
|
558
|
-
1.0,
|
559
|
-
1.0,
|
560
|
-
1.0,
|
561
|
-
1.0,
|
562
|
-
1.0,
|
563
|
-
1.0,
|
564
|
-
1.0,
|
565
|
-
1.0,
|
566
|
-
1.0,
|
567
|
-
1.0,
|
568
|
-
1.0,
|
569
|
-
1.0,
|
570
|
-
1.0,
|
571
|
-
1.0,
|
572
|
-
1.0,
|
573
|
-
1.0,
|
574
|
-
1.0,
|
575
|
-
1.0,
|
576
|
-
1.0,
|
577
|
-
1.0,
|
578
|
-
1.0,
|
579
|
-
1.0,
|
580
|
-
1.0,
|
581
|
-
1.0,
|
582
|
-
1.0,
|
583
|
-
1.0,
|
584
|
-
1.0,
|
585
|
-
1.0,
|
586
|
-
1.0,
|
587
|
-
1.0,
|
588
|
-
1.0,
|
589
|
-
1.0,
|
590
|
-
1.0,
|
591
|
-
1.0,
|
592
|
-
1.0,
|
593
|
-
1.0,
|
594
|
-
1.0,
|
595
|
-
1.0,
|
596
|
-
1.0,
|
597
|
-
1.0,
|
598
|
-
1.0,
|
599
|
-
1.0,
|
600
|
-
1.0,
|
601
|
-
1.0,
|
602
|
-
1.0,
|
603
|
-
1.0,
|
604
|
-
1.0,
|
605
|
-
1.0,
|
606
|
-
1.0,
|
607
|
-
1.0,
|
608
|
-
1.0,
|
609
|
-
1.0,
|
610
|
-
1.0,
|
611
|
-
1.0,
|
612
|
-
1.0,
|
613
|
-
1.0,
|
614
|
-
1.0,
|
615
|
-
1.0,
|
616
|
-
1.0,
|
617
|
-
1.0,
|
618
|
-
1.0,
|
619
|
-
1.0,
|
620
|
-
1.0,
|
621
|
-
1.0,
|
622
|
-
1.0,
|
623
|
-
1.0,
|
624
|
-
1.0,
|
625
|
-
1.0,
|
626
|
-
1.0,
|
627
|
-
1.0,
|
628
|
-
1.0,
|
629
|
-
1.0,
|
630
|
-
1.0,
|
631
|
-
1.0,
|
632
|
-
1.0,
|
633
|
-
1.0,
|
634
|
-
1.0,
|
635
|
-
1.0,
|
636
|
-
1.0,
|
637
|
-
1.0,
|
638
|
-
1.0,
|
639
|
-
1.0,
|
640
|
-
1.0,
|
641
|
-
1.0,
|
642
|
-
1.0,
|
643
|
-
1.0,
|
644
|
-
1.0,
|
645
|
-
1.0,
|
646
|
-
1.0,
|
647
|
-
1.0,
|
648
|
-
1.0,
|
649
|
-
1.0,
|
650
|
-
1.0,
|
651
|
-
1.0,
|
652
|
-
1.0,
|
653
|
-
1.0,
|
654
|
-
1.0,
|
655
|
-
1.0,
|
656
|
-
1.0,
|
657
|
-
1.0,
|
658
|
-
1.0,
|
659
|
-
1.0,
|
660
|
-
1.0,
|
661
|
-
1.0,
|
662
|
-
1.0,
|
663
|
-
1.0,
|
664
|
-
1.0,
|
665
|
-
1.0,
|
666
|
-
1.0,
|
667
|
-
1.0,
|
668
|
-
1.0,
|
669
|
-
1.0,
|
670
|
-
1.0,
|
671
|
-
1.0,
|
672
|
-
1.0,
|
673
|
-
1.0,
|
674
|
-
1.0,
|
675
|
-
1.0,
|
676
|
-
1.0,
|
677
|
-
1.0,
|
678
|
-
1.0,
|
679
|
-
1.0,
|
680
|
-
1.0,
|
681
|
-
1.0,
|
682
|
-
1.0,
|
683
|
-
1.0,
|
684
|
-
1.0,
|
685
|
-
1.0,
|
686
|
-
1.0,
|
687
|
-
1.0,
|
688
|
-
1.0,
|
689
|
-
1.0,
|
690
|
-
1.0,
|
691
|
-
1.0,
|
692
|
-
1.0,
|
693
|
-
1.0,
|
694
|
-
1.0,
|
695
|
-
1.0,
|
696
|
-
1.0,
|
697
|
-
1.0,
|
698
|
-
1.0,
|
699
|
-
1.0,
|
700
|
-
],
|
347
|
+
embedding=[1.1, 1.1],
|
701
348
|
language="en",
|
702
349
|
original_speaker_gender="male",
|
703
|
-
dialect="au",
|
704
350
|
)
|
705
351
|
"""
|
706
352
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -757,7 +403,7 @@ class VoicesClient:
|
|
757
403
|
api_key="YOUR_API_KEY",
|
758
404
|
)
|
759
405
|
client.voices.mix(
|
760
|
-
voices=[{"id": "
|
406
|
+
voices=[{"id": "id", "weight": 1.1}, {"id": "id", "weight": 1.1}],
|
761
407
|
)
|
762
408
|
"""
|
763
409
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -785,60 +431,39 @@ class VoicesClient:
|
|
785
431
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
786
432
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
787
433
|
|
788
|
-
def
|
434
|
+
def create(
|
789
435
|
self,
|
790
436
|
*,
|
791
|
-
clip: core.File,
|
792
437
|
name: str,
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
transcript: typing.Optional[str] = OMIT,
|
438
|
+
description: str,
|
439
|
+
embedding: Embedding,
|
440
|
+
language: typing.Optional[SupportedLanguage] = OMIT,
|
441
|
+
base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
|
798
442
|
request_options: typing.Optional[RequestOptions] = None,
|
799
|
-
) ->
|
443
|
+
) -> Voice:
|
800
444
|
"""
|
801
|
-
|
802
|
-
|
803
|
-
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
804
|
-
|
805
|
-
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
445
|
+
Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
|
806
446
|
|
807
447
|
Parameters
|
808
448
|
----------
|
809
|
-
clip : core.File
|
810
|
-
See core.File for more documentation
|
811
|
-
|
812
449
|
name : str
|
813
450
|
The name of the voice.
|
814
451
|
|
452
|
+
description : str
|
453
|
+
The description of the voice.
|
815
454
|
|
816
|
-
|
817
|
-
The language of the voice.
|
818
|
-
|
819
|
-
|
820
|
-
mode : CloneMode
|
821
|
-
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
822
|
-
|
823
|
-
|
824
|
-
enhance : bool
|
825
|
-
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
826
|
-
|
827
|
-
|
828
|
-
description : typing.Optional[str]
|
829
|
-
A description for the voice.
|
830
|
-
|
455
|
+
embedding : Embedding
|
831
456
|
|
832
|
-
|
833
|
-
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
457
|
+
language : typing.Optional[SupportedLanguage]
|
834
458
|
|
459
|
+
base_voice_id : typing.Optional[BaseVoiceId]
|
835
460
|
|
836
461
|
request_options : typing.Optional[RequestOptions]
|
837
462
|
Request-specific configuration.
|
838
463
|
|
839
464
|
Returns
|
840
465
|
-------
|
841
|
-
|
466
|
+
Voice
|
842
467
|
|
843
468
|
Examples
|
844
469
|
--------
|
@@ -847,27 +472,23 @@ class VoicesClient:
|
|
847
472
|
client = Cartesia(
|
848
473
|
api_key="YOUR_API_KEY",
|
849
474
|
)
|
850
|
-
client.voices.
|
851
|
-
name="
|
852
|
-
description="
|
853
|
-
|
475
|
+
client.voices.create(
|
476
|
+
name="My Custom Voice",
|
477
|
+
description="A custom voice created through the API",
|
478
|
+
embedding=[],
|
854
479
|
language="en",
|
855
|
-
|
480
|
+
base_voice_id="123e4567-e89b-12d3-a456-426614174000",
|
856
481
|
)
|
857
482
|
"""
|
858
483
|
_response = self._client_wrapper.httpx_client.request(
|
859
|
-
"voices/
|
484
|
+
"voices/",
|
860
485
|
method="POST",
|
861
|
-
|
486
|
+
json={
|
862
487
|
"name": name,
|
863
488
|
"description": description,
|
489
|
+
"embedding": embedding,
|
864
490
|
"language": language,
|
865
|
-
"
|
866
|
-
"enhance": enhance,
|
867
|
-
"transcript": transcript,
|
868
|
-
},
|
869
|
-
files={
|
870
|
-
"clip": clip,
|
491
|
+
"base_voice_id": base_voice_id,
|
871
492
|
},
|
872
493
|
request_options=request_options,
|
873
494
|
omit=OMIT,
|
@@ -875,9 +496,9 @@ class VoicesClient:
|
|
875
496
|
try:
|
876
497
|
if 200 <= _response.status_code < 300:
|
877
498
|
return typing.cast(
|
878
|
-
|
499
|
+
Voice,
|
879
500
|
parse_obj_as(
|
880
|
-
type_=
|
501
|
+
type_=Voice, # type: ignore
|
881
502
|
object_=_response.json(),
|
882
503
|
),
|
883
504
|
)
|
@@ -938,34 +559,60 @@ class AsyncVoicesClient:
|
|
938
559
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
939
560
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
940
561
|
|
941
|
-
async def
|
562
|
+
async def clone(
|
942
563
|
self,
|
943
564
|
*,
|
565
|
+
clip: core.File,
|
944
566
|
name: str,
|
945
|
-
|
946
|
-
|
947
|
-
|
567
|
+
language: SupportedLanguage,
|
568
|
+
mode: CloneMode,
|
569
|
+
enhance: bool,
|
570
|
+
description: typing.Optional[str] = OMIT,
|
571
|
+
transcript: typing.Optional[str] = OMIT,
|
948
572
|
request_options: typing.Optional[RequestOptions] = None,
|
949
|
-
) ->
|
573
|
+
) -> VoiceMetadata:
|
950
574
|
"""
|
575
|
+
Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
|
576
|
+
|
577
|
+
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
578
|
+
|
579
|
+
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
580
|
+
|
951
581
|
Parameters
|
952
582
|
----------
|
583
|
+
clip : core.File
|
584
|
+
See core.File for more documentation
|
585
|
+
|
953
586
|
name : str
|
954
587
|
The name of the voice.
|
955
588
|
|
956
|
-
description : str
|
957
|
-
The description of the voice.
|
958
589
|
|
959
|
-
|
590
|
+
language : SupportedLanguage
|
591
|
+
The language of the voice.
|
592
|
+
|
593
|
+
|
594
|
+
mode : CloneMode
|
595
|
+
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
596
|
+
|
597
|
+
|
598
|
+
enhance : bool
|
599
|
+
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
600
|
+
|
601
|
+
|
602
|
+
description : typing.Optional[str]
|
603
|
+
A description for the voice.
|
604
|
+
|
605
|
+
|
606
|
+
transcript : typing.Optional[str]
|
607
|
+
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
960
608
|
|
961
|
-
language : typing.Optional[SupportedLanguage]
|
962
609
|
|
963
610
|
request_options : typing.Optional[RequestOptions]
|
964
611
|
Request-specific configuration.
|
965
612
|
|
966
613
|
Returns
|
967
614
|
-------
|
968
|
-
|
615
|
+
VoiceMetadata
|
969
616
|
|
970
617
|
Examples
|
971
618
|
--------
|
@@ -979,217 +626,30 @@ class AsyncVoicesClient:
|
|
979
626
|
|
980
627
|
|
981
628
|
async def main() -> None:
|
982
|
-
await client.voices.
|
983
|
-
name="
|
984
|
-
description="
|
985
|
-
|
986
|
-
1.0,
|
987
|
-
1.0,
|
988
|
-
1.0,
|
989
|
-
1.0,
|
990
|
-
1.0,
|
991
|
-
1.0,
|
992
|
-
1.0,
|
993
|
-
1.0,
|
994
|
-
1.0,
|
995
|
-
1.0,
|
996
|
-
1.0,
|
997
|
-
1.0,
|
998
|
-
1.0,
|
999
|
-
1.0,
|
1000
|
-
1.0,
|
1001
|
-
1.0,
|
1002
|
-
1.0,
|
1003
|
-
1.0,
|
1004
|
-
1.0,
|
1005
|
-
1.0,
|
1006
|
-
1.0,
|
1007
|
-
1.0,
|
1008
|
-
1.0,
|
1009
|
-
1.0,
|
1010
|
-
1.0,
|
1011
|
-
1.0,
|
1012
|
-
1.0,
|
1013
|
-
1.0,
|
1014
|
-
1.0,
|
1015
|
-
1.0,
|
1016
|
-
1.0,
|
1017
|
-
1.0,
|
1018
|
-
1.0,
|
1019
|
-
1.0,
|
1020
|
-
1.0,
|
1021
|
-
1.0,
|
1022
|
-
1.0,
|
1023
|
-
1.0,
|
1024
|
-
1.0,
|
1025
|
-
1.0,
|
1026
|
-
1.0,
|
1027
|
-
1.0,
|
1028
|
-
1.0,
|
1029
|
-
1.0,
|
1030
|
-
1.0,
|
1031
|
-
1.0,
|
1032
|
-
1.0,
|
1033
|
-
1.0,
|
1034
|
-
1.0,
|
1035
|
-
1.0,
|
1036
|
-
1.0,
|
1037
|
-
1.0,
|
1038
|
-
1.0,
|
1039
|
-
1.0,
|
1040
|
-
1.0,
|
1041
|
-
1.0,
|
1042
|
-
1.0,
|
1043
|
-
1.0,
|
1044
|
-
1.0,
|
1045
|
-
1.0,
|
1046
|
-
1.0,
|
1047
|
-
1.0,
|
1048
|
-
1.0,
|
1049
|
-
1.0,
|
1050
|
-
1.0,
|
1051
|
-
1.0,
|
1052
|
-
1.0,
|
1053
|
-
1.0,
|
1054
|
-
1.0,
|
1055
|
-
1.0,
|
1056
|
-
1.0,
|
1057
|
-
1.0,
|
1058
|
-
1.0,
|
1059
|
-
1.0,
|
1060
|
-
1.0,
|
1061
|
-
1.0,
|
1062
|
-
1.0,
|
1063
|
-
1.0,
|
1064
|
-
1.0,
|
1065
|
-
1.0,
|
1066
|
-
1.0,
|
1067
|
-
1.0,
|
1068
|
-
1.0,
|
1069
|
-
1.0,
|
1070
|
-
1.0,
|
1071
|
-
1.0,
|
1072
|
-
1.0,
|
1073
|
-
1.0,
|
1074
|
-
1.0,
|
1075
|
-
1.0,
|
1076
|
-
1.0,
|
1077
|
-
1.0,
|
1078
|
-
1.0,
|
1079
|
-
1.0,
|
1080
|
-
1.0,
|
1081
|
-
1.0,
|
1082
|
-
1.0,
|
1083
|
-
1.0,
|
1084
|
-
1.0,
|
1085
|
-
1.0,
|
1086
|
-
1.0,
|
1087
|
-
1.0,
|
1088
|
-
1.0,
|
1089
|
-
1.0,
|
1090
|
-
1.0,
|
1091
|
-
1.0,
|
1092
|
-
1.0,
|
1093
|
-
1.0,
|
1094
|
-
1.0,
|
1095
|
-
1.0,
|
1096
|
-
1.0,
|
1097
|
-
1.0,
|
1098
|
-
1.0,
|
1099
|
-
1.0,
|
1100
|
-
1.0,
|
1101
|
-
1.0,
|
1102
|
-
1.0,
|
1103
|
-
1.0,
|
1104
|
-
1.0,
|
1105
|
-
1.0,
|
1106
|
-
1.0,
|
1107
|
-
1.0,
|
1108
|
-
1.0,
|
1109
|
-
1.0,
|
1110
|
-
1.0,
|
1111
|
-
1.0,
|
1112
|
-
1.0,
|
1113
|
-
1.0,
|
1114
|
-
1.0,
|
1115
|
-
1.0,
|
1116
|
-
1.0,
|
1117
|
-
1.0,
|
1118
|
-
1.0,
|
1119
|
-
1.0,
|
1120
|
-
1.0,
|
1121
|
-
1.0,
|
1122
|
-
1.0,
|
1123
|
-
1.0,
|
1124
|
-
1.0,
|
1125
|
-
1.0,
|
1126
|
-
1.0,
|
1127
|
-
1.0,
|
1128
|
-
1.0,
|
1129
|
-
1.0,
|
1130
|
-
1.0,
|
1131
|
-
1.0,
|
1132
|
-
1.0,
|
1133
|
-
1.0,
|
1134
|
-
1.0,
|
1135
|
-
1.0,
|
1136
|
-
1.0,
|
1137
|
-
1.0,
|
1138
|
-
1.0,
|
1139
|
-
1.0,
|
1140
|
-
1.0,
|
1141
|
-
1.0,
|
1142
|
-
1.0,
|
1143
|
-
1.0,
|
1144
|
-
1.0,
|
1145
|
-
1.0,
|
1146
|
-
1.0,
|
1147
|
-
1.0,
|
1148
|
-
1.0,
|
1149
|
-
1.0,
|
1150
|
-
1.0,
|
1151
|
-
1.0,
|
1152
|
-
1.0,
|
1153
|
-
1.0,
|
1154
|
-
1.0,
|
1155
|
-
1.0,
|
1156
|
-
1.0,
|
1157
|
-
1.0,
|
1158
|
-
1.0,
|
1159
|
-
1.0,
|
1160
|
-
1.0,
|
1161
|
-
1.0,
|
1162
|
-
1.0,
|
1163
|
-
1.0,
|
1164
|
-
1.0,
|
1165
|
-
1.0,
|
1166
|
-
1.0,
|
1167
|
-
1.0,
|
1168
|
-
1.0,
|
1169
|
-
1.0,
|
1170
|
-
1.0,
|
1171
|
-
1.0,
|
1172
|
-
1.0,
|
1173
|
-
1.0,
|
1174
|
-
1.0,
|
1175
|
-
1.0,
|
1176
|
-
1.0,
|
1177
|
-
1.0,
|
1178
|
-
],
|
629
|
+
await client.voices.clone(
|
630
|
+
name="A high-stability cloned voice",
|
631
|
+
description="Copied from Cartesia docs",
|
632
|
+
mode="stability",
|
1179
633
|
language="en",
|
634
|
+
enhance=True,
|
1180
635
|
)
|
1181
636
|
|
1182
637
|
|
1183
638
|
asyncio.run(main())
|
1184
639
|
"""
|
1185
640
|
_response = await self._client_wrapper.httpx_client.request(
|
1186
|
-
"voices/",
|
641
|
+
"voices/clone",
|
1187
642
|
method="POST",
|
1188
|
-
|
643
|
+
data={
|
1189
644
|
"name": name,
|
1190
645
|
"description": description,
|
1191
|
-
"embedding": embedding,
|
1192
646
|
"language": language,
|
647
|
+
"mode": mode,
|
648
|
+
"enhance": enhance,
|
649
|
+
"transcript": transcript,
|
650
|
+
},
|
651
|
+
files={
|
652
|
+
"clip": clip,
|
1193
653
|
},
|
1194
654
|
request_options=request_options,
|
1195
655
|
omit=OMIT,
|
@@ -1197,9 +657,9 @@ class AsyncVoicesClient:
|
|
1197
657
|
try:
|
1198
658
|
if 200 <= _response.status_code < 300:
|
1199
659
|
return typing.cast(
|
1200
|
-
|
660
|
+
VoiceMetadata,
|
1201
661
|
parse_obj_as(
|
1202
|
-
type_=
|
662
|
+
type_=VoiceMetadata, # type: ignore
|
1203
663
|
object_=_response.json(),
|
1204
664
|
),
|
1205
665
|
)
|
@@ -1234,7 +694,7 @@ class AsyncVoicesClient:
|
|
1234
694
|
|
1235
695
|
async def main() -> None:
|
1236
696
|
await client.voices.delete(
|
1237
|
-
id="
|
697
|
+
id="id",
|
1238
698
|
)
|
1239
699
|
|
1240
700
|
|
@@ -1287,9 +747,9 @@ class AsyncVoicesClient:
|
|
1287
747
|
|
1288
748
|
async def main() -> None:
|
1289
749
|
await client.voices.update(
|
1290
|
-
id="
|
1291
|
-
name="
|
1292
|
-
description="
|
750
|
+
id="id",
|
751
|
+
name="name",
|
752
|
+
description="description",
|
1293
753
|
)
|
1294
754
|
|
1295
755
|
|
@@ -1345,7 +805,7 @@ class AsyncVoicesClient:
|
|
1345
805
|
|
1346
806
|
async def main() -> None:
|
1347
807
|
await client.voices.get(
|
1348
|
-
id="
|
808
|
+
id="id",
|
1349
809
|
)
|
1350
810
|
|
1351
811
|
|
@@ -1410,203 +870,9 @@ class AsyncVoicesClient:
|
|
1410
870
|
|
1411
871
|
async def main() -> None:
|
1412
872
|
await client.voices.localize(
|
1413
|
-
embedding=[
|
1414
|
-
1.0,
|
1415
|
-
1.0,
|
1416
|
-
1.0,
|
1417
|
-
1.0,
|
1418
|
-
1.0,
|
1419
|
-
1.0,
|
1420
|
-
1.0,
|
1421
|
-
1.0,
|
1422
|
-
1.0,
|
1423
|
-
1.0,
|
1424
|
-
1.0,
|
1425
|
-
1.0,
|
1426
|
-
1.0,
|
1427
|
-
1.0,
|
1428
|
-
1.0,
|
1429
|
-
1.0,
|
1430
|
-
1.0,
|
1431
|
-
1.0,
|
1432
|
-
1.0,
|
1433
|
-
1.0,
|
1434
|
-
1.0,
|
1435
|
-
1.0,
|
1436
|
-
1.0,
|
1437
|
-
1.0,
|
1438
|
-
1.0,
|
1439
|
-
1.0,
|
1440
|
-
1.0,
|
1441
|
-
1.0,
|
1442
|
-
1.0,
|
1443
|
-
1.0,
|
1444
|
-
1.0,
|
1445
|
-
1.0,
|
1446
|
-
1.0,
|
1447
|
-
1.0,
|
1448
|
-
1.0,
|
1449
|
-
1.0,
|
1450
|
-
1.0,
|
1451
|
-
1.0,
|
1452
|
-
1.0,
|
1453
|
-
1.0,
|
1454
|
-
1.0,
|
1455
|
-
1.0,
|
1456
|
-
1.0,
|
1457
|
-
1.0,
|
1458
|
-
1.0,
|
1459
|
-
1.0,
|
1460
|
-
1.0,
|
1461
|
-
1.0,
|
1462
|
-
1.0,
|
1463
|
-
1.0,
|
1464
|
-
1.0,
|
1465
|
-
1.0,
|
1466
|
-
1.0,
|
1467
|
-
1.0,
|
1468
|
-
1.0,
|
1469
|
-
1.0,
|
1470
|
-
1.0,
|
1471
|
-
1.0,
|
1472
|
-
1.0,
|
1473
|
-
1.0,
|
1474
|
-
1.0,
|
1475
|
-
1.0,
|
1476
|
-
1.0,
|
1477
|
-
1.0,
|
1478
|
-
1.0,
|
1479
|
-
1.0,
|
1480
|
-
1.0,
|
1481
|
-
1.0,
|
1482
|
-
1.0,
|
1483
|
-
1.0,
|
1484
|
-
1.0,
|
1485
|
-
1.0,
|
1486
|
-
1.0,
|
1487
|
-
1.0,
|
1488
|
-
1.0,
|
1489
|
-
1.0,
|
1490
|
-
1.0,
|
1491
|
-
1.0,
|
1492
|
-
1.0,
|
1493
|
-
1.0,
|
1494
|
-
1.0,
|
1495
|
-
1.0,
|
1496
|
-
1.0,
|
1497
|
-
1.0,
|
1498
|
-
1.0,
|
1499
|
-
1.0,
|
1500
|
-
1.0,
|
1501
|
-
1.0,
|
1502
|
-
1.0,
|
1503
|
-
1.0,
|
1504
|
-
1.0,
|
1505
|
-
1.0,
|
1506
|
-
1.0,
|
1507
|
-
1.0,
|
1508
|
-
1.0,
|
1509
|
-
1.0,
|
1510
|
-
1.0,
|
1511
|
-
1.0,
|
1512
|
-
1.0,
|
1513
|
-
1.0,
|
1514
|
-
1.0,
|
1515
|
-
1.0,
|
1516
|
-
1.0,
|
1517
|
-
1.0,
|
1518
|
-
1.0,
|
1519
|
-
1.0,
|
1520
|
-
1.0,
|
1521
|
-
1.0,
|
1522
|
-
1.0,
|
1523
|
-
1.0,
|
1524
|
-
1.0,
|
1525
|
-
1.0,
|
1526
|
-
1.0,
|
1527
|
-
1.0,
|
1528
|
-
1.0,
|
1529
|
-
1.0,
|
1530
|
-
1.0,
|
1531
|
-
1.0,
|
1532
|
-
1.0,
|
1533
|
-
1.0,
|
1534
|
-
1.0,
|
1535
|
-
1.0,
|
1536
|
-
1.0,
|
1537
|
-
1.0,
|
1538
|
-
1.0,
|
1539
|
-
1.0,
|
1540
|
-
1.0,
|
1541
|
-
1.0,
|
1542
|
-
1.0,
|
1543
|
-
1.0,
|
1544
|
-
1.0,
|
1545
|
-
1.0,
|
1546
|
-
1.0,
|
1547
|
-
1.0,
|
1548
|
-
1.0,
|
1549
|
-
1.0,
|
1550
|
-
1.0,
|
1551
|
-
1.0,
|
1552
|
-
1.0,
|
1553
|
-
1.0,
|
1554
|
-
1.0,
|
1555
|
-
1.0,
|
1556
|
-
1.0,
|
1557
|
-
1.0,
|
1558
|
-
1.0,
|
1559
|
-
1.0,
|
1560
|
-
1.0,
|
1561
|
-
1.0,
|
1562
|
-
1.0,
|
1563
|
-
1.0,
|
1564
|
-
1.0,
|
1565
|
-
1.0,
|
1566
|
-
1.0,
|
1567
|
-
1.0,
|
1568
|
-
1.0,
|
1569
|
-
1.0,
|
1570
|
-
1.0,
|
1571
|
-
1.0,
|
1572
|
-
1.0,
|
1573
|
-
1.0,
|
1574
|
-
1.0,
|
1575
|
-
1.0,
|
1576
|
-
1.0,
|
1577
|
-
1.0,
|
1578
|
-
1.0,
|
1579
|
-
1.0,
|
1580
|
-
1.0,
|
1581
|
-
1.0,
|
1582
|
-
1.0,
|
1583
|
-
1.0,
|
1584
|
-
1.0,
|
1585
|
-
1.0,
|
1586
|
-
1.0,
|
1587
|
-
1.0,
|
1588
|
-
1.0,
|
1589
|
-
1.0,
|
1590
|
-
1.0,
|
1591
|
-
1.0,
|
1592
|
-
1.0,
|
1593
|
-
1.0,
|
1594
|
-
1.0,
|
1595
|
-
1.0,
|
1596
|
-
1.0,
|
1597
|
-
1.0,
|
1598
|
-
1.0,
|
1599
|
-
1.0,
|
1600
|
-
1.0,
|
1601
|
-
1.0,
|
1602
|
-
1.0,
|
1603
|
-
1.0,
|
1604
|
-
1.0,
|
1605
|
-
1.0,
|
1606
|
-
],
|
873
|
+
embedding=[1.1, 1.1],
|
1607
874
|
language="en",
|
1608
875
|
original_speaker_gender="male",
|
1609
|
-
dialect="au",
|
1610
876
|
)
|
1611
877
|
|
1612
878
|
|
@@ -1671,7 +937,7 @@ class AsyncVoicesClient:
|
|
1671
937
|
|
1672
938
|
async def main() -> None:
|
1673
939
|
await client.voices.mix(
|
1674
|
-
voices=[{"id": "
|
940
|
+
voices=[{"id": "id", "weight": 1.1}, {"id": "id", "weight": 1.1}],
|
1675
941
|
)
|
1676
942
|
|
1677
943
|
|
@@ -1702,60 +968,39 @@ class AsyncVoicesClient:
|
|
1702
968
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
1703
969
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
1704
970
|
|
1705
|
-
async def
|
971
|
+
async def create(
|
1706
972
|
self,
|
1707
973
|
*,
|
1708
|
-
clip: core.File,
|
1709
974
|
name: str,
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
1713
|
-
|
1714
|
-
transcript: typing.Optional[str] = OMIT,
|
975
|
+
description: str,
|
976
|
+
embedding: Embedding,
|
977
|
+
language: typing.Optional[SupportedLanguage] = OMIT,
|
978
|
+
base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
|
1715
979
|
request_options: typing.Optional[RequestOptions] = None,
|
1716
|
-
) ->
|
980
|
+
) -> Voice:
|
1717
981
|
"""
|
1718
|
-
|
1719
|
-
|
1720
|
-
Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
|
1721
|
-
|
1722
|
-
Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
|
982
|
+
Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
|
1723
983
|
|
1724
984
|
Parameters
|
1725
985
|
----------
|
1726
|
-
clip : core.File
|
1727
|
-
See core.File for more documentation
|
1728
|
-
|
1729
986
|
name : str
|
1730
987
|
The name of the voice.
|
1731
988
|
|
989
|
+
description : str
|
990
|
+
The description of the voice.
|
1732
991
|
|
1733
|
-
|
1734
|
-
The language of the voice.
|
1735
|
-
|
1736
|
-
|
1737
|
-
mode : CloneMode
|
1738
|
-
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
1739
|
-
|
1740
|
-
|
1741
|
-
enhance : bool
|
1742
|
-
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
1743
|
-
|
1744
|
-
|
1745
|
-
description : typing.Optional[str]
|
1746
|
-
A description for the voice.
|
1747
|
-
|
992
|
+
embedding : Embedding
|
1748
993
|
|
1749
|
-
|
1750
|
-
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
994
|
+
language : typing.Optional[SupportedLanguage]
|
1751
995
|
|
996
|
+
base_voice_id : typing.Optional[BaseVoiceId]
|
1752
997
|
|
1753
998
|
request_options : typing.Optional[RequestOptions]
|
1754
999
|
Request-specific configuration.
|
1755
1000
|
|
1756
1001
|
Returns
|
1757
1002
|
-------
|
1758
|
-
|
1003
|
+
Voice
|
1759
1004
|
|
1760
1005
|
Examples
|
1761
1006
|
--------
|
@@ -1769,30 +1014,26 @@ class AsyncVoicesClient:
|
|
1769
1014
|
|
1770
1015
|
|
1771
1016
|
async def main() -> None:
|
1772
|
-
await client.voices.
|
1773
|
-
name="
|
1774
|
-
description="
|
1775
|
-
|
1017
|
+
await client.voices.create(
|
1018
|
+
name="My Custom Voice",
|
1019
|
+
description="A custom voice created through the API",
|
1020
|
+
embedding=[],
|
1776
1021
|
language="en",
|
1777
|
-
|
1022
|
+
base_voice_id="123e4567-e89b-12d3-a456-426614174000",
|
1778
1023
|
)
|
1779
1024
|
|
1780
1025
|
|
1781
1026
|
asyncio.run(main())
|
1782
1027
|
"""
|
1783
1028
|
_response = await self._client_wrapper.httpx_client.request(
|
1784
|
-
"voices/
|
1029
|
+
"voices/",
|
1785
1030
|
method="POST",
|
1786
|
-
|
1031
|
+
json={
|
1787
1032
|
"name": name,
|
1788
1033
|
"description": description,
|
1034
|
+
"embedding": embedding,
|
1789
1035
|
"language": language,
|
1790
|
-
"
|
1791
|
-
"enhance": enhance,
|
1792
|
-
"transcript": transcript,
|
1793
|
-
},
|
1794
|
-
files={
|
1795
|
-
"clip": clip,
|
1036
|
+
"base_voice_id": base_voice_id,
|
1796
1037
|
},
|
1797
1038
|
request_options=request_options,
|
1798
1039
|
omit=OMIT,
|
@@ -1800,9 +1041,9 @@ class AsyncVoicesClient:
|
|
1800
1041
|
try:
|
1801
1042
|
if 200 <= _response.status_code < 300:
|
1802
1043
|
return typing.cast(
|
1803
|
-
|
1044
|
+
Voice,
|
1804
1045
|
parse_obj_as(
|
1805
|
-
type_=
|
1046
|
+
type_=Voice, # type: ignore
|
1806
1047
|
object_=_response.json(),
|
1807
1048
|
),
|
1808
1049
|
)
|