cartesia 2.0.0b2__py3-none-any.whl → 2.0.0b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +10 -0
- cartesia/base_client.py +0 -4
- cartesia/core/__init__.py +3 -0
- cartesia/core/client_wrapper.py +2 -2
- cartesia/core/pagination.py +88 -0
- cartesia/infill/client.py +4 -4
- cartesia/tts/_async_websocket.py +53 -1
- cartesia/tts/_websocket.py +52 -3
- cartesia/tts/client.py +4 -4
- cartesia/tts/requests/generation_request.py +5 -0
- cartesia/tts/requests/web_socket_chunk_response.py +3 -0
- cartesia/tts/requests/web_socket_response.py +2 -1
- cartesia/tts/requests/web_socket_tts_request.py +1 -0
- cartesia/tts/types/emotion.py +5 -0
- cartesia/tts/types/generation_request.py +5 -0
- cartesia/tts/types/web_socket_chunk_response.py +3 -1
- cartesia/tts/types/web_socket_response.py +2 -1
- cartesia/tts/types/web_socket_tts_output.py +2 -0
- cartesia/tts/types/web_socket_tts_request.py +1 -0
- cartesia/tts/utils/constants.py +2 -2
- cartesia/voice_changer/requests/streaming_response.py +2 -0
- cartesia/voice_changer/types/streaming_response.py +2 -0
- cartesia/voices/__init__.py +10 -0
- cartesia/voices/client.py +209 -44
- cartesia/voices/requests/__init__.py +2 -0
- cartesia/voices/requests/get_voices_response.py +24 -0
- cartesia/voices/requests/localize_dialect.py +4 -1
- cartesia/voices/requests/localize_voice_request.py +15 -2
- cartesia/voices/requests/voice.py +13 -9
- cartesia/voices/types/__init__.py +8 -0
- cartesia/voices/types/gender_presentation.py +5 -0
- cartesia/voices/types/get_voices_response.py +34 -0
- cartesia/voices/types/localize_dialect.py +4 -1
- cartesia/voices/types/localize_french_dialect.py +5 -0
- cartesia/voices/types/localize_voice_request.py +16 -3
- cartesia/voices/types/voice.py +13 -9
- cartesia/voices/types/voice_expand_options.py +5 -0
- {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/METADATA +149 -73
- {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/RECORD +40 -35
- cartesia/datasets/client.py +0 -392
- {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/WHEEL +0 -0
@@ -7,11 +7,13 @@ import pydantic
|
|
7
7
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
8
8
|
from .context_id import ContextId
|
9
9
|
from .flush_id import FlushId
|
10
|
+
from .phoneme_timestamps import PhonemeTimestamps
|
10
11
|
from .word_timestamps import WordTimestamps
|
11
12
|
|
12
13
|
|
13
14
|
class WebSocketTtsOutput(UniversalBaseModel):
|
14
15
|
word_timestamps: typing.Optional[WordTimestamps] = None
|
16
|
+
phoneme_timestamps: typing.Optional[PhonemeTimestamps] = None
|
15
17
|
audio: typing.Optional[bytes] = None
|
16
18
|
context_id: typing.Optional[ContextId] = None
|
17
19
|
flush_id: typing.Optional[FlushId] = None
|
@@ -22,6 +22,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
|
|
22
22
|
duration: typing.Optional[int] = None
|
23
23
|
language: typing.Optional[str] = None
|
24
24
|
add_timestamps: typing.Optional[bool] = None
|
25
|
+
use_original_timestamps: typing.Optional[bool] = None
|
25
26
|
add_phoneme_timestamps: typing.Optional[bool] = None
|
26
27
|
continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
|
27
28
|
context_id: typing.Optional[str] = None
|
cartesia/tts/utils/constants.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
DEFAULT_MODEL_ID = "sonic-
|
2
|
-
MULTILINGUAL_MODEL_ID = "sonic-
|
1
|
+
DEFAULT_MODEL_ID = "sonic-2" # latest default model
|
2
|
+
MULTILINGUAL_MODEL_ID = "sonic-2" # latest multilingual model
|
3
3
|
DEFAULT_BASE_URL = "api.cartesia.ai"
|
4
4
|
DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
|
5
5
|
DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
4
4
|
import typing_extensions
|
5
5
|
import typing
|
6
6
|
import typing_extensions
|
7
|
+
from ...tts.types.flush_id import FlushId
|
7
8
|
from ...tts.types.context_id import ContextId
|
8
9
|
|
9
10
|
|
@@ -11,6 +12,7 @@ class StreamingResponse_ChunkParams(typing_extensions.TypedDict):
|
|
11
12
|
type: typing.Literal["chunk"]
|
12
13
|
data: str
|
13
14
|
step_time: float
|
15
|
+
flush_id: typing_extensions.NotRequired[FlushId]
|
14
16
|
context_id: typing_extensions.NotRequired[ContextId]
|
15
17
|
status_code: int
|
16
18
|
done: bool
|
@@ -3,6 +3,7 @@
|
|
3
3
|
from __future__ import annotations
|
4
4
|
from ...core.pydantic_utilities import UniversalBaseModel
|
5
5
|
import typing
|
6
|
+
from ...tts.types.flush_id import FlushId
|
6
7
|
from ...tts.types.context_id import ContextId
|
7
8
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
8
9
|
import pydantic
|
@@ -12,6 +13,7 @@ class StreamingResponse_Chunk(UniversalBaseModel):
|
|
12
13
|
type: typing.Literal["chunk"] = "chunk"
|
13
14
|
data: str
|
14
15
|
step_time: float
|
16
|
+
flush_id: typing.Optional[FlushId] = None
|
15
17
|
context_id: typing.Optional[ContextId] = None
|
16
18
|
status_code: int
|
17
19
|
done: bool
|
cartesia/voices/__init__.py
CHANGED
@@ -7,9 +7,12 @@ from .types import (
|
|
7
7
|
EmbeddingResponse,
|
8
8
|
EmbeddingSpecifier,
|
9
9
|
Gender,
|
10
|
+
GenderPresentation,
|
11
|
+
GetVoicesResponse,
|
10
12
|
IdSpecifier,
|
11
13
|
LocalizeDialect,
|
12
14
|
LocalizeEnglishDialect,
|
15
|
+
LocalizeFrenchDialect,
|
13
16
|
LocalizePortugueseDialect,
|
14
17
|
LocalizeSpanishDialect,
|
15
18
|
LocalizeTargetLanguage,
|
@@ -18,6 +21,7 @@ from .types import (
|
|
18
21
|
MixVoicesRequest,
|
19
22
|
UpdateVoiceRequest,
|
20
23
|
Voice,
|
24
|
+
VoiceExpandOptions,
|
21
25
|
VoiceId,
|
22
26
|
VoiceMetadata,
|
23
27
|
Weight,
|
@@ -26,6 +30,7 @@ from .requests import (
|
|
26
30
|
CreateVoiceRequestParams,
|
27
31
|
EmbeddingResponseParams,
|
28
32
|
EmbeddingSpecifierParams,
|
33
|
+
GetVoicesResponseParams,
|
29
34
|
IdSpecifierParams,
|
30
35
|
LocalizeDialectParams,
|
31
36
|
LocalizeVoiceRequestParams,
|
@@ -46,11 +51,15 @@ __all__ = [
|
|
46
51
|
"EmbeddingSpecifier",
|
47
52
|
"EmbeddingSpecifierParams",
|
48
53
|
"Gender",
|
54
|
+
"GenderPresentation",
|
55
|
+
"GetVoicesResponse",
|
56
|
+
"GetVoicesResponseParams",
|
49
57
|
"IdSpecifier",
|
50
58
|
"IdSpecifierParams",
|
51
59
|
"LocalizeDialect",
|
52
60
|
"LocalizeDialectParams",
|
53
61
|
"LocalizeEnglishDialect",
|
62
|
+
"LocalizeFrenchDialect",
|
54
63
|
"LocalizePortugueseDialect",
|
55
64
|
"LocalizeSpanishDialect",
|
56
65
|
"LocalizeTargetLanguage",
|
@@ -63,6 +72,7 @@ __all__ = [
|
|
63
72
|
"UpdateVoiceRequest",
|
64
73
|
"UpdateVoiceRequestParams",
|
65
74
|
"Voice",
|
75
|
+
"VoiceExpandOptions",
|
66
76
|
"VoiceId",
|
67
77
|
"VoiceMetadata",
|
68
78
|
"VoiceMetadataParams",
|
cartesia/voices/client.py
CHANGED
@@ -2,8 +2,12 @@
|
|
2
2
|
|
3
3
|
import typing
|
4
4
|
from ..core.client_wrapper import SyncClientWrapper
|
5
|
+
from .types.gender_presentation import GenderPresentation
|
6
|
+
from .types.voice_expand_options import VoiceExpandOptions
|
5
7
|
from ..core.request_options import RequestOptions
|
8
|
+
from ..core.pagination import SyncPager
|
6
9
|
from .types.voice import Voice
|
10
|
+
from .types.get_voices_response import GetVoicesResponse
|
7
11
|
from ..core.pydantic_utilities import parse_obj_as
|
8
12
|
from json.decoder import JSONDecodeError
|
9
13
|
from ..core.api_error import ApiError
|
@@ -13,15 +17,16 @@ from .types.clone_mode import CloneMode
|
|
13
17
|
from .types.voice_metadata import VoiceMetadata
|
14
18
|
from .types.voice_id import VoiceId
|
15
19
|
from ..core.jsonable_encoder import jsonable_encoder
|
16
|
-
from ..embedding.types.embedding import Embedding
|
17
20
|
from .types.localize_target_language import LocalizeTargetLanguage
|
18
21
|
from .types.gender import Gender
|
19
22
|
from .requests.localize_dialect import LocalizeDialectParams
|
20
|
-
from .types.embedding_response import EmbeddingResponse
|
21
23
|
from ..core.serialization import convert_and_respect_annotation_metadata
|
22
24
|
from .requests.mix_voice_specifier import MixVoiceSpecifierParams
|
25
|
+
from .types.embedding_response import EmbeddingResponse
|
26
|
+
from ..embedding.types.embedding import Embedding
|
23
27
|
from .types.base_voice_id import BaseVoiceId
|
24
28
|
from ..core.client_wrapper import AsyncClientWrapper
|
29
|
+
from ..core.pagination import AsyncPager
|
25
30
|
|
26
31
|
# this is used as the default value for optional parameters
|
27
32
|
OMIT = typing.cast(typing.Any, ...)
|
@@ -31,16 +36,54 @@ class VoicesClient:
|
|
31
36
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
32
37
|
self._client_wrapper = client_wrapper
|
33
38
|
|
34
|
-
def list(
|
39
|
+
def list(
|
40
|
+
self,
|
41
|
+
*,
|
42
|
+
limit: typing.Optional[int] = None,
|
43
|
+
starting_after: typing.Optional[str] = None,
|
44
|
+
ending_before: typing.Optional[str] = None,
|
45
|
+
is_owner: typing.Optional[bool] = None,
|
46
|
+
is_starred: typing.Optional[bool] = None,
|
47
|
+
gender: typing.Optional[GenderPresentation] = None,
|
48
|
+
expand: typing.Optional[typing.Sequence[VoiceExpandOptions]] = None,
|
49
|
+
request_options: typing.Optional[RequestOptions] = None,
|
50
|
+
) -> SyncPager[Voice]:
|
35
51
|
"""
|
36
52
|
Parameters
|
37
53
|
----------
|
54
|
+
limit : typing.Optional[int]
|
55
|
+
The number of Voices to return per page, ranging between 1 and 100.
|
56
|
+
|
57
|
+
starting_after : typing.Optional[str]
|
58
|
+
A cursor to use in pagination. `starting_after` is a Voice ID that defines your
|
59
|
+
place in the list. For example, if you make a /voices request and receive 100
|
60
|
+
objects, ending with `voice_abc123`, your subsequent call can include
|
61
|
+
`starting_after=voice_abc123` to fetch the next page of the list.
|
62
|
+
|
63
|
+
ending_before : typing.Optional[str]
|
64
|
+
A cursor to use in pagination. `ending_before` is a Voice ID that defines your
|
65
|
+
place in the list. For example, if you make a /voices request and receive 100
|
66
|
+
objects, starting with `voice_abc123`, your subsequent call can include
|
67
|
+
`ending_before=voice_abc123` to fetch the previous page of the list.
|
68
|
+
|
69
|
+
is_owner : typing.Optional[bool]
|
70
|
+
Whether to only return voices owned by the current user.
|
71
|
+
|
72
|
+
is_starred : typing.Optional[bool]
|
73
|
+
Whether to only return starred voices.
|
74
|
+
|
75
|
+
gender : typing.Optional[GenderPresentation]
|
76
|
+
The gender presentation of the voices to return.
|
77
|
+
|
78
|
+
expand : typing.Optional[typing.Sequence[VoiceExpandOptions]]
|
79
|
+
Additional fields to include in the response.
|
80
|
+
|
38
81
|
request_options : typing.Optional[RequestOptions]
|
39
82
|
Request-specific configuration.
|
40
83
|
|
41
84
|
Returns
|
42
85
|
-------
|
43
|
-
|
86
|
+
SyncPager[Voice]
|
44
87
|
|
45
88
|
Examples
|
46
89
|
--------
|
@@ -49,22 +92,50 @@ class VoicesClient:
|
|
49
92
|
client = Cartesia(
|
50
93
|
api_key="YOUR_API_KEY",
|
51
94
|
)
|
52
|
-
client.voices.list()
|
95
|
+
response = client.voices.list()
|
96
|
+
for item in response:
|
97
|
+
yield item
|
98
|
+
# alternatively, you can paginate page-by-page
|
99
|
+
for page in response.iter_pages():
|
100
|
+
yield page
|
53
101
|
"""
|
54
102
|
_response = self._client_wrapper.httpx_client.request(
|
55
103
|
"voices/",
|
56
104
|
method="GET",
|
105
|
+
params={
|
106
|
+
"limit": limit,
|
107
|
+
"starting_after": starting_after,
|
108
|
+
"ending_before": ending_before,
|
109
|
+
"is_owner": is_owner,
|
110
|
+
"is_starred": is_starred,
|
111
|
+
"gender": gender,
|
112
|
+
"expand[]": expand,
|
113
|
+
},
|
57
114
|
request_options=request_options,
|
58
115
|
)
|
59
116
|
try:
|
60
117
|
if 200 <= _response.status_code < 300:
|
61
|
-
|
62
|
-
|
118
|
+
_parsed_response = typing.cast(
|
119
|
+
GetVoicesResponse,
|
63
120
|
parse_obj_as(
|
64
|
-
type_=
|
121
|
+
type_=GetVoicesResponse, # type: ignore
|
65
122
|
object_=_response.json(),
|
66
123
|
),
|
67
124
|
)
|
125
|
+
_parsed_next = _parsed_response.next_page
|
126
|
+
_has_next = _parsed_next is not None and _parsed_next != ""
|
127
|
+
_get_next = lambda: self.list(
|
128
|
+
limit=limit,
|
129
|
+
starting_after=_parsed_next,
|
130
|
+
ending_before=ending_before,
|
131
|
+
is_owner=is_owner,
|
132
|
+
is_starred=is_starred,
|
133
|
+
gender=gender,
|
134
|
+
expand=expand,
|
135
|
+
request_options=request_options,
|
136
|
+
)
|
137
|
+
_items = _parsed_response.data
|
138
|
+
return SyncPager(has_next=_has_next, items=_items, get_next=_get_next)
|
68
139
|
_response_json = _response.json()
|
69
140
|
except JSONDecodeError:
|
70
141
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
@@ -312,16 +383,27 @@ class VoicesClient:
|
|
312
383
|
def localize(
|
313
384
|
self,
|
314
385
|
*,
|
315
|
-
|
386
|
+
voice_id: str,
|
387
|
+
name: str,
|
388
|
+
description: str,
|
316
389
|
language: LocalizeTargetLanguage,
|
317
390
|
original_speaker_gender: Gender,
|
318
391
|
dialect: typing.Optional[LocalizeDialectParams] = OMIT,
|
319
392
|
request_options: typing.Optional[RequestOptions] = None,
|
320
|
-
) ->
|
393
|
+
) -> VoiceMetadata:
|
321
394
|
"""
|
395
|
+
Create a new voice from an existing voice localized to a new language and dialect.
|
396
|
+
|
322
397
|
Parameters
|
323
398
|
----------
|
324
|
-
|
399
|
+
voice_id : str
|
400
|
+
The ID of the voice to localize.
|
401
|
+
|
402
|
+
name : str
|
403
|
+
The name of the new localized voice.
|
404
|
+
|
405
|
+
description : str
|
406
|
+
The description of the new localized voice.
|
325
407
|
|
326
408
|
language : LocalizeTargetLanguage
|
327
409
|
|
@@ -334,7 +416,7 @@ class VoicesClient:
|
|
334
416
|
|
335
417
|
Returns
|
336
418
|
-------
|
337
|
-
|
419
|
+
VoiceMetadata
|
338
420
|
|
339
421
|
Examples
|
340
422
|
--------
|
@@ -344,16 +426,21 @@ class VoicesClient:
|
|
344
426
|
api_key="YOUR_API_KEY",
|
345
427
|
)
|
346
428
|
client.voices.localize(
|
347
|
-
|
348
|
-
|
349
|
-
|
429
|
+
voice_id="694f9389-aac1-45b6-b726-9d9369183238",
|
430
|
+
name="Sarah Peninsular Spanish",
|
431
|
+
description="Sarah Voice in Peninsular Spanish",
|
432
|
+
language="es",
|
433
|
+
original_speaker_gender="female",
|
434
|
+
dialect="pe",
|
350
435
|
)
|
351
436
|
"""
|
352
437
|
_response = self._client_wrapper.httpx_client.request(
|
353
438
|
"voices/localize",
|
354
439
|
method="POST",
|
355
440
|
json={
|
356
|
-
"
|
441
|
+
"voice_id": voice_id,
|
442
|
+
"name": name,
|
443
|
+
"description": description,
|
357
444
|
"language": language,
|
358
445
|
"original_speaker_gender": original_speaker_gender,
|
359
446
|
"dialect": convert_and_respect_annotation_metadata(
|
@@ -366,9 +453,9 @@ class VoicesClient:
|
|
366
453
|
try:
|
367
454
|
if 200 <= _response.status_code < 300:
|
368
455
|
return typing.cast(
|
369
|
-
|
456
|
+
VoiceMetadata,
|
370
457
|
parse_obj_as(
|
371
|
-
type_=
|
458
|
+
type_=VoiceMetadata, # type: ignore
|
372
459
|
object_=_response.json(),
|
373
460
|
),
|
374
461
|
)
|
@@ -473,11 +560,9 @@ class VoicesClient:
|
|
473
560
|
api_key="YOUR_API_KEY",
|
474
561
|
)
|
475
562
|
client.voices.create(
|
476
|
-
name="
|
477
|
-
description="
|
478
|
-
embedding=[],
|
479
|
-
language="en",
|
480
|
-
base_voice_id="123e4567-e89b-12d3-a456-426614174000",
|
563
|
+
name="name",
|
564
|
+
description="description",
|
565
|
+
embedding=[1.1, 1.1],
|
481
566
|
)
|
482
567
|
"""
|
483
568
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -512,16 +597,54 @@ class AsyncVoicesClient:
|
|
512
597
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
513
598
|
self._client_wrapper = client_wrapper
|
514
599
|
|
515
|
-
async def list(
|
600
|
+
async def list(
|
601
|
+
self,
|
602
|
+
*,
|
603
|
+
limit: typing.Optional[int] = None,
|
604
|
+
starting_after: typing.Optional[str] = None,
|
605
|
+
ending_before: typing.Optional[str] = None,
|
606
|
+
is_owner: typing.Optional[bool] = None,
|
607
|
+
is_starred: typing.Optional[bool] = None,
|
608
|
+
gender: typing.Optional[GenderPresentation] = None,
|
609
|
+
expand: typing.Optional[typing.Sequence[VoiceExpandOptions]] = None,
|
610
|
+
request_options: typing.Optional[RequestOptions] = None,
|
611
|
+
) -> AsyncPager[Voice]:
|
516
612
|
"""
|
517
613
|
Parameters
|
518
614
|
----------
|
615
|
+
limit : typing.Optional[int]
|
616
|
+
The number of Voices to return per page, ranging between 1 and 100.
|
617
|
+
|
618
|
+
starting_after : typing.Optional[str]
|
619
|
+
A cursor to use in pagination. `starting_after` is a Voice ID that defines your
|
620
|
+
place in the list. For example, if you make a /voices request and receive 100
|
621
|
+
objects, ending with `voice_abc123`, your subsequent call can include
|
622
|
+
`starting_after=voice_abc123` to fetch the next page of the list.
|
623
|
+
|
624
|
+
ending_before : typing.Optional[str]
|
625
|
+
A cursor to use in pagination. `ending_before` is a Voice ID that defines your
|
626
|
+
place in the list. For example, if you make a /voices request and receive 100
|
627
|
+
objects, starting with `voice_abc123`, your subsequent call can include
|
628
|
+
`ending_before=voice_abc123` to fetch the previous page of the list.
|
629
|
+
|
630
|
+
is_owner : typing.Optional[bool]
|
631
|
+
Whether to only return voices owned by the current user.
|
632
|
+
|
633
|
+
is_starred : typing.Optional[bool]
|
634
|
+
Whether to only return starred voices.
|
635
|
+
|
636
|
+
gender : typing.Optional[GenderPresentation]
|
637
|
+
The gender presentation of the voices to return.
|
638
|
+
|
639
|
+
expand : typing.Optional[typing.Sequence[VoiceExpandOptions]]
|
640
|
+
Additional fields to include in the response.
|
641
|
+
|
519
642
|
request_options : typing.Optional[RequestOptions]
|
520
643
|
Request-specific configuration.
|
521
644
|
|
522
645
|
Returns
|
523
646
|
-------
|
524
|
-
|
647
|
+
AsyncPager[Voice]
|
525
648
|
|
526
649
|
Examples
|
527
650
|
--------
|
@@ -535,7 +658,12 @@ class AsyncVoicesClient:
|
|
535
658
|
|
536
659
|
|
537
660
|
async def main() -> None:
|
538
|
-
await client.voices.list()
|
661
|
+
response = await client.voices.list()
|
662
|
+
async for item in response:
|
663
|
+
yield item
|
664
|
+
# alternatively, you can paginate page-by-page
|
665
|
+
async for page in response.iter_pages():
|
666
|
+
yield page
|
539
667
|
|
540
668
|
|
541
669
|
asyncio.run(main())
|
@@ -543,17 +671,40 @@ class AsyncVoicesClient:
|
|
543
671
|
_response = await self._client_wrapper.httpx_client.request(
|
544
672
|
"voices/",
|
545
673
|
method="GET",
|
674
|
+
params={
|
675
|
+
"limit": limit,
|
676
|
+
"starting_after": starting_after,
|
677
|
+
"ending_before": ending_before,
|
678
|
+
"is_owner": is_owner,
|
679
|
+
"is_starred": is_starred,
|
680
|
+
"gender": gender,
|
681
|
+
"expand[]": expand,
|
682
|
+
},
|
546
683
|
request_options=request_options,
|
547
684
|
)
|
548
685
|
try:
|
549
686
|
if 200 <= _response.status_code < 300:
|
550
|
-
|
551
|
-
|
687
|
+
_parsed_response = typing.cast(
|
688
|
+
GetVoicesResponse,
|
552
689
|
parse_obj_as(
|
553
|
-
type_=
|
690
|
+
type_=GetVoicesResponse, # type: ignore
|
554
691
|
object_=_response.json(),
|
555
692
|
),
|
556
693
|
)
|
694
|
+
_parsed_next = _parsed_response.next_page
|
695
|
+
_has_next = _parsed_next is not None and _parsed_next != ""
|
696
|
+
_get_next = lambda: self.list(
|
697
|
+
limit=limit,
|
698
|
+
starting_after=_parsed_next,
|
699
|
+
ending_before=ending_before,
|
700
|
+
is_owner=is_owner,
|
701
|
+
is_starred=is_starred,
|
702
|
+
gender=gender,
|
703
|
+
expand=expand,
|
704
|
+
request_options=request_options,
|
705
|
+
)
|
706
|
+
_items = _parsed_response.data
|
707
|
+
return AsyncPager(has_next=_has_next, items=_items, get_next=_get_next)
|
557
708
|
_response_json = _response.json()
|
558
709
|
except JSONDecodeError:
|
559
710
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
@@ -833,16 +984,27 @@ class AsyncVoicesClient:
|
|
833
984
|
async def localize(
|
834
985
|
self,
|
835
986
|
*,
|
836
|
-
|
987
|
+
voice_id: str,
|
988
|
+
name: str,
|
989
|
+
description: str,
|
837
990
|
language: LocalizeTargetLanguage,
|
838
991
|
original_speaker_gender: Gender,
|
839
992
|
dialect: typing.Optional[LocalizeDialectParams] = OMIT,
|
840
993
|
request_options: typing.Optional[RequestOptions] = None,
|
841
|
-
) ->
|
994
|
+
) -> VoiceMetadata:
|
842
995
|
"""
|
996
|
+
Create a new voice from an existing voice localized to a new language and dialect.
|
997
|
+
|
843
998
|
Parameters
|
844
999
|
----------
|
845
|
-
|
1000
|
+
voice_id : str
|
1001
|
+
The ID of the voice to localize.
|
1002
|
+
|
1003
|
+
name : str
|
1004
|
+
The name of the new localized voice.
|
1005
|
+
|
1006
|
+
description : str
|
1007
|
+
The description of the new localized voice.
|
846
1008
|
|
847
1009
|
language : LocalizeTargetLanguage
|
848
1010
|
|
@@ -855,7 +1017,7 @@ class AsyncVoicesClient:
|
|
855
1017
|
|
856
1018
|
Returns
|
857
1019
|
-------
|
858
|
-
|
1020
|
+
VoiceMetadata
|
859
1021
|
|
860
1022
|
Examples
|
861
1023
|
--------
|
@@ -870,9 +1032,12 @@ class AsyncVoicesClient:
|
|
870
1032
|
|
871
1033
|
async def main() -> None:
|
872
1034
|
await client.voices.localize(
|
873
|
-
|
874
|
-
|
875
|
-
|
1035
|
+
voice_id="694f9389-aac1-45b6-b726-9d9369183238",
|
1036
|
+
name="Sarah Peninsular Spanish",
|
1037
|
+
description="Sarah Voice in Peninsular Spanish",
|
1038
|
+
language="es",
|
1039
|
+
original_speaker_gender="female",
|
1040
|
+
dialect="pe",
|
876
1041
|
)
|
877
1042
|
|
878
1043
|
|
@@ -882,7 +1047,9 @@ class AsyncVoicesClient:
|
|
882
1047
|
"voices/localize",
|
883
1048
|
method="POST",
|
884
1049
|
json={
|
885
|
-
"
|
1050
|
+
"voice_id": voice_id,
|
1051
|
+
"name": name,
|
1052
|
+
"description": description,
|
886
1053
|
"language": language,
|
887
1054
|
"original_speaker_gender": original_speaker_gender,
|
888
1055
|
"dialect": convert_and_respect_annotation_metadata(
|
@@ -895,9 +1062,9 @@ class AsyncVoicesClient:
|
|
895
1062
|
try:
|
896
1063
|
if 200 <= _response.status_code < 300:
|
897
1064
|
return typing.cast(
|
898
|
-
|
1065
|
+
VoiceMetadata,
|
899
1066
|
parse_obj_as(
|
900
|
-
type_=
|
1067
|
+
type_=VoiceMetadata, # type: ignore
|
901
1068
|
object_=_response.json(),
|
902
1069
|
),
|
903
1070
|
)
|
@@ -1015,11 +1182,9 @@ class AsyncVoicesClient:
|
|
1015
1182
|
|
1016
1183
|
async def main() -> None:
|
1017
1184
|
await client.voices.create(
|
1018
|
-
name="
|
1019
|
-
description="
|
1020
|
-
embedding=[],
|
1021
|
-
language="en",
|
1022
|
-
base_voice_id="123e4567-e89b-12d3-a456-426614174000",
|
1185
|
+
name="name",
|
1186
|
+
description="description",
|
1187
|
+
embedding=[1.1, 1.1],
|
1023
1188
|
)
|
1024
1189
|
|
1025
1190
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
from .create_voice_request import CreateVoiceRequestParams
|
4
4
|
from .embedding_response import EmbeddingResponseParams
|
5
5
|
from .embedding_specifier import EmbeddingSpecifierParams
|
6
|
+
from .get_voices_response import GetVoicesResponseParams
|
6
7
|
from .id_specifier import IdSpecifierParams
|
7
8
|
from .localize_dialect import LocalizeDialectParams
|
8
9
|
from .localize_voice_request import LocalizeVoiceRequestParams
|
@@ -16,6 +17,7 @@ __all__ = [
|
|
16
17
|
"CreateVoiceRequestParams",
|
17
18
|
"EmbeddingResponseParams",
|
18
19
|
"EmbeddingSpecifierParams",
|
20
|
+
"GetVoicesResponseParams",
|
19
21
|
"IdSpecifierParams",
|
20
22
|
"LocalizeDialectParams",
|
21
23
|
"LocalizeVoiceRequestParams",
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing_extensions
|
4
|
+
import typing
|
5
|
+
from .voice import VoiceParams
|
6
|
+
import typing_extensions
|
7
|
+
from ..types.voice_id import VoiceId
|
8
|
+
|
9
|
+
|
10
|
+
class GetVoicesResponseParams(typing_extensions.TypedDict):
|
11
|
+
data: typing.Sequence[VoiceParams]
|
12
|
+
"""
|
13
|
+
The paginated list of Voices.
|
14
|
+
"""
|
15
|
+
|
16
|
+
has_more: bool
|
17
|
+
"""
|
18
|
+
Whether there are more Voices to fetch (using `starting_after=id`, where id is the ID of the last Voice in the current response).
|
19
|
+
"""
|
20
|
+
|
21
|
+
next_page: typing_extensions.NotRequired[VoiceId]
|
22
|
+
"""
|
23
|
+
(Deprecated - use the id of the last Voice in the current response instead.) An ID that can be passed as `starting_after` to get the next page of Voices.
|
24
|
+
"""
|
@@ -4,5 +4,8 @@ import typing
|
|
4
4
|
from ..types.localize_english_dialect import LocalizeEnglishDialect
|
5
5
|
from ..types.localize_spanish_dialect import LocalizeSpanishDialect
|
6
6
|
from ..types.localize_portuguese_dialect import LocalizePortugueseDialect
|
7
|
+
from ..types.localize_french_dialect import LocalizeFrenchDialect
|
7
8
|
|
8
|
-
LocalizeDialectParams = typing.Union[
|
9
|
+
LocalizeDialectParams = typing.Union[
|
10
|
+
LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect, LocalizeFrenchDialect
|
11
|
+
]
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
3
|
import typing_extensions
|
4
|
-
from ...embedding.types.embedding import Embedding
|
5
4
|
from ..types.localize_target_language import LocalizeTargetLanguage
|
6
5
|
from ..types.gender import Gender
|
7
6
|
import typing_extensions
|
@@ -9,7 +8,21 @@ from .localize_dialect import LocalizeDialectParams
|
|
9
8
|
|
10
9
|
|
11
10
|
class LocalizeVoiceRequestParams(typing_extensions.TypedDict):
|
12
|
-
|
11
|
+
voice_id: str
|
12
|
+
"""
|
13
|
+
The ID of the voice to localize.
|
14
|
+
"""
|
15
|
+
|
16
|
+
name: str
|
17
|
+
"""
|
18
|
+
The name of the new localized voice.
|
19
|
+
"""
|
20
|
+
|
21
|
+
description: str
|
22
|
+
"""
|
23
|
+
The description of the new localized voice.
|
24
|
+
"""
|
25
|
+
|
13
26
|
language: LocalizeTargetLanguage
|
14
27
|
original_speaker_gender: Gender
|
15
28
|
dialect: typing_extensions.NotRequired[LocalizeDialectParams]
|
@@ -2,22 +2,17 @@
|
|
2
2
|
|
3
3
|
import typing_extensions
|
4
4
|
from ..types.voice_id import VoiceId
|
5
|
-
import typing_extensions
|
6
5
|
import datetime as dt
|
6
|
+
import typing_extensions
|
7
7
|
from ...embedding.types.embedding import Embedding
|
8
8
|
from ...tts.types.supported_language import SupportedLanguage
|
9
9
|
|
10
10
|
|
11
11
|
class VoiceParams(typing_extensions.TypedDict):
|
12
12
|
id: VoiceId
|
13
|
-
|
14
|
-
"""
|
15
|
-
The ID of the user who owns the voice.
|
13
|
+
is_owner: bool
|
16
14
|
"""
|
17
|
-
|
18
|
-
is_public: bool
|
19
|
-
"""
|
20
|
-
Whether the voice is publicly accessible.
|
15
|
+
Whether the current user is the owner of the voice.
|
21
16
|
"""
|
22
17
|
|
23
18
|
name: str
|
@@ -35,5 +30,14 @@ class VoiceParams(typing_extensions.TypedDict):
|
|
35
30
|
The date and time the voice was created.
|
36
31
|
"""
|
37
32
|
|
38
|
-
embedding: Embedding
|
33
|
+
embedding: typing_extensions.NotRequired[Embedding]
|
34
|
+
"""
|
35
|
+
The vector embedding of the voice. Only included when `expand` includes `embedding`.
|
36
|
+
"""
|
37
|
+
|
38
|
+
is_starred: typing_extensions.NotRequired[bool]
|
39
|
+
"""
|
40
|
+
Whether the current user has starred the voice. Only included when `expand` includes `is_starred`.
|
41
|
+
"""
|
42
|
+
|
39
43
|
language: SupportedLanguage
|