cartesia 2.0.0b2__py3-none-any.whl → 2.0.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. cartesia/__init__.py +10 -0
  2. cartesia/base_client.py +0 -4
  3. cartesia/core/__init__.py +3 -0
  4. cartesia/core/client_wrapper.py +2 -2
  5. cartesia/core/pagination.py +88 -0
  6. cartesia/infill/client.py +4 -4
  7. cartesia/tts/_async_websocket.py +53 -1
  8. cartesia/tts/_websocket.py +52 -3
  9. cartesia/tts/client.py +4 -4
  10. cartesia/tts/requests/generation_request.py +5 -0
  11. cartesia/tts/requests/web_socket_chunk_response.py +3 -0
  12. cartesia/tts/requests/web_socket_response.py +2 -1
  13. cartesia/tts/requests/web_socket_tts_request.py +1 -0
  14. cartesia/tts/types/emotion.py +5 -0
  15. cartesia/tts/types/generation_request.py +5 -0
  16. cartesia/tts/types/web_socket_chunk_response.py +3 -1
  17. cartesia/tts/types/web_socket_response.py +2 -1
  18. cartesia/tts/types/web_socket_tts_output.py +2 -0
  19. cartesia/tts/types/web_socket_tts_request.py +1 -0
  20. cartesia/tts/utils/constants.py +2 -2
  21. cartesia/voice_changer/requests/streaming_response.py +2 -0
  22. cartesia/voice_changer/types/streaming_response.py +2 -0
  23. cartesia/voices/__init__.py +10 -0
  24. cartesia/voices/client.py +209 -44
  25. cartesia/voices/requests/__init__.py +2 -0
  26. cartesia/voices/requests/get_voices_response.py +24 -0
  27. cartesia/voices/requests/localize_dialect.py +4 -1
  28. cartesia/voices/requests/localize_voice_request.py +15 -2
  29. cartesia/voices/requests/voice.py +13 -9
  30. cartesia/voices/types/__init__.py +8 -0
  31. cartesia/voices/types/gender_presentation.py +5 -0
  32. cartesia/voices/types/get_voices_response.py +34 -0
  33. cartesia/voices/types/localize_dialect.py +4 -1
  34. cartesia/voices/types/localize_french_dialect.py +5 -0
  35. cartesia/voices/types/localize_voice_request.py +16 -3
  36. cartesia/voices/types/voice.py +13 -9
  37. cartesia/voices/types/voice_expand_options.py +5 -0
  38. {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/METADATA +149 -73
  39. {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/RECORD +40 -35
  40. cartesia/datasets/client.py +0 -392
  41. {cartesia-2.0.0b2.dist-info → cartesia-2.0.0b8.dist-info}/WHEEL +0 -0
@@ -7,11 +7,13 @@ import pydantic
 from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
 from .context_id import ContextId
 from .flush_id import FlushId
+from .phoneme_timestamps import PhonemeTimestamps
 from .word_timestamps import WordTimestamps
 
 
 class WebSocketTtsOutput(UniversalBaseModel):
     word_timestamps: typing.Optional[WordTimestamps] = None
+    phoneme_timestamps: typing.Optional[PhonemeTimestamps] = None
     audio: typing.Optional[bytes] = None
     context_id: typing.Optional[ContextId] = None
     flush_id: typing.Optional[FlushId] = None
@@ -22,6 +22,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
     duration: typing.Optional[int] = None
     language: typing.Optional[str] = None
     add_timestamps: typing.Optional[bool] = None
+    use_original_timestamps: typing.Optional[bool] = None
     add_phoneme_timestamps: typing.Optional[bool] = None
     continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
     context_id: typing.Optional[str] = None
@@ -1,5 +1,5 @@
-DEFAULT_MODEL_ID = "sonic-english" # latest default model
-MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
+DEFAULT_MODEL_ID = "sonic-2" # latest default model
+MULTILINGUAL_MODEL_ID = "sonic-2" # latest multilingual model
 DEFAULT_BASE_URL = "api.cartesia.ai"
 DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
 DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
@@ -4,6 +4,7 @@ from __future__ import annotations
 import typing_extensions
 import typing
 import typing_extensions
+from ...tts.types.flush_id import FlushId
 from ...tts.types.context_id import ContextId
 
 
@@ -11,6 +12,7 @@ class StreamingResponse_ChunkParams(typing_extensions.TypedDict):
     type: typing.Literal["chunk"]
     data: str
     step_time: float
+    flush_id: typing_extensions.NotRequired[FlushId]
     context_id: typing_extensions.NotRequired[ContextId]
     status_code: int
     done: bool
@@ -3,6 +3,7 @@
 from __future__ import annotations
 from ...core.pydantic_utilities import UniversalBaseModel
 import typing
+from ...tts.types.flush_id import FlushId
 from ...tts.types.context_id import ContextId
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
@@ -12,6 +13,7 @@ class StreamingResponse_Chunk(UniversalBaseModel):
     type: typing.Literal["chunk"] = "chunk"
     data: str
     step_time: float
+    flush_id: typing.Optional[FlushId] = None
     context_id: typing.Optional[ContextId] = None
     status_code: int
     done: bool
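For orientation, the voice changer's streaming chunk now carries the flush identifier alongside the context identifier. A minimal sketch of reading it, assuming `chunk` is an already-parsed `StreamingResponse_Chunk` (how chunks are received is outside this diff):

    # `chunk` is assumed to be a StreamingResponse_Chunk from the voice changer stream.
    # Both flush_id and context_id are optional, so guard before using them.
    if chunk.type == "chunk" and chunk.flush_id is not None:
        # Correlate this audio chunk with the flush that produced it.
        print(chunk.flush_id, chunk.context_id, chunk.status_code)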
@@ -7,9 +7,12 @@ from .types import (
     EmbeddingResponse,
     EmbeddingSpecifier,
     Gender,
+    GenderPresentation,
+    GetVoicesResponse,
     IdSpecifier,
     LocalizeDialect,
     LocalizeEnglishDialect,
+    LocalizeFrenchDialect,
     LocalizePortugueseDialect,
     LocalizeSpanishDialect,
     LocalizeTargetLanguage,
@@ -18,6 +21,7 @@ from .types import (
     MixVoicesRequest,
     UpdateVoiceRequest,
     Voice,
+    VoiceExpandOptions,
     VoiceId,
     VoiceMetadata,
     Weight,
@@ -26,6 +30,7 @@ from .requests import (
     CreateVoiceRequestParams,
     EmbeddingResponseParams,
     EmbeddingSpecifierParams,
+    GetVoicesResponseParams,
     IdSpecifierParams,
     LocalizeDialectParams,
     LocalizeVoiceRequestParams,
@@ -46,11 +51,15 @@ __all__ = [
     "EmbeddingSpecifier",
     "EmbeddingSpecifierParams",
     "Gender",
+    "GenderPresentation",
+    "GetVoicesResponse",
+    "GetVoicesResponseParams",
     "IdSpecifier",
     "IdSpecifierParams",
     "LocalizeDialect",
     "LocalizeDialectParams",
     "LocalizeEnglishDialect",
+    "LocalizeFrenchDialect",
     "LocalizePortugueseDialect",
     "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
@@ -63,6 +72,7 @@ __all__ = [
     "UpdateVoiceRequest",
     "UpdateVoiceRequestParams",
     "Voice",
+    "VoiceExpandOptions",
     "VoiceId",
     "VoiceMetadata",
     "VoiceMetadataParams",
cartesia/voices/client.py CHANGED
@@ -2,8 +2,12 @@
 
 import typing
 from ..core.client_wrapper import SyncClientWrapper
+from .types.gender_presentation import GenderPresentation
+from .types.voice_expand_options import VoiceExpandOptions
 from ..core.request_options import RequestOptions
+from ..core.pagination import SyncPager
 from .types.voice import Voice
+from .types.get_voices_response import GetVoicesResponse
 from ..core.pydantic_utilities import parse_obj_as
 from json.decoder import JSONDecodeError
 from ..core.api_error import ApiError
@@ -13,15 +17,16 @@ from .types.clone_mode import CloneMode
 from .types.voice_metadata import VoiceMetadata
 from .types.voice_id import VoiceId
 from ..core.jsonable_encoder import jsonable_encoder
-from ..embedding.types.embedding import Embedding
 from .types.localize_target_language import LocalizeTargetLanguage
 from .types.gender import Gender
 from .requests.localize_dialect import LocalizeDialectParams
-from .types.embedding_response import EmbeddingResponse
 from ..core.serialization import convert_and_respect_annotation_metadata
 from .requests.mix_voice_specifier import MixVoiceSpecifierParams
+from .types.embedding_response import EmbeddingResponse
+from ..embedding.types.embedding import Embedding
 from .types.base_voice_id import BaseVoiceId
 from ..core.client_wrapper import AsyncClientWrapper
+from ..core.pagination import AsyncPager
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -31,16 +36,54 @@ class VoicesClient:
     def __init__(self, *, client_wrapper: SyncClientWrapper):
         self._client_wrapper = client_wrapper
 
-    def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> typing.List[Voice]:
+    def list(
+        self,
+        *,
+        limit: typing.Optional[int] = None,
+        starting_after: typing.Optional[str] = None,
+        ending_before: typing.Optional[str] = None,
+        is_owner: typing.Optional[bool] = None,
+        is_starred: typing.Optional[bool] = None,
+        gender: typing.Optional[GenderPresentation] = None,
+        expand: typing.Optional[typing.Sequence[VoiceExpandOptions]] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> SyncPager[Voice]:
         """
         Parameters
         ----------
+        limit : typing.Optional[int]
+            The number of Voices to return per page, ranging between 1 and 100.
+
+        starting_after : typing.Optional[str]
+            A cursor to use in pagination. `starting_after` is a Voice ID that defines your
+            place in the list. For example, if you make a /voices request and receive 100
+            objects, ending with `voice_abc123`, your subsequent call can include
+            `starting_after=voice_abc123` to fetch the next page of the list.
+
+        ending_before : typing.Optional[str]
+            A cursor to use in pagination. `ending_before` is a Voice ID that defines your
+            place in the list. For example, if you make a /voices request and receive 100
+            objects, starting with `voice_abc123`, your subsequent call can include
+            `ending_before=voice_abc123` to fetch the previous page of the list.
+
+        is_owner : typing.Optional[bool]
+            Whether to only return voices owned by the current user.
+
+        is_starred : typing.Optional[bool]
+            Whether to only return starred voices.
+
+        gender : typing.Optional[GenderPresentation]
+            The gender presentation of the voices to return.
+
+        expand : typing.Optional[typing.Sequence[VoiceExpandOptions]]
+            Additional fields to include in the response.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        typing.List[Voice]
+        SyncPager[Voice]
 
         Examples
         --------
@@ -49,22 +92,50 @@ class VoicesClient:
         client = Cartesia(
             api_key="YOUR_API_KEY",
         )
-        client.voices.list()
+        response = client.voices.list()
+        for item in response:
+            yield item
+        # alternatively, you can paginate page-by-page
+        for page in response.iter_pages():
+            yield page
         """
         _response = self._client_wrapper.httpx_client.request(
             "voices/",
             method="GET",
+            params={
+                "limit": limit,
+                "starting_after": starting_after,
+                "ending_before": ending_before,
+                "is_owner": is_owner,
+                "is_starred": is_starred,
+                "gender": gender,
+                "expand[]": expand,
+            },
             request_options=request_options,
         )
         try:
             if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    typing.List[Voice],
+                _parsed_response = typing.cast(
+                    GetVoicesResponse,
                     parse_obj_as(
-                        type_=typing.List[Voice], # type: ignore
+                        type_=GetVoicesResponse, # type: ignore
                         object_=_response.json(),
                     ),
                 )
+                _parsed_next = _parsed_response.next_page
+                _has_next = _parsed_next is not None and _parsed_next != ""
+                _get_next = lambda: self.list(
+                    limit=limit,
+                    starting_after=_parsed_next,
+                    ending_before=ending_before,
+                    is_owner=is_owner,
+                    is_starred=is_starred,
+                    gender=gender,
+                    expand=expand,
+                    request_options=request_options,
+                )
+                _items = _parsed_response.data
+                return SyncPager(has_next=_has_next, items=_items, get_next=_get_next)
             _response_json = _response.json()
         except JSONDecodeError:
             raise ApiError(status_code=_response.status_code, body=_response.text)
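The upshot of the hunk above is that `voices.list()` now returns a `SyncPager[Voice]` driven by query parameters on `GET /voices` instead of a plain list. A minimal usage sketch, assuming the `Cartesia` client shown in the docstring; the filter values are illustrative:

    from cartesia import Cartesia

    client = Cartesia(api_key="YOUR_API_KEY")

    # Iterating the pager walks every matching page: when a page is
    # exhausted, it calls list() again with starting_after set to the
    # response's next_page cursor, as the generated code above shows.
    for voice in client.voices.list(limit=50, is_owner=True):
        print(voice.id, voice.name)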
@@ -312,16 +383,27 @@
     def localize(
         self,
         *,
-        embedding: Embedding,
+        voice_id: str,
+        name: str,
+        description: str,
         language: LocalizeTargetLanguage,
         original_speaker_gender: Gender,
         dialect: typing.Optional[LocalizeDialectParams] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> EmbeddingResponse:
+    ) -> VoiceMetadata:
         """
+        Create a new voice from an existing voice localized to a new language and dialect.
+
         Parameters
         ----------
-        embedding : Embedding
+        voice_id : str
+            The ID of the voice to localize.
+
+        name : str
+            The name of the new localized voice.
+
+        description : str
+            The description of the new localized voice.
 
         language : LocalizeTargetLanguage
 
@@ -334,7 +416,7 @@
 
         Returns
         -------
-        EmbeddingResponse
+        VoiceMetadata
 
         Examples
         --------
@@ -344,16 +426,21 @@
             api_key="YOUR_API_KEY",
         )
         client.voices.localize(
-            embedding=[1.1, 1.1],
-            language="en",
-            original_speaker_gender="male",
+            voice_id="694f9389-aac1-45b6-b726-9d9369183238",
+            name="Sarah Peninsular Spanish",
+            description="Sarah Voice in Peninsular Spanish",
+            language="es",
+            original_speaker_gender="female",
+            dialect="pe",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
             "voices/localize",
             method="POST",
             json={
-                "embedding": embedding,
+                "voice_id": voice_id,
+                "name": name,
+                "description": description,
                 "language": language,
                 "original_speaker_gender": original_speaker_gender,
                 "dialect": convert_and_respect_annotation_metadata(
@@ -366,9 +453,9 @@
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    EmbeddingResponse,
+                    VoiceMetadata,
                     parse_obj_as(
-                        type_=EmbeddingResponse, # type: ignore
+                        type_=VoiceMetadata, # type: ignore
                         object_=_response.json(),
                     ),
                 )
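Since `localize` now takes a source `voice_id` with a `name` and `description` instead of a raw `embedding`, and returns `VoiceMetadata` rather than `EmbeddingResponse`, callers upgrading from 2.0.0b2 need to adjust both the arguments and the handling of the result. A hedged before/after sketch, reusing the example values from the docstring above (`client` is a `Cartesia(api_key=...)` instance):

    # 2.0.0b2: localize an embedding, get an EmbeddingResponse back.
    # localized = client.voices.localize(
    #     embedding=[1.1, 1.1],
    #     language="en",
    #     original_speaker_gender="male",
    # )

    # 2.0.0b8: localize an existing voice by ID; the result is the
    # metadata of the newly created localized voice.
    new_voice = client.voices.localize(
        voice_id="694f9389-aac1-45b6-b726-9d9369183238",
        name="Sarah Peninsular Spanish",
        description="Sarah Voice in Peninsular Spanish",
        language="es",
        original_speaker_gender="female",
        dialect="pe",
    )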
@@ -473,11 +560,9 @@
             api_key="YOUR_API_KEY",
         )
         client.voices.create(
-            name="My Custom Voice",
-            description="A custom voice created through the API",
-            embedding=[],
-            language="en",
-            base_voice_id="123e4567-e89b-12d3-a456-426614174000",
+            name="name",
+            description="description",
+            embedding=[1.1, 1.1],
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -512,16 +597,54 @@ class AsyncVoicesClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
         self._client_wrapper = client_wrapper
 
-    async def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> typing.List[Voice]:
+    async def list(
+        self,
+        *,
+        limit: typing.Optional[int] = None,
+        starting_after: typing.Optional[str] = None,
+        ending_before: typing.Optional[str] = None,
+        is_owner: typing.Optional[bool] = None,
+        is_starred: typing.Optional[bool] = None,
+        gender: typing.Optional[GenderPresentation] = None,
+        expand: typing.Optional[typing.Sequence[VoiceExpandOptions]] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> AsyncPager[Voice]:
         """
         Parameters
         ----------
+        limit : typing.Optional[int]
+            The number of Voices to return per page, ranging between 1 and 100.
+
+        starting_after : typing.Optional[str]
+            A cursor to use in pagination. `starting_after` is a Voice ID that defines your
+            place in the list. For example, if you make a /voices request and receive 100
+            objects, ending with `voice_abc123`, your subsequent call can include
+            `starting_after=voice_abc123` to fetch the next page of the list.
+
+        ending_before : typing.Optional[str]
+            A cursor to use in pagination. `ending_before` is a Voice ID that defines your
+            place in the list. For example, if you make a /voices request and receive 100
+            objects, starting with `voice_abc123`, your subsequent call can include
+            `ending_before=voice_abc123` to fetch the previous page of the list.
+
+        is_owner : typing.Optional[bool]
+            Whether to only return voices owned by the current user.
+
+        is_starred : typing.Optional[bool]
+            Whether to only return starred voices.
+
+        gender : typing.Optional[GenderPresentation]
+            The gender presentation of the voices to return.
+
+        expand : typing.Optional[typing.Sequence[VoiceExpandOptions]]
+            Additional fields to include in the response.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        typing.List[Voice]
+        AsyncPager[Voice]
 
         Examples
         --------
@@ -535,7 +658,12 @@ class AsyncVoicesClient:
 
 
         async def main() -> None:
-            await client.voices.list()
+            response = await client.voices.list()
+            async for item in response:
+                yield item
+            # alternatively, you can paginate page-by-page
+            async for page in response.iter_pages():
+                yield page
 
 
         asyncio.run(main())
@@ -543,17 +671,40 @@ class AsyncVoicesClient:
         _response = await self._client_wrapper.httpx_client.request(
             "voices/",
             method="GET",
+            params={
+                "limit": limit,
+                "starting_after": starting_after,
+                "ending_before": ending_before,
+                "is_owner": is_owner,
+                "is_starred": is_starred,
+                "gender": gender,
+                "expand[]": expand,
+            },
             request_options=request_options,
         )
         try:
             if 200 <= _response.status_code < 300:
-                return typing.cast(
-                    typing.List[Voice],
+                _parsed_response = typing.cast(
+                    GetVoicesResponse,
                     parse_obj_as(
-                        type_=typing.List[Voice], # type: ignore
+                        type_=GetVoicesResponse, # type: ignore
                        object_=_response.json(),
                     ),
                 )
+                _parsed_next = _parsed_response.next_page
+                _has_next = _parsed_next is not None and _parsed_next != ""
+                _get_next = lambda: self.list(
+                    limit=limit,
+                    starting_after=_parsed_next,
+                    ending_before=ending_before,
+                    is_owner=is_owner,
+                    is_starred=is_starred,
+                    gender=gender,
+                    expand=expand,
+                    request_options=request_options,
+                )
+                _items = _parsed_response.data
+                return AsyncPager(has_next=_has_next, items=_items, get_next=_get_next)
             _response_json = _response.json()
         except JSONDecodeError:
             raise ApiError(status_code=_response.status_code, body=_response.text)
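The async client mirrors the same pagination shape with `AsyncPager[Voice]`. A minimal sketch, assuming the package exports an `AsyncCartesia` client (its construction is not shown in this hunk) and using illustrative filter values:

    import asyncio

    from cartesia import AsyncCartesia  # assumed export; not shown in this hunk


    async def main() -> None:
        client = AsyncCartesia(api_key="YOUR_API_KEY")
        pager = await client.voices.list(limit=25, is_starred=True)
        # AsyncPager supports `async for`, fetching later pages on demand.
        async for voice in pager:
            print(voice.id, voice.name)


    asyncio.run(main())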
@@ -833,16 +984,27 @@
     async def localize(
         self,
         *,
-        embedding: Embedding,
+        voice_id: str,
+        name: str,
+        description: str,
         language: LocalizeTargetLanguage,
         original_speaker_gender: Gender,
         dialect: typing.Optional[LocalizeDialectParams] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> EmbeddingResponse:
+    ) -> VoiceMetadata:
         """
+        Create a new voice from an existing voice localized to a new language and dialect.
+
         Parameters
         ----------
-        embedding : Embedding
+        voice_id : str
+            The ID of the voice to localize.
+
+        name : str
+            The name of the new localized voice.
+
+        description : str
+            The description of the new localized voice.
 
         language : LocalizeTargetLanguage
 
@@ -855,7 +1017,7 @@
 
         Returns
         -------
-        EmbeddingResponse
+        VoiceMetadata
 
         Examples
         --------
@@ -870,9 +1032,12 @@
 
         async def main() -> None:
             await client.voices.localize(
-                embedding=[1.1, 1.1],
-                language="en",
-                original_speaker_gender="male",
+                voice_id="694f9389-aac1-45b6-b726-9d9369183238",
+                name="Sarah Peninsular Spanish",
+                description="Sarah Voice in Peninsular Spanish",
+                language="es",
+                original_speaker_gender="female",
+                dialect="pe",
             )
 
 
@@ -882,7 +1047,9 @@
             "voices/localize",
             method="POST",
             json={
-                "embedding": embedding,
+                "voice_id": voice_id,
+                "name": name,
+                "description": description,
                 "language": language,
                 "original_speaker_gender": original_speaker_gender,
                 "dialect": convert_and_respect_annotation_metadata(
@@ -895,9 +1062,9 @@
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    EmbeddingResponse,
+                    VoiceMetadata,
                     parse_obj_as(
-                        type_=EmbeddingResponse, # type: ignore
+                        type_=VoiceMetadata, # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -1015,11 +1182,9 @@
 
         async def main() -> None:
             await client.voices.create(
-                name="My Custom Voice",
-                description="A custom voice created through the API",
-                embedding=[],
-                language="en",
-                base_voice_id="123e4567-e89b-12d3-a456-426614174000",
+                name="name",
+                description="description",
+                embedding=[1.1, 1.1],
             )
 
 
@@ -3,6 +3,7 @@
 from .create_voice_request import CreateVoiceRequestParams
 from .embedding_response import EmbeddingResponseParams
 from .embedding_specifier import EmbeddingSpecifierParams
+from .get_voices_response import GetVoicesResponseParams
 from .id_specifier import IdSpecifierParams
 from .localize_dialect import LocalizeDialectParams
 from .localize_voice_request import LocalizeVoiceRequestParams
@@ -16,6 +17,7 @@ __all__ = [
     "CreateVoiceRequestParams",
     "EmbeddingResponseParams",
     "EmbeddingSpecifierParams",
+    "GetVoicesResponseParams",
     "IdSpecifierParams",
     "LocalizeDialectParams",
     "LocalizeVoiceRequestParams",
@@ -0,0 +1,24 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing_extensions
+import typing
+from .voice import VoiceParams
+import typing_extensions
+from ..types.voice_id import VoiceId
+
+
+class GetVoicesResponseParams(typing_extensions.TypedDict):
+    data: typing.Sequence[VoiceParams]
+    """
+    The paginated list of Voices.
+    """
+
+    has_more: bool
+    """
+    Whether there are more Voices to fetch (using `starting_after=id`, where id is the ID of the last Voice in the current response).
+    """
+
+    next_page: typing_extensions.NotRequired[VoiceId]
+    """
+    (Deprecated - use the id of the last Voice in the current response instead.) An ID that can be passed as `starting_after` to get the next page of Voices.
+    """
@@ -4,5 +4,8 @@ import typing
 from ..types.localize_english_dialect import LocalizeEnglishDialect
 from ..types.localize_spanish_dialect import LocalizeSpanishDialect
 from ..types.localize_portuguese_dialect import LocalizePortugueseDialect
+from ..types.localize_french_dialect import LocalizeFrenchDialect
 
-LocalizeDialectParams = typing.Union[LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect]
+LocalizeDialectParams = typing.Union[
+    LocalizeEnglishDialect, LocalizeSpanishDialect, LocalizePortugueseDialect, LocalizeFrenchDialect
+]
@@ -1,7 +1,6 @@
 # This file was auto-generated by Fern from our API Definition.
 
 import typing_extensions
-from ...embedding.types.embedding import Embedding
 from ..types.localize_target_language import LocalizeTargetLanguage
 from ..types.gender import Gender
 import typing_extensions
@@ -9,7 +8,21 @@ from .localize_dialect import LocalizeDialectParams
 
 
 class LocalizeVoiceRequestParams(typing_extensions.TypedDict):
-    embedding: Embedding
+    voice_id: str
+    """
+    The ID of the voice to localize.
+    """
+
+    name: str
+    """
+    The name of the new localized voice.
+    """
+
+    description: str
+    """
+    The description of the new localized voice.
+    """
+
     language: LocalizeTargetLanguage
     original_speaker_gender: Gender
     dialect: typing_extensions.NotRequired[LocalizeDialectParams]
@@ -2,22 +2,17 @@
 
 import typing_extensions
 from ..types.voice_id import VoiceId
-import typing_extensions
 import datetime as dt
+import typing_extensions
 from ...embedding.types.embedding import Embedding
 from ...tts.types.supported_language import SupportedLanguage
 
 
 class VoiceParams(typing_extensions.TypedDict):
     id: VoiceId
-    user_id: typing_extensions.NotRequired[str]
-    """
-    The ID of the user who owns the voice.
+    is_owner: bool
     """
-
-    is_public: bool
-    """
-    Whether the voice is publicly accessible.
+    Whether the current user is the owner of the voice.
     """
 
     name: str
@@ -35,5 +30,14 @@ class VoiceParams(typing_extensions.TypedDict):
     The date and time the voice was created.
     """
 
-    embedding: Embedding
+    embedding: typing_extensions.NotRequired[Embedding]
+    """
+    The vector embedding of the voice. Only included when `expand` includes `embedding`.
+    """
+
+    is_starred: typing_extensions.NotRequired[bool]
+    """
+    Whether the current user has starred the voice. Only included when `expand` includes `is_starred`.
+    """
+
     language: SupportedLanguage