PyPI - cartesia - Versions diffs - 2.0.0a0__py3-none-any.whl → 2.0.0b1__py3-none-any.whl - Mend

cartesia 2.0.0a0-py3-none-any.whl → 2.0.0b1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

cartesia/__init__.py +4 -0
cartesia/core/client_wrapper.py +1 -1
cartesia/core/http_client.py +2 -2
cartesia/core/pydantic_utilities.py +2 -2
cartesia/datasets/client.py +4 -34
cartesia/infill/client.py +36 -12
cartesia/tts/client.py +4 -4
cartesia/voices/__init__.py +4 -0
cartesia/voices/client.py +169 -928
cartesia/voices/requests/create_voice_request.py +2 -0
cartesia/voices/requests/localize_dialect.py +3 -1
cartesia/voices/types/__init__.py +4 -0
cartesia/voices/types/create_voice_request.py +2 -0
cartesia/voices/types/localize_dialect.py +3 -1
cartesia/voices/types/localize_portuguese_dialect.py +5 -0
cartesia/voices/types/localize_spanish_dialect.py +5 -0
{cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/METADATA +2 -1
{cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/RECORD +19 -17
{cartesia-2.0.0a0.dist-info → cartesia-2.0.0b1.dist-info}/WHEEL +0 -0

cartesia/voices/client.py CHANGED Viewed

@@ -7,19 +7,20 @@ from .types.voice import Voice
 from ..core.pydantic_utilities import parse_obj_as
 from json.decoder import JSONDecodeError
 from ..core.api_error import ApiError
-from ..embedding.types.embedding import Embedding
+from .. import core
 from ..tts.types.supported_language import SupportedLanguage
+from .types.clone_mode import CloneMode
+from .types.voice_metadata import VoiceMetadata
 from .types.voice_id import VoiceId
 from ..core.jsonable_encoder import jsonable_encoder
+from ..embedding.types.embedding import Embedding
 from .types.localize_target_language import LocalizeTargetLanguage
 from .types.gender import Gender
 from .requests.localize_dialect import LocalizeDialectParams
 from .types.embedding_response import EmbeddingResponse
 from ..core.serialization import convert_and_respect_annotation_metadata
 from .requests.mix_voice_specifier import MixVoiceSpecifierParams
-from .. import core
-from .types.clone_mode import CloneMode
-from .types.voice_metadata import VoiceMetadata
+from .types.base_voice_id import BaseVoiceId
 from ..core.client_wrapper import AsyncClientWrapper
 # this is used as the default value for optional parameters
@@ -69,34 +70,60 @@ class VoicesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
-    def create(
+    def clone(
         self,
         *,
+        clip: core.File,
         name: str,
-        description: str,
-        embedding: Embedding,
-        language: typing.Optional[SupportedLanguage] = OMIT,
+        language: SupportedLanguage,
+        mode: CloneMode,
+        enhance: bool,
+        description: typing.Optional[str] = OMIT,
+        transcript: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> Voice:
+    ) -> VoiceMetadata:
         """
+        Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
+        Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
+        Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
         Parameters
         ----------
+        clip : core.File
+            See core.File for more documentation
         name : str
             The name of the voice.
-        description : str
-            The description of the voice.
-        embedding : Embedding
+        language : SupportedLanguage
+            The language of the voice.
+        mode : CloneMode
+            Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
+        enhance : bool
+            Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
+        description : typing.Optional[str]
+            A description for the voice.
+        transcript : typing.Optional[str]
+            Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
-        language : typing.Optional[SupportedLanguage]
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
         Returns
         -------
-        Voice
+        VoiceMetadata
         Examples
         --------
@@ -105,214 +132,27 @@ class VoicesClient:
         client = Cartesia(
             api_key="YOUR_API_KEY",
         )
-        client.voices.create(
-            name="string",
-            description="string",
-            embedding=[
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-            ],
+        client.voices.clone(
+            name="A high-stability cloned voice",
+            description="Copied from Cartesia docs",
+            mode="stability",
             language="en",
+            enhance=True,
         )
         """
         _response = self._client_wrapper.httpx_client.request(
-            "voices/",
+            "voices/clone",
             method="POST",
-            json={
+            data={
                 "name": name,
                 "description": description,
-                "embedding": embedding,
                 "language": language,
+                "mode": mode,
+                "enhance": enhance,
+                "transcript": transcript,
+            },
+            files={
+                "clip": clip,
             },
             request_options=request_options,
             omit=OMIT,
@@ -320,9 +160,9 @@ class VoicesClient:
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    Voice,
+                    VoiceMetadata,
                     parse_obj_as(
-                        type_=Voice,  # type: ignore
+                        type_=VoiceMetadata,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -352,7 +192,7 @@ class VoicesClient:
             api_key="YOUR_API_KEY",
         )
         client.voices.delete(
-            id="string",
+            id="id",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -397,9 +237,9 @@ class VoicesClient:
             api_key="YOUR_API_KEY",
         )
         client.voices.update(
-            id="string",
-            name="string",
-            description="string",
+            id="id",
+            name="name",
+            description="description",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -447,7 +287,7 @@ class VoicesClient:
             api_key="YOUR_API_KEY",
         )
         client.voices.get(
-            id="string",
+            id="id",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -504,203 +344,9 @@ class VoicesClient:
             api_key="YOUR_API_KEY",
         )
         client.voices.localize(
-            embedding=[
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-                1.0,
-            ],
+            embedding=[1.1, 1.1],
             language="en",
             original_speaker_gender="male",
-            dialect="au",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -757,7 +403,7 @@ class VoicesClient:
             api_key="YOUR_API_KEY",
         )
         client.voices.mix(
-            voices=[{"id": "string", "weight": 1.1}],
+            voices=[{"id": "id", "weight": 1.1}, {"id": "id", "weight": 1.1}],
         )
         """
         _response = self._client_wrapper.httpx_client.request(
@@ -785,60 +431,39 @@ class VoicesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
-    def clone(
+    def create(
         self,
         *,
-        clip: core.File,
         name: str,
-        language: SupportedLanguage,
-        mode: CloneMode,
-        enhance: bool,
-        description: typing.Optional[str] = OMIT,
-        transcript: typing.Optional[str] = OMIT,
+        description: str,
+        embedding: Embedding,
+        language: typing.Optional[SupportedLanguage] = OMIT,
+        base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> VoiceMetadata:
+    ) -> Voice:
         """
-        Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
-        Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
-        Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
+        Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
         Parameters
         ----------
-        clip : core.File
-            See core.File for more documentation
         name : str
             The name of the voice.
+        description : str
+            The description of the voice.
-        language : SupportedLanguage
-            The language of the voice.
-        mode : CloneMode
-            Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
-        enhance : bool
-            Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
-        description : typing.Optional[str]
-            A description for the voice.
+        embedding : Embedding
-        transcript : typing.Optional[str]
-            Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
+        language : typing.Optional[SupportedLanguage]
+        base_voice_id : typing.Optional[BaseVoiceId]
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
         Returns
         -------
-        VoiceMetadata
+        Voice
         Examples
         --------
@@ -847,27 +472,23 @@ class VoicesClient:
         client = Cartesia(
             api_key="YOUR_API_KEY",
         )
-        client.voices.clone(
-            name="A high-stability cloned voice",
-            description="Copied from Cartesia docs",
-            mode="stability",
+        client.voices.create(
+            name="My Custom Voice",
+            description="A custom voice created through the API",
+            embedding=[],
             language="en",
-            enhance=True,
+            base_voice_id="123e4567-e89b-12d3-a456-426614174000",
         )
         """
         _response = self._client_wrapper.httpx_client.request(
-            "voices/clone",
+            "voices/",
             method="POST",
-            data={
+            json={
                 "name": name,
                 "description": description,
+                "embedding": embedding,
                 "language": language,
-                "mode": mode,
-                "enhance": enhance,
-                "transcript": transcript,
-            },
-            files={
-                "clip": clip,
+                "base_voice_id": base_voice_id,
             },
             request_options=request_options,
             omit=OMIT,
@@ -875,9 +496,9 @@ class VoicesClient:
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    VoiceMetadata,
+                    Voice,
                     parse_obj_as(
-                        type_=VoiceMetadata,  # type: ignore
+                        type_=Voice,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -938,34 +559,60 @@ class AsyncVoicesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
-    async def create(
+    async def clone(
         self,
         *,
+        clip: core.File,
         name: str,
-        description: str,
-        embedding: Embedding,
-        language: typing.Optional[SupportedLanguage] = OMIT,
+        language: SupportedLanguage,
+        mode: CloneMode,
+        enhance: bool,
+        description: typing.Optional[str] = OMIT,
+        transcript: typing.Optional[str] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> Voice:
+    ) -> VoiceMetadata:
         """
+        Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
+        Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
+        Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
         Parameters
         ----------
+        clip : core.File
+            See core.File for more documentation
         name : str
             The name of the voice.
-        description : str
-            The description of the voice.
-        embedding : Embedding
+        language : SupportedLanguage
+            The language of the voice.
+        mode : CloneMode
+            Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
+        enhance : bool
+            Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
+        description : typing.Optional[str]
+            A description for the voice.
+        transcript : typing.Optional[str]
+            Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
-        language : typing.Optional[SupportedLanguage]
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
         Returns
         -------
-        Voice
+        VoiceMetadata
         Examples
         --------
@@ -979,217 +626,30 @@ class AsyncVoicesClient:
         async def main() -> None:
-            await client.voices.create(
-                name="string",
-                description="string",
-                embedding=[
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                ],
+            await client.voices.clone(
+                name="A high-stability cloned voice",
+                description="Copied from Cartesia docs",
+                mode="stability",
                 language="en",
+                enhance=True,
             )
         asyncio.run(main())
         """
         _response = await self._client_wrapper.httpx_client.request(
-            "voices/",
+            "voices/clone",
             method="POST",
-            json={
+            data={
                 "name": name,
                 "description": description,
-                "embedding": embedding,
                 "language": language,
+                "mode": mode,
+                "enhance": enhance,
+                "transcript": transcript,
+            },
+            files={
+                "clip": clip,
             },
             request_options=request_options,
             omit=OMIT,
@@ -1197,9 +657,9 @@ class AsyncVoicesClient:
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    Voice,
+                    VoiceMetadata,
                     parse_obj_as(
-                        type_=Voice,  # type: ignore
+                        type_=VoiceMetadata,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -1234,7 +694,7 @@ class AsyncVoicesClient:
         async def main() -> None:
             await client.voices.delete(
-                id="string",
+                id="id",
             )
@@ -1287,9 +747,9 @@ class AsyncVoicesClient:
         async def main() -> None:
             await client.voices.update(
-                id="string",
-                name="string",
-                description="string",
+                id="id",
+                name="name",
+                description="description",
             )
@@ -1345,7 +805,7 @@ class AsyncVoicesClient:
         async def main() -> None:
             await client.voices.get(
-                id="string",
+                id="id",
             )
@@ -1410,203 +870,9 @@ class AsyncVoicesClient:
         async def main() -> None:
             await client.voices.localize(
-                embedding=[
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                    1.0,
-                ],
+                embedding=[1.1, 1.1],
                 language="en",
                 original_speaker_gender="male",
-                dialect="au",
             )
@@ -1671,7 +937,7 @@ class AsyncVoicesClient:
         async def main() -> None:
             await client.voices.mix(
-                voices=[{"id": "string", "weight": 1.1}],
+                voices=[{"id": "id", "weight": 1.1}, {"id": "id", "weight": 1.1}],
             )
@@ -1702,60 +968,39 @@ class AsyncVoicesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
-    async def clone(
+    async def create(
         self,
         *,
-        clip: core.File,
         name: str,
-        language: SupportedLanguage,
-        mode: CloneMode,
-        enhance: bool,
-        description: typing.Optional[str] = OMIT,
-        transcript: typing.Optional[str] = OMIT,
+        description: str,
+        embedding: Embedding,
+        language: typing.Optional[SupportedLanguage] = OMIT,
+        base_voice_id: typing.Optional[BaseVoiceId] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> VoiceMetadata:
+    ) -> Voice:
         """
-        Clone a voice from an audio clip. This endpoint has two modes, stability and similarity.
-        Similarity mode clones are more similar to the source clip, but may reproduce background noise. For these, use an audio clip about 5 seconds long.
-        Stability mode clones are more stable, but may not sound as similar to the source clip. For these, use an audio clip 10-20 seconds long.
+        Create voice from raw features. If you'd like to clone a voice from an audio file, please use Clone Voice instead.
         Parameters
         ----------
-        clip : core.File
-            See core.File for more documentation
         name : str
             The name of the voice.
+        description : str
+            The description of the voice.
-        language : SupportedLanguage
-            The language of the voice.
-        mode : CloneMode
-            Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
-        enhance : bool
-            Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
-        description : typing.Optional[str]
-            A description for the voice.
+        embedding : Embedding
-        transcript : typing.Optional[str]
-            Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
+        language : typing.Optional[SupportedLanguage]
+        base_voice_id : typing.Optional[BaseVoiceId]
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
         Returns
         -------
-        VoiceMetadata
+        Voice
         Examples
         --------
@@ -1769,30 +1014,26 @@ class AsyncVoicesClient:
         async def main() -> None:
-            await client.voices.clone(
-                name="A high-stability cloned voice",
-                description="Copied from Cartesia docs",
-                mode="stability",
+            await client.voices.create(
+                name="My Custom Voice",
+                description="A custom voice created through the API",
+                embedding=[],
                 language="en",
-                enhance=True,
+                base_voice_id="123e4567-e89b-12d3-a456-426614174000",
             )
         asyncio.run(main())
         """
         _response = await self._client_wrapper.httpx_client.request(
-            "voices/clone",
+            "voices/",
             method="POST",
-            data={
+            json={
                 "name": name,
                 "description": description,
+                "embedding": embedding,
                 "language": language,
-                "mode": mode,
-                "enhance": enhance,
-                "transcript": transcript,
-            },
-            files={
-                "clip": clip,
+                "base_voice_id": base_voice_id,
             },
             request_options=request_options,
             omit=OMIT,
@@ -1800,9 +1041,9 @@ class AsyncVoicesClient:
         try:
             if 200 <= _response.status_code < 300:
                 return typing.cast(
-                    VoiceMetadata,
+                    Voice,
                     parse_obj_as(
-                        type_=VoiceMetadata,  # type: ignore
+                        type_=Voice,  # type: ignore
                         object_=_response.json(),
                     ),
                 )

cartesia 2.0.0a0__py3-none-any.whl → 2.0.0b1__py3-none-any.whl

cartesia 2.0.0a0__py3-none-any.whl → 2.0.0b1__py3-none-any.whl