PyPI - cartesia - Versions diffs - 2.0.0b7__py3-none-any.whl → 2.0.2__py3-none-any.whl - Mend

cartesia 2.0.0b7__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

cartesia/__init__.py +15 -1
cartesia/auth/__init__.py +13 -0
cartesia/auth/client.py +159 -0
cartesia/auth/requests/__init__.py +7 -0
cartesia/auth/requests/token_grant.py +10 -0
cartesia/auth/requests/token_request.py +17 -0
cartesia/auth/requests/token_response.py +10 -0
cartesia/auth/types/__init__.py +7 -0
cartesia/auth/types/token_grant.py +22 -0
cartesia/auth/types/token_request.py +28 -0
cartesia/auth/types/token_response.py +22 -0
cartesia/base_client.py +4 -0
cartesia/core/client_wrapper.py +1 -1
cartesia/tts/_async_websocket.py +8 -0
cartesia/tts/_websocket.py +11 -0
cartesia/tts/client.py +40 -4
cartesia/tts/requests/generation_request.py +19 -1
cartesia/tts/requests/tts_request.py +10 -1
cartesia/tts/requests/web_socket_tts_request.py +3 -1
cartesia/tts/types/generation_request.py +19 -1
cartesia/tts/types/tts_request.py +10 -1
cartesia/tts/types/web_socket_tts_request.py +3 -1
cartesia/voices/__init__.py +6 -0
cartesia/voices/client.py +208 -159
cartesia/voices/requests/create_voice_request.py +2 -0
cartesia/voices/requests/localize_dialect.py +6 -1
cartesia/voices/requests/localize_voice_request.py +15 -2
cartesia/voices/types/__init__.py +6 -0
cartesia/voices/types/create_voice_request.py +2 -0
cartesia/voices/types/localize_dialect.py +6 -1
cartesia/voices/types/localize_french_dialect.py +5 -0
cartesia/voices/types/localize_portuguese_dialect.py +5 -0
cartesia/voices/types/localize_spanish_dialect.py +5 -0
cartesia/voices/types/localize_voice_request.py +16 -3
{cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/METADATA +68 -63
{cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/RECORD +37 -24
{cartesia-2.0.0b7.dist-info → cartesia-2.0.2.dist-info}/WHEEL +0 -0

cartesia/__init__.py CHANGED Viewed

@@ -1,7 +1,8 @@
 # This file was auto-generated by Fern from our API Definition.
-from . import api_status, datasets, embedding, infill, tts, voice_changer, voices
+from . import api_status, auth, datasets, embedding, infill, tts, voice_changer, voices
 from .api_status import ApiInfo, ApiInfoParams
+from .auth import TokenGrant, TokenGrantParams, TokenRequest, TokenRequestParams, TokenResponse, TokenResponseParams
 from .client import AsyncCartesia, Cartesia
 from .datasets import (
     CreateDatasetRequest,
@@ -129,6 +130,9 @@ from .voices import (
     LocalizeDialect,
     LocalizeDialectParams,
     LocalizeEnglishDialect,
+    LocalizeFrenchDialect,
+    LocalizePortugueseDialect,
+    LocalizeSpanishDialect,
     LocalizeTargetLanguage,
     LocalizeVoiceRequest,
     LocalizeVoiceRequestParams,
@@ -187,6 +191,9 @@ __all__ = [
     "LocalizeDialect",
     "LocalizeDialectParams",
     "LocalizeEnglishDialect",
+    "LocalizeFrenchDialect",
+    "LocalizePortugueseDialect",
+    "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
     "LocalizeVoiceRequest",
     "LocalizeVoiceRequestParams",
@@ -227,6 +234,12 @@ __all__ = [
     "StreamingResponse_Error",
     "StreamingResponse_ErrorParams",
     "SupportedLanguage",
+    "TokenGrant",
+    "TokenGrantParams",
+    "TokenRequest",
+    "TokenRequestParams",
+    "TokenResponse",
+    "TokenResponseParams",
     "TtsRequest",
     "TtsRequestEmbeddingSpecifier",
     "TtsRequestEmbeddingSpecifierParams",
@@ -288,6 +301,7 @@ __all__ = [
     "WordTimestampsParams",
     "__version__",
     "api_status",
+    "auth",
     "datasets",
     "embedding",
     "infill",

cartesia/auth/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# This file was auto-generated by Fern from our API Definition.
+from .types import TokenGrant, TokenRequest, TokenResponse
+from .requests import TokenGrantParams, TokenRequestParams, TokenResponseParams
+__all__ = [
+    "TokenGrant",
+    "TokenGrantParams",
+    "TokenRequest",
+    "TokenRequestParams",
+    "TokenResponse",
+    "TokenResponseParams",
+]

cartesia/auth/client.py ADDED Viewed

@@ -0,0 +1,159 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from ..core.client_wrapper import SyncClientWrapper
+from .requests.token_grant import TokenGrantParams
+from ..core.request_options import RequestOptions
+from .types.token_response import TokenResponse
+from ..core.serialization import convert_and_respect_annotation_metadata
+from ..core.pydantic_utilities import parse_obj_as
+from json.decoder import JSONDecodeError
+from ..core.api_error import ApiError
+from ..core.client_wrapper import AsyncClientWrapper
+# this is used as the default value for optional parameters
+OMIT = typing.cast(typing.Any, ...)
+class AuthClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper
+    def access_token(
+        self,
+        *,
+        grants: TokenGrantParams,
+        expires_in: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> TokenResponse:
+        """
+        Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
+        Parameters
+        ----------
+        grants : TokenGrantParams
+            The permissions to be granted via the token.
+        expires_in : typing.Optional[int]
+            The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Returns
+        -------
+        TokenResponse
+        Examples
+        --------
+        from cartesia import Cartesia
+        client = Cartesia(
+            api_key="YOUR_API_KEY",
+        )
+        client.auth.access_token(
+            grants={"tts": True},
+            expires_in=60,
+        )
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "access-token",
+            method="POST",
+            json={
+                "grants": convert_and_respect_annotation_metadata(
+                    object_=grants, annotation=TokenGrantParams, direction="write"
+                ),
+                "expires_in": expires_in,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                return typing.cast(
+                    TokenResponse,
+                    parse_obj_as(
+                        type_=TokenResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+class AsyncAuthClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper
+    async def access_token(
+        self,
+        *,
+        grants: TokenGrantParams,
+        expires_in: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> TokenResponse:
+        """
+        Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
+        Parameters
+        ----------
+        grants : TokenGrantParams
+            The permissions to be granted via the token.
+        expires_in : typing.Optional[int]
+            The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Returns
+        -------
+        TokenResponse
+        Examples
+        --------
+        import asyncio
+        from cartesia import AsyncCartesia
+        client = AsyncCartesia(
+            api_key="YOUR_API_KEY",
+        )
+        async def main() -> None:
+            await client.auth.access_token(
+                grants={"tts": True},
+                expires_in=60,
+            )
+        asyncio.run(main())
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "access-token",
+            method="POST",
+            json={
+                "grants": convert_and_respect_annotation_metadata(
+                    object_=grants, annotation=TokenGrantParams, direction="write"
+                ),
+                "expires_in": expires_in,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                return typing.cast(
+                    TokenResponse,
+                    parse_obj_as(
+                        type_=TokenResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)

cartesia/auth/requests/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+from .token_grant import TokenGrantParams
+from .token_request import TokenRequestParams
+from .token_response import TokenResponseParams
+__all__ = ["TokenGrantParams", "TokenRequestParams", "TokenResponseParams"]

cartesia/auth/requests/token_grant.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+class TokenGrantParams(typing_extensions.TypedDict):
+    tts: bool
+    """
+    The `tts` grant allows the token to be used to access any TTS endpoint.
+    """

cartesia/auth/requests/token_request.py ADDED Viewed

@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+from .token_grant import TokenGrantParams
+import typing_extensions
+class TokenRequestParams(typing_extensions.TypedDict):
+    grants: TokenGrantParams
+    """
+    The permissions to be granted via the token.
+    """
+    expires_in: typing_extensions.NotRequired[int]
+    """
+    The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+    """

cartesia/auth/requests/token_response.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+class TokenResponseParams(typing_extensions.TypedDict):
+    token: str
+    """
+    The generated Access Token.
+    """

cartesia/auth/types/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+from .token_grant import TokenGrant
+from .token_request import TokenRequest
+from .token_response import TokenResponse
+__all__ = ["TokenGrant", "TokenRequest", "TokenResponse"]

cartesia/auth/types/token_grant.py ADDED Viewed

@@ -0,0 +1,22 @@
+# This file was auto-generated by Fern from our API Definition.
+from ...core.pydantic_utilities import UniversalBaseModel
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+import typing
+class TokenGrant(UniversalBaseModel):
+    tts: bool = pydantic.Field()
+    """
+    The `tts` grant allows the token to be used to access any TTS endpoint.
+    """
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow

cartesia/auth/types/token_request.py ADDED Viewed

@@ -0,0 +1,28 @@
+# This file was auto-generated by Fern from our API Definition.
+from ...core.pydantic_utilities import UniversalBaseModel
+from .token_grant import TokenGrant
+import pydantic
+import typing
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+class TokenRequest(UniversalBaseModel):
+    grants: TokenGrant = pydantic.Field()
+    """
+    The permissions to be granted via the token.
+    """
+    expires_in: typing.Optional[int] = pydantic.Field(default=None)
+    """
+    The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+    """
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow

cartesia/auth/types/token_response.py ADDED Viewed

@@ -0,0 +1,22 @@
+# This file was auto-generated by Fern from our API Definition.
+from ...core.pydantic_utilities import UniversalBaseModel
+import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+import typing
+class TokenResponse(UniversalBaseModel):
+    token: str = pydantic.Field()
+    """
+    The generated Access Token.
+    """
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow

cartesia/base_client.py CHANGED Viewed

@@ -5,12 +5,14 @@ from .environment import CartesiaEnvironment
 import httpx
 from .core.client_wrapper import SyncClientWrapper
 from .api_status.client import ApiStatusClient
+from .auth.client import AuthClient
 from .infill.client import InfillClient
 from .tts.client import TtsClient
 from .voice_changer.client import VoiceChangerClient
 from .voices.client import VoicesClient
 from .core.client_wrapper import AsyncClientWrapper
 from .api_status.client import AsyncApiStatusClient
+from .auth.client import AsyncAuthClient
 from .infill.client import AsyncInfillClient
 from .tts.client import AsyncTtsClient
 from .voice_changer.client import AsyncVoiceChangerClient
@@ -76,6 +78,7 @@ class BaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = ApiStatusClient(client_wrapper=self._client_wrapper)
+        self.auth = AuthClient(client_wrapper=self._client_wrapper)
         self.infill = InfillClient(client_wrapper=self._client_wrapper)
         self.tts = TtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = VoiceChangerClient(client_wrapper=self._client_wrapper)
@@ -141,6 +144,7 @@ class AsyncBaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = AsyncApiStatusClient(client_wrapper=self._client_wrapper)
+        self.auth = AsyncAuthClient(client_wrapper=self._client_wrapper)
         self.infill = AsyncInfillClient(client_wrapper=self._client_wrapper)
         self.tts = AsyncTtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = AsyncVoiceChangerClient(client_wrapper=self._client_wrapper)

cartesia/core/client_wrapper.py CHANGED Viewed

@@ -16,7 +16,7 @@ class BaseClientWrapper:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cartesia",
-            "X-Fern-SDK-Version": "2.0.0b7",
+            "X-Fern-SDK-Version": "2.0.2",
         }
         headers["X-API-Key"] = self.api_key
         headers["Cartesia-Version"] = "2024-11-13"

cartesia/tts/_async_websocket.py CHANGED Viewed

@@ -69,7 +69,9 @@ class _AsyncTTSContext:
         stream: bool = True,
         add_timestamps: bool = False,
         add_phoneme_timestamps: bool = False,
+        use_original_timestamps: bool = False,
         continue_: bool = False,
+        max_buffer_delay_ms: Optional[int] = None,
         flush: bool = False,
     ) -> None:
         """Send audio generation requests to the WebSocket. The response can be received using the `receive` method.
@@ -106,8 +108,12 @@ class _AsyncTTSContext:
             request_body["add_timestamps"] = add_timestamps
         if add_phoneme_timestamps:
             request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
+        if use_original_timestamps:
+            request_body["use_original_timestamps"] = use_original_timestamps
         if continue_:
             request_body["continue"] = continue_
+        if max_buffer_delay_ms:
+            request_body["max_buffer_delay_ms"] = max_buffer_delay_ms
         if flush:
             request_body["flush"] = flush
@@ -367,6 +373,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
         stream: bool = True,
         add_timestamps: bool = False,
         add_phoneme_timestamps: bool = False,
+        use_original_timestamps: bool = False,
     ):
         """See :meth:`_WebSocket.send` for details."""
         if context_id is None:
@@ -385,6 +392,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
             continue_=False,
             add_timestamps=add_timestamps,
             add_phoneme_timestamps=add_phoneme_timestamps,
+            use_original_timestamps=use_original_timestamps,
         )
         generator = ctx.receive()

cartesia/tts/_websocket.py CHANGED Viewed

@@ -63,10 +63,13 @@ class _TTSContext:
         output_format: OutputFormatParams,
         voice: TtsRequestVoiceSpecifierParams,
         context_id: Optional[str] = None,
+        max_buffer_delay_ms: Optional[int] = None,
         duration: Optional[int] = None,
         language: Optional[str] = None,
         stream: bool = True,
         add_timestamps: bool = False,
+        add_phoneme_timestamps: bool = False,
+        use_original_timestamps: bool = False,
     ) -> Generator[bytes, None, None]:
         """Send audio generation requests to the WebSocket and yield responses.
@@ -102,6 +105,12 @@ class _TTSContext:
             request_body["stream"] = stream
         if add_timestamps:
             request_body["add_timestamps"] = add_timestamps
+        if add_phoneme_timestamps:
+            request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
+        if use_original_timestamps:
+            request_body["use_original_timestamps"] = use_original_timestamps
+        if max_buffer_delay_ms:
+            request_body["max_buffer_delay_ms"] = max_buffer_delay_ms
         if (
             "context_id" in request_body
@@ -354,6 +363,7 @@ class TtsWebsocket:
         stream: bool = True,
         add_timestamps: bool = False,
         add_phoneme_timestamps: bool = False,
+        use_original_timestamps: bool = False,
     ):
         """Send a request to the WebSocket to generate audio.
@@ -384,6 +394,7 @@ class TtsWebsocket:
             "stream": stream,
             "add_timestamps": add_timestamps,
             "add_phoneme_timestamps": add_phoneme_timestamps,
+            "use_original_timestamps": use_original_timestamps,
         }
         generator = self._websocket_generator(request_body)

cartesia/tts/client.py CHANGED Viewed

@@ -32,13 +32,14 @@ class TtsClient:
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
         duration: typing.Optional[float] = OMIT,
+        text_cfg: typing.Optional[float] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[bytes]:
         """
         Parameters
         ----------
         model_id : str
-            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
+            The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
         transcript : str
@@ -52,6 +53,13 @@ class TtsClient:
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
+        text_cfg : typing.Optional[float]
+            The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
+            Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
+            This parameter is only supported for `sonic-2` models.
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
@@ -92,6 +100,7 @@ class TtsClient:
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
                 "duration": duration,
+                "text_cfg": text_cfg,
             },
             request_options=request_options,
             omit=OMIT,
@@ -117,13 +126,14 @@ class TtsClient:
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
         duration: typing.Optional[float] = OMIT,
+        text_cfg: typing.Optional[float] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[WebSocketResponse]:
         """
         Parameters
         ----------
         model_id : str
-            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
+            The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
         transcript : str
@@ -137,6 +147,13 @@ class TtsClient:
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
+        text_cfg : typing.Optional[float]
+            The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
+            Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
+            This parameter is only supported for `sonic-2` models.
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -179,6 +196,7 @@ class TtsClient:
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
                 "duration": duration,
+                "text_cfg": text_cfg,
             },
             request_options=request_options,
             omit=OMIT,
@@ -218,13 +236,14 @@ class AsyncTtsClient:
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
         duration: typing.Optional[float] = OMIT,
+        text_cfg: typing.Optional[float] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[bytes]:
         """
         Parameters
         ----------
         model_id : str
-            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
+            The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
         transcript : str
@@ -238,6 +257,13 @@ class AsyncTtsClient:
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
+        text_cfg : typing.Optional[float]
+            The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
+            Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
+            This parameter is only supported for `sonic-2` models.
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
@@ -286,6 +312,7 @@ class AsyncTtsClient:
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
                 "duration": duration,
+                "text_cfg": text_cfg,
             },
             request_options=request_options,
             omit=OMIT,
@@ -311,13 +338,14 @@ class AsyncTtsClient:
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
         duration: typing.Optional[float] = OMIT,
+        text_cfg: typing.Optional[float] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[WebSocketResponse]:
         """
         Parameters
         ----------
         model_id : str
-            The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
+            The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
         transcript : str
@@ -331,6 +359,13 @@ class AsyncTtsClient:
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
+        text_cfg : typing.Optional[float]
+            The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
+            Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
+            This parameter is only supported for `sonic-2` models.
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -381,6 +416,7 @@ class AsyncTtsClient:
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
                 "duration": duration,
+                "text_cfg": text_cfg,
             },
             request_options=request_options,
             omit=OMIT,

cartesia/tts/requests/generation_request.py CHANGED Viewed

@@ -13,7 +13,7 @@ from ...core.serialization import FieldMetadata
 class GenerationRequestParams(typing_extensions.TypedDict):
     model_id: str
     """
-    The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
+    The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
     """
     transcript: typing.Optional[typing.Any]
@@ -30,6 +30,15 @@ class GenerationRequestParams(typing_extensions.TypedDict):
     If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
     """
+    text_cfg: typing_extensions.NotRequired[float]
+    """
+    The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
+    Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
+    This parameter is only supported for `sonic-2` models.
+    """
     context_id: typing_extensions.NotRequired[ContextId]
     continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
     """
@@ -37,6 +46,15 @@ class GenerationRequestParams(typing_extensions.TypedDict):
     If not specified, this defaults to `false`.
     """
+    max_buffer_delay_ms: typing_extensions.NotRequired[int]
+    """
+    The maximum time in milliseconds to buffer text before starting generation. Values between [0, 1000]ms are supported. Defaults to 0 (no buffering).
+    When set, the model will buffer incoming text chunks until it's confident it has enough context to generate high-quality speech, or the buffer delay elapses, whichever comes first. Without this option set, the model will kick off generations immediately, ceding control of buffering to the user.
+    Use this to balance responsiveness with higher quality speech generation, which often benefits from having more context.
+    """
     flush: typing_extensions.NotRequired[bool]
     """
     Whether to flush the context.

cartesia 2.0.0b7__py3-none-any.whl → 2.0.2__py3-none-any.whl

cartesia 2.0.0b7__py3-none-any.whl → 2.0.2__py3-none-any.whl