cartesia 2.0.9__py3-none-any.whl → 2.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cartesia has been flagged as possibly problematic by the registry scanner.
Files changed (34)
  1. cartesia/__init__.py +6 -0
  2. cartesia/core/client_wrapper.py +1 -1
  3. cartesia/infill/client.py +2 -2
  4. cartesia/tts/__init__.py +6 -0
  5. cartesia/tts/client.py +25 -0
  6. cartesia/tts/requests/__init__.py +2 -0
  7. cartesia/tts/requests/controls.py +2 -2
  8. cartesia/tts/requests/generation_config.py +26 -0
  9. cartesia/tts/requests/generation_request.py +2 -0
  10. cartesia/tts/requests/mp_3_output_format.py +4 -0
  11. cartesia/tts/requests/raw_output_format.py +4 -0
  12. cartesia/tts/requests/sse_output_format.py +3 -0
  13. cartesia/tts/requests/tts_request.py +2 -0
  14. cartesia/tts/requests/ttssse_request.py +2 -0
  15. cartesia/tts/requests/web_socket_raw_output_format.py +3 -0
  16. cartesia/tts/requests/web_socket_tts_request.py +2 -0
  17. cartesia/tts/types/__init__.py +4 -0
  18. cartesia/tts/types/controls.py +2 -2
  19. cartesia/tts/types/emotion.py +1 -32
  20. cartesia/tts/types/emotion_deprecated.py +34 -0
  21. cartesia/tts/types/generation_config.py +37 -0
  22. cartesia/tts/types/generation_request.py +2 -0
  23. cartesia/tts/types/mp_3_output_format.py +5 -1
  24. cartesia/tts/types/raw_output_format.py +6 -2
  25. cartesia/tts/types/sse_output_format.py +5 -2
  26. cartesia/tts/types/tts_request.py +2 -0
  27. cartesia/tts/types/ttssse_request.py +2 -0
  28. cartesia/tts/types/web_socket_raw_output_format.py +5 -2
  29. cartesia/tts/types/web_socket_tts_request.py +2 -0
  30. cartesia/voice_changer/client.py +2 -0
  31. {cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/METADATA +82 -72
  32. {cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/RECORD +34 -31
  33. {cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/LICENSE +0 -0
  34. {cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/WHEEL +0 -0
cartesia/__init__.py CHANGED
@@ -52,7 +52,10 @@ from .tts import (
     Controls,
     ControlsParams,
     Emotion,
+    EmotionDeprecated,
     FlushId,
+    GenerationConfig,
+    GenerationConfigParams,
     GenerationRequest,
     GenerationRequestParams,
     ModelSpeed,
@@ -211,6 +214,7 @@ __all__ = [
     "EmbeddingSpecifier",
     "EmbeddingSpecifierParams",
     "Emotion",
+    "EmotionDeprecated",
     "ErrorMessage",
     "ErrorMessageParams",
     "FilePurpose",
@@ -219,6 +223,8 @@ __all__ = [
     "FlushId",
     "Gender",
     "GenderPresentation",
+    "GenerationConfig",
+    "GenerationConfigParams",
     "GenerationRequest",
     "GenerationRequestParams",
     "GetVoicesResponse",
cartesia/core/client_wrapper.py CHANGED
@@ -16,7 +16,7 @@ class BaseClientWrapper:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cartesia",
-            "X-Fern-SDK-Version": "2.0.9",
+            "X-Fern-SDK-Version": "2.0.13",
         }
         headers["X-API-Key"] = self.api_key
         headers["Cartesia-Version"] = "2024-11-13"
cartesia/infill/client.py CHANGED
@@ -78,7 +78,7 @@ class InfillClient:
            The format of the output audio

        output_format_sample_rate : int
-           The sample rate of the output audio
+           The sample rate of the output audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.

        output_format_encoding : typing.Optional[RawEncoding]
            Required for `raw` and `wav` containers.
@@ -221,7 +221,7 @@ class AsyncInfillClient:
            The format of the output audio

        output_format_sample_rate : int
-           The sample rate of the output audio
+           The sample rate of the output audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.

        output_format_encoding : typing.Optional[RawEncoding]
            Required for `raw` and `wav` containers.
cartesia/tts/__init__.py CHANGED
@@ -5,7 +5,9 @@ from .types import (
     ContextId,
     Controls,
     Emotion,
+    EmotionDeprecated,
     FlushId,
+    GenerationConfig,
     GenerationRequest,
     ModelSpeed,
     Mp3OutputFormat,
@@ -51,6 +53,7 @@ from .types import (
 from .requests import (
     CancelContextRequestParams,
     ControlsParams,
+    GenerationConfigParams,
     GenerationRequestParams,
     Mp3OutputFormatParams,
     OutputFormatParams,
@@ -96,7 +99,10 @@ __all__ = [
     "Controls",
     "ControlsParams",
     "Emotion",
+    "EmotionDeprecated",
     "FlushId",
+    "GenerationConfig",
+    "GenerationConfigParams",
     "GenerationRequest",
     "GenerationRequestParams",
     "ModelSpeed",
cartesia/tts/client.py CHANGED
@@ -5,6 +5,7 @@ from ..core.client_wrapper import SyncClientWrapper
 from .requests.tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
 from .requests.output_format import OutputFormatParams
 from .types.supported_language import SupportedLanguage
+from .requests.generation_config import GenerationConfigParams
 from .types.model_speed import ModelSpeed
 from ..core.request_options import RequestOptions
 from ..core.serialization import convert_and_respect_annotation_metadata
@@ -34,6 +35,7 @@ class TtsClient:
         voice: TtsRequestVoiceSpecifierParams,
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
+        generation_config: typing.Optional[GenerationConfigParams] = OMIT,
         duration: typing.Optional[float] = OMIT,
         speed: typing.Optional[ModelSpeed] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -52,6 +54,8 @@ class TtsClient:

         language : typing.Optional[SupportedLanguage]

+        generation_config : typing.Optional[GenerationConfigParams]
+
         duration : typing.Optional[float]
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -97,6 +101,9 @@ class TtsClient:
                 "output_format": convert_and_respect_annotation_metadata(
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
+                "generation_config": convert_and_respect_annotation_metadata(
+                    object_=generation_config, annotation=GenerationConfigParams, direction="write"
+                ),
                 "duration": duration,
                 "speed": speed,
             },
@@ -123,6 +130,7 @@ class TtsClient:
         voice: TtsRequestVoiceSpecifierParams,
         output_format: SseOutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
+        generation_config: typing.Optional[GenerationConfigParams] = OMIT,
         duration: typing.Optional[float] = OMIT,
         speed: typing.Optional[ModelSpeed] = OMIT,
         add_timestamps: typing.Optional[bool] = OMIT,
@@ -145,6 +153,8 @@ class TtsClient:

         language : typing.Optional[SupportedLanguage]

+        generation_config : typing.Optional[GenerationConfigParams]
+
         duration : typing.Optional[float]
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -204,6 +214,9 @@ class TtsClient:
                 "output_format": convert_and_respect_annotation_metadata(
                     object_=output_format, annotation=SseOutputFormatParams, direction="write"
                 ),
+                "generation_config": convert_and_respect_annotation_metadata(
+                    object_=generation_config, annotation=GenerationConfigParams, direction="write"
+                ),
                 "duration": duration,
                 "speed": speed,
                 "add_timestamps": add_timestamps,
@@ -248,6 +261,7 @@ class AsyncTtsClient:
         voice: TtsRequestVoiceSpecifierParams,
         output_format: OutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
+        generation_config: typing.Optional[GenerationConfigParams] = OMIT,
         duration: typing.Optional[float] = OMIT,
         speed: typing.Optional[ModelSpeed] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -266,6 +280,8 @@ class AsyncTtsClient:

         language : typing.Optional[SupportedLanguage]

+        generation_config : typing.Optional[GenerationConfigParams]
+
         duration : typing.Optional[float]
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -319,6 +335,9 @@ class AsyncTtsClient:
                 "output_format": convert_and_respect_annotation_metadata(
                     object_=output_format, annotation=OutputFormatParams, direction="write"
                 ),
+                "generation_config": convert_and_respect_annotation_metadata(
+                    object_=generation_config, annotation=GenerationConfigParams, direction="write"
+                ),
                 "duration": duration,
                 "speed": speed,
             },
@@ -345,6 +364,7 @@ class AsyncTtsClient:
         voice: TtsRequestVoiceSpecifierParams,
         output_format: SseOutputFormatParams,
         language: typing.Optional[SupportedLanguage] = OMIT,
+        generation_config: typing.Optional[GenerationConfigParams] = OMIT,
         duration: typing.Optional[float] = OMIT,
         speed: typing.Optional[ModelSpeed] = OMIT,
         add_timestamps: typing.Optional[bool] = OMIT,
@@ -367,6 +387,8 @@ class AsyncTtsClient:

         language : typing.Optional[SupportedLanguage]

+        generation_config : typing.Optional[GenerationConfigParams]
+
         duration : typing.Optional[float]
             The maximum duration of the audio in seconds. You do not usually need to specify this.
             If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -434,6 +456,9 @@ class AsyncTtsClient:
                 "output_format": convert_and_respect_annotation_metadata(
                     object_=output_format, annotation=SseOutputFormatParams, direction="write"
                 ),
+                "generation_config": convert_and_respect_annotation_metadata(
+                    object_=generation_config, annotation=GenerationConfigParams, direction="write"
+                ),
                 "duration": duration,
                 "speed": speed,
                 "add_timestamps": add_timestamps,
cartesia/tts/requests/__init__.py CHANGED
@@ -2,6 +2,7 @@

 from .cancel_context_request import CancelContextRequestParams
 from .controls import ControlsParams
+from .generation_config import GenerationConfigParams
 from .generation_request import GenerationRequestParams
 from .mp_3_output_format import Mp3OutputFormatParams
 from .output_format import OutputFormatParams, OutputFormat_Mp3Params, OutputFormat_RawParams, OutputFormat_WavParams
@@ -41,6 +42,7 @@ from .word_timestamps import WordTimestampsParams
 __all__ = [
     "CancelContextRequestParams",
     "ControlsParams",
+    "GenerationConfigParams",
     "GenerationRequestParams",
     "Mp3OutputFormatParams",
     "OutputFormatParams",
cartesia/tts/requests/controls.py CHANGED
@@ -3,9 +3,9 @@
 import typing_extensions
 from .speed import SpeedParams
 import typing
-from ..types.emotion import Emotion
+from ..types.emotion_deprecated import EmotionDeprecated


 class ControlsParams(typing_extensions.TypedDict):
     speed: SpeedParams
-    emotion: typing.Sequence[Emotion]
+    emotion: typing.Sequence[EmotionDeprecated]
cartesia/tts/requests/generation_config.py CHANGED
@@ -0,0 +1,26 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing_extensions
+import typing_extensions
+from ..types.emotion import Emotion
+
+
+class GenerationConfigParams(typing_extensions.TypedDict):
+    """
+    Configure the various attributes of the generated speech. These controls only for `sonic-3` and have no effect on earlier models.
+    """
+
+    volume: typing_extensions.NotRequired[float]
+    """
+    Adjust the volume of the generated speech between 0.5x and 2.0x the original volume (default is 1.0x). Valid values are between 0.5 and 2.0 inclusive.
+    """
+
+    speed: typing_extensions.NotRequired[float]
+    """
+    Adjust the speed of the generated speech between 0.6x and 1.5x the original speed (default is 1.0x). Valid values are between 0.6 and 1.5 inclusive.
+    """
+
+    emotion: typing_extensions.NotRequired[Emotion]
+    """
+    Guide the emotion of the generated speech.
+    """
cartesia/tts/requests/generation_request.py CHANGED
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
 import typing_extensions
 from ..types.supported_language import SupportedLanguage
 from .web_socket_raw_output_format import WebSocketRawOutputFormatParams
+from .generation_config import GenerationConfigParams
 from ..types.model_speed import ModelSpeed
 from ..types.context_id import ContextId
 from ...core.serialization import FieldMetadata
@@ -25,6 +26,7 @@ class GenerationRequestParams(typing_extensions.TypedDict):
     voice: TtsRequestVoiceSpecifierParams
     language: typing_extensions.NotRequired[SupportedLanguage]
     output_format: WebSocketRawOutputFormatParams
+    generation_config: typing_extensions.NotRequired[GenerationConfigParams]
     duration: typing_extensions.NotRequired[float]
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/requests/mp_3_output_format.py CHANGED
@@ -5,6 +5,10 @@ import typing_extensions

 class Mp3OutputFormatParams(typing_extensions.TypedDict):
     sample_rate: int
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
+
     bit_rate: int
     """
     The bit rate of the audio in bits per second. Supported bit rates are 32000, 64000, 96000, 128000, 192000.
cartesia/tts/requests/raw_output_format.py CHANGED
@@ -8,4 +8,8 @@ import typing_extensions
 class RawOutputFormatParams(typing_extensions.TypedDict):
     encoding: RawEncoding
     sample_rate: int
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
+
     bit_rate: typing_extensions.NotRequired[int]
cartesia/tts/requests/sse_output_format.py CHANGED
@@ -9,3 +9,6 @@ class SseOutputFormatParams(typing_extensions.TypedDict):
     container: typing.Literal["raw"]
     encoding: RawEncoding
     sample_rate: int
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
cartesia/tts/requests/tts_request.py CHANGED
@@ -5,6 +5,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
 import typing_extensions
 from ..types.supported_language import SupportedLanguage
 from .output_format import OutputFormatParams
+from .generation_config import GenerationConfigParams
 from ..types.model_speed import ModelSpeed


@@ -18,6 +19,7 @@ class TtsRequestParams(typing_extensions.TypedDict):
     voice: TtsRequestVoiceSpecifierParams
     language: typing_extensions.NotRequired[SupportedLanguage]
     output_format: OutputFormatParams
+    generation_config: typing_extensions.NotRequired[GenerationConfigParams]
     duration: typing_extensions.NotRequired[float]
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/requests/ttssse_request.py CHANGED
@@ -5,6 +5,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
 import typing_extensions
 from ..types.supported_language import SupportedLanguage
 from .sse_output_format import SseOutputFormatParams
+from .generation_config import GenerationConfigParams
 from ..types.model_speed import ModelSpeed
 from ..types.context_id import ContextId

@@ -19,6 +20,7 @@ class TtssseRequestParams(typing_extensions.TypedDict):
     voice: TtsRequestVoiceSpecifierParams
     language: typing_extensions.NotRequired[SupportedLanguage]
     output_format: SseOutputFormatParams
+    generation_config: typing_extensions.NotRequired[GenerationConfigParams]
     duration: typing_extensions.NotRequired[float]
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/requests/web_socket_raw_output_format.py CHANGED
@@ -9,3 +9,6 @@ class WebSocketRawOutputFormatParams(typing_extensions.TypedDict):
     container: typing.Literal["raw"]
     encoding: RawEncoding
     sample_rate: int
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
cartesia/tts/requests/web_socket_tts_request.py CHANGED
@@ -3,6 +3,7 @@
 import typing_extensions
 import typing_extensions
 from .output_format import OutputFormatParams
+from .generation_config import GenerationConfigParams
 from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
 from ...core.serialization import FieldMetadata
 from ..types.model_speed import ModelSpeed
@@ -15,6 +16,7 @@ class WebSocketTtsRequestParams(typing_extensions.TypedDict):
     """

     output_format: typing_extensions.NotRequired[OutputFormatParams]
+    generation_config: typing_extensions.NotRequired[GenerationConfigParams]
     transcript: typing_extensions.NotRequired[str]
     voice: TtsRequestVoiceSpecifierParams
     duration: typing_extensions.NotRequired[int]
cartesia/tts/types/__init__.py CHANGED
@@ -4,7 +4,9 @@ from .cancel_context_request import CancelContextRequest
 from .context_id import ContextId
 from .controls import Controls
 from .emotion import Emotion
+from .emotion_deprecated import EmotionDeprecated
 from .flush_id import FlushId
+from .generation_config import GenerationConfig
 from .generation_request import GenerationRequest
 from .model_speed import ModelSpeed
 from .mp_3_output_format import Mp3OutputFormat
@@ -51,7 +53,9 @@ __all__ = [
     "ContextId",
     "Controls",
     "Emotion",
+    "EmotionDeprecated",
     "FlushId",
+    "GenerationConfig",
     "GenerationRequest",
     "ModelSpeed",
     "Mp3OutputFormat",
cartesia/tts/types/controls.py CHANGED
@@ -3,14 +3,14 @@
 from ...core.pydantic_utilities import UniversalBaseModel
 from .speed import Speed
 import typing
-from .emotion import Emotion
+from .emotion_deprecated import EmotionDeprecated
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic


 class Controls(UniversalBaseModel):
     speed: Speed
-    emotion: typing.List[Emotion]
+    emotion: typing.List[EmotionDeprecated]

     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
cartesia/tts/types/emotion.py CHANGED
@@ -1,34 +1,3 @@
 # This file was auto-generated by Fern from our API Definition.

-import typing
-
-Emotion = typing.Union[
-    typing.Literal[
-        "anger:lowest",
-        "anger:low",
-        "anger",
-        "anger:high",
-        "anger:highest",
-        "positivity:lowest",
-        "positivity:low",
-        "positivity",
-        "positivity:high",
-        "positivity:highest",
-        "surprise:lowest",
-        "surprise:low",
-        "surprise",
-        "surprise:high",
-        "surprise:highest",
-        "sadness:lowest",
-        "sadness:low",
-        "sadness",
-        "sadness:high",
-        "sadness:highest",
-        "curiosity:lowest",
-        "curiosity:low",
-        "curiosity",
-        "curiosity:high",
-        "curiosity:highest",
-    ],
-    typing.Any,
-]
+Emotion = str
cartesia/tts/types/emotion_deprecated.py CHANGED
@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+EmotionDeprecated = typing.Union[
+    typing.Literal[
+        "anger:lowest",
+        "anger:low",
+        "anger",
+        "anger:high",
+        "anger:highest",
+        "positivity:lowest",
+        "positivity:low",
+        "positivity",
+        "positivity:high",
+        "positivity:highest",
+        "surprise:lowest",
+        "surprise:low",
+        "surprise",
+        "surprise:high",
+        "surprise:highest",
+        "sadness:lowest",
+        "sadness:low",
+        "sadness",
+        "sadness:high",
+        "sadness:highest",
+        "curiosity:lowest",
+        "curiosity:low",
+        "curiosity",
+        "curiosity:high",
+        "curiosity:highest",
+    ],
+    typing.Any,
+]
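The net effect of the two files above: `Emotion` becomes a plain string alias, while the old tag vocabulary survives under the new name `EmotionDeprecated` (still consumed by `Controls.emotion`). A small sketch of what that means for type annotations, using made-up example values:

```python
from cartesia.tts.types import Emotion, EmotionDeprecated

# Emotion is now just `str`, so free-form descriptions type-check.
mood: Emotion = "warm and slightly amused"  # illustrative value

# The old tag-style vocabulary is still expressible via EmotionDeprecated,
# which Controls.emotion continues to accept as a list.
legacy_tag: EmotionDeprecated = "positivity:high"
```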
cartesia/tts/types/generation_config.py CHANGED
@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from ...core.pydantic_utilities import UniversalBaseModel
+import typing
+import pydantic
+from .emotion import Emotion
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
+
+
+class GenerationConfig(UniversalBaseModel):
+    """
+    Configure the various attributes of the generated speech. These controls only for `sonic-3` and have no effect on earlier models.
+    """
+
+    volume: typing.Optional[float] = pydantic.Field(default=None)
+    """
+    Adjust the volume of the generated speech between 0.5x and 2.0x the original volume (default is 1.0x). Valid values are between 0.5 and 2.0 inclusive.
+    """
+
+    speed: typing.Optional[float] = pydantic.Field(default=None)
+    """
+    Adjust the speed of the generated speech between 0.6x and 1.5x the original speed (default is 1.0x). Valid values are between 0.6 and 1.5 inclusive.
+    """
+
+    emotion: typing.Optional[Emotion] = pydantic.Field(default=None)
+    """
+    Guide the emotion of the generated speech.
+    """
+
+    if IS_PYDANTIC_V2:
+        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+    else:
+
+        class Config:
+            frozen = True
+            smart_union = True
+            extra = pydantic.Extra.allow
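`GenerationConfig` is the pydantic counterpart of the `GenerationConfigParams` TypedDict, with the same three optional fields. A minimal construction sketch; the values are illustrative only, and the ranges come from the field docstrings above:

```python
from cartesia.tts.types import GenerationConfig

# All fields default to None; the documented server-side defaults
# (1.0x volume, 1.0x speed) apply when a field is omitted.
config = GenerationConfig(
    volume=1.2,                      # must stay within 0.5-2.0
    speed=0.9,                       # must stay within 0.6-1.5
    emotion="calm and reassuring",   # free-form string; sonic-3 only
)
print(config.volume, config.speed, config.emotion)
```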
cartesia/tts/types/generation_request.py CHANGED
@@ -6,6 +6,7 @@ import typing
 from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
 from .supported_language import SupportedLanguage
 from .web_socket_raw_output_format import WebSocketRawOutputFormat
+from .generation_config import GenerationConfig
 from .model_speed import ModelSpeed
 from .context_id import ContextId
 import typing_extensions
@@ -27,6 +28,7 @@ class GenerationRequest(UniversalBaseModel):
     voice: TtsRequestVoiceSpecifier
     language: typing.Optional[SupportedLanguage] = None
     output_format: WebSocketRawOutputFormat
+    generation_config: typing.Optional[GenerationConfig] = None
     duration: typing.Optional[float] = pydantic.Field(default=None)
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/types/mp_3_output_format.py CHANGED
@@ -7,7 +7,11 @@ import typing


 class Mp3OutputFormat(UniversalBaseModel):
-    sample_rate: int
+    sample_rate: int = pydantic.Field()
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
+
     bit_rate: int = pydantic.Field()
     """
     The bit rate of the audio in bits per second. Supported bit rates are 32000, 64000, 96000, 128000, 192000.
cartesia/tts/types/raw_output_format.py CHANGED
@@ -2,14 +2,18 @@

 from ...core.pydantic_utilities import UniversalBaseModel
 from .raw_encoding import RawEncoding
+import pydantic
 import typing
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
-import pydantic


 class RawOutputFormat(UniversalBaseModel):
     encoding: RawEncoding
-    sample_rate: int
+    sample_rate: int = pydantic.Field()
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """
+
     bit_rate: typing.Optional[int] = None

     if IS_PYDANTIC_V2:
cartesia/tts/types/sse_output_format.py CHANGED
@@ -3,14 +3,17 @@
 from ...core.pydantic_utilities import UniversalBaseModel
 import typing
 from .raw_encoding import RawEncoding
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2


 class SseOutputFormat(UniversalBaseModel):
     container: typing.Literal["raw"] = "raw"
     encoding: RawEncoding
-    sample_rate: int
+    sample_rate: int = pydantic.Field()
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """

     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
cartesia/tts/types/tts_request.py CHANGED
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
 import typing
 from .supported_language import SupportedLanguage
 from .output_format import OutputFormat
+from .generation_config import GenerationConfig
 from .model_speed import ModelSpeed
 from ...core.pydantic_utilities import IS_PYDANTIC_V2

@@ -20,6 +21,7 @@ class TtsRequest(UniversalBaseModel):
     voice: TtsRequestVoiceSpecifier
     language: typing.Optional[SupportedLanguage] = None
     output_format: OutputFormat
+    generation_config: typing.Optional[GenerationConfig] = None
     duration: typing.Optional[float] = pydantic.Field(default=None)
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/types/ttssse_request.py CHANGED
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
 import typing
 from .supported_language import SupportedLanguage
 from .sse_output_format import SseOutputFormat
+from .generation_config import GenerationConfig
 from .model_speed import ModelSpeed
 from .context_id import ContextId
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
@@ -21,6 +22,7 @@ class TtssseRequest(UniversalBaseModel):
     voice: TtsRequestVoiceSpecifier
     language: typing.Optional[SupportedLanguage] = None
     output_format: SseOutputFormat
+    generation_config: typing.Optional[GenerationConfig] = None
     duration: typing.Optional[float] = pydantic.Field(default=None)
     """
     The maximum duration of the audio in seconds. You do not usually need to specify this.
cartesia/tts/types/web_socket_raw_output_format.py CHANGED
@@ -3,14 +3,17 @@
 from ...core.pydantic_utilities import UniversalBaseModel
 import typing
 from .raw_encoding import RawEncoding
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
+from ...core.pydantic_utilities import IS_PYDANTIC_V2


 class WebSocketRawOutputFormat(UniversalBaseModel):
     container: typing.Literal["raw"] = "raw"
     encoding: RawEncoding
-    sample_rate: int
+    sample_rate: int = pydantic.Field()
+    """
+    The sample rate of the audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.
+    """

     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
cartesia/tts/types/web_socket_tts_request.py CHANGED
@@ -4,6 +4,7 @@ from ...core.pydantic_utilities import UniversalBaseModel
 import pydantic
 import typing
 from .output_format import OutputFormat
+from .generation_config import GenerationConfig
 from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
 import typing_extensions
 from ...core.serialization import FieldMetadata
@@ -18,6 +19,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
     """

     output_format: typing.Optional[OutputFormat] = None
+    generation_config: typing.Optional[GenerationConfig] = None
     transcript: typing.Optional[str] = None
     voice: TtsRequestVoiceSpecifier
     duration: typing.Optional[int] = None
cartesia/voice_changer/client.py CHANGED
@@ -47,6 +47,7 @@ class VoiceChangerClient:
        output_format_container : OutputFormatContainer

        output_format_sample_rate : int
+           The sample rate of the output audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.

        output_format_encoding : typing.Optional[RawEncoding]
            Required for `raw` and `wav` containers.
@@ -224,6 +225,7 @@ class AsyncVoiceChangerClient:
        output_format_container : OutputFormatContainer

        output_format_sample_rate : int
+           The sample rate of the output audio in Hz. Supported sample rates are 8000, 16000, 22050, 24000, 44100, 48000.

        output_format_encoding : typing.Optional[RawEncoding]
            Required for `raw` and `wav` containers.
{cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 2.0.9
+Version: 2.0.13
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers
@@ -53,26 +53,36 @@ Instantiate and use the client with the following:

 ```python
 from cartesia import Cartesia
-from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
 import os

 client = Cartesia(
-    api_key=os.getenv("CARTESIA_API_KEY"),
-)
-client.tts.bytes(
-    model_id="sonic-2",
-    transcript="Hello, world!",
-    voice={
-        "mode": "id",
-        "id": "694f9389-aac1-45b6-b726-9d9369183238",
-    },
-    language="en",
-    output_format={
-        "container": "raw",
-        "sample_rate": 44100,
-        "encoding": "pcm_f32le",
-    },
+    api_key=os.environ["CARTESIA_API_KEY"],
 )
+
+
+def main():
+    with open("sonic.wav", "wb") as f:
+        bytes_iter = client.tts.bytes(
+            model_id="sonic-3",
+            transcript="Hello, world!",
+            voice={
+                "mode": "id",
+                "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
+            },
+            language="en",
+            output_format={
+                "container": "wav",
+                "sample_rate": 44100,
+                "encoding": "pcm_f32le",
+            },
+        )
+
+        for chunk in bytes_iter:
+            f.write(chunk)
+
+
+if __name__ == "__main__":
+    main()
 ```

 ## Async Client
@@ -81,31 +91,37 @@ The SDK also exports an `async` client so that you can make non-blocking calls t

 ```python
 import asyncio
-import os
-
 from cartesia import AsyncCartesia
-from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
+import os

 client = AsyncCartesia(
-    api_key=os.getenv("CARTESIA_API_KEY"),
+    api_key=os.environ["CARTESIA_API_KEY"],
 )

-async def main() -> None:
-    async for output in client.tts.bytes(
-        model_id="sonic-2",
-        transcript="Hello, world!",
-        voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
-        language="en",
-        output_format={
-            "container": "raw",
-            "sample_rate": 44100,
-            "encoding": "pcm_f32le",
-        },
-    ):
-        print(f"Received chunk of size: {len(output)}")

+async def main():
+    with open("sonic.wav", "wb") as f:
+        bytes_iter = client.tts.bytes(
+            model_id="sonic-3",
+            transcript="Hello, world!",
+            voice={
+                "mode": "id",
+                "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
+            },
+            language="en",
+            output_format={
+                "container": "wav",
+                "sample_rate": 44100,
+                "encoding": "pcm_f32le",
+            },
+        )
+
+        async for chunk in bytes_iter:
+            f.write(chunk)

-asyncio.run(main())
+
+if __name__ == "__main__":
+    asyncio.run(main())
 ```

 ## Exception Handling
@@ -129,7 +145,6 @@ The SDK supports streaming responses as well, returning a generator that you can

 ```python
 from cartesia import Cartesia
-from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
 import os

 def get_tts_chunks():
@@ -137,14 +152,11 @@ def get_tts_chunks():
         api_key=os.getenv("CARTESIA_API_KEY"),
     )
     response = client.tts.sse(
-        model_id="sonic-2",
+        model_id="sonic-3",
         transcript="Hello world!",
         voice={
+            "mode": "id",
             "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
-            "experimental_controls": {
-                "speed": "normal",
-                "emotion": [],
-            },
         },
         language="en",
         output_format={
@@ -188,9 +200,9 @@ ws = client.tts.websocket()

 # Generate and stream audio using the websocket
 for output in ws.send(
-    model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
+    model_id="sonic-3", # see: https://docs.cartesia.ai/build-with-cartesia/tts-models
     transcript=transcript,
-    voice={"id": voice_id},
+    voice={"mode": "id", "id": voice_id},
     stream=True,
     output_format={
         "container": "raw",
@@ -252,7 +264,7 @@ ws.send("done")
 for result in ws.receive():
     if result['type'] == 'transcript':
         print(f"Transcription: {result['text']}")
-
+
         # Handle word-level timestamps if available
         if 'words' in result and result['words']:
             print("Word-level timestamps:")
@@ -261,7 +273,7 @@ for result in ws.receive():
             start = word_info['start']
             end = word_info['end']
             print(f" '{word}': {start:.2f}s - {end:.2f}s")
-
+
         if result['is_final']:
             print("Final result received")
     elif result['type'] == 'done':
@@ -286,7 +298,7 @@ async def streaming_stt_example():
    and demonstrates the new endpointing and word timestamp features.
    """
    client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
-
+
    try:
        # Create websocket connection with voice activity detection
        ws = await client.stt.websocket(
@@ -297,24 +309,24 @@ async def streaming_stt_example():
            min_volume=0.15,  # Volume threshold for voice activity detection
            max_silence_duration_secs=0.3,  # Maximum silence duration before endpointing
        )
-
+
        # Simulate streaming audio data (replace with your audio source)
        async def audio_stream():
            """Simulate real-time audio streaming - replace with actual audio capture"""
            # Load audio file for simulation
            with open("path/to/audio.wav", "rb") as f:
                audio_data = f.read()
-
+
            # Stream in 100ms chunks (realistic for real-time processing)
            chunk_size = int(16000 * 0.1 * 2)  # 100ms at 16kHz, 16-bit
-
+
            for i in range(0, len(audio_data), chunk_size):
                chunk = audio_data[i:i + chunk_size]
                if chunk:
                    yield chunk
                    # Simulate real-time streaming delay
                    await asyncio.sleep(0.1)
-
+
        # Send audio and receive results concurrently
        async def send_audio():
            """Send audio chunks to the STT websocket"""
@@ -324,31 +336,31 @@ async def streaming_stt_example():
                    print(f"Sent audio chunk of {len(chunk)} bytes")
                    # Small delay to simulate realtime applications
                    await asyncio.sleep(0.02)
-
+
                # Signal end of audio stream
                await ws.send("finalize")
                await ws.send("done")
                print("Audio streaming completed")
-
+
            except Exception as e:
                print(f"Error sending audio: {e}")
-
+
        async def receive_transcripts():
            """Receive and process transcription results with word timestamps"""
            full_transcript = ""
            all_word_timestamps = []
-
+
            try:
                async for result in ws.receive():
                    if result['type'] == 'transcript':
                        text = result['text']
                        is_final = result['is_final']
-
+
                        # Handle word-level timestamps
                        if 'words' in result and result['words']:
                            word_timestamps = result['words']
                            all_word_timestamps.extend(word_timestamps)
-
+
                            if is_final:
                                print("Word-level timestamps:")
                                for word_info in word_timestamps:
@@ -356,7 +368,7 @@ async def streaming_stt_example():
                                    start = word_info['start']
                                    end = word_info['end']
                                    print(f" '{word}': {start:.2f}s - {end:.2f}s")
-
+
                        if is_final:
                            # Final result - this text won't change
                            full_transcript += text + " "
@@ -364,30 +376,30 @@ async def streaming_stt_example():
                        else:
                            # Partial result - may change as more audio is processed
                            print(f"PARTIAL: {text}")
-
+
                    elif result['type'] == 'done':
                        print("Transcription completed")
                        break
-
+
            except Exception as e:
                print(f"Error receiving transcripts: {e}")
-
+
            return full_transcript.strip(), all_word_timestamps
-
+
        print("Starting streaming STT...")
-
+
        # Use asyncio.gather to run audio sending and transcript receiving concurrently
        _, (final_transcript, word_timestamps) = await asyncio.gather(
            send_audio(),
            receive_transcripts()
        )
-
+
        print(f"\nComplete transcript: {final_transcript}")
        print(f"Total words with timestamps: {len(word_timestamps)}")
-
+
        # Clean up
        await ws.close()
-
+
    except Exception as e:
        print(f"STT streaming error: {e}")
    finally:
@@ -442,7 +454,7 @@ import os

 async def transcribe_file():
     client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
-
+
     with open("path/to/audio.wav", "rb") as audio_file:
         response = await client.stt.transcribe(
             file=audio_file,
@@ -450,14 +462,14 @@ async def transcribe_file():
             language="en",
             timestamp_granularities=["word"],
         )
-
+
     print(f"Transcribed text: {response.text}")
-
+
     # Process word timestamps
     if response.words:
         for word_info in response.words:
             print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
-
+
     await client.close()

 asyncio.run(transcribe_file())
@@ -545,6 +557,7 @@ async def main():
        all_ends.extend(out.word_timestamps.end)  # End time for each word (seconds)

    await ws.close()
+   await client.close()

 asyncio.run(main())
 ```
@@ -663,6 +676,3 @@ $ git commit --amend -m "manually regenerate from docs" # optional

 From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)

-
-
-
{cartesia-2.0.9.dist-info → cartesia-2.0.13.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-cartesia/__init__.py,sha256=P8YXd1NsmEHQOF4p0MpPMGLOSy_0cIPHOnFe-iV94oU,10311
+cartesia/__init__.py,sha256=bANRu5PeAnbf6O7MXltmngXPJa_G-xo7mck3sZy9B_Y,10463
 cartesia/api_status/__init__.py,sha256=_dHNLdknrBjxHtU2PvLumttJM-JTQhJQqhhAQkLqt_U,168
 cartesia/api_status/client.py,sha256=GJ9Dq8iCn3hn8vCIqc6k1fCGEhSz0T0kaPGcdFnbMDY,3146
 cartesia/api_status/requests/__init__.py,sha256=ilEMzEy1JEw484CuL92bX5lHGOznc62pjiDMgiZ0tKM,130
@@ -19,7 +19,7 @@ cartesia/base_client.py,sha256=igAZOMDXz2Nv58oXHa7I9UfgxVN48drqhEmfsCCQlg8,6701
 cartesia/client.py,sha256=LoJjlJW2kJA-hyDt-Wu7QuKQsiTiLQfLYZjsjtewPJM,6537
 cartesia/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
 cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
-cartesia/core/client_wrapper.py,sha256=ZeQ4DT3ZKd07u6qrZFu4Z1p2uw71eeQOlfgXpmz6b0A,1854
+cartesia/core/client_wrapper.py,sha256=AaBv3QuyR9s29eT23DbkL6YWcr6LjLQJTT0POZnzuTU,1855
 cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
 cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
 cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
@@ -49,7 +49,7 @@ cartesia/embedding/types/__init__.py,sha256=aOrEOGuiO6dlSGu7pckqVMTYEMVAR5I7qqca
 cartesia/embedding/types/embedding.py,sha256=C1OJg8M4T1Apfcv4qx79ndftg0SgH4Lfqe_iU3UF-bA,1851
 cartesia/environment.py,sha256=Qnp91BGLic7hXmKsiYub2m3nPfvDWm59aB1wWta1J6A,160
 cartesia/infill/__init__.py,sha256=FTtvy8EDg9nNNg9WCatVgKTRYV8-_v1roeGPAKoa_pw,65
-cartesia/infill/client.py,sha256=uEDhE3Cx47ZyG7ofR-GOR0LhHiHeTLkUcjkLSsyU2ug,12563
+cartesia/infill/client.py,sha256=_S7DG_697mU9LanMuWePJthq1vFFt1DoIvmgyMXGzCY,12713
 cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cartesia/stt/__init__.py,sha256=UHT5OM-5phGwLCckL8BXGdC3QepJoboScW5eSXUE2S4,1763
 cartesia/stt/_async_websocket.py,sha256=6MVYvSz3d9sI5-zzT_aIPEFKxXeCQU00RYFpYSF0dio,12385
@@ -74,25 +74,26 @@ cartesia/stt/types/timestamp_granularity.py,sha256=Oe39JvLeMgR2BIJnx32abhvs05dJe
 cartesia/stt/types/transcript_message.py,sha256=J-MchlahI96nVBiMSLJrEOXFw2pBShbMXVocysQRnrY,1693
 cartesia/stt/types/transcription_response.py,sha256=QMcD6eLmp_Z2uaRLVyxYYIdoiRiVSGhBoxN3kjRTK2I,1190
 cartesia/stt/types/transcription_word.py,sha256=yxTndKXNmToPOM6_F_QfF-B0dE6Kx8-UwBpHLj2_zWk,803
-cartesia/tts/__init__.py,sha256=DwNzIilOcdNUbeIHIknngnW8WyZ6K5xZremSQQoo5VM,4927
+cartesia/tts/__init__.py,sha256=KmlvJPusv7yRI3OmkEn3GlxqITbfewLVO5S0SJkqV5M,5079
 cartesia/tts/_async_websocket.py,sha256=YG0NJpfQU4j48Gy2riWu1ItelPFX-IUvSFD6eMBvfGM,19454
 cartesia/tts/_websocket.py,sha256=K93vHOdxhF4-Duk8xunNnIpvkAT_ztfAtaomD5im8c0,19247
-cartesia/tts/client.py,sha256=Oot_ctyaqBgRMpyBUaMwh3z1M62oPKVMXNvMkmo1fRw,18180
-cartesia/tts/requests/__init__.py,sha256=SeITRF5QSAjOE5pNxbD6VffwwttMnQwuv0Z5n9h7BKs,3418
+cartesia/tts/client.py,sha256=mJXJG9JliWHw7UYCCd9evIW3gaSr3JYORW606E8lHzU,19607
+cartesia/tts/requests/__init__.py,sha256=zS0ny3c6HXr2l6D9TiBmMuyp-tbVuszhBiOQ5RdcQyw,3502
 cartesia/tts/requests/cancel_context_request.py,sha256=Wl8g-o5vwl9ENm-H1wsLx441FkIR_4Wt5UYtuWce2Yw,431
-cartesia/tts/requests/controls.py,sha256=xzUJlfgqhaJ1A-JD0LTpoHYk4iEpCuGpSD7qE4YYsRg,285
-cartesia/tts/requests/generation_request.py,sha256=JQPumk0UMCHDQrcUvuqeDsdc8LCJAEolSs10LpJzK00,3083
-cartesia/tts/requests/mp_3_output_format.py,sha256=PGDVzC1d7-Jce12rFxtF8G1pTHmlUdiGAhykFTABg0w,316
+cartesia/tts/requests/controls.py,sha256=TkywdstN4X9odGF_HfN25zYXcCxaJS8Q0H1HR0nv_rg,316
+cartesia/tts/requests/generation_config.py,sha256=ZFed-oneBwyxkkI1DHmmvtYso7FjTYM01ApS1omr1ms,917
+cartesia/tts/requests/generation_request.py,sha256=rZVpfwUzSea72b5gqPY47Fgunu_IJQM2PiVNHqCR9Jk,3214
+cartesia/tts/requests/mp_3_output_format.py,sha256=HBM6452KdWD9tGa9QXNyUZcH1OlJrXt_PIwo2Jt3l2Q,441
 cartesia/tts/requests/output_format.py,sha256=8TKu9AAeHCR5L4edzYch8FIYIldn4bM7ySrsCl8W_g8,842
 cartesia/tts/requests/phoneme_timestamps.py,sha256=ft81nmqElZAnvTBT27lY6YWfF18ZGsCx3Y1XHv9J7cM,267
-cartesia/tts/requests/raw_output_format.py,sha256=S60Vp7DeAATCMLF3bXgxhw0zILJBWJ9GhI9irAg_UkI,316
+cartesia/tts/requests/raw_output_format.py,sha256=WigDQlM_YkLk_-GK1_pNseGq8g-_POO84Su7jqSLsHQ,441
 cartesia/tts/requests/speed.py,sha256=-YGBWwh7_VtCBnYlT5EVsnrmcHFMEBTxy9LathZhkMA,259
-cartesia/tts/requests/sse_output_format.py,sha256=z_f7dlDYNvpheYOSnf3lOslHF40vS852pYkxHTpqAcc,293
-cartesia/tts/requests/tts_request.py,sha256=KBoahYfPbDENlEWsqnR4z1ZIhGIJwhLrzQIzkbtqtzE,1021
+cartesia/tts/requests/sse_output_format.py,sha256=dsRyxFCD3Qt3hTppxV7HJhphPx3jTkZhryMXUP-Soc8,417
+cartesia/tts/requests/tts_request.py,sha256=CUFMg_U2BhJQAxrqLAv4tfxAN326ItiCi0fQfJFi4lU,1152
 cartesia/tts/requests/tts_request_embedding_specifier.py,sha256=-M54ZjV0H5LPwcKtz0bOVqlkvO1pPiMbqMbVBMko3Ns,565
 cartesia/tts/requests/tts_request_id_specifier.py,sha256=-0ClfyJnnaH0uAcF5r84s3cM_cw2wT39dp6T4JYzOQ8,536
 cartesia/tts/requests/tts_request_voice_specifier.py,sha256=eGzL4aVGq4gKPxeglsV7-wuhxg8x33Qth3uFTTytgeI,337
-cartesia/tts/requests/ttssse_request.py,sha256=S8EkuEtveOetkcydinfLr5lS66PYpLQTNesyRIf_LwI,2007
+cartesia/tts/requests/ttssse_request.py,sha256=IZ4Urm23VQBhuJmA8CqZegZnTVIBqfZWQ9ve2vy2gXc,2138
 cartesia/tts/requests/wav_output_format.py,sha256=qiipmT5hWsa8J-fwW1EH_rnUAX_zOUpGJUNzuLc65r4,181
 cartesia/tts/requests/web_socket_base_response.py,sha256=zCjHw-FaNJMOcHiAb2NQWrBBfrzU5rc95vqDp7y9RmA,315
 cartesia/tts/requests/web_socket_chunk_response.py,sha256=4fVPJH-ZZb8lJKwqyYGx5wyeYWzfuThGxMRXC6ku4bA,233
@@ -100,38 +101,40 @@ cartesia/tts/requests/web_socket_done_response.py,sha256=YLHrT6NkmDntBSxF-JGlXSa
 cartesia/tts/requests/web_socket_error_response.py,sha256=ek2O5Whlzn5Ma40NhYviVl3aJBVeCA8BBvbJPUYxEiQ,213
 cartesia/tts/requests/web_socket_flush_done_response.py,sha256=gP3fSWhEFVzdzBweUmVKo7JvdREW3TM9R6o9-u6V6FQ,282
 cartesia/tts/requests/web_socket_phoneme_timestamps_response.py,sha256=nDRK7wo4s6R7ayJrw-LJX9WCaW4mti0HAV4X5j7cxjI,370
-cartesia/tts/requests/web_socket_raw_output_format.py,sha256=9BJHE5l5bzmYCYuUoACRhbZdJBijnSiwkbR8K4EzPDY,302
+cartesia/tts/requests/web_socket_raw_output_format.py,sha256=dcpXwOrCkB369pJ1AFOCwa5XgAFPUh9xEojrerH52bM,426
 cartesia/tts/requests/web_socket_request.py,sha256=5xfE0NgkBEZdus_vC-3RVQkuqhNmXHxLMX4DW3ezcKc,290
 cartesia/tts/requests/web_socket_response.py,sha256=kS46YN94ilUn4qjpt1TpauZApe0N8PpAefT87jFiusY,2079
 cartesia/tts/requests/web_socket_stream_options.py,sha256=VIvblFw9hGZvDzFpOnC11G0NvrFSVt-1-0sY5rpcZPI,232
 cartesia/tts/requests/web_socket_timestamps_response.py,sha256=MK3zN2Q_PVWJtX5DidNB0uXoF2o33rv6qCYPVaourxY,351
 cartesia/tts/requests/web_socket_tts_output.py,sha256=pX2uf0XVdziFhXCydwLlVOWb-LvBiuq-cBI6R1INiMg,760
-cartesia/tts/requests/web_socket_tts_request.py,sha256=1jdRjRAO7z-KLOyp8FcDoQh933RGt-ZPR3E8Vz3XPnQ,1795
+cartesia/tts/requests/web_socket_tts_request.py,sha256=9IqZKwM8YSDoDqYNPQ6DrcRGfuaAExD0KIPC0Ptaq1U,1926
 cartesia/tts/requests/word_timestamps.py,sha256=WMfBJtETi6wTpES0pYZCFfFRfEbzWE-RtosDJ5seUWg,261
 cartesia/tts/socket_client.py,sha256=zTPayHbgy-yQQ50AE1HXN4GMyanisZcLXf7Ds1paYks,11621
-cartesia/tts/types/__init__.py,sha256=rXphJ9b9nSYYrepr2ssG6ghtQAOQBQcLegxbl-XG3tw,3438
+cartesia/tts/types/__init__.py,sha256=VsVhynuJM_G3zHAzkAtB8M6eK_tq0Pa76FOAiulbRBc,3585
 cartesia/tts/types/cancel_context_request.py,sha256=zInhk3qRZsSc0F1aYJ-Q5BHJsosTrb22IJWhzue-eKE,856
 cartesia/tts/types/context_id.py,sha256=UCEtq5xFGOeBCECcY6Y-gYVe_Peg1hFhH9YYOkpApQg,81
-cartesia/tts/types/controls.py,sha256=H4CSu79mM1Ld4NZx_5uXw3EwRzTEMQRxKBRvFpcFb8Y,644
-cartesia/tts/types/emotion.py,sha256=zocyDcHTiFFnNRgo2YLMi70iGyffa080B4mkg9lcqVc,764
+cartesia/tts/types/controls.py,sha256=SxeSPZ4KgvRiUawOUI9mycASv6ekQ11vZYKOMtZz5TU,675
+cartesia/tts/types/emotion.py,sha256=N5E5Tf7L9tHcH-MB5fDPEFusotygu85ybEc-YeslVjc,79
+cartesia/tts/types/emotion_deprecated.py,sha256=WQuI5pXbzgpNq4kT14NMfukCJPN58GbmTtPScMMLy4I,774
 cartesia/tts/types/flush_id.py,sha256=HCIKo9o8d7YWKtaSNU3TEvfUVBju93ckGQy01Z9wLcE,79
-cartesia/tts/types/generation_request.py,sha256=ZGVXmHZLaZg7kEg1cVGXLpr8uB3btr2eZt0NEJRZnSU,3582
+cartesia/tts/types/generation_config.py,sha256=lIb52e8Ua777uvFnFTYn1NghxpzSTMC4QmDlV1cturU,1332
+cartesia/tts/types/generation_request.py,sha256=qO7XKzvwIp8Foglv5_1DJL1pCZLVyea0fQ0oKJw0fGw,3694
 cartesia/tts/types/model_speed.py,sha256=iiTj8V0piFCX2FZh5B8EkgRhZDlj4z3VFcQhp66e7y8,160
-cartesia/tts/types/mp_3_output_format.py,sha256=0WGblkuDUL7pZO1aRuQ_mU2Z5gN9xIabRfRKkjtzms8,731
+cartesia/tts/types/mp_3_output_format.py,sha256=LQ1-rEYjkK6XXWoj_Z7bezsguPpNI_SmprlIipsyNMI,875
 cartesia/tts/types/natural_specifier.py,sha256=K526P1RRuBGy80hyd_tX8tohPrE8DR9EgTCxS5wce0o,188
 cartesia/tts/types/numerical_specifier.py,sha256=tJpIskWO545luCKMFM9JlVc7VVhBhSvqL1qurhzL9cI,92
 cartesia/tts/types/output_format.py,sha256=bi9iZVQKmddTw6RjNKG9XAVrgEB7JVNsBS_emFLlGLs,1736
 cartesia/tts/types/phoneme_timestamps.py,sha256=SrhPmE7-1-bCVi4qCgMU7QR9ezkwUfqsWfZ2PchzwN0,637
 cartesia/tts/types/raw_encoding.py,sha256=eyc2goiYOTxWcuKHAgYZ2SrnfePW22Fbmc-5fGPlV2Y,186
-cartesia/tts/types/raw_output_format.py,sha256=jZGVaS0KIi9mU6trfskgA3HbMKJolhrwICnuDhF01ic,673
+cartesia/tts/types/raw_output_format.py,sha256=ir5QxW986P8qB14pMD5PVsAgc0bdC37i7O8JipS1svA,817
 cartesia/tts/types/speed.py,sha256=4c5WdxocBw6WSMnundSaNnceUeooU0vikhy00FW6M-w,239
-cartesia/tts/types/sse_output_format.py,sha256=tRb4VcYqoPJMDyjfTZMCRTblT2NjwIsQhy1oMjxQWW0,676
+cartesia/tts/types/sse_output_format.py,sha256=1_GB3rftQYAsXO6WrgQmzr-tsjCntHCVgKeTjay7M9g,819
 cartesia/tts/types/supported_language.py,sha256=riDRduThMbMWAq9i2uCfxhwVTpgaFwNDZ9LhEIl4zHY,237
-cartesia/tts/types/tts_request.py,sha256=FGcxW-siiQpEzJZSHMET3nDSYHSzRt3WSTO-cCEz9u4,1376
+cartesia/tts/types/tts_request.py,sha256=TkngMxyGKnjQvIG5u4qFx9TKcohuLyjI1UeXv8xbj2U,1488
 cartesia/tts/types/tts_request_embedding_specifier.py,sha256=eL_qCEr4pvWfy4qp9hZBuVdCincX5DBVqfv1vLt2_Vk,942
 cartesia/tts/types/tts_request_id_specifier.py,sha256=ktGdkkTRQ9scA-lt8qJ2jn_E5WzoOK8AXMrVqi71gf0,906
 cartesia/tts/types/tts_request_voice_specifier.py,sha256=p-3UQ62uFL1SgbX73Ex1D_V73Ef0wmT1ApOt1iLZmwE,307
-cartesia/tts/types/ttssse_request.py,sha256=6KvDQYzetwbFOVvkMWDj94Biz08EZaiX6V1lChsy49U,2423
+cartesia/tts/types/ttssse_request.py,sha256=QZa0LOwhtsxLFaTxCGA0EzMOYqp7tVu-ezmu-ibcmiA,2535
 cartesia/tts/types/wav_output_format.py,sha256=OTAgVn_gBMk252XO12kiNI9lKrbw3n38aBAiqlG5mdU,531
 cartesia/tts/types/web_socket_base_response.py,sha256=MWoTt1rGRqUQ8BOad1Zk2SA-i0E8a3JwPLSiehIbFj4,672
 cartesia/tts/types/web_socket_chunk_response.py,sha256=VOPXAlyGFdnfC69KxqDWDo1PPMydvQKmAypoWfbW8_s,593
@@ -139,20 +142,20 @@ cartesia/tts/types/web_socket_done_response.py,sha256=zZ6V-_pKNifdyuuRHGlZe6Zbc-
 cartesia/tts/types/web_socket_error_response.py,sha256=Jm26GnK0axyLQI3-JLHC0buYVIU8gKWxLAJlzo-cJFQ,573
 cartesia/tts/types/web_socket_flush_done_response.py,sha256=JLiVPDftr1arl_Kvj6038yj0mnjq6x0ooihsbdXajfw,635
 cartesia/tts/types/web_socket_phoneme_timestamps_response.py,sha256=R1-Z_W3XF7L7rrPwEOK_EfXHT4FWRpSAX3g71WebM90,686
-cartesia/tts/types/web_socket_raw_output_format.py,sha256=9PiOVmPDfT32IDIsmU7UY_rTLOShMMEw1pNv2yZ9Kyg,685
+cartesia/tts/types/web_socket_raw_output_format.py,sha256=O9ay_TwnMs4r_D_Cml6lBJ_2BwnHHo18boIXYI4wTr0,828
 cartesia/tts/types/web_socket_request.py,sha256=_xoAShkCCNTVAWKCvHw5k0Wisq60y4fOWYjG7SA8edM,260
 cartesia/tts/types/web_socket_response.py,sha256=fUQbJ6yFzZbzUZPuQWgkFdzP8-FMiKTcya-DIPWjimY,3777
 cartesia/tts/types/web_socket_stream_options.py,sha256=MhDSxBFqMuQeWjoyPqXVnTEzLjF8g6aojeigb5dQUgU,596
 cartesia/tts/types/web_socket_timestamps_response.py,sha256=kuWXI82ncF1QapnaHEjwrL84qWob7ByQU-yh1e0IEmk,667
 cartesia/tts/types/web_socket_tts_output.py,sha256=uvkv0smTBhdm18Rl17C0Ml4Inh79YBHNzAcKnZBs14Y,979
-cartesia/tts/types/web_socket_tts_request.py,sha256=Gx8kSINX__VhQ3In3R1-4fq0bfjaMe7iL-M8nDNt0fQ,2150
+cartesia/tts/types/web_socket_tts_request.py,sha256=mBVFoOdZDlxm2cQbmPTHgQjENfM4xhm_DywlTm5OtGI,2262
 cartesia/tts/types/word_timestamps.py,sha256=XZ2Q0prdb3F9c3AiOKXu4s3A3jBxE-qIt1npHOf16R0,631
 cartesia/tts/utils/constants.py,sha256=1CHa5flJf8--L_eYyOyOiWJNZ-Q81ufHZxDbJs8xYSk,418
 cartesia/tts/utils/tts.py,sha256=u7PgPxlJs6fcQTfr-jqAvBCAaK3JWLhF5QF4s-PwoMo,2093
 cartesia/tts/utils/types.py,sha256=DtsiRwrYypXScLu71gNyprUiELuR1l_-ikVaj47gpg4,2047
 cartesia/version.py,sha256=xk5z2FYkgnvzyjqzmRg67rYl8fnCeHEjPpVmD08bjyE,75
 cartesia/voice_changer/__init__.py,sha256=UKA8CSAwUb41OL-dcWWUhIsKLLsyY_NQtrklPAVWf9E,685
-cartesia/voice_changer/client.py,sha256=w3Z3A-92Fu5k9NRrfdn7Gu2nqmOONL-xLCHknZhkANY,13509
+cartesia/voice_changer/client.py,sha256=CjTuptyKNpviBB21fsobBqQSn08WuYCgC6gXAxNPCUI,13755
 cartesia/voice_changer/requests/__init__.py,sha256=MRwSUqio3mg_tvfcpAS0wIZ69HvJsc2kYJ0tUBaJ53U,390
 cartesia/voice_changer/requests/streaming_response.py,sha256=lbo7CJeuh0f5hXT4lKG_sDUZDLJWaLqxcwCuSf1IbMQ,982
 cartesia/voice_changer/types/__init__.py,sha256=qAiHsdRpnFeS0lBkYp_NRrhSJiRXCg5-uFibqDWzYVU,430
@@ -198,7 +201,7 @@ cartesia/voices/types/voice_expand_options.py,sha256=e4FroWdlxEE-LXQfT1RWlGHtswl
 cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
 cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
 cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
-cartesia-2.0.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-cartesia-2.0.9.dist-info/METADATA,sha256=GG86uKWZW4iX4S3p-QOsgPVv7yGStCHj6qHq97e9V6Q,20804
-cartesia-2.0.9.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-cartesia-2.0.9.dist-info/RECORD,,
+cartesia-2.0.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+cartesia-2.0.13.dist-info/METADATA,sha256=NB9AzRg9IZKIC_ePwsUwBlaKG3X6kFQkObYJ8McqjMQ,20671
+cartesia-2.0.13.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+cartesia-2.0.13.dist-info/RECORD,,