cartesia-1.1.0.dev0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cartesia/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from cartesia.async_client import AsyncCartesia
+ from cartesia.client import Cartesia
+
+ __all__ = ["Cartesia", "AsyncCartesia"]
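For reference, a minimal sketch of how these two exports are typically constructed. The `api_key` parameter is an assumption here; the client modules (`cartesia/client.py`, `cartesia/async_client.py`) are not part of this diff, so check them for the actual signatures.

    from cartesia import AsyncCartesia, Cartesia

    client = Cartesia(api_key="your-api-key")              # synchronous client
    async_client = AsyncCartesia(api_key="your-api-key")   # asyncio client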
cartesia/_async_sse.py ADDED
@@ -0,0 +1,95 @@
+ import base64
+ import json
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
+
+ import aiohttp
+
+ from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
+ from cartesia._logger import logger
+ from cartesia._sse import _SSE
+ from cartesia._types import OutputFormat, VoiceControls
+ from cartesia.utils.retry import retry_on_connection_error_async
+ from cartesia.utils.tts import _construct_tts_request
+
+
+ class _AsyncSSE(_SSE):
+     """This class contains methods to generate audio using Server-Sent Events asynchronously."""
+
+     def __init__(
+         self,
+         http_url: str,
+         headers: Dict[str, str],
+         timeout: float,
+         get_session: Callable[[], Optional[aiohttp.ClientSession]],
+     ):
+         super().__init__(http_url, headers, timeout)
+         self._get_session = get_session
+
+     async def send(
+         self,
+         model_id: str,
+         transcript: str,
+         output_format: OutputFormat,
+         voice_id: Optional[str] = None,
+         voice_embedding: Optional[List[float]] = None,
+         duration: Optional[int] = None,
+         language: Optional[str] = None,
+         stream: bool = True,
+         _experimental_voice_controls: Optional[VoiceControls] = None,
+     ) -> Union[bytes, AsyncGenerator[bytes, None]]:
+         request_body = _construct_tts_request(
+             model_id=model_id,
+             transcript=transcript,
+             output_format=output_format,
+             voice_id=voice_id,
+             voice_embedding=voice_embedding,
+             duration=duration,
+             language=language,
+             _experimental_voice_controls=_experimental_voice_controls,
+         )
+
+         generator = self._sse_generator_wrapper(request_body)
+
+         if stream:
+             return generator
+
+         chunks = []
+         async for chunk in generator:
+             chunks.append(chunk["audio"])
+
+         return {"audio": b"".join(chunks)}
+
+     @retry_on_connection_error_async(
+         max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
+     )
+     async def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
+         """Need to wrap the sse generator in a function for the retry decorator to work."""
+         try:
+             async for chunk in self._sse_generator(request_body):
+                 yield chunk
+         except Exception as e:
+             raise RuntimeError(f"Error generating audio. {e}")
+
+     async def _sse_generator(self, request_body: Dict[str, Any]):
+         session = await self._get_session()
+         async with session.post(
+             f"{self.http_url}/tts/sse",
+             data=json.dumps(request_body),
+             headers=self.headers,
+         ) as response:
+             if not response.ok:
+                 raise ValueError(f"Failed to generate audio. {await response.text()}")
+
+             buffer = ""
+             async for chunk_bytes in response.content.iter_any():
+                 buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
+                 for output in outputs:
+                     yield output
+
+             if buffer:
+                 try:
+                     chunk_json = json.loads(buffer)
+                     audio = base64.b64decode(chunk_json["data"])
+                     yield {"audio": audio}
+                 except json.JSONDecodeError:
+                     pass
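A hedged usage sketch for the class above, mirroring the doctest in the sync `_SSE` class later in this diff. The attribute path `client.tts.sse` and the need to `await` it are assumptions, since the client and TTS resource modules are not included here.

    import asyncio
    from cartesia import AsyncCartesia

    async def main():
        client = AsyncCartesia(api_key="your-api-key")
        # With stream=True, send() returns an async generator of {"audio": bytes} chunks.
        chunks = await client.tts.sse(
            model_id="sonic-english",
            transcript="Hello world!",
            voice_id="your-voice-id",
            output_format={"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
            stream=True,
        )
        async for chunk in chunks:
            audio = chunk["audio"]  # raw PCM bytes, base64-decoded by _update_buffer

    asyncio.run(main())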
cartesia/_async_websocket.py ADDED
@@ -0,0 +1,313 @@
+ import asyncio
+ import uuid
+ from collections import defaultdict
+ from types import TracebackType
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
+
+ import aiohttp
+
+ from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
+ from cartesia._types import OutputFormat, VoiceControls
+ from cartesia._websocket import _WebSocket
+ from cartesia.tts import TTS
+ from cartesia.utils.tts import _construct_tts_request
+
+
+ class _AsyncTTSContext:
+     """Manage a single context over an AsyncWebSocket.
+
+     This class separates sending requests and receiving responses into two separate methods.
+     This can be used for sending multiple requests without awaiting the response.
+     Then you can listen to the responses in the order they were sent. See README for usage.
+
+     Each AsyncTTSContext will close automatically when a done message is received for that context.
+     This happens when the no_more_inputs method is called (equivalent to sending a request with `continue_ = False`),
+     or if no requests have been sent for 5 seconds on the same context. It also closes if there is an error.
+     """
+
+     def __init__(self, context_id: str, websocket: "_AsyncWebSocket", timeout: float):
+         self._context_id = context_id
+         self._websocket = websocket
+         self.timeout = timeout
+         self._error = None
+
+     @property
+     def context_id(self) -> str:
+         return self._context_id
+
+     async def send(
+         self,
+         model_id: str,
+         transcript: str,
+         output_format: OutputFormat,
+         voice_id: Optional[str] = None,
+         voice_embedding: Optional[List[float]] = None,
+         context_id: Optional[str] = None,
+         continue_: bool = False,
+         duration: Optional[int] = None,
+         language: Optional[str] = None,
+         add_timestamps: bool = False,
+         _experimental_voice_controls: Optional[VoiceControls] = None,
+     ) -> None:
+         """Send audio generation requests to the WebSocket. The response can be received using the `receive` method.
+
+         Args:
+             model_id: The ID of the model to use for generating audio.
+             transcript: The text to convert to speech.
+             output_format: A dictionary containing the details of the output format.
+             voice_id: The ID of the voice to use for generating audio.
+             voice_embedding: The embedding of the voice to use for generating audio.
+             context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
+             continue_: Whether to continue the audio generation from the previous transcript or not.
+             duration: The duration of the audio in seconds.
+             language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
+             add_timestamps: Whether to return word-level timestamps.
+             _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
+                 Note: This is an experimental feature and may change rapidly in future releases.
+
+         Returns:
+             None.
+         """
+         if context_id is not None and context_id != self._context_id:
+             raise ValueError("Context ID does not match the context ID of the current context.")
+         if continue_ and transcript == "":
+             raise ValueError("Transcript cannot be empty when continue_ is True.")
+
+         await self._websocket.connect()
+
+         request_body = _construct_tts_request(
+             model_id=model_id,
+             transcript=transcript,
+             output_format=output_format,
+             voice_id=voice_id,
+             voice_embedding=voice_embedding,
+             duration=duration,
+             language=language,
+             context_id=self._context_id,
+             add_timestamps=add_timestamps,
+             continue_=continue_,
+             _experimental_voice_controls=_experimental_voice_controls,
+         )
+
+         await self._websocket.websocket.send_json(request_body)
+
+         # Start listening for responses on the WebSocket
+         self._websocket._dispatch_listener()
+
+     async def no_more_inputs(self) -> None:
+         """Send a request to the WebSocket to indicate that no more requests will be sent."""
+         await self.send(
+             model_id=DEFAULT_MODEL_ID,
+             transcript="",
+             output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
+             voice_embedding=DEFAULT_VOICE_EMBEDDING,  # Default voice embedding since it's a required input for now.
+             context_id=self._context_id,
+             continue_=False,
+         )
+
+     async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
+         """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
+
+         Returns:
+             An async generator that yields audio chunks. Each chunk is a dictionary containing the audio as bytes.
+         """
+         try:
+             while True:
+                 response = await self._websocket._get_message(
+                     self._context_id, timeout=self.timeout
+                 )
+                 if "error" in response:
+                     raise RuntimeError(f"Error generating audio:\n{response['error']}")
+                 if response["done"]:
+                     break
+                 yield self._websocket._convert_response(response, include_context_id=True)
+         except Exception as e:
+             if isinstance(e, asyncio.TimeoutError):
+                 raise RuntimeError("Timeout while waiting for audio chunk")
+             raise RuntimeError(f"Failed to generate audio:\n{e}")
+         finally:
+             self._close()
+
+     def _close(self) -> None:
+         """Closes the context. Automatically called when a done message is received for this context."""
+         self._websocket._remove_context(self._context_id)
+
+     def is_closed(self):
+         """Check if the context is closed or not. Returns True if closed."""
+         return self._context_id not in self._websocket._context_queues
+
+     async def __aenter__(self):
+         return self
+
+     async def __aexit__(
+         self,
+         exc_type: Union[type, None],
+         exc: Union[BaseException, None],
+         exc_tb: Union[TracebackType, None],
+     ):
+         self._close()
+
+     def __del__(self):
+         self._close()
+
+
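Based on the docstring above, a sketch of the send/receive split on one context. Here `ws` is assumed to be a connected `_AsyncWebSocket` (defined next); how the client hands one out is not shown in this diff.

    async def speak(ws, output_format):
        ctx = ws.context()  # a random context_id is generated
        # Queue several requests on the same context without awaiting any audio yet.
        for sentence in ["Hello world!", "And a continuation."]:
            await ctx.send(
                model_id="sonic-english",
                transcript=sentence,
                voice_id="your-voice-id",
                output_format=output_format,
                continue_=True,
            )
        await ctx.no_more_inputs()  # equivalent to a final request with continue_=False
        # Chunks arrive in the order the requests were sent; the context
        # closes itself once the done message is received.
        async for chunk in ctx.receive():
            audio = chunk["audio"]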
+ class _AsyncWebSocket(_WebSocket):
+     """This class contains methods to generate audio using WebSocket asynchronously."""
+
+     def __init__(
+         self,
+         ws_url: str,
+         api_key: str,
+         cartesia_version: str,
+         timeout: float,
+         get_session: Callable[[], Optional[aiohttp.ClientSession]],
+     ):
+         """
+         Args:
+             ws_url: The WebSocket URL for the Cartesia API.
+             api_key: The API key to use for authorization.
+             cartesia_version: The version of the Cartesia API to use.
+             timeout: The timeout for responses on the WebSocket in seconds.
+             get_session: A function that returns an aiohttp.ClientSession object.
+         """
+         super().__init__(ws_url, api_key, cartesia_version)
+         self.timeout = timeout
+         self._get_session = get_session
+         self.websocket = None
+         self._context_queues: Dict[str, asyncio.Queue] = {}
+         self._processing_task: asyncio.Task = None
+
+     def __del__(self):
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             loop = None
+
+         if loop is None:
+             asyncio.run(self.close())
+         elif loop.is_running():
+             loop.create_task(self.close())
+
+     async def connect(self):
+         if self.websocket is None or self._is_websocket_closed():
+             route = "tts/websocket"
+             session = await self._get_session()
+             url = f"{self.ws_url}/{route}?api_key={self.api_key}&cartesia_version={self.cartesia_version}"
+             try:
+                 self.websocket = await session.ws_connect(url)
+             except Exception as e:
+                 raise RuntimeError(f"Failed to connect to WebSocket at {url}. {e}")
+
+     def _is_websocket_closed(self):
+         return self.websocket.closed
+
+     async def close(self):
+         """This method closes the websocket connection. *Highly* recommended to call this method when done."""
+         if self.websocket is not None and not self._is_websocket_closed():
+             await self.websocket.close()
+         if self._processing_task:
+             self._processing_task.cancel()
+             try:
+                 self._processing_task = None
+             except asyncio.CancelledError:
+                 pass
+             except TypeError as e:
+                 # Ignore the error if the task is already cancelled
+                 # For some reason we are getting None responses
+                 # TODO: This needs to be fixed - we need to think about why we are getting None responses.
+                 if "Received message 256:None" not in str(e):
+                     raise e
+
+         for context_id in list(self._context_queues.keys()):
+             self._remove_context(context_id)
+
+         self._context_queues.clear()
+         self._processing_task = None
+         self.websocket = None
+
+     async def send(
+         self,
+         model_id: str,
+         transcript: str,
+         output_format: OutputFormat,
+         voice_id: Optional[str] = None,
+         voice_embedding: Optional[List[float]] = None,
+         context_id: Optional[str] = None,
+         duration: Optional[int] = None,
+         language: Optional[str] = None,
+         stream: bool = True,
+         add_timestamps: bool = False,
+         _experimental_voice_controls: Optional[VoiceControls] = None,
+     ) -> Union[bytes, AsyncGenerator[bytes, None]]:
+         """See :meth:`_WebSocket.send` for details."""
+         if context_id is None:
+             context_id = str(uuid.uuid4())
+
+         ctx = self.context(context_id)
+
+         await ctx.send(
+             model_id=model_id,
+             transcript=transcript,
+             output_format=output_format,
+             voice_id=voice_id,
+             voice_embedding=voice_embedding,
+             context_id=context_id,
+             duration=duration,
+             language=language,
+             continue_=False,
+             add_timestamps=add_timestamps,
+             _experimental_voice_controls=_experimental_voice_controls,
+         )
+
+         generator = ctx.receive()
+
+         if stream:
+             return generator
+
+         chunks = []
+         word_timestamps = defaultdict(list)
+         async for chunk in generator:
+             if "audio" in chunk:
+                 chunks.append(chunk["audio"])
+             if add_timestamps and "word_timestamps" in chunk:
+                 for k, v in chunk["word_timestamps"].items():
+                     word_timestamps[k].extend(v)
+         out = {"audio": b"".join(chunks), "context_id": context_id}
+         if add_timestamps:
+             out["word_timestamps"] = word_timestamps
+         return out
+
+     async def _process_responses(self):
+         try:
+             while True:
+                 response = await self.websocket.receive_json()
+                 if response["context_id"]:
+                     context_id = response["context_id"]
+                     if context_id in self._context_queues:
+                         await self._context_queues[context_id].put(response)
+         except Exception as e:
+             self._error = e
+             raise e
+
+     async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
+         if context_id not in self._context_queues:
+             raise ValueError(f"Context ID {context_id} not found.")
+         return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)
+
+     def _remove_context(self, context_id: str):
+         if context_id in self._context_queues:
+             del self._context_queues[context_id]
+
+     def _dispatch_listener(self):
+         if self._processing_task is None or self._processing_task.done():
+             self._processing_task = asyncio.create_task(self._process_responses())
+
+     def context(self, context_id: Optional[str] = None) -> _AsyncTTSContext:
+         if context_id in self._context_queues:
+             raise ValueError(f"AsyncContext for context ID {context_id} already exists.")
+         if context_id is None:
+             context_id = str(uuid.uuid4())
+         if context_id not in self._context_queues:
+             self._context_queues[context_id] = asyncio.Queue()
+         return _AsyncTTSContext(context_id, self, self.timeout)
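A sketch of the non-streaming path through `send` above, which joins the audio chunks and merges word timestamps. How a connected `ws` instance is obtained is not part of this diff, so that part is assumed.

    out = await ws.send(
        model_id="sonic-english",
        transcript="Hello world!",
        voice_id="your-voice-id",
        output_format={"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
        stream=False,
        add_timestamps=True,
    )
    audio = out["audio"]            # all chunks joined into a single bytes object
    words = out["word_timestamps"]  # per-key lists merged across chunks
    await ws.close()                # cancels the listener task and clears context queues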
cartesia/_constants.py ADDED
@@ -0,0 +1,10 @@
+ DEFAULT_MODEL_ID = "sonic-english"  # latest default model
+ MULTILINGUAL_MODEL_ID = "sonic-multilingual"  # latest multilingual model
+ DEFAULT_BASE_URL = "api.cartesia.ai"
+ DEFAULT_CARTESIA_VERSION = "2024-06-10"  # latest version
+ DEFAULT_TIMEOUT = 30  # seconds
+ DEFAULT_NUM_CONNECTIONS = 10  # connections per client
+ DEFAULT_VOICE_EMBEDDING = [1.0] * 192
+
+ BACKOFF_FACTOR = 1
+ MAX_RETRIES = 3
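The retry helpers in `cartesia.utils.retry` are not included in this diff, so the exact schedule is not visible here; a conventional exponential backoff built from these two constants would look like this sketch (the `retry_delays` helper is hypothetical):

    def retry_delays(backoff_factor: int = BACKOFF_FACTOR, max_retries: int = MAX_RETRIES):
        # With BACKOFF_FACTOR = 1 and MAX_RETRIES = 3: delays of 1, 2, and 4 seconds.
        return [backoff_factor * (2 ** attempt) for attempt in range(max_retries)]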
cartesia/_logger.py ADDED
@@ -0,0 +1,3 @@
+ import logging
+
+ logger = logging.getLogger(__name__)
cartesia/_sse.py ADDED
@@ -0,0 +1,143 @@
+ import base64
+ import json
+ from typing import Any, Dict, Generator, List, Optional, Tuple, Union
+
+ import requests
+
+ from cartesia._constants import BACKOFF_FACTOR, MAX_RETRIES
+ from cartesia._logger import logger
+ from cartesia._types import OutputFormat, VoiceControls
+ from cartesia.utils.retry import retry_on_connection_error
+ from cartesia.utils.tts import _construct_tts_request, _validate_and_construct_voice
+
+
+ class _SSE:
+     """This class contains methods to generate audio using Server-Sent Events.
+
+     Usage:
+         >>> for audio_chunk in client.tts.sse(
+         ...     model_id="sonic-english", transcript="Hello world!", voice_embedding=embedding,
+         ...     output_format={"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100}, stream=True
+         ... ):
+         ...     audio = audio_chunk["audio"]
+     """
+
+     def __init__(
+         self,
+         http_url: str,
+         headers: Dict[str, str],
+         timeout: float,
+     ):
+         self.http_url = http_url
+         self.headers = headers
+         self.timeout = timeout
+
+     def _update_buffer(self, buffer: str, chunk_bytes: bytes) -> Tuple[str, List[Dict[str, Any]]]:
+         buffer += chunk_bytes.decode("utf-8")
+         outputs = []
+         while "{" in buffer and "}" in buffer:
+             start_index = buffer.find("{")
+             end_index = buffer.find("}", start_index)
+             if start_index != -1 and end_index != -1:
+                 try:
+                     chunk_json = json.loads(buffer[start_index : end_index + 1])
+                     if "error" in chunk_json:
+                         raise RuntimeError(f"Error generating audio:\n{chunk_json['error']}")
+                     if chunk_json["done"]:
+                         break
+                     audio = base64.b64decode(chunk_json["data"])
+                     outputs.append({"audio": audio})
+                     buffer = buffer[end_index + 1 :]
+                 except json.JSONDecodeError:
+                     break
+         return buffer, outputs
+
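A worked example of the buffering above: complete JSON messages are parsed and their base64 `data` decoded, while a trailing partial message is carried over for the next chunk (constructor arguments here are illustrative):

    sse = _SSE(http_url="https://api.cartesia.ai", headers={}, timeout=30)
    buffer, outputs = sse._update_buffer(
        buffer="",
        chunk_bytes=b'{"data": "AAAA", "done": false}{"data": "BB',
    )
    assert outputs == [{"audio": b"\x00\x00\x00"}]  # base64 "AAAA" decodes to three zero bytes
    assert buffer == '{"data": "BB'  # incomplete JSON is kept until more bytes arrive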
+     def send(
+         self,
+         model_id: str,
+         transcript: str,
+         output_format: OutputFormat,
+         voice_id: Optional[str] = None,
+         voice_embedding: Optional[List[float]] = None,
+         duration: Optional[int] = None,
+         language: Optional[str] = None,
+         stream: bool = True,
+         _experimental_voice_controls: Optional[VoiceControls] = None,
+     ) -> Union[bytes, Generator[bytes, None, None]]:
+         """Send a request to the server to generate audio using Server-Sent Events.
+
+         Args:
+             model_id: The ID of the model to use for generating audio.
+             transcript: The text to convert to speech.
+             voice_id: The ID of the voice to use for generating audio.
+             voice_embedding: The embedding of the voice to use for generating audio.
+             output_format: A dictionary containing the details of the output format.
+             duration: The duration of the audio in seconds.
+             language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
+             stream: Whether to stream the audio or not.
+             _experimental_voice_controls: Experimental voice controls for controlling speed and emotion.
+                 Note: This is an experimental feature and may change rapidly in future releases.
+
+         Returns:
+             If `stream` is True, the method returns a generator that yields chunks. Each chunk is a dictionary.
+             If `stream` is False, the method returns a dictionary.
+             Both the generator and the dictionary contain the following key(s):
+             - audio: The audio as bytes.
+         """
+         request_body = _construct_tts_request(
+             model_id=model_id,
+             transcript=transcript,
+             output_format=output_format,
+             voice_id=voice_id,
+             voice_embedding=voice_embedding,
+             duration=duration,
+             language=language,
+             _experimental_voice_controls=_experimental_voice_controls,
+         )
+
+         generator = self._sse_generator_wrapper(request_body)
+
+         if stream:
+             return generator
+
+         chunks = []
+         for chunk in generator:
+             chunks.append(chunk["audio"])
+
+         return {"audio": b"".join(chunks)}
+
+     @retry_on_connection_error(
+         max_retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR, logger=logger
+     )
+     def _sse_generator_wrapper(self, request_body: Dict[str, Any]):
+         """Need to wrap the sse generator in a function for the retry decorator to work."""
+         try:
+             for chunk in self._sse_generator(request_body):
+                 yield chunk
+         except Exception as e:
+             raise RuntimeError(f"Error generating audio. {e}")
+
+     def _sse_generator(self, request_body: Dict[str, Any]):
+         response = requests.post(
+             f"{self.http_url}/tts/sse",
+             stream=True,
+             data=json.dumps(request_body),
+             headers=self.headers,
+             timeout=(self.timeout, self.timeout),
+         )
+         if not response.ok:
+             raise ValueError(f"Failed to generate audio. {response.text}")
+
+         buffer = ""
+         for chunk_bytes in response.iter_content(chunk_size=None):
+             buffer, outputs = self._update_buffer(buffer=buffer, chunk_bytes=chunk_bytes)
+             for output in outputs:
+                 yield output
+
+         if buffer:
+             try:
+                 chunk_json = json.loads(buffer)
+                 audio = base64.b64decode(chunk_json["data"])
+                 yield {"audio": audio}
+             except json.JSONDecodeError:
+                 pass
cartesia/_types.py ADDED
@@ -0,0 +1,103 @@
+ from typing import List, Optional, TypedDict, Union
+
+ from cartesia.utils.deprecated import deprecated
+
+
+ class OutputFormatMapping:
+     _format_mapping = {
+         "raw_pcm_f32le_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+         "raw_pcm_s16le_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+         "raw_pcm_f32le_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
+         "raw_pcm_s16le_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
+         "raw_pcm_f32le_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
+         "raw_pcm_s16le_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
+         "raw_pcm_f32le_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
+         "raw_pcm_s16le_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
+         "raw_pcm_f32le_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
+         "raw_pcm_s16le_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
+         "raw_pcm_mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
+         "raw_pcm_alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
+     }
+
+     @classmethod
+     def get_format(cls, format_name):
+         if format_name in cls._format_mapping:
+             return cls._format_mapping[format_name]
+         else:
+             raise ValueError(f"Unsupported format: {format_name}")
+
+
+ class DeprecatedOutputFormatMapping:
+     """Deprecated formats as of v1.0.1. These will be removed in v1.2.0. Use :class:`OutputFormatMapping` instead."""
+
+     _format_mapping = {
+         "fp32": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+         "pcm": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+         "fp32_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
+         "fp32_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
+         "fp32_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
+         "fp32_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
+         "fp32_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
+         "pcm_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
+         "pcm_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
+         "pcm_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
+         "pcm_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
+         "pcm_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
+         "mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
+         "alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
+     }
+
+     @classmethod
+     @deprecated(
+         vdeprecated="1.0.1",
+         vremove="1.2.0",
+         reason="Old output format names are being deprecated in favor of names aligned with the Cartesia API. Use names from `OutputFormatMapping` instead.",
+     )
+     def get_format_deprecated(cls, format_name):
+         if format_name in cls._format_mapping:
+             return cls._format_mapping[format_name]
+         else:
+             raise ValueError(f"Unsupported format: {format_name}")
+
+
+ class VoiceMetadata(TypedDict):
+     id: str
+     name: str
+     description: str
+     embedding: List[float]
+     is_public: bool
+     user_id: str
+     created_at: str
+     language: str
+     base_voice_id: Optional[str] = None
+
+
+ class VoiceControls(TypedDict):
+     """Defines different voice control parameters for voice synthesis.
+
+     For a complete list of supported parameters, refer to the Cartesia API documentation.
+     https://docs.cartesia.ai/reference/api-reference
+
+     Examples:
+         >>> {"speed": "fastest"}
+         >>> {"speed": "slow", "emotion": ["sadness:high"]}
+         >>> {"emotion": ["surprise:highest", "curiosity"]}
+
+     Note:
+         This is an experimental class and is subject to rapid change in future versions.
+     """
+
+     speed: Union[str, float] = ""
+     emotion: List[str] = []
+
+
+ class OutputFormat(TypedDict):
+     container: str
+     encoding: str
+     sample_rate: int
+
+
+ class EventType:
+     NULL = ""
+     AUDIO = "chunk"
+     TIMESTAMPS = "timestamps"
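For reference, the mappings above resolve short names to full output-format dictionaries and raise `ValueError` for unknown names:

    fmt = OutputFormatMapping.get_format("raw_pcm_f32le_44100")
    # {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100}

    OutputFormatMapping.get_format("not_a_format")  # raises ValueError: Unsupported format: not_a_format

Old names such as "fp32_44100" still resolve through `DeprecatedOutputFormatMapping.get_format_deprecated` until v1.2.0, which the `@deprecated` decorator presumably flags at call time (its implementation, in `cartesia.utils.deprecated`, is not part of this diff).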