PyPI - cartesia - Versions diffs - 2.0.0b1__py3-none-any.whl → 2.0.0b7__py3-none-any.whl - Mend

cartesia 2.0.0b1__py3-none-any.whl → 2.0.0b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

cartesia/__init__.py +8 -4
cartesia/base_client.py +0 -4
cartesia/core/__init__.py +3 -0
cartesia/core/client_wrapper.py +2 -2
cartesia/core/pagination.py +88 -0
cartesia/infill/client.py +4 -4
cartesia/tts/_async_websocket.py +48 -1
cartesia/tts/_websocket.py +44 -3
cartesia/tts/client.py +4 -4
cartesia/tts/requests/generation_request.py +5 -0
cartesia/tts/requests/web_socket_chunk_response.py +3 -0
cartesia/tts/requests/web_socket_response.py +2 -1
cartesia/tts/requests/web_socket_tts_request.py +1 -0
cartesia/tts/types/emotion.py +5 -0
cartesia/tts/types/generation_request.py +5 -0
cartesia/tts/types/web_socket_chunk_response.py +3 -1
cartesia/tts/types/web_socket_response.py +2 -1
cartesia/tts/types/web_socket_tts_output.py +2 -0
cartesia/tts/types/web_socket_tts_request.py +1 -0
cartesia/tts/utils/constants.py +2 -2
cartesia/voice_changer/requests/streaming_response.py +2 -0
cartesia/voice_changer/types/streaming_response.py +2 -0
cartesia/voices/__init__.py +8 -4
cartesia/voices/client.py +285 -169
cartesia/voices/requests/__init__.py +2 -0
cartesia/voices/requests/create_voice_request.py +0 -2
cartesia/voices/requests/get_voices_response.py +24 -0
cartesia/voices/requests/localize_dialect.py +1 -3
cartesia/voices/requests/voice.py +13 -9
cartesia/voices/types/__init__.py +6 -4
cartesia/voices/types/create_voice_request.py +0 -2
cartesia/voices/types/gender_presentation.py +5 -0
cartesia/voices/types/get_voices_response.py +34 -0
cartesia/voices/types/localize_dialect.py +1 -3
cartesia/voices/types/voice.py +13 -9
cartesia/voices/types/voice_expand_options.py +5 -0
{cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/METADATA +151 -49
{cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/RECORD +39 -37
cartesia/datasets/client.py +0 -392
cartesia/voices/types/localize_portuguese_dialect.py +0 -5
cartesia/voices/types/localize_spanish_dialect.py +0 -5
{cartesia-2.0.0b1.dist-info → cartesia-2.0.0b7.dist-info}/WHEEL +0 -0

cartesia/__init__.py CHANGED Viewed

@@ -121,13 +121,14 @@ from .voices import (
     EmbeddingSpecifier,
     EmbeddingSpecifierParams,
     Gender,
+    GenderPresentation,
+    GetVoicesResponse,
+    GetVoicesResponseParams,
     IdSpecifier,
     IdSpecifierParams,
     LocalizeDialect,
     LocalizeDialectParams,
     LocalizeEnglishDialect,
-    LocalizePortugueseDialect,
-    LocalizeSpanishDialect,
     LocalizeTargetLanguage,
     LocalizeVoiceRequest,
     LocalizeVoiceRequestParams,
@@ -138,6 +139,7 @@ from .voices import (
     UpdateVoiceRequest,
     UpdateVoiceRequestParams,
     Voice,
+    VoiceExpandOptions,
     VoiceId,
     VoiceMetadata,
     VoiceMetadataParams,
@@ -175,15 +177,16 @@ __all__ = [
     "FilePurpose",
     "FlushId",
     "Gender",
+    "GenderPresentation",
     "GenerationRequest",
     "GenerationRequestParams",
+    "GetVoicesResponse",
+    "GetVoicesResponseParams",
     "IdSpecifier",
     "IdSpecifierParams",
     "LocalizeDialect",
     "LocalizeDialectParams",
     "LocalizeEnglishDialect",
-    "LocalizePortugueseDialect",
-    "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
     "LocalizeVoiceRequest",
     "LocalizeVoiceRequestParams",
@@ -235,6 +238,7 @@ __all__ = [
     "UpdateVoiceRequest",
     "UpdateVoiceRequestParams",
     "Voice",
+    "VoiceExpandOptions",
     "VoiceId",
     "VoiceMetadata",
     "VoiceMetadataParams",

cartesia/base_client.py CHANGED Viewed

@@ -5,14 +5,12 @@ from .environment import CartesiaEnvironment
 import httpx
 from .core.client_wrapper import SyncClientWrapper
 from .api_status.client import ApiStatusClient
-from .datasets.client import DatasetsClient
 from .infill.client import InfillClient
 from .tts.client import TtsClient
 from .voice_changer.client import VoiceChangerClient
 from .voices.client import VoicesClient
 from .core.client_wrapper import AsyncClientWrapper
 from .api_status.client import AsyncApiStatusClient
-from .datasets.client import AsyncDatasetsClient
 from .infill.client import AsyncInfillClient
 from .tts.client import AsyncTtsClient
 from .voice_changer.client import AsyncVoiceChangerClient
@@ -78,7 +76,6 @@ class BaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = ApiStatusClient(client_wrapper=self._client_wrapper)
-        self.datasets = DatasetsClient(client_wrapper=self._client_wrapper)
         self.infill = InfillClient(client_wrapper=self._client_wrapper)
         self.tts = TtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = VoiceChangerClient(client_wrapper=self._client_wrapper)
@@ -144,7 +141,6 @@ class AsyncBaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = AsyncApiStatusClient(client_wrapper=self._client_wrapper)
-        self.datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper)
         self.infill = AsyncInfillClient(client_wrapper=self._client_wrapper)
         self.tts = AsyncTtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = AsyncVoiceChangerClient(client_wrapper=self._client_wrapper)

cartesia/core/__init__.py CHANGED Viewed

@@ -6,6 +6,7 @@ from .datetime_utils import serialize_datetime
 from .file import File, convert_file_dict_to_httpx_tuples, with_content_type
 from .http_client import AsyncHttpClient, HttpClient
 from .jsonable_encoder import jsonable_encoder
+from .pagination import AsyncPager, SyncPager
 from .pydantic_utilities import (
     IS_PYDANTIC_V2,
     UniversalBaseModel,
@@ -24,6 +25,7 @@ __all__ = [
     "ApiError",
     "AsyncClientWrapper",
     "AsyncHttpClient",
+    "AsyncPager",
     "BaseClientWrapper",
     "FieldMetadata",
     "File",
@@ -31,6 +33,7 @@ __all__ = [
     "IS_PYDANTIC_V2",
     "RequestOptions",
     "SyncClientWrapper",
+    "SyncPager",
     "UniversalBaseModel",
     "UniversalRootModel",
     "convert_and_respect_annotation_metadata",

cartesia/core/client_wrapper.py CHANGED Viewed

@@ -16,10 +16,10 @@ class BaseClientWrapper:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cartesia",
-            "X-Fern-SDK-Version": "2.0.0b1",
+            "X-Fern-SDK-Version": "2.0.0b7",
         }
         headers["X-API-Key"] = self.api_key
-        headers["Cartesia-Version"] = "2024-06-10"
+        headers["Cartesia-Version"] = "2024-11-13"
         return headers
     def get_base_url(self) -> str:

cartesia/core/pagination.py ADDED Viewed

@@ -0,0 +1,88 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from typing_extensions import Self
+import pydantic
+# Generic to represent the underlying type of the results within a page
+T = typing.TypeVar("T")
+# SDKs implement a Page ABC per-pagination request, the endpoint then returns a pager that wraps this type
+# for example, an endpoint will return SyncPager[UserPage] where UserPage implements the Page ABC. ex:
+#
+# SyncPager<InnerListType>(
+#     has_next=response.list_metadata.after is not None,
+#     items=response.data,
+#     # This should be the outer function that returns the SyncPager again
+#     get_next=lambda: list(..., cursor: response.cursor) (or list(..., offset: offset + 1))
+# )
+class BasePage(pydantic.BaseModel, typing.Generic[T]):
+    has_next: bool
+    items: typing.Optional[typing.List[T]]
+class SyncPage(BasePage[T], typing.Generic[T]):
+    get_next: typing.Optional[typing.Callable[[], typing.Optional[Self]]]
+class AsyncPage(BasePage[T], typing.Generic[T]):
+    get_next: typing.Optional[typing.Callable[[], typing.Awaitable[typing.Optional[Self]]]]
+# ----------------------------
+class SyncPager(SyncPage[T], typing.Generic[T]):
+    # Here we type ignore the iterator to avoid a mypy error
+    # caused by the type conflict with Pydanitc's __iter__ method
+    # brought in by extending the base model
+    def __iter__(self) -> typing.Iterator[T]:  # type: ignore
+        for page in self.iter_pages():
+            if page.items is not None:
+                for item in page.items:
+                    yield item
+    def iter_pages(self) -> typing.Iterator[SyncPage[T]]:
+        page: typing.Union[SyncPager[T], None] = self
+        while True:
+            if page is not None:
+                yield page
+                if page.has_next and page.get_next is not None:
+                    page = page.get_next()
+                    if page is None or page.items is None or len(page.items) == 0:
+                        return
+                else:
+                    return
+            else:
+                return
+    def next_page(self) -> typing.Optional[SyncPage[T]]:
+        return self.get_next() if self.get_next is not None else None
+class AsyncPager(AsyncPage[T], typing.Generic[T]):
+    async def __aiter__(self) -> typing.AsyncIterator[T]:  # type: ignore
+        async for page in self.iter_pages():
+            if page.items is not None:
+                for item in page.items:
+                    yield item
+    async def iter_pages(self) -> typing.AsyncIterator[AsyncPage[T]]:
+        page: typing.Union[AsyncPager[T], None] = self
+        while True:
+            if page is not None:
+                yield page
+                if page is not None and page.has_next and page.get_next is not None:
+                    page = await page.get_next()
+                    if page is None or page.items is None or len(page.items) == 0:
+                        return
+                else:
+                    return
+            else:
+                return
+    async def next_page(self) -> typing.Optional[AsyncPage[T]]:
+        return await self.get_next() if self.get_next is not None else None

cartesia/infill/client.py CHANGED Viewed

@@ -42,7 +42,7 @@ class InfillClient:
         **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
-        Only the `sonic-preview` model is supported for infill at this time.
+        Infilling is only available on `sonic-2` at this time.
         At least one of `left_audio` or `right_audio` must be provided.
@@ -117,7 +117,7 @@ class InfillClient:
             api_key="YOUR_API_KEY",
         )
         client.infill.bytes(
-            model_id="sonic-preview",
+            model_id="sonic-2",
             language="en",
             transcript="middle segment",
             voice_id="694f9389-aac1-45b6-b726-9d9369183238",
@@ -189,7 +189,7 @@ class AsyncInfillClient:
         **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
-        Only the `sonic-preview` model is supported for infill at this time.
+        Infilling is only available on `sonic-2` at this time.
         At least one of `left_audio` or `right_audio` must be provided.
@@ -269,7 +269,7 @@ class AsyncInfillClient:
         async def main() -> None:
             await client.infill.bytes(
-                model_id="sonic-preview",
+                model_id="sonic-2",
                 language="en",
                 transcript="middle segment",
                 voice_id="694f9389-aac1-45b6-b726-9d9369183238",

cartesia/tts/_async_websocket.py CHANGED Viewed

@@ -17,6 +17,7 @@ from cartesia.tts.types import (
     WebSocketResponse_FlushDone,
     WebSocketTtsOutput,
     WordTimestamps,
+    PhonemeTimestamps,
 )
 from ..core.pydantic_utilities import parse_obj_as
@@ -67,6 +68,7 @@ class _AsyncTTSContext:
         language: Optional[str] = None,
         stream: bool = True,
         add_timestamps: bool = False,
+        add_phoneme_timestamps: bool = False,
         continue_: bool = False,
         flush: bool = False,
     ) -> None:
@@ -102,6 +104,8 @@ class _AsyncTTSContext:
             request_body["stream"] = stream
         if add_timestamps:
             request_body["add_timestamps"] = add_timestamps
+        if add_phoneme_timestamps:
+            request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
         if continue_:
             request_body["continue"] = continue_
         if flush:
@@ -229,6 +233,11 @@ class _AsyncTTSContext:
         finally:
             self._close()
+    async def cancel(self):
+        """Cancel the context. This will stop the generation of audio for this context."""
+        await self._websocket.websocket.send_json({"context_id": self._context_id, "cancel": True})
+        self._close()
     def _close(self) -> None:
         """Closes the context. Automatically called when a done message is received for this context."""
         self._websocket._remove_context(self._context_id)
@@ -297,7 +306,26 @@ class AsyncTtsWebsocket(TtsWebsocket):
             try:
                 self.websocket = await session.ws_connect(url)
             except Exception as e:
-                raise RuntimeError(f"Failed to connect to WebSocket at {url}. {e}")
+                # Extract status code if available
+                status_code = None
+                error_message = str(e)
+                if hasattr(e, 'status') and e.status is not None:
+                    status_code = e.status
+                    # Create a meaningful error message based on status code
+                    if status_code == 402:
+                        error_message = "Payment required. Your API key may have insufficient credits or permissions."
+                    elif status_code == 401:
+                        error_message = "Unauthorized. Please check your API key."
+                    elif status_code == 403:
+                        error_message = "Forbidden. You don't have permission to access this resource."
+                    elif status_code == 404:
+                        error_message = "Not found. The requested resource doesn't exist."
+                    raise RuntimeError(f"Failed to connect to WebSocket.\nStatus: {status_code}. Error message: {error_message}")
+                else:
+                    raise RuntimeError(f"Failed to connect to WebSocket at {url}. {e}")
     def _is_websocket_closed(self):
         return self.websocket.closed
@@ -338,6 +366,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
         language: Optional[str] = None,
         stream: bool = True,
         add_timestamps: bool = False,
+        add_phoneme_timestamps: bool = False,
     ):
         """See :meth:`_WebSocket.send` for details."""
         if context_id is None:
@@ -355,6 +384,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
             language=language,
             continue_=False,
             add_timestamps=add_timestamps,
+            add_phoneme_timestamps=add_phoneme_timestamps,
         )
         generator = ctx.receive()
@@ -366,6 +396,9 @@ class AsyncTtsWebsocket(TtsWebsocket):
         words: typing.List[str] = []
         start: typing.List[float] = []
         end: typing.List[float] = []
+        phonemes: typing.List[str] = []
+        phoneme_start: typing.List[float] = []
+        phoneme_end: typing.List[float] = []
         async for chunk in generator:
             if chunk.audio is not None:
                 chunks.append(chunk.audio)
@@ -374,6 +407,11 @@ class AsyncTtsWebsocket(TtsWebsocket):
                     words.extend(chunk.word_timestamps.words)
                     start.extend(chunk.word_timestamps.start)
                     end.extend(chunk.word_timestamps.end)
+            if add_phoneme_timestamps and chunk.phoneme_timestamps is not None:
+                if chunk.phoneme_timestamps is not None:
+                    phonemes.extend(chunk.phoneme_timestamps.phonemes)
+                    phoneme_start.extend(chunk.phoneme_timestamps.start)
+                    phoneme_end.extend(chunk.phoneme_timestamps.end)
         return WebSocketTtsOutput(
             audio=b"".join(chunks),  # type: ignore
@@ -387,6 +425,15 @@ class AsyncTtsWebsocket(TtsWebsocket):
                 if add_timestamps
                 else None
             ),
+            phoneme_timestamps=(
+                PhonemeTimestamps(
+                    phonemes=phonemes,
+                    start=phoneme_start,
+                    end=phoneme_end,
+                )
+                if add_phoneme_timestamps
+                else None
+            ),
         )
     async def _process_responses(self):

cartesia/tts/_websocket.py CHANGED Viewed

@@ -26,6 +26,7 @@ from cartesia.tts.types import (
     WebSocketResponse_Timestamps,
     WebSocketTtsOutput,
     WordTimestamps,
+    PhonemeTimestamps,
 )
 from ..core.pydantic_utilities import parse_obj_as
@@ -58,7 +59,7 @@ class _TTSContext:
         self,
         *,
         model_id: str,
-        transcript: str,
+        transcript: typing.Generator[str, None, None],
         output_format: OutputFormatParams,
         voice: TtsRequestVoiceSpecifierParams,
         context_id: Optional[str] = None,
@@ -235,7 +236,7 @@ class TtsWebsocket:
     Usage:
         >>> ws = client.tts.websocket()
         >>> generation_request = GenerationRequest(
-        ...     model_id="sonic-english",
+        ...     model_id="sonic-2",
         ...     transcript="Hello world!",
         ...     voice_embedding=embedding
         ...     output_format={"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100}
@@ -281,7 +282,26 @@ class TtsWebsocket:
                     f"{self.ws_url}/{route}?api_key={self.api_key}&cartesia_version={self.cartesia_version}"
                 )
             except Exception as e:
-                raise RuntimeError(f"Failed to connect to WebSocket. {e}")
+                # Extract status code if available
+                status_code = None
+                error_message = str(e)
+                if hasattr(e, 'status') and e.status is not None:
+                    status_code = e.status
+                    # Create a meaningful error message based on status code
+                    if status_code == 402:
+                        error_message = "Payment required. Your API key may have insufficient credits or permissions."
+                    elif status_code == 401:
+                        error_message = "Unauthorized. Please check your API key."
+                    elif status_code == 403:
+                        error_message = "Forbidden. You don't have permission to access this resource."
+                    elif status_code == 404:
+                        error_message = "Not found. The requested resource doesn't exist."
+                    raise RuntimeError(f"Failed to connect to WebSocket.\nStatus: {status_code}. Error message: {error_message}")
+                else:
+                    raise RuntimeError(f"Failed to connect to WebSocket. {e}")
     def _is_websocket_closed(self):
         return self.websocket.socket.fileno() == -1
@@ -310,6 +330,8 @@ class TtsWebsocket:
             out["audio"] = base64.b64decode(response.data)
         elif isinstance(response, WebSocketResponse_Timestamps):
             out["word_timestamps"] = response.word_timestamps  # type: ignore
+        elif isinstance(response, WebSocketResponse_PhonemeTimestamps):
+            out["phoneme_timestamps"] = response.phoneme_timestamps  # type: ignore
         elif include_flush_id and isinstance(response, WebSocketResponse_FlushDone):
             out["flush_done"] = response.flush_done  # type: ignore
             out["flush_id"] = response.flush_id  # type: ignore
@@ -331,6 +353,7 @@ class TtsWebsocket:
         language: Optional[str] = None,
         stream: bool = True,
         add_timestamps: bool = False,
+        add_phoneme_timestamps: bool = False,
     ):
         """Send a request to the WebSocket to generate audio.
@@ -360,6 +383,7 @@ class TtsWebsocket:
             "language": language,
             "stream": stream,
             "add_timestamps": add_timestamps,
+            "add_phoneme_timestamps": add_phoneme_timestamps,
         }
         generator = self._websocket_generator(request_body)
@@ -370,6 +394,9 @@ class TtsWebsocket:
         words: typing.List[str] = []
         start: typing.List[float] = []
         end: typing.List[float] = []
+        phonemes: typing.List[str] = []
+        phoneme_start: typing.List[float] = []
+        phoneme_end: typing.List[float] = []
         for chunk in generator:
             if chunk.audio is not None:
                 chunks.append(chunk.audio)
@@ -378,6 +405,11 @@ class TtsWebsocket:
                     words.extend(chunk.word_timestamps.words)
                     start.extend(chunk.word_timestamps.start)
                     end.extend(chunk.word_timestamps.end)
+            if add_phoneme_timestamps and chunk.phoneme_timestamps is not None:
+                if chunk.phoneme_timestamps is not None:
+                    phonemes.extend(chunk.phoneme_timestamps.phonemes)
+                    phoneme_start.extend(chunk.phoneme_timestamps.start)
+                    phoneme_end.extend(chunk.phoneme_timestamps.end)
         return WebSocketTtsOutput(
             audio=b"".join(chunks),  # type: ignore
@@ -391,6 +423,15 @@ class TtsWebsocket:
                 if add_timestamps
                 else None
             ),
+            phoneme_timestamps=(
+                PhonemeTimestamps(
+                    phonemes=phonemes,
+                    start=phoneme_start,
+                    end=phoneme_end,
+                )
+                if add_phoneme_timestamps
+                else None
+            ),
         )
     def _websocket_generator(self, request_body: Dict[str, Any]):

cartesia/tts/client.py CHANGED Viewed

@@ -67,7 +67,7 @@ class TtsClient:
             api_key="YOUR_API_KEY",
         )
         client.tts.bytes(
-            model_id="sonic",
+            model_id="sonic-2",
             transcript="Hello, world!",
             voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
             language="en",
@@ -152,7 +152,7 @@ class TtsClient:
             api_key="YOUR_API_KEY",
         )
         response = client.tts.sse(
-            model_id="sonic",
+            model_id="sonic-2",
             transcript="Hello, world!",
             voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
             language="en",
@@ -258,7 +258,7 @@ class AsyncTtsClient:
         async def main() -> None:
             await client.tts.bytes(
-                model_id="sonic",
+                model_id="sonic-2",
                 transcript="Hello, world!",
                 voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
                 language="en",
@@ -351,7 +351,7 @@ class AsyncTtsClient:
         async def main() -> None:
             response = await client.tts.sse(
-                model_id="sonic",
+                model_id="sonic-2",
                 transcript="Hello, world!",
                 voice={"mode": "id", "id": "694f9389-aac1-45b6-b726-9d9369183238"},
                 language="en",

cartesia/tts/requests/generation_request.py CHANGED Viewed

@@ -51,3 +51,8 @@ class GenerationRequestParams(typing_extensions.TypedDict):
     """
     Whether to return phoneme-level timestamps.
     """
+    use_original_timestamps: typing_extensions.NotRequired[bool]
+    """
+    Whether to use the original transcript for timestamps.
+    """

cartesia/tts/requests/web_socket_chunk_response.py CHANGED Viewed

@@ -1,8 +1,11 @@
 # This file was auto-generated by Fern from our API Definition.
 from .web_socket_base_response import WebSocketBaseResponseParams
+import typing_extensions
+from ..types.flush_id import FlushId
 class WebSocketChunkResponseParams(WebSocketBaseResponseParams):
     data: str
     step_time: float
+    flush_id: typing_extensions.NotRequired[FlushId]

cartesia/tts/requests/web_socket_response.py CHANGED Viewed

@@ -4,8 +4,8 @@ from __future__ import annotations
 import typing_extensions
 import typing
 import typing_extensions
-from ..types.context_id import ContextId
 from ..types.flush_id import FlushId
+from ..types.context_id import ContextId
 from .word_timestamps import WordTimestampsParams
 from .phoneme_timestamps import PhonemeTimestampsParams
@@ -14,6 +14,7 @@ class WebSocketResponse_ChunkParams(typing_extensions.TypedDict):
     type: typing.Literal["chunk"]
     data: str
     step_time: float
+    flush_id: typing_extensions.NotRequired[FlushId]
     context_id: typing_extensions.NotRequired[ContextId]
     status_code: int
     done: bool

cartesia/tts/requests/web_socket_tts_request.py CHANGED Viewed

@@ -19,6 +19,7 @@ class WebSocketTtsRequestParams(typing_extensions.TypedDict):
     duration: typing_extensions.NotRequired[int]
     language: typing_extensions.NotRequired[str]
     add_timestamps: typing_extensions.NotRequired[bool]
+    use_original_timestamps: typing_extensions.NotRequired[bool]
     add_phoneme_timestamps: typing_extensions.NotRequired[bool]
     continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
     context_id: typing_extensions.NotRequired[str]

cartesia/tts/types/emotion.py CHANGED Viewed

@@ -6,22 +6,27 @@ Emotion = typing.Union[
     typing.Literal[
         "anger:lowest",
         "anger:low",
+        "anger",
         "anger:high",
         "anger:highest",
         "positivity:lowest",
         "positivity:low",
+        "positivity",
         "positivity:high",
         "positivity:highest",
         "surprise:lowest",
         "surprise:low",
+        "surprise",
         "surprise:high",
         "surprise:highest",
         "sadness:lowest",
         "sadness:low",
+        "sadness",
         "sadness:high",
         "sadness:highest",
         "curiosity:lowest",
         "curiosity:low",
+        "curiosity",
         "curiosity:high",
         "curiosity:highest",
     ],

cartesia/tts/types/generation_request.py CHANGED Viewed

@@ -56,6 +56,11 @@ class GenerationRequest(UniversalBaseModel):
     Whether to return phoneme-level timestamps.
     """
+    use_original_timestamps: typing.Optional[bool] = pydantic.Field(default=None)
+    """
+    Whether to use the original transcript for timestamps.
+    """
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
     else:

cartesia/tts/types/web_socket_chunk_response.py CHANGED Viewed

@@ -1,14 +1,16 @@
 # This file was auto-generated by Fern from our API Definition.
 from .web_socket_base_response import WebSocketBaseResponse
-from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import typing
+from .flush_id import FlushId
+from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
 class WebSocketChunkResponse(WebSocketBaseResponse):
     data: str
     step_time: float
+    flush_id: typing.Optional[FlushId] = None
     if IS_PYDANTIC_V2:
         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

cartesia/tts/types/web_socket_response.py CHANGED Viewed

@@ -3,10 +3,10 @@
 from __future__ import annotations
 from ...core.pydantic_utilities import UniversalBaseModel
 import typing
+from .flush_id import FlushId
 from .context_id import ContextId
 from ...core.pydantic_utilities import IS_PYDANTIC_V2
 import pydantic
-from .flush_id import FlushId
 from .word_timestamps import WordTimestamps
 from .phoneme_timestamps import PhonemeTimestamps
@@ -15,6 +15,7 @@ class WebSocketResponse_Chunk(UniversalBaseModel):
     type: typing.Literal["chunk"] = "chunk"
     data: str
     step_time: float
+    flush_id: typing.Optional[FlushId] = None
     context_id: typing.Optional[ContextId] = None
     status_code: int
     done: bool

cartesia/tts/types/web_socket_tts_output.py CHANGED Viewed

@@ -7,11 +7,13 @@ import pydantic
 from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
 from .context_id import ContextId
 from .flush_id import FlushId
+from .phoneme_timestamps import PhonemeTimestamps
 from .word_timestamps import WordTimestamps
 class WebSocketTtsOutput(UniversalBaseModel):
     word_timestamps: typing.Optional[WordTimestamps] = None
+    phoneme_timestamps: typing.Optional[PhonemeTimestamps] = None
     audio: typing.Optional[bytes] = None
     context_id: typing.Optional[ContextId] = None
     flush_id: typing.Optional[FlushId] = None

cartesia/tts/types/web_socket_tts_request.py CHANGED Viewed

@@ -22,6 +22,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
     duration: typing.Optional[int] = None
     language: typing.Optional[str] = None
     add_timestamps: typing.Optional[bool] = None
+    use_original_timestamps: typing.Optional[bool] = None
     add_phoneme_timestamps: typing.Optional[bool] = None
     continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
     context_id: typing.Optional[str] = None

cartesia 2.0.0b1__py3-none-any.whl → 2.0.0b7__py3-none-any.whl

cartesia 2.0.0b1__py3-none-any.whl → 2.0.0b7__py3-none-any.whl