PyPI - cartesia - Versions diffs - 2.0.0b7__tar.gz → 2.0.2__tar.gz - Mend

cartesia 2.0.0b7__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177) hide show

{cartesia-2.0.0b7 → cartesia-2.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 2.0.0b7
+Version: 2.0.2
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers
@@ -47,53 +47,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
 pip install cartesia
 ```
-## Reference
-A full reference for this library is available [here](./reference.md).
-## Voices
-```python
-from cartesia import Cartesia
-import os
-client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
-# Get all available voices
-voices = client.voices.list()
-print(voices)
-# Get a specific voice
-voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
-print("The embedding for", voice.name, "is", voice.embedding)
-# Clone a voice using file data
-cloned_voice = client.voices.clone(
-    clip=open("path/to/voice.wav", "rb"),
-    name="Test cloned voice",
-    language="en",
-    mode="similarity",  # or "stability"
-    enhance=False, # use enhance=True to clean and denoise the cloning audio
-    description="Test voice description"
-)
-# Mix voices together
-mixed_voice = client.voices.mix(
-    voices=[
-        {"id": "voice_id_1", "weight": 0.25},
-        {"id": "voice_id_2", "weight": 0.75}
-    ]
-)
-# Create a new voice from embedding
-new_voice = client.voices.create(
-    name="Test Voice",
-    description="Test voice description",
-    embedding=[...],  # List[float] with 192 dimensions
-    language="en"
-)
-```
 ## Usage
 Instantiate and use the client with the following:
@@ -112,10 +65,6 @@ client.tts.bytes(
     voice={
         "mode": "id",
         "id": "694f9389-aac1-45b6-b726-9d9369183238",
-        "experimental_controls": {
-            "speed": 0.5,  # range between [-1.0, 1.0], or "slow", "fastest", etc.
-            "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
-        }
     },
     language="en",
     output_format={
@@ -176,7 +125,7 @@ except ApiError as e:
 ## Streaming
-The SDK supports streaming responses, as well, the response will be a generator that you can loop over.
+The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
 ```python
 from cartesia import Cartesia
@@ -215,7 +164,9 @@ for chunk in chunks:
     print(f"Received chunk of size: {len(chunk.data)}")
 ```
-## WebSocket
+## WebSockets
+For the lowest latency in advanced usecases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
 ```python
 from cartesia import Cartesia
@@ -223,15 +174,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
 import pyaudio
 import os
-client = Cartesia(
-    api_key=os.getenv("CARTESIA_API_KEY"),
-)
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 transcript = "Hello! Welcome to Cartesia"
-# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic-2"
 p = pyaudio.PyAudio()
 rate = 22050
@@ -242,14 +188,14 @@ ws = client.tts.websocket()
 # Generate and stream audio using the websocket
 for output in ws.send(
-    model_id=model_id,
+    model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
     transcript=transcript,
     voice={"id": voice_id},
     stream=True,
     output_format={
         "container": "raw",
         "encoding": "pcm_f32le",
-        "sample_rate": 22050
+        "sample_rate": rate
     },
 ):
     buffer = output.audio
@@ -267,6 +213,40 @@ p.terminate()
 ws.close()  # Close the websocket connection
 ```
+## Voices
+List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
+```python
+from cartesia import Cartesia
+import os
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
+# Get all available Voices
+voices = client.voices.list()
+for voice in voices:
+    print(voice)
+```
+You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
+```python
+# Get a specific Voice
+voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
+print("The embedding for", voice.name, "is", voice.embedding)
+# Clone a Voice using file data
+cloned_voice = client.voices.clone(
+    clip=open("path/to/voice.wav", "rb"),
+    name="Test cloned voice",
+    language="en",
+    mode="similarity",  # or "stability"
+    enhance=False, # use enhance=True to clean and denoise the cloning audio
+    description="Test voice description"
+)
+```
 ## Requesting Timestamps
 ```python
@@ -290,7 +270,8 @@ async def main():
             "encoding": "pcm_f32le",
             "sample_rate": 44100
         },
-        add_timestamps=True,  # Enable word-level timestamps
+        add_timestamps=True,            # Enable word-level timestamps
+        add_phoneme_timestamps=True,    # Enable phonemized timestamps
         stream=True
     )
@@ -358,6 +339,26 @@ client.tts.bytes(..., request_options={
 })
 ```
+### Mixing voices and creating from embeddings
+```python
+# Mix voices together
+mixed_voice = client.voices.mix(
+    voices=[
+        {"id": "voice_id_1", "weight": 0.25},
+        {"id": "voice_id_2", "weight": 0.75}
+    ]
+)
+# Create a new voice from embedding
+new_voice = client.voices.create(
+    name="Test Voice",
+    description="Test voice description",
+    embedding=[...],  # List[float] with 192 dimensions
+    language="en"
+)
+```
 ### Custom Client
 You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
@@ -375,6 +376,10 @@ client = Cartesia(
 )
 ```
+## Reference
+A full reference for this library is available [here](./reference.md).
 ## Contributing
 Note that most of this library is generated programmatically from

{cartesia-2.0.0b7 → cartesia-2.0.2}/README.md RENAMED Viewed

@@ -15,53 +15,6 @@ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.c
 pip install cartesia
 ```
-## Reference
-A full reference for this library is available [here](./reference.md).
-## Voices
-```python
-from cartesia import Cartesia
-import os
-client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
-# Get all available voices
-voices = client.voices.list()
-print(voices)
-# Get a specific voice
-voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
-print("The embedding for", voice.name, "is", voice.embedding)
-# Clone a voice using file data
-cloned_voice = client.voices.clone(
-    clip=open("path/to/voice.wav", "rb"),
-    name="Test cloned voice",
-    language="en",
-    mode="similarity",  # or "stability"
-    enhance=False, # use enhance=True to clean and denoise the cloning audio
-    description="Test voice description"
-)
-# Mix voices together
-mixed_voice = client.voices.mix(
-    voices=[
-        {"id": "voice_id_1", "weight": 0.25},
-        {"id": "voice_id_2", "weight": 0.75}
-    ]
-)
-# Create a new voice from embedding
-new_voice = client.voices.create(
-    name="Test Voice",
-    description="Test voice description",
-    embedding=[...],  # List[float] with 192 dimensions
-    language="en"
-)
-```
 ## Usage
 Instantiate and use the client with the following:
@@ -80,10 +33,6 @@ client.tts.bytes(
     voice={
         "mode": "id",
         "id": "694f9389-aac1-45b6-b726-9d9369183238",
-        "experimental_controls": {
-            "speed": 0.5,  # range between [-1.0, 1.0], or "slow", "fastest", etc.
-            "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
-        }
     },
     language="en",
     output_format={
@@ -144,7 +93,7 @@ except ApiError as e:
 ## Streaming
-The SDK supports streaming responses, as well, the response will be a generator that you can loop over.
+The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
 ```python
 from cartesia import Cartesia
@@ -183,7 +132,9 @@ for chunk in chunks:
     print(f"Received chunk of size: {len(chunk.data)}")
 ```
-## WebSocket
+## WebSockets
+For the lowest latency in advanced usecases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our websockets client:
 ```python
 from cartesia import Cartesia
@@ -191,15 +142,10 @@ from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawPar
 import pyaudio
 import os
-client = Cartesia(
-    api_key=os.getenv("CARTESIA_API_KEY"),
-)
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 transcript = "Hello! Welcome to Cartesia"
-# You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic-2"
 p = pyaudio.PyAudio()
 rate = 22050
@@ -210,14 +156,14 @@ ws = client.tts.websocket()
 # Generate and stream audio using the websocket
 for output in ws.send(
-    model_id=model_id,
+    model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
     transcript=transcript,
     voice={"id": voice_id},
     stream=True,
     output_format={
         "container": "raw",
         "encoding": "pcm_f32le",
-        "sample_rate": 22050
+        "sample_rate": rate
     },
 ):
     buffer = output.audio
@@ -235,6 +181,40 @@ p.terminate()
 ws.close()  # Close the websocket connection
 ```
+## Voices
+List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
+```python
+from cartesia import Cartesia
+import os
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
+# Get all available Voices
+voices = client.voices.list()
+for voice in voices:
+    print(voice)
+```
+You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
+```python
+# Get a specific Voice
+voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
+print("The embedding for", voice.name, "is", voice.embedding)
+# Clone a Voice using file data
+cloned_voice = client.voices.clone(
+    clip=open("path/to/voice.wav", "rb"),
+    name="Test cloned voice",
+    language="en",
+    mode="similarity",  # or "stability"
+    enhance=False, # use enhance=True to clean and denoise the cloning audio
+    description="Test voice description"
+)
+```
 ## Requesting Timestamps
 ```python
@@ -258,7 +238,8 @@ async def main():
             "encoding": "pcm_f32le",
             "sample_rate": 44100
         },
-        add_timestamps=True,  # Enable word-level timestamps
+        add_timestamps=True,            # Enable word-level timestamps
+        add_phoneme_timestamps=True,    # Enable phonemized timestamps
         stream=True
     )
@@ -326,6 +307,26 @@ client.tts.bytes(..., request_options={
 })
 ```
+### Mixing voices and creating from embeddings
+```python
+# Mix voices together
+mixed_voice = client.voices.mix(
+    voices=[
+        {"id": "voice_id_1", "weight": 0.25},
+        {"id": "voice_id_2", "weight": 0.75}
+    ]
+)
+# Create a new voice from embedding
+new_voice = client.voices.create(
+    name="Test Voice",
+    description="Test voice description",
+    embedding=[...],  # List[float] with 192 dimensions
+    language="en"
+)
+```
 ### Custom Client
 You can override the `httpx` client to customize it for your use-case. Some common use-cases include support for proxies
@@ -343,6 +344,10 @@ client = Cartesia(
 )
 ```
+## Reference
+A full reference for this library is available [here](./reference.md).
 ## Contributing
 Note that most of this library is generated programmatically from

{cartesia-2.0.0b7 → cartesia-2.0.2}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "cartesia"
 [tool.poetry]
 name = "cartesia"
-version = "2.0.0b7"
+version = "2.0.2"
 description = ""
 readme = "README.md"
 authors = []

{cartesia-2.0.0b7 → cartesia-2.0.2}/src/cartesia/__init__.py RENAMED Viewed

@@ -1,7 +1,8 @@
 # This file was auto-generated by Fern from our API Definition.
-from . import api_status, datasets, embedding, infill, tts, voice_changer, voices
+from . import api_status, auth, datasets, embedding, infill, tts, voice_changer, voices
 from .api_status import ApiInfo, ApiInfoParams
+from .auth import TokenGrant, TokenGrantParams, TokenRequest, TokenRequestParams, TokenResponse, TokenResponseParams
 from .client import AsyncCartesia, Cartesia
 from .datasets import (
     CreateDatasetRequest,
@@ -129,6 +130,9 @@ from .voices import (
     LocalizeDialect,
     LocalizeDialectParams,
     LocalizeEnglishDialect,
+    LocalizeFrenchDialect,
+    LocalizePortugueseDialect,
+    LocalizeSpanishDialect,
     LocalizeTargetLanguage,
     LocalizeVoiceRequest,
     LocalizeVoiceRequestParams,
@@ -187,6 +191,9 @@ __all__ = [
     "LocalizeDialect",
     "LocalizeDialectParams",
     "LocalizeEnglishDialect",
+    "LocalizeFrenchDialect",
+    "LocalizePortugueseDialect",
+    "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
     "LocalizeVoiceRequest",
     "LocalizeVoiceRequestParams",
@@ -227,6 +234,12 @@ __all__ = [
     "StreamingResponse_Error",
     "StreamingResponse_ErrorParams",
     "SupportedLanguage",
+    "TokenGrant",
+    "TokenGrantParams",
+    "TokenRequest",
+    "TokenRequestParams",
+    "TokenResponse",
+    "TokenResponseParams",
     "TtsRequest",
     "TtsRequestEmbeddingSpecifier",
     "TtsRequestEmbeddingSpecifierParams",
@@ -288,6 +301,7 @@ __all__ = [
     "WordTimestampsParams",
     "__version__",
     "api_status",
+    "auth",
     "datasets",
     "embedding",
     "infill",

cartesia-2.0.2/src/cartesia/auth/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# This file was auto-generated by Fern from our API Definition.
+from .types import TokenGrant, TokenRequest, TokenResponse
+from .requests import TokenGrantParams, TokenRequestParams, TokenResponseParams
+__all__ = [
+    "TokenGrant",
+    "TokenGrantParams",
+    "TokenRequest",
+    "TokenRequestParams",
+    "TokenResponse",
+    "TokenResponseParams",
+]

cartesia-2.0.2/src/cartesia/auth/client.py ADDED Viewed

@@ -0,0 +1,159 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from ..core.client_wrapper import SyncClientWrapper
+from .requests.token_grant import TokenGrantParams
+from ..core.request_options import RequestOptions
+from .types.token_response import TokenResponse
+from ..core.serialization import convert_and_respect_annotation_metadata
+from ..core.pydantic_utilities import parse_obj_as
+from json.decoder import JSONDecodeError
+from ..core.api_error import ApiError
+from ..core.client_wrapper import AsyncClientWrapper
+# this is used as the default value for optional parameters
+OMIT = typing.cast(typing.Any, ...)
+class AuthClient:
+    def __init__(self, *, client_wrapper: SyncClientWrapper):
+        self._client_wrapper = client_wrapper
+    def access_token(
+        self,
+        *,
+        grants: TokenGrantParams,
+        expires_in: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> TokenResponse:
+        """
+        Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
+        Parameters
+        ----------
+        grants : TokenGrantParams
+            The permissions to be granted via the token.
+        expires_in : typing.Optional[int]
+            The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Returns
+        -------
+        TokenResponse
+        Examples
+        --------
+        from cartesia import Cartesia
+        client = Cartesia(
+            api_key="YOUR_API_KEY",
+        )
+        client.auth.access_token(
+            grants={"tts": True},
+            expires_in=60,
+        )
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "access-token",
+            method="POST",
+            json={
+                "grants": convert_and_respect_annotation_metadata(
+                    object_=grants, annotation=TokenGrantParams, direction="write"
+                ),
+                "expires_in": expires_in,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                return typing.cast(
+                    TokenResponse,
+                    parse_obj_as(
+                        type_=TokenResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+class AsyncAuthClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._client_wrapper = client_wrapper
+    async def access_token(
+        self,
+        *,
+        grants: TokenGrantParams,
+        expires_in: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> TokenResponse:
+        """
+        Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
+        Parameters
+        ----------
+        grants : TokenGrantParams
+            The permissions to be granted via the token.
+        expires_in : typing.Optional[int]
+            The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+        Returns
+        -------
+        TokenResponse
+        Examples
+        --------
+        import asyncio
+        from cartesia import AsyncCartesia
+        client = AsyncCartesia(
+            api_key="YOUR_API_KEY",
+        )
+        async def main() -> None:
+            await client.auth.access_token(
+                grants={"tts": True},
+                expires_in=60,
+            )
+        asyncio.run(main())
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "access-token",
+            method="POST",
+            json={
+                "grants": convert_and_respect_annotation_metadata(
+                    object_=grants, annotation=TokenGrantParams, direction="write"
+                ),
+                "expires_in": expires_in,
+            },
+            request_options=request_options,
+            omit=OMIT,
+        )
+        try:
+            if 200 <= _response.status_code < 300:
+                return typing.cast(
+                    TokenResponse,
+                    parse_obj_as(
+                        type_=TokenResponse,  # type: ignore
+                        object_=_response.json(),
+                    ),
+                )
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)

cartesia-2.0.2/src/cartesia/auth/requests/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+from .token_grant import TokenGrantParams
+from .token_request import TokenRequestParams
+from .token_response import TokenResponseParams
+__all__ = ["TokenGrantParams", "TokenRequestParams", "TokenResponseParams"]

cartesia-2.0.2/src/cartesia/auth/requests/token_grant.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+class TokenGrantParams(typing_extensions.TypedDict):
+    tts: bool
+    """
+    The `tts` grant allows the token to be used to access any TTS endpoint.
+    """

cartesia-2.0.2/src/cartesia/auth/requests/token_request.py ADDED Viewed

@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+from .token_grant import TokenGrantParams
+import typing_extensions
+class TokenRequestParams(typing_extensions.TypedDict):
+    grants: TokenGrantParams
+    """
+    The permissions to be granted via the token.
+    """
+    expires_in: typing_extensions.NotRequired[int]
+    """
+    The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
+    """

cartesia-2.0.2/src/cartesia/auth/requests/token_response.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+class TokenResponseParams(typing_extensions.TypedDict):
+    token: str
+    """
+    The generated Access Token.
+    """

cartesia 2.0.0b7__tar.gz → 2.0.2__tar.gz

cartesia 2.0.0b7__tar.gz → 2.0.2__tar.gz