PyPI - cartesia - Versions diffs - 2.0.0b2__tar.gz → 2.0.0b7__tar.gz - Mend

cartesia 2.0.0b2__tar.gz → 2.0.0b7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 2.0.0b2
+Version: 2.0.0b7
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers
@@ -70,7 +70,7 @@ print("The embedding for", voice.name, "is", voice.embedding)
 # Clone a voice using file data
 cloned_voice = client.voices.clone(
     clip=open("path/to/voice.wav", "rb"),
-    name="Test cloned voice",
+    name="Test cloned voice",
     language="en",
     mode="similarity",  # or "stability"
     enhance=False, # use enhance=True to clean and denoise the cloning audio
@@ -107,7 +107,7 @@ client = Cartesia(
     api_key=os.getenv("CARTESIA_API_KEY"),
 )
 client.tts.bytes(
-    model_id="sonic-english",
+    model_id="sonic-2",
     transcript="Hello, world!",
     voice={
         "mode": "id",
@@ -143,7 +143,7 @@ client = AsyncCartesia(
 async def main() -> None:
     async for output in client.tts.bytes(
-        model_id="sonic-english",
+        model_id="sonic-2",
         transcript="Hello, world!",
         voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
         language="en",
@@ -188,7 +188,7 @@ def get_tts_chunks():
         api_key=os.getenv("CARTESIA_API_KEY"),
     )
     response = client.tts.sse(
-        model_id="sonic",
+        model_id="sonic-2",
         transcript="Hello world!",
         voice={
             "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
@@ -204,7 +204,7 @@ def get_tts_chunks():
             "sample_rate": 44100,
         },
     )
     audio_chunks = []
     for chunk in response:
         audio_chunks.append(chunk)
@@ -230,7 +230,7 @@ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic"
+model_id = "sonic-2"
 p = pyaudio.PyAudio()
 rate = 22050
@@ -248,7 +248,7 @@ for output in ws.send(
     stream=True,
     output_format={
         "container": "raw",
-        "encoding": "pcm_f32le",
+        "encoding": "pcm_f32le",
         "sample_rate": 22050
     },
 ):
@@ -267,6 +267,55 @@ p.terminate()
 ws.close()  # Close the websocket connection
 ```
+## Requesting Timestamps
+```python
+import asyncio
+from cartesia import AsyncCartesia
+import os
+async def main():
+    client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
+    # Connect to the websocket
+    ws = await client.tts.websocket()
+    # Generate speech with timestamps
+    output_generate = await ws.send(
+        model_id="sonic-2",
+        transcript="Hello! Welcome to Cartesia's text-to-speech.",
+        voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
+        output_format={
+            "container": "raw",
+            "encoding": "pcm_f32le",
+            "sample_rate": 44100
+        },
+        add_timestamps=True,  # Enable word-level timestamps
+        stream=True
+    )
+    # Process the streaming response with timestamps
+    all_words = []
+    all_starts = []
+    all_ends = []
+    audio_chunks = []
+    async for out in output_generate:
+        # Collect audio data
+        if out.audio is not None:
+            audio_chunks.append(out.audio)
+        # Process timestamp data
+        if out.word_timestamps is not None:
+            all_words.extend(out.word_timestamps.words)    # List of words
+            all_starts.extend(out.word_timestamps.start)   # Start time for each word (seconds)
+            all_ends.extend(out.word_timestamps.end)       # End time for each word (seconds)
+    await ws.close()
+asyncio.run(main())
+```
 ## Advanced
 ### Retries
@@ -328,11 +377,33 @@ client = Cartesia(
 ## Contributing
-While we value open-source contributions to this SDK, this library is generated programmatically.
-Additions made directly to this library would have to be moved over to our generation code,
-otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
-a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
-an issue first to discuss with us!
+Note that most of this library is generated programmatically from
+<https://github.com/cartesia-ai/docs> — before making edits to a file, verify it's not autogenerated
+by checking for this comment at the top of the file:
+```
+# This file was auto-generated by Fern from our API Definition.
+```
+### Running tests
+```sh
+uv pip install -r requirements.txt
+uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
+```
+### Manually generating SDK code from docs
+Assuming all your repos are cloned into your home directory:
+```sh
+$ cd ~/docs
+$ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
+$ cd ~/cartesia-python
+$ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
+$ git commit --amend -m "manually regenerate from docs" # optional
+```
+### Automatically generating new SDK releases
-On the other hand, contributions to the README are always very welcome!
+From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/README.md RENAMED Viewed

@@ -38,7 +38,7 @@ print("The embedding for", voice.name, "is", voice.embedding)
 # Clone a voice using file data
 cloned_voice = client.voices.clone(
     clip=open("path/to/voice.wav", "rb"),
-    name="Test cloned voice",
+    name="Test cloned voice",
     language="en",
     mode="similarity",  # or "stability"
     enhance=False, # use enhance=True to clean and denoise the cloning audio
@@ -75,7 +75,7 @@ client = Cartesia(
     api_key=os.getenv("CARTESIA_API_KEY"),
 )
 client.tts.bytes(
-    model_id="sonic-english",
+    model_id="sonic-2",
     transcript="Hello, world!",
     voice={
         "mode": "id",
@@ -111,7 +111,7 @@ client = AsyncCartesia(
 async def main() -> None:
     async for output in client.tts.bytes(
-        model_id="sonic-english",
+        model_id="sonic-2",
         transcript="Hello, world!",
         voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
         language="en",
@@ -156,7 +156,7 @@ def get_tts_chunks():
         api_key=os.getenv("CARTESIA_API_KEY"),
     )
     response = client.tts.sse(
-        model_id="sonic",
+        model_id="sonic-2",
         transcript="Hello world!",
         voice={
             "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
@@ -172,7 +172,7 @@ def get_tts_chunks():
             "sample_rate": 44100,
         },
     )
     audio_chunks = []
     for chunk in response:
         audio_chunks.append(chunk)
@@ -198,7 +198,7 @@ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
 transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic"
+model_id = "sonic-2"
 p = pyaudio.PyAudio()
 rate = 22050
@@ -216,7 +216,7 @@ for output in ws.send(
     stream=True,
     output_format={
         "container": "raw",
-        "encoding": "pcm_f32le",
+        "encoding": "pcm_f32le",
         "sample_rate": 22050
     },
 ):
@@ -235,6 +235,55 @@ p.terminate()
 ws.close()  # Close the websocket connection
 ```
+## Requesting Timestamps
+```python
+import asyncio
+from cartesia import AsyncCartesia
+import os
+async def main():
+    client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
+    # Connect to the websocket
+    ws = await client.tts.websocket()
+    # Generate speech with timestamps
+    output_generate = await ws.send(
+        model_id="sonic-2",
+        transcript="Hello! Welcome to Cartesia's text-to-speech.",
+        voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
+        output_format={
+            "container": "raw",
+            "encoding": "pcm_f32le",
+            "sample_rate": 44100
+        },
+        add_timestamps=True,  # Enable word-level timestamps
+        stream=True
+    )
+    # Process the streaming response with timestamps
+    all_words = []
+    all_starts = []
+    all_ends = []
+    audio_chunks = []
+    async for out in output_generate:
+        # Collect audio data
+        if out.audio is not None:
+            audio_chunks.append(out.audio)
+        # Process timestamp data
+        if out.word_timestamps is not None:
+            all_words.extend(out.word_timestamps.words)    # List of words
+            all_starts.extend(out.word_timestamps.start)   # Start time for each word (seconds)
+            all_ends.extend(out.word_timestamps.end)       # End time for each word (seconds)
+    await ws.close()
+asyncio.run(main())
+```
 ## Advanced
 ### Retries
@@ -296,10 +345,32 @@ client = Cartesia(
 ## Contributing
-While we value open-source contributions to this SDK, this library is generated programmatically.
-Additions made directly to this library would have to be moved over to our generation code,
-otherwise they would be overwritten upon the next generated release. Feel free to open a PR as
-a proof of concept, but know that we will not be able to merge it as-is. We suggest opening
-an issue first to discuss with us!
+Note that most of this library is generated programmatically from
+<https://github.com/cartesia-ai/docs> — before making edits to a file, verify it's not autogenerated
+by checking for this comment at the top of the file:
+```
+# This file was auto-generated by Fern from our API Definition.
+```
+### Running tests
+```sh
+uv pip install -r requirements.txt
+uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
+```
+### Manually generating SDK code from docs
+Assuming all your repos are cloned into your home directory:
+```sh
+$ cd ~/docs
+$ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
+$ cd ~/cartesia-python
+$ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
+$ git commit --amend -m "manually regenerate from docs" # optional
+```
+### Automatically generating new SDK releases
-On the other hand, contributions to the README are always very welcome!
+From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "cartesia"
 [tool.poetry]
 name = "cartesia"
-version = "2.0.0b2"
+version = "2.0.0b7"
 description = ""
 readme = "README.md"
 authors = []

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/src/cartesia/__init__.py RENAMED Viewed

@@ -121,13 +121,14 @@ from .voices import (
     EmbeddingSpecifier,
     EmbeddingSpecifierParams,
     Gender,
+    GenderPresentation,
+    GetVoicesResponse,
+    GetVoicesResponseParams,
     IdSpecifier,
     IdSpecifierParams,
     LocalizeDialect,
     LocalizeDialectParams,
     LocalizeEnglishDialect,
-    LocalizePortugueseDialect,
-    LocalizeSpanishDialect,
     LocalizeTargetLanguage,
     LocalizeVoiceRequest,
     LocalizeVoiceRequestParams,
@@ -138,6 +139,7 @@ from .voices import (
     UpdateVoiceRequest,
     UpdateVoiceRequestParams,
     Voice,
+    VoiceExpandOptions,
     VoiceId,
     VoiceMetadata,
     VoiceMetadataParams,
@@ -175,15 +177,16 @@ __all__ = [
     "FilePurpose",
     "FlushId",
     "Gender",
+    "GenderPresentation",
     "GenerationRequest",
     "GenerationRequestParams",
+    "GetVoicesResponse",
+    "GetVoicesResponseParams",
     "IdSpecifier",
     "IdSpecifierParams",
     "LocalizeDialect",
     "LocalizeDialectParams",
     "LocalizeEnglishDialect",
-    "LocalizePortugueseDialect",
-    "LocalizeSpanishDialect",
     "LocalizeTargetLanguage",
     "LocalizeVoiceRequest",
     "LocalizeVoiceRequestParams",
@@ -235,6 +238,7 @@ __all__ = [
     "UpdateVoiceRequest",
     "UpdateVoiceRequestParams",
     "Voice",
+    "VoiceExpandOptions",
     "VoiceId",
     "VoiceMetadata",
     "VoiceMetadataParams",

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/src/cartesia/base_client.py RENAMED Viewed

@@ -5,14 +5,12 @@ from .environment import CartesiaEnvironment
 import httpx
 from .core.client_wrapper import SyncClientWrapper
 from .api_status.client import ApiStatusClient
-from .datasets.client import DatasetsClient
 from .infill.client import InfillClient
 from .tts.client import TtsClient
 from .voice_changer.client import VoiceChangerClient
 from .voices.client import VoicesClient
 from .core.client_wrapper import AsyncClientWrapper
 from .api_status.client import AsyncApiStatusClient
-from .datasets.client import AsyncDatasetsClient
 from .infill.client import AsyncInfillClient
 from .tts.client import AsyncTtsClient
 from .voice_changer.client import AsyncVoiceChangerClient
@@ -78,7 +76,6 @@ class BaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = ApiStatusClient(client_wrapper=self._client_wrapper)
-        self.datasets = DatasetsClient(client_wrapper=self._client_wrapper)
         self.infill = InfillClient(client_wrapper=self._client_wrapper)
         self.tts = TtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = VoiceChangerClient(client_wrapper=self._client_wrapper)
@@ -144,7 +141,6 @@ class AsyncBaseCartesia:
             timeout=_defaulted_timeout,
         )
         self.api_status = AsyncApiStatusClient(client_wrapper=self._client_wrapper)
-        self.datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper)
         self.infill = AsyncInfillClient(client_wrapper=self._client_wrapper)
         self.tts = AsyncTtsClient(client_wrapper=self._client_wrapper)
         self.voice_changer = AsyncVoiceChangerClient(client_wrapper=self._client_wrapper)

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/src/cartesia/core/__init__.py RENAMED Viewed

@@ -6,6 +6,7 @@ from .datetime_utils import serialize_datetime
 from .file import File, convert_file_dict_to_httpx_tuples, with_content_type
 from .http_client import AsyncHttpClient, HttpClient
 from .jsonable_encoder import jsonable_encoder
+from .pagination import AsyncPager, SyncPager
 from .pydantic_utilities import (
     IS_PYDANTIC_V2,
     UniversalBaseModel,
@@ -24,6 +25,7 @@ __all__ = [
     "ApiError",
     "AsyncClientWrapper",
     "AsyncHttpClient",
+    "AsyncPager",
     "BaseClientWrapper",
     "FieldMetadata",
     "File",
@@ -31,6 +33,7 @@ __all__ = [
     "IS_PYDANTIC_V2",
     "RequestOptions",
     "SyncClientWrapper",
+    "SyncPager",
     "UniversalBaseModel",
     "UniversalRootModel",
     "convert_and_respect_annotation_metadata",

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/src/cartesia/core/client_wrapper.py RENAMED Viewed

@@ -16,10 +16,10 @@ class BaseClientWrapper:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cartesia",
-            "X-Fern-SDK-Version": "2.0.0b2",
+            "X-Fern-SDK-Version": "2.0.0b7",
         }
         headers["X-API-Key"] = self.api_key
-        headers["Cartesia-Version"] = "2024-06-10"
+        headers["Cartesia-Version"] = "2024-11-13"
         return headers
     def get_base_url(self) -> str:

cartesia-2.0.0b7/src/cartesia/core/pagination.py ADDED Viewed

@@ -0,0 +1,88 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from typing_extensions import Self
+import pydantic
+# Generic to represent the underlying type of the results within a page
+T = typing.TypeVar("T")
+# SDKs implement a Page ABC per-pagination request, the endpoint then returns a pager that wraps this type
+# for example, an endpoint will return SyncPager[UserPage] where UserPage implements the Page ABC. ex:
+#
+# SyncPager<InnerListType>(
+#     has_next=response.list_metadata.after is not None,
+#     items=response.data,
+#     # This should be the outer function that returns the SyncPager again
+#     get_next=lambda: list(..., cursor: response.cursor) (or list(..., offset: offset + 1))
+# )
+class BasePage(pydantic.BaseModel, typing.Generic[T]):
+    has_next: bool
+    items: typing.Optional[typing.List[T]]
+class SyncPage(BasePage[T], typing.Generic[T]):
+    get_next: typing.Optional[typing.Callable[[], typing.Optional[Self]]]
+class AsyncPage(BasePage[T], typing.Generic[T]):
+    get_next: typing.Optional[typing.Callable[[], typing.Awaitable[typing.Optional[Self]]]]
+# ----------------------------
+class SyncPager(SyncPage[T], typing.Generic[T]):
+    # Here we type ignore the iterator to avoid a mypy error
+    # caused by the type conflict with Pydantic's __iter__ method
+    # brought in by extending the base model
+    def __iter__(self) -> typing.Iterator[T]:  # type: ignore
+        for page in self.iter_pages():
+            if page.items is not None:
+                for item in page.items:
+                    yield item
+    def iter_pages(self) -> typing.Iterator[SyncPage[T]]:
+        page: typing.Union[SyncPager[T], None] = self
+        while True:
+            if page is not None:
+                yield page
+                if page.has_next and page.get_next is not None:
+                    page = page.get_next()
+                    if page is None or page.items is None or len(page.items) == 0:
+                        return
+                else:
+                    return
+            else:
+                return
+    def next_page(self) -> typing.Optional[SyncPage[T]]:
+        return self.get_next() if self.get_next is not None else None
+class AsyncPager(AsyncPage[T], typing.Generic[T]):
+    async def __aiter__(self) -> typing.AsyncIterator[T]:  # type: ignore
+        async for page in self.iter_pages():
+            if page.items is not None:
+                for item in page.items:
+                    yield item
+    async def iter_pages(self) -> typing.AsyncIterator[AsyncPage[T]]:
+        page: typing.Union[AsyncPager[T], None] = self
+        while True:
+            if page is not None:
+                yield page
+                if page is not None and page.has_next and page.get_next is not None:
+                    page = await page.get_next()
+                    if page is None or page.items is None or len(page.items) == 0:
+                        return
+                else:
+                    return
+            else:
+                return
+    async def next_page(self) -> typing.Optional[AsyncPage[T]]:
+        return await self.get_next() if self.get_next is not None else None

{cartesia-2.0.0b2 → cartesia-2.0.0b7}/src/cartesia/infill/client.py RENAMED Viewed

@@ -42,7 +42,7 @@ class InfillClient:
         **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
-        Only the `sonic-preview` model is supported for infill at this time.
+        Infilling is only available on `sonic-2` at this time.
         At least one of `left_audio` or `right_audio` must be provided.
@@ -117,7 +117,7 @@ class InfillClient:
             api_key="YOUR_API_KEY",
         )
         client.infill.bytes(
-            model_id="sonic-preview",
+            model_id="sonic-2",
             language="en",
             transcript="middle segment",
             voice_id="694f9389-aac1-45b6-b726-9d9369183238",
@@ -189,7 +189,7 @@ class AsyncInfillClient:
         **The cost is 1 credit per character of the infill text plus a fixed cost of 300 credits.**
-        Only the `sonic-preview` model is supported for infill at this time.
+        Infilling is only available on `sonic-2` at this time.
         At least one of `left_audio` or `right_audio` must be provided.
@@ -269,7 +269,7 @@ class AsyncInfillClient:
         async def main() -> None:
             await client.infill.bytes(
-                model_id="sonic-preview",
+                model_id="sonic-2",
                 language="en",
                 transcript="middle segment",
                 voice_id="694f9389-aac1-45b6-b726-9d9369183238",

cartesia 2.0.0b2__tar.gz → 2.0.0b7__tar.gz

cartesia 2.0.0b2__tar.gz → 2.0.0b7__tar.gz