PyPI - cartesia - Versions diffs - 2.0.0a2__py3-none-any.whl → 2.0.0b2__py3-none-any.whl - Mend

cartesia 2.0.0a2-py3-none-any.whl → 2.0.0b2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

cartesia/core/client_wrapper.py CHANGED Viewed

@@ -16,7 +16,7 @@ class BaseClientWrapper:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cartesia",
-            "X-Fern-SDK-Version": "2.0.0a2",
+            "X-Fern-SDK-Version": "2.0.0b2",
         }
         headers["X-API-Key"] = self.api_key
         headers["Cartesia-Version"] = "2024-06-10"

{cartesia-2.0.0a2.dist-info → cartesia-2.0.0b2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cartesia
-Version: 2.0.0a2
+Version: 2.0.0b2
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers
@@ -57,7 +57,7 @@ A full reference for this library is available [here](./reference.md).
 from cartesia import Cartesia
 import os
-client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
 # Get all available voices
 voices = client.voices.list()
@@ -65,21 +65,32 @@ print(voices)
 # Get a specific voice
 voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
-print("The embedding for", voice["name"], "is", voice["embedding"])
+print("The embedding for", voice.name, "is", voice.embedding)
-# Clone a voice using filepath
-cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
+# Clone a voice using file data
+cloned_voice = client.voices.clone(
+    clip=open("path/to/voice.wav", "rb"),
+    name="Test cloned voice",
+    language="en",
+    mode="similarity",  # or "stability"
+    enhance=False, # use enhance=True to clean and denoise the cloning audio
+    description="Test voice description"
+)
 # Mix voices together
-mixed_voice_embedding = client.voices.mix(
-    [{ "id": "voice_id_1", "weight": 0.5 }, { "id": "voice_id_2", "weight": 0.25 }, { "id": "voice_id_3", "weight": 0.25 }]
+mixed_voice = client.voices.mix(
+    voices=[
+        {"id": "voice_id_1", "weight": 0.25},
+        {"id": "voice_id_2", "weight": 0.75}
+    ]
 )
-# Create a new voice
+# Create a new voice from embedding
 new_voice = client.voices.create(
-    name="New Voice",
-    description="A clone of my own voice",
-    embedding=cloned_voice_embedding,
+    name="Test Voice",
+    description="Test voice description",
+    embedding=[...],  # List[float] with 192 dimensions
+    language="en"
 )
 ```
@@ -90,15 +101,22 @@ Instantiate and use the client with the following:
 ```python
 from cartesia import Cartesia
 from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
+import os
 client = Cartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 client.tts.bytes(
     model_id="sonic-english",
     transcript="Hello, world!",
-    voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
-    ),
+    voice={
+        "mode": "id",
+        "id": "694f9389-aac1-45b6-b726-9d9369183238",
+        "experimental_controls": {
+            "speed": 0.5,  # range between [-1.0, 1.0], or "slow", "fastest", etc.
+            "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
+        }
+    },
     language="en",
     output_format={
         "container": "raw",
@@ -114,17 +132,17 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
 ```python
 import asyncio
+import os
 from cartesia import AsyncCartesia
 from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
 client = AsyncCartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 async def main() -> None:
-    await client.tts.bytes(
+    async for output in client.tts.bytes(
         model_id="sonic-english",
         transcript="Hello, world!",
         voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
@@ -134,7 +152,8 @@ async def main() -> None:
             "sample_rate": 44100,
             "encoding": "pcm_f32le",
         },
-    )
+    ):
+        print(f"Received chunk of size: {len(output)}")
 asyncio.run(main())
@@ -162,26 +181,38 @@ The SDK supports streaming responses, as well, the response will be a generator
 ```python
 from cartesia import Cartesia
 from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
+import os
-client = Cartesia(
-    api_key="YOUR_API_KEY",
-)
-response = client.tts.sse(
-    model_id="string",
-    transcript="string",
-    voice={
-        "id": "string",
-        "experimental_controls": {
-            speed=1.1,
-            emotion="anger:lowest",
+def get_tts_chunks():
+    client = Cartesia(
+        api_key=os.getenv("CARTESIA_API_KEY"),
+    )
+    response = client.tts.sse(
+        model_id="sonic",
+        transcript="Hello world!",
+        voice={
+            "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
+            "experimental_controls": {
+                "speed": "normal",
+                "emotion": [],
+            },
         },
-    },
-    language="en",
-    output_format={},
-    duration=1.1,
-)
-for chunk in response:
-    yield chunk
+        language="en",
+        output_format={
+            "container": "raw",
+            "encoding": "pcm_f32le",
+            "sample_rate": 44100,
+        },
+    )
+    audio_chunks = []
+    for chunk in response:
+        audio_chunks.append(chunk)
+    return audio_chunks
+chunks = get_tts_chunks()
+for chunk in chunks:
+    print(f"Received chunk of size: {len(chunk.data)}")
 ```
 ## WebSocket
@@ -190,16 +221,16 @@ for chunk in response:
 from cartesia import Cartesia
 from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawParams
 import pyaudio
+import os
 client = Cartesia(
-    api_key="YOUR_API_KEY",
+    api_key=os.getenv("CARTESIA_API_KEY"),
 )
 voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
-voice = client.voices.get(id=voice_id)
 transcript = "Hello! Welcome to Cartesia"
 # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
-model_id = "sonic-english"
+model_id = "sonic"
 p = pyaudio.PyAudio()
 rate = 22050
@@ -213,7 +244,7 @@ ws = client.tts.websocket()
 for output in ws.send(
     model_id=model_id,
     transcript=transcript,
-    voice={"embedding": voice.embedding},
+    voice={"id": voice_id},
     stream=True,
     output_format={
         "container": "raw",

{cartesia-2.0.0a2.dist-info → cartesia-2.0.0b2.dist-info}/RECORD RENAMED Viewed

@@ -9,7 +9,7 @@ cartesia/base_client.py,sha256=fnRxqROt8Eh2_Vx54RmBxLyFsJKQGEMmRlznTKi4Rho,6571
 cartesia/client.py,sha256=sPAYQLt9W2E_2F17ooocvvJImuNyLrL8xUypgf6dZeI,6238
 cartesia/core/__init__.py,sha256=SQ85PF84B9MuKnBwHNHWemSGuy-g_515gFYNFhvEE0I,1438
 cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
-cartesia/core/client_wrapper.py,sha256=RdPFkIVuzjk-BH6CvnMUmsflqQjAoSKNDr2kxIE1C7M,1856
+cartesia/core/client_wrapper.py,sha256=C7OD0ek-tvB54i6q9IKucqfyxuc76gxRZknDjFPKrXY,1856
 cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
 cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
 cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
@@ -155,6 +155,6 @@ cartesia/voices/types/voice.py,sha256=echDtXYwyNvoBkwnVBaUV2HzRBbXDqZz0ZZcnj4307
 cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
 cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
 cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
-cartesia-2.0.0a2.dist-info/METADATA,sha256=H2qk2eF-ouhNTOpoQW_nSkFba1J8efjNAj8jMIGEmes,8064
-cartesia-2.0.0a2.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-cartesia-2.0.0a2.dist-info/RECORD,,
+cartesia-2.0.0b2.dist-info/METADATA,sha256=oQCEf6M6zbdJbu9hmvJ_3h8OVDw72BCc8kErO1LE4T8,9056
+cartesia-2.0.0b2.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+cartesia-2.0.0b2.dist-info/RECORD,,

{cartesia-2.0.0a2.dist-info → cartesia-2.0.0b2.dist-info}/WHEEL RENAMED Viewed

File without changes

cartesia 2.0.0a2__py3-none-any.whl → 2.0.0b2__py3-none-any.whl

cartesia 2.0.0a2-py3-none-any.whl → 2.0.0b2-py3-none-any.whl