cartesia 2.0.0b1__py3-none-any.whl → 2.0.0b2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ class BaseClientWrapper:
16
16
  headers: typing.Dict[str, str] = {
17
17
  "X-Fern-Language": "Python",
18
18
  "X-Fern-SDK-Name": "cartesia",
19
- "X-Fern-SDK-Version": "2.0.0b1",
19
+ "X-Fern-SDK-Version": "2.0.0b2",
20
20
  }
21
21
  headers["X-API-Key"] = self.api_key
22
22
  headers["Cartesia-Version"] = "2024-06-10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 2.0.0b1
3
+ Version: 2.0.0b2
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -57,7 +57,7 @@ A full reference for this library is available [here](./reference.md).
57
57
  from cartesia import Cartesia
58
58
  import os
59
59
 
60
- client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
60
+ client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
61
61
 
62
62
  # Get all available voices
63
63
  voices = client.voices.list()
@@ -65,21 +65,32 @@ print(voices)
65
65
 
66
66
  # Get a specific voice
67
67
  voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
68
- print("The embedding for", voice["name"], "is", voice["embedding"])
68
+ print("The embedding for", voice.name, "is", voice.embedding)
69
69
 
70
- # Clone a voice using filepath
71
- cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
70
+ # Clone a voice using file data
71
+ cloned_voice = client.voices.clone(
72
+ clip=open("path/to/voice.wav", "rb"),
73
+ name="Test cloned voice",
74
+ language="en",
75
+ mode="similarity", # or "stability"
76
+ enhance=False, # use enhance=True to clean and denoise the cloning audio
77
+ description="Test voice description"
78
+ )
72
79
 
73
80
  # Mix voices together
74
- mixed_voice_embedding = client.voices.mix(
75
- [{ "id": "voice_id_1", "weight": 0.5 }, { "id": "voice_id_2", "weight": 0.25 }, { "id": "voice_id_3", "weight": 0.25 }]
81
+ mixed_voice = client.voices.mix(
82
+ voices=[
83
+ {"id": "voice_id_1", "weight": 0.25},
84
+ {"id": "voice_id_2", "weight": 0.75}
85
+ ]
76
86
  )
77
87
 
78
- # Create a new voice
88
+ # Create a new voice from embedding
79
89
  new_voice = client.voices.create(
80
- name="New Voice",
81
- description="A clone of my own voice",
82
- embedding=cloned_voice_embedding,
90
+ name="Test Voice",
91
+ description="Test voice description",
92
+ embedding=[...], # List[float] with 192 dimensions
93
+ language="en"
83
94
  )
84
95
  ```
85
96
 
@@ -90,15 +101,22 @@ Instantiate and use the client with the following:
90
101
  ```python
91
102
  from cartesia import Cartesia
92
103
  from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
104
+ import os
93
105
 
94
106
  client = Cartesia(
95
- api_key="YOUR_API_KEY",
107
+ api_key=os.getenv("CARTESIA_API_KEY"),
96
108
  )
97
109
  client.tts.bytes(
98
110
  model_id="sonic-english",
99
111
  transcript="Hello, world!",
100
- voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
101
- ),
112
+ voice={
113
+ "mode": "id",
114
+ "id": "694f9389-aac1-45b6-b726-9d9369183238",
115
+ "experimental_controls": {
116
+ "speed": 0.5, # range between [-1.0, 1.0], or "slow", "fastest", etc.
117
+ "emotion": ["positivity", "curiosity:low"] # list of emotions with optional intensity
118
+ }
119
+ },
102
120
  language="en",
103
121
  output_format={
104
122
  "container": "raw",
@@ -114,17 +132,17 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
114
132
 
115
133
  ```python
116
134
  import asyncio
135
+ import os
117
136
 
118
137
  from cartesia import AsyncCartesia
119
138
  from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
120
139
 
121
140
  client = AsyncCartesia(
122
- api_key="YOUR_API_KEY",
141
+ api_key=os.getenv("CARTESIA_API_KEY"),
123
142
  )
124
143
 
125
-
126
144
  async def main() -> None:
127
- await client.tts.bytes(
145
+ async for output in client.tts.bytes(
128
146
  model_id="sonic-english",
129
147
  transcript="Hello, world!",
130
148
  voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
@@ -134,7 +152,8 @@ async def main() -> None:
134
152
  "sample_rate": 44100,
135
153
  "encoding": "pcm_f32le",
136
154
  },
137
- )
155
+ ):
156
+ print(f"Received chunk of size: {len(output)}")
138
157
 
139
158
 
140
159
  asyncio.run(main())
@@ -162,26 +181,38 @@ The SDK supports streaming responses, as well, the response will be a generator
162
181
  ```python
163
182
  from cartesia import Cartesia
164
183
  from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
184
+ import os
165
185
 
166
- client = Cartesia(
167
- api_key="YOUR_API_KEY",
168
- )
169
- response = client.tts.sse(
170
- model_id="string",
171
- transcript="string",
172
- voice={
173
- "id": "string",
174
- "experimental_controls": {
175
- speed=1.1,
176
- emotion="anger:lowest",
186
+ def get_tts_chunks():
187
+ client = Cartesia(
188
+ api_key=os.getenv("CARTESIA_API_KEY"),
189
+ )
190
+ response = client.tts.sse(
191
+ model_id="sonic",
192
+ transcript="Hello world!",
193
+ voice={
194
+ "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
195
+ "experimental_controls": {
196
+ "speed": "normal",
197
+ "emotion": [],
198
+ },
177
199
  },
178
- },
179
- language="en",
180
- output_format={},
181
- duration=1.1,
182
- )
183
- for chunk in response:
184
- yield chunk
200
+ language="en",
201
+ output_format={
202
+ "container": "raw",
203
+ "encoding": "pcm_f32le",
204
+ "sample_rate": 44100,
205
+ },
206
+ )
207
+
208
+ audio_chunks = []
209
+ for chunk in response:
210
+ audio_chunks.append(chunk)
211
+ return audio_chunks
212
+
213
+ chunks = get_tts_chunks()
214
+ for chunk in chunks:
215
+ print(f"Received chunk of size: {len(chunk.data)}")
185
216
  ```
186
217
 
187
218
  ## WebSocket
@@ -190,16 +221,16 @@ for chunk in response:
190
221
  from cartesia import Cartesia
191
222
  from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawParams
192
223
  import pyaudio
224
+ import os
193
225
 
194
226
  client = Cartesia(
195
- api_key="YOUR_API_KEY",
227
+ api_key=os.getenv("CARTESIA_API_KEY"),
196
228
  )
197
229
  voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
198
- voice = client.voices.get(id=voice_id)
199
230
  transcript = "Hello! Welcome to Cartesia"
200
231
 
201
232
  # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
202
- model_id = "sonic-english"
233
+ model_id = "sonic"
203
234
 
204
235
  p = pyaudio.PyAudio()
205
236
  rate = 22050
@@ -213,7 +244,7 @@ ws = client.tts.websocket()
213
244
  for output in ws.send(
214
245
  model_id=model_id,
215
246
  transcript=transcript,
216
- voice={"embedding": voice.embedding},
247
+ voice={"id": voice_id},
217
248
  stream=True,
218
249
  output_format={
219
250
  "container": "raw",
@@ -9,7 +9,7 @@ cartesia/base_client.py,sha256=fnRxqROt8Eh2_Vx54RmBxLyFsJKQGEMmRlznTKi4Rho,6571
9
9
  cartesia/client.py,sha256=sPAYQLt9W2E_2F17ooocvvJImuNyLrL8xUypgf6dZeI,6238
10
10
  cartesia/core/__init__.py,sha256=SQ85PF84B9MuKnBwHNHWemSGuy-g_515gFYNFhvEE0I,1438
11
11
  cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
12
- cartesia/core/client_wrapper.py,sha256=ncsVGbTVG4gxHVKpcQb_BQ_n9Zqm-f91Zum5t8FZ1uk,1856
12
+ cartesia/core/client_wrapper.py,sha256=C7OD0ek-tvB54i6q9IKucqfyxuc76gxRZknDjFPKrXY,1856
13
13
  cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
14
14
  cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
15
15
  cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
@@ -155,6 +155,6 @@ cartesia/voices/types/voice.py,sha256=echDtXYwyNvoBkwnVBaUV2HzRBbXDqZz0ZZcnj4307
155
155
  cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
156
156
  cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
157
157
  cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
158
- cartesia-2.0.0b1.dist-info/METADATA,sha256=CA4u1fVi90jRNqM-G7Vc-IPXLPcbeU9rk1fnLkzkIFc,8064
159
- cartesia-2.0.0b1.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
160
- cartesia-2.0.0b1.dist-info/RECORD,,
158
+ cartesia-2.0.0b2.dist-info/METADATA,sha256=oQCEf6M6zbdJbu9hmvJ_3h8OVDw72BCc8kErO1LE4T8,9056
159
+ cartesia-2.0.0b2.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
160
+ cartesia-2.0.0b2.dist-info/RECORD,,