cartesia 0.1.1__py2.py3-none-any.whl → 1.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cartesia/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "1.0.1"
@@ -0,0 +1,415 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 1.0.1
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: aiohttp
14
+ Requires-Dist: httpx
15
+ Requires-Dist: pytest-asyncio
16
+ Requires-Dist: requests
17
+ Requires-Dist: websockets
18
+ Provides-Extra: all
19
+ Requires-Dist: pytest >=8.0.2 ; extra == 'all'
20
+ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'all'
21
+ Requires-Dist: twine ; extra == 'all'
22
+ Requires-Dist: setuptools ; extra == 'all'
23
+ Requires-Dist: wheel ; extra == 'all'
24
+ Requires-Dist: numpy ; extra == 'all'
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest >=8.0.2 ; extra == 'dev'
27
+ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'dev'
28
+ Requires-Dist: twine ; extra == 'dev'
29
+ Requires-Dist: setuptools ; extra == 'dev'
30
+ Requires-Dist: wheel ; extra == 'dev'
31
+ Requires-Dist: numpy ; extra == 'dev'
32
+
33
+
34
+ # Cartesia Python API Library
35
+
36
+ ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
37
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
38
+
39
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
40
+
41
+ > [!IMPORTANT]
42
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) for any support requests!
43
+
44
+ ## Documentation
45
+
46
+ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install cartesia
52
+
53
+ # pip install in editable mode w/ dev dependencies
54
+ pip install -e '.[dev]'
55
+ ```
56
+
57
+ ## Voices
58
+
59
+ ```python
60
+ from cartesia import Cartesia
61
+ import os
62
+
63
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
64
+
65
+ # Get all available voices
66
+ voices = client.voices.list()
67
+ print(voices)
68
+
69
+ # Get a specific voice
70
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
71
+ print("The embedding for", voice["name"], "is", voice["embedding"])
72
+
73
+ # Clone a voice using filepath
74
+ cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
75
+
76
+ # Create a new voice
77
+ new_voice = client.voices.create(
78
+ name="New Voice",
79
+ description="A clone of my own voice",
80
+ embedding=cloned_voice_embedding,
81
+ )
82
+ ```
83
+
84
+ ## Text-to-Speech
85
+
86
+ ### Server-Sent Events (SSE)
87
+
88
+ ```python
89
+ from cartesia import Cartesia
90
+ import pyaudio
91
+ import os
92
+
93
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
94
+ voice_name = "Barbershop Man"
95
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
96
+ voice = client.voices.get(id=voice_id)
97
+
98
+ transcript = "Hello! Welcome to Cartesia"
99
+
100
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
101
+ model_id = "sonic-english"
102
+
103
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
104
+ output_format = {
105
+ "container": "raw",
106
+ "encoding": "pcm_f32le",
107
+ "sample_rate": 44100,
108
+ }
109
+
110
+ p = pyaudio.PyAudio()
111
+ rate = 44100
112
+
113
+ stream = None
114
+
115
+ # Generate and stream audio
116
+ for output in client.tts.sse(
117
+ model_id=model_id,
118
+ transcript=transcript,
119
+ voice_embedding=voice["embedding"],
120
+ stream=True,
121
+ output_format=output_format,
122
+ ):
123
+ buffer = output["audio"]
124
+
125
+ if not stream:
126
+ stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
127
+
128
+ # Write the audio data to the stream
129
+ stream.write(buffer)
130
+
131
+ stream.stop_stream()
132
+ stream.close()
133
+ p.terminate()
134
+ ```
135
+
136
+ You can also use the async client if you want to make asynchronous API calls. Simply import `AsyncCartesia` instead of `Cartesia` and use `await` with each API call:
137
+
138
+ ```python
139
+ from cartesia import AsyncCartesia
140
+ import asyncio
141
+ import pyaudio
142
+ import os
143
+
144
+
145
+ async def write_stream():
146
+ client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
147
+ voice_name = "Barbershop Man"
148
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
149
+ voice = client.voices.get(id=voice_id)
150
+ transcript = "Hello! Welcome to Cartesia"
151
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
152
+ model_id = "sonic-english"
153
+
154
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
155
+ output_format = {
156
+ "container": "raw",
157
+ "encoding": "pcm_f32le",
158
+ "sample_rate": 44100,
159
+ }
160
+
161
+ p = pyaudio.PyAudio()
162
+ rate = 44100
163
+
164
+ stream = None
165
+
166
+ # Generate and stream audio
167
+ async for output in await client.tts.sse(
168
+ model_id=model_id,
169
+ transcript=transcript,
170
+ voice_embedding=voice["embedding"],
171
+ stream=True,
172
+ output_format=output_format,
173
+ ):
174
+ buffer = output["audio"]
175
+
176
+ if not stream:
177
+ stream = p.open(
178
+ format=pyaudio.paFloat32, channels=1, rate=rate, output=True
179
+ )
180
+
181
+ # Write the audio data to the stream
182
+ stream.write(buffer)
183
+
184
+ stream.stop_stream()
185
+ stream.close()
186
+ p.terminate()
187
+ await client.close()
188
+
189
+
190
+ asyncio.run(write_stream())
191
+ ```
192
+
193
+ ### WebSocket
194
+
195
+ ```python
196
+ from cartesia import Cartesia
197
+ import pyaudio
198
+ import os
199
+
200
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
201
+ voice_name = "Barbershop Man"
202
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
203
+ voice = client.voices.get(id=voice_id)
204
+ transcript = "Hello! Welcome to Cartesia"
205
+
206
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
207
+ model_id = "sonic-english"
208
+
209
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
210
+ output_format = {
211
+ "container": "raw",
212
+ "encoding": "pcm_f32le",
213
+ "sample_rate": 22050,
214
+ }
215
+
216
+ p = pyaudio.PyAudio()
217
+ rate = 22050
218
+
219
+ stream = None
220
+
221
+ # Set up the websocket connection
222
+ ws = client.tts.websocket()
223
+
224
+ # Generate and stream audio using the websocket
225
+ for output in ws.send(
226
+ model_id=model_id,
227
+ transcript=transcript,
228
+ voice_embedding=voice["embedding"],
229
+ stream=True,
230
+ output_format=output_format,
231
+ ):
232
+ buffer = output["audio"]
233
+
234
+ if not stream:
235
+ stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
236
+
237
+ # Write the audio data to the stream
238
+ stream.write(buffer)
239
+
240
+ stream.stop_stream()
241
+ stream.close()
242
+ p.terminate()
243
+
244
+ ws.close() # Close the websocket connection
245
+ ```
246
+
247
+ ### Multilingual Text-to-Speech [Alpha]
248
+
249
+ You can use our `sonic-multilingual` model to generate audio in multiple languages. The languages supported are available at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
250
+
251
+ ```python
252
+ from cartesia import Cartesia
253
+ import pyaudio
254
+ import os
255
+
256
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
257
+ voice_name = "Barbershop Man"
258
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
259
+ voice = client.voices.get(id=voice_id)
260
+
261
+ transcript = "Hola! Bienvenido a Cartesia"
262
+ language = "es" # Language code corresponding to the language of the transcript
263
+
264
+ # Make sure you use the multilingual model! You can check out all models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
265
+ model_id = "sonic-multilingual"
266
+
267
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
268
+ output_format = {
269
+ "container": "raw",
270
+ "encoding": "pcm_f32le",
271
+ "sample_rate": 44100,
272
+ }
273
+
274
+ p = pyaudio.PyAudio()
275
+ rate = 44100
276
+
277
+ stream = None
278
+
279
+ # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
280
+ for output in client.tts.sse(
281
+ model_id=model_id,
282
+ transcript=transcript,
283
+ voice_embedding=voice["embedding"],
284
+ stream=True,
285
+ output_format=output_format,
286
+ language=language,
287
+ ):
288
+ buffer = output["audio"]
289
+
290
+ if not stream:
291
+ stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
292
+
293
+ stream.write(buffer)
294
+
295
+ stream.stop_stream()
296
+ stream.close()
297
+ p.terminate()
298
+ ```
299
+
300
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
301
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
302
+
303
+ ```python
304
+ from IPython.display import Audio
305
+ import io
306
+ import os
307
+ import numpy as np
308
+
309
+ from cartesia import Cartesia
310
+
311
+ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
312
+ output_format = {
313
+ "container": "raw",
314
+ "encoding": "pcm_f32le",
315
+ "sample_rate": 8000,
316
+ }
317
+ rate = 8000
318
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
319
+ voice = client.voices.get(id=voice_id)
320
+ transcript = "Hey there! Welcome to Cartesia"
321
+
322
+ # Create a BytesIO object to store the audio data
323
+ audio_data = io.BytesIO()
324
+
325
+ # Generate and stream audio
326
+ for output in client.tts.sse(
327
+ model_id="sonic-english",
328
+ transcript=transcript,
329
+ voice_embedding=voice["embedding"],
330
+ stream=True,
331
+ output_format=output_format,
332
+ ):
333
+ buffer = output["audio"]
334
+ audio_data.write(buffer)
335
+
336
+ # Set the cursor position to the beginning of the BytesIO object
337
+ audio_data.seek(0)
338
+
339
+ # Create an Audio object from the BytesIO data
340
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
341
+
342
+ # Display the Audio object
343
+ display(audio)
344
+ ```
345
+
346
+ Below is the same example using the async client:
347
+
348
+ ```python
349
+ from IPython.display import Audio
350
+ import io
351
+ import os
352
+ import numpy as np
353
+
354
+ from cartesia import AsyncCartesia
355
+
356
+ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
357
+ output_format = {
358
+ "container": "raw",
359
+ "encoding": "pcm_f32le",
360
+ "sample_rate": 8000,
361
+ }
362
+ rate = 8000
363
+ voice_id = "248be419-c632-4f23-adf1-5324ed7dbf1d"
364
+ transcript = "Hey there! Welcome to Cartesia"
365
+
366
+ # Create a BytesIO object to store the audio data
367
+ audio_data = io.BytesIO()
368
+
369
+ # Generate and stream audio
370
+ async for output in client.tts.sse(
371
+ model_id="sonic-english",
372
+ transcript=transcript,
373
+ voice_id=voice_id,
374
+ stream=True,
375
+ output_format=output_format,
376
+ ):
377
+ buffer = output["audio"]
378
+ audio_data.write(buffer)
379
+
380
+ # Set the cursor position to the beginning of the BytesIO object
381
+ audio_data.seek(0)
382
+
383
+ # Create an Audio object from the BytesIO data
384
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
385
+
386
+ # Display the Audio object
387
+ display(audio)
388
+ ```
389
+
390
+ ### Utility methods
391
+
392
+ #### Output Formats
393
+
394
+ You can use the `client.tts.get_output_format` method to convert string-based output format names into the `output_format` dictionary which is expected by the `output_format` parameter. You can see the `OutputFormatMapping` class in `cartesia._types` for the currently supported output format names. You can also view the currently supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
395
+
396
+ The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
397
+
398
+ ```python
399
+ # Get the output format dictionary from string name
400
+ output_format = client.tts.get_output_format("raw_pcm_f32le_44100")
401
+
402
+ # Pass in the output format dictionary to generate and stream audio
403
+ generator = client.tts.sse(
404
+ model_id=model,
405
+ transcript=transcript,
406
+ voice_id=SAMPLE_VOICE_ID,
407
+ stream=True,
408
+ output_format=output_format,
409
+ )
410
+ ```
411
+
412
+ To avoid storing your API key in the source code, we recommend doing one of the following:
413
+
414
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
415
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably in a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)
@@ -0,0 +1,8 @@
1
+ cartesia/__init__.py,sha256=jMIf2O7dTGxvTA5AfXtmh1H_EGfMtQseR5wXrjNRbLs,93
2
+ cartesia/_types.py,sha256=msXRqNwVx_mbcLIQgRJYEl8U-hO9LRPWmscnX89cBCY,3747
3
+ cartesia/client.py,sha256=jMlFDPRtKVDelqevHlv7YZJgOES3ws9BFN_6uUyN0W8,32720
4
+ cartesia/version.py,sha256=d4QHYmS_30j0hPN8NmNPnQ_Z0TphDRbu4MtQj9cT9e8,22
5
+ cartesia-1.0.1.dist-info/METADATA,sha256=rwqWm86ez9EzEovMQGPqClXh7U7cTsMGgDwl6DVO42o,12394
6
+ cartesia-1.0.1.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
7
+ cartesia-1.0.1.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
8
+ cartesia-1.0.1.dist-info/RECORD,,