cartesia 0.1.1__py2.py3-none-any.whl → 1.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cartesia/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "1.0.0"
@@ -0,0 +1,364 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 1.0.0
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: aiohttp
14
+ Requires-Dist: httpx
15
+ Requires-Dist: pytest-asyncio
16
+ Requires-Dist: requests
17
+ Requires-Dist: websockets
18
+ Provides-Extra: all
19
+ Requires-Dist: pytest >=8.0.2 ; extra == 'all'
20
+ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'all'
21
+ Requires-Dist: twine ; extra == 'all'
22
+ Requires-Dist: setuptools ; extra == 'all'
23
+ Requires-Dist: wheel ; extra == 'all'
24
+ Requires-Dist: numpy ; extra == 'all'
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest >=8.0.2 ; extra == 'dev'
27
+ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'dev'
28
+ Requires-Dist: twine ; extra == 'dev'
29
+ Requires-Dist: setuptools ; extra == 'dev'
30
+ Requires-Dist: wheel ; extra == 'dev'
31
+ Requires-Dist: numpy ; extra == 'dev'
32
+
33
+
34
+ # Cartesia Python API Library
35
+
36
+ ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
37
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
38
+
39
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and WebSocket API from any Python 3.8+ application.
40
+
41
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
42
+
43
+ > [!IMPORTANT]
44
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/discussions/44) here and reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) if you have any questions!
45
+
46
+ ## Documentation
47
+
48
+ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ pip install cartesia
54
+
55
+ # pip install in editable mode w/ dev dependencies
56
+ pip install -e '.[dev]'
57
+ ```
58
+
59
+ ## Voices
60
+
61
+ ```python
62
+ from cartesia import Cartesia
63
+ import os
64
+
65
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
66
+
67
+ # Get all available voices
68
+ voices = client.voices.list()
69
+ print(voices)
70
+
71
+ # Get a specific voice
72
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
73
+ print("The embedding for", voice["name"], "is", voice["embedding"])
74
+
75
+ # Clone a voice using filepath
76
+ cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
77
+
78
+ # Create a new voice
79
+ new_voice = client.voices.create(name="New Voice", description="A clone of my own voice", embedding=cloned_voice_embedding)
80
+ ```
81
+
82
+ ## Text-to-Speech
83
+
84
+ ### Server-Sent Events (SSE)
85
+
86
+ ```python
87
+ from cartesia import Cartesia
88
+ import pyaudio
89
+ import os
90
+
91
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
92
+ voice_name = "Barbershop Man"
93
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
94
+ voice = client.voices.get(id=voice_id)
95
+
96
+ transcript = "Hello! Welcome to Cartesia"
97
+
98
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
99
+ model_id = "sonic-english"
100
+
101
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
102
+ output_format = {
103
+ "container": "raw",
104
+ "encoding": "pcm_f32le",
105
+ "sample_rate": 44100,
106
+ }
107
+
108
+ p = pyaudio.PyAudio()
109
+ rate = 44100
110
+
111
+ stream = None
112
+
113
+ # Generate and stream audio
114
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
115
+ buffer = output["audio"]
116
+
117
+ if not stream:
118
+ stream = p.open(format=pyaudio.paFloat32,
119
+ channels=1,
120
+ rate=rate,
121
+ output=True)
122
+
123
+ # Write the audio data to the stream
124
+ stream.write(buffer)
125
+
126
+ stream.stop_stream()
127
+ stream.close()
128
+ p.terminate()
129
+ ```
130
+
131
+ You can also use the async client if you want to make asynchronous API calls. Simply import `AsyncCartesia` instead of `Cartesia` and use await with each API call:
132
+
133
+ ```python
134
+ from cartesia import AsyncCartesia
135
+ import asyncio
136
+ import pyaudio
137
+ import os
138
+
139
+ async def write_stream():
140
+ client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
141
+ voice_name = "Barbershop Man"
142
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
143
+ voice = client.voices.get(id=voice_id)
144
+ transcript = "Hello! Welcome to Cartesia"
145
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
146
+ model_id = "sonic-english"
147
+
148
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
149
+ output_format = {
150
+ "container": "raw",
151
+ "encoding": "pcm_f32le",
152
+ "sample_rate": 44100,
153
+ }
154
+
155
+ p = pyaudio.PyAudio()
156
+ rate = 44100
157
+
158
+ stream = None
159
+
160
+ # Generate and stream audio
161
+ async for output in await client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
162
+ ):
163
+ buffer = output["audio"]
164
+
165
+ if not stream:
166
+ stream = p.open(format=pyaudio.paFloat32,
167
+ channels=1,
168
+ rate=rate,
169
+ output=True)
170
+
171
+ # Write the audio data to the stream
172
+ stream.write(buffer)
173
+
174
+ stream.stop_stream()
175
+ stream.close()
176
+ p.terminate()
177
+
178
+ asyncio.run(write_stream())
179
+ ```
180
+
181
+ ### WebSocket
182
+
183
+ ```python
184
+ from cartesia import Cartesia
185
+ import pyaudio
186
+ import os
187
+
188
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
189
+ voice_name = "Barbershop Man"
190
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
191
+ voice = client.voices.get(id=voice_id)
192
+ transcript = "Hello! Welcome to Cartesia"
193
+
194
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
195
+ model_id = "sonic-english"
196
+
197
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
198
+ output_format = {
199
+ "container": "raw",
200
+ "encoding": "pcm_f32le",
201
+ "sample_rate": 22050,
202
+ }
203
+
204
+ p = pyaudio.PyAudio()
205
+ rate = 22050
206
+
207
+ stream = None
208
+
209
+ # Set up the websocket connection
210
+ ws = client.tts.websocket()
211
+
212
+ # Generate and stream audio using the websocket
213
+ for output in ws.send(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
214
+ buffer = output["audio"]
215
+
216
+ if not stream:
217
+ stream = p.open(format=pyaudio.paFloat32,
218
+ channels=1,
219
+ rate=rate,
220
+ output=True)
221
+
222
+ # Write the audio data to the stream
223
+ stream.write(buffer)
224
+
225
+ stream.stop_stream()
226
+ stream.close()
227
+ p.terminate()
228
+
229
+ ws.close() # Close the websocket connection
230
+ ```
231
+
232
+ ### Multilingual Text-to-Speech [Alpha]
233
+
234
+ You can use our `sonic-multilingual` model to generate audio in multiple languages. The supported languages are listed at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
235
+
236
+ ```python
237
+ from cartesia import Cartesia
238
+ import pyaudio
239
+ import os
240
+
241
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
242
+ voice_name = "Barbershop Man"
243
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
244
+ voice = client.voices.get(id=voice_id)
245
+
246
+ transcript = "Hola! Bienvenido a Cartesia"
247
+ language = "es" # Language code corresponding to the language of the transcript
248
+
249
+ # Make sure you use the multilingual model! You can check out all models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
250
+ model_id = "sonic-multilingual"
251
+
252
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
253
+ output_format = {
254
+ "container": "raw",
255
+ "encoding": "pcm_f32le",
256
+ "sample_rate": 44100,
257
+ }
258
+
259
+ p = pyaudio.PyAudio()
260
+ rate = 44100
261
+
262
+ stream = None
263
+
264
+ # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
265
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format, language=language):
266
+ buffer = output["audio"]
267
+
268
+ if not stream:
269
+ stream = p.open(format=pyaudio.paFloat32,
270
+ channels=1,
271
+ rate=rate,
272
+ output=True)
273
+
274
+ stream.write(buffer)
275
+
276
+ stream.stop_stream()
277
+ stream.close()
278
+ p.terminate()
279
+ ```
280
+
281
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
282
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
283
+
284
+ ```python
285
+ from IPython.display import Audio
286
+ import io
287
+ import os
288
+ import numpy as np
289
+
290
+ from cartesia import Cartesia
291
+
292
+ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
293
+ output_format = {
294
+ "container": "raw",
295
+ "encoding": "pcm_f32le",
296
+ "sample_rate": 8000,
297
+ }
298
+ rate = 8000
299
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
300
+ voice = client.voices.get(id=voice_id)
301
+ transcript = "Hey there! Welcome to Cartesia"
302
+
303
+ # Create a BytesIO object to store the audio data
304
+ audio_data = io.BytesIO()
305
+
306
+ # Generate and stream audio
307
+ for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
308
+ ):
309
+ buffer = output["audio"]
310
+ audio_data.write(buffer)
311
+
312
+ # Set the cursor position to the beginning of the BytesIO object
313
+ audio_data.seek(0)
314
+
315
+ # Create an Audio object from the BytesIO data
316
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
317
+
318
+ # Display the Audio object
319
+ display(audio)
320
+ ```
321
+
322
+ Below is the same example using the async client:
323
+
324
+ ```python
325
+ from IPython.display import Audio
326
+ import io
327
+ import os
328
+ import numpy as np
329
+
330
+ from cartesia import AsyncCartesia
331
+
332
+ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
333
+ output_format = {
334
+ "container": "raw",
335
+ "encoding": "pcm_f32le",
336
+ "sample_rate": 8000,
337
+ }
338
+ rate = 8000
339
+ voice_id = "248be419-c632-4f23-adf1-5324ed7dbf1d"
340
+ transcript = "Hey there! Welcome to Cartesia"
341
+
342
+ # Create a BytesIO object to store the audio data
343
+ audio_data = io.BytesIO()
344
+
345
+ # Generate and stream audio
346
+ async for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_id=voice_id, stream=True, output_format=output_format
347
+ ):
348
+ buffer = output["audio"]
349
+ audio_data.write(buffer)
350
+
351
+ # Set the cursor position to the beginning of the BytesIO object
352
+ audio_data.seek(0)
353
+
354
+ # Create an Audio object from the BytesIO data
355
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
356
+
357
+ # Display the Audio object
358
+ display(audio)
359
+ ```
360
+
361
+ To avoid storing your API key in the source code, we recommend doing one of the following:
362
+
363
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
364
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably in a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`).
@@ -0,0 +1,9 @@
1
+ cartesia/__init__.py,sha256=jMIf2O7dTGxvTA5AfXtmh1H_EGfMtQseR5wXrjNRbLs,93
2
+ cartesia/_types.py,sha256=M-gtOFGBRWYAHUpJZNEl2jodqmNm_ZIPQUHkXLyP_-s,1503
3
+ cartesia/client.py,sha256=9uSIYzGGE-Bgcsq5qJ1ApVesAGz_l5Olg07ZUvw268Q,32306
4
+ cartesia/utils.py,sha256=nuwWRfu3MOVTxIQMLjYf6WLaxSlnu_GdE3QjTV0zisQ,3339
5
+ cartesia/version.py,sha256=J-j-u0itpEFT6irdmWmixQqYMadNl1X91TxUmoiLHMI,22
6
+ cartesia-1.0.0.dist-info/METADATA,sha256=PAsYxXc-s1TO_2ROmlLUH4w4CXI7zRMOjmSo0cHg_Os,11399
7
+ cartesia-1.0.0.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
8
+ cartesia-1.0.0.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
9
+ cartesia-1.0.0.dist-info/RECORD,,