cartesia 0.1.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,347 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 1.0.0
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Provides-Extra: dev
14
+ Provides-Extra: all
15
+
16
+
17
+ # Cartesia Python API Library
18
+
19
+ ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
20
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
21
+
22
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and WebSocket API from any Python 3.8+ application.
23
+
24
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
25
+
26
+ > [!IMPORTANT]
27
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/discussions/44) here and reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) if you have any questions!
28
+
29
+ ## Documentation
30
+
31
+ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install cartesia
37
+
38
+ # pip install in editable mode w/ dev dependencies
39
+ pip install -e '.[dev]'
40
+ ```
41
+
42
+ ## Voices
43
+
44
+ ```python
45
+ from cartesia import Cartesia
46
+ import os
47
+
48
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
49
+
50
+ # Get all available voices
51
+ voices = client.voices.list()
52
+ print(voices)
53
+
54
+ # Get a specific voice
55
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
56
+ print("The embedding for", voice["name"], "is", voice["embedding"])
57
+
58
+ # Clone a voice using filepath
59
+ cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
60
+
61
+ # Create a new voice
62
+ new_voice = client.voices.create(name="New Voice", description="A clone of my own voice", embedding=cloned_voice_embedding)
63
+ ```
64
+
65
+ ## Text-to-Speech
66
+
67
+ ### Server-Sent Events (SSE)
68
+
69
+ ```python
70
+ from cartesia import Cartesia
71
+ import pyaudio
72
+ import os
73
+
74
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
75
+ voice_name = "Barbershop Man"
76
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
77
+ voice = client.voices.get(id=voice_id)
78
+
79
+ transcript = "Hello! Welcome to Cartesia"
80
+
81
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
82
+ model_id = "sonic-english"
83
+
84
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
85
+ output_format = {
86
+ "container": "raw",
87
+ "encoding": "pcm_f32le",
88
+ "sample_rate": 44100,
89
+ }
90
+
91
+ p = pyaudio.PyAudio()
92
+ rate = 44100
93
+
94
+ stream = None
95
+
96
+ # Generate and stream audio
97
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
98
+ buffer = output["audio"]
99
+
100
+ if not stream:
101
+ stream = p.open(format=pyaudio.paFloat32,
102
+ channels=1,
103
+ rate=rate,
104
+ output=True)
105
+
106
+ # Write the audio data to the stream
107
+ stream.write(buffer)
108
+
109
+ stream.stop_stream()
110
+ stream.close()
111
+ p.terminate()
112
+ ```
113
+
114
+ You can also use the async client if you want to make asynchronous API calls. Simply import `AsyncCartesia` instead of `Cartesia` and use await with each API call:
115
+
116
+ ```python
117
+ from cartesia import AsyncCartesia
118
+ import asyncio
119
+ import pyaudio
120
+ import os
121
+
122
+ async def write_stream():
123
+ client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
124
+ voice_name = "Barbershop Man"
125
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
126
+ voice = client.voices.get(id=voice_id)
127
+ transcript = "Hello! Welcome to Cartesia"
128
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
129
+ model_id = "sonic-english"
130
+
131
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
132
+ output_format = {
133
+ "container": "raw",
134
+ "encoding": "pcm_f32le",
135
+ "sample_rate": 44100,
136
+ }
137
+
138
+ p = pyaudio.PyAudio()
139
+ rate = 44100
140
+
141
+ stream = None
142
+
143
+ # Generate and stream audio
144
+ async for output in await client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
145
+ ):
146
+ buffer = output["audio"]
147
+
148
+ if not stream:
149
+ stream = p.open(format=pyaudio.paFloat32,
150
+ channels=1,
151
+ rate=rate,
152
+ output=True)
153
+
154
+ # Write the audio data to the stream
155
+ stream.write(buffer)
156
+
157
+ stream.stop_stream()
158
+ stream.close()
159
+ p.terminate()
160
+
161
+ asyncio.run(write_stream())
162
+ ```
163
+
164
+ ### WebSocket
165
+
166
+ ```python
167
+ from cartesia import Cartesia
168
+ import pyaudio
169
+ import os
170
+
171
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
172
+ voice_name = "Barbershop Man"
173
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
174
+ voice = client.voices.get(id=voice_id)
175
+ transcript = "Hello! Welcome to Cartesia"
176
+
177
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
178
+ model_id = "sonic-english"
179
+
180
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
181
+ output_format = {
182
+ "container": "raw",
183
+ "encoding": "pcm_f32le",
184
+ "sample_rate": 22050,
185
+ }
186
+
187
+ p = pyaudio.PyAudio()
188
+ rate = 22050
189
+
190
+ stream = None
191
+
192
+ # Set up the websocket connection
193
+ ws = client.tts.websocket()
194
+
195
+ # Generate and stream audio using the websocket
196
+ for output in ws.send(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
197
+ buffer = output["audio"]
198
+
199
+ if not stream:
200
+ stream = p.open(format=pyaudio.paFloat32,
201
+ channels=1,
202
+ rate=rate,
203
+ output=True)
204
+
205
+ # Write the audio data to the stream
206
+ stream.write(buffer)
207
+
208
+ stream.stop_stream()
209
+ stream.close()
210
+ p.terminate()
211
+
212
+ ws.close() # Close the websocket connection
213
+ ```
214
+
215
+ ### Multilingual Text-to-Speech [Alpha]
216
+
217
+ You can use our `sonic-multilingual` model to generate audio in multiple languages. The languages supported are available at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
218
+
219
+ ```python
220
+ from cartesia import Cartesia
221
+ import pyaudio
222
+ import os
223
+
224
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
225
+ voice_name = "Barbershop Man"
226
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
227
+ voice = client.voices.get(id=voice_id)
228
+
229
+ transcript = "Hola! Bienvenido a Cartesia"
230
+ language = "es" # Language code corresponding to the language of the transcript
231
+
232
+ # Make sure you use the multilingual model! You can check out all models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
233
+ model_id = "sonic-multilingual"
234
+
235
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
236
+ output_format = {
237
+ "container": "raw",
238
+ "encoding": "pcm_f32le",
239
+ "sample_rate": 44100,
240
+ }
241
+
242
+ p = pyaudio.PyAudio()
243
+ rate = 44100
244
+
245
+ stream = None
246
+
247
+ # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
248
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format, language=language):
249
+ buffer = output["audio"]
250
+
251
+ if not stream:
252
+ stream = p.open(format=pyaudio.paFloat32,
253
+ channels=1,
254
+ rate=rate,
255
+ output=True)
256
+
257
+ stream.write(buffer)
258
+
259
+ stream.stop_stream()
260
+ stream.close()
261
+ p.terminate()
262
+ ```
263
+
264
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
265
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
266
+
267
+ ```python
268
+ from IPython.display import Audio
269
+ import io
270
+ import os
271
+ import numpy as np
272
+
273
+ from cartesia import Cartesia
274
+
275
+ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
276
+ output_format = {
277
+ "container": "raw",
278
+ "encoding": "pcm_f32le",
279
+ "sample_rate": 8000,
280
+ }
281
+ rate = 8000
282
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
283
+ voice = client.voices.get(id=voice_id)
284
+ transcript = "Hey there! Welcome to Cartesia"
285
+
286
+ # Create a BytesIO object to store the audio data
287
+ audio_data = io.BytesIO()
288
+
289
+ # Generate and stream audio
290
+ for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
291
+ ):
292
+ buffer = output["audio"]
293
+ audio_data.write(buffer)
294
+
295
+ # Set the cursor position to the beginning of the BytesIO object
296
+ audio_data.seek(0)
297
+
298
+ # Create an Audio object from the BytesIO data
299
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
300
+
301
+ # Display the Audio object
302
+ display(audio)
303
+ ```
304
+
305
+ Below is the same example using the async client:
306
+
307
+ ```python
308
+ from IPython.display import Audio
309
+ import io
310
+ import os
311
+ import numpy as np
312
+
313
+ from cartesia import AsyncCartesia
314
+
315
+ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
316
+ output_format = {
317
+ "container": "raw",
318
+ "encoding": "pcm_f32le",
319
+ "sample_rate": 8000,
320
+ }
321
+ rate = 8000
322
+ voice_id = "248be419-c632-4f23-adf1-5324ed7dbf1d"
323
+ transcript = "Hey there! Welcome to Cartesia"
324
+
325
+ # Create a BytesIO object to store the audio data
326
+ audio_data = io.BytesIO()
327
+
328
+ # Generate and stream audio
329
+ async for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_id=voice_id, stream=True, output_format=output_format
330
+ ):
331
+ buffer = output["audio"]
332
+ audio_data.write(buffer)
333
+
334
+ # Set the cursor position to the beginning of the BytesIO object
335
+ audio_data.seek(0)
336
+
337
+ # Create an Audio object from the BytesIO data
338
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
339
+
340
+ # Display the Audio object
341
+ display(audio)
342
+ ```
343
+
344
+ To avoid storing your API key in the source code, we recommend doing one of the following:
345
+
346
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
347
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably in a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)
@@ -0,0 +1,331 @@
1
+ # Cartesia Python API Library
2
+
3
+ ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
4
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/ZVxavqHB9X)
5
+
6
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and WebSocket API from any Python 3.8+ application.
7
+
8
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
9
+
10
+ > [!IMPORTANT]
11
+ > The client library introduces breaking changes in v1.0.0, which was released on June 24th 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/discussions/44) here and reach out to us on [Discord](https://discord.gg/ZVxavqHB9X) if you have any questions!
12
+
13
+ ## Documentation
14
+
15
+ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install cartesia
21
+
22
+ # pip install in editable mode w/ dev dependencies
23
+ pip install -e '.[dev]'
24
+ ```
25
+
26
+ ## Voices
27
+
28
+ ```python
29
+ from cartesia import Cartesia
30
+ import os
31
+
32
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
33
+
34
+ # Get all available voices
35
+ voices = client.voices.list()
36
+ print(voices)
37
+
38
+ # Get a specific voice
39
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
40
+ print("The embedding for", voice["name"], "is", voice["embedding"])
41
+
42
+ # Clone a voice using filepath
43
+ cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
44
+
45
+ # Create a new voice
46
+ new_voice = client.voices.create(name="New Voice", description="A clone of my own voice", embedding=cloned_voice_embedding)
47
+ ```
48
+
49
+ ## Text-to-Speech
50
+
51
+ ### Server-Sent Events (SSE)
52
+
53
+ ```python
54
+ from cartesia import Cartesia
55
+ import pyaudio
56
+ import os
57
+
58
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
59
+ voice_name = "Barbershop Man"
60
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
61
+ voice = client.voices.get(id=voice_id)
62
+
63
+ transcript = "Hello! Welcome to Cartesia"
64
+
65
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
66
+ model_id = "sonic-english"
67
+
68
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
69
+ output_format = {
70
+ "container": "raw",
71
+ "encoding": "pcm_f32le",
72
+ "sample_rate": 44100,
73
+ }
74
+
75
+ p = pyaudio.PyAudio()
76
+ rate = 44100
77
+
78
+ stream = None
79
+
80
+ # Generate and stream audio
81
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
82
+ buffer = output["audio"]
83
+
84
+ if not stream:
85
+ stream = p.open(format=pyaudio.paFloat32,
86
+ channels=1,
87
+ rate=rate,
88
+ output=True)
89
+
90
+ # Write the audio data to the stream
91
+ stream.write(buffer)
92
+
93
+ stream.stop_stream()
94
+ stream.close()
95
+ p.terminate()
96
+ ```
97
+
98
+ You can also use the async client if you want to make asynchronous API calls. Simply import `AsyncCartesia` instead of `Cartesia` and use await with each API call:
99
+
100
+ ```python
101
+ from cartesia import AsyncCartesia
102
+ import asyncio
103
+ import pyaudio
104
+ import os
105
+
106
+ async def write_stream():
107
+ client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
108
+ voice_name = "Barbershop Man"
109
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
110
+ voice = client.voices.get(id=voice_id)
111
+ transcript = "Hello! Welcome to Cartesia"
112
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
113
+ model_id = "sonic-english"
114
+
115
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
116
+ output_format = {
117
+ "container": "raw",
118
+ "encoding": "pcm_f32le",
119
+ "sample_rate": 44100,
120
+ }
121
+
122
+ p = pyaudio.PyAudio()
123
+ rate = 44100
124
+
125
+ stream = None
126
+
127
+ # Generate and stream audio
128
+ async for output in await client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
129
+ ):
130
+ buffer = output["audio"]
131
+
132
+ if not stream:
133
+ stream = p.open(format=pyaudio.paFloat32,
134
+ channels=1,
135
+ rate=rate,
136
+ output=True)
137
+
138
+ # Write the audio data to the stream
139
+ stream.write(buffer)
140
+
141
+ stream.stop_stream()
142
+ stream.close()
143
+ p.terminate()
144
+
145
+ asyncio.run(write_stream())
146
+ ```
147
+
148
+ ### WebSocket
149
+
150
+ ```python
151
+ from cartesia import Cartesia
152
+ import pyaudio
153
+ import os
154
+
155
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
156
+ voice_name = "Barbershop Man"
157
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
158
+ voice = client.voices.get(id=voice_id)
159
+ transcript = "Hello! Welcome to Cartesia"
160
+
161
+ # You can check out our models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
162
+ model_id = "sonic-english"
163
+
164
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
165
+ output_format = {
166
+ "container": "raw",
167
+ "encoding": "pcm_f32le",
168
+ "sample_rate": 22050,
169
+ }
170
+
171
+ p = pyaudio.PyAudio()
172
+ rate = 22050
173
+
174
+ stream = None
175
+
176
+ # Set up the websocket connection
177
+ ws = client.tts.websocket()
178
+
179
+ # Generate and stream audio using the websocket
180
+ for output in ws.send(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format):
181
+ buffer = output["audio"]
182
+
183
+ if not stream:
184
+ stream = p.open(format=pyaudio.paFloat32,
185
+ channels=1,
186
+ rate=rate,
187
+ output=True)
188
+
189
+ # Write the audio data to the stream
190
+ stream.write(buffer)
191
+
192
+ stream.stop_stream()
193
+ stream.close()
194
+ p.terminate()
195
+
196
+ ws.close() # Close the websocket connection
197
+ ```
198
+
199
+ ### Multilingual Text-to-Speech [Alpha]
200
+
201
+ You can use our `sonic-multilingual` model to generate audio in multiple languages. The languages supported are available at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
202
+
203
+ ```python
204
+ from cartesia import Cartesia
205
+ import pyaudio
206
+ import os
207
+
208
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
209
+ voice_name = "Barbershop Man"
210
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
211
+ voice = client.voices.get(id=voice_id)
212
+
213
+ transcript = "Hola! Bienvenido a Cartesia"
214
+ language = "es" # Language code corresponding to the language of the transcript
215
+
216
+ # Make sure you use the multilingual model! You can check out all models at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
217
+ model_id = "sonic-multilingual"
218
+
219
+ # You can find the supported `output_format`s in our [API Reference](https://docs.cartesia.ai/api-reference/endpoints/stream-speech-server-sent-events).
220
+ output_format = {
221
+ "container": "raw",
222
+ "encoding": "pcm_f32le",
223
+ "sample_rate": 44100,
224
+ }
225
+
226
+ p = pyaudio.PyAudio()
227
+ rate = 44100
228
+
229
+ stream = None
230
+
231
+ # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
232
+ for output in client.tts.sse(model_id=model_id, transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format, language=language):
233
+ buffer = output["audio"]
234
+
235
+ if not stream:
236
+ stream = p.open(format=pyaudio.paFloat32,
237
+ channels=1,
238
+ rate=rate,
239
+ output=True)
240
+
241
+ stream.write(buffer)
242
+
243
+ stream.stop_stream()
244
+ stream.close()
245
+ p.terminate()
246
+ ```
247
+
248
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
249
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
250
+
251
+ ```python
252
+ from IPython.display import Audio
253
+ import io
254
+ import os
255
+ import numpy as np
256
+
257
+ from cartesia import Cartesia
258
+
259
+ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
260
+ output_format = {
261
+ "container": "raw",
262
+ "encoding": "pcm_f32le",
263
+ "sample_rate": 8000,
264
+ }
265
+ rate = 8000
266
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
267
+ voice = client.voices.get(id=voice_id)
268
+ transcript = "Hey there! Welcome to Cartesia"
269
+
270
+ # Create a BytesIO object to store the audio data
271
+ audio_data = io.BytesIO()
272
+
273
+ # Generate and stream audio
274
+ for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_embedding=voice["embedding"], stream=True, output_format=output_format
275
+ ):
276
+ buffer = output["audio"]
277
+ audio_data.write(buffer)
278
+
279
+ # Set the cursor position to the beginning of the BytesIO object
280
+ audio_data.seek(0)
281
+
282
+ # Create an Audio object from the BytesIO data
283
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
284
+
285
+ # Display the Audio object
286
+ display(audio)
287
+ ```
288
+
289
+ Below is the same example using the async client:
290
+
291
+ ```python
292
+ from IPython.display import Audio
293
+ import io
294
+ import os
295
+ import numpy as np
296
+
297
+ from cartesia import AsyncCartesia
298
+
299
+ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
300
+ output_format = {
301
+ "container": "raw",
302
+ "encoding": "pcm_f32le",
303
+ "sample_rate": 8000,
304
+ }
305
+ rate = 8000
306
+ voice_id = "248be419-c632-4f23-adf1-5324ed7dbf1d"
307
+ transcript = "Hey there! Welcome to Cartesia"
308
+
309
+ # Create a BytesIO object to store the audio data
310
+ audio_data = io.BytesIO()
311
+
312
+ # Generate and stream audio
313
+ async for output in client.tts.sse(model_id="sonic-english", transcript=transcript, voice_id=voice_id, stream=True, output_format=output_format
314
+ ):
315
+ buffer = output["audio"]
316
+ audio_data.write(buffer)
317
+
318
+ # Set the cursor position to the beginning of the BytesIO object
319
+ audio_data.seek(0)
320
+
321
+ # Create an Audio object from the BytesIO data
322
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
323
+
324
+ # Display the Audio object
325
+ display(audio)
326
+ ```
327
+
328
+ To avoid storing your API key in the source code, we recommend doing one of the following:
329
+
330
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file.
331
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably to a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`)
@@ -0,0 +1,3 @@
1
+ from cartesia.client import Cartesia, AsyncCartesia
2
+
3
+ __all__ = ["Cartesia", "AsyncCartesia"]