cartesia 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,664 @@
+ Metadata-Version: 2.1
+ Name: cartesia
+ Version: 1.1.0
+ Summary: The official Python library for the Cartesia API.
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.md
+ Requires-Dist: aiohttp >=3.10.10
+ Requires-Dist: httpx >=0.27.2
+ Requires-Dist: iterators >=0.2.0
+ Requires-Dist: requests >=2.32.3
+ Requires-Dist: websockets >=13.1
+
+ # Cartesia Python API Library
+
+ ![PyPI - Version](https://img.shields.io/pypi/v/cartesia)
+ [![Discord](https://badgen.net/badge/black/Cartesia/icon?icon=discord&label)](https://discord.gg/cartesia)
+
+ The official Cartesia Python library, which provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.9+ application.
+
+ > [!IMPORTANT]
+ > The client library introduced breaking changes in v1.0.0, released on June 24th, 2024. See the [release notes](https://github.com/cartesia-ai/cartesia-python/releases/tag/v1.0.0) and [migration guide](https://github.com/cartesia-ai/cartesia-python/discussions/44). Reach out to us on [Discord](https://discord.gg/cartesia) with any support requests!
+
+ - [Cartesia Python API Library](#cartesia-python-api-library)
+   - [Documentation](#documentation)
+   - [Installation](#installation)
+   - [Voices](#voices)
+   - [Text-to-Speech](#text-to-speech)
+     - [Bytes](#bytes)
+     - [Server-Sent Events (SSE)](#server-sent-events-sse)
+     - [WebSocket](#websocket)
+       - [Conditioning speech on previous generations using WebSocket](#conditioning-speech-on-previous-generations-using-websocket)
+     - [Generating timestamps using WebSocket](#generating-timestamps-using-websocket)
+     - [Multilingual Text-to-Speech \[Alpha\]](#multilingual-text-to-speech-alpha)
+     - [Speed and Emotion Control \[Experimental\]](#speed-and-emotion-control-experimental)
+     - [Jupyter Notebook Usage](#jupyter-notebook-usage)
+     - [Utility methods](#utility-methods)
+       - [Output Formats](#output-formats)
+
+
+ ## Documentation
+
+ Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
+
+ ## Installation
+
+ ```bash
+ pip install cartesia
+
+ # Or, from a source checkout: install in editable mode with dev dependencies
+ pip install -e '.[dev]'
+ ```
+
+ ## Voices
+
+ ```python
+ from cartesia import Cartesia
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+
+ # Get all available voices
+ voices = client.voices.list()
+ print(voices)
+
+ # Get a specific voice
+ voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
+ print("The embedding for", voice["name"], "is", voice["embedding"])
+
+ # Clone a voice using a file path
+ cloned_voice_embedding = client.voices.clone(filepath="path/to/voice")
+
+ # Mix voices together
+ mixed_voice_embedding = client.voices.mix(
+     [{ "id": "voice_id_1", "weight": 0.5 }, { "id": "voice_id_2", "weight": 0.25 }, { "id": "voice_id_3", "weight": 0.25 }]
+ )
+
+ # Create a new voice
+ new_voice = client.voices.create(
+     name="New Voice",
+     description="A clone of my own voice",
+     embedding=cloned_voice_embedding,
+ )
+ ```
+
+ ## Text-to-Speech
+
+ ### Bytes
+
+ ```python
+ from cartesia import Cartesia
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+
+ data = client.tts.bytes(
+     model_id="sonic-english",
+     transcript="Hello, world! I'm generating audio on Cartesia.",
+     voice_id="a0e99841-438c-4a64-b679-ae501e7d6091",  # Barbershop Man
+     # You can find the supported `output_format`s at https://docs.cartesia.ai/api-reference/tts/bytes
+     output_format={
+         "container": "wav",
+         "encoding": "pcm_f32le",
+         "sample_rate": 44100,
+     },
+ )
+
+ with open("output.wav", "wb") as f:
+     f.write(data)
+ ```
+
+ ### Server-Sent Events (SSE)
+
+ ```python
+ from cartesia import Cartesia
+ import pyaudio
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
+ voice = client.voices.get(id=voice_id)
+
+ transcript = "Hello! Welcome to Cartesia"
+
+ # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
+ model_id = "sonic-english"
+
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+ output_format = {
+     "container": "raw",
+     "encoding": "pcm_f32le",
+     "sample_rate": 44100,
+ }
+
+ p = pyaudio.PyAudio()
+ rate = 44100
+
+ stream = None
+
+ # Generate and stream audio
+ for output in client.tts.sse(
+     model_id=model_id,
+     transcript=transcript,
+     voice_embedding=voice["embedding"],
+     stream=True,
+     output_format=output_format,
+ ):
+     buffer = output["audio"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
+
+     # Write the audio data to the stream
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+ ```
+
+ You can also use the async client if you want to make asynchronous API calls. Simply import `AsyncCartesia` instead of `Cartesia` and use `await` with each API call:
+
+ ```python
+ from cartesia import AsyncCartesia
+ import asyncio
+ import pyaudio
+ import os
+
+
+ async def write_stream():
+     client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+     voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
+     voice = client.voices.get(id=voice_id)
+     transcript = "Hello! Welcome to Cartesia"
+     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
+     model_id = "sonic-english"
+
+     # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+     output_format = {
+         "container": "raw",
+         "encoding": "pcm_f32le",
+         "sample_rate": 44100,
+     }
+
+     p = pyaudio.PyAudio()
+     rate = 44100
+
+     stream = None
+
+     # Generate and stream audio
+     async for output in await client.tts.sse(
+         model_id=model_id,
+         transcript=transcript,
+         voice_embedding=voice["embedding"],
+         stream=True,
+         output_format=output_format,
+     ):
+         buffer = output["audio"]
+
+         if not stream:
+             stream = p.open(
+                 format=pyaudio.paFloat32, channels=1, rate=rate, output=True
+             )
+
+         # Write the audio data to the stream
+         stream.write(buffer)
+
+     stream.stop_stream()
+     stream.close()
+     p.terminate()
+     await client.close()
+
+
+ asyncio.run(write_stream())
+ ```
+
+ ### WebSocket
+
+ ```python
+ from cartesia import Cartesia
+ import pyaudio
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
+ voice = client.voices.get(id=voice_id)
+ transcript = "Hello! Welcome to Cartesia"
+
+ # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
+ model_id = "sonic-english"
+
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+ output_format = {
+     "container": "raw",
+     "encoding": "pcm_f32le",
+     "sample_rate": 22050,
+ }
+
+ p = pyaudio.PyAudio()
+ rate = 22050
+
+ stream = None
+
+ # Set up the websocket connection
+ ws = client.tts.websocket()
+
+ # Generate and stream audio using the websocket
+ for output in ws.send(
+     model_id=model_id,
+     transcript=transcript,
+     voice_embedding=voice["embedding"],
+     stream=True,
+     output_format=output_format,
+ ):
+     buffer = output["audio"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
+
+     # Write the audio data to the stream
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+
+ ws.close()  # Close the websocket connection
+ ```
+
+ #### Conditioning speech on previous generations using WebSocket
+
+ In some cases, input text may need to be streamed in. In these cases, it would be slow to wait for all the text to buffer before sending it to Cartesia's TTS service.
+
+ To mitigate this, Cartesia offers audio continuations. In this setting, you can send input text over a WebSocket connection as it becomes available.
+
+ To do this, create a `context` and send multiple requests without awaiting the response. You can then listen to the responses in the order they were sent.
+
+ Each `context` is closed automatically after 5 seconds of inactivity or when the `no_more_inputs` method is called. `no_more_inputs` sends a request with `continue_=False`, which indicates that no more inputs will be sent over this context.
+
+ ```python
+ import asyncio
+ import os
+ import pyaudio
+ from cartesia import AsyncCartesia
+
+ async def send_transcripts(ctx):
+     # Check out voice IDs by calling `client.voices.list()` or on https://play.cartesia.ai/
+     voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
+
+     # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
+     model_id = "sonic-english"
+
+     # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+     output_format = {
+         "container": "raw",
+         "encoding": "pcm_f32le",
+         "sample_rate": 44100,
+     }
+
+     transcripts = [
+         "Sonic and Yoshi team up in a dimension-hopping adventure! ",
+         "Racing through twisting zones, they dodge Eggman's badniks and solve ancient puzzles. ",
+         "In the Echoing Caverns, they find the Harmonic Crystal, unlocking new powers. ",
+         "Sonic's speed creates sound waves, while Yoshi's eggs become sonic bolts. ",
+         "As they near Eggman's lair, our heroes charge their abilities for an epic boss battle. ",
+         "Get ready to spin, jump, and sound-blast your way to victory in this high-octane crossover!"
+     ]
+
+     for transcript in transcripts:
+         # Send text inputs as they become available
+         await ctx.send(
+             model_id=model_id,
+             transcript=transcript,
+             voice_id=voice_id,
+             continue_=True,
+             output_format=output_format,
+         )
+
+     # Indicate that no more inputs will be sent. Otherwise, the context will close after 5 seconds of inactivity.
+     await ctx.no_more_inputs()
+
+ async def receive_and_play_audio(ctx):
+     p = pyaudio.PyAudio()
+     stream = None
+     rate = 44100
+
+     async for output in ctx.receive():
+         buffer = output["audio"]
+
+         if not stream:
+             stream = p.open(
+                 format=pyaudio.paFloat32,
+                 channels=1,
+                 rate=rate,
+                 output=True
+             )
+
+         stream.write(buffer)
+
+     stream.stop_stream()
+     stream.close()
+     p.terminate()
+
+ async def stream_and_listen():
+     client = AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+
+     # Set up the websocket connection
+     ws = await client.tts.websocket()
+
+     # Create a context to send and receive audio
+     ctx = ws.context()  # Generates a random context ID if not provided
+
+     send_task = asyncio.create_task(send_transcripts(ctx))
+     listen_task = asyncio.create_task(receive_and_play_audio(ctx))
+
+     # Call the two coroutine tasks concurrently
+     await asyncio.gather(send_task, listen_task)
+
+     await ws.close()
+     await client.close()
+
+ asyncio.run(stream_and_listen())
+ ```
+
+ You can also use continuations on the synchronous Cartesia client to stream in text as it becomes available. To do this, pass in a text generator that produces text chunks at intervals of less than 1 second, as shown below. This ensures smooth audio playback.
+
+ Note: the sync client has a different API for continuations compared to the async client.
+
+ ```python
+ from cartesia import Cartesia
+ import pyaudio
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+
+ transcripts = [
+     "The crew engaged in a range of activities designed to mirror those "
+     "they might perform on a real Mars mission. ",
+     "Aside from growing vegetables and maintaining their habitat, they faced "
+     "additional stressors like communication delays with Earth, ",
+     "up to twenty-two minutes each way, to simulate the distance from Mars to our planet. ",
+     "These exercises were critical for understanding how astronauts can "
+     "maintain not just physical health but also mental well-being under such challenging conditions. ",
+ ]
+
+ # Ending each transcript with a space makes the audio smoother
+ def chunk_generator(transcripts):
+     for transcript in transcripts:
+         if transcript.endswith(" "):
+             yield transcript
+         else:
+             yield transcript + " "
+
+
+ # You can check out voice IDs by calling `client.voices.list()` or on https://play.cartesia.ai/
+ voice_id = "87748186-23bb-4158-a1eb-332911b0b708"
+
+ # You can check out our models at https://docs.cartesia.ai/getting-started/available-models
+ model_id = "sonic-english"
+
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+ output_format = {
+     "container": "raw",
+     "encoding": "pcm_f32le",
+     "sample_rate": 44100,
+ }
+
+ p = pyaudio.PyAudio()
+ rate = 44100
+
+ stream = None
+
+ # Set up the websocket connection
+ ws = client.tts.websocket()
+
+ # Create a context to send and receive audio
+ ctx = ws.context()  # Generates a random context ID if not provided
+
+ # Pass in a text generator to generate & stream the audio
+ output_stream = ctx.send(
+     model_id=model_id,
+     transcript=chunk_generator(transcripts),
+     voice_id=voice_id,
+     output_format=output_format,
+ )
+
+ for output in output_stream:
+     buffer = output["audio"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
+
+     # Write the audio data to the stream
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+
+ ws.close()  # Close the websocket connection
+ ```
+
+ ### Generating timestamps using WebSocket
+
+ The WebSocket endpoint supports timestamps, allowing you to get detailed timing information for each word in the transcript. To enable this feature, pass an `add_timestamps` boolean flag to the `send` method. The results are returned in the `word_timestamps` object, which contains three keys:
+ - `words` (list): The individual words in the transcript.
+ - `start` (list): The starting timestamp for each word (in seconds).
+ - `end` (list): The ending timestamp for each word (in seconds).
+
+ ```python
+ response = ws.send(
+     model_id=model_id,
+     transcript=transcript,
+     voice_id=voice_id,
+     output_format=output_format,
+     stream=False,
+     add_timestamps=True
+ )
+
+ # Accessing the word_timestamps object
+ word_timestamps = response['word_timestamps']
+
+ words = word_timestamps['words']
+ start_times = word_timestamps['start']
+ end_times = word_timestamps['end']
+
+ for word, start, end in zip(words, start_times, end_times):
+     print(f"Word: {word}, Start: {start}, End: {end}")
+ ```
+
+ ### Multilingual Text-to-Speech [Alpha]
+
+ You can use our `sonic-multilingual` model to generate audio in multiple languages. The supported languages are listed at [docs.cartesia.ai](https://docs.cartesia.ai/getting-started/available-models).
+
+ ```python
+ from cartesia import Cartesia
+ import pyaudio
+ import os
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+ voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
+ voice = client.voices.get(id=voice_id)
+
+ transcript = "Hola! Bienvenido a Cartesia"
+ language = "es"  # Language code corresponding to the language of the transcript
+
+ # Make sure you use the multilingual model! You can check out all models at https://docs.cartesia.ai/getting-started/available-models
+ model_id = "sonic-multilingual"
+
+ # You can find the supported `output_format`s at https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events
+ output_format = {
+     "container": "raw",
+     "encoding": "pcm_f32le",
+     "sample_rate": 44100,
+ }
+
+ p = pyaudio.PyAudio()
+ rate = 44100
+
+ stream = None
+
+ # Pass in the corresponding language code to the `language` parameter to generate and stream audio.
+ for output in client.tts.sse(
+     model_id=model_id,
+     transcript=transcript,
+     voice_embedding=voice["embedding"],
+     stream=True,
+     output_format=output_format,
+     language=language,
+ ):
+     buffer = output["audio"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
+
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+ ```
+
+ ### Speed and Emotion Control [Experimental]
+
+ You can enhance the voice output by adjusting the `speed` and `emotion` parameters. To do this, pass a `_experimental_voice_controls` dictionary with the desired `speed` and `emotion` values to any `send` method.
+
+ Speed options:
+ - `slowest`, `slow`, `normal`, `fast`, `fastest`
+ - Float values between -1.0 and 1.0, where -1.0 is the slowest speed and 1.0 is the fastest.
+
+ Emotion options:
+ Use a list of tags in the format `emotion_name:level`, where:
+ - Emotion names: `anger`, `positivity`, `surprise`, `sadness`, `curiosity`
+ - Levels: `lowest`, `low`, (omit for medium level), `high`, `highest`
+
+ Each tag adds the specified emotion to the voice at the indicated intensity; omitting the level applies the emotion at medium intensity.
+
+ ```python
+ ws.send(
+     model_id=model_id,
+     transcript=transcript,
+     voice_id=voice_id,
+     output_format=output_format,
+     _experimental_voice_controls={"speed": "fast", "emotion": ["positivity:high"]},
+ )
+ ```
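+
+ The options above also allow float speeds and level-free emotion tags, which the example does not show. A minimal sketch under the same assumptions (the values are illustrative only):
+
+ ```python
+ ws.send(
+     model_id=model_id,
+     transcript=transcript,
+     voice_id=voice_id,
+     output_format=output_format,
+     # 0.5 leans fast on the -1.0 to 1.0 scale; "positivity" without a level means medium intensity
+     _experimental_voice_controls={"speed": 0.5, "emotion": ["positivity"]},
+ )
+ ```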
+
+ ### Jupyter Notebook Usage
+
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
+ Additionally, these notebook examples show how to use the client as a context manager (though this is not required).
+
+ ```python
+ from IPython.display import Audio
+ import io
+ import os
+ import numpy as np
+
+ from cartesia import Cartesia
+
+ with Cartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     output_format = {
+         "container": "raw",
+         "encoding": "pcm_f32le",
+         "sample_rate": 8000,
+     }
+     rate = 8000
+     voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
+     voice = client.voices.get(id=voice_id)
+     transcript = "Hey there! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     for output in client.tts.sse(
+         model_id="sonic-english",
+         transcript=transcript,
+         voice_embedding=voice["embedding"],
+         stream=True,
+         output_format=output_format,
+     ):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+ # Set the cursor position to the beginning of the BytesIO object
+ audio_data.seek(0)
+
+ # Create an Audio object from the BytesIO data
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
+
+ # Display the Audio object
+ display(audio)
+ ```
+
+ Below is the same example using the async client:
+
+ ```python
+ from IPython.display import Audio
+ import io
+ import os
+ import numpy as np
+
+ from cartesia import AsyncCartesia
+
+ async with AsyncCartesia(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     output_format = {
+         "container": "raw",
+         "encoding": "pcm_f32le",
+         "sample_rate": 8000,
+     }
+     rate = 8000
+     voice_id = "248be419-c632-4f23-adf1-5324ed7dbf1d"
+     transcript = "Hey there! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     async for output in client.tts.sse(
+         model_id="sonic-english",
+         transcript=transcript,
+         voice_id=voice_id,
+         stream=True,
+         output_format=output_format,
+     ):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+ # Set the cursor position to the beginning of the BytesIO object
+ audio_data.seek(0)
+
+ # Create an Audio object from the BytesIO data
+ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=rate)
+
+ # Display the Audio object
+ display(audio)
+ ```
+
+ ### Utility methods
+
+ #### Output Formats
+
+ You can use the `client.tts.get_output_format` method to convert a string-based output format name into the dictionary expected by the `output_format` parameter. The currently supported output format names are listed in the `OutputFormatMapping` class in `cartesia._types` and in our [API Reference](https://docs.cartesia.ai/reference/api-reference/rest/stream-speech-server-sent-events).
+
+ The previously used `output_format` strings are now deprecated and will be removed in v1.2.0. These are listed in the `DeprecatedOutputFormatMapping` class in `cartesia._types`.
+
+ ```python
+ # Get the output format dictionary from its string name
+ output_format = client.tts.get_output_format("raw_pcm_f32le_44100")
+
+ # Pass the output format dictionary in to generate and stream audio
+ # (model_id, transcript, and voice_id are defined as in the examples above)
+ generator = client.tts.sse(
+     model_id=model_id,
+     transcript=transcript,
+     voice_id=voice_id,
+     stream=True,
+     output_format=output_format,
+ )
+ ```
+
+ To avoid storing your API key in the source code, we recommend doing one of the following:
+
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your `.env` file (see the sketch below).
+ 2. Set the `CARTESIA_API_KEY` environment variable, preferably in a secure shell init file (e.g. `~/.zshrc`, `~/.bashrc`).
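+
+ A minimal sketch of the first option, assuming a `.env` file containing `CARTESIA_API_KEY="my-api-key"` in the working directory:
+
+ ```python
+ import os
+
+ from dotenv import load_dotenv
+ from cartesia import Cartesia
+
+ # Load variables from .env into the process environment
+ load_dotenv()
+
+ client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
+ ```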
@@ -0,0 +1,24 @@
+ cartesia/__init__.py,sha256=rS7jIg4iqT0VgnwjzYK25JXxnF5hjZGE_-PGynAqHFo,126
+ cartesia/_async_sse.py,sha256=76oIvstzVcWZCbcD8Ps419k1FEHF6lOB5qoHwawvj9k,3327
+ cartesia/_async_websocket.py,sha256=Gy0nK3g2HKIBwh-PP1AunEBj83kgFpTGCvrq6tnwg9c,12515
+ cartesia/_constants.py,sha256=lquaYIg7IThdmC1fCklnWC8EM7stbSeVCDwRqCzPq-U,389
+ cartesia/_logger.py,sha256=vU7QiGSy_AJuJFmClUocqIJ-Ltku_8C24ZU8L6fLJR0,53
+ cartesia/_sse.py,sha256=CugabGUAUM-N2BruxNFxDB20HyxDlRdbN-J_yAzvBMY,5667
+ cartesia/_types.py,sha256=2fTSCwjL9lJ3jsdbs0P9fHsjkhejyrrYt6oqIXGk1y4,4488
+ cartesia/_websocket.py,sha256=CpqkShdl4qBjCGMR8s6dEBHK0LJxkrG-FjbPLhjOP-U,14735
+ cartesia/async_client.py,sha256=y_K_Yuv0weA4k9ZYD0M9bNM3x3frsq07tqkg7R9h0-o,2714
+ cartesia/async_tts.py,sha256=IbWVRKklNClXASR6ylHaukcMRR304LUguqc4yMopbDU,2076
+ cartesia/client.py,sha256=OS1ORUSlR8Jg-em1imeTAFfwkC85AQFnw8PYtTdUuC8,2364
+ cartesia/resource.py,sha256=wpnB3IPcTdxYSp0vxSkpntp4NSvqvnwUWF-0ZpgWV9o,1585
+ cartesia/tts.py,sha256=RiADE9wjukfq595DrtgBZY8OKoTaFBzef0wCG93yvFM,5345
+ cartesia/version.py,sha256=LGVQyDsWifdACo7qztwb8RWWHds1E7uQ-ZqD8SAjyw4,22
+ cartesia/voices.py,sha256=DB4tEiSJp7jfnQM0HoiSFS09ZY2oAFbOwMlKe6pofTs,5606
+ cartesia/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cartesia/utils/deprecated.py,sha256=2cXvGtrxhPeUZA5LWy2n_U5OFLDv7SHeFtzqhjSJGyk,1674
+ cartesia/utils/retry.py,sha256=O6fyVWpH9Su8c0Fwupl57xMt6JrwJ52txBwP3faUL7k,3339
+ cartesia/utils/tts.py,sha256=7tJmdyOYwe2QIav5d1UZxhpbcHaYqf7A77bBOlb4U_g,2100
+ cartesia-1.1.0.dist-info/LICENSE.md,sha256=PT2YG5wEtEX1TNDn5sXkUXqbn-neyr7cZenTxd40ql4,1074
+ cartesia-1.1.0.dist-info/METADATA,sha256=irSnehEd1m1sqD6W3mUn1JN_j6O7kOe-EUny2y84aFU,21185
+ cartesia-1.1.0.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ cartesia-1.1.0.dist-info/top_level.txt,sha256=rTX4HnnCegMxl1FK9czpVC7GAvf3SwDzPG65qP-BS4w,9
+ cartesia-1.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (75.2.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1 @@
+ cartesia