cartesia 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,172 @@
+ Metadata-Version: 2.1
+ Name: cartesia
+ Version: 0.0.5
+ Summary: The official Python library for the Cartesia API.
+ Home-page:
+ Author: Cartesia, Inc.
+ Author-email: support@cartesia.ai
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.8.0
+ Description-Content-Type: text/markdown
+ Provides-Extra: dev
+ Provides-Extra: all
+
+
+ # Cartesia Python API Library
+ The official Cartesia Python library, which provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
+
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
+
+ ## Installation
+ ```bash
+ pip install cartesia
+
+ # Install in editable mode with dev dependencies (from a source checkout)
+ pip install -e '.[dev]'
+ ```
+
+ ## Usage
+ ```python
+ from cartesia.tts import CartesiaTTS
+ import pyaudio
+ import os
+
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+ voices = client.get_voices()
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+ transcript = "Hello! Welcome to Cartesia"
+ model_id = "genial-planet-1346"  # (Optional) A default model is used if you don't specify one
+
+ p = pyaudio.PyAudio()
+
+ stream = None
+
+ # Generate and stream audio
+ for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
+     buffer = output["audio"]
+     rate = output["sampling_rate"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32,
+                         channels=1,
+                         rate=rate,
+                         output=True)
+
+     # Write the audio data to the stream
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+ ```
+
+ You can also use the async client if you want to make asynchronous API calls:
+ ```python
+ from cartesia.tts import AsyncCartesiaTTS
+ import asyncio
+ import pyaudio
+ import os
+
+ async def write_stream():
+     client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+     model_id = "genial-planet-1346"  # (Optional) A default model is used if you don't specify one
+
+     p = pyaudio.PyAudio()
+
+     stream = None
+
+     # Generate and stream audio
+     async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
+         buffer = output["audio"]
+         rate = output["sampling_rate"]
+
+         if not stream:
+             stream = p.open(format=pyaudio.paFloat32,
+                             channels=1,
+                             rate=rate,
+                             output=True)
+
+         # Write the audio data to the stream
+         stream.write(buffer)
+
+     stream.stop_stream()
+     stream.close()
+     p.terminate()
+
+ asyncio.run(write_stream())
+ ```
+
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
+
+ ```python
+ from IPython.display import Audio, display
+ import io
+ import os
+ import numpy as np
+
+ from cartesia.tts import CartesiaTTS
+
+ with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     for output in client.generate(transcript=transcript, voice=voice, stream=True):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+     # Set the cursor position to the beginning of the BytesIO object
+     audio_data.seek(0)
+
+     # Create an Audio object from the BytesIO data
+     audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["sampling_rate"])
+
+     # Display the Audio object
+     display(audio)
+ ```
+
+ Below is the same example using the async client:
+ ```python
+ from IPython.display import Audio, display
+ import io
+ import os
+ import numpy as np
+
+ from cartesia.tts import AsyncCartesiaTTS
+
+ async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+     # Set the cursor position to the beginning of the BytesIO object
+     audio_data.seek(0)
+
+     # Create an Audio object from the BytesIO data
+     audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["sampling_rate"])
+
+     # Display the Audio object
+     display(audio)
+ ```
+
+ To avoid storing your API key in the source code, we recommend doing one of the following:
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your `.env` file.
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably by exporting it from your shell init file (e.g. `~/.zshrc`, `~/.bashrc`).
@@ -0,0 +1,156 @@
+ # Cartesia Python API Library
+ The official Cartesia Python library, which provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
+
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
+
+ ## Installation
+ ```bash
+ pip install cartesia
+
+ # Install in editable mode with dev dependencies (from a source checkout)
+ pip install -e '.[dev]'
+ ```
+
+ ## Usage
+ ```python
+ from cartesia.tts import CartesiaTTS
+ import pyaudio
+ import os
+
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+ voices = client.get_voices()
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+ transcript = "Hello! Welcome to Cartesia"
+ model_id = "genial-planet-1346"  # (Optional) A default model is used if you don't specify one
+
+ p = pyaudio.PyAudio()
+
+ stream = None
+
+ # Generate and stream audio
+ for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
+     buffer = output["audio"]
+     rate = output["sampling_rate"]
+
+     if not stream:
+         stream = p.open(format=pyaudio.paFloat32,
+                         channels=1,
+                         rate=rate,
+                         output=True)
+
+     # Write the audio data to the stream
+     stream.write(buffer)
+
+ stream.stop_stream()
+ stream.close()
+ p.terminate()
+ ```
+
+ You can also use the async client if you want to make asynchronous API calls:
+ ```python
+ from cartesia.tts import AsyncCartesiaTTS
+ import asyncio
+ import pyaudio
+ import os
+
+ async def write_stream():
+     client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+     model_id = "genial-planet-1346"  # (Optional) A default model is used if you don't specify one
+
+     p = pyaudio.PyAudio()
+
+     stream = None
+
+     # Generate and stream audio
+     async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
+         buffer = output["audio"]
+         rate = output["sampling_rate"]
+
+         if not stream:
+             stream = p.open(format=pyaudio.paFloat32,
+                             channels=1,
+                             rate=rate,
+                             output=True)
+
+         # Write the audio data to the stream
+         stream.write(buffer)
+
+     stream.stop_stream()
+     stream.close()
+     p.terminate()
+
+ asyncio.run(write_stream())
+ ```
+
+ If you are using Jupyter Notebook or JupyterLab, you can use `IPython.display.Audio` to play the generated audio directly in the notebook.
+ Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
+
+ ```python
+ from IPython.display import Audio, display
+ import io
+ import os
+ import numpy as np
+
+ from cartesia.tts import CartesiaTTS
+
+ with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     for output in client.generate(transcript=transcript, voice=voice, stream=True):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+     # Set the cursor position to the beginning of the BytesIO object
+     audio_data.seek(0)
+
+     # Create an Audio object from the BytesIO data
+     audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["sampling_rate"])
+
+     # Display the Audio object
+     display(audio)
+ ```
+
+ Below is the same example using the async client:
+ ```python
+ from IPython.display import Audio, display
+ import io
+ import os
+ import numpy as np
+
+ from cartesia.tts import AsyncCartesiaTTS
+
+ async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
+     voices = client.get_voices()
+     voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
+     transcript = "Hello! Welcome to Cartesia"
+
+     # Create a BytesIO object to store the audio data
+     audio_data = io.BytesIO()
+
+     # Generate and stream audio
+     async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
+         buffer = output["audio"]
+         audio_data.write(buffer)
+
+     # Set the cursor position to the beginning of the BytesIO object
+     audio_data.seek(0)
+
+     # Create an Audio object from the BytesIO data
+     audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["sampling_rate"])
+
+     # Display the Audio object
+     display(audio)
+ ```
+
+ To avoid storing your API key in the source code, we recommend doing one of the following:
+ 1. Use [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your `.env` file.
+ 1. Set the `CARTESIA_API_KEY` environment variable, preferably by exporting it from your shell init file (e.g. `~/.zshrc`, `~/.bashrc`).
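As an illustration of the first recommendation, here is a minimal sketch of loading the key from a `.env` file with `python-dotenv` before constructing the client. This snippet is not part of the package itself; it assumes `python-dotenv` is installed and that a `.env` file containing `CARTESIA_API_KEY` exists in the working directory.

```python
import os

from dotenv import load_dotenv  # third-party: python-dotenv
from cartesia.tts import CartesiaTTS

# Read variables from a local .env file into the process environment,
# so the API key never has to appear in source code.
load_dotenv()

client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
```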
@@ -0,0 +1,3 @@
+ from cartesia.tts import AsyncCartesiaTTS, CartesiaTTS
+
+ __all__ = ["CartesiaTTS", "AsyncCartesiaTTS"]
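Given the `__init__.py` shown above, both clients are re-exported at the package root, so package-level imports like the following should also work in 0.0.5 (the README examples import from `cartesia.tts` directly):

```python
# Equivalent to importing from cartesia.tts, via the package-level re-exports
from cartesia import AsyncCartesiaTTS, CartesiaTTS
```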