cartesia 0.0.5rc1__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/PKG-INFO +71 -49
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/README.md +70 -25
- cartesia-0.0.6/cartesia/__init__.py +3 -0
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/cartesia/tts.py +249 -93
- cartesia-0.0.6/cartesia/utils.py +65 -0
- cartesia-0.0.6/cartesia/version.py +1 -0
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/cartesia.egg-info/PKG-INFO +71 -49
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/cartesia.egg-info/SOURCES.txt +1 -0
- cartesia-0.0.6/cartesia.egg-info/requires.txt +19 -0
- cartesia-0.0.6/pyproject.toml +55 -0
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/setup.py +3 -2
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/tests/test_tts.py +127 -58
- cartesia-0.0.5rc1/cartesia/__init__.py +0 -3
- cartesia-0.0.5rc1/cartesia/version.py +0 -1
- cartesia-0.0.5rc1/cartesia.egg-info/requires.txt +0 -27
- cartesia-0.0.5rc1/pyproject.toml +0 -11
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/cartesia.egg-info/dependency_links.txt +0 -0
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/cartesia.egg-info/top_level.txt +0 -0
- {cartesia-0.0.5rc1 → cartesia-0.0.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cartesia
|
3
|
-
Version: 0.0.5rc1
|
3
|
+
Version: 0.0.6
|
4
4
|
Summary: The official Python library for the Cartesia API.
|
5
5
|
Home-page:
|
6
6
|
Author: Cartesia, Inc.
|
@@ -10,31 +10,8 @@ Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
11
11
|
Requires-Python: >=3.8.0
|
12
12
|
Description-Content-Type: text/markdown
|
13
|
-
Requires-Dist: aiohttp
|
14
|
-
Requires-Dist: httpx
|
15
|
-
Requires-Dist: pytest-asyncio
|
16
|
-
Requires-Dist: requests
|
17
|
-
Requires-Dist: websockets
|
18
13
|
Provides-Extra: dev
|
19
|
-
Requires-Dist: pre-commit; extra == "dev"
|
20
|
-
Requires-Dist: docformatter; extra == "dev"
|
21
|
-
Requires-Dist: black==24.1.1; extra == "dev"
|
22
|
-
Requires-Dist: isort==5.13.2; extra == "dev"
|
23
|
-
Requires-Dist: flake8==7.0.0; extra == "dev"
|
24
|
-
Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
|
25
|
-
Requires-Dist: pytest>=8.0.2; extra == "dev"
|
26
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
27
|
-
Requires-Dist: twine; extra == "dev"
|
28
14
|
Provides-Extra: all
|
29
|
-
Requires-Dist: pre-commit; extra == "all"
|
30
|
-
Requires-Dist: docformatter; extra == "all"
|
31
|
-
Requires-Dist: black==24.1.1; extra == "all"
|
32
|
-
Requires-Dist: isort==5.13.2; extra == "all"
|
33
|
-
Requires-Dist: flake8==7.0.0; extra == "all"
|
34
|
-
Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
|
35
|
-
Requires-Dist: pytest>=8.0.2; extra == "all"
|
36
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == "all"
|
37
|
-
Requires-Dist: twine; extra == "all"
|
38
15
|
|
39
16
|
|
40
17
|
# Cartesia Python API Library
|
@@ -60,13 +37,14 @@ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
|
60
37
|
voices = client.get_voices()
|
61
38
|
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
62
39
|
transcript = "Hello! Welcome to Cartesia"
|
40
|
+
model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
|
63
41
|
|
64
42
|
p = pyaudio.PyAudio()
|
65
43
|
|
66
44
|
stream = None
|
67
45
|
|
68
46
|
# Generate and stream audio
|
69
|
-
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
47
|
+
for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
|
70
48
|
buffer = output["audio"]
|
71
49
|
rate = output["sampling_rate"]
|
72
50
|
|
@@ -84,26 +62,68 @@ stream.close()
|
|
84
62
|
p.terminate()
|
85
63
|
```
|
86
64
|
|
87
|
-
|
65
|
+
You can also use the async client if you want to make asynchronous API calls:
|
66
|
+
```python
|
67
|
+
from cartesia.tts import AsyncCartesiaTTS
|
68
|
+
import asyncio
|
69
|
+
import pyaudio
|
70
|
+
import os
|
71
|
+
|
72
|
+
async def write_stream():
|
73
|
+
client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
74
|
+
voices = client.get_voices()
|
75
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
76
|
+
transcript = "Hello! Welcome to Cartesia"
|
77
|
+
model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
|
78
|
+
|
79
|
+
p = pyaudio.PyAudio()
|
80
|
+
|
81
|
+
stream = None
|
82
|
+
|
83
|
+
# Generate and stream audio
|
84
|
+
async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
|
85
|
+
buffer = output["audio"]
|
86
|
+
rate = output["sampling_rate"]
|
87
|
+
|
88
|
+
if not stream:
|
89
|
+
stream = p.open(format=pyaudio.paFloat32,
|
90
|
+
channels=1,
|
91
|
+
rate=rate,
|
92
|
+
output=True)
|
93
|
+
|
94
|
+
# Write the audio data to the stream
|
95
|
+
stream.write(buffer)
|
96
|
+
|
97
|
+
stream.stop_stream()
|
98
|
+
stream.close()
|
99
|
+
p.terminate()
|
100
|
+
|
101
|
+
asyncio.run(write_stream())
|
102
|
+
```
|
103
|
+
|
104
|
+
If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook.
|
105
|
+
Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
|
88
106
|
|
89
107
|
```python
|
90
|
-
from cartesia.tts import CartesiaTTS
|
91
108
|
from IPython.display import Audio
|
92
109
|
import io
|
93
110
|
import os
|
111
|
+
import numpy as np
|
94
112
|
|
95
|
-
|
96
|
-
voices = client.get_voices()
|
97
|
-
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
98
|
-
transcript = "Hello! Welcome to Cartesia"
|
113
|
+
from cartesia.tts import CartesiaTTS
|
99
114
|
|
100
|
-
|
101
|
-
|
115
|
+
with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
|
116
|
+
voices = client.get_voices()
|
117
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
118
|
+
transcript = "Hello! Welcome to Cartesia"
|
102
119
|
|
103
|
-
#
|
104
|
-
|
105
|
-
|
106
|
-
|
120
|
+
# Create a BytesIO object to store the audio data
|
121
|
+
audio_data = io.BytesIO()
|
122
|
+
|
123
|
+
# Generate and stream audio
|
124
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
125
|
+
buffer = output["audio"]
|
126
|
+
audio_data.write(buffer)
|
107
127
|
|
108
128
|
# Set the cursor position to the beginning of the BytesIO object
|
109
129
|
audio_data.seek(0)
|
@@ -115,25 +135,27 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["s
|
|
115
135
|
display(audio)
|
116
136
|
```
|
117
137
|
|
118
|
-
|
138
|
+
Below is the same example using the async client:
|
119
139
|
```python
|
120
|
-
from cartesia.tts import AsyncCartesiaTTS
|
121
140
|
from IPython.display import Audio
|
122
141
|
import io
|
123
142
|
import os
|
143
|
+
import numpy as np
|
124
144
|
|
125
|
-
|
126
|
-
voices = client.get_voices()
|
127
|
-
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
128
|
-
transcript = "Hello! Welcome to Cartesia"
|
145
|
+
from cartesia.tts import AsyncCartesiaTTS
|
129
146
|
|
130
|
-
|
131
|
-
|
147
|
+
async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
|
148
|
+
voices = client.get_voices()
|
149
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
150
|
+
transcript = "Hello! Welcome to Cartesia"
|
132
151
|
|
133
|
-
#
|
134
|
-
|
135
|
-
|
136
|
-
|
152
|
+
# Create a BytesIO object to store the audio data
|
153
|
+
audio_data = io.BytesIO()
|
154
|
+
|
155
|
+
# Generate and stream audio
|
156
|
+
async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
|
157
|
+
buffer = output["audio"]
|
158
|
+
audio_data.write(buffer)
|
137
159
|
|
138
160
|
# Set the cursor position to the beginning of the BytesIO object
|
139
161
|
audio_data.seek(0)
|
@@ -21,13 +21,14 @@ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
|
21
21
|
voices = client.get_voices()
|
22
22
|
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
23
23
|
transcript = "Hello! Welcome to Cartesia"
|
24
|
+
model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
|
24
25
|
|
25
26
|
p = pyaudio.PyAudio()
|
26
27
|
|
27
28
|
stream = None
|
28
29
|
|
29
30
|
# Generate and stream audio
|
30
|
-
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
31
|
+
for output in client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
|
31
32
|
buffer = output["audio"]
|
32
33
|
rate = output["sampling_rate"]
|
33
34
|
|
@@ -45,26 +46,68 @@ stream.close()
|
|
45
46
|
p.terminate()
|
46
47
|
```
|
47
48
|
|
48
|
-
|
49
|
+
You can also use the async client if you want to make asynchronous API calls:
|
50
|
+
```python
|
51
|
+
from cartesia.tts import AsyncCartesiaTTS
|
52
|
+
import asyncio
|
53
|
+
import pyaudio
|
54
|
+
import os
|
55
|
+
|
56
|
+
async def write_stream():
|
57
|
+
client = AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
58
|
+
voices = client.get_voices()
|
59
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
60
|
+
transcript = "Hello! Welcome to Cartesia"
|
61
|
+
model_id = "genial-planet-1346" # (Optional) We'll specify a default if you don't have a specific model in mind
|
62
|
+
|
63
|
+
p = pyaudio.PyAudio()
|
64
|
+
|
65
|
+
stream = None
|
66
|
+
|
67
|
+
# Generate and stream audio
|
68
|
+
async for output in await client.generate(transcript=transcript, voice=voice, model_id=model_id, stream=True):
|
69
|
+
buffer = output["audio"]
|
70
|
+
rate = output["sampling_rate"]
|
71
|
+
|
72
|
+
if not stream:
|
73
|
+
stream = p.open(format=pyaudio.paFloat32,
|
74
|
+
channels=1,
|
75
|
+
rate=rate,
|
76
|
+
output=True)
|
77
|
+
|
78
|
+
# Write the audio data to the stream
|
79
|
+
stream.write(buffer)
|
80
|
+
|
81
|
+
stream.stop_stream()
|
82
|
+
stream.close()
|
83
|
+
p.terminate()
|
84
|
+
|
85
|
+
asyncio.run(write_stream())
|
86
|
+
```
|
87
|
+
|
88
|
+
If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook.
|
89
|
+
Additionally, in these notebook examples we show how to use the client as a context manager (though this is not required).
|
49
90
|
|
50
91
|
```python
|
51
|
-
from cartesia.tts import CartesiaTTS
|
52
92
|
from IPython.display import Audio
|
53
93
|
import io
|
54
94
|
import os
|
95
|
+
import numpy as np
|
55
96
|
|
56
|
-
|
57
|
-
voices = client.get_voices()
|
58
|
-
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
59
|
-
transcript = "Hello! Welcome to Cartesia"
|
97
|
+
from cartesia.tts import CartesiaTTS
|
60
98
|
|
61
|
-
|
62
|
-
|
99
|
+
with CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
|
100
|
+
voices = client.get_voices()
|
101
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
102
|
+
transcript = "Hello! Welcome to Cartesia"
|
63
103
|
|
64
|
-
#
|
65
|
-
|
66
|
-
|
67
|
-
|
104
|
+
# Create a BytesIO object to store the audio data
|
105
|
+
audio_data = io.BytesIO()
|
106
|
+
|
107
|
+
# Generate and stream audio
|
108
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
109
|
+
buffer = output["audio"]
|
110
|
+
audio_data.write(buffer)
|
68
111
|
|
69
112
|
# Set the cursor position to the beginning of the BytesIO object
|
70
113
|
audio_data.seek(0)
|
@@ -76,25 +119,27 @@ audio = Audio(np.frombuffer(audio_data.read(), dtype=np.float32), rate=output["s
|
|
76
119
|
display(audio)
|
77
120
|
```
|
78
121
|
|
79
|
-
|
122
|
+
Below is the same example using the async client:
|
80
123
|
```python
|
81
|
-
from cartesia.tts import AsyncCartesiaTTS
|
82
124
|
from IPython.display import Audio
|
83
125
|
import io
|
84
126
|
import os
|
127
|
+
import numpy as np
|
85
128
|
|
86
|
-
|
87
|
-
voices = client.get_voices()
|
88
|
-
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
89
|
-
transcript = "Hello! Welcome to Cartesia"
|
129
|
+
from cartesia.tts import AsyncCartesiaTTS
|
90
130
|
|
91
|
-
|
92
|
-
|
131
|
+
async with AsyncCartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY")) as client:
|
132
|
+
voices = client.get_voices()
|
133
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
134
|
+
transcript = "Hello! Welcome to Cartesia"
|
93
135
|
|
94
|
-
#
|
95
|
-
|
96
|
-
|
97
|
-
|
136
|
+
# Create a BytesIO object to store the audio data
|
137
|
+
audio_data = io.BytesIO()
|
138
|
+
|
139
|
+
# Generate and stream audio
|
140
|
+
async for output in await client.generate(transcript=transcript, voice=voice, stream=True):
|
141
|
+
buffer = output["audio"]
|
142
|
+
audio_data.write(buffer)
|
98
143
|
|
99
144
|
# Set the cursor position to the beginning of the BytesIO object
|
100
145
|
audio_data.seek(0)
|