cartesia 0.0.0__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia-0.0.3/PKG-INFO +113 -0
- cartesia-0.0.3/README.md +79 -0
- cartesia-0.0.3/cartesia/__init__.py +3 -0
- cartesia-0.0.3/cartesia/tts.py +309 -0
- cartesia-0.0.3/cartesia/version.py +1 -0
- cartesia-0.0.3/cartesia.egg-info/PKG-INFO +113 -0
- cartesia-0.0.3/cartesia.egg-info/SOURCES.txt +12 -0
- cartesia-0.0.3/cartesia.egg-info/requires.txt +22 -0
- cartesia-0.0.3/cartesia.egg-info/top_level.txt +1 -0
- cartesia-0.0.3/pyproject.toml +11 -0
- cartesia-0.0.3/setup.py +255 -0
- cartesia-0.0.3/tests/test_tts.py +133 -0
- cartesia-0.0.0/PKG-INFO +0 -10
- cartesia-0.0.0/cartesia.egg-info/PKG-INFO +0 -10
- cartesia-0.0.0/cartesia.egg-info/SOURCES.txt +0 -5
- cartesia-0.0.0/cartesia.egg-info/top_level.txt +0 -1
- cartesia-0.0.0/setup.py +0 -17
- {cartesia-0.0.0 → cartesia-0.0.3}/cartesia.egg-info/dependency_links.txt +0 -0
- {cartesia-0.0.0 → cartesia-0.0.3}/setup.cfg +0 -0
cartesia-0.0.3/PKG-INFO
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: cartesia
|
3
|
+
Version: 0.0.3
|
4
|
+
Summary: The official Python library for the Cartesia API.
|
5
|
+
Home-page:
|
6
|
+
Author: Cartesia, Inc.
|
7
|
+
Author-email: support@cartesia.ai
|
8
|
+
Classifier: Programming Language :: Python
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
11
|
+
Requires-Python: >=3.8.0
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
Requires-Dist: websockets
|
14
|
+
Requires-Dist: requests
|
15
|
+
Provides-Extra: dev
|
16
|
+
Requires-Dist: pre-commit; extra == "dev"
|
17
|
+
Requires-Dist: docformatter; extra == "dev"
|
18
|
+
Requires-Dist: black==24.1.1; extra == "dev"
|
19
|
+
Requires-Dist: isort==5.13.2; extra == "dev"
|
20
|
+
Requires-Dist: flake8==7.0.0; extra == "dev"
|
21
|
+
Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
|
22
|
+
Requires-Dist: pytest>=8.0.2; extra == "dev"
|
23
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
24
|
+
Provides-Extra: all
|
25
|
+
Requires-Dist: pre-commit; extra == "all"
|
26
|
+
Requires-Dist: docformatter; extra == "all"
|
27
|
+
Requires-Dist: black==24.1.1; extra == "all"
|
28
|
+
Requires-Dist: isort==5.13.2; extra == "all"
|
29
|
+
Requires-Dist: flake8==7.0.0; extra == "all"
|
30
|
+
Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
|
31
|
+
Requires-Dist: pytest>=8.0.2; extra == "all"
|
32
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "all"
|
33
|
+
|
34
|
+
|
35
|
+
# Cartesia Python API Library
|
36
|
+
This is the official Cartesia Python library. It provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
|
37
|
+
|
38
|
+
**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
|
39
|
+
|
40
|
+
## Installation
|
41
|
+
```bash
|
42
|
+
pip install cartesia
|
43
|
+
|
44
|
+
# pip install in editable mode w/ dev dependencies
|
45
|
+
pip install -e '.[dev]'
|
46
|
+
```
|
47
|
+
|
48
|
+
## Usage
|
49
|
+
```python
|
50
|
+
from cartesia.tts import CartesiaTTS
|
51
|
+
import pyaudio
|
52
|
+
import os
|
53
|
+
|
54
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
55
|
+
voices = client.get_voices()
|
56
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
57
|
+
transcript = "Hello! Welcome to Cartesia"
|
58
|
+
|
59
|
+
p = pyaudio.PyAudio()
|
60
|
+
|
61
|
+
stream = None
|
62
|
+
|
63
|
+
# Generate and stream audio
|
64
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
65
|
+
buffer = output["audio"]
|
66
|
+
rate = output["sampling_rate"]
|
67
|
+
|
68
|
+
if not stream:
|
69
|
+
stream = p.open(format=pyaudio.paFloat32,
|
70
|
+
channels=1,
|
71
|
+
rate=rate,
|
72
|
+
output=True)
|
73
|
+
|
74
|
+
# Write the audio data to the stream
|
75
|
+
stream.write(buffer)
|
76
|
+
|
77
|
+
stream.stop_stream()
|
78
|
+
stream.close()
|
79
|
+
p.terminate()
|
80
|
+
```
|
81
|
+
|
82
|
+
If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
|
83
|
+
|
84
|
+
```python
|
85
|
+
from cartesia.tts import CartesiaTTS
|
86
|
+
from IPython.display import Audio
|
87
|
+
import io
|
88
|
+
import os
|
89
|
+
|
90
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
91
|
+
voices = client.get_voices()
|
92
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
93
|
+
transcript = "Hello! Welcome to Cartesia"
|
94
|
+
|
95
|
+
# Create a BytesIO object to store the audio data
|
96
|
+
audio_data = io.BytesIO()
|
97
|
+
|
98
|
+
# Generate and stream audio
|
99
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
100
|
+
buffer = output["audio"]
|
101
|
+
audio_data.write(buffer)
|
102
|
+
|
103
|
+
# Set the cursor position to the beginning of the BytesIO object
|
104
|
+
audio_data.seek(0)
|
105
|
+
|
106
|
+
# Create an Audio object from the BytesIO data
|
107
|
+
audio = Audio(audio_data, rate=output["sampling_rate"])
|
108
|
+
|
109
|
+
# Display the Audio object
|
110
|
+
display(audio)
|
111
|
+
```
|
112
|
+
|
113
|
+
We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
|
cartesia-0.0.3/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Cartesia Python API Library
|
2
|
+
This is the official Cartesia Python library. It provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
|
3
|
+
|
4
|
+
**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
```bash
|
8
|
+
pip install cartesia
|
9
|
+
|
10
|
+
# pip install in editable mode w/ dev dependencies
|
11
|
+
pip install -e '.[dev]'
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
```python
|
16
|
+
from cartesia.tts import CartesiaTTS
|
17
|
+
import pyaudio
|
18
|
+
import os
|
19
|
+
|
20
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
21
|
+
voices = client.get_voices()
|
22
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
23
|
+
transcript = "Hello! Welcome to Cartesia"
|
24
|
+
|
25
|
+
p = pyaudio.PyAudio()
|
26
|
+
|
27
|
+
stream = None
|
28
|
+
|
29
|
+
# Generate and stream audio
|
30
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
31
|
+
buffer = output["audio"]
|
32
|
+
rate = output["sampling_rate"]
|
33
|
+
|
34
|
+
if not stream:
|
35
|
+
stream = p.open(format=pyaudio.paFloat32,
|
36
|
+
channels=1,
|
37
|
+
rate=rate,
|
38
|
+
output=True)
|
39
|
+
|
40
|
+
# Write the audio data to the stream
|
41
|
+
stream.write(buffer)
|
42
|
+
|
43
|
+
stream.stop_stream()
|
44
|
+
stream.close()
|
45
|
+
p.terminate()
|
46
|
+
```
|
47
|
+
|
48
|
+
If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
|
49
|
+
|
50
|
+
```python
|
51
|
+
from cartesia.tts import CartesiaTTS
|
52
|
+
from IPython.display import Audio
|
53
|
+
import io
|
54
|
+
import os
|
55
|
+
|
56
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
57
|
+
voices = client.get_voices()
|
58
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
59
|
+
transcript = "Hello! Welcome to Cartesia"
|
60
|
+
|
61
|
+
# Create a BytesIO object to store the audio data
|
62
|
+
audio_data = io.BytesIO()
|
63
|
+
|
64
|
+
# Generate and stream audio
|
65
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
66
|
+
buffer = output["audio"]
|
67
|
+
audio_data.write(buffer)
|
68
|
+
|
69
|
+
# Set the cursor position to the beginning of the BytesIO object
|
70
|
+
audio_data.seek(0)
|
71
|
+
|
72
|
+
# Create an Audio object from the BytesIO data
|
73
|
+
audio = Audio(audio_data, rate=output["sampling_rate"])
|
74
|
+
|
75
|
+
# Display the Audio object
|
76
|
+
display(audio)
|
77
|
+
```
|
78
|
+
|
79
|
+
We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
|
@@ -0,0 +1,309 @@
|
|
1
|
+
import base64
|
2
|
+
import json
|
3
|
+
import os
|
4
|
+
import uuid
|
5
|
+
from typing import Any, Dict, Generator, List, Optional, TypedDict, Union
|
6
|
+
|
7
|
+
import requests
|
8
|
+
from websockets.sync.client import connect
|
9
|
+
|
10
|
+
DEFAULT_MODEL_ID = "genial-planet-1346"
|
11
|
+
DEFAULT_BASE_URL = "api.cartesia.ai"
|
12
|
+
DEFAULT_API_VERSION = "v0"
|
13
|
+
|
14
|
+
|
15
|
+
class AudioOutput(TypedDict):
    """Audio returned by the client, paired with its sampling rate.

    Both the streaming generators and the non-streaming ``generate`` call
    produce dictionaries with exactly these two keys.
    """

    # Raw audio bytes (the base64 payload of the API response, decoded).
    audio: bytes
    # Sampling rate reported by the API alongside the audio.
    sampling_rate: int
|
18
|
+
|
19
|
+
|
20
|
+
Embedding = List[float]
|
21
|
+
|
22
|
+
|
23
|
+
class VoiceMetadata(TypedDict):
    """Description of a single voice as returned by the voices endpoint."""

    # Identifier used to look the voice up (e.g. to fetch its embedding).
    id: str
    # Display name; used as the key of the mapping returned by `get_voices`.
    name: str
    description: str
    # Annotated as optional: `get_voices` does not request embeddings when
    # called with `skip_embeddings=True`.
    embedding: Optional[Embedding]
|
28
|
+
|
29
|
+
|
30
|
+
class CartesiaTTS:
    """The client for Cartesia's text-to-speech library.

    This client contains methods to interact with the Cartesia text-to-speech API.
    It covers listing voices, fetching or cloning voice embeddings, and
    generating audio either over HTTP or over a websocket.

    Examples:

        >>> client = CartesiaTTS()

        # Load available voices and their metadata (excluding the embeddings).
        # Embeddings are fetched with `get_voice_embedding`. This avoids preloading
        # all of the embeddings, which can be expensive if there are a lot of voices.
        >>> voices = client.get_voices()
        >>> embedding = client.get_voice_embedding(voice_id=voices["Milo"]["id"])
        >>> audio = client.generate(transcript="Hello world!", voice=embedding)

        # Preload all available voices and their embeddings if you plan on reusing
        # all of the embeddings often.
        >>> voices = client.get_voices(skip_embeddings=False)
        >>> embedding = voices["Milo"]["embedding"]
        >>> audio = client.generate(transcript="Hello world!", voice=embedding)

        # Generate audio stream
        >>> for audio_chunk in client.generate(transcript="Hello world!", voice=embedding, stream=True):
        ...     audio, sr = audio_chunk["audio"], audio_chunk["sampling_rate"]
    """

    def __init__(self, *, api_key: Optional[str] = None):
        """
        Args:
            api_key: The API key to use for authorization.
                If not specified, the API key will be read from the environment variable
                `CARTESIA_API_KEY`.
        """
        self.base_url = os.environ.get("CARTESIA_BASE_URL", DEFAULT_BASE_URL)
        self.api_key = api_key or os.environ.get("CARTESIA_API_KEY")
        self.api_version = os.environ.get("CARTESIA_API_VERSION", DEFAULT_API_VERSION)
        self.headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"}
        # Set before connecting so the attribute exists even if the connect fails.
        self.websocket = None
        self.refresh_websocket()

    def get_voices(self, skip_embeddings: bool = True) -> Dict[str, VoiceMetadata]:
        """Returns a mapping from voice name -> voice metadata.

        Args:
            skip_embeddings: Whether to skip returning the embeddings.
                It is recommended to skip if you only want to see what
                voices are available, since loading embeddings for all your voices can be expensive.
                You can then use ``get_voice_embedding`` to get the embeddings for the voices you are
                interested in.

        Returns:
            A mapping from voice name -> voice metadata.

        Raises:
            ValueError: If the API does not answer with status code 200.

        Note:
            If the voice name is not unique, there is undefined behavior as to which
            voice will correspond to the name. To be more thorough, look at the web
            client to find the `voice_id` for the voice you are looking for.

        Usage:
            >>> client = CartesiaTTS()
            >>> voices = client.get_voices()
            >>> voices
            {
                "Jane": {
                    "id": "c1d1d3a8-6f4e-4b3f-8b3e-2e1b3e1b3e1b",
                    "name": "Jane",
                }
            }
            >>> embedding = client.get_voice_embedding(voice_id=voices["Jane"]["id"])
            >>> audio = client.generate(transcript="Hello world!", voice=embedding)
        """
        params = {"select": "id, name, description"} if skip_embeddings else None
        response = requests.get(f"{self._http_url()}/voices", headers=self.headers, params=params)

        if response.status_code != 200:
            raise ValueError(f"Failed to get voices. Error: {response.text}")

        voices = response.json()
        # TODO: Update the API to return the embedding as a list of floats rather than string.
        if not skip_embeddings:
            for voice in voices:
                # Only decode string payloads, mirroring `get_voice_embedding`,
                # so an already-decoded list does not raise a TypeError.
                if isinstance(voice["embedding"], str):
                    voice["embedding"] = json.loads(voice["embedding"])
        return {voice["name"]: voice for voice in voices}

    def get_voice_embedding(
        self,
        *,
        voice_id: Optional[str] = None,
        filepath: Optional[str] = None,
        link: Optional[str] = None,
    ) -> Embedding:
        """Get a voice embedding from voice_id, a filepath or YouTube url.

        Args:
            voice_id: The voice id.
            filepath: Path to audio file from which to get the audio.
            link: The url to get the audio from. Currently only supports youtube shared urls.

        Returns:
            The voice embedding.

        Raises:
            ValueError: If zero or more than one of `voice_id`, `filepath` or `link`
                is specified (exactly one is required), or if the API does not
                answer with status code 200.
        """
        if sum(bool(x) for x in (voice_id, filepath, link)) != 1:
            raise ValueError("Exactly one of `voice_id`, `filepath` or `link` should be specified.")

        if voice_id:
            url = f"{self._http_url()}/voices/embedding/{voice_id}"
            response = requests.get(url, headers=self.headers)
        elif filepath:
            url = f"{self._http_url()}/voices/clone/clip"
            headers = self.headers.copy()
            # The default content type of JSON is incorrect for file uploads
            headers.pop("Content-Type")
            # Close the clip as soon as the upload finishes instead of leaking the handle.
            with open(filepath, "rb") as clip:
                response = requests.post(url, headers=headers, files={"clip": clip})
        else:
            # The validation above guarantees that `link` is the one that was given.
            url = f"{self._http_url()}/voices/clone/url"
            params = {"link": link}
            response = requests.post(url, headers=self.headers, params=params)

        if response.status_code != 200:
            raise ValueError(
                f"Failed to clone voice. Status Code: {response.status_code}\n"
                f"Error: {response.text}"
            )

        # Handle successful response
        out = response.json()
        if isinstance(out["embedding"], str):
            out["embedding"] = json.loads(out["embedding"])
        return out["embedding"]

    def refresh_websocket(self):
        """Refresh the websocket connection.

        Closes the current connection if it is still open, then opens a new one.

        Note:
            The connection is synchronous.
        """
        if self.websocket and not self._is_websocket_closed():
            self.websocket.close()
        self.websocket = connect(
            f"{self._ws_url()}/audio/websocket?api_key={self.api_key}",
            close_timeout=None,
        )

    def _is_websocket_closed(self):
        """Return True when the websocket's underlying socket has no file descriptor."""
        return self.websocket.socket.fileno() == -1

    def _check_inputs(
        self, transcript: str, duration: Optional[float], chunk_time: Optional[float]
    ):
        """Validate the user-supplied generation parameters.

        Raises:
            ValueError: If `chunk_time` is outside [0.1, 0.5], if `duration` is
                smaller than `chunk_time`, or if `transcript` is blank.
        """
        if chunk_time is not None:
            if chunk_time < 0.1 or chunk_time > 0.5:
                raise ValueError("`chunk_time` must be between 0.1 and 0.5")

        if chunk_time is not None and duration is not None:
            if duration < chunk_time:
                raise ValueError("`duration` must be greater than chunk_time")

        if transcript.strip() == "":
            raise ValueError("`transcript` must be non empty")

    def generate(
        self,
        *,
        transcript: str,
        duration: Optional[int] = None,
        chunk_time: Optional[float] = None,
        voice: Optional[Embedding] = None,
        stream: bool = False,
        websocket: bool = True,
    ) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
        """Generate audio from a transcript.

        Args:
            transcript: The text to generate audio for.
            duration: The maximum duration of the audio in seconds.
            chunk_time: How long each audio segment should be in seconds.
                This should not need to be adjusted.
            voice: The voice to use for generating audio.
                This can either be a voice id (string) or an embedding vector (List[float]).
            stream: Whether to stream the audio or not.
                If ``True`` this function returns a generator.
            websocket: Whether to use a websocket for streaming audio.
                Using the websocket reduces latency because the connection
                handshake has already been performed ahead of time.

        Returns:
            A generator if `stream` is True, otherwise a dictionary.
            Dictionary from both generator and non-generator return types have the following keys:
                * "audio": The audio as a bytes buffer.
                * "sampling_rate": The sampling rate of the audio.

        Raises:
            ValueError: If the inputs fail validation (see `_check_inputs`).
        """
        self._check_inputs(transcript, duration, chunk_time)

        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID)

        # Only send the optional fields the caller actually provided.
        optional_body = dict(
            duration=duration,
            chunk_time=chunk_time,
            voice=voice,
        )
        body.update({k: v for k, v in optional_body.items() if v is not None})

        if websocket:
            generator = self._generate_ws(body)
        else:
            generator = self._generate_http(body)

        if stream:
            return generator

        # Non-streaming mode: drain the generator and stitch the chunks together.
        chunks = []
        sampling_rate = None
        for chunk in generator:
            if sampling_rate is None:
                sampling_rate = chunk["sampling_rate"]
            chunks.append(chunk["audio"])

        return {"audio": b"".join(chunks), "sampling_rate": sampling_rate}

    def _generate_http(self, body: Dict[str, Any]):
        """Stream audio chunks from the HTTP endpoint.

        The response body is treated as a sequence of flat JSON objects; each
        complete object is decoded and yielded as soon as it is available.

        Raises:
            ValueError: If the API does not answer with status code 200.
        """
        response = requests.post(
            f"{self._http_url()}/audio/stream",
            stream=True,
            data=json.dumps(body),
            headers=self.headers,
        )
        if response.status_code != 200:
            raise ValueError(f"Failed to generate audio. {response.text}")

        buffer = ""
        for chunk_bytes in response.iter_content(chunk_size=None):
            buffer += chunk_bytes.decode("utf-8")
            while True:
                start_index = buffer.find("{")
                if start_index == -1:
                    break
                end_index = buffer.find("}", start_index)
                if end_index == -1:
                    # No closing brace after the opening one yet: wait for more
                    # data. (Looping on "'}' in buffer" here would never end when
                    # the only '}' precedes the '{'.)
                    break
                try:
                    chunk_json = json.loads(buffer[start_index : end_index + 1])
                except json.JSONDecodeError:
                    break
                audio = base64.b64decode(chunk_json["data"])
                yield {"audio": audio, "sampling_rate": chunk_json["sampling_rate"]}
                buffer = buffer[end_index + 1 :]

        # Best effort: try to decode whatever is left once the stream has ended.
        if buffer:
            try:
                chunk_json = json.loads(buffer)
                audio = base64.b64decode(chunk_json["data"])
                yield {"audio": audio, "sampling_rate": chunk_json["sampling_rate"]}
            except json.JSONDecodeError:
                pass

    def _generate_ws(self, body: Dict[str, Any]):
        """Stream audio chunks over the websocket connection.

        Reconnects first if the websocket is missing or closed, sends the
        request, and yields chunks until a message with a truthy "done" arrives.

        Raises:
            RuntimeError: If receiving or decoding a message fails. The original
                exception is attached as the cause.
        """
        if not self.websocket or self._is_websocket_closed():
            self.refresh_websocket()

        self.websocket.send(json.dumps({"data": body, "context_id": uuid.uuid4().hex}))
        # Bound up front so the error message below works even when the very
        # first `recv()` fails.
        response = None
        try:
            response = json.loads(self.websocket.recv())
            while not response["done"]:
                audio = base64.b64decode(response["data"])
                yield {"audio": audio, "sampling_rate": response["sampling_rate"]}

                response = json.loads(self.websocket.recv())
        except Exception as exc:
            raise RuntimeError(f"Failed to generate audio. {response}") from exc

    def _http_url(self):
        """Return the versioned HTTP base URL (plain http for localhost, https otherwise)."""
        prefix = "http" if "localhost" in self.base_url else "https"
        return f"{prefix}://{self.base_url}/{self.api_version}"

    def _ws_url(self):
        """Return the versioned websocket base URL (ws for localhost, wss otherwise)."""
        prefix = "ws" if "localhost" in self.base_url else "wss"
        return f"{prefix}://{self.base_url}/{self.api_version}"

    def __del__(self):
        # `websocket` is None (or missing entirely) when construction failed
        # before a connection was established; there is nothing to close then.
        websocket = getattr(self, "websocket", None)
        if websocket is not None and websocket.socket.fileno() > -1:
            websocket.close()
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.0.3"
|
@@ -0,0 +1,113 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: cartesia
|
3
|
+
Version: 0.0.3
|
4
|
+
Summary: The official Python library for the Cartesia API.
|
5
|
+
Home-page:
|
6
|
+
Author: Cartesia, Inc.
|
7
|
+
Author-email: support@cartesia.ai
|
8
|
+
Classifier: Programming Language :: Python
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
11
|
+
Requires-Python: >=3.8.0
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
Requires-Dist: websockets
|
14
|
+
Requires-Dist: requests
|
15
|
+
Provides-Extra: dev
|
16
|
+
Requires-Dist: pre-commit; extra == "dev"
|
17
|
+
Requires-Dist: docformatter; extra == "dev"
|
18
|
+
Requires-Dist: black==24.1.1; extra == "dev"
|
19
|
+
Requires-Dist: isort==5.13.2; extra == "dev"
|
20
|
+
Requires-Dist: flake8==7.0.0; extra == "dev"
|
21
|
+
Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
|
22
|
+
Requires-Dist: pytest>=8.0.2; extra == "dev"
|
23
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
24
|
+
Provides-Extra: all
|
25
|
+
Requires-Dist: pre-commit; extra == "all"
|
26
|
+
Requires-Dist: docformatter; extra == "all"
|
27
|
+
Requires-Dist: black==24.1.1; extra == "all"
|
28
|
+
Requires-Dist: isort==5.13.2; extra == "all"
|
29
|
+
Requires-Dist: flake8==7.0.0; extra == "all"
|
30
|
+
Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
|
31
|
+
Requires-Dist: pytest>=8.0.2; extra == "all"
|
32
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "all"
|
33
|
+
|
34
|
+
|
35
|
+
# Cartesia Python API Library
|
36
|
+
This is the official Cartesia Python library. It provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
|
37
|
+
|
38
|
+
**Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
|
39
|
+
|
40
|
+
## Installation
|
41
|
+
```bash
|
42
|
+
pip install cartesia
|
43
|
+
|
44
|
+
# pip install in editable mode w/ dev dependencies
|
45
|
+
pip install -e '.[dev]'
|
46
|
+
```
|
47
|
+
|
48
|
+
## Usage
|
49
|
+
```python
|
50
|
+
from cartesia.tts import CartesiaTTS
|
51
|
+
import pyaudio
|
52
|
+
import os
|
53
|
+
|
54
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
55
|
+
voices = client.get_voices()
|
56
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
57
|
+
transcript = "Hello! Welcome to Cartesia"
|
58
|
+
|
59
|
+
p = pyaudio.PyAudio()
|
60
|
+
|
61
|
+
stream = None
|
62
|
+
|
63
|
+
# Generate and stream audio
|
64
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
65
|
+
buffer = output["audio"]
|
66
|
+
rate = output["sampling_rate"]
|
67
|
+
|
68
|
+
if not stream:
|
69
|
+
stream = p.open(format=pyaudio.paFloat32,
|
70
|
+
channels=1,
|
71
|
+
rate=rate,
|
72
|
+
output=True)
|
73
|
+
|
74
|
+
# Write the audio data to the stream
|
75
|
+
stream.write(buffer)
|
76
|
+
|
77
|
+
stream.stop_stream()
|
78
|
+
stream.close()
|
79
|
+
p.terminate()
|
80
|
+
```
|
81
|
+
|
82
|
+
If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
|
83
|
+
|
84
|
+
```python
|
85
|
+
from cartesia.tts import CartesiaTTS
|
86
|
+
from IPython.display import Audio
|
87
|
+
import io
|
88
|
+
import os
|
89
|
+
|
90
|
+
client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
|
91
|
+
voices = client.get_voices()
|
92
|
+
voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
|
93
|
+
transcript = "Hello! Welcome to Cartesia"
|
94
|
+
|
95
|
+
# Create a BytesIO object to store the audio data
|
96
|
+
audio_data = io.BytesIO()
|
97
|
+
|
98
|
+
# Generate and stream audio
|
99
|
+
for output in client.generate(transcript=transcript, voice=voice, stream=True):
|
100
|
+
buffer = output["audio"]
|
101
|
+
audio_data.write(buffer)
|
102
|
+
|
103
|
+
# Set the cursor position to the beginning of the BytesIO object
|
104
|
+
audio_data.seek(0)
|
105
|
+
|
106
|
+
# Create an Audio object from the BytesIO data
|
107
|
+
audio = Audio(audio_data, rate=output["sampling_rate"])
|
108
|
+
|
109
|
+
# Display the Audio object
|
110
|
+
display(audio)
|
111
|
+
```
|
112
|
+
|
113
|
+
We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
|
@@ -0,0 +1,12 @@
|
|
1
|
+
README.md
|
2
|
+
pyproject.toml
|
3
|
+
setup.py
|
4
|
+
cartesia/__init__.py
|
5
|
+
cartesia/tts.py
|
6
|
+
cartesia/version.py
|
7
|
+
cartesia.egg-info/PKG-INFO
|
8
|
+
cartesia.egg-info/SOURCES.txt
|
9
|
+
cartesia.egg-info/dependency_links.txt
|
10
|
+
cartesia.egg-info/requires.txt
|
11
|
+
cartesia.egg-info/top_level.txt
|
12
|
+
tests/test_tts.py
|
@@ -0,0 +1,22 @@
|
|
1
|
+
websockets
|
2
|
+
requests
|
3
|
+
|
4
|
+
[all]
|
5
|
+
pre-commit
|
6
|
+
docformatter
|
7
|
+
black==24.1.1
|
8
|
+
isort==5.13.2
|
9
|
+
flake8==7.0.0
|
10
|
+
flake8-bugbear==24.2.6
|
11
|
+
pytest>=8.0.2
|
12
|
+
pytest-cov>=4.1.0
|
13
|
+
|
14
|
+
[dev]
|
15
|
+
pre-commit
|
16
|
+
docformatter
|
17
|
+
black==24.1.1
|
18
|
+
isort==5.13.2
|
19
|
+
flake8==7.0.0
|
20
|
+
flake8-bugbear==24.2.6
|
21
|
+
pytest>=8.0.2
|
22
|
+
pytest-cov>=4.1.0
|
@@ -0,0 +1 @@
|
|
1
|
+
cartesia
|
cartesia-0.0.3/setup.py
ADDED
@@ -0,0 +1,255 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
import io
|
5
|
+
import os
|
6
|
+
|
7
|
+
# Note: To use the 'upload' functionality of this file, you must:
|
8
|
+
# $ pipenv install twine --dev
|
9
|
+
import shutil
|
10
|
+
import subprocess
|
11
|
+
import sys
|
12
|
+
from distutils.util import convert_path
|
13
|
+
from shutil import rmtree
|
14
|
+
|
15
|
+
from setuptools import Command, find_packages, setup
|
16
|
+
|
17
|
+
PACKAGE_DIR = "cartesia"
|
18
|
+
main_ns = {}
|
19
|
+
ver_path = convert_path(os.path.join(PACKAGE_DIR, "version.py"))
|
20
|
+
with open(ver_path) as ver_file:
|
21
|
+
exec(ver_file.read(), main_ns)
|
22
|
+
|
23
|
+
|
24
|
+
# Package meta-data.
|
25
|
+
NAME = "cartesia"
|
26
|
+
DESCRIPTION = "The official Python library for the Cartesia API."
|
27
|
+
URL = ""
|
28
|
+
EMAIL = "support@cartesia.ai"
|
29
|
+
AUTHOR = "Cartesia, Inc."
|
30
|
+
REQUIRES_PYTHON = ">=3.8.0"
|
31
|
+
VERSION = main_ns["__version__"]
|
32
|
+
|
33
|
+
|
34
|
+
# What packages are required for this module to be executed?
|
35
|
+
def get_requirements(path):
    """Read a requirements file and return its requirement specifiers.

    Args:
        path: Path to a text file with one requirement per line.

    Returns:
        A list of the stripped, non-empty lines of the file, in file order.
        Blank lines and lines starting with ``#`` are left out so that they
        are not handed on as (empty or bogus) requirement strings.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        stripped = [line.strip() for line in f]

    return [line for line in stripped if line and not line.startswith("#")]
|
41
|
+
|
42
|
+
|
43
|
+
REQUIRED = get_requirements("requirements.txt")
|
44
|
+
REQUIRED_DEV = get_requirements("requirements-dev.txt")
|
45
|
+
|
46
|
+
# What packages are optional?
|
47
|
+
EXTRAS = {
|
48
|
+
"dev": REQUIRED_DEV,
|
49
|
+
}
|
50
|
+
EXTRAS["all"] = [pkg for group in EXTRAS.values() for pkg in group]
|
51
|
+
|
52
|
+
# The rest you shouldn't have to touch too much :)
|
53
|
+
# ------------------------------------------------
|
54
|
+
# Except, perhaps the License and Trove Classifiers!
|
55
|
+
# If you do change the License, remember to change the Trove Classifier for that!
|
56
|
+
|
57
|
+
here = os.path.abspath(os.path.dirname(__file__))
|
58
|
+
|
59
|
+
# Import the README and use it as the long-description.
|
60
|
+
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
|
61
|
+
try:
|
62
|
+
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
|
63
|
+
long_description = "\n" + f.read()
|
64
|
+
except FileNotFoundError:
|
65
|
+
long_description = DESCRIPTION
|
66
|
+
|
67
|
+
# Load the package's __version__.py module as a dictionary.
|
68
|
+
about = {}
|
69
|
+
if not VERSION:
|
70
|
+
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
|
71
|
+
with open(os.path.join(here, project_slug, "__version__.py")) as f:
|
72
|
+
exec(f.read(), about)
|
73
|
+
else:
|
74
|
+
about["__version__"] = VERSION
|
75
|
+
|
76
|
+
|
77
|
+
class UploadCommand(Command):
    """Support setup.py upload."""

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print("\033[1m{0}\033[0m".format(s))

    @staticmethod
    def _run(command):
        """Run a shell command and abort the upload if it fails.

        Stopping at the first failure keeps a broken build from being
        uploaded, and keeps a failed upload from being tagged and pushed.
        """
        if os.system(command) != 0:
            sys.exit("Command failed: {0}".format(command))

    def initialize_options(self):
        # Required by the Command interface; this command takes no options.
        pass

    def finalize_options(self):
        # Required by the Command interface; there is nothing to validate.
        pass

    def run(self):
        """Build the distributions, upload them with Twine and push a version tag."""
        try:
            self.status("Removing previous builds…")
            rmtree(os.path.join(here, "dist"))
        except OSError:
            # No previous `dist` directory (or it could not be removed):
            # carry on, exactly as before.
            pass

        self.status("Building Source and Wheel (universal) distribution…")
        self._run("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))

        self.status("Uploading the package to PyPI via Twine…")
        self._run("twine upload dist/*")

        self.status("Pushing git tags…")
        self._run("git tag v{0}".format(about["__version__"]))
        self._run("git push --tags")

        sys.exit()
|
112
|
+
|
113
|
+
|
114
|
+
class BumpVersionCommand(Command):
    """Bump the package version and commit the change locally.

    To use: python setup.py bumpversion -v <version>

    The command checks that the current branch is 'main', pulls from origin,
    checks that the working directory is clean, rewrites the version file and
    commits it with the message '[bumpversion] v<version>'. Pushing the commit
    is currently disabled (see the end of ``run``) and no tag is created here.
    """

    description = "Bump the package version and commit the change."
    user_options = [
        ("version=", "v", "the new version number"),
    ]

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print("\033[1m{0}\033[0m".format(s))

    @staticmethod
    def _git(*args):
        """Run ``git <args>`` without a shell and return its exit code."""
        return subprocess.call(["git", *args])

    def initialize_options(self):
        """Set the default for the ``version`` option (unset)."""
        self.version = None

    def finalize_options(self):
        """Validate that a version was given and that it is a strict increase.

        Raises:
            ValueError: If no version was specified, or the new version is not
                greater than the current one.
        """
        # This package cannot be imported at top level because it
        # is not recognized by Github Actions.
        from packaging import version

        if self.version is None:
            raise ValueError("Please specify a version number.")

        current_version = about["__version__"]
        if not version.Version(self.version) > version.Version(current_version):
            raise ValueError(
                f"New version ({self.version}) must be greater than "
                f"current version ({current_version})."
            )

    def _undo(self):
        """Discard staged and unstaged changes to the version file."""
        # update_version() rewrites `ver_path`, so that is the file to restore.
        self._git("restore", "--staged", ver_path)
        self._git("checkout", "--", ver_path)

    def _version_written(self):
        """Return True if the version file now declares ``self.version``."""
        expected = f'__version__ = "{self.version}"'
        with open(ver_path, "r") as f:
            return any(line.strip() == expected for line in f)

    def run(self):
        """Perform the pre-flight checks, rewrite the version and commit it.

        Raises:
            RuntimeError: If not on 'main', if pulling fails, if the working
                directory is dirty, or if updating, staging or committing the
                version file fails.
        """
        current_version = about["__version__"]

        self.status("Checking current branch is 'main'")
        current_branch = get_git_branch()
        if current_branch != "main":
            raise RuntimeError(
                "You can only bump the version from the 'main' branch. "
                "You are currently on the '{}' branch.".format(current_branch)
            )

        self.status("Pulling latest changes from origin")
        if self._git("pull") != 0:
            raise RuntimeError("Failed to pull from origin.")

        self.status("Checking working directory is clean")
        # Run both checks so unstaged and staged diffs are both shown.
        unstaged = self._git("diff", "--exit-code")
        staged = self._git("diff", "--cached", "--exit-code")
        if unstaged != 0 or staged != 0:
            raise RuntimeError("Working directory is not clean.")

        # TODO: Add check to see if all tests are passing on main.

        # Change the version in the version file.
        self.status(f"Updating version {current_version} -> {self.version}")
        update_version(self.version)
        # Verify against the file on disk: `about` still holds the old
        # version, so comparing with it would always report a failure.
        if not self._version_written():
            self._undo()
            raise RuntimeError("Failed to update version.")

        self.status(f"Adding {ver_path} to git")
        if self._git("add", ver_path) != 0:
            self._undo()
            raise RuntimeError("Failed to add file to git.")

        # Commit the file with a message '[bumpversion] v<version>'.
        self.status(f"Commit with message '[bumpversion] v{self.version}'")
        if self._git("commit", "-m", f"[bumpversion] v{self.version}") != 0:
            self._undo()
            raise RuntimeError("Failed to commit file to git.")

        # Push the commit to origin.
        # self.status("Pushing commit to origin")
        # err_code = os.system("git push")
        # if err_code != 0:
        #     # TODO: undo the commit automatically.
        #     raise RuntimeError("Failed to push commit to origin.")

        sys.exit()
|
205
|
+
|
206
|
+
|
207
|
+
def update_version(version):
    """Rewrite the ``__version__`` line of the version file in place.

    Every line of the file at ``ver_path`` that starts with ``__version__``
    is replaced by ``__version__ = "<version>"``; all other lines are written
    back unchanged.

    Args:
        version: The new version string to write.
    """
    # Read everything first (and close the handle) before reopening for write.
    with open(ver_path, "r") as f:
        lines = f.readlines()

    updated = [
        f'__version__ = "{version}"\n' if line.startswith("__version__") else line
        for line in lines
    ]

    with open(ver_path, "w") as f:
        f.writelines(updated)
|
217
|
+
|
218
|
+
|
219
|
+
def get_git_branch():
    """Return the name of the current branch.

    Runs ``git branch`` and returns the entry marked with ``*``.

    Raises:
        RuntimeError: If ``git branch`` cannot be started, exits with a
            non-zero status, or reports no current branch.
    """
    try:
        # Capture stderr as well so a failure can be reported to the caller.
        proc = subprocess.run(
            ["git", "branch"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        raise RuntimeError(f"Error finding git branch: {exc}") from exc

    if proc.returncode != 0:
        err = proc.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"Error finding git branch: {err}")

    for line in proc.stdout.decode("utf-8").split("\n"):
        if line.startswith("*"):
            return line.replace("*", "").strip()

    raise RuntimeError("Error finding git branch: no current branch reported.")
|
229
|
+
|
230
|
+
|
231
|
+
# Where the magic happens:
|
232
|
+
setup(
|
233
|
+
name=NAME,
|
234
|
+
version=about["__version__"],
|
235
|
+
description=DESCRIPTION,
|
236
|
+
long_description=long_description,
|
237
|
+
long_description_content_type="text/markdown",
|
238
|
+
author=AUTHOR,
|
239
|
+
author_email=EMAIL,
|
240
|
+
python_requires=REQUIRES_PYTHON,
|
241
|
+
url=URL,
|
242
|
+
packages=[PACKAGE_DIR],
|
243
|
+
install_requires=REQUIRED,
|
244
|
+
extras_require=EXTRAS,
|
245
|
+
include_package_data=True,
|
246
|
+
classifiers=[
|
247
|
+
# Trove classifiers
|
248
|
+
# Full list: https://pypi.org/classifiers/
|
249
|
+
"Programming Language :: Python",
|
250
|
+
"Programming Language :: Python :: 3",
|
251
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
252
|
+
],
|
253
|
+
# Custom commands: `python setup.py upload` and `python setup.py bumpversion`.
|
254
|
+
cmdclass={"upload": UploadCommand, "bumpversion": BumpVersionCommand},
|
255
|
+
)
|
@@ -0,0 +1,133 @@
|
|
1
|
+
"""Test against the production Cartesia TTS API.
|
2
|
+
|
3
|
+
This test suite tries to be as general as possible because different keys
|
4
|
+
will lead to different results. Therefore, we cannot test for complete correctness
|
5
|
+
but rather for general correctness.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
from typing import Dict, Generator
|
10
|
+
|
11
|
+
import pytest
|
12
|
+
|
13
|
+
from cartesia.tts import CartesiaTTS, VoiceMetadata
|
14
|
+
|
15
|
+
SAMPLE_VOICE = "Milo"
|
16
|
+
|
17
|
+
|
18
|
+
class _Resources:
    """Container for the objects shared across tests: the client and its voices."""

    def __init__(self, *, client: CartesiaTTS, voices: Dict[str, VoiceMetadata]):
        # Both arguments are keyword-only and stored as-is.
        self.voices = voices
        self.client = client
|
22
|
+
|
23
|
+
|
24
|
+
@pytest.fixture(scope="session")
def client():
    """Session-scoped client built from the ``CARTESIA_API_KEY`` environment variable."""
    api_key = os.environ.get("CARTESIA_API_KEY")
    return CartesiaTTS(api_key=api_key)
|
27
|
+
|
28
|
+
|
29
|
+
@pytest.fixture(scope="session")
def resources(client: CartesiaTTS):
    """Fetch the voices once per session and attach the sample voice's embedding."""
    all_voices = client.get_voices()
    sample_id = all_voices[SAMPLE_VOICE]["id"]
    sample_embedding = client.get_voice_embedding(voice_id=sample_id)
    all_voices[SAMPLE_VOICE]["embedding"] = sample_embedding

    return _Resources(client=client, voices=all_voices)
|
39
|
+
|
40
|
+
|
41
|
+
def test_get_voices(client: CartesiaTTS):
    """Voices come back as a dict keyed by voice name, with unique ids."""
    voices = client.get_voices()

    assert isinstance(voices, dict)
    assert all(isinstance(name, str) for name in voices)

    unique_ids = {entry["id"] for entry in voices.values()}
    assert len(voices) == len(unique_ids), "All ids must be unique"

    names_match = all(name == entry["name"] for name, entry in voices.items())
    assert names_match, "The key must be the same as the name"
|
51
|
+
|
52
|
+
|
53
|
+
def test_get_voice_embedding_from_id(client: CartesiaTTS):
    """Smoke test: fetching an embedding by the sample voice's id does not raise."""
    sample_id = client.get_voices()[SAMPLE_VOICE]["id"]

    client.get_voice_embedding(voice_id=sample_id)
|
58
|
+
|
59
|
+
|
60
|
+
def test_get_voice_embedding_from_url(client: CartesiaTTS):
    """Smoke test: fetching an embedding from a link does not raise."""
    link = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
    client.get_voice_embedding(link=link)
|
63
|
+
|
64
|
+
|
65
|
+
@pytest.mark.parametrize("websocket", [True, False])
def test_generate(resources: _Resources, websocket: bool):
    """Non-streaming generation yields bytes audio and an int sampling rate.

    Runs once with ``websocket=True`` and once with ``websocket=False``.
    """
    tts = resources.client
    sample_embedding = resources.voices[SAMPLE_VOICE]["embedding"]

    result = tts.generate(
        transcript="Hello, world!",
        voice=sample_embedding,
        websocket=websocket,
    )

    assert result.keys() == {"audio", "sampling_rate"}
    assert isinstance(result["audio"], bytes)
    assert isinstance(result["sampling_rate"], int)
|
76
|
+
|
77
|
+
|
78
|
+
@pytest.mark.parametrize("websocket", [True, False])
def test_generate_stream(resources: _Resources, websocket: bool):
    """Streaming generation returns a generator of well-formed chunks.

    Each chunk must have exactly the keys ``audio`` (bytes) and
    ``sampling_rate`` (int). Runs with and without ``websocket``.
    """
    tts = resources.client
    sample_embedding = resources.voices[SAMPLE_VOICE]["embedding"]

    chunks = tts.generate(
        transcript="Hello, world!",
        voice=sample_embedding,
        websocket=websocket,
        stream=True,
    )
    assert isinstance(chunks, Generator)

    for chunk in chunks:
        assert chunk.keys() == {"audio", "sampling_rate"}
        assert isinstance(chunk["audio"], bytes)
        assert isinstance(chunk["sampling_rate"], int)
|
94
|
+
|
95
|
+
|
96
|
+
@pytest.mark.parametrize("chunk_time", [0.05, 0.6])
def test_check_inputs_invalid_chunk_time(client: CartesiaTTS, chunk_time):
    """A ``chunk_time`` of 0.05 or 0.6 is rejected with a ValueError."""
    expected_message = "`chunk_time` must be between 0.1 and 0.5"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("Test", None, chunk_time)
|
100
|
+
|
101
|
+
|
102
|
+
@pytest.mark.parametrize("chunk_time", [0.1, 0.3, 0.5])
def test_check_inputs_valid_chunk_time(client, chunk_time):
    """A ``chunk_time`` of 0.1, 0.3 or 0.5 must not raise a ValueError."""
    call_args = ("Test", None, chunk_time)
    try:
        client._check_inputs(*call_args)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
|
108
|
+
|
109
|
+
|
110
|
+
def test_check_inputs_duration_less_than_chunk_time(client: CartesiaTTS):
    """A duration (0.2) shorter than the chunk time (0.3) is rejected."""
    expected_message = "`duration` must be greater than chunk_time"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("Test", 0.2, 0.3)
|
113
|
+
|
114
|
+
|
115
|
+
@pytest.mark.parametrize("duration,chunk_time", [(0.5, 0.2), (1.0, 0.5), (2.0, 0.1)])
def test_check_inputs_valid_duration_and_chunk_time(client: CartesiaTTS, duration, chunk_time):
    """The listed (duration, chunk_time) pairs must not raise a ValueError."""
    call_args = ("Test", duration, chunk_time)
    try:
        client._check_inputs(*call_args)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
|
121
|
+
|
122
|
+
|
123
|
+
def test_check_inputs_empty_transcript(client: CartesiaTTS):
    """An empty transcript is rejected with a ValueError."""
    expected_message = "`transcript` must be non empty"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("", None, None)
|
126
|
+
|
127
|
+
|
128
|
+
@pytest.mark.parametrize("transcript", ["Hello", "Test transcript", "Lorem ipsum dolor sit amet"])
def test_check_inputs_valid_transcript(client: CartesiaTTS, transcript):
    """Non-empty transcripts must not raise a ValueError."""
    call_args = (transcript, None, None)
    try:
        client._check_inputs(*call_args)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
|
cartesia-0.0.0/PKG-INFO
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: cartesia
|
3
|
-
Version: 0.0.0
|
4
|
-
Summary: Library for the Cartesia API.
|
5
|
-
Author: Kabir Goel
|
6
|
-
Author-email: kabir@cartesia.ai
|
7
|
-
Classifier: Programming Language :: Python :: 3
|
8
|
-
Classifier: License :: OSI Approved :: MIT License
|
9
|
-
Classifier: Operating System :: OS Independent
|
10
|
-
Requires-Python: >=3.6
|
@@ -1,10 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: cartesia
|
3
|
-
Version: 0.0.0
|
4
|
-
Summary: Library for the Cartesia API.
|
5
|
-
Author: Kabir Goel
|
6
|
-
Author-email: kabir@cartesia.ai
|
7
|
-
Classifier: Programming Language :: Python :: 3
|
8
|
-
Classifier: License :: OSI Approved :: MIT License
|
9
|
-
Classifier: Operating System :: OS Independent
|
10
|
-
Requires-Python: >=3.6
|
@@ -1 +0,0 @@
|
|
1
|
-
|
cartesia-0.0.0/setup.py
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
from setuptools import setup, find_packages
|
2
|
-
|
3
|
-
setup(
|
4
|
-
name='cartesia',
|
5
|
-
version='0.0.0',
|
6
|
-
author='Kabir Goel',
|
7
|
-
author_email='kabir@cartesia.ai',
|
8
|
-
description='Library for the Cartesia API.',
|
9
|
-
packages=find_packages(),
|
10
|
-
classifiers=[
|
11
|
-
'Programming Language :: Python :: 3',
|
12
|
-
'License :: OSI Approved :: MIT License',
|
13
|
-
'Operating System :: OS Independent',
|
14
|
-
],
|
15
|
-
python_requires='>=3.6',
|
16
|
-
)
|
17
|
-
|
File without changes
|
File without changes
|