cartesia 0.0.0__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 0.0.3
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: websockets
14
+ Requires-Dist: requests
15
+ Provides-Extra: dev
16
+ Requires-Dist: pre-commit; extra == "dev"
17
+ Requires-Dist: docformatter; extra == "dev"
18
+ Requires-Dist: black==24.1.1; extra == "dev"
19
+ Requires-Dist: isort==5.13.2; extra == "dev"
20
+ Requires-Dist: flake8==7.0.0; extra == "dev"
21
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
22
+ Requires-Dist: pytest>=8.0.2; extra == "dev"
23
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
24
+ Provides-Extra: all
25
+ Requires-Dist: pre-commit; extra == "all"
26
+ Requires-Dist: docformatter; extra == "all"
27
+ Requires-Dist: black==24.1.1; extra == "all"
28
+ Requires-Dist: isort==5.13.2; extra == "all"
29
+ Requires-Dist: flake8==7.0.0; extra == "all"
30
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
31
+ Requires-Dist: pytest>=8.0.2; extra == "all"
32
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
33
+
34
+
35
+ # Cartesia Python API Library
36
+ The official Cartesia Python library provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
37
+
38
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
39
+
40
+ ## Installation
41
+ ```bash
42
+ pip install cartesia
43
+
44
+ # pip install in editable mode w/ dev dependencies
45
+ pip install -e '.[dev]'
46
+ ```
47
+
48
+ ## Usage
49
+ ```python
50
+ from cartesia.tts import CartesiaTTS
51
+ import pyaudio
52
+ import os
53
+
54
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
55
+ voices = client.get_voices()
56
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
57
+ transcript = "Hello! Welcome to Cartesia"
58
+
59
+ p = pyaudio.PyAudio()
60
+
61
+ stream = None
62
+
63
+ # Generate and stream audio
64
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
65
+ buffer = output["audio"]
66
+ rate = output["sampling_rate"]
67
+
68
+ if not stream:
69
+ stream = p.open(format=pyaudio.paFloat32,
70
+ channels=1,
71
+ rate=rate,
72
+ output=True)
73
+
74
+ # Write the audio data to the stream
75
+ stream.write(buffer)
76
+
77
+ stream.stop_stream()
78
+ stream.close()
79
+ p.terminate()
80
+ ```
81
+
82
+ If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
83
+
84
+ ```python
85
+ from cartesia.tts import CartesiaTTS
86
+ from IPython.display import Audio
87
+ import io
88
+ import os
89
+
90
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
91
+ voices = client.get_voices()
92
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
93
+ transcript = "Hello! Welcome to Cartesia"
94
+
95
+ # Create a BytesIO object to store the audio data
96
+ audio_data = io.BytesIO()
97
+
98
+ # Generate and stream audio
99
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
100
+ buffer = output["audio"]
101
+ audio_data.write(buffer)
102
+
103
+ # Set the cursor position to the beginning of the BytesIO object
104
+ audio_data.seek(0)
105
+
106
+ # Create an Audio object from the BytesIO data
107
+ audio = Audio(audio_data, rate=output["sampling_rate"])
108
+
109
+ # Display the Audio object
110
+ display(audio)
111
+ ```
112
+
113
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,79 @@
1
+ # Cartesia Python API Library
2
+ The official Cartesia Python library provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
3
+
4
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
5
+
6
+ ## Installation
7
+ ```bash
8
+ pip install cartesia
9
+
10
+ # pip install in editable mode w/ dev dependencies
11
+ pip install -e '.[dev]'
12
+ ```
13
+
14
+ ## Usage
15
+ ```python
16
+ from cartesia.tts import CartesiaTTS
17
+ import pyaudio
18
+ import os
19
+
20
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
21
+ voices = client.get_voices()
22
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
23
+ transcript = "Hello! Welcome to Cartesia"
24
+
25
+ p = pyaudio.PyAudio()
26
+
27
+ stream = None
28
+
29
+ # Generate and stream audio
30
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
31
+ buffer = output["audio"]
32
+ rate = output["sampling_rate"]
33
+
34
+ if not stream:
35
+ stream = p.open(format=pyaudio.paFloat32,
36
+ channels=1,
37
+ rate=rate,
38
+ output=True)
39
+
40
+ # Write the audio data to the stream
41
+ stream.write(buffer)
42
+
43
+ stream.stop_stream()
44
+ stream.close()
45
+ p.terminate()
46
+ ```
47
+
48
+ If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
49
+
50
+ ```python
51
+ from cartesia.tts import CartesiaTTS
52
+ from IPython.display import Audio
53
+ import io
54
+ import os
55
+
56
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
57
+ voices = client.get_voices()
58
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
59
+ transcript = "Hello! Welcome to Cartesia"
60
+
61
+ # Create a BytesIO object to store the audio data
62
+ audio_data = io.BytesIO()
63
+
64
+ # Generate and stream audio
65
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
66
+ buffer = output["audio"]
67
+ audio_data.write(buffer)
68
+
69
+ # Set the cursor position to the beginning of the BytesIO object
70
+ audio_data.seek(0)
71
+
72
+ # Create an Audio object from the BytesIO data
73
+ audio = Audio(audio_data, rate=output["sampling_rate"])
74
+
75
+ # Display the Audio object
76
+ display(audio)
77
+ ```
78
+
79
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,3 @@
1
+ from cartesia.tts import CartesiaTTS
2
+
3
+ __all__ = ["CartesiaTTS"]
@@ -0,0 +1,309 @@
1
+ import base64
2
+ import json
3
+ import os
4
+ import uuid
5
+ from typing import Any, Dict, Generator, List, Optional, TypedDict, Union
6
+
7
+ import requests
8
+ from websockets.sync.client import connect
9
+
10
# Model identifier sent as `model_id` with every generation request.
DEFAULT_MODEL_ID = "genial-planet-1346"
# Host used when the `CARTESIA_BASE_URL` environment variable is not set.
DEFAULT_BASE_URL = "api.cartesia.ai"
# API version path segment used when `CARTESIA_API_VERSION` is not set.
DEFAULT_API_VERSION = "v0"
13
+
14
+
15
# One piece of generated audio: the raw bytes plus the sampling rate they were
# produced at. Both the streaming and non-streaming paths of
# `CartesiaTTS.generate` produce dictionaries of this shape.
AudioOutput = TypedDict(
    "AudioOutput",
    {
        "audio": bytes,
        "sampling_rate": int,
    },
)
18
+
19
+
20
# A voice embedding, represented as a flat list of floats.
Embedding = List[float]
21
+
22
+
23
# Metadata describing a single voice as returned by `CartesiaTTS.get_voices`.
# `embedding` is typed as optional; `get_voices` only requests id, name and
# description when embeddings are skipped.
VoiceMetadata = TypedDict(
    "VoiceMetadata",
    {
        "id": str,
        "name": str,
        "description": str,
        "embedding": Optional[Embedding],
    },
)
28
+
29
+
30
class CartesiaTTS:
    """The client for Cartesia's text-to-speech library.

    This client contains methods to interact with the Cartesia text-to-speech API.
    The API offers voice listing (``get_voices``), voice embedding lookup and
    cloning (``get_voice_embedding``) and audio generation (``generate``), either
    over HTTP or over a websocket that is opened when the client is constructed.

    Examples:

        >>> client = CartesiaTTS()

        # Load available voices and their metadata (excluding the embeddings).
        # Embeddings are fetched with `get_voice_embedding`. This avoids preloading
        # all of the embeddings, which can be expensive if there are a lot of voices.
        >>> voices = client.get_voices()
        >>> embedding = client.get_voice_embedding(voice_id=voices["Milo"]["id"])
        >>> audio = client.generate(transcript="Hello world!", voice=embedding)

        # Preload all available voices and their embeddings if you plan on reusing
        # all of the embeddings often.
        >>> voices = client.get_voices(skip_embeddings=False)
        >>> embedding = voices["Milo"]["embedding"]
        >>> audio = client.generate(transcript="Hello world!", voice=embedding)

        # Generate audio stream
        >>> for audio_chunk in client.generate(transcript="Hello world!", voice=embedding, stream=True):
        ...     audio, sr = audio_chunk["audio"], audio_chunk["sampling_rate"]
    """

    def __init__(self, *, api_key: Optional[str] = None):
        """
        Args:
            api_key: The API key to use for authorization.
                If not specified, the API key will be read from the environment variable
                `CARTESIA_API_KEY`.
        """
        # Assigned first so that `__del__` finds the attribute even if the
        # websocket connection below fails.
        self.websocket = None
        self.base_url = os.environ.get("CARTESIA_BASE_URL", DEFAULT_BASE_URL)
        self.api_key = api_key or os.environ.get("CARTESIA_API_KEY")
        self.api_version = os.environ.get("CARTESIA_API_VERSION", DEFAULT_API_VERSION)
        self.headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"}
        self.refresh_websocket()

    def get_voices(self, skip_embeddings: bool = True) -> Dict[str, VoiceMetadata]:
        """Returns a mapping from voice name -> voice metadata.

        Args:
            skip_embeddings: Whether to skip returning the embeddings.
                It is recommended to skip if you only want to see what
                voices are available, since loading embeddings for all your voices can be
                expensive. You can then use ``get_voice_embedding`` to get the embeddings
                for the voices you are interested in.

        Returns:
            A mapping from voice name -> voice metadata.

        Raises:
            ValueError: If the API does not respond with status code 200.

        Note:
            If the voice name is not unique, there is undefined behavior as to which
            voice will correspond to the name. To be more thorough, look at the web
            client to find the `voice_id` for the voice you are looking for.

        Usage:
            >>> client = CartesiaTTS()
            >>> voices = client.get_voices()
            >>> voices
            {
                "Jane": {
                    "id": "c1d1d3a8-6f4e-4b3f-8b3e-2e1b3e1b3e1b",
                    "name": "Jane",
                }
            }
            >>> embedding = client.get_voice_embedding(voice_id=voices["Jane"]["id"])
            >>> audio = client.generate(transcript="Hello world!", voice=embedding)
        """
        params = {"select": "id, name, description"} if skip_embeddings else None
        response = requests.get(f"{self._http_url()}/voices", headers=self.headers, params=params)

        if response.status_code != 200:
            raise ValueError(f"Failed to get voices. Error: {response.text}")

        voices = response.json()
        # TODO: Update the API to return the embedding as a list of floats rather than string.
        if not skip_embeddings:
            for voice in voices:
                voice["embedding"] = json.loads(voice["embedding"])
        return {voice["name"]: voice for voice in voices}

    def get_voice_embedding(
        self, *, voice_id: str = None, filepath: str = None, link: str = None
    ) -> Embedding:
        """Get a voice embedding from voice_id, a filepath or YouTube url.

        Args:
            voice_id: The voice id.
            filepath: Path to audio file from which to get the audio.
            link: The url to get the audio from. Currently only supports youtube shared urls.

        Returns:
            The voice embedding.

        Raises:
            ValueError: If not exactly one of `voice_id`, `filepath` or `link` is specified,
                or if the API does not respond with status code 200.
        """
        if sum(bool(x) for x in (voice_id, filepath, link)) != 1:
            raise ValueError(
                "Exactly one of `voice_id`, `filepath` or `link` should be specified."
            )

        if voice_id:
            url = f"{self._http_url()}/voices/embedding/{voice_id}"
            response = requests.get(url, headers=self.headers)
        elif filepath:
            url = f"{self._http_url()}/voices/clone/clip"
            headers = self.headers.copy()
            # The default content type of JSON is incorrect for file uploads
            headers.pop("Content-Type")
            # Close the clip as soon as the upload finishes instead of leaking the handle.
            with open(filepath, "rb") as clip:
                response = requests.post(url, headers=headers, files={"clip": clip})
        else:
            # Exactly one argument is truthy, so this branch is the `link` case.
            url = f"{self._http_url()}/voices/clone/url"
            response = requests.post(url, headers=self.headers, params={"link": link})

        if response.status_code != 200:
            raise ValueError(
                f"Failed to clone voice. Status Code: {response.status_code}\n"
                f"Error: {response.text}"
            )

        # Handle successful response
        out = response.json()
        embedding = out["embedding"]
        if isinstance(embedding, str):
            # The embedding may arrive JSON-encoded inside a string.
            embedding = json.loads(embedding)
        return embedding

    def refresh_websocket(self):
        """Refresh the websocket connection.

        Any connection that is still open is closed before a new one is created.

        Note:
            The connection is synchronous.
        """
        if self.websocket and not self._is_websocket_closed():
            self.websocket.close()
        self.websocket = connect(
            f"{self._ws_url()}/audio/websocket?api_key={self.api_key}",
            close_timeout=None,
        )

    def _is_websocket_closed(self):
        """Return True if the websocket's underlying socket reports file descriptor -1."""
        return self.websocket.socket.fileno() == -1

    def _check_inputs(
        self, transcript: str, duration: Optional[float], chunk_time: Optional[float]
    ):
        """Validate the arguments of ``generate``.

        Raises:
            ValueError: If `chunk_time` is outside [0.1, 0.5], if `duration` is smaller
                than `chunk_time`, or if `transcript` is empty or whitespace only.
        """
        if chunk_time is not None:
            if chunk_time < 0.1 or chunk_time > 0.5:
                raise ValueError("`chunk_time` must be between 0.1 and 0.5")

        if chunk_time is not None and duration is not None:
            if duration < chunk_time:
                raise ValueError("`duration` must be greater than chunk_time")

        if transcript.strip() == "":
            raise ValueError("`transcript` must be non empty")

    def generate(
        self,
        *,
        transcript: str,
        duration: int = None,
        chunk_time: float = None,
        voice: Embedding = None,
        stream: bool = False,
        websocket: bool = True,
    ) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
        """Generate audio from a transcript.

        Args:
            transcript: The text to generate audio for.
            duration: The maximum duration of the audio in seconds.
            chunk_time: How long each audio segment should be in seconds.
                This should not need to be adjusted.
            voice: The voice to use for generating audio, e.g. an embedding vector
                (List[float]) obtained from ``get_voice_embedding``. The value is
                forwarded to the API unchanged.
            stream: Whether to stream the audio or not.
                If ``True`` this function returns a generator.
            websocket: Whether to use a websocket for streaming audio.
                Using the websocket reduces latency because the handshake has
                already been performed ahead of time.

        Returns:
            A generator if `stream` is True, otherwise a dictionary.
            Dictionary from both generator and non-generator return types have the following keys:
                * "audio": The audio as a bytes buffer.
                * "sampling_rate": The sampling rate of the audio.

        Raises:
            ValueError: If the inputs fail validation (see ``_check_inputs``).
        """
        self._check_inputs(transcript, duration, chunk_time)

        body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID)

        # Only send the optional fields that were actually provided.
        optional_body = dict(
            duration=duration,
            chunk_time=chunk_time,
            voice=voice,
        )
        body.update({k: v for k, v in optional_body.items() if v is not None})

        if websocket:
            generator = self._generate_ws(body)
        else:
            generator = self._generate_http(body)

        if stream:
            return generator

        # Non-streaming: drain the generator and concatenate the audio. The
        # sampling rate is taken from the first chunk.
        chunks = []
        sampling_rate = None
        for chunk in generator:
            if sampling_rate is None:
                sampling_rate = chunk["sampling_rate"]
            chunks.append(chunk["audio"])

        return {"audio": b"".join(chunks), "sampling_rate": sampling_rate}

    @staticmethod
    def _to_audio_output(message: Dict[str, Any]) -> AudioOutput:
        """Convert a decoded API message with base64 "data" into an audio output dict."""
        return {
            "audio": base64.b64decode(message["data"]),
            "sampling_rate": message["sampling_rate"],
        }

    def _generate_http(self, body: Dict[str, Any]):
        """Yield audio chunks from the HTTP streaming endpoint.

        The response body is treated as a sequence of JSON objects that may be
        split across network chunks, so text is buffered until a complete object
        can be decoded.

        Raises:
            ValueError: If the API does not respond with status code 200.
        """
        response = requests.post(
            f"{self._http_url()}/audio/stream",
            stream=True,
            data=json.dumps(body),
            headers=self.headers,
        )
        if response.status_code != 200:
            raise ValueError(f"Failed to generate audio. {response.text}")

        buffer = ""
        for chunk_bytes in response.iter_content(chunk_size=None):
            buffer += chunk_bytes.decode("utf-8")
            while True:
                start_index = buffer.find("{")
                if start_index == -1:
                    break
                end_index = buffer.find("}", start_index)
                if end_index == -1:
                    # The object is not terminated yet; wait for more data. Without
                    # this exit a stray "}" before the "{" would loop forever.
                    break
                try:
                    chunk_json = json.loads(buffer[start_index : end_index + 1])
                except json.JSONDecodeError:
                    # Incomplete object; retry once more data has arrived.
                    break
                yield self._to_audio_output(chunk_json)
                buffer = buffer[end_index + 1 :]

        if buffer:
            # Best effort: whatever is left over is decoded if it happens to be a
            # complete JSON object and is otherwise ignored.
            try:
                chunk_json = json.loads(buffer)
            except json.JSONDecodeError:
                return
            yield self._to_audio_output(chunk_json)

    def _generate_ws(self, body: Dict[str, Any]):
        """Yield audio chunks received over the websocket until a message has "done" set.

        Raises:
            RuntimeError: If receiving or decoding a message fails. The last message
                received (or None) is included in the error text.
        """
        if not self.websocket or self._is_websocket_closed():
            self.refresh_websocket()

        self.websocket.send(json.dumps({"data": body, "context_id": uuid.uuid4().hex}))
        # Initialised up front so the error message below never hits an unbound name.
        response = None
        try:
            response = json.loads(self.websocket.recv())
            while not response["done"]:
                yield self._to_audio_output(response)
                response = json.loads(self.websocket.recv())
        except Exception as e:
            raise RuntimeError(f"Failed to generate audio. {response}") from e

    def _http_url(self):
        """Return the versioned HTTP base URL (plain http only for localhost)."""
        prefix = "http" if "localhost" in self.base_url else "https"
        return f"{prefix}://{self.base_url}/{self.api_version}"

    def _ws_url(self):
        """Return the versioned websocket base URL (plain ws only for localhost)."""
        prefix = "ws" if "localhost" in self.base_url else "wss"
        return f"{prefix}://{self.base_url}/{self.api_version}"

    def __del__(self):
        # The attribute can be missing or None when construction failed before or
        # while connecting; there is nothing to close in that case.
        websocket = getattr(self, "websocket", None)
        if websocket is not None and websocket.socket.fileno() > -1:
            websocket.close()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.3"
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 0.0.3
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: websockets
14
+ Requires-Dist: requests
15
+ Provides-Extra: dev
16
+ Requires-Dist: pre-commit; extra == "dev"
17
+ Requires-Dist: docformatter; extra == "dev"
18
+ Requires-Dist: black==24.1.1; extra == "dev"
19
+ Requires-Dist: isort==5.13.2; extra == "dev"
20
+ Requires-Dist: flake8==7.0.0; extra == "dev"
21
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
22
+ Requires-Dist: pytest>=8.0.2; extra == "dev"
23
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
24
+ Provides-Extra: all
25
+ Requires-Dist: pre-commit; extra == "all"
26
+ Requires-Dist: docformatter; extra == "all"
27
+ Requires-Dist: black==24.1.1; extra == "all"
28
+ Requires-Dist: isort==5.13.2; extra == "all"
29
+ Requires-Dist: flake8==7.0.0; extra == "all"
30
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
31
+ Requires-Dist: pytest>=8.0.2; extra == "all"
32
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
33
+
34
+
35
+ # Cartesia Python API Library
36
+ The official Cartesia Python library provides convenient access to the Cartesia REST and WebSocket APIs from any Python 3.8+ application.
37
+
38
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
39
+
40
+ ## Installation
41
+ ```bash
42
+ pip install cartesia
43
+
44
+ # pip install in editable mode w/ dev dependencies
45
+ pip install -e '.[dev]'
46
+ ```
47
+
48
+ ## Usage
49
+ ```python
50
+ from cartesia.tts import CartesiaTTS
51
+ import pyaudio
52
+ import os
53
+
54
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
55
+ voices = client.get_voices()
56
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
57
+ transcript = "Hello! Welcome to Cartesia"
58
+
59
+ p = pyaudio.PyAudio()
60
+
61
+ stream = None
62
+
63
+ # Generate and stream audio
64
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
65
+ buffer = output["audio"]
66
+ rate = output["sampling_rate"]
67
+
68
+ if not stream:
69
+ stream = p.open(format=pyaudio.paFloat32,
70
+ channels=1,
71
+ rate=rate,
72
+ output=True)
73
+
74
+ # Write the audio data to the stream
75
+ stream.write(buffer)
76
+
77
+ stream.stop_stream()
78
+ stream.close()
79
+ p.terminate()
80
+ ```
81
+
82
+ If you are using Jupyter Notebook or JupyterLab, you can use IPython.display.Audio to play the generated audio directly in the notebook. Here's an example:
83
+
84
+ ```python
85
+ from cartesia.tts import CartesiaTTS
86
+ from IPython.display import Audio
87
+ import io
88
+ import os
89
+
90
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
91
+ voices = client.get_voices()
92
+ voice = client.get_voice_embedding(voice_id=voices["Graham"]["id"])
93
+ transcript = "Hello! Welcome to Cartesia"
94
+
95
+ # Create a BytesIO object to store the audio data
96
+ audio_data = io.BytesIO()
97
+
98
+ # Generate and stream audio
99
+ for output in client.generate(transcript=transcript, voice=voice, stream=True):
100
+ buffer = output["audio"]
101
+ audio_data.write(buffer)
102
+
103
+ # Set the cursor position to the beginning of the BytesIO object
104
+ audio_data.seek(0)
105
+
106
+ # Create an Audio object from the BytesIO data
107
+ audio = Audio(audio_data, rate=output["sampling_rate"])
108
+
109
+ # Display the Audio object
110
+ display(audio)
111
+ ```
112
+
113
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ cartesia/__init__.py
5
+ cartesia/tts.py
6
+ cartesia/version.py
7
+ cartesia.egg-info/PKG-INFO
8
+ cartesia.egg-info/SOURCES.txt
9
+ cartesia.egg-info/dependency_links.txt
10
+ cartesia.egg-info/requires.txt
11
+ cartesia.egg-info/top_level.txt
12
+ tests/test_tts.py
@@ -0,0 +1,22 @@
1
+ websockets
2
+ requests
3
+
4
+ [all]
5
+ pre-commit
6
+ docformatter
7
+ black==24.1.1
8
+ isort==5.13.2
9
+ flake8==7.0.0
10
+ flake8-bugbear==24.2.6
11
+ pytest>=8.0.2
12
+ pytest-cov>=4.1.0
13
+
14
+ [dev]
15
+ pre-commit
16
+ docformatter
17
+ black==24.1.1
18
+ isort==5.13.2
19
+ flake8==7.0.0
20
+ flake8-bugbear==24.2.6
21
+ pytest>=8.0.2
22
+ pytest-cov>=4.1.0
@@ -0,0 +1 @@
1
+ cartesia
@@ -0,0 +1,11 @@
1
+ [tool.black]
2
+ line-length = 100
3
+
4
+ [tool.isort]
5
+ profile = "black"
6
+ multi_line_output = 3
7
+ include_trailing_comma = true
8
+ force_grid_wrap = 0
9
+ use_parentheses = true
10
+ ensure_newline_before_comments = true
11
+ line_length = 100
@@ -0,0 +1,255 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import io
5
+ import os
6
+
7
+ # Note: To use the 'upload' functionality of this file, you must:
8
+ # $ pipenv install twine --dev
9
+ import shutil
10
+ import subprocess
11
+ import sys
12
+ from distutils.util import convert_path
13
+ from shutil import rmtree
14
+
15
+ from setuptools import Command, find_packages, setup
16
+
17
# Directory of the package; also the only entry passed to `packages` in `setup()`.
PACKAGE_DIR = "cartesia"
# Namespace that receives the names defined by executing the version file.
main_ns = {}
ver_path = convert_path(os.path.join(PACKAGE_DIR, "version.py"))
with open(ver_path) as ver_file:
    # Execute version.py to obtain `__version__` without importing the package.
    exec(ver_file.read(), main_ns)


# Package meta-data.
NAME = "cartesia"
DESCRIPTION = "The official Python library for the Cartesia API."
URL = ""
EMAIL = "support@cartesia.ai"
AUTHOR = "Cartesia, Inc."
REQUIRES_PYTHON = ">=3.8.0"
VERSION = main_ns["__version__"]
32
+
33
+
34
+ # What packages are required for this module to be executed?
35
def get_requirements(path):
    """Read a requirements file and return its requirement lines.

    Each line is stripped of surrounding whitespace. Blank lines and comment
    lines (starting with ``#``) are dropped so that only real entries are
    handed to ``install_requires`` / ``extras_require``.

    Args:
        path: Path of the requirements file to read.

    Returns:
        A list of the remaining lines, in file order.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = [line.strip() for line in f.read().splitlines()]

    return [line for line in stripped if line and not line.startswith("#")]
41
+
42
+
43
# Runtime and development requirements, read from files resolved relative to
# the current working directory.
REQUIRED = get_requirements("requirements.txt")
REQUIRED_DEV = get_requirements("requirements-dev.txt")

# What packages are optional?
EXTRAS = {
    "dev": REQUIRED_DEV,
}
# "all" is the concatenation of every extras group defined above.
EXTRAS["all"] = [pkg for group in EXTRAS.values() for pkg in group]

# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

# Absolute path of the directory containing this setup.py.
here = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
        long_description = "\n" + f.read()
except FileNotFoundError:
    # Fall back to the short description when no README is available.
    long_description = DESCRIPTION

# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
    # Only reached when VERSION is falsy; reads <package>/__version__.py instead.
    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(here, project_slug, "__version__.py")) as f:
        exec(f.read(), about)
else:
    about["__version__"] = VERSION
75
+
76
+
77
class UploadCommand(Command):
    """Support setup.py upload.

    Builds fresh source and wheel distributions, uploads them with Twine and
    then creates and pushes a git tag for the current version.
    """

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print(f"\033[1m{s}\033[0m")

    def initialize_options(self):
        # The command takes no options.
        pass

    def finalize_options(self):
        # Nothing to validate.
        pass

    def run(self):
        """Run the build, upload and tagging steps, then exit the process."""
        dist_dir = os.path.join(here, "dist")
        try:
            self.status("Removing previous builds…")
            shutil.rmtree(dist_dir)
        except OSError:
            # No previous build to remove (or it could not be removed).
            pass

        self.status("Building Source and Wheel (universal) distribution…")
        build_cmd = f"{sys.executable} setup.py sdist bdist_wheel --universal"
        os.system(build_cmd)

        self.status("Uploading the package to PyPI via Twine…")
        os.system("twine upload dist/*")

        self.status("Pushing git tags…")
        release = about["__version__"]
        os.system(f"git tag v{release}")
        os.system("git push --tags")

        sys.exit()
112
+
113
+
114
class BumpVersionCommand(Command):
    """Bump the package version and commit the change.

    To use: python setup.py bumpversion -v <version>

    The command must be run from a clean, up-to-date 'main' branch. It rewrites
    the version file (``ver_path``), stages it and creates a commit with the
    message '[bumpversion] v<version>'. The commit is neither pushed nor tagged;
    that is left to the caller.
    """

    description = "Bump the package version and commit the change."
    user_options = [
        ("version=", "v", "the new version number"),
    ]

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print("\033[1m{0}\033[0m".format(s))

    def initialize_options(self):
        self.version = None

    def finalize_options(self):
        """Check that a version was given and that it is newer than the current one.

        Raises:
            ValueError: If no version was specified or it is not greater than the
                current version.
        """
        # This package cannot be imported at top level because it
        # is not recognized by Github Actions.
        from packaging import version

        if self.version is None:
            raise ValueError("Please specify a version number.")

        current_version = about["__version__"]
        if not version.Version(self.version) > version.Version(current_version):
            raise ValueError(
                f"New version ({self.version}) must be greater than "
                f"current version ({current_version})."
            )

    @staticmethod
    def _read_version():
        """Return the `__version__` currently written in the version file."""
        namespace = {}
        with open(ver_path) as f:
            exec(f.read(), namespace)
        return namespace.get("__version__")

    def _undo(self):
        """Unstage and discard changes to the version file."""
        subprocess.call(["git", "restore", "--staged", ver_path])
        subprocess.call(["git", "checkout", "--", ver_path])

    def run(self):
        """Validate the repository state, rewrite the version file and commit it.

        Raises:
            RuntimeError: If not on 'main', if pulling fails, if the working
                directory is dirty, or if updating, staging or committing fails.
        """
        current_version = about["__version__"]

        self.status("Checking current branch is 'main'")
        current_branch = get_git_branch()
        if current_branch != "main":
            raise RuntimeError(
                "You can only bump the version from the 'main' branch. "
                "You are currently on the '{}' branch.".format(current_branch)
            )

        self.status("Pulling latest changes from origin")
        err_code = os.system("git pull")
        if err_code != 0:
            raise RuntimeError("Failed to pull from origin.")

        self.status("Checking working directory is clean")
        err_code = os.system("git diff --exit-code")
        err_code += os.system("git diff --cached --exit-code")
        if err_code != 0:
            raise RuntimeError("Working directory is not clean.")

        # TODO: Add check to see if all tests are passing on main.

        # Change the version in the version file.
        self.status(f"Updating version {current_version} -> {self.version}")
        update_version(self.version)
        # Verify against what is now on disk; the in-memory `current_version`
        # is the old value and would always differ from the new one.
        if self._read_version() != self.version:
            self._undo()
            raise RuntimeError("Failed to update version.")

        self.status(f"Adding {ver_path} to git")
        err_code = subprocess.call(["git", "add", ver_path])
        if err_code != 0:
            self._undo()
            raise RuntimeError("Failed to add file to git.")

        # Commit the file with a message '[bumpversion] v<version>'.
        commit_message = f"[bumpversion] v{self.version}"
        self.status(f"Commit with message '{commit_message}'")
        err_code = subprocess.call(["git", "commit", "-m", commit_message])
        if err_code != 0:
            self._undo()
            raise RuntimeError("Failed to commit file to git.")

        # NOTE: The commit is intentionally not pushed here.

        sys.exit()
205
+
206
+
207
def update_version(version):
    """Rewrite the ``__version__`` assignment in the version file.

    Every line of the file at ``ver_path`` that starts with ``__version__`` is
    replaced by ``__version__ = "<version>"``; all other lines are kept as-is.

    Args:
        version: The new version string to write.
    """
    # Read with a context manager so the handle is closed before rewriting.
    with open(ver_path, "r") as f:
        original_lines = f.readlines()

    updated_lines = [
        f'__version__ = "{version}"\n' if line.startswith("__version__") else line
        for line in original_lines
    ]
    with open(ver_path, "w") as f:
        f.writelines(updated_lines)
217
+
218
+
219
def get_git_branch():
    """Return the name of the current branch.

    Runs ``git branch`` and returns the entry marked with ``*``.

    Raises:
        RuntimeError: If git cannot be run, exits with a non-zero status, or
            reports no current branch.
    """
    try:
        # Argument list without a shell; stderr is captured so that failures
        # can actually be reported.
        proc = subprocess.run(
            ["git", "branch"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as e:
        raise RuntimeError(f"Error finding git branch: {e}") from e

    if proc.returncode != 0:
        err = proc.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"Error finding git branch: {err}")

    for line in proc.stdout.decode("utf-8").split("\n"):
        if line.startswith("*"):
            return line.replace("*", "").strip()

    raise RuntimeError("Error finding git branch: no current branch reported.")
229
+
230
+
231
+ # Where the magic happens:
232
setup(
    name=NAME,
    version=about["__version__"],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type="text/markdown",
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    # Only the top-level package directory is listed.
    packages=[PACKAGE_DIR],
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    include_package_data=True,
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    # Custom commands: `python setup.py upload` and
    # `python setup.py bumpversion -v <version>`.
    cmdclass={"upload": UploadCommand, "bumpversion": BumpVersionCommand},
)
@@ -0,0 +1,133 @@
1
+ """Test against the production Cartesia TTS API.
2
+
3
+ This test suite tries to be as general as possible because different keys
4
+ will lead to different results. Therefore, we cannot test for complete correctness
5
+ but rather for general correctness.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Generator
10
+
11
+ import pytest
12
+
13
+ from cartesia.tts import CartesiaTTS, VoiceMetadata
14
+
15
+ SAMPLE_VOICE = "Milo"
16
+
17
+
18
class _Resources:
    """Bundle of the objects shared by the generation tests."""

    def __init__(self, *, client: CartesiaTTS, voices: Dict[str, VoiceMetadata]):
        """Store the client and the voice mapping; both are keyword-only."""
        # Mapping of voice name -> metadata as passed in by the fixture.
        self.voices = voices
        self.client = client
22
+
23
+
24
@pytest.fixture(scope="session")
def client():
    """Session-scoped ``CartesiaTTS`` client.

    The API key is read with ``os.environ.get("CARTESIA_API_KEY")``, so
    ``None`` is passed to the constructor when the variable is unset.
    """
    api_key = os.environ.get("CARTESIA_API_KEY")
    return CartesiaTTS(api_key=api_key)
27
+
28
+
29
@pytest.fixture(scope="session")
def resources(client: CartesiaTTS):
    """Session-scoped bundle of the client and its voices.

    The ``SAMPLE_VOICE`` entry gets an extra ``"embedding"`` key, fetched by
    voice id, before everything is wrapped in ``_Resources``.
    """
    all_voices = client.get_voices()
    sample_id = all_voices[SAMPLE_VOICE]["id"]
    embedding = client.get_voice_embedding(voice_id=sample_id)
    all_voices[SAMPLE_VOICE]["embedding"] = embedding
    return _Resources(voices=all_voices, client=client)
39
+
40
+
41
def test_get_voices(client: CartesiaTTS):
    """``get_voices`` returns a dict keyed by voice name with unique ids."""
    catalog = client.get_voices()
    assert isinstance(catalog, dict)
    assert all(isinstance(name, str) for name in catalog)

    voice_ids = [entry["id"] for entry in catalog.values()]
    assert len(set(voice_ids)) == len(voice_ids), "All ids must be unique"

    names_match_keys = all(name == entry["name"] for name, entry in catalog.items())
    assert names_match_keys, "The key must be the same as the name"
51
+
52
+
53
def test_get_voice_embedding_from_id(client: CartesiaTTS):
    """Fetching the sample voice's embedding by id completes without raising."""
    sample_id = client.get_voices()[SAMPLE_VOICE]["id"]
    client.get_voice_embedding(voice_id=sample_id)
58
+
59
+
60
def test_get_voice_embedding_from_url(client: CartesiaTTS):
    """Fetching an embedding from a link completes without raising."""
    source_link = "https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69"
    client.get_voice_embedding(link=source_link)
63
+
64
+
65
@pytest.mark.parametrize("websocket", [True, False])
def test_generate(resources: _Resources, websocket: bool):
    """Non-streaming generation yields audio bytes and an int sampling rate."""
    result = resources.client.generate(
        transcript="Hello, world!",
        voice=resources.voices[SAMPLE_VOICE]["embedding"],
        websocket=websocket,
    )

    assert result.keys() == {"audio", "sampling_rate"}
    assert isinstance(result["audio"], bytes)
    assert isinstance(result["sampling_rate"], int)
76
+
77
+
78
@pytest.mark.parametrize("websocket", [True, False])
def test_generate_stream(resources: _Resources, websocket: bool):
    """Streaming generation returns a generator of audio/sampling-rate chunks."""
    stream = resources.client.generate(
        transcript="Hello, world!",
        voice=resources.voices[SAMPLE_VOICE]["embedding"],
        websocket=websocket,
        stream=True,
    )
    assert isinstance(stream, Generator)

    # Every yielded chunk must have the same shape as a non-streaming result.
    for chunk in stream:
        assert chunk.keys() == {"audio", "sampling_rate"}
        assert isinstance(chunk["audio"], bytes)
        assert isinstance(chunk["sampling_rate"], int)
94
+
95
+
96
@pytest.mark.parametrize("chunk_time", [0.05, 0.6])
def test_check_inputs_invalid_chunk_time(client: CartesiaTTS, chunk_time):
    """Chunk times of 0.05 and 0.6 are rejected with a ``ValueError``."""
    expected_message = "`chunk_time` must be between 0.1 and 0.5"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("Test", None, chunk_time)
100
+
101
+
102
@pytest.mark.parametrize("chunk_time", [0.1, 0.3, 0.5])
def test_check_inputs_valid_chunk_time(client, chunk_time):
    """Chunk times of 0.1, 0.3 and 0.5 pass validation without a ``ValueError``."""
    transcript, duration = "Test", None
    try:
        client._check_inputs(transcript, duration, chunk_time)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
108
+
109
+
110
def test_check_inputs_duration_less_than_chunk_time(client: CartesiaTTS):
    """A duration (0.2) shorter than the chunk time (0.3) is rejected."""
    expected_message = "`duration` must be greater than chunk_time"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("Test", 0.2, 0.3)
113
+
114
+
115
@pytest.mark.parametrize("duration,chunk_time", [(0.5, 0.2), (1.0, 0.5), (2.0, 0.1)])
def test_check_inputs_valid_duration_and_chunk_time(client: CartesiaTTS, duration, chunk_time):
    """Durations longer than their chunk time pass validation without a ``ValueError``."""
    transcript = "Test"
    try:
        client._check_inputs(transcript, duration, chunk_time)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
121
+
122
+
123
def test_check_inputs_empty_transcript(client: CartesiaTTS):
    """An empty transcript is rejected with a ``ValueError``."""
    expected_message = "`transcript` must be non empty"
    with pytest.raises(ValueError, match=expected_message):
        client._check_inputs("", None, None)
126
+
127
+
128
@pytest.mark.parametrize("transcript", ["Hello", "Test transcript", "Lorem ipsum dolor sit amet"])
def test_check_inputs_valid_transcript(client: CartesiaTTS, transcript):
    """Non-empty transcripts pass validation when duration and chunk time are ``None``."""
    duration = chunk_time = None
    try:
        client._check_inputs(transcript, duration, chunk_time)
    except ValueError:
        pytest.fail("Unexpected ValueError raised")
cartesia-0.0.0/PKG-INFO DELETED
@@ -1,10 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cartesia
3
- Version: 0.0.0
4
- Summary: Library for the Cartesia API.
5
- Author: Kabir Goel
6
- Author-email: kabir@cartesia.ai
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.6
@@ -1,10 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cartesia
3
- Version: 0.0.0
4
- Summary: Library for the Cartesia API.
5
- Author: Kabir Goel
6
- Author-email: kabir@cartesia.ai
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.6
@@ -1,5 +0,0 @@
1
- setup.py
2
- cartesia.egg-info/PKG-INFO
3
- cartesia.egg-info/SOURCES.txt
4
- cartesia.egg-info/dependency_links.txt
5
- cartesia.egg-info/top_level.txt
cartesia-0.0.0/setup.py DELETED
@@ -1,17 +0,0 @@
1
- from setuptools import setup, find_packages
2
-
3
# Minimal package definition for the 0.0.0 placeholder release.
setup(
    name="cartesia",
    version="0.0.0",
    description="Library for the Cartesia API.",
    author="Kabir Goel",
    author_email="kabir@cartesia.ai",
    python_requires=">=3.6",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
17
-
File without changes