cartesia 0.0.0__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 0.0.2
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: websockets
14
+ Requires-Dist: requests
15
+ Requires-Dist: numpy
16
+ Provides-Extra: dev
17
+ Requires-Dist: pre-commit; extra == "dev"
18
+ Requires-Dist: docformatter; extra == "dev"
19
+ Requires-Dist: black==24.1.1; extra == "dev"
20
+ Requires-Dist: isort==5.13.2; extra == "dev"
21
+ Requires-Dist: flake8==7.0.0; extra == "dev"
22
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
23
+ Requires-Dist: pytest>=8.0.2; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
25
+ Provides-Extra: all
26
+ Requires-Dist: pre-commit; extra == "all"
27
+ Requires-Dist: docformatter; extra == "all"
28
+ Requires-Dist: black==24.1.1; extra == "all"
29
+ Requires-Dist: isort==5.13.2; extra == "all"
30
+ Requires-Dist: flake8==7.0.0; extra == "all"
31
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
32
+ Requires-Dist: pytest>=8.0.2; extra == "all"
33
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
34
+
35
+
36
+ # Cartesia Python API Library
37
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
38
+
39
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
40
+
41
+ ## Installation
42
+ ```bash
43
+ pip install cartesia
44
+
45
+ # pip install in editable mode w/ dev dependencies
46
+ pip install -e '.[dev]'
47
+ ```
48
+
49
+ ## Usage
50
+ ```python
51
+ import os
+
+ from cartesia.tts import CartesiaTTS
52
+ from IPython.display import Audio
53
+
54
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
55
+
56
+ voices = client.get_voices(skip_embeddings=False)
57
+ embedding = voices["Milo"]["embedding"]
58
+ transcript = "Hello! Welcome to Cartesia"
59
+
60
+ # No streaming
61
+ output = client.generate(transcript=transcript, voice=embedding)
62
+ Audio(output["audio"], rate=output["sampling_rate"])
63
+
64
+ # Streaming
65
+ for output in client.generate(transcript=transcript, voice=embedding, stream=True):
66
+ arr = output["audio"] # a numpy array
67
+ rate = output["sampling_rate"]
68
+ ```
69
+
70
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,35 @@
1
+ # Cartesia Python API Library
2
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
3
+
4
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
5
+
6
+ ## Installation
7
+ ```bash
8
+ pip install cartesia
9
+
10
+ # pip install in editable mode w/ dev dependencies
11
+ pip install -e '.[dev]'
12
+ ```
13
+
14
+ ## Usage
15
+ ```python
16
+ import os
+
+ from cartesia.tts import CartesiaTTS
17
+ from IPython.display import Audio
18
+
19
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
20
+
21
+ voices = client.get_voices(skip_embeddings=False)
22
+ embedding = voices["Milo"]["embedding"]
23
+ transcript = "Hello! Welcome to Cartesia"
24
+
25
+ # No streaming
26
+ output = client.generate(transcript=transcript, voice=embedding)
27
+ Audio(output["audio"], rate=output["sampling_rate"])
28
+
29
+ # Streaming
30
+ for output in client.generate(transcript=transcript, voice=embedding, stream=True):
31
+ arr = output["audio"] # a numpy array
32
+ rate = output["sampling_rate"]
33
+ ```
34
+
35
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,3 @@
1
+ from cartesia.tts import CartesiaTTS
2
+
3
+ __all__ = ["CartesiaTTS"]
@@ -0,0 +1,304 @@
1
+ import base64
2
+ import json
3
+ import os
4
+ import uuid
5
+ from typing import Any, Dict, Generator, List, Optional, TypedDict, Union
6
+
7
+ import numpy as np
8
+ import requests
9
+ from websockets.sync.client import connect
10
+
11
+ DEFAULT_MODEL_ID = "genial-planet-1346"
12
+ DEFAULT_BASE_URL = "api.cartesia.ai"
13
+ DEFAULT_API_VERSION = "v0"
14
+
15
+
16
class AudioOutput(TypedDict):
    """A piece of generated audio together with its sampling rate."""

    # Decoded audio samples as a numpy array.
    audio: np.ndarray
    # Sampling rate reported alongside the samples.
    sampling_rate: int
19
+
20
+
21
+ Embedding = List[float]
22
+
23
+
24
class VoiceMetadata(TypedDict):
    """Metadata describing one voice, as returned by the voices endpoint."""

    # Identifier of the voice.
    id: str
    # Name of the voice; `get_voices` uses it as the mapping key.
    name: str
    # Free-form description of the voice.
    description: str
    # Embedding vector of the voice, when it was requested.
    embedding: Optional[Embedding]
29
+
30
+
31
+ class CartesiaTTS:
32
+ """The client for Cartesia's text-to-speech library.
33
+
34
+ This client contains methods to interact with the Cartesia text-to-speech API.
35
+ The API offers
36
+
37
+ Examples:
38
+
39
+ >>> client = CartesiaTTS()
40
+
41
+ # Load available voices and their metadata (excluding the embeddings).
42
+ # Embeddings are fetched with `get_voice_embedding`. This avoids preloading
43
+ # all of the embeddings, which can be expensive if there are a lot of voices.
44
+ >>> voices = client.get_voices()
45
+ >>> embedding = client.get_voice_embedding(voice_id=voices["Milo"]["id"])
46
+ >>> audio = client.generate(transcript="Hello world!", voice=embedding)
47
+
48
+ # Preload all available voices and their embeddings if you plan on reusing
49
+ # all of the embeddings often.
50
+ >>> voices = client.get_voices(skip_embeddings=False)
51
+ >>> embedding = voices["Milo"]["embedding"]
52
+ >>> audio = client.generate(transcript="Hello world!", voice=embedding)
53
+
54
+ # Generate audio stream
55
+ >>> for audio_chunk in client.generate(transcript="Hello world!", voice=embedding, stream=True):
56
+ ... audio, sr = audio_chunk["audio"], audio_chunk["sampling_rate"]
57
+ """
58
+
59
def __init__(self, *, api_key: str = None):
    """Store the connection settings and open the initial websocket.

    Args:
        api_key: The API key to use for authorization.
            When omitted (or falsy), the key is read from the environment
            variable `CARTESIA_API_KEY`.
    """
    env = os.environ
    self.base_url = env.get("CARTESIA_BASE_URL", DEFAULT_BASE_URL)
    self.api_key = api_key or env.get("CARTESIA_API_KEY")
    self.api_version = env.get("CARTESIA_API_VERSION", DEFAULT_API_VERSION)
    self.headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"}
    # The attribute must exist before refresh_websocket() runs, since that
    # method inspects the current connection before replacing it.
    self.websocket = None
    self.refresh_websocket()
72
+
73
def get_voices(self, skip_embeddings: bool = True) -> Dict[str, VoiceMetadata]:
    """Fetch the available voices, keyed by voice name.

    Args:
        skip_embeddings: Whether to leave the embeddings out of the result.
            Skipping is recommended when you only want to see which voices
            exist, since loading every embedding can be expensive. Use
            ``get_voice_embedding`` afterwards for the voices you need.

    Returns:
        A mapping from voice name -> voice metadata.

    Raises:
        ValueError: If the API does not answer with status code 200.

    Note:
        If a voice name is not unique, which voice ends up under that name
        is undefined. To be more thorough, look at the web client to find
        the `voice_id` for the voice you are looking for.

    Usage:
        >>> client = CartesiaTTS()
        >>> voices = client.get_voices()
        >>> voices
        {
            "Jane": {
                "id": "c1d1d3a8-6f4e-4b3f-8b3e-2e1b3e1b3e1b",
                "name": "Jane",
            }
        }
        >>> embedding = client.get_voice_embedding(voice_id=voices["Jane"]["id"])
        >>> audio = client.generate(transcript="Hello world!", voice=embedding)
    """
    query = None
    if skip_embeddings:
        query = {"select": "id, name, description"}

    response = requests.get(f"{self._http_url()}/voices", headers=self.headers, params=query)
    if response.status_code != 200:
        raise ValueError(f"Failed to get voices. Error: {response.text}")

    voices = response.json()
    # TODO: Update the API to return the embedding as a list of floats rather than string.
    if not skip_embeddings:
        for entry in voices:
            entry["embedding"] = json.loads(entry["embedding"])

    by_name = {}
    for entry in voices:
        by_name[entry["name"]] = entry
    return by_name
115
+
116
def get_voice_embedding(
    self, *, voice_id: str = None, filepath: str = None, link: str = None
) -> Embedding:
    """Get a voice embedding from a voice id, an audio file or a YouTube url.

    Args:
        voice_id: The voice id.
        filepath: Path to audio file from which to get the audio.
        link: The url to get the audio from. Currently only supports youtube shared urls.

    Returns:
        The voice embedding.

    Raises:
        ValueError: If none or more than one of `voice_id`, `filepath` or `link`
            is specified (exactly one is required), or if the API does not
            answer with status code 200.
    """
    if sum(bool(x) for x in (voice_id, filepath, link)) != 1:
        raise ValueError("Exactly one of `voice_id`, `filepath` or `link` should be specified.")

    if voice_id:
        url = f"{self._http_url()}/voices/embedding/{voice_id}"
        response = requests.get(url, headers=self.headers)
    elif filepath:
        url = f"{self._http_url()}/voices/clone/clip"
        # The default content type of JSON is incorrect for file uploads.
        headers = {k: v for k, v in self.headers.items() if k != "Content-Type"}
        # Keep the file open only for the duration of the upload.
        with open(filepath, "rb") as clip:
            response = requests.post(url, headers=headers, files={"clip": clip})
    else:
        url = f"{self._http_url()}/voices/clone/url"
        response = requests.post(url, headers=self.headers, params={"link": link})

    if response.status_code != 200:
        raise ValueError(
            f"Failed to clone voice. Status Code: {response.status_code}\n"
            f"Error: {response.text}"
        )

    # Handle successful response
    out = response.json()
    embedding = out["embedding"]
    # The API may return the embedding as a JSON-encoded string.
    if isinstance(embedding, str):
        embedding = json.loads(embedding)
    return embedding
162
+
163
def refresh_websocket(self):
    """Replace the current websocket with a freshly opened one.

    Any existing connection that is still open is closed first.

    Note:
        The connection is synchronous.
    """
    current = self.websocket
    if current and not self._is_websocket_closed():
        current.close()

    endpoint = f"{self._ws_url()}/audio/websocket?api_key={self.api_key}"
    self.websocket = connect(endpoint, close_timeout=None)
175
+
176
def _is_websocket_closed(self):
    """Return True when the websocket's underlying socket reports fileno() == -1."""
    descriptor = self.websocket.socket.fileno()
    return descriptor == -1
178
+
179
def generate(
    self,
    *,
    transcript: str,
    duration: int = None,
    chunk_time: float = None,
    lookahead: int = None,
    voice: Union[str, Embedding] = None,
    stream: bool = False,
    websocket: bool = True,
) -> Union[AudioOutput, Generator[AudioOutput, None, None]]:
    """Generate audio from a transcript.

    Args:
        transcript: The text to generate audio for.
        duration: The maximum duration of the audio in seconds.
        chunk_time: How long each audio segment should be in seconds.
            This should not need to be adjusted.
        lookahead: The number of seconds to look ahead for each chunk.
            This should not need to be adjusted.
        voice: The voice to use for generating audio.
            This can either be a voice id (string) or an embedding vector (List[float]).
            A voice id is resolved to its embedding with ``get_voice_embedding``.
        stream: Whether to stream the audio or not.
            If ``True`` this function returns a generator.
        websocket: Whether to use a websocket for streaming audio.
            Using the websocket reduces latency by pre-poning the handshake.

    Returns:
        A generator if `stream` is True, otherwise a dictionary.
        Dictionary from both generator and non-generator return types have the following keys:
            * "audio": The audio as a 1D numpy array.
            * "sampling_rate": The sampling rate of the audio.
    """
    body = dict(transcript=transcript, model_id=DEFAULT_MODEL_ID)

    if isinstance(voice, str):
        # A string is a voice id; the request body carries the embedding,
        # so look it up before sending.
        voice = self.get_voice_embedding(voice_id=voice)

    optional_body = dict(
        duration=duration,
        chunk_time=chunk_time,
        lookahead=lookahead,
        voice=voice,
    )
    # Only send the options the caller actually provided.
    body.update({k: v for k, v in optional_body.items() if v is not None})

    if websocket:
        generator = self._generate_ws(body)
    else:
        generator = self._generate_http(body)

    if stream:
        return generator

    # Non-streaming: drain the generator and stitch the chunks together.
    chunks = []
    sampling_rate = None
    for chunk in generator:
        if sampling_rate is None:
            sampling_rate = chunk["sampling_rate"]
        chunks.append(chunk["audio"])

    return {"audio": np.concatenate(chunks), "sampling_rate": sampling_rate}
241
+
242
def _generate_http(self, body: Dict[str, Any]):
    """Stream audio over HTTP, yielding one decoded chunk per JSON object received.

    Args:
        body: The JSON-serializable request payload.

    Yields:
        Dictionaries with the keys "audio" (a float32 numpy array decoded from
        the base64 "data" field) and "sampling_rate".

    Raises:
        ValueError: If the API does not answer with status code 200.
    """
    response = requests.post(
        f"{self._http_url()}/audio/stream",
        stream=True,
        data=json.dumps(body),
        headers=self.headers,
    )
    if response.status_code != 200:
        raise ValueError(f"Failed to generate audio. {response.text}")

    # raw_decode finds the real end of each object, so objects split across
    # network chunks, nested objects and stray braces are all handled without
    # guessing where an object ends.
    decoder = json.JSONDecoder()
    buffer = ""
    for chunk_bytes in response.iter_content(chunk_size=None):
        buffer += chunk_bytes.decode("utf-8")
        while True:
            start_index = buffer.find("{")
            if start_index == -1:
                # Nothing that could start an object; drop separators/noise.
                buffer = ""
                break
            try:
                chunk_json, end_index = decoder.raw_decode(buffer, start_index)
            except json.JSONDecodeError:
                # Incomplete object: keep it and wait for more bytes.
                buffer = buffer[start_index:]
                break
            buffer = buffer[end_index:]
            data = base64.b64decode(chunk_json["data"])
            audio = np.frombuffer(data, dtype=np.float32)
            yield {"audio": audio, "sampling_rate": chunk_json["sampling_rate"]}
    # Whatever is left in the buffer is an incomplete object and is ignored.
276
+
277
def _generate_ws(self, body: Dict[str, Any]):
    """Stream audio over the websocket, yielding one decoded chunk per message.

    The websocket is re-opened first if it is missing or closed.

    Args:
        body: The JSON-serializable request payload.

    Yields:
        Dictionaries with the keys "audio" (a float32 numpy array decoded from
        the base64 "data" field) and "sampling_rate", until a message whose
        "done" field is truthy arrives.

    Raises:
        RuntimeError: If receiving or decoding a message fails. The message
            includes the last response received (``None`` if there was none)
            and the original exception is attached as the cause.
    """
    if not self.websocket or self._is_websocket_closed():
        self.refresh_websocket()

    self.websocket.send(json.dumps({"data": body, "context_id": uuid.uuid4().hex}))
    # Bound up front so the error path can report it even when the very
    # first recv() fails.
    response = None
    try:
        response = json.loads(self.websocket.recv())
        while not response["done"]:
            data = base64.b64decode(response["data"])
            audio = np.frombuffer(data, dtype=np.float32)
            yield {"audio": audio, "sampling_rate": response["sampling_rate"]}

            response = json.loads(self.websocket.recv())
    except Exception as err:
        raise RuntimeError(f"Failed to generate audio. {response}") from err
293
+
294
def _http_url(self):
    """Build the versioned HTTP base URL (plain http when the host contains "localhost")."""
    scheme = "https"
    if "localhost" in self.base_url:
        scheme = "http"
    return "{}://{}/{}".format(scheme, self.base_url, self.api_version)
297
+
298
def _ws_url(self):
    """Build the versioned websocket base URL (plain ws when the host contains "localhost")."""
    scheme = "wss"
    if "localhost" in self.base_url:
        scheme = "ws"
    return "{}://{}/{}".format(scheme, self.base_url, self.api_version)
301
+
302
def __del__(self):
    """Close the websocket on garbage collection if it is still open."""
    # The attribute is None (or missing) when construction failed before a
    # connection was established; there is nothing to close in that case.
    websocket = getattr(self, "websocket", None)
    if websocket is None:
        return
    if websocket.socket.fileno() > -1:
        websocket.close()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2"
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.1
2
+ Name: cartesia
3
+ Version: 0.0.2
4
+ Summary: The official Python library for the Cartesia API.
5
+ Home-page:
6
+ Author: Cartesia, Inc.
7
+ Author-email: support@cartesia.ai
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.8.0
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: websockets
14
+ Requires-Dist: requests
15
+ Requires-Dist: numpy
16
+ Provides-Extra: dev
17
+ Requires-Dist: pre-commit; extra == "dev"
18
+ Requires-Dist: docformatter; extra == "dev"
19
+ Requires-Dist: black==24.1.1; extra == "dev"
20
+ Requires-Dist: isort==5.13.2; extra == "dev"
21
+ Requires-Dist: flake8==7.0.0; extra == "dev"
22
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "dev"
23
+ Requires-Dist: pytest>=8.0.2; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
25
+ Provides-Extra: all
26
+ Requires-Dist: pre-commit; extra == "all"
27
+ Requires-Dist: docformatter; extra == "all"
28
+ Requires-Dist: black==24.1.1; extra == "all"
29
+ Requires-Dist: isort==5.13.2; extra == "all"
30
+ Requires-Dist: flake8==7.0.0; extra == "all"
31
+ Requires-Dist: flake8-bugbear==24.2.6; extra == "all"
32
+ Requires-Dist: pytest>=8.0.2; extra == "all"
33
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
34
+
35
+
36
+ # Cartesia Python API Library
37
+ The official Cartesia Python library which provides convenient access to the Cartesia REST and Websocket API from any Python 3.8+ application.
38
+
39
+ **Note:** This API is still in alpha. Please expect breaking changes and report any issues you encounter.
40
+
41
+ ## Installation
42
+ ```bash
43
+ pip install cartesia
44
+
45
+ # pip install in editable mode w/ dev dependencies
46
+ pip install -e '.[dev]'
47
+ ```
48
+
49
+ ## Usage
50
+ ```python
51
+ import os
+
+ from cartesia.tts import CartesiaTTS
52
+ from IPython.display import Audio
53
+
54
+ client = CartesiaTTS(api_key=os.environ.get("CARTESIA_API_KEY"))
55
+
56
+ voices = client.get_voices(skip_embeddings=False)
57
+ embedding = voices["Milo"]["embedding"]
58
+ transcript = "Hello! Welcome to Cartesia"
59
+
60
+ # No streaming
61
+ output = client.generate(transcript=transcript, voice=embedding)
62
+ Audio(output["audio"], rate=output["sampling_rate"])
63
+
64
+ # Streaming
65
+ for output in client.generate(transcript=transcript, voice=embedding, stream=True):
66
+ arr = output["audio"] # a numpy array
67
+ rate = output["sampling_rate"]
68
+ ```
69
+
70
+ We recommend using [`python-dotenv`](https://pypi.org/project/python-dotenv/) to add `CARTESIA_API_KEY="my-api-key"` to your .env file so that your API Key is not stored in the source code.
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ cartesia/__init__.py
5
+ cartesia/tts.py
6
+ cartesia/version.py
7
+ cartesia.egg-info/PKG-INFO
8
+ cartesia.egg-info/SOURCES.txt
9
+ cartesia.egg-info/dependency_links.txt
10
+ cartesia.egg-info/requires.txt
11
+ cartesia.egg-info/top_level.txt
12
+ tests/test_tts.py
@@ -0,0 +1,23 @@
1
+ websockets
2
+ requests
3
+ numpy
4
+
5
+ [all]
6
+ pre-commit
7
+ docformatter
8
+ black==24.1.1
9
+ isort==5.13.2
10
+ flake8==7.0.0
11
+ flake8-bugbear==24.2.6
12
+ pytest>=8.0.2
13
+ pytest-cov>=4.1.0
14
+
15
+ [dev]
16
+ pre-commit
17
+ docformatter
18
+ black==24.1.1
19
+ isort==5.13.2
20
+ flake8==7.0.0
21
+ flake8-bugbear==24.2.6
22
+ pytest>=8.0.2
23
+ pytest-cov>=4.1.0
@@ -0,0 +1 @@
1
+ cartesia
@@ -0,0 +1,11 @@
1
+ [tool.black]
2
+ line-length = 100
3
+
4
+ [tool.isort]
5
+ profile = "black"
6
+ multi_line_output = 3
7
+ include_trailing_comma = true
8
+ force_grid_wrap = 0
9
+ use_parentheses = true
10
+ ensure_newline_before_comments = true
11
+ line_length = 100
@@ -0,0 +1,255 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import io
5
+ import os
6
+
7
+ # Note: To use the 'upload' functionality of this file, you must:
8
+ # $ pipenv install twine --dev
9
+ import shutil
10
+ import subprocess
11
+ import sys
12
+ from distutils.util import convert_path
13
+ from shutil import rmtree
14
+
15
+ from setuptools import Command, find_packages, setup
16
+
17
+ PACKAGE_DIR = "cartesia"
18
+ main_ns = {}
19
+ ver_path = convert_path(os.path.join(PACKAGE_DIR, "version.py"))
20
+ with open(ver_path) as ver_file:
21
+ exec(ver_file.read(), main_ns)
22
+
23
+
24
+ # Package meta-data.
25
+ NAME = "cartesia"
26
+ DESCRIPTION = "The official Python library for the Cartesia API."
27
+ URL = ""
28
+ EMAIL = "support@cartesia.ai"
29
+ AUTHOR = "Cartesia, Inc."
30
+ REQUIRES_PYTHON = ">=3.8.0"
31
+ VERSION = main_ns["__version__"]
32
+
33
+
34
+ # What packages are required for this module to be executed?
35
def get_requirements(path):
    """Read a requirements file and return its requirement specifiers.

    Args:
        path: Path to the requirements file.

    Returns:
        The stripped lines of the file, without blank lines and without
        comment lines (those starting with ``#``), which are not valid
        requirement specifiers.
    """
    with open(path, "r") as f:
        lines = [line.strip() for line in f]
    return [line for line in lines if line and not line.startswith("#")]
41
+
42
+
43
+ REQUIRED = get_requirements("requirements.txt")
44
+ REQUIRED_DEV = get_requirements("requirements-dev.txt")
45
+
46
+ # What packages are optional?
47
+ EXTRAS = {
48
+ "dev": REQUIRED_DEV,
49
+ }
50
+ EXTRAS["all"] = [pkg for group in EXTRAS.values() for pkg in group]
51
+
52
+ # The rest you shouldn't have to touch too much :)
53
+ # ------------------------------------------------
54
+ # Except, perhaps the License and Trove Classifiers!
55
+ # If you do change the License, remember to change the Trove Classifier for that!
56
+
57
+ here = os.path.abspath(os.path.dirname(__file__))
58
+
59
+ # Import the README and use it as the long-description.
60
+ # Note: this will only work if 'README.md' is present in your MANIFEST.in file!
61
+ try:
62
+ with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
63
+ long_description = "\n" + f.read()
64
+ except FileNotFoundError:
65
+ long_description = DESCRIPTION
66
+
67
+ # Load the package's __version__.py module as a dictionary.
68
+ about = {}
69
+ if not VERSION:
70
+ project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
71
+ with open(os.path.join(here, project_slug, "__version__.py")) as f:
72
+ exec(f.read(), about)
73
+ else:
74
+ about["__version__"] = VERSION
75
+
76
+
77
class UploadCommand(Command):
    """Support setup.py upload."""

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print(f"\033[1m{s}\033[0m")

    def initialize_options(self):
        """No options to initialize."""

    def finalize_options(self):
        """No options to finalize."""

    def run(self):
        """Clean old builds, build, upload with Twine, then tag and push the tags."""
        try:
            self.status("Removing previous builds…")
            rmtree(os.path.join(here, "dist"))
        except OSError:
            # No previous build directory to remove.
            pass

        # Each step is a status message followed by the shell commands it runs.
        steps = (
            (
                "Building Source and Wheel (universal) distribution…",
                (f"{sys.executable} setup.py sdist bdist_wheel --universal",),
            ),
            (
                "Uploading the package to PyPI via Twine…",
                ("twine upload dist/*",),
            ),
            (
                "Pushing git tags…",
                (f"git tag v{about['__version__']}", "git push --tags"),
            ),
        )
        for message, commands in steps:
            self.status(message)
            for command in commands:
                os.system(command)

        sys.exit()
112
+
113
+
114
class BumpVersionCommand(Command):
    """Bump the package version and commit the change locally.

    To use: python setup.py bumpversion -v <version>

    The new version is written to the version file (`ver_path`) and committed.
    The commit is not pushed and no tag is created by this command.
    """

    description = "Bump the package version and commit the change."
    user_options = [
        ("version=", "v", "the new version number"),
    ]

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print("\033[1m{0}\033[0m".format(s))

    @staticmethod
    def _read_version():
        """Return the `__version__` currently stored in the version file."""
        namespace = {}
        with open(ver_path) as ver_file:
            exec(ver_file.read(), namespace)
        return namespace.get("__version__")

    def initialize_options(self):
        self.version = None

    def finalize_options(self):
        """Validate that a version was given and that it is greater than the current one."""
        # This package cannot be imported at top level because it
        # is not recognized by Github Actions.
        from packaging import version

        if self.version is None:
            raise ValueError("Please specify a version number.")

        current_version = about["__version__"]
        if not version.Version(self.version) > version.Version(current_version):
            raise ValueError(
                f"New version ({self.version}) must be greater than "
                f"current version ({current_version})."
            )

    def _undo(self):
        """Unstage and discard changes to the version file."""
        os.system(f"git restore --staged {ver_path}")
        os.system(f"git checkout -- {ver_path}")

    def run(self):
        """Update the version file on a clean, up-to-date `main` and commit it."""
        current_version = about["__version__"]

        self.status("Checking current branch is 'main'")
        current_branch = get_git_branch()
        if current_branch != "main":
            raise RuntimeError(
                "You can only bump the version from the 'main' branch. "
                "You are currently on the '{}' branch.".format(current_branch)
            )

        self.status("Pulling latest changes from origin")
        err_code = os.system("git pull")
        if err_code != 0:
            raise RuntimeError("Failed to pull from origin.")

        self.status("Checking working directory is clean")
        err_code = os.system("git diff --exit-code")
        err_code += os.system("git diff --cached --exit-code")
        if err_code != 0:
            raise RuntimeError("Working directory is not clean.")

        # TODO: Add check to see if all tests are passing on main.

        # Change the version in the version file and confirm it was written.
        self.status(f"Updating version {current_version} -> {self.version}")
        update_version(self.version)
        if self._read_version() != self.version:
            self._undo()
            raise RuntimeError("Failed to update version.")

        self.status(f"Adding {ver_path} to git")
        err_code = os.system(f"git add {ver_path}")
        if err_code != 0:
            self._undo()
            raise RuntimeError("Failed to add file to git.")

        # Commit the file with a message '[bumpversion] v<version>'.
        self.status(f"Commit with message '[bumpversion] v{self.version}'")
        err_code = os.system("git commit -m '[bumpversion] v{}'".format(self.version))
        if err_code != 0:
            self._undo()
            raise RuntimeError("Failed to commit file to git.")

        # NOTE: the commit is not pushed here; push it manually.

        sys.exit()
205
+
206
+
207
def update_version(version):
    """Rewrite the `__version__` assignment in the version file (`ver_path`).

    Every line starting with ``__version__`` is replaced by
    ``__version__ = "<version>"``; all other lines are kept as they are.

    Args:
        version: The new version string.
    """
    with open(ver_path, "r") as f:
        lines = f.readlines()

    updated = [
        f'__version__ = "{version}"\n' if line.startswith("__version__") else line
        for line in lines
    ]
    with open(ver_path, "w") as f:
        f.writelines(updated)
217
+
218
+
219
def get_git_branch():
    """Return the name of the current branch.

    Runs ``git branch`` and returns the entry marked with ``*``.

    Raises:
        RuntimeError: If ``git branch`` exits with a non-zero status or its
            output contains no entry marked as current.
    """
    # An argument list avoids going through a shell; stderr is captured so a
    # failure can actually be reported.
    proc = subprocess.run(["git", "branch"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if proc.returncode != 0:
        err = proc.stderr.decode("utf-8").strip()
        raise RuntimeError(f"Error finding git branch: {err}")

    for line in proc.stdout.decode("utf-8").split("\n"):
        if line.startswith("*"):
            return line.replace("*", "").strip()
    raise RuntimeError("Error finding git branch: no current branch reported.")
229
+
230
+
231
+ # Where the magic happens:
232
+ setup(
233
+ name=NAME,
234
+ version=about["__version__"],
235
+ description=DESCRIPTION,
236
+ long_description=long_description,
237
+ long_description_content_type="text/markdown",
238
+ author=AUTHOR,
239
+ author_email=EMAIL,
240
+ python_requires=REQUIRES_PYTHON,
241
+ url=URL,
242
+ packages=[PACKAGE_DIR],
243
+ install_requires=REQUIRED,
244
+ extras_require=EXTRAS,
245
+ include_package_data=True,
246
+ classifiers=[
247
+ # Trove classifiers
248
+ # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
249
+ "Programming Language :: Python",
250
+ "Programming Language :: Python :: 3",
251
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
252
+ ],
253
+ # $ setup.py publish support.
254
+ cmdclass={"upload": UploadCommand, "bumpversion": BumpVersionCommand},
255
+ )
@@ -0,0 +1,96 @@
1
+ """Test against the production Cartesia TTS API.
2
+
3
+ This test suite tries to be as general as possible because different keys
4
+ will lead to different results. Therefore, we cannot test for complete correctness
5
+ but rather for general correctness.
6
+ """
7
+
8
+ import os
9
+ from typing import Dict, Generator
10
+
11
+ import numpy as np
12
+ import pytest
13
+
14
+ from cartesia.tts import CartesiaTTS, VoiceMetadata
15
+
16
+ SAMPLE_VOICE = "Milo"
17
+
18
+
19
class _Resources:
    """Bundle of the client and the voice metadata shared by the generation tests."""

    def __init__(self, *, client: CartesiaTTS, voices: Dict[str, VoiceMetadata]):
        self.client, self.voices = client, voices
23
+
24
+
25
@pytest.fixture(scope="session")
def client():
    """Session-wide client using the key from the `CARTESIA_API_KEY` environment variable."""
    api_key = os.environ.get("CARTESIA_API_KEY")
    return CartesiaTTS(api_key=api_key)
28
+
29
+
30
@pytest.fixture(scope="session")
def resources(client: CartesiaTTS):
    """Session-wide voices mapping with the sample voice's embedding filled in."""
    voices = client.get_voices()
    sample = voices[SAMPLE_VOICE]
    sample["embedding"] = client.get_voice_embedding(voice_id=sample["id"])
    return _Resources(client=client, voices=voices)
40
+
41
+
42
def test_get_voices(client: CartesiaTTS):
    """The voices mapping is keyed by name and contains no duplicate ids."""
    voices = client.get_voices()

    assert isinstance(voices, dict)
    assert all(isinstance(name, str) for name in voices)

    unique_ids = {metadata["id"] for metadata in voices.values()}
    assert len(voices) == len(unique_ids), "All ids must be unique"

    names_match = [name == metadata["name"] for name, metadata in voices.items()]
    assert all(names_match), "The key must be the same as the name"
52
+
53
+
54
def test_get_voice_embedding_from_id(client: CartesiaTTS):
    """Fetching the sample voice's embedding by id completes without raising."""
    sample = client.get_voices()[SAMPLE_VOICE]

    client.get_voice_embedding(voice_id=sample["id"])
59
+
60
+
61
def test_get_voice_embedding_from_url(client: CartesiaTTS):
    """Cloning a voice from a YouTube share link completes without raising."""
    client.get_voice_embedding(link="https://youtu.be/g2Z7Ddd573M?si=P8BM_hBqt5P8Ft6I&t=69")
64
+
65
+
66
@pytest.mark.parametrize("websocket", [True, False])
def test_generate(resources: _Resources, websocket: bool):
    """Non-streaming generation returns a float32 array and an integer sampling rate."""
    embedding = resources.voices[SAMPLE_VOICE]["embedding"]

    output = resources.client.generate(
        transcript="Hello, world!", voice=embedding, websocket=websocket
    )

    assert output.keys() == {"audio", "sampling_rate"}
    audio = output["audio"]
    assert isinstance(audio, np.ndarray)
    assert audio.dtype == np.float32
    assert isinstance(output["sampling_rate"], int)
78
+
79
+
80
@pytest.mark.parametrize("websocket", [True, False])
def test_generate_stream(resources: _Resources, websocket: bool):
    """Streaming generation returns a generator whose chunks are well-formed."""
    embedding = resources.voices[SAMPLE_VOICE]["embedding"]

    generator = resources.client.generate(
        transcript="Hello, world!", voice=embedding, websocket=websocket, stream=True
    )
    assert isinstance(generator, Generator)

    for chunk in generator:
        assert chunk.keys() == {"audio", "sampling_rate"}
        audio = chunk["audio"]
        assert isinstance(audio, np.ndarray)
        assert audio.dtype == np.float32
        assert isinstance(chunk["sampling_rate"], int)
cartesia-0.0.0/PKG-INFO DELETED
@@ -1,10 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cartesia
3
- Version: 0.0.0
4
- Summary: Library for the Cartesia API.
5
- Author: Kabir Goel
6
- Author-email: kabir@cartesia.ai
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.6
@@ -1,10 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cartesia
3
- Version: 0.0.0
4
- Summary: Library for the Cartesia API.
5
- Author: Kabir Goel
6
- Author-email: kabir@cartesia.ai
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.6
@@ -1,5 +0,0 @@
1
- setup.py
2
- cartesia.egg-info/PKG-INFO
3
- cartesia.egg-info/SOURCES.txt
4
- cartesia.egg-info/dependency_links.txt
5
- cartesia.egg-info/top_level.txt
cartesia-0.0.0/setup.py DELETED
@@ -1,17 +0,0 @@
1
- from setuptools import setup, find_packages
2
-
3
- setup(
4
- name='cartesia',
5
- version='0.0.0',
6
- author='Kabir Goel',
7
- author_email='kabir@cartesia.ai',
8
- description='Library for the Cartesia API.',
9
- packages=find_packages(),
10
- classifiers=[
11
- 'Programming Language :: Python :: 3',
12
- 'License :: OSI Approved :: MIT License',
13
- 'Operating System :: OS Independent',
14
- ],
15
- python_requires='>=3.6',
16
- )
17
-
File without changes