cartesia 1.3.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +302 -3
- cartesia/api_status/__init__.py +6 -0
- cartesia/api_status/client.py +104 -0
- cartesia/api_status/requests/__init__.py +5 -0
- cartesia/api_status/requests/api_info.py +8 -0
- cartesia/api_status/types/__init__.py +5 -0
- cartesia/api_status/types/api_info.py +20 -0
- cartesia/base_client.py +156 -0
- cartesia/client.py +163 -40
- cartesia/core/__init__.py +50 -0
- cartesia/core/api_error.py +15 -0
- cartesia/core/client_wrapper.py +55 -0
- cartesia/core/datetime_utils.py +28 -0
- cartesia/core/file.py +67 -0
- cartesia/core/http_client.py +499 -0
- cartesia/core/jsonable_encoder.py +101 -0
- cartesia/core/pagination.py +88 -0
- cartesia/core/pydantic_utilities.py +296 -0
- cartesia/core/query_encoder.py +58 -0
- cartesia/core/remove_none_from_dict.py +11 -0
- cartesia/core/request_options.py +35 -0
- cartesia/core/serialization.py +272 -0
- cartesia/datasets/__init__.py +24 -0
- cartesia/datasets/requests/__init__.py +15 -0
- cartesia/datasets/requests/create_dataset_request.py +7 -0
- cartesia/datasets/requests/dataset.py +9 -0
- cartesia/datasets/requests/dataset_file.py +9 -0
- cartesia/datasets/requests/paginated_dataset_files.py +10 -0
- cartesia/datasets/requests/paginated_datasets.py +10 -0
- cartesia/datasets/types/__init__.py +17 -0
- cartesia/datasets/types/create_dataset_request.py +19 -0
- cartesia/datasets/types/dataset.py +21 -0
- cartesia/datasets/types/dataset_file.py +21 -0
- cartesia/datasets/types/file_purpose.py +5 -0
- cartesia/datasets/types/paginated_dataset_files.py +21 -0
- cartesia/datasets/types/paginated_datasets.py +21 -0
- cartesia/embedding/__init__.py +5 -0
- cartesia/embedding/types/__init__.py +5 -0
- cartesia/embedding/types/embedding.py +201 -0
- cartesia/environment.py +7 -0
- cartesia/infill/__init__.py +2 -0
- cartesia/infill/client.py +318 -0
- cartesia/tts/__init__.py +167 -0
- cartesia/{_async_websocket.py → tts/_async_websocket.py} +212 -85
- cartesia/tts/_websocket.py +479 -0
- cartesia/tts/client.py +407 -0
- cartesia/tts/requests/__init__.py +76 -0
- cartesia/tts/requests/cancel_context_request.py +17 -0
- cartesia/tts/requests/controls.py +11 -0
- cartesia/tts/requests/generation_request.py +58 -0
- cartesia/tts/requests/mp_3_output_format.py +11 -0
- cartesia/tts/requests/output_format.py +30 -0
- cartesia/tts/requests/phoneme_timestamps.py +10 -0
- cartesia/tts/requests/raw_output_format.py +11 -0
- cartesia/tts/requests/speed.py +7 -0
- cartesia/tts/requests/tts_request.py +24 -0
- cartesia/tts/requests/tts_request_embedding_specifier.py +16 -0
- cartesia/tts/requests/tts_request_id_specifier.py +16 -0
- cartesia/tts/requests/tts_request_voice_specifier.py +7 -0
- cartesia/tts/requests/wav_output_format.py +7 -0
- cartesia/tts/requests/web_socket_base_response.py +11 -0
- cartesia/tts/requests/web_socket_chunk_response.py +11 -0
- cartesia/tts/requests/web_socket_done_response.py +7 -0
- cartesia/tts/requests/web_socket_error_response.py +7 -0
- cartesia/tts/requests/web_socket_flush_done_response.py +9 -0
- cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_raw_output_format.py +11 -0
- cartesia/tts/requests/web_socket_request.py +7 -0
- cartesia/tts/requests/web_socket_response.py +70 -0
- cartesia/tts/requests/web_socket_stream_options.py +8 -0
- cartesia/tts/requests/web_socket_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_tts_output.py +18 -0
- cartesia/tts/requests/web_socket_tts_request.py +25 -0
- cartesia/tts/requests/word_timestamps.py +10 -0
- cartesia/tts/socket_client.py +302 -0
- cartesia/tts/types/__init__.py +90 -0
- cartesia/tts/types/cancel_context_request.py +28 -0
- cartesia/tts/types/context_id.py +3 -0
- cartesia/tts/types/controls.py +22 -0
- cartesia/tts/types/emotion.py +34 -0
- cartesia/tts/types/flush_id.py +3 -0
- cartesia/tts/types/generation_request.py +71 -0
- cartesia/tts/types/mp_3_output_format.py +23 -0
- cartesia/tts/types/natural_specifier.py +5 -0
- cartesia/tts/types/numerical_specifier.py +3 -0
- cartesia/tts/types/output_format.py +58 -0
- cartesia/tts/types/phoneme_timestamps.py +21 -0
- cartesia/tts/types/raw_encoding.py +5 -0
- cartesia/tts/types/raw_output_format.py +22 -0
- cartesia/tts/types/speed.py +7 -0
- cartesia/tts/types/supported_language.py +7 -0
- cartesia/tts/types/tts_request.py +35 -0
- cartesia/tts/types/tts_request_embedding_specifier.py +27 -0
- cartesia/tts/types/tts_request_id_specifier.py +27 -0
- cartesia/tts/types/tts_request_voice_specifier.py +7 -0
- cartesia/tts/types/wav_output_format.py +17 -0
- cartesia/tts/types/web_socket_base_response.py +22 -0
- cartesia/tts/types/web_socket_chunk_response.py +22 -0
- cartesia/tts/types/web_socket_done_response.py +17 -0
- cartesia/tts/types/web_socket_error_response.py +19 -0
- cartesia/tts/types/web_socket_flush_done_response.py +21 -0
- cartesia/tts/types/web_socket_phoneme_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_raw_output_format.py +22 -0
- cartesia/tts/types/web_socket_request.py +7 -0
- cartesia/tts/types/web_socket_response.py +125 -0
- cartesia/tts/types/web_socket_stream_options.py +19 -0
- cartesia/tts/types/web_socket_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_tts_output.py +29 -0
- cartesia/tts/types/web_socket_tts_request.py +37 -0
- cartesia/tts/types/word_timestamps.py +21 -0
- cartesia/{_constants.py → tts/utils/constants.py} +2 -2
- cartesia/tts/utils/tts.py +64 -0
- cartesia/tts/utils/types.py +70 -0
- cartesia/version.py +3 -1
- cartesia/voice_changer/__init__.py +27 -0
- cartesia/voice_changer/client.py +395 -0
- cartesia/voice_changer/requests/__init__.py +15 -0
- cartesia/voice_changer/requests/streaming_response.py +38 -0
- cartesia/voice_changer/types/__init__.py +17 -0
- cartesia/voice_changer/types/output_format_container.py +5 -0
- cartesia/voice_changer/types/streaming_response.py +64 -0
- cartesia/voices/__init__.py +81 -0
- cartesia/voices/client.py +1218 -0
- cartesia/voices/requests/__init__.py +29 -0
- cartesia/voices/requests/create_voice_request.py +23 -0
- cartesia/voices/requests/embedding_response.py +8 -0
- cartesia/voices/requests/embedding_specifier.py +10 -0
- cartesia/voices/requests/get_voices_response.py +24 -0
- cartesia/voices/requests/id_specifier.py +10 -0
- cartesia/voices/requests/localize_dialect.py +11 -0
- cartesia/voices/requests/localize_voice_request.py +28 -0
- cartesia/voices/requests/mix_voice_specifier.py +7 -0
- cartesia/voices/requests/mix_voices_request.py +9 -0
- cartesia/voices/requests/update_voice_request.py +15 -0
- cartesia/voices/requests/voice.py +43 -0
- cartesia/voices/requests/voice_metadata.py +36 -0
- cartesia/voices/types/__init__.py +53 -0
- cartesia/voices/types/base_voice_id.py +5 -0
- cartesia/voices/types/clone_mode.py +5 -0
- cartesia/voices/types/create_voice_request.py +34 -0
- cartesia/voices/types/embedding_response.py +20 -0
- cartesia/voices/types/embedding_specifier.py +22 -0
- cartesia/voices/types/gender.py +5 -0
- cartesia/voices/types/gender_presentation.py +5 -0
- cartesia/voices/types/get_voices_response.py +34 -0
- cartesia/voices/types/id_specifier.py +22 -0
- cartesia/voices/types/localize_dialect.py +11 -0
- cartesia/voices/types/localize_english_dialect.py +5 -0
- cartesia/voices/types/localize_french_dialect.py +5 -0
- cartesia/voices/types/localize_portuguese_dialect.py +5 -0
- cartesia/voices/types/localize_spanish_dialect.py +5 -0
- cartesia/voices/types/localize_target_language.py +7 -0
- cartesia/voices/types/localize_voice_request.py +39 -0
- cartesia/voices/types/mix_voice_specifier.py +7 -0
- cartesia/voices/types/mix_voices_request.py +20 -0
- cartesia/voices/types/update_voice_request.py +27 -0
- cartesia/voices/types/voice.py +54 -0
- cartesia/voices/types/voice_expand_options.py +5 -0
- cartesia/voices/types/voice_id.py +3 -0
- cartesia/voices/types/voice_metadata.py +48 -0
- cartesia/voices/types/weight.py +3 -0
- cartesia-2.0.0.dist-info/METADATA +414 -0
- cartesia-2.0.0.dist-info/RECORD +165 -0
- {cartesia-1.3.1.dist-info → cartesia-2.0.0.dist-info}/WHEEL +1 -1
- cartesia/_async_sse.py +0 -95
- cartesia/_logger.py +0 -3
- cartesia/_sse.py +0 -143
- cartesia/_types.py +0 -70
- cartesia/_websocket.py +0 -358
- cartesia/async_client.py +0 -82
- cartesia/async_tts.py +0 -63
- cartesia/resource.py +0 -44
- cartesia/tts.py +0 -137
- cartesia/utils/deprecated.py +0 -55
- cartesia/utils/retry.py +0 -87
- cartesia/utils/tts.py +0 -78
- cartesia/voices.py +0 -208
- cartesia-1.3.1.dist-info/METADATA +0 -661
- cartesia-1.3.1.dist-info/RECORD +0 -23
- cartesia-1.3.1.dist-info/licenses/LICENSE.md +0 -21
- /cartesia/{utils/__init__.py → py.typed} +0 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
from .voice_id import VoiceId
|
5
|
+
import pydantic
|
6
|
+
import datetime as dt
|
7
|
+
import typing
|
8
|
+
from ...embedding.types.embedding import Embedding
|
9
|
+
from ...tts.types.supported_language import SupportedLanguage
|
10
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
11
|
+
|
12
|
+
|
13
|
+
class Voice(UniversalBaseModel):
    # Voice record with owner-relative flags and optionally expanded fields.
    # Instances are frozen and unknown extra fields are allowed (see the
    # pydantic configuration at the bottom of the class).

    id: VoiceId

    is_owner: bool = pydantic.Field()
    """
    Whether the current user is the owner of the voice.
    """

    name: str = pydantic.Field()
    """
    The name of the voice.
    """

    description: str = pydantic.Field()
    """
    The description of the voice.
    """

    created_at: dt.datetime = pydantic.Field()
    """
    The date and time the voice was created.
    """

    # The two optional fields below default to None; per their descriptions
    # they are only included when requested through `expand`.
    embedding: typing.Optional[Embedding] = pydantic.Field(default=None)
    """
    The vector embedding of the voice. Only included when `expand` includes `embedding`.
    """

    is_starred: typing.Optional[bool] = pydantic.Field(default=None)
    """
    Whether the current user has starred the voice. Only included when `expand` includes `is_starred`.
    """

    language: SupportedLanguage

    # Same settings expressed for both pydantic major versions: frozen
    # instances, extra fields allowed. `smart_union` is set only on the v1 path.
    if not IS_PYDANTIC_V2:

        class Config:
            extra = pydantic.Extra.allow
            smart_union = True
            frozen = True

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(frozen=True, extra="allow")  # type: ignore # Pydantic v2
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
from .voice_id import VoiceId
|
5
|
+
import pydantic
|
6
|
+
import datetime as dt
|
7
|
+
from ...tts.types.supported_language import SupportedLanguage
|
8
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
9
|
+
import typing
|
10
|
+
|
11
|
+
|
12
|
+
class VoiceMetadata(UniversalBaseModel):
    # Metadata record for a voice: identity, ownership, visibility, naming,
    # creation time and language. Instances are frozen and unknown extra
    # fields are allowed (see the pydantic configuration at the bottom).

    id: VoiceId

    user_id: str = pydantic.Field()
    """
    The ID of the user who owns the voice.
    """

    is_public: bool = pydantic.Field()
    """
    Whether the voice is publicly accessible.
    """

    name: str = pydantic.Field()
    """
    The name of the voice.
    """

    description: str = pydantic.Field()
    """
    The description of the voice.
    """

    created_at: dt.datetime = pydantic.Field()
    """
    The date and time the voice was created.
    """

    language: SupportedLanguage

    # Same settings expressed for both pydantic major versions: frozen
    # instances, extra fields allowed. `smart_union` is set only on the v1 path.
    if not IS_PYDANTIC_V2:

        class Config:
            extra = pydantic.Extra.allow
            smart_union = True
            frozen = True

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(frozen=True, extra="allow")  # type: ignore # Pydantic v2
|
@@ -0,0 +1,414 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: cartesia
|
3
|
+
Version: 2.0.0
|
4
|
+
Summary:
|
5
|
+
Requires-Python: >=3.8,<4.0
|
6
|
+
Classifier: Intended Audience :: Developers
|
7
|
+
Classifier: Operating System :: MacOS
|
8
|
+
Classifier: Operating System :: Microsoft :: Windows
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Classifier: Operating System :: POSIX
|
11
|
+
Classifier: Operating System :: POSIX :: Linux
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Classifier: Typing :: Typed
|
21
|
+
Requires-Dist: aiohttp (>=3.10.10)
|
22
|
+
Requires-Dist: audioop-lts (==0.2.1) ; python_version >= "3.13" and python_version < "4.0"
|
23
|
+
Requires-Dist: httpx (>=0.21.2)
|
24
|
+
Requires-Dist: httpx-sse (==0.4.0)
|
25
|
+
Requires-Dist: iterators (>=0.2.0)
|
26
|
+
Requires-Dist: pydantic (>=1.9.2)
|
27
|
+
Requires-Dist: pydantic-core (>=2.18.2,<3.0.0)
|
28
|
+
Requires-Dist: pydub (>=0.25.1)
|
29
|
+
Requires-Dist: typing_extensions (>=4.0.0)
|
30
|
+
Requires-Dist: websockets (>=10.4)
|
31
|
+
Description-Content-Type: text/markdown
|
32
|
+
|
33
|
+
# Cartesia Python Library
|
34
|
+
|
35
|
+
[![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Fcartesia-ai%2Fcartesia-python)
|
36
|
+
[![pypi](https://img.shields.io/pypi/v/cartesia)](https://pypi.python.org/pypi/cartesia)
|
37
|
+
|
38
|
+
The Cartesia Python library provides convenient access to the Cartesia API from Python.
|
39
|
+
|
40
|
+
## Documentation
|
41
|
+
|
42
|
+
Our complete API documentation can be found [on docs.cartesia.ai](https://docs.cartesia.ai).
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
|
46
|
+
```sh
|
47
|
+
pip install cartesia
|
48
|
+
```
|
49
|
+
|
50
|
+
## Usage
|
51
|
+
|
52
|
+
Instantiate and use the client with the following:
|
53
|
+
|
54
|
+
```python
|
55
|
+
from cartesia import Cartesia
|
56
|
+
from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
|
57
|
+
import os
|
58
|
+
|
59
|
+
client = Cartesia(
|
60
|
+
api_key=os.getenv("CARTESIA_API_KEY"),
|
61
|
+
)
|
62
|
+
client.tts.bytes(
|
63
|
+
model_id="sonic-2",
|
64
|
+
transcript="Hello, world!",
|
65
|
+
voice={
|
66
|
+
"mode": "id",
|
67
|
+
"id": "694f9389-aac1-45b6-b726-9d9369183238",
|
68
|
+
},
|
69
|
+
language="en",
|
70
|
+
output_format={
|
71
|
+
"container": "raw",
|
72
|
+
"sample_rate": 44100,
|
73
|
+
"encoding": "pcm_f32le",
|
74
|
+
},
|
75
|
+
)
|
76
|
+
```
|
77
|
+
|
78
|
+
## Async Client
|
79
|
+
|
80
|
+
The SDK also exports an `async` client so that you can make non-blocking calls to our API.
|
81
|
+
|
82
|
+
```python
|
83
|
+
import asyncio
|
84
|
+
import os
|
85
|
+
|
86
|
+
from cartesia import AsyncCartesia
|
87
|
+
from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
|
88
|
+
|
89
|
+
client = AsyncCartesia(
|
90
|
+
api_key=os.getenv("CARTESIA_API_KEY"),
|
91
|
+
)
|
92
|
+
|
93
|
+
async def main() -> None:
|
94
|
+
async for output in client.tts.bytes(
|
95
|
+
model_id="sonic-2",
|
96
|
+
transcript="Hello, world!",
|
97
|
+
voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
|
98
|
+
language="en",
|
99
|
+
output_format={
|
100
|
+
"container": "raw",
|
101
|
+
"sample_rate": 44100,
|
102
|
+
"encoding": "pcm_f32le",
|
103
|
+
},
|
104
|
+
):
|
105
|
+
print(f"Received chunk of size: {len(output)}")
|
106
|
+
|
107
|
+
|
108
|
+
asyncio.run(main())
|
109
|
+
```
|
110
|
+
|
111
|
+
## Exception Handling
|
112
|
+
|
113
|
+
When the API returns a non-success status code (4xx or 5xx response), a subclass of the following error
|
114
|
+
will be raised.
|
115
|
+
|
116
|
+
```python
|
117
|
+
from cartesia.core.api_error import ApiError
|
118
|
+
|
119
|
+
try:
|
120
|
+
client.tts.bytes(...)
|
121
|
+
except ApiError as e:
|
122
|
+
print(e.status_code)
|
123
|
+
print(e.body)
|
124
|
+
```
|
125
|
+
|
126
|
+
## Streaming
|
127
|
+
|
128
|
+
The SDK supports streaming responses as well, returning a generator that you can iterate over with a `for ... in ...` loop:
|
129
|
+
|
130
|
+
```python
|
131
|
+
from cartesia import Cartesia
|
132
|
+
from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
|
133
|
+
import os
|
134
|
+
|
135
|
+
def get_tts_chunks():
|
136
|
+
client = Cartesia(
|
137
|
+
api_key=os.getenv("CARTESIA_API_KEY"),
|
138
|
+
)
|
139
|
+
response = client.tts.sse(
|
140
|
+
model_id="sonic-2",
|
141
|
+
transcript="Hello world!",
|
142
|
+
voice={
|
143
|
+
"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
|
144
|
+
"experimental_controls": {
|
145
|
+
"speed": "normal",
|
146
|
+
"emotion": [],
|
147
|
+
},
|
148
|
+
},
|
149
|
+
language="en",
|
150
|
+
output_format={
|
151
|
+
"container": "raw",
|
152
|
+
"encoding": "pcm_f32le",
|
153
|
+
"sample_rate": 44100,
|
154
|
+
},
|
155
|
+
)
|
156
|
+
|
157
|
+
audio_chunks = []
|
158
|
+
for chunk in response:
|
159
|
+
audio_chunks.append(chunk)
|
160
|
+
return audio_chunks
|
161
|
+
|
162
|
+
chunks = get_tts_chunks()
|
163
|
+
for chunk in chunks:
|
164
|
+
print(f"Received chunk of size: {len(chunk.data)}")
|
165
|
+
```
|
166
|
+
|
167
|
+
## WebSockets
|
168
|
+
|
169
|
+
For the lowest latency in advanced use cases (such as streaming in an LLM-generated transcript and streaming out audio), you should use our WebSocket client:
|
170
|
+
|
171
|
+
```python
|
172
|
+
from cartesia import Cartesia
|
173
|
+
from cartesia.tts import TtsRequestEmbeddingSpecifierParams, OutputFormat_RawParams
|
174
|
+
import pyaudio
|
175
|
+
import os
|
176
|
+
|
177
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
178
|
+
voice_id = "a0e99841-438c-4a64-b679-ae501e7d6091"
|
179
|
+
transcript = "Hello! Welcome to Cartesia"
|
180
|
+
|
181
|
+
p = pyaudio.PyAudio()
|
182
|
+
rate = 22050
|
183
|
+
|
184
|
+
stream = None
|
185
|
+
|
186
|
+
# Set up the websocket connection
|
187
|
+
ws = client.tts.websocket()
|
188
|
+
|
189
|
+
# Generate and stream audio using the websocket
|
190
|
+
for output in ws.send(
|
191
|
+
model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
|
192
|
+
transcript=transcript,
|
193
|
+
voice={"id": voice_id},
|
194
|
+
stream=True,
|
195
|
+
output_format={
|
196
|
+
"container": "raw",
|
197
|
+
"encoding": "pcm_f32le",
|
198
|
+
"sample_rate": rate
|
199
|
+
},
|
200
|
+
):
|
201
|
+
buffer = output.audio
|
202
|
+
|
203
|
+
if not stream:
|
204
|
+
stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
|
205
|
+
|
206
|
+
# Write the audio data to the stream
|
207
|
+
stream.write(buffer)
|
208
|
+
|
209
|
+
stream.stop_stream()
|
210
|
+
stream.close()
|
211
|
+
p.terminate()
|
212
|
+
|
213
|
+
ws.close() # Close the websocket connection
|
214
|
+
```
|
215
|
+
|
216
|
+
## Voices
|
217
|
+
|
218
|
+
List all available Voices with `client.voices.list`, which returns an iterable that automatically handles pagination:
|
219
|
+
|
220
|
+
```python
|
221
|
+
from cartesia import Cartesia
|
222
|
+
import os
|
223
|
+
|
224
|
+
client = Cartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
225
|
+
|
226
|
+
# Get all available Voices
|
227
|
+
voices = client.voices.list()
|
228
|
+
for voice in voices:
|
229
|
+
print(voice)
|
230
|
+
```
|
231
|
+
|
232
|
+
You can also get the complete metadata for a specific Voice, or make a new Voice by cloning from an audio sample:
|
233
|
+
|
234
|
+
```python
|
235
|
+
# Get a specific Voice
|
236
|
+
voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
|
237
|
+
print("The embedding for", voice.name, "is", voice.embedding)
|
238
|
+
|
239
|
+
# Clone a Voice using file data
|
240
|
+
cloned_voice = client.voices.clone(
|
241
|
+
clip=open("path/to/voice.wav", "rb"),
|
242
|
+
name="Test cloned voice",
|
243
|
+
language="en",
|
244
|
+
mode="similarity", # or "stability"
|
245
|
+
enhance=False, # use enhance=True to clean and denoise the cloning audio
|
246
|
+
description="Test voice description"
|
247
|
+
)
|
248
|
+
```
|
249
|
+
|
250
|
+
## Requesting Timestamps
|
251
|
+
|
252
|
+
```python
|
253
|
+
import asyncio
|
254
|
+
from cartesia import AsyncCartesia
|
255
|
+
import os
|
256
|
+
|
257
|
+
async def main():
|
258
|
+
client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
|
259
|
+
|
260
|
+
# Connect to the websocket
|
261
|
+
ws = await client.tts.websocket()
|
262
|
+
|
263
|
+
# Generate speech with timestamps
|
264
|
+
output_generate = await ws.send(
|
265
|
+
model_id="sonic-2",
|
266
|
+
transcript="Hello! Welcome to Cartesia's text-to-speech.",
|
267
|
+
voice={"id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94"},
|
268
|
+
output_format={
|
269
|
+
"container": "raw",
|
270
|
+
"encoding": "pcm_f32le",
|
271
|
+
"sample_rate": 44100
|
272
|
+
},
|
273
|
+
add_timestamps=True, # Enable word-level timestamps
|
274
|
+
add_phoneme_timestamps=True, # Enable phonemized timestamps
|
275
|
+
stream=True
|
276
|
+
)
|
277
|
+
|
278
|
+
# Process the streaming response with timestamps
|
279
|
+
all_words = []
|
280
|
+
all_starts = []
|
281
|
+
all_ends = []
|
282
|
+
audio_chunks = []
|
283
|
+
|
284
|
+
async for out in output_generate:
|
285
|
+
# Collect audio data
|
286
|
+
if out.audio is not None:
|
287
|
+
audio_chunks.append(out.audio)
|
288
|
+
|
289
|
+
# Process timestamp data
|
290
|
+
if out.word_timestamps is not None:
|
291
|
+
all_words.extend(out.word_timestamps.words) # List of words
|
292
|
+
all_starts.extend(out.word_timestamps.start) # Start time for each word (seconds)
|
293
|
+
all_ends.extend(out.word_timestamps.end) # End time for each word (seconds)
|
294
|
+
|
295
|
+
await ws.close()
|
296
|
+
|
297
|
+
asyncio.run(main())
|
298
|
+
```
|
299
|
+
|
300
|
+
## Advanced
|
301
|
+
|
302
|
+
### Retries
|
303
|
+
|
304
|
+
The SDK is instrumented with automatic retries with exponential backoff. A request will be retried as long
|
305
|
+
as the request is deemed retriable and the number of retry attempts has not grown larger than the configured
|
306
|
+
retry limit (default: 2).
|
307
|
+
|
308
|
+
A request is deemed retriable when any of the following HTTP status codes is returned:
|
309
|
+
|
310
|
+
- [408](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/408) (Request Timeout)
|
311
|
+
- [429](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429) (Too Many Requests)
|
312
|
+
- [5XX](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500) (Server Errors)
|
313
|
+
|
314
|
+
Use the `max_retries` request option to configure this behavior.
|
315
|
+
|
316
|
+
```python
|
317
|
+
client.tts.bytes(..., request_options={
|
318
|
+
"max_retries": 1
|
319
|
+
})
|
320
|
+
```
|
321
|
+
|
322
|
+
### Timeouts
|
323
|
+
|
324
|
+
The SDK defaults to a 60 second timeout. You can configure this with a timeout option at the client or request level.
|
325
|
+
|
326
|
+
```python
|
327
|
+
|
328
|
+
from cartesia import Cartesia
|
329
|
+
|
330
|
+
client = Cartesia(
|
331
|
+
...,
|
332
|
+
timeout=20.0,
|
333
|
+
)
|
334
|
+
|
335
|
+
|
336
|
+
# Override timeout for a specific method
|
337
|
+
client.tts.bytes(..., request_options={
|
338
|
+
"timeout_in_seconds": 1
|
339
|
+
})
|
340
|
+
```
|
341
|
+
|
342
|
+
### Mixing voices and creating from embeddings
|
343
|
+
|
344
|
+
```python
|
345
|
+
# Mix voices together
|
346
|
+
mixed_voice = client.voices.mix(
|
347
|
+
voices=[
|
348
|
+
{"id": "voice_id_1", "weight": 0.25},
|
349
|
+
{"id": "voice_id_2", "weight": 0.75}
|
350
|
+
]
|
351
|
+
)
|
352
|
+
|
353
|
+
# Create a new voice from embedding
|
354
|
+
new_voice = client.voices.create(
|
355
|
+
name="Test Voice",
|
356
|
+
description="Test voice description",
|
357
|
+
embedding=[...], # List[float] with 192 dimensions
|
358
|
+
language="en"
|
359
|
+
)
|
360
|
+
```
|
361
|
+
|
362
|
+
### Custom Client
|
363
|
+
|
364
|
+
You can override the `httpx` client to customize it for your use case. Some common use cases include support for proxies
|
365
|
+
and transports.
|
366
|
+
```python
|
367
|
+
import httpx
|
368
|
+
from cartesia import Cartesia
|
369
|
+
|
370
|
+
client = Cartesia(
|
371
|
+
...,
|
372
|
+
httpx_client=httpx.Client(
|
373
|
+
proxies="http://my.test.proxy.example.com",
|
374
|
+
transport=httpx.HTTPTransport(local_address="0.0.0.0"),
|
375
|
+
),
|
376
|
+
)
|
377
|
+
```
|
378
|
+
|
379
|
+
## Reference
|
380
|
+
|
381
|
+
A full reference for this library is available [here](./reference.md).
|
382
|
+
|
383
|
+
## Contributing
|
384
|
+
|
385
|
+
Note that most of this library is generated programmatically from
|
386
|
+
<https://github.com/cartesia-ai/docs> — before making edits to a file, verify it's not autogenerated
|
387
|
+
by checking for this comment at the top of the file:
|
388
|
+
|
389
|
+
```
|
390
|
+
# This file was auto-generated by Fern from our API Definition.
|
391
|
+
```
|
392
|
+
|
393
|
+
### Running tests
|
394
|
+
|
395
|
+
```sh
|
396
|
+
uv pip install -r requirements.txt
|
397
|
+
uv run pytest -rP -vv tests/custom/test_client.py::test_get_voices
|
398
|
+
```
|
399
|
+
### Manually generating SDK code from docs
|
400
|
+
|
401
|
+
Assuming all your repos are cloned into your home directory:
|
402
|
+
|
403
|
+
```sh
|
404
|
+
$ cd ~/docs
|
405
|
+
$ fern generate --group python-sdk --log-level debug --api version-2024-11-13 --preview
|
406
|
+
$ cd ~/cartesia-python
|
407
|
+
$ git pull ~/docs/fern/apis/version-2024-11-13/.preview/fern-python-sdk
|
408
|
+
$ git commit --amend -m "manually regenerate from docs" # optional
|
409
|
+
```
|
410
|
+
|
411
|
+
### Automatically generating new SDK releases
|
412
|
+
|
413
|
+
From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)
|
414
|
+
|