cartesia 1.3.1__py3-none-any.whl → 2.0.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +288 -3
- cartesia/api_status/__init__.py +6 -0
- cartesia/api_status/client.py +104 -0
- cartesia/api_status/requests/__init__.py +5 -0
- cartesia/api_status/requests/api_info.py +8 -0
- cartesia/api_status/types/__init__.py +5 -0
- cartesia/api_status/types/api_info.py +20 -0
- cartesia/base_client.py +160 -0
- cartesia/client.py +163 -40
- cartesia/core/__init__.py +47 -0
- cartesia/core/api_error.py +15 -0
- cartesia/core/client_wrapper.py +55 -0
- cartesia/core/datetime_utils.py +28 -0
- cartesia/core/file.py +67 -0
- cartesia/core/http_client.py +499 -0
- cartesia/core/jsonable_encoder.py +101 -0
- cartesia/core/pydantic_utilities.py +296 -0
- cartesia/core/query_encoder.py +58 -0
- cartesia/core/remove_none_from_dict.py +11 -0
- cartesia/core/request_options.py +35 -0
- cartesia/core/serialization.py +272 -0
- cartesia/datasets/__init__.py +24 -0
- cartesia/datasets/client.py +422 -0
- cartesia/datasets/requests/__init__.py +15 -0
- cartesia/datasets/requests/create_dataset_request.py +7 -0
- cartesia/datasets/requests/dataset.py +9 -0
- cartesia/datasets/requests/dataset_file.py +9 -0
- cartesia/datasets/requests/paginated_dataset_files.py +10 -0
- cartesia/datasets/requests/paginated_datasets.py +10 -0
- cartesia/datasets/types/__init__.py +17 -0
- cartesia/datasets/types/create_dataset_request.py +19 -0
- cartesia/datasets/types/dataset.py +21 -0
- cartesia/datasets/types/dataset_file.py +21 -0
- cartesia/datasets/types/file_purpose.py +5 -0
- cartesia/datasets/types/paginated_dataset_files.py +21 -0
- cartesia/datasets/types/paginated_datasets.py +21 -0
- cartesia/embedding/__init__.py +5 -0
- cartesia/embedding/types/__init__.py +5 -0
- cartesia/embedding/types/embedding.py +201 -0
- cartesia/environment.py +7 -0
- cartesia/infill/__init__.py +2 -0
- cartesia/infill/client.py +294 -0
- cartesia/tts/__init__.py +167 -0
- cartesia/{_async_websocket.py → tts/_async_websocket.py} +159 -84
- cartesia/tts/_websocket.py +430 -0
- cartesia/tts/client.py +407 -0
- cartesia/tts/requests/__init__.py +76 -0
- cartesia/tts/requests/cancel_context_request.py +17 -0
- cartesia/tts/requests/controls.py +11 -0
- cartesia/tts/requests/generation_request.py +53 -0
- cartesia/tts/requests/mp_3_output_format.py +11 -0
- cartesia/tts/requests/output_format.py +30 -0
- cartesia/tts/requests/phoneme_timestamps.py +10 -0
- cartesia/tts/requests/raw_output_format.py +11 -0
- cartesia/tts/requests/speed.py +7 -0
- cartesia/tts/requests/tts_request.py +24 -0
- cartesia/tts/requests/tts_request_embedding_specifier.py +16 -0
- cartesia/tts/requests/tts_request_id_specifier.py +16 -0
- cartesia/tts/requests/tts_request_voice_specifier.py +7 -0
- cartesia/tts/requests/wav_output_format.py +7 -0
- cartesia/tts/requests/web_socket_base_response.py +11 -0
- cartesia/tts/requests/web_socket_chunk_response.py +8 -0
- cartesia/tts/requests/web_socket_done_response.py +7 -0
- cartesia/tts/requests/web_socket_error_response.py +7 -0
- cartesia/tts/requests/web_socket_flush_done_response.py +9 -0
- cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_raw_output_format.py +11 -0
- cartesia/tts/requests/web_socket_request.py +7 -0
- cartesia/tts/requests/web_socket_response.py +69 -0
- cartesia/tts/requests/web_socket_stream_options.py +8 -0
- cartesia/tts/requests/web_socket_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_tts_output.py +18 -0
- cartesia/tts/requests/web_socket_tts_request.py +24 -0
- cartesia/tts/requests/word_timestamps.py +10 -0
- cartesia/tts/socket_client.py +302 -0
- cartesia/tts/types/__init__.py +90 -0
- cartesia/tts/types/cancel_context_request.py +28 -0
- cartesia/tts/types/context_id.py +3 -0
- cartesia/tts/types/controls.py +22 -0
- cartesia/tts/types/emotion.py +29 -0
- cartesia/tts/types/flush_id.py +3 -0
- cartesia/tts/types/generation_request.py +66 -0
- cartesia/tts/types/mp_3_output_format.py +23 -0
- cartesia/tts/types/natural_specifier.py +5 -0
- cartesia/tts/types/numerical_specifier.py +3 -0
- cartesia/tts/types/output_format.py +58 -0
- cartesia/tts/types/phoneme_timestamps.py +21 -0
- cartesia/tts/types/raw_encoding.py +5 -0
- cartesia/tts/types/raw_output_format.py +22 -0
- cartesia/tts/types/speed.py +7 -0
- cartesia/tts/types/supported_language.py +7 -0
- cartesia/tts/types/tts_request.py +35 -0
- cartesia/tts/types/tts_request_embedding_specifier.py +27 -0
- cartesia/tts/types/tts_request_id_specifier.py +27 -0
- cartesia/tts/types/tts_request_voice_specifier.py +7 -0
- cartesia/tts/types/wav_output_format.py +17 -0
- cartesia/tts/types/web_socket_base_response.py +22 -0
- cartesia/tts/types/web_socket_chunk_response.py +20 -0
- cartesia/tts/types/web_socket_done_response.py +17 -0
- cartesia/tts/types/web_socket_error_response.py +19 -0
- cartesia/tts/types/web_socket_flush_done_response.py +21 -0
- cartesia/tts/types/web_socket_phoneme_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_raw_output_format.py +22 -0
- cartesia/tts/types/web_socket_request.py +7 -0
- cartesia/tts/types/web_socket_response.py +124 -0
- cartesia/tts/types/web_socket_stream_options.py +19 -0
- cartesia/tts/types/web_socket_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_tts_output.py +27 -0
- cartesia/tts/types/web_socket_tts_request.py +36 -0
- cartesia/tts/types/word_timestamps.py +21 -0
- cartesia/tts/utils/tts.py +64 -0
- cartesia/tts/utils/types.py +70 -0
- cartesia/version.py +3 -1
- cartesia/voice_changer/__init__.py +27 -0
- cartesia/voice_changer/client.py +395 -0
- cartesia/voice_changer/requests/__init__.py +15 -0
- cartesia/voice_changer/requests/streaming_response.py +36 -0
- cartesia/voice_changer/types/__init__.py +17 -0
- cartesia/voice_changer/types/output_format_container.py +5 -0
- cartesia/voice_changer/types/streaming_response.py +62 -0
- cartesia/voices/__init__.py +67 -0
- cartesia/voices/client.py +1812 -0
- cartesia/voices/requests/__init__.py +27 -0
- cartesia/voices/requests/create_voice_request.py +21 -0
- cartesia/voices/requests/embedding_response.py +8 -0
- cartesia/voices/requests/embedding_specifier.py +10 -0
- cartesia/voices/requests/id_specifier.py +10 -0
- cartesia/voices/requests/localize_dialect.py +6 -0
- cartesia/voices/requests/localize_voice_request.py +15 -0
- cartesia/voices/requests/mix_voice_specifier.py +7 -0
- cartesia/voices/requests/mix_voices_request.py +9 -0
- cartesia/voices/requests/update_voice_request.py +15 -0
- cartesia/voices/requests/voice.py +39 -0
- cartesia/voices/requests/voice_metadata.py +36 -0
- cartesia/voices/types/__init__.py +41 -0
- cartesia/voices/types/base_voice_id.py +5 -0
- cartesia/voices/types/clone_mode.py +5 -0
- cartesia/voices/types/create_voice_request.py +32 -0
- cartesia/voices/types/embedding_response.py +20 -0
- cartesia/voices/types/embedding_specifier.py +22 -0
- cartesia/voices/types/gender.py +5 -0
- cartesia/voices/types/id_specifier.py +22 -0
- cartesia/voices/types/localize_dialect.py +6 -0
- cartesia/voices/types/localize_english_dialect.py +5 -0
- cartesia/voices/types/localize_target_language.py +7 -0
- cartesia/voices/types/localize_voice_request.py +26 -0
- cartesia/voices/types/mix_voice_specifier.py +7 -0
- cartesia/voices/types/mix_voices_request.py +20 -0
- cartesia/voices/types/update_voice_request.py +27 -0
- cartesia/voices/types/voice.py +50 -0
- cartesia/voices/types/voice_id.py +3 -0
- cartesia/voices/types/voice_metadata.py +48 -0
- cartesia/voices/types/weight.py +3 -0
- cartesia-2.0.0a0.dist-info/METADATA +306 -0
- cartesia-2.0.0a0.dist-info/RECORD +158 -0
- {cartesia-1.3.1.dist-info → cartesia-2.0.0a0.dist-info}/WHEEL +1 -1
- cartesia/_async_sse.py +0 -95
- cartesia/_logger.py +0 -3
- cartesia/_sse.py +0 -143
- cartesia/_types.py +0 -70
- cartesia/_websocket.py +0 -358
- cartesia/async_client.py +0 -82
- cartesia/async_tts.py +0 -63
- cartesia/resource.py +0 -44
- cartesia/tts.py +0 -137
- cartesia/utils/deprecated.py +0 -55
- cartesia/utils/retry.py +0 -87
- cartesia/utils/tts.py +0 -78
- cartesia/voices.py +0 -208
- cartesia-1.3.1.dist-info/METADATA +0 -661
- cartesia-1.3.1.dist-info/RECORD +0 -23
- cartesia-1.3.1.dist-info/licenses/LICENSE.md +0 -21
- /cartesia/{utils/__init__.py → py.typed} +0 -0
- /cartesia/{_constants.py → tts/utils/constants.py} +0 -0
cartesia/async_tts.py
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
from typing import Iterator, List, Optional
|
2
|
-
|
3
|
-
import httpx
|
4
|
-
from cartesia._async_sse import _AsyncSSE
|
5
|
-
from cartesia._async_websocket import _AsyncWebSocket
|
6
|
-
from cartesia._types import OutputFormat, VoiceControls
|
7
|
-
from cartesia.tts import TTS
|
8
|
-
from cartesia.utils.tts import _construct_tts_request
|
9
|
-
|
10
|
-
|
11
|
-
class AsyncTTS(TTS):
    """Asynchronous variant of :class:`TTS`.

    The SSE sender is replaced by an ``_AsyncSSE`` instance, and
    ``websocket`` / ``bytes`` are coroutines.
    """

    def __init__(self, api_key, base_url, timeout, get_session):
        super().__init__(api_key, base_url, timeout)
        # Kept so it can be handed to the async SSE and WebSocket helpers.
        self._get_session = get_session
        self._sse_class = _AsyncSSE(
            self._http_url(), self.headers, self.timeout, get_session
        )
        self.sse = self._sse_class.send

    async def websocket(self) -> _AsyncWebSocket:
        """Build an ``_AsyncWebSocket``, connect it, and return it."""
        connection = _AsyncWebSocket(
            self._ws_url(),
            self.api_key,
            self.cartesia_version,
            self.timeout,
            self._get_session,
        )
        await connection.connect()
        return connection

    async def bytes(
        self,
        *,
        model_id: str,
        transcript: str,
        output_format: OutputFormat,
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        duration: Optional[int] = None,
        language: Optional[str] = None,
        _experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> bytes:
        """POST a TTS request to ``/tts/bytes`` and return the response body.

        Raises:
            ValueError: If the response is not successful.
        """
        payload = _construct_tts_request(
            model_id=model_id,
            transcript=transcript,
            output_format=output_format,
            voice_id=voice_id,
            voice_embedding=voice_embedding,
            duration=duration,
            language=language,
            _experimental_voice_controls=_experimental_voice_controls,
        )

        # A fresh client is opened (and closed) for every call.
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self._http_url()}/tts/bytes",
                headers=self.headers,
                timeout=self.timeout,
                json=payload,
            )

        if response.is_success:
            return response.content

        raise ValueError(f"Failed to generate audio. Error: {response.text}")
|
cartesia/resource.py
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
from cartesia._constants import DEFAULT_CARTESIA_VERSION
|
2
|
-
|
3
|
-
|
4
|
-
class Resource:
    """Shared connection settings for API resources.

    Stores the API key, timeout, base URL, API version and default request
    headers, and derives HTTP and WebSocket URLs from the base URL.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str,
        timeout: float,
    ):
        """Constructor for the Resource class. Used by the Voices and TTS classes."""
        self.api_key = api_key
        self.timeout = timeout
        self._base_url = base_url
        self.cartesia_version = DEFAULT_CARTESIA_VERSION
        self.headers = {
            "X-API-Key": self.api_key,
            "Cartesia-Version": self.cartesia_version,
            "Content-Type": "application/json",
        }

    @property
    def base_url(self):
        """The base URL exactly as passed to the constructor."""
        return self._base_url

    def _url_with_scheme(self, plain: str, secure: str) -> str:
        """Return the base URL using the ``plain``/``secure`` scheme family.

        A base URL without a scheme gets ``plain`` when it contains
        "localhost" and ``secure`` otherwise. A base URL that already carries
        an http/https/ws/wss scheme is translated to the matching scheme of
        the requested family instead of having a second scheme prepended.
        """
        base = self._base_url
        scheme, separator, remainder = base.partition("://")

        if separator:
            if scheme in (plain, secure):
                return base
            # Translate between the HTTP and WebSocket families, keeping
            # the secure/insecure choice the caller made.
            if scheme in ("http", "ws"):
                return f"{plain}://{remainder}"
            if scheme in ("https", "wss"):
                return f"{secure}://{remainder}"

        # No scheme (or an unrecognised one): same rule as before.
        prefix = plain if "localhost" in base else secure
        return f"{prefix}://{base}"

    def _http_url(self):
        """Returns the HTTP URL for the Cartesia API.

        If the base URL is localhost, the URL will start with 'http'.
        Otherwise, it will start with 'https'. A 'ws://' or 'wss://' base URL
        is translated to 'http://' or 'https://' respectively.
        """
        return self._url_with_scheme("http", "https")

    def _ws_url(self):
        """Returns the WebSocket URL for the Cartesia API.

        If the base URL is localhost, the URL will start with 'ws'.
        Otherwise, it will start with 'wss'. An 'http://' or 'https://' base
        URL is translated to 'ws://' or 'wss://' respectively.
        """
        return self._url_with_scheme("ws", "wss")
|
cartesia/tts.py
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
from typing import Iterator, List, Optional
|
2
|
-
|
3
|
-
import httpx
|
4
|
-
|
5
|
-
from cartesia._sse import _SSE
|
6
|
-
from cartesia._types import (
|
7
|
-
OutputFormat,
|
8
|
-
OutputFormatMapping,
|
9
|
-
VoiceControls,
|
10
|
-
)
|
11
|
-
from cartesia._websocket import _WebSocket
|
12
|
-
from cartesia.resource import Resource
|
13
|
-
from cartesia.utils.tts import _construct_tts_request, _validate_and_construct_voice
|
14
|
-
|
15
|
-
|
16
|
-
class TTS(Resource):
    """Resource with methods to generate audio using Cartesia's text-to-speech API."""

    def __init__(self, api_key: str, base_url: str, timeout: float):
        super().__init__(api_key=api_key, base_url=base_url, timeout=timeout)
        # ``sse`` is exposed as a bound method of the SSE helper.
        self._sse_class = _SSE(self._http_url(), self.headers, self.timeout)
        self.sse = self._sse_class.send

    def websocket(self) -> _WebSocket:
        """Create a ``_WebSocket``, connect it, and return it.

        Returns:
            _WebSocket: A connected WebSocket object for generating audio.
        """
        connection = _WebSocket(self._ws_url(), self.api_key, self.cartesia_version)
        connection.connect()
        return connection

    def bytes(
        self,
        *,
        model_id: str,
        transcript: str,
        output_format: OutputFormat,
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        duration: Optional[int] = None,
        language: Optional[str] = None,
        _experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> bytes:
        """POST a TTS request to ``/tts/bytes`` and return the response body.

        Raises:
            ValueError: If the response is not successful.
        """
        payload = _construct_tts_request(
            model_id=model_id,
            transcript=transcript,
            output_format=output_format,
            voice_id=voice_id,
            voice_embedding=voice_embedding,
            duration=duration,
            language=language,
            _experimental_voice_controls=_experimental_voice_controls,
        )

        response = httpx.post(
            f"{self._http_url()}/tts/bytes",
            headers=self.headers,
            timeout=self.timeout,
            json=payload,
        )

        if response.is_success:
            return response.content

        raise ValueError(f"Failed to generate audio. Error: {response.text}")

    @staticmethod
    def get_output_format(output_format_name: str) -> OutputFormat:
        """Look up the output_format dictionary for a named output format.

        Args:
            output_format_name (str): The name of the output format.

        Returns:
            OutputFormat: Container, encoding and sample rate of the format,
            to be passed into tts.sse() or tts.websocket().send()

        Raises:
            ValueError: If the output_format name is not supported
        """
        if output_format_name not in OutputFormatMapping._format_mapping:
            raise ValueError(f"Unsupported format: {output_format_name}")

        spec = OutputFormatMapping.get_format(output_format_name)
        return OutputFormat(
            container=spec["container"],
            encoding=spec["encoding"],
            sample_rate=spec["sample_rate"],
        )

    @staticmethod
    def get_sample_rate(output_format_name: str) -> int:
        """Look up the sample rate of a named output format.

        Args:
            output_format_name (str): The name of the output format.

        Returns:
            int: The sample rate for the output format.

        Raises:
            ValueError: If the output_format name is not supported
        """
        if output_format_name not in OutputFormatMapping._format_mapping:
            raise ValueError(f"Unsupported format: {output_format_name}")

        spec = OutputFormatMapping.get_format(output_format_name)
        return spec["sample_rate"]

    @staticmethod
    def _validate_and_construct_voice(
        voice_id: Optional[str] = None,
        voice_embedding: Optional[List[float]] = None,
        experimental_voice_controls: Optional[VoiceControls] = None,
    ) -> dict:
        """Delegate to the module-level ``_validate_and_construct_voice`` helper.

        Args:
            voice_id: The ID of the voice to use for generating audio.
            voice_embedding: The embedding of the voice to use for generating audio.
            experimental_voice_controls: Voice controls for emotion and speed.
                Note: This is an experimental feature and may rapidly change in the future.

        Returns:
            A dictionary representing the voice configuration.
        """
        # Inside a method body this name resolves to the imported helper,
        # not to this static method.
        return _validate_and_construct_voice(voice_id, voice_embedding, experimental_voice_controls)
|
cartesia/utils/deprecated.py
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import warnings
|
3
|
-
from typing import Any, Callable, TypeVar
|
4
|
-
|
5
|
-
TCallable = TypeVar("TCallable", bound=Callable[..., Any])
|
6
|
-
|
7
|
-
# List of statistics of deprecated functions.
|
8
|
-
# This should only be used by the test suite to find any deprecated functions
|
9
|
-
# that should be removed for this version.
|
10
|
-
_TRACK_DEPRECATED_FUNCTION_STATS = os.environ.get("CARTESIA_TEST_DEPRECATED", "").lower() == "true"
|
11
|
-
_DEPRECATED_FUNCTION_STATS = []
|
12
|
-
|
13
|
-
|
14
|
-
def deprecated(
    reason=None, vdeprecated=None, vremove=None, replacement=None
) -> Callable[[TCallable], TCallable]:
    """Return a decorator that emits a ``DeprecationWarning`` for a callable.

    The warning is issued once, when the decorator is applied, and the
    decorated object is returned unchanged.

    Args:
        reason: Optional explanation appended to the message.
        vdeprecated: Optional version in which the callable was deprecated.
        vremove: Optional version in which the callable will be removed.
        replacement: Optional name of what to use instead.

    Returns:
        A decorator that warns and returns its argument.
    """
    # Must stay the first statement so only the four arguments are captured.
    local_vars = locals()

    def fn(func: TCallable) -> TCallable:
        # classmethod/staticmethod objects keep the real function on
        # ``__func__``. Use it only to build the message and return the
        # original descriptor, so the method still binds correctly.
        if isinstance(func, (classmethod, staticmethod)):
            target = func.__func__
        else:
            target = func
        msg = _get_deprecated_msg(target, reason, vdeprecated, vremove, replacement)
        # stacklevel=2 attributes the warning to the decoration site.
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        return func

    if _TRACK_DEPRECATED_FUNCTION_STATS:  # pragma: no cover
        _DEPRECATED_FUNCTION_STATS.append(local_vars)

    return fn
|
30
|
-
|
31
|
-
|
32
|
-
def _get_deprecated_msg(wrapped, reason, vdeprecated, vremoved, replacement=None):
|
33
|
-
fmt = "{name} is deprecated"
|
34
|
-
if vdeprecated:
|
35
|
-
fmt += " since v{vdeprecated}"
|
36
|
-
if vremoved:
|
37
|
-
fmt += " and will be removed in v{vremoved}"
|
38
|
-
fmt += "."
|
39
|
-
|
40
|
-
if reason:
|
41
|
-
fmt += " ({reason})"
|
42
|
-
if replacement:
|
43
|
-
fmt += " -- Use {replacement} instead."
|
44
|
-
|
45
|
-
return fmt.format(
|
46
|
-
name=wrapped.__name__,
|
47
|
-
reason=reason or "",
|
48
|
-
vdeprecated=vdeprecated or "",
|
49
|
-
vremoved=vremoved or "",
|
50
|
-
replacement=replacement or "",
|
51
|
-
)
|
52
|
-
|
53
|
-
|
54
|
-
# This method is taken from the following source:
|
55
|
-
# https://github.com/ad12/meddlr/blob/main/meddlr/utils/deprecated.py
|
cartesia/utils/retry.py
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
import asyncio
|
2
|
-
import time
|
3
|
-
from functools import wraps
|
4
|
-
from http.client import RemoteDisconnected
|
5
|
-
|
6
|
-
from aiohttp.client_exceptions import ServerDisconnectedError
|
7
|
-
from httpx import TimeoutException
|
8
|
-
from requests.exceptions import ConnectionError
|
9
|
-
|
10
|
-
|
11
|
-
def retry_on_connection_error(max_retries=3, backoff_factor=1, logger=None):
    """Retry a function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.

    The delay between attempts doubles each time, starting at
    ``backoff_factor`` seconds. After ``max_retries`` failed attempts a
    generic ``Exception`` is raised, chained to the last error.

    Args:
        max_retries (int): The maximum number of attempts.
        backoff_factor (int): The factor to increase the delay between retries.
        logger (logging.Logger): The logger to use for logging. When omitted,
            this module's logger is used.
    """
    import logging

    # The default ``logger=None`` used to raise AttributeError on the first
    # retry, hiding the real connection error.
    log = logger if logger is not None else logging.getLogger(__name__)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            retry_count = 0
            while retry_count < max_retries:
                try:
                    return func(*args, **kwargs)
                except (
                    ConnectionError,
                    RemoteDisconnected,
                    ServerDisconnectedError,
                    TimeoutException,
                ) as e:
                    log.info(f"Retrying after exception: {e}")
                    retry_count += 1
                    if retry_count < max_retries:
                        delay = backoff_factor * (2 ** (retry_count - 1))
                        # ``Logger.warn`` is a deprecated alias; use ``warning``.
                        log.warning(
                            f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds..."
                        )
                        time.sleep(delay)
                    else:
                        raise Exception(f"Exception occurred after {max_retries} tries.") from e

        return wrapper

    return decorator
|
47
|
-
|
48
|
-
|
49
|
-
def retry_on_connection_error_async(max_retries=3, backoff_factor=1, logger=None):
    """Retry an asynchronous function if a ConnectionError, RemoteDisconnected, ServerDisconnectedError, or TimeoutException occurs.

    The decorated callable must return an async iterable; the wrapper is an
    async generator that re-yields its chunks. On a retry the callable is
    invoked again from the start, so chunks already yielded before the
    failure are yielded again.

    Args:
        max_retries (int): The maximum number of attempts.
        backoff_factor (int): The factor to increase the delay between retries.
        logger (logging.Logger): The logger to use for logging. When omitted,
            this module's logger is used.
    """
    import logging

    # The default ``logger=None`` used to raise AttributeError on the first
    # retry, hiding the real connection error.
    log = logger if logger is not None else logging.getLogger(__name__)

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            retry_count = 0
            while retry_count < max_retries:
                try:
                    async for chunk in func(*args, **kwargs):
                        yield chunk
                    # If the function completes without raising an exception return
                    return
                except (
                    ConnectionError,
                    RemoteDisconnected,
                    ServerDisconnectedError,
                    TimeoutException,
                ) as e:
                    log.info(f"Retrying after exception: {e}")
                    retry_count += 1
                    if retry_count < max_retries:
                        delay = backoff_factor * (2 ** (retry_count - 1))
                        # ``Logger.warn`` is a deprecated alias; use ``warning``.
                        log.warning(
                            f"Attempt {retry_count + 1}/{max_retries} in {delay} seconds..."
                        )
                        await asyncio.sleep(delay)
                    else:
                        raise Exception(f"Exception occurred after {max_retries} tries.") from e

        return wrapper

    return decorator
|
cartesia/utils/tts.py
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
from typing import List, Optional
|
2
|
-
|
3
|
-
from cartesia._types import OutputFormat, VoiceControls
|
4
|
-
|
5
|
-
|
6
|
-
def _validate_and_construct_voice(
    voice_id: Optional[str] = None,
    voice_embedding: Optional[List[float]] = None,
    experimental_voice_controls: Optional[VoiceControls] = None,
) -> dict:
    """Build the ``voice`` dictionary of a TTS request.

    Each argument that is not ``None`` is stored under its key: ``id``,
    ``embedding`` and ``__experimental_controls``, in that order.

    Raises:
        ValueError: If both ``voice_id`` and ``voice_embedding`` are ``None``.
    """
    if voice_id is None and voice_embedding is None:
        raise ValueError("Either voice_id or voice_embedding must be specified.")

    candidates = (
        ("id", voice_id),
        ("embedding", voice_embedding),
        ("__experimental_controls", experimental_voice_controls),
    )
    return {key: value for key, value in candidates if value is not None}
|
26
|
-
|
27
|
-
|
28
|
-
def _construct_tts_request(
    *,
    model_id: str,
    output_format: OutputFormat,
    transcript: Optional[str] = None,
    voice_id: Optional[str] = None,
    voice_embedding: Optional[List[float]] = None,
    duration: Optional[int] = None,
    language: Optional[str] = None,
    add_timestamps: bool = False,
    context_id: Optional[str] = None,
    continue_: bool = False,
    flush: bool = False,
    _experimental_voice_controls: Optional[VoiceControls] = None,
):
    """Assemble the request dictionary for a TTS call.

    ``model_id``, ``voice`` and ``output_format`` are always present.
    ``language``, ``transcript``, ``duration`` and ``context_id`` are added
    when not ``None``; ``add_timestamps``, ``continue`` and ``flush`` are
    added only when truthy.
    """
    request = {
        "model_id": model_id,
        "voice": _validate_and_construct_voice(
            voice_id,
            voice_embedding=voice_embedding,
            experimental_voice_controls=_experimental_voice_controls,
        ),
        "output_format": {
            key: output_format[key] for key in ("container", "encoding", "sample_rate")
        },
    }

    # (key, value, include?) in the order the keys are inserted.
    optional_fields = (
        ("language", language, language is not None),
        ("transcript", transcript, transcript is not None),
        ("duration", duration, duration is not None),
        ("add_timestamps", add_timestamps, bool(add_timestamps)),
        ("context_id", context_id, context_id is not None),
        ("continue", continue_, bool(continue_)),
        ("flush", flush, bool(flush)),
    )
    for key, value, include in optional_fields:
        if include:
            request[key] = value

    return request
|
cartesia/voices.py
DELETED
@@ -1,208 +0,0 @@
|
|
1
|
-
from typing import Dict, List, Optional, Union
|
2
|
-
|
3
|
-
import httpx
|
4
|
-
|
5
|
-
from cartesia._types import VoiceMetadata
|
6
|
-
from cartesia.resource import Resource
|
7
|
-
|
8
|
-
|
9
|
-
class Voices(Resource):
    """This resource contains methods to list, get, clone, create, delete, and mix voices in your Cartesia voice library.

    Usage:
        >>> client = Cartesia(api_key="your_api_key")
        >>> voices = client.voices.list()
        >>> voice = client.voices.get(id="a0e99841-438c-4a64-b679-ae501e7d6091")
        >>> print("Voice Name:", voice["name"], "Voice Description:", voice["description"])
        >>> embedding = client.voices.clone(filepath="path/to/clip.wav")
        >>> new_voice = client.voices.create(
        ...     name="My Voice", description="A new voice", embedding=embedding
        ... )
    """

    def list(self) -> List[VoiceMetadata]:
        """List all voices in your voice library.

        Returns:
            The decoded JSON body of ``GET /voices``.

        Raises:
            ValueError: If the request fails.
        """
        response = httpx.get(
            f"{self._http_url()}/voices",
            headers=self.headers,
            timeout=self.timeout,
        )

        if not response.is_success:
            raise ValueError(f"Failed to get voices. Error: {response.text}")

        return response.json()

    def get(self, id: str) -> VoiceMetadata:
        """Get a voice by its ID.

        Args:
            id: The ID of the voice.

        Returns:
            The decoded JSON body of ``GET /voices/{id}``.

        Raises:
            ValueError: If the request fails.
        """
        url = f"{self._http_url()}/voices/{id}"
        response = httpx.get(url, headers=self.headers, timeout=self.timeout)

        if not response.is_success:
            raise ValueError(
                f"Failed to get voice. Status Code: {response.status_code}\n"
                f"Error: {response.text}"
            )

        return response.json()

    def clone(
        self,
        filepath: Optional[str] = None,
        enhance: bool = True,
        mode: str = "clip",
        language: str = "en",
        name: Optional[str] = None,
        description: Optional[str] = None,
        transcript: Optional[str] = None,
    ) -> Union[List[float], VoiceMetadata]:
        """Clone a voice from a clip.

        Args:
            filepath: The path to the clip file.
            enhance: Whether to enhance the clip before cloning the voice (highly recommended). Defaults to True.
            mode: The mode to use for cloning. "clip" (the default) posts to
                ``/voices/clone/clip``; any other value (e.g. "similarity" or
                "stability") posts to ``/voices/clone``.
            language: The language code of the language spoken in the clip. Defaults to "en".
                Only sent when mode is not "clip".
            name: The name of the cloned voice. Only sent when mode is not "clip".
            description: The description of the cloned voice. Only sent when mode is not "clip".
            transcript: The transcript of the clip. Only used if mode is "similarity".

        Returns:
            For mode "clip", the ``embedding`` field of the response. For any
            other mode, the full decoded JSON response.

        Raises:
            ValueError: If ``filepath`` is empty or the request fails.
        """
        if not filepath:
            raise ValueError("Filepath must be specified.")

        # Drop the JSON content type so httpx can set the multipart one.
        headers = self.headers.copy()
        headers.pop("Content-Type", None)

        with open(filepath, "rb") as file:
            files = {"clip": file}
            data = {
                "enhance": str(enhance).lower(),
                "mode": mode,
            }
            if mode == "clip":
                url = f"{self._http_url()}/voices/clone/clip"
                response = httpx.post(
                    url, headers=headers, files=files, data=data, timeout=self.timeout
                )
                if not response.is_success:
                    raise ValueError(f"Failed to clone voice from clip. Error: {response.text}")
                return response.json()["embedding"]
            else:
                data["name"] = name
                data["description"] = description
                data["language"] = language
                if mode == "similarity" and transcript:
                    data["transcript"] = transcript
                url = f"{self._http_url()}/voices/clone"
                response = httpx.post(
                    url, headers=headers, files=files, data=data, timeout=self.timeout
                )
                if not response.is_success:
                    raise ValueError(
                        f"Failed to clone voice. Status Code: {response.status_code}\n"
                        f"Error: {response.text}"
                    )
                return response.json()

    def create(
        self,
        name: str,
        description: str,
        embedding: List[float],
        base_voice_id: Optional[str] = None,
        language: str = "en",
    ) -> VoiceMetadata:
        """Create a new voice.

        Args:
            name: The name of the voice.
            description: The description of the voice.
            embedding: The embedding of the voice. This should be generated with :meth:`clone`.
            base_voice_id: The ID of the base voice. This should be a valid voice ID if specified.
            language: The language code sent with the request. Defaults to "en".

        Returns:
            A dictionary containing the voice metadata.

        Raises:
            ValueError: If the request fails.
        """
        response = httpx.post(
            f"{self._http_url()}/voices",
            headers=self.headers,
            json={
                "name": name,
                "description": description,
                "embedding": embedding,
                "base_voice_id": base_voice_id,
                "language": language,
            },
            timeout=self.timeout,
        )

        if not response.is_success:
            raise ValueError(f"Failed to create voice. Error: {response.text}")

        return response.json()

    def delete(self, id: str) -> bool:
        """Delete a voice by its ID.

        Args:
            id: The ID of the voice.

        Returns:
            True when the request succeeds.

        Raises:
            ValueError: If the request fails.
        """
        response = httpx.delete(
            f"{self._http_url()}/voices/{id}",
            headers=self.headers,
            timeout=self.timeout,
        )

        if not response.is_success:
            raise ValueError(f"Failed to delete voice. Error: {response.text}")

        # The signature promises a bool; previously this fell through and
        # returned None, which reads as failure in a truthiness check.
        return True

    def mix(self, voices: List[Dict[str, Union[str, float]]]) -> List[float]:
        """Mix multiple voices together.

        Args:
            voices: A list of dictionaries, each containing either:
                - 'id': The ID of an existing voice
                - 'embedding': A voice embedding
                AND
                - 'weight': The weight of the voice in the mix (0.0 to 1.0)

        Returns:
            The embedding of the mixed voice as a list of floats.

        Raises:
            ValueError: If the request fails or if the input is invalid.
        """
        url = f"{self._http_url()}/voices/mix"

        if not voices or not isinstance(voices, list):
            raise ValueError("voices must be a non-empty list")

        response = httpx.post(
            url,
            headers=self.headers,
            json={"voices": voices},
            timeout=self.timeout,
        )

        if not response.is_success:
            raise ValueError(f"Failed to mix voices. Error: {response.text}")

        result = response.json()
        return result["embedding"]
|