cartesia 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +2 -0
- cartesia/core/client_wrapper.py +1 -1
- cartesia/tts/__init__.py +2 -0
- cartesia/tts/client.py +13 -32
- cartesia/tts/requests/generation_request.py +2 -9
- cartesia/tts/requests/tts_request.py +2 -8
- cartesia/tts/requests/web_socket_tts_request.py +2 -1
- cartesia/tts/types/__init__.py +2 -0
- cartesia/tts/types/generation_request.py +2 -9
- cartesia/tts/types/model_speed.py +5 -0
- cartesia/tts/types/tts_request.py +2 -8
- cartesia/tts/types/web_socket_tts_request.py +2 -1
- cartesia/voices/client.py +11 -13
- {cartesia-2.0.2.dist-info → cartesia-2.0.3.dist-info}/METADATA +1 -1
- {cartesia-2.0.2.dist-info → cartesia-2.0.3.dist-info}/RECORD +16 -15
- {cartesia-2.0.2.dist-info → cartesia-2.0.3.dist-info}/WHEEL +0 -0
cartesia/__init__.py
CHANGED
@@ -29,6 +29,7 @@ from .tts import (
|
|
29
29
|
FlushId,
|
30
30
|
GenerationRequest,
|
31
31
|
GenerationRequestParams,
|
32
|
+
ModelSpeed,
|
32
33
|
Mp3OutputFormat,
|
33
34
|
Mp3OutputFormatParams,
|
34
35
|
NaturalSpecifier,
|
@@ -201,6 +202,7 @@ __all__ = [
|
|
201
202
|
"MixVoiceSpecifierParams",
|
202
203
|
"MixVoicesRequest",
|
203
204
|
"MixVoicesRequestParams",
|
205
|
+
"ModelSpeed",
|
204
206
|
"Mp3OutputFormat",
|
205
207
|
"Mp3OutputFormatParams",
|
206
208
|
"NaturalSpecifier",
|
cartesia/core/client_wrapper.py
CHANGED
@@ -16,7 +16,7 @@ class BaseClientWrapper:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
17
17
|
"X-Fern-Language": "Python",
|
18
18
|
"X-Fern-SDK-Name": "cartesia",
|
19
|
-
"X-Fern-SDK-Version": "2.0.
|
19
|
+
"X-Fern-SDK-Version": "2.0.3",
|
20
20
|
}
|
21
21
|
headers["X-API-Key"] = self.api_key
|
22
22
|
headers["Cartesia-Version"] = "2024-11-13"
|
cartesia/tts/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from .types import (
|
|
7
7
|
Emotion,
|
8
8
|
FlushId,
|
9
9
|
GenerationRequest,
|
10
|
+
ModelSpeed,
|
10
11
|
Mp3OutputFormat,
|
11
12
|
NaturalSpecifier,
|
12
13
|
NumericalSpecifier,
|
@@ -94,6 +95,7 @@ __all__ = [
|
|
94
95
|
"FlushId",
|
95
96
|
"GenerationRequest",
|
96
97
|
"GenerationRequestParams",
|
98
|
+
"ModelSpeed",
|
97
99
|
"Mp3OutputFormat",
|
98
100
|
"Mp3OutputFormatParams",
|
99
101
|
"NaturalSpecifier",
|
cartesia/tts/client.py
CHANGED
@@ -5,6 +5,7 @@ from ..core.client_wrapper import SyncClientWrapper
|
|
5
5
|
from .requests.tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
|
6
6
|
from .requests.output_format import OutputFormatParams
|
7
7
|
from .types.supported_language import SupportedLanguage
|
8
|
+
from .types.model_speed import ModelSpeed
|
8
9
|
from ..core.request_options import RequestOptions
|
9
10
|
from ..core.serialization import convert_and_respect_annotation_metadata
|
10
11
|
from json.decoder import JSONDecodeError
|
@@ -32,7 +33,7 @@ class TtsClient:
|
|
32
33
|
output_format: OutputFormatParams,
|
33
34
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
34
35
|
duration: typing.Optional[float] = OMIT,
|
35
|
-
|
36
|
+
speed: typing.Optional[ModelSpeed] = OMIT,
|
36
37
|
request_options: typing.Optional[RequestOptions] = None,
|
37
38
|
) -> typing.Iterator[bytes]:
|
38
39
|
"""
|
@@ -53,12 +54,7 @@ class TtsClient:
|
|
53
54
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
54
55
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
55
56
|
|
56
|
-
|
57
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
58
|
-
|
59
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
60
|
-
|
61
|
-
This parameter is only supported for `sonic-2` models.
|
57
|
+
speed : typing.Optional[ModelSpeed]
|
62
58
|
|
63
59
|
request_options : typing.Optional[RequestOptions]
|
64
60
|
Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
|
@@ -100,7 +96,7 @@ class TtsClient:
|
|
100
96
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
101
97
|
),
|
102
98
|
"duration": duration,
|
103
|
-
"
|
99
|
+
"speed": speed,
|
104
100
|
},
|
105
101
|
request_options=request_options,
|
106
102
|
omit=OMIT,
|
@@ -126,7 +122,7 @@ class TtsClient:
|
|
126
122
|
output_format: OutputFormatParams,
|
127
123
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
128
124
|
duration: typing.Optional[float] = OMIT,
|
129
|
-
|
125
|
+
speed: typing.Optional[ModelSpeed] = OMIT,
|
130
126
|
request_options: typing.Optional[RequestOptions] = None,
|
131
127
|
) -> typing.Iterator[WebSocketResponse]:
|
132
128
|
"""
|
@@ -147,12 +143,7 @@ class TtsClient:
|
|
147
143
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
148
144
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
149
145
|
|
150
|
-
|
151
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
152
|
-
|
153
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
154
|
-
|
155
|
-
This parameter is only supported for `sonic-2` models.
|
146
|
+
speed : typing.Optional[ModelSpeed]
|
156
147
|
|
157
148
|
request_options : typing.Optional[RequestOptions]
|
158
149
|
Request-specific configuration.
|
@@ -196,7 +187,7 @@ class TtsClient:
|
|
196
187
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
197
188
|
),
|
198
189
|
"duration": duration,
|
199
|
-
"
|
190
|
+
"speed": speed,
|
200
191
|
},
|
201
192
|
request_options=request_options,
|
202
193
|
omit=OMIT,
|
@@ -236,7 +227,7 @@ class AsyncTtsClient:
|
|
236
227
|
output_format: OutputFormatParams,
|
237
228
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
238
229
|
duration: typing.Optional[float] = OMIT,
|
239
|
-
|
230
|
+
speed: typing.Optional[ModelSpeed] = OMIT,
|
240
231
|
request_options: typing.Optional[RequestOptions] = None,
|
241
232
|
) -> typing.AsyncIterator[bytes]:
|
242
233
|
"""
|
@@ -257,12 +248,7 @@ class AsyncTtsClient:
|
|
257
248
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
258
249
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
259
250
|
|
260
|
-
|
261
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
262
|
-
|
263
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
264
|
-
|
265
|
-
This parameter is only supported for `sonic-2` models.
|
251
|
+
speed : typing.Optional[ModelSpeed]
|
266
252
|
|
267
253
|
request_options : typing.Optional[RequestOptions]
|
268
254
|
Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
|
@@ -312,7 +298,7 @@ class AsyncTtsClient:
|
|
312
298
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
313
299
|
),
|
314
300
|
"duration": duration,
|
315
|
-
"
|
301
|
+
"speed": speed,
|
316
302
|
},
|
317
303
|
request_options=request_options,
|
318
304
|
omit=OMIT,
|
@@ -338,7 +324,7 @@ class AsyncTtsClient:
|
|
338
324
|
output_format: OutputFormatParams,
|
339
325
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
340
326
|
duration: typing.Optional[float] = OMIT,
|
341
|
-
|
327
|
+
speed: typing.Optional[ModelSpeed] = OMIT,
|
342
328
|
request_options: typing.Optional[RequestOptions] = None,
|
343
329
|
) -> typing.AsyncIterator[WebSocketResponse]:
|
344
330
|
"""
|
@@ -359,12 +345,7 @@ class AsyncTtsClient:
|
|
359
345
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
360
346
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
361
347
|
|
362
|
-
|
363
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
364
|
-
|
365
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
366
|
-
|
367
|
-
This parameter is only supported for `sonic-2` models.
|
348
|
+
speed : typing.Optional[ModelSpeed]
|
368
349
|
|
369
350
|
request_options : typing.Optional[RequestOptions]
|
370
351
|
Request-specific configuration.
|
@@ -416,7 +397,7 @@ class AsyncTtsClient:
|
|
416
397
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
417
398
|
),
|
418
399
|
"duration": duration,
|
419
|
-
"
|
400
|
+
"speed": speed,
|
420
401
|
},
|
421
402
|
request_options=request_options,
|
422
403
|
omit=OMIT,
|
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
|
|
6
6
|
import typing_extensions
|
7
7
|
from ..types.supported_language import SupportedLanguage
|
8
8
|
from .web_socket_raw_output_format import WebSocketRawOutputFormatParams
|
9
|
+
from ..types.model_speed import ModelSpeed
|
9
10
|
from ..types.context_id import ContextId
|
10
11
|
from ...core.serialization import FieldMetadata
|
11
12
|
|
@@ -30,15 +31,7 @@ class GenerationRequestParams(typing_extensions.TypedDict):
|
|
30
31
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
31
32
|
"""
|
32
33
|
|
33
|
-
|
34
|
-
"""
|
35
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
36
|
-
|
37
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
38
|
-
|
39
|
-
This parameter is only supported for `sonic-2` models.
|
40
|
-
"""
|
41
|
-
|
34
|
+
speed: typing_extensions.NotRequired[ModelSpeed]
|
42
35
|
context_id: typing_extensions.NotRequired[ContextId]
|
43
36
|
continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
|
44
37
|
"""
|
@@ -5,6 +5,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
|
|
5
5
|
import typing_extensions
|
6
6
|
from ..types.supported_language import SupportedLanguage
|
7
7
|
from .output_format import OutputFormatParams
|
8
|
+
from ..types.model_speed import ModelSpeed
|
8
9
|
|
9
10
|
|
10
11
|
class TtsRequestParams(typing_extensions.TypedDict):
|
@@ -23,11 +24,4 @@ class TtsRequestParams(typing_extensions.TypedDict):
|
|
23
24
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
24
25
|
"""
|
25
26
|
|
26
|
-
|
27
|
-
"""
|
28
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
29
|
-
|
30
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
31
|
-
|
32
|
-
This parameter is only supported for `sonic-2` models.
|
33
|
-
"""
|
27
|
+
speed: typing_extensions.NotRequired[ModelSpeed]
|
@@ -5,6 +5,7 @@ import typing_extensions
|
|
5
5
|
from .output_format import OutputFormatParams
|
6
6
|
from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
|
7
7
|
from ...core.serialization import FieldMetadata
|
8
|
+
from ..types.model_speed import ModelSpeed
|
8
9
|
|
9
10
|
|
10
11
|
class WebSocketTtsRequestParams(typing_extensions.TypedDict):
|
@@ -24,4 +25,4 @@ class WebSocketTtsRequestParams(typing_extensions.TypedDict):
|
|
24
25
|
continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
|
25
26
|
context_id: typing_extensions.NotRequired[str]
|
26
27
|
max_buffer_delay_ms: typing_extensions.NotRequired[int]
|
27
|
-
|
28
|
+
speed: typing_extensions.NotRequired[ModelSpeed]
|
cartesia/tts/types/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .controls import Controls
|
|
6
6
|
from .emotion import Emotion
|
7
7
|
from .flush_id import FlushId
|
8
8
|
from .generation_request import GenerationRequest
|
9
|
+
from .model_speed import ModelSpeed
|
9
10
|
from .mp_3_output_format import Mp3OutputFormat
|
10
11
|
from .natural_specifier import NaturalSpecifier
|
11
12
|
from .numerical_specifier import NumericalSpecifier
|
@@ -50,6 +51,7 @@ __all__ = [
|
|
50
51
|
"Emotion",
|
51
52
|
"FlushId",
|
52
53
|
"GenerationRequest",
|
54
|
+
"ModelSpeed",
|
53
55
|
"Mp3OutputFormat",
|
54
56
|
"NaturalSpecifier",
|
55
57
|
"NumericalSpecifier",
|
@@ -6,6 +6,7 @@ import typing
|
|
6
6
|
from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
|
7
7
|
from .supported_language import SupportedLanguage
|
8
8
|
from .web_socket_raw_output_format import WebSocketRawOutputFormat
|
9
|
+
from .model_speed import ModelSpeed
|
9
10
|
from .context_id import ContextId
|
10
11
|
import typing_extensions
|
11
12
|
from ...core.serialization import FieldMetadata
|
@@ -32,15 +33,7 @@ class GenerationRequest(UniversalBaseModel):
|
|
32
33
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
33
34
|
"""
|
34
35
|
|
35
|
-
|
36
|
-
"""
|
37
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
38
|
-
|
39
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
40
|
-
|
41
|
-
This parameter is only supported for `sonic-2` models.
|
42
|
-
"""
|
43
|
-
|
36
|
+
speed: typing.Optional[ModelSpeed] = None
|
44
37
|
context_id: typing.Optional[ContextId] = None
|
45
38
|
continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = pydantic.Field(
|
46
39
|
default=None
|
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
|
|
6
6
|
import typing
|
7
7
|
from .supported_language import SupportedLanguage
|
8
8
|
from .output_format import OutputFormat
|
9
|
+
from .model_speed import ModelSpeed
|
9
10
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
10
11
|
|
11
12
|
|
@@ -25,14 +26,7 @@ class TtsRequest(UniversalBaseModel):
|
|
25
26
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
26
27
|
"""
|
27
28
|
|
28
|
-
|
29
|
-
"""
|
30
|
-
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
31
|
-
|
32
|
-
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
33
|
-
|
34
|
-
This parameter is only supported for `sonic-2` models.
|
35
|
-
"""
|
29
|
+
speed: typing.Optional[ModelSpeed] = None
|
36
30
|
|
37
31
|
if IS_PYDANTIC_V2:
|
38
32
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
@@ -7,6 +7,7 @@ from .output_format import OutputFormat
|
|
7
7
|
from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
|
8
8
|
import typing_extensions
|
9
9
|
from ...core.serialization import FieldMetadata
|
10
|
+
from .model_speed import ModelSpeed
|
10
11
|
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
11
12
|
|
12
13
|
|
@@ -27,7 +28,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
|
|
27
28
|
continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
|
28
29
|
context_id: typing.Optional[str] = None
|
29
30
|
max_buffer_delay_ms: typing.Optional[int] = None
|
30
|
-
|
31
|
+
speed: typing.Optional[ModelSpeed] = None
|
31
32
|
|
32
33
|
if IS_PYDANTIC_V2:
|
33
34
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
cartesia/voices/client.py
CHANGED
@@ -14,8 +14,8 @@ from ..core.api_error import ApiError
|
|
14
14
|
from .. import core
|
15
15
|
from ..tts.types.supported_language import SupportedLanguage
|
16
16
|
from .types.clone_mode import CloneMode
|
17
|
-
from .types.voice_metadata import VoiceMetadata
|
18
17
|
from .types.voice_id import VoiceId
|
18
|
+
from .types.voice_metadata import VoiceMetadata
|
19
19
|
from ..core.jsonable_encoder import jsonable_encoder
|
20
20
|
from .types.localize_target_language import LocalizeTargetLanguage
|
21
21
|
from .types.gender import Gender
|
@@ -150,7 +150,7 @@ class VoicesClient:
|
|
150
150
|
mode: CloneMode,
|
151
151
|
description: typing.Optional[str] = OMIT,
|
152
152
|
enhance: typing.Optional[bool] = OMIT,
|
153
|
-
|
153
|
+
base_voice_id: typing.Optional[VoiceId] = OMIT,
|
154
154
|
request_options: typing.Optional[RequestOptions] = None,
|
155
155
|
) -> VoiceMetadata:
|
156
156
|
"""
|
@@ -182,11 +182,11 @@ class VoicesClient:
|
|
182
182
|
|
183
183
|
|
184
184
|
enhance : typing.Optional[bool]
|
185
|
-
Whether to
|
185
|
+
Whether to apply AI enhancements to the clip to reduce background noise. This leads to cleaner generated speech at the cost of reduced similarity to the source clip.
|
186
186
|
|
187
187
|
|
188
|
-
|
189
|
-
Optional
|
188
|
+
base_voice_id : typing.Optional[VoiceId]
|
189
|
+
Optional base voice ID that the cloned voice is derived from.
|
190
190
|
|
191
191
|
|
192
192
|
request_options : typing.Optional[RequestOptions]
|
@@ -208,7 +208,6 @@ class VoicesClient:
|
|
208
208
|
description="Copied from Cartesia docs",
|
209
209
|
mode="stability",
|
210
210
|
language="en",
|
211
|
-
enhance=True,
|
212
211
|
)
|
213
212
|
"""
|
214
213
|
_response = self._client_wrapper.httpx_client.request(
|
@@ -220,7 +219,7 @@ class VoicesClient:
|
|
220
219
|
"language": language,
|
221
220
|
"mode": mode,
|
222
221
|
"enhance": enhance,
|
223
|
-
"
|
222
|
+
"base_voice_id": base_voice_id,
|
224
223
|
},
|
225
224
|
files={
|
226
225
|
"clip": clip,
|
@@ -719,7 +718,7 @@ class AsyncVoicesClient:
|
|
719
718
|
mode: CloneMode,
|
720
719
|
description: typing.Optional[str] = OMIT,
|
721
720
|
enhance: typing.Optional[bool] = OMIT,
|
722
|
-
|
721
|
+
base_voice_id: typing.Optional[VoiceId] = OMIT,
|
723
722
|
request_options: typing.Optional[RequestOptions] = None,
|
724
723
|
) -> VoiceMetadata:
|
725
724
|
"""
|
@@ -751,11 +750,11 @@ class AsyncVoicesClient:
|
|
751
750
|
|
752
751
|
|
753
752
|
enhance : typing.Optional[bool]
|
754
|
-
Whether to
|
753
|
+
Whether to apply AI enhancements to the clip to reduce background noise. This leads to cleaner generated speech at the cost of reduced similarity to the source clip.
|
755
754
|
|
756
755
|
|
757
|
-
|
758
|
-
Optional
|
756
|
+
base_voice_id : typing.Optional[VoiceId]
|
757
|
+
Optional base voice ID that the cloned voice is derived from.
|
759
758
|
|
760
759
|
|
761
760
|
request_options : typing.Optional[RequestOptions]
|
@@ -782,7 +781,6 @@ class AsyncVoicesClient:
|
|
782
781
|
description="Copied from Cartesia docs",
|
783
782
|
mode="stability",
|
784
783
|
language="en",
|
785
|
-
enhance=True,
|
786
784
|
)
|
787
785
|
|
788
786
|
|
@@ -797,7 +795,7 @@ class AsyncVoicesClient:
|
|
797
795
|
"language": language,
|
798
796
|
"mode": mode,
|
799
797
|
"enhance": enhance,
|
800
|
-
"
|
798
|
+
"base_voice_id": base_voice_id,
|
801
799
|
},
|
802
800
|
files={
|
803
801
|
"clip": clip,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
cartesia/__init__.py,sha256=
|
1
|
+
cartesia/__init__.py,sha256=p9ilqR-oE6N6pPQbiKUbnfEdDy00ihjNTtg7ES7EHg0,8448
|
2
2
|
cartesia/api_status/__init__.py,sha256=_dHNLdknrBjxHtU2PvLumttJM-JTQhJQqhhAQkLqt_U,168
|
3
3
|
cartesia/api_status/client.py,sha256=GJ9Dq8iCn3hn8vCIqc6k1fCGEhSz0T0kaPGcdFnbMDY,3146
|
4
4
|
cartesia/api_status/requests/__init__.py,sha256=ilEMzEy1JEw484CuL92bX5lHGOznc62pjiDMgiZ0tKM,130
|
@@ -19,7 +19,7 @@ cartesia/base_client.py,sha256=YH0l0UUzanAa9mDdJU6BFQ9XKELiaPTm9NsJpVQ4evA,6539
|
|
19
19
|
cartesia/client.py,sha256=sPAYQLt9W2E_2F17ooocvvJImuNyLrL8xUypgf6dZeI,6238
|
20
20
|
cartesia/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
|
21
21
|
cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
|
22
|
-
cartesia/core/client_wrapper.py,sha256=
|
22
|
+
cartesia/core/client_wrapper.py,sha256=xKyHqaGJ7V2OuMGkVOEjxofnt8QaFjgiEIom7EWhBBI,1854
|
23
23
|
cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
24
24
|
cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
25
25
|
cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
|
@@ -51,20 +51,20 @@ cartesia/environment.py,sha256=Qnp91BGLic7hXmKsiYub2m3nPfvDWm59aB1wWta1J6A,160
|
|
51
51
|
cartesia/infill/__init__.py,sha256=FTtvy8EDg9nNNg9WCatVgKTRYV8-_v1roeGPAKoa_pw,65
|
52
52
|
cartesia/infill/client.py,sha256=PWE5Ak-wsaBM_8g52oDl9PYx76PkW6f900mnxvZf4Bk,12571
|
53
53
|
cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
|
-
cartesia/tts/__init__.py,sha256=
|
54
|
+
cartesia/tts/__init__.py,sha256=YrVxJT7i-0wygFgN2hOVftYLEM7JoFgCo3SvLoN7pkA,4735
|
55
55
|
cartesia/tts/_async_websocket.py,sha256=tJ-6rdJrviKvGhSW8J8t-rCinXM5gXXQJgDO8OgW3EE,18805
|
56
56
|
cartesia/tts/_websocket.py,sha256=Uk6B-TP-0nzeyFE9w-_PzNIiVYP14rKqSDZlm6bU24Q,19271
|
57
|
-
cartesia/tts/client.py,sha256=
|
57
|
+
cartesia/tts/client.py,sha256=0jWpiNKPp4QbyibfB2tsFb7wqQ9vb4m_QlLB-qxzKP8,15398
|
58
58
|
cartesia/tts/requests/__init__.py,sha256=0rcfMLHNbUhkRI1xS09UE4p-WT1BCqrcblFtPxcATOI,3261
|
59
59
|
cartesia/tts/requests/cancel_context_request.py,sha256=Wl8g-o5vwl9ENm-H1wsLx441FkIR_4Wt5UYtuWce2Yw,431
|
60
60
|
cartesia/tts/requests/controls.py,sha256=xzUJlfgqhaJ1A-JD0LTpoHYk4iEpCuGpSD7qE4YYsRg,285
|
61
|
-
cartesia/tts/requests/generation_request.py,sha256=
|
61
|
+
cartesia/tts/requests/generation_request.py,sha256=cUy--5WFsC7E-KEPVay3QpU_-a3GVGnatTKsBw6hIV4,2743
|
62
62
|
cartesia/tts/requests/mp_3_output_format.py,sha256=PGDVzC1d7-Jce12rFxtF8G1pTHmlUdiGAhykFTABg0w,316
|
63
63
|
cartesia/tts/requests/output_format.py,sha256=8TKu9AAeHCR5L4edzYch8FIYIldn4bM7ySrsCl8W_g8,842
|
64
64
|
cartesia/tts/requests/phoneme_timestamps.py,sha256=ft81nmqElZAnvTBT27lY6YWfF18ZGsCx3Y1XHv9J7cM,267
|
65
65
|
cartesia/tts/requests/raw_output_format.py,sha256=S60Vp7DeAATCMLF3bXgxhw0zILJBWJ9GhI9irAg_UkI,316
|
66
66
|
cartesia/tts/requests/speed.py,sha256=-YGBWwh7_VtCBnYlT5EVsnrmcHFMEBTxy9LathZhkMA,259
|
67
|
-
cartesia/tts/requests/tts_request.py,sha256=
|
67
|
+
cartesia/tts/requests/tts_request.py,sha256=KBoahYfPbDENlEWsqnR4z1ZIhGIJwhLrzQIzkbtqtzE,1021
|
68
68
|
cartesia/tts/requests/tts_request_embedding_specifier.py,sha256=-M54ZjV0H5LPwcKtz0bOVqlkvO1pPiMbqMbVBMko3Ns,565
|
69
69
|
cartesia/tts/requests/tts_request_id_specifier.py,sha256=-0ClfyJnnaH0uAcF5r84s3cM_cw2wT39dp6T4JYzOQ8,536
|
70
70
|
cartesia/tts/requests/tts_request_voice_specifier.py,sha256=eGzL4aVGq4gKPxeglsV7-wuhxg8x33Qth3uFTTytgeI,337
|
@@ -81,16 +81,17 @@ cartesia/tts/requests/web_socket_response.py,sha256=WqZ6RgO4suG78wiVSIsOWwyXBioV
|
|
81
81
|
cartesia/tts/requests/web_socket_stream_options.py,sha256=VIvblFw9hGZvDzFpOnC11G0NvrFSVt-1-0sY5rpcZPI,232
|
82
82
|
cartesia/tts/requests/web_socket_timestamps_response.py,sha256=MK3zN2Q_PVWJtX5DidNB0uXoF2o33rv6qCYPVaourxY,351
|
83
83
|
cartesia/tts/requests/web_socket_tts_output.py,sha256=pX2uf0XVdziFhXCydwLlVOWb-LvBiuq-cBI6R1INiMg,760
|
84
|
-
cartesia/tts/requests/web_socket_tts_request.py,sha256=
|
84
|
+
cartesia/tts/requests/web_socket_tts_request.py,sha256=RBFrdmYe0SRlzhEUwhTFCL8ZC1tbIwD2aFnUgF-my80,1260
|
85
85
|
cartesia/tts/requests/word_timestamps.py,sha256=WMfBJtETi6wTpES0pYZCFfFRfEbzWE-RtosDJ5seUWg,261
|
86
86
|
cartesia/tts/socket_client.py,sha256=zTPayHbgy-yQQ50AE1HXN4GMyanisZcLXf7Ds1paYks,11621
|
87
|
-
cartesia/tts/types/__init__.py,sha256=
|
87
|
+
cartesia/tts/types/__init__.py,sha256=44KWnBiqkNHZZpy8M5uze8cdEFE79sbMRVJStxQSEhM,3305
|
88
88
|
cartesia/tts/types/cancel_context_request.py,sha256=zInhk3qRZsSc0F1aYJ-Q5BHJsosTrb22IJWhzue-eKE,856
|
89
89
|
cartesia/tts/types/context_id.py,sha256=UCEtq5xFGOeBCECcY6Y-gYVe_Peg1hFhH9YYOkpApQg,81
|
90
90
|
cartesia/tts/types/controls.py,sha256=H4CSu79mM1Ld4NZx_5uXw3EwRzTEMQRxKBRvFpcFb8Y,644
|
91
91
|
cartesia/tts/types/emotion.py,sha256=zocyDcHTiFFnNRgo2YLMi70iGyffa080B4mkg9lcqVc,764
|
92
92
|
cartesia/tts/types/flush_id.py,sha256=HCIKo9o8d7YWKtaSNU3TEvfUVBju93ckGQy01Z9wLcE,79
|
93
|
-
cartesia/tts/types/generation_request.py,sha256=
|
93
|
+
cartesia/tts/types/generation_request.py,sha256=H5ZaNGH2ngTZY-NQ7wqLUiIAArH8KFo3rt690bxWCUw,3242
|
94
|
+
cartesia/tts/types/model_speed.py,sha256=iiTj8V0piFCX2FZh5B8EkgRhZDlj4z3VFcQhp66e7y8,160
|
94
95
|
cartesia/tts/types/mp_3_output_format.py,sha256=0WGblkuDUL7pZO1aRuQ_mU2Z5gN9xIabRfRKkjtzms8,731
|
95
96
|
cartesia/tts/types/natural_specifier.py,sha256=K526P1RRuBGy80hyd_tX8tohPrE8DR9EgTCxS5wce0o,188
|
96
97
|
cartesia/tts/types/numerical_specifier.py,sha256=tJpIskWO545luCKMFM9JlVc7VVhBhSvqL1qurhzL9cI,92
|
@@ -100,7 +101,7 @@ cartesia/tts/types/raw_encoding.py,sha256=eyc2goiYOTxWcuKHAgYZ2SrnfePW22Fbmc-5fG
|
|
100
101
|
cartesia/tts/types/raw_output_format.py,sha256=jZGVaS0KIi9mU6trfskgA3HbMKJolhrwICnuDhF01ic,673
|
101
102
|
cartesia/tts/types/speed.py,sha256=4c5WdxocBw6WSMnundSaNnceUeooU0vikhy00FW6M-w,239
|
102
103
|
cartesia/tts/types/supported_language.py,sha256=riDRduThMbMWAq9i2uCfxhwVTpgaFwNDZ9LhEIl4zHY,237
|
103
|
-
cartesia/tts/types/tts_request.py,sha256=
|
104
|
+
cartesia/tts/types/tts_request.py,sha256=FGcxW-siiQpEzJZSHMET3nDSYHSzRt3WSTO-cCEz9u4,1376
|
104
105
|
cartesia/tts/types/tts_request_embedding_specifier.py,sha256=eL_qCEr4pvWfy4qp9hZBuVdCincX5DBVqfv1vLt2_Vk,942
|
105
106
|
cartesia/tts/types/tts_request_id_specifier.py,sha256=ktGdkkTRQ9scA-lt8qJ2jn_E5WzoOK8AXMrVqi71gf0,906
|
106
107
|
cartesia/tts/types/tts_request_voice_specifier.py,sha256=p-3UQ62uFL1SgbX73Ex1D_V73Ef0wmT1ApOt1iLZmwE,307
|
@@ -117,7 +118,7 @@ cartesia/tts/types/web_socket_response.py,sha256=mHDECZ4K84QmN2s0IWuBsXBt83Yq7Qx
|
|
117
118
|
cartesia/tts/types/web_socket_stream_options.py,sha256=MhDSxBFqMuQeWjoyPqXVnTEzLjF8g6aojeigb5dQUgU,596
|
118
119
|
cartesia/tts/types/web_socket_timestamps_response.py,sha256=kuWXI82ncF1QapnaHEjwrL84qWob7ByQU-yh1e0IEmk,667
|
119
120
|
cartesia/tts/types/web_socket_tts_output.py,sha256=uvkv0smTBhdm18Rl17C0Ml4Inh79YBHNzAcKnZBs14Y,979
|
120
|
-
cartesia/tts/types/web_socket_tts_request.py,sha256=
|
121
|
+
cartesia/tts/types/web_socket_tts_request.py,sha256=TlqUQPhqZcDZ6jgpzLqzJnYVGBIc9_TQYSu_SVYkVVk,1567
|
121
122
|
cartesia/tts/types/word_timestamps.py,sha256=XZ2Q0prdb3F9c3AiOKXu4s3A3jBxE-qIt1npHOf16R0,631
|
122
123
|
cartesia/tts/utils/constants.py,sha256=1CHa5flJf8--L_eYyOyOiWJNZ-Q81ufHZxDbJs8xYSk,418
|
123
124
|
cartesia/tts/utils/tts.py,sha256=u7PgPxlJs6fcQTfr-jqAvBCAaK3JWLhF5QF4s-PwoMo,2093
|
@@ -131,7 +132,7 @@ cartesia/voice_changer/types/__init__.py,sha256=qAiHsdRpnFeS0lBkYp_NRrhSJiRXCg5-
|
|
131
132
|
cartesia/voice_changer/types/output_format_container.py,sha256=RqLDELdgeOjYqNTJX1Le62qjiFiJGxf0cYnol88-LLM,166
|
132
133
|
cartesia/voice_changer/types/streaming_response.py,sha256=rQ4ZehtOHsCBKijyULz_ahGQYNj1yus6AM6u2wgcBsI,1963
|
133
134
|
cartesia/voices/__init__.py,sha256=2D58Bir45LvcvP08QMnPlFE8DD8BONTjPLkIDdKs7vg,1891
|
134
|
-
cartesia/voices/client.py,sha256=
|
135
|
+
cartesia/voices/client.py,sha256=A_PEoCLko1znexKKicp-gZVMUcSpDoKqz3p1r4Aa04k,38993
|
135
136
|
cartesia/voices/requests/__init__.py,sha256=XiBJbSYeQCgFMtwywKvQ0Nmp7Zf_0WskzRhgr9c8h38,1072
|
136
137
|
cartesia/voices/requests/create_voice_request.py,sha256=r6dKb9ga0ZsAi_6PXuE43u2lLgfQg2DIYjk2Neng7pI,617
|
137
138
|
cartesia/voices/requests/embedding_response.py,sha256=PGZkBD8UBcv2MYQbBXyD4T6lzaE9oSGGwXx-MoXCp0M,228
|
@@ -170,6 +171,6 @@ cartesia/voices/types/voice_expand_options.py,sha256=e4FroWdlxEE-LXQfT1RWlGHtswl
|
|
170
171
|
cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
|
171
172
|
cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
|
172
173
|
cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
|
173
|
-
cartesia-2.0.
|
174
|
-
cartesia-2.0.
|
175
|
-
cartesia-2.0.
|
174
|
+
cartesia-2.0.3.dist-info/METADATA,sha256=cW9xivCIN1lB-8xc8V_-DMwh0pwJa3gwmsYO3XwJl0M,11206
|
175
|
+
cartesia-2.0.3.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
176
|
+
cartesia-2.0.3.dist-info/RECORD,,
|
File without changes
|