cartesia-2.0.0b8-py3-none-any.whl → cartesia-2.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +9 -1
- cartesia/auth/__init__.py +13 -0
- cartesia/auth/client.py +159 -0
- cartesia/auth/requests/__init__.py +7 -0
- cartesia/auth/requests/token_grant.py +10 -0
- cartesia/auth/requests/token_request.py +17 -0
- cartesia/auth/requests/token_response.py +10 -0
- cartesia/auth/types/__init__.py +7 -0
- cartesia/auth/types/token_grant.py +22 -0
- cartesia/auth/types/token_request.py +28 -0
- cartesia/auth/types/token_response.py +22 -0
- cartesia/base_client.py +4 -0
- cartesia/core/client_wrapper.py +1 -1
- cartesia/tts/_async_websocket.py +3 -0
- cartesia/tts/_websocket.py +3 -0
- cartesia/tts/client.py +40 -4
- cartesia/tts/requests/generation_request.py +19 -1
- cartesia/tts/requests/tts_request.py +10 -1
- cartesia/tts/requests/web_socket_tts_request.py +3 -1
- cartesia/tts/types/generation_request.py +19 -1
- cartesia/tts/types/tts_request.py +10 -1
- cartesia/tts/types/web_socket_tts_request.py +3 -1
- cartesia/voices/client.py +10 -10
- {cartesia-2.0.0b8.dist-info → cartesia-2.0.2.dist-info}/METADATA +1 -1
- {cartesia-2.0.0b8.dist-info → cartesia-2.0.2.dist-info}/RECORD +26 -16
- {cartesia-2.0.0b8.dist-info → cartesia-2.0.2.dist-info}/WHEEL +0 -0
cartesia/__init__.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
2
2
|
|
3
|
-
from . import api_status, datasets, embedding, infill, tts, voice_changer, voices
|
3
|
+
from . import api_status, auth, datasets, embedding, infill, tts, voice_changer, voices
|
4
4
|
from .api_status import ApiInfo, ApiInfoParams
|
5
|
+
from .auth import TokenGrant, TokenGrantParams, TokenRequest, TokenRequestParams, TokenResponse, TokenResponseParams
|
5
6
|
from .client import AsyncCartesia, Cartesia
|
6
7
|
from .datasets import (
|
7
8
|
CreateDatasetRequest,
|
@@ -233,6 +234,12 @@ __all__ = [
|
|
233
234
|
"StreamingResponse_Error",
|
234
235
|
"StreamingResponse_ErrorParams",
|
235
236
|
"SupportedLanguage",
|
237
|
+
"TokenGrant",
|
238
|
+
"TokenGrantParams",
|
239
|
+
"TokenRequest",
|
240
|
+
"TokenRequestParams",
|
241
|
+
"TokenResponse",
|
242
|
+
"TokenResponseParams",
|
236
243
|
"TtsRequest",
|
237
244
|
"TtsRequestEmbeddingSpecifier",
|
238
245
|
"TtsRequestEmbeddingSpecifierParams",
|
@@ -294,6 +301,7 @@ __all__ = [
|
|
294
301
|
"WordTimestampsParams",
|
295
302
|
"__version__",
|
296
303
|
"api_status",
|
304
|
+
"auth",
|
297
305
|
"datasets",
|
298
306
|
"embedding",
|
299
307
|
"infill",
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from .types import TokenGrant, TokenRequest, TokenResponse
|
4
|
+
from .requests import TokenGrantParams, TokenRequestParams, TokenResponseParams
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
"TokenGrant",
|
8
|
+
"TokenGrantParams",
|
9
|
+
"TokenRequest",
|
10
|
+
"TokenRequestParams",
|
11
|
+
"TokenResponse",
|
12
|
+
"TokenResponseParams",
|
13
|
+
]
|
cartesia/auth/client.py
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing
|
4
|
+
from ..core.client_wrapper import SyncClientWrapper
|
5
|
+
from .requests.token_grant import TokenGrantParams
|
6
|
+
from ..core.request_options import RequestOptions
|
7
|
+
from .types.token_response import TokenResponse
|
8
|
+
from ..core.serialization import convert_and_respect_annotation_metadata
|
9
|
+
from ..core.pydantic_utilities import parse_obj_as
|
10
|
+
from json.decoder import JSONDecodeError
|
11
|
+
from ..core.api_error import ApiError
|
12
|
+
from ..core.client_wrapper import AsyncClientWrapper
|
13
|
+
|
14
|
+
# this is used as the default value for optional parameters
|
15
|
+
OMIT = typing.cast(typing.Any, ...)
|
16
|
+
|
17
|
+
|
18
|
+
class AuthClient:
|
19
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
20
|
+
self._client_wrapper = client_wrapper
|
21
|
+
|
22
|
+
def access_token(
|
23
|
+
self,
|
24
|
+
*,
|
25
|
+
grants: TokenGrantParams,
|
26
|
+
expires_in: typing.Optional[int] = OMIT,
|
27
|
+
request_options: typing.Optional[RequestOptions] = None,
|
28
|
+
) -> TokenResponse:
|
29
|
+
"""
|
30
|
+
Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
|
31
|
+
|
32
|
+
Parameters
|
33
|
+
----------
|
34
|
+
grants : TokenGrantParams
|
35
|
+
The permissions to be granted via the token.
|
36
|
+
|
37
|
+
expires_in : typing.Optional[int]
|
38
|
+
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
39
|
+
|
40
|
+
request_options : typing.Optional[RequestOptions]
|
41
|
+
Request-specific configuration.
|
42
|
+
|
43
|
+
Returns
|
44
|
+
-------
|
45
|
+
TokenResponse
|
46
|
+
|
47
|
+
Examples
|
48
|
+
--------
|
49
|
+
from cartesia import Cartesia
|
50
|
+
|
51
|
+
client = Cartesia(
|
52
|
+
api_key="YOUR_API_KEY",
|
53
|
+
)
|
54
|
+
client.auth.access_token(
|
55
|
+
grants={"tts": True},
|
56
|
+
expires_in=60,
|
57
|
+
)
|
58
|
+
"""
|
59
|
+
_response = self._client_wrapper.httpx_client.request(
|
60
|
+
"access-token",
|
61
|
+
method="POST",
|
62
|
+
json={
|
63
|
+
"grants": convert_and_respect_annotation_metadata(
|
64
|
+
object_=grants, annotation=TokenGrantParams, direction="write"
|
65
|
+
),
|
66
|
+
"expires_in": expires_in,
|
67
|
+
},
|
68
|
+
request_options=request_options,
|
69
|
+
omit=OMIT,
|
70
|
+
)
|
71
|
+
try:
|
72
|
+
if 200 <= _response.status_code < 300:
|
73
|
+
return typing.cast(
|
74
|
+
TokenResponse,
|
75
|
+
parse_obj_as(
|
76
|
+
type_=TokenResponse, # type: ignore
|
77
|
+
object_=_response.json(),
|
78
|
+
),
|
79
|
+
)
|
80
|
+
_response_json = _response.json()
|
81
|
+
except JSONDecodeError:
|
82
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
83
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
84
|
+
|
85
|
+
|
86
|
+
class AsyncAuthClient:
|
87
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
88
|
+
self._client_wrapper = client_wrapper
|
89
|
+
|
90
|
+
async def access_token(
|
91
|
+
self,
|
92
|
+
*,
|
93
|
+
grants: TokenGrantParams,
|
94
|
+
expires_in: typing.Optional[int] = OMIT,
|
95
|
+
request_options: typing.Optional[RequestOptions] = None,
|
96
|
+
) -> TokenResponse:
|
97
|
+
"""
|
98
|
+
Generates a new Access Token for the client. These tokens are short-lived and should be used to make requests to the API from authenticated clients.
|
99
|
+
|
100
|
+
Parameters
|
101
|
+
----------
|
102
|
+
grants : TokenGrantParams
|
103
|
+
The permissions to be granted via the token.
|
104
|
+
|
105
|
+
expires_in : typing.Optional[int]
|
106
|
+
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
107
|
+
|
108
|
+
request_options : typing.Optional[RequestOptions]
|
109
|
+
Request-specific configuration.
|
110
|
+
|
111
|
+
Returns
|
112
|
+
-------
|
113
|
+
TokenResponse
|
114
|
+
|
115
|
+
Examples
|
116
|
+
--------
|
117
|
+
import asyncio
|
118
|
+
|
119
|
+
from cartesia import AsyncCartesia
|
120
|
+
|
121
|
+
client = AsyncCartesia(
|
122
|
+
api_key="YOUR_API_KEY",
|
123
|
+
)
|
124
|
+
|
125
|
+
|
126
|
+
async def main() -> None:
|
127
|
+
await client.auth.access_token(
|
128
|
+
grants={"tts": True},
|
129
|
+
expires_in=60,
|
130
|
+
)
|
131
|
+
|
132
|
+
|
133
|
+
asyncio.run(main())
|
134
|
+
"""
|
135
|
+
_response = await self._client_wrapper.httpx_client.request(
|
136
|
+
"access-token",
|
137
|
+
method="POST",
|
138
|
+
json={
|
139
|
+
"grants": convert_and_respect_annotation_metadata(
|
140
|
+
object_=grants, annotation=TokenGrantParams, direction="write"
|
141
|
+
),
|
142
|
+
"expires_in": expires_in,
|
143
|
+
},
|
144
|
+
request_options=request_options,
|
145
|
+
omit=OMIT,
|
146
|
+
)
|
147
|
+
try:
|
148
|
+
if 200 <= _response.status_code < 300:
|
149
|
+
return typing.cast(
|
150
|
+
TokenResponse,
|
151
|
+
parse_obj_as(
|
152
|
+
type_=TokenResponse, # type: ignore
|
153
|
+
object_=_response.json(),
|
154
|
+
),
|
155
|
+
)
|
156
|
+
_response_json = _response.json()
|
157
|
+
except JSONDecodeError:
|
158
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
159
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from .token_grant import TokenGrantParams
|
4
|
+
from .token_request import TokenRequestParams
|
5
|
+
from .token_response import TokenResponseParams
|
6
|
+
|
7
|
+
__all__ = ["TokenGrantParams", "TokenRequestParams", "TokenResponseParams"]
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing_extensions
|
4
|
+
from .token_grant import TokenGrantParams
|
5
|
+
import typing_extensions
|
6
|
+
|
7
|
+
|
8
|
+
class TokenRequestParams(typing_extensions.TypedDict):
|
9
|
+
grants: TokenGrantParams
|
10
|
+
"""
|
11
|
+
The permissions to be granted via the token.
|
12
|
+
"""
|
13
|
+
|
14
|
+
expires_in: typing_extensions.NotRequired[int]
|
15
|
+
"""
|
16
|
+
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
17
|
+
"""
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import pydantic
|
5
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
6
|
+
import typing
|
7
|
+
|
8
|
+
|
9
|
+
class TokenGrant(UniversalBaseModel):
|
10
|
+
tts: bool = pydantic.Field()
|
11
|
+
"""
|
12
|
+
The `tts` grant allows the token to be used to access any TTS endpoint.
|
13
|
+
"""
|
14
|
+
|
15
|
+
if IS_PYDANTIC_V2:
|
16
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
17
|
+
else:
|
18
|
+
|
19
|
+
class Config:
|
20
|
+
frozen = True
|
21
|
+
smart_union = True
|
22
|
+
extra = pydantic.Extra.allow
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
from .token_grant import TokenGrant
|
5
|
+
import pydantic
|
6
|
+
import typing
|
7
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
8
|
+
|
9
|
+
|
10
|
+
class TokenRequest(UniversalBaseModel):
|
11
|
+
grants: TokenGrant = pydantic.Field()
|
12
|
+
"""
|
13
|
+
The permissions to be granted via the token.
|
14
|
+
"""
|
15
|
+
|
16
|
+
expires_in: typing.Optional[int] = pydantic.Field(default=None)
|
17
|
+
"""
|
18
|
+
The number of seconds the token will be valid for since the time of generation. The maximum is 1 hour (3600 seconds).
|
19
|
+
"""
|
20
|
+
|
21
|
+
if IS_PYDANTIC_V2:
|
22
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
23
|
+
else:
|
24
|
+
|
25
|
+
class Config:
|
26
|
+
frozen = True
|
27
|
+
smart_union = True
|
28
|
+
extra = pydantic.Extra.allow
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import pydantic
|
5
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
6
|
+
import typing
|
7
|
+
|
8
|
+
|
9
|
+
class TokenResponse(UniversalBaseModel):
|
10
|
+
token: str = pydantic.Field()
|
11
|
+
"""
|
12
|
+
The generated Access Token.
|
13
|
+
"""
|
14
|
+
|
15
|
+
if IS_PYDANTIC_V2:
|
16
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
17
|
+
else:
|
18
|
+
|
19
|
+
class Config:
|
20
|
+
frozen = True
|
21
|
+
smart_union = True
|
22
|
+
extra = pydantic.Extra.allow
|
cartesia/base_client.py
CHANGED
@@ -5,12 +5,14 @@ from .environment import CartesiaEnvironment
|
|
5
5
|
import httpx
|
6
6
|
from .core.client_wrapper import SyncClientWrapper
|
7
7
|
from .api_status.client import ApiStatusClient
|
8
|
+
from .auth.client import AuthClient
|
8
9
|
from .infill.client import InfillClient
|
9
10
|
from .tts.client import TtsClient
|
10
11
|
from .voice_changer.client import VoiceChangerClient
|
11
12
|
from .voices.client import VoicesClient
|
12
13
|
from .core.client_wrapper import AsyncClientWrapper
|
13
14
|
from .api_status.client import AsyncApiStatusClient
|
15
|
+
from .auth.client import AsyncAuthClient
|
14
16
|
from .infill.client import AsyncInfillClient
|
15
17
|
from .tts.client import AsyncTtsClient
|
16
18
|
from .voice_changer.client import AsyncVoiceChangerClient
|
@@ -76,6 +78,7 @@ class BaseCartesia:
|
|
76
78
|
timeout=_defaulted_timeout,
|
77
79
|
)
|
78
80
|
self.api_status = ApiStatusClient(client_wrapper=self._client_wrapper)
|
81
|
+
self.auth = AuthClient(client_wrapper=self._client_wrapper)
|
79
82
|
self.infill = InfillClient(client_wrapper=self._client_wrapper)
|
80
83
|
self.tts = TtsClient(client_wrapper=self._client_wrapper)
|
81
84
|
self.voice_changer = VoiceChangerClient(client_wrapper=self._client_wrapper)
|
@@ -141,6 +144,7 @@ class AsyncBaseCartesia:
|
|
141
144
|
timeout=_defaulted_timeout,
|
142
145
|
)
|
143
146
|
self.api_status = AsyncApiStatusClient(client_wrapper=self._client_wrapper)
|
147
|
+
self.auth = AsyncAuthClient(client_wrapper=self._client_wrapper)
|
144
148
|
self.infill = AsyncInfillClient(client_wrapper=self._client_wrapper)
|
145
149
|
self.tts = AsyncTtsClient(client_wrapper=self._client_wrapper)
|
146
150
|
self.voice_changer = AsyncVoiceChangerClient(client_wrapper=self._client_wrapper)
|
cartesia/core/client_wrapper.py
CHANGED
@@ -16,7 +16,7 @@ class BaseClientWrapper:
|
|
16
16
|
headers: typing.Dict[str, str] = {
|
17
17
|
"X-Fern-Language": "Python",
|
18
18
|
"X-Fern-SDK-Name": "cartesia",
|
19
|
-
"X-Fern-SDK-Version": "2.0.0b8",
|
19
|
+
"X-Fern-SDK-Version": "2.0.2",
|
20
20
|
}
|
21
21
|
headers["X-API-Key"] = self.api_key
|
22
22
|
headers["Cartesia-Version"] = "2024-11-13"
|
cartesia/tts/_async_websocket.py
CHANGED
@@ -71,6 +71,7 @@ class _AsyncTTSContext:
|
|
71
71
|
add_phoneme_timestamps: bool = False,
|
72
72
|
use_original_timestamps: bool = False,
|
73
73
|
continue_: bool = False,
|
74
|
+
max_buffer_delay_ms: Optional[int] = None,
|
74
75
|
flush: bool = False,
|
75
76
|
) -> None:
|
76
77
|
"""Send audio generation requests to the WebSocket. The response can be received using the `receive` method.
|
@@ -111,6 +112,8 @@ class _AsyncTTSContext:
|
|
111
112
|
request_body["use_original_timestamps"] = use_original_timestamps
|
112
113
|
if continue_:
|
113
114
|
request_body["continue"] = continue_
|
115
|
+
if max_buffer_delay_ms:
|
116
|
+
request_body["max_buffer_delay_ms"] = max_buffer_delay_ms
|
114
117
|
if flush:
|
115
118
|
request_body["flush"] = flush
|
116
119
|
|
cartesia/tts/_websocket.py
CHANGED
@@ -63,6 +63,7 @@ class _TTSContext:
|
|
63
63
|
output_format: OutputFormatParams,
|
64
64
|
voice: TtsRequestVoiceSpecifierParams,
|
65
65
|
context_id: Optional[str] = None,
|
66
|
+
max_buffer_delay_ms: Optional[int] = None,
|
66
67
|
duration: Optional[int] = None,
|
67
68
|
language: Optional[str] = None,
|
68
69
|
stream: bool = True,
|
@@ -108,6 +109,8 @@ class _TTSContext:
|
|
108
109
|
request_body["add_phoneme_timestamps"] = add_phoneme_timestamps
|
109
110
|
if use_original_timestamps:
|
110
111
|
request_body["use_original_timestamps"] = use_original_timestamps
|
112
|
+
if max_buffer_delay_ms:
|
113
|
+
request_body["max_buffer_delay_ms"] = max_buffer_delay_ms
|
111
114
|
|
112
115
|
if (
|
113
116
|
"context_id" in request_body
|
cartesia/tts/client.py
CHANGED
@@ -32,13 +32,14 @@ class TtsClient:
|
|
32
32
|
output_format: OutputFormatParams,
|
33
33
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
34
34
|
duration: typing.Optional[float] = OMIT,
|
35
|
+
text_cfg: typing.Optional[float] = OMIT,
|
35
36
|
request_options: typing.Optional[RequestOptions] = None,
|
36
37
|
) -> typing.Iterator[bytes]:
|
37
38
|
"""
|
38
39
|
Parameters
|
39
40
|
----------
|
40
41
|
model_id : str
|
41
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
42
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
42
43
|
|
43
44
|
transcript : str
|
44
45
|
|
@@ -52,6 +53,13 @@ class TtsClient:
|
|
52
53
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
53
54
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
54
55
|
|
56
|
+
text_cfg : typing.Optional[float]
|
57
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
58
|
+
|
59
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
60
|
+
|
61
|
+
This parameter is only supported for `sonic-2` models.
|
62
|
+
|
55
63
|
request_options : typing.Optional[RequestOptions]
|
56
64
|
Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
|
57
65
|
|
@@ -92,6 +100,7 @@ class TtsClient:
|
|
92
100
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
93
101
|
),
|
94
102
|
"duration": duration,
|
103
|
+
"text_cfg": text_cfg,
|
95
104
|
},
|
96
105
|
request_options=request_options,
|
97
106
|
omit=OMIT,
|
@@ -117,13 +126,14 @@ class TtsClient:
|
|
117
126
|
output_format: OutputFormatParams,
|
118
127
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
119
128
|
duration: typing.Optional[float] = OMIT,
|
129
|
+
text_cfg: typing.Optional[float] = OMIT,
|
120
130
|
request_options: typing.Optional[RequestOptions] = None,
|
121
131
|
) -> typing.Iterator[WebSocketResponse]:
|
122
132
|
"""
|
123
133
|
Parameters
|
124
134
|
----------
|
125
135
|
model_id : str
|
126
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
136
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
127
137
|
|
128
138
|
transcript : str
|
129
139
|
|
@@ -137,6 +147,13 @@ class TtsClient:
|
|
137
147
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
138
148
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
139
149
|
|
150
|
+
text_cfg : typing.Optional[float]
|
151
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
152
|
+
|
153
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
154
|
+
|
155
|
+
This parameter is only supported for `sonic-2` models.
|
156
|
+
|
140
157
|
request_options : typing.Optional[RequestOptions]
|
141
158
|
Request-specific configuration.
|
142
159
|
|
@@ -179,6 +196,7 @@ class TtsClient:
|
|
179
196
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
180
197
|
),
|
181
198
|
"duration": duration,
|
199
|
+
"text_cfg": text_cfg,
|
182
200
|
},
|
183
201
|
request_options=request_options,
|
184
202
|
omit=OMIT,
|
@@ -218,13 +236,14 @@ class AsyncTtsClient:
|
|
218
236
|
output_format: OutputFormatParams,
|
219
237
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
220
238
|
duration: typing.Optional[float] = OMIT,
|
239
|
+
text_cfg: typing.Optional[float] = OMIT,
|
221
240
|
request_options: typing.Optional[RequestOptions] = None,
|
222
241
|
) -> typing.AsyncIterator[bytes]:
|
223
242
|
"""
|
224
243
|
Parameters
|
225
244
|
----------
|
226
245
|
model_id : str
|
227
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
246
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
228
247
|
|
229
248
|
transcript : str
|
230
249
|
|
@@ -238,6 +257,13 @@ class AsyncTtsClient:
|
|
238
257
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
239
258
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
240
259
|
|
260
|
+
text_cfg : typing.Optional[float]
|
261
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
262
|
+
|
263
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
264
|
+
|
265
|
+
This parameter is only supported for `sonic-2` models.
|
266
|
+
|
241
267
|
request_options : typing.Optional[RequestOptions]
|
242
268
|
Request-specific configuration. You can pass in configuration such as `chunk_size`, and more to customize the request and response.
|
243
269
|
|
@@ -286,6 +312,7 @@ class AsyncTtsClient:
|
|
286
312
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
287
313
|
),
|
288
314
|
"duration": duration,
|
315
|
+
"text_cfg": text_cfg,
|
289
316
|
},
|
290
317
|
request_options=request_options,
|
291
318
|
omit=OMIT,
|
@@ -311,13 +338,14 @@ class AsyncTtsClient:
|
|
311
338
|
output_format: OutputFormatParams,
|
312
339
|
language: typing.Optional[SupportedLanguage] = OMIT,
|
313
340
|
duration: typing.Optional[float] = OMIT,
|
341
|
+
text_cfg: typing.Optional[float] = OMIT,
|
314
342
|
request_options: typing.Optional[RequestOptions] = None,
|
315
343
|
) -> typing.AsyncIterator[WebSocketResponse]:
|
316
344
|
"""
|
317
345
|
Parameters
|
318
346
|
----------
|
319
347
|
model_id : str
|
320
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
348
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
321
349
|
|
322
350
|
transcript : str
|
323
351
|
|
@@ -331,6 +359,13 @@ class AsyncTtsClient:
|
|
331
359
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
332
360
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
333
361
|
|
362
|
+
text_cfg : typing.Optional[float]
|
363
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
364
|
+
|
365
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
366
|
+
|
367
|
+
This parameter is only supported for `sonic-2` models.
|
368
|
+
|
334
369
|
request_options : typing.Optional[RequestOptions]
|
335
370
|
Request-specific configuration.
|
336
371
|
|
@@ -381,6 +416,7 @@ class AsyncTtsClient:
|
|
381
416
|
object_=output_format, annotation=OutputFormatParams, direction="write"
|
382
417
|
),
|
383
418
|
"duration": duration,
|
419
|
+
"text_cfg": text_cfg,
|
384
420
|
},
|
385
421
|
request_options=request_options,
|
386
422
|
omit=OMIT,
|
@@ -13,7 +13,7 @@ from ...core.serialization import FieldMetadata
|
|
13
13
|
class GenerationRequestParams(typing_extensions.TypedDict):
|
14
14
|
model_id: str
|
15
15
|
"""
|
16
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
16
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
17
17
|
"""
|
18
18
|
|
19
19
|
transcript: typing.Optional[typing.Any]
|
@@ -30,6 +30,15 @@ class GenerationRequestParams(typing_extensions.TypedDict):
|
|
30
30
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
31
31
|
"""
|
32
32
|
|
33
|
+
text_cfg: typing_extensions.NotRequired[float]
|
34
|
+
"""
|
35
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
36
|
+
|
37
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
38
|
+
|
39
|
+
This parameter is only supported for `sonic-2` models.
|
40
|
+
"""
|
41
|
+
|
33
42
|
context_id: typing_extensions.NotRequired[ContextId]
|
34
43
|
continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
|
35
44
|
"""
|
@@ -37,6 +46,15 @@ class GenerationRequestParams(typing_extensions.TypedDict):
|
|
37
46
|
If not specified, this defaults to `false`.
|
38
47
|
"""
|
39
48
|
|
49
|
+
max_buffer_delay_ms: typing_extensions.NotRequired[int]
|
50
|
+
"""
|
51
|
+
The maximum time in milliseconds to buffer text before starting generation. Values between [0, 1000]ms are supported. Defaults to 0 (no buffering).
|
52
|
+
|
53
|
+
When set, the model will buffer incoming text chunks until it's confident it has enough context to generate high-quality speech, or the buffer delay elapses, whichever comes first. Without this option set, the model will kick off generations immediately, ceding control of buffering to the user.
|
54
|
+
|
55
|
+
Use this to balance responsiveness with higher quality speech generation, which often benefits from having more context.
|
56
|
+
"""
|
57
|
+
|
40
58
|
flush: typing_extensions.NotRequired[bool]
|
41
59
|
"""
|
42
60
|
Whether to flush the context.
|
@@ -10,7 +10,7 @@ from .output_format import OutputFormatParams
|
|
10
10
|
class TtsRequestParams(typing_extensions.TypedDict):
|
11
11
|
model_id: str
|
12
12
|
"""
|
13
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
13
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
14
14
|
"""
|
15
15
|
|
16
16
|
transcript: str
|
@@ -22,3 +22,12 @@ class TtsRequestParams(typing_extensions.TypedDict):
|
|
22
22
|
The maximum duration of the audio in seconds. You do not usually need to specify this.
|
23
23
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
24
24
|
"""
|
25
|
+
|
26
|
+
text_cfg: typing_extensions.NotRequired[float]
|
27
|
+
"""
|
28
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
29
|
+
|
30
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
31
|
+
|
32
|
+
This parameter is only supported for `sonic-2` models.
|
33
|
+
"""
|
@@ -10,7 +10,7 @@ from ...core.serialization import FieldMetadata
|
|
10
10
|
class WebSocketTtsRequestParams(typing_extensions.TypedDict):
|
11
11
|
model_id: str
|
12
12
|
"""
|
13
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
13
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
14
14
|
"""
|
15
15
|
|
16
16
|
output_format: typing_extensions.NotRequired[OutputFormatParams]
|
@@ -23,3 +23,5 @@ class WebSocketTtsRequestParams(typing_extensions.TypedDict):
|
|
23
23
|
add_phoneme_timestamps: typing_extensions.NotRequired[bool]
|
24
24
|
continue_: typing_extensions.NotRequired[typing_extensions.Annotated[bool, FieldMetadata(alias="continue")]]
|
25
25
|
context_id: typing_extensions.NotRequired[str]
|
26
|
+
max_buffer_delay_ms: typing_extensions.NotRequired[int]
|
27
|
+
text_cfg: typing_extensions.NotRequired[float]
|
@@ -15,7 +15,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
|
15
15
|
class GenerationRequest(UniversalBaseModel):
|
16
16
|
model_id: str = pydantic.Field()
|
17
17
|
"""
|
18
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
18
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
19
19
|
"""
|
20
20
|
|
21
21
|
transcript: typing.Optional[typing.Any] = pydantic.Field(default=None)
|
@@ -32,6 +32,15 @@ class GenerationRequest(UniversalBaseModel):
|
|
32
32
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
33
33
|
"""
|
34
34
|
|
35
|
+
text_cfg: typing.Optional[float] = pydantic.Field(default=None)
|
36
|
+
"""
|
37
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
38
|
+
|
39
|
+
Higher values causes the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
40
|
+
|
41
|
+
This parameter is only supported for `sonic-2` models.
|
42
|
+
"""
|
43
|
+
|
35
44
|
context_id: typing.Optional[ContextId] = None
|
36
45
|
continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = pydantic.Field(
|
37
46
|
default=None
|
@@ -41,6 +50,15 @@ class GenerationRequest(UniversalBaseModel):
|
|
41
50
|
If not specified, this defaults to `false`.
|
42
51
|
"""
|
43
52
|
|
53
|
+
max_buffer_delay_ms: typing.Optional[int] = pydantic.Field(default=None)
|
54
|
+
"""
|
55
|
+
The maximum time in milliseconds to buffer text before starting generation. Values between [0, 1000]ms are supported. Defaults to 0 (no buffering).
|
56
|
+
|
57
|
+
When set, the model will buffer incoming text chunks until it's confident it has enough context to generate high-quality speech, or the buffer delay elapses, whichever comes first. Without this option set, the model will kick off generations immediately, ceding control of buffering to the user.
|
58
|
+
|
59
|
+
Use this to balance responsiveness with higher quality speech generation, which often benefits from having more context.
|
60
|
+
"""
|
61
|
+
|
44
62
|
flush: typing.Optional[bool] = pydantic.Field(default=None)
|
45
63
|
"""
|
46
64
|
Whether to flush the context.
|
@@ -12,7 +12,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
|
12
12
|
class TtsRequest(UniversalBaseModel):
|
13
13
|
model_id: str = pydantic.Field()
|
14
14
|
"""
|
15
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
15
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
16
16
|
"""
|
17
17
|
|
18
18
|
transcript: str
|
@@ -25,6 +25,15 @@ class TtsRequest(UniversalBaseModel):
|
|
25
25
|
If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
|
26
26
|
"""
|
27
27
|
|
28
|
+
text_cfg: typing.Optional[float] = pydantic.Field(default=None)
|
29
|
+
"""
|
30
|
+
The text [classifier-free guidance](https://arxiv.org/abs/2207.12598) value for the request.
|
31
|
+
|
32
|
+
Higher values cause the model to attend more to the text but speed up the generation. Lower values reduce the speaking rate but can increase the risk of hallucinations. The default value is `3.0`. For a slower speaking rate, we recommend values between `2.0` and `3.0`. Values are supported between `1.5` and `3.0`.
|
33
|
+
|
34
|
+
This parameter is only supported for `sonic-2` models.
|
35
|
+
"""
|
36
|
+
|
28
37
|
if IS_PYDANTIC_V2:
|
29
38
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
30
39
|
else:
|
@@ -13,7 +13,7 @@ from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
|
13
13
|
class WebSocketTtsRequest(UniversalBaseModel):
|
14
14
|
model_id: str = pydantic.Field()
|
15
15
|
"""
|
16
|
-
The ID of the model to use for the generation. See [Models](/build-with-
|
16
|
+
The ID of the model to use for the generation. See [Models](/build-with-cartesia/models) for available models.
|
17
17
|
"""
|
18
18
|
|
19
19
|
output_format: typing.Optional[OutputFormat] = None
|
@@ -26,6 +26,8 @@ class WebSocketTtsRequest(UniversalBaseModel):
|
|
26
26
|
add_phoneme_timestamps: typing.Optional[bool] = None
|
27
27
|
continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
|
28
28
|
context_id: typing.Optional[str] = None
|
29
|
+
max_buffer_delay_ms: typing.Optional[int] = None
|
30
|
+
text_cfg: typing.Optional[float] = None
|
29
31
|
|
30
32
|
if IS_PYDANTIC_V2:
|
31
33
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
cartesia/voices/client.py
CHANGED
@@ -148,8 +148,8 @@ class VoicesClient:
|
|
148
148
|
name: str,
|
149
149
|
language: SupportedLanguage,
|
150
150
|
mode: CloneMode,
|
151
|
-
enhance: bool,
|
152
151
|
description: typing.Optional[str] = OMIT,
|
152
|
+
enhance: typing.Optional[bool] = OMIT,
|
153
153
|
transcript: typing.Optional[str] = OMIT,
|
154
154
|
request_options: typing.Optional[RequestOptions] = None,
|
155
155
|
) -> VoiceMetadata:
|
@@ -177,14 +177,14 @@ class VoicesClient:
|
|
177
177
|
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
178
178
|
|
179
179
|
|
180
|
-
enhance : bool
|
181
|
-
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
182
|
-
|
183
|
-
|
184
180
|
description : typing.Optional[str]
|
185
181
|
A description for the voice.
|
186
182
|
|
187
183
|
|
184
|
+
enhance : typing.Optional[bool]
|
185
|
+
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
186
|
+
|
187
|
+
|
188
188
|
transcript : typing.Optional[str]
|
189
189
|
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
190
190
|
|
@@ -717,8 +717,8 @@ class AsyncVoicesClient:
|
|
717
717
|
name: str,
|
718
718
|
language: SupportedLanguage,
|
719
719
|
mode: CloneMode,
|
720
|
-
enhance: bool,
|
721
720
|
description: typing.Optional[str] = OMIT,
|
721
|
+
enhance: typing.Optional[bool] = OMIT,
|
722
722
|
transcript: typing.Optional[str] = OMIT,
|
723
723
|
request_options: typing.Optional[RequestOptions] = None,
|
724
724
|
) -> VoiceMetadata:
|
@@ -746,14 +746,14 @@ class AsyncVoicesClient:
|
|
746
746
|
Tradeoff between similarity and stability. Similarity clones sound more like the source clip, but may reproduce background noise. Stability clones always sound like a studio recording, but may not sound as similar to the source clip.
|
747
747
|
|
748
748
|
|
749
|
-
enhance : bool
|
750
|
-
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
751
|
-
|
752
|
-
|
753
749
|
description : typing.Optional[str]
|
754
750
|
A description for the voice.
|
755
751
|
|
756
752
|
|
753
|
+
enhance : typing.Optional[bool]
|
754
|
+
Whether to enhance the clip to improve its quality before cloning. Useful if the clip has background noise.
|
755
|
+
|
756
|
+
|
757
757
|
transcript : typing.Optional[str]
|
758
758
|
Optional transcript of the words spoken in the audio clip. Only used for similarity mode.
|
759
759
|
|
@@ -1,15 +1,25 @@
|
|
1
|
-
cartesia/__init__.py,sha256=
|
1
|
+
cartesia/__init__.py,sha256=wqKLSdaX3HebPUATK1p8tTcd5TMcLzvhJR2dNcEBKgo,8414
|
2
2
|
cartesia/api_status/__init__.py,sha256=_dHNLdknrBjxHtU2PvLumttJM-JTQhJQqhhAQkLqt_U,168
|
3
3
|
cartesia/api_status/client.py,sha256=GJ9Dq8iCn3hn8vCIqc6k1fCGEhSz0T0kaPGcdFnbMDY,3146
|
4
4
|
cartesia/api_status/requests/__init__.py,sha256=ilEMzEy1JEw484CuL92bX5lHGOznc62pjiDMgiZ0tKM,130
|
5
5
|
cartesia/api_status/requests/api_info.py,sha256=AmB6RpquI2yUlTQBtOk8e0qtLmXHYLcGZKpXZahOwmc,172
|
6
6
|
cartesia/api_status/types/__init__.py,sha256=6NUyGWiGK1Wl3mXlSMJN2ObKf2LK3vjX2MUP1uopfEQ,118
|
7
7
|
cartesia/api_status/types/api_info.py,sha256=o1LwSxnoHpCR7huw9J-cF6LRlC_fiftDQLYUz8p-vTc,568
|
8
|
-
cartesia/
|
8
|
+
cartesia/auth/__init__.py,sha256=T8_EGgzdzyJLqfD7DAgdkE6G1Ey2sUMyze-7x8HTzGg,355
|
9
|
+
cartesia/auth/client.py,sha256=gJurqzV5r-edd5DV2xc1Uy1Fm8Fi3ndaourZmbOh15E,5261
|
10
|
+
cartesia/auth/requests/__init__.py,sha256=hR7qCSJCPiOG7f8z8jTKQLOC7QoonSvvPKe0JbcEYEs,278
|
11
|
+
cartesia/auth/requests/token_grant.py,sha256=HTrgl6TsokxYIMXeTK-NjSKQ8WQfDwnbEfbyzirs0kk,251
|
12
|
+
cartesia/auth/requests/token_request.py,sha256=DeQQhHOLfmL4O3ZqrFq1FtxTDmTsYGpMtmRRiCvxUcE,498
|
13
|
+
cartesia/auth/requests/token_response.py,sha256=jXpHZmFe6RWO837e_lC2GJWwqO-b6KHOA-b6tTJVC54,211
|
14
|
+
cartesia/auth/types/__init__.py,sha256=iZrkHrlWs8e9KkR27f2IG-B72HC_N05A7Lcyt_EU9SM,242
|
15
|
+
cartesia/auth/types/token_grant.py,sha256=sdEqlqS95XSy_Xdp4TEeRSC1hQp4nbPv1HHZFxbU0So,666
|
16
|
+
cartesia/auth/types/token_request.py,sha256=2cx2OBXTEjrbuVMOpBzkIm9-DZD2mGiWE6Ui3kumxGI,893
|
17
|
+
cartesia/auth/types/token_response.py,sha256=_GcvfQdjwgNu1ODj8EuTkaMsez508a6xuOo8HOVNOJQ,626
|
18
|
+
cartesia/base_client.py,sha256=YH0l0UUzanAa9mDdJU6BFQ9XKELiaPTm9NsJpVQ4evA,6539
|
9
19
|
cartesia/client.py,sha256=sPAYQLt9W2E_2F17ooocvvJImuNyLrL8xUypgf6dZeI,6238
|
10
20
|
cartesia/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
|
11
21
|
cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
|
12
|
-
cartesia/core/client_wrapper.py,sha256=
|
22
|
+
cartesia/core/client_wrapper.py,sha256=RLjY6z-CDfSO5WPCxechq__-Xhikk8_gn3-HAdFFXiE,1854
|
13
23
|
cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
14
24
|
cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
15
25
|
cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
|
@@ -42,19 +52,19 @@ cartesia/infill/__init__.py,sha256=FTtvy8EDg9nNNg9WCatVgKTRYV8-_v1roeGPAKoa_pw,6
|
|
42
52
|
cartesia/infill/client.py,sha256=PWE5Ak-wsaBM_8g52oDl9PYx76PkW6f900mnxvZf4Bk,12571
|
43
53
|
cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
54
|
cartesia/tts/__init__.py,sha256=G0wcYlPrr7hmu5DQgCG7bDTQq36fpP3iBM5164Z0-Js,4701
|
45
|
-
cartesia/tts/_async_websocket.py,sha256=
|
46
|
-
cartesia/tts/_websocket.py,sha256=
|
47
|
-
cartesia/tts/client.py,sha256=
|
55
|
+
cartesia/tts/_async_websocket.py,sha256=tJ-6rdJrviKvGhSW8J8t-rCinXM5gXXQJgDO8OgW3EE,18805
|
56
|
+
cartesia/tts/_websocket.py,sha256=Uk6B-TP-0nzeyFE9w-_PzNIiVYP14rKqSDZlm6bU24Q,19271
|
57
|
+
cartesia/tts/client.py,sha256=oZZDSb9XVVvON4ng5tdL3NeVIMhfUdLs1qSHQ_HBtQw,17376
|
48
58
|
cartesia/tts/requests/__init__.py,sha256=0rcfMLHNbUhkRI1xS09UE4p-WT1BCqrcblFtPxcATOI,3261
|
49
59
|
cartesia/tts/requests/cancel_context_request.py,sha256=Wl8g-o5vwl9ENm-H1wsLx441FkIR_4Wt5UYtuWce2Yw,431
|
50
60
|
cartesia/tts/requests/controls.py,sha256=xzUJlfgqhaJ1A-JD0LTpoHYk4iEpCuGpSD7qE4YYsRg,285
|
51
|
-
cartesia/tts/requests/generation_request.py,sha256=
|
61
|
+
cartesia/tts/requests/generation_request.py,sha256=oGCBfIVXc6SvKV5IDdadHvqBT5qLAZLhz_CBVtKfV2Y,3202
|
52
62
|
cartesia/tts/requests/mp_3_output_format.py,sha256=PGDVzC1d7-Jce12rFxtF8G1pTHmlUdiGAhykFTABg0w,316
|
53
63
|
cartesia/tts/requests/output_format.py,sha256=8TKu9AAeHCR5L4edzYch8FIYIldn4bM7ySrsCl8W_g8,842
|
54
64
|
cartesia/tts/requests/phoneme_timestamps.py,sha256=ft81nmqElZAnvTBT27lY6YWfF18ZGsCx3Y1XHv9J7cM,267
|
55
65
|
cartesia/tts/requests/raw_output_format.py,sha256=S60Vp7DeAATCMLF3bXgxhw0zILJBWJ9GhI9irAg_UkI,316
|
56
66
|
cartesia/tts/requests/speed.py,sha256=-YGBWwh7_VtCBnYlT5EVsnrmcHFMEBTxy9LathZhkMA,259
|
57
|
-
cartesia/tts/requests/tts_request.py,sha256=
|
67
|
+
cartesia/tts/requests/tts_request.py,sha256=VqBtdNF6JFcBh392e6tyONCexvJZMUyKpamv03hjTjA,1479
|
58
68
|
cartesia/tts/requests/tts_request_embedding_specifier.py,sha256=-M54ZjV0H5LPwcKtz0bOVqlkvO1pPiMbqMbVBMko3Ns,565
|
59
69
|
cartesia/tts/requests/tts_request_id_specifier.py,sha256=-0ClfyJnnaH0uAcF5r84s3cM_cw2wT39dp6T4JYzOQ8,536
|
60
70
|
cartesia/tts/requests/tts_request_voice_specifier.py,sha256=eGzL4aVGq4gKPxeglsV7-wuhxg8x33Qth3uFTTytgeI,337
|
@@ -71,7 +81,7 @@ cartesia/tts/requests/web_socket_response.py,sha256=WqZ6RgO4suG78wiVSIsOWwyXBioV
|
|
71
81
|
cartesia/tts/requests/web_socket_stream_options.py,sha256=VIvblFw9hGZvDzFpOnC11G0NvrFSVt-1-0sY5rpcZPI,232
|
72
82
|
cartesia/tts/requests/web_socket_timestamps_response.py,sha256=MK3zN2Q_PVWJtX5DidNB0uXoF2o33rv6qCYPVaourxY,351
|
73
83
|
cartesia/tts/requests/web_socket_tts_output.py,sha256=pX2uf0XVdziFhXCydwLlVOWb-LvBiuq-cBI6R1INiMg,760
|
74
|
-
cartesia/tts/requests/web_socket_tts_request.py,sha256=
|
84
|
+
cartesia/tts/requests/web_socket_tts_request.py,sha256=i6gwa4bvPPCnS2ZnSnu5FY1bjwjp76Kfi0eTb_atBlI,1215
|
75
85
|
cartesia/tts/requests/word_timestamps.py,sha256=WMfBJtETi6wTpES0pYZCFfFRfEbzWE-RtosDJ5seUWg,261
|
76
86
|
cartesia/tts/socket_client.py,sha256=zTPayHbgy-yQQ50AE1HXN4GMyanisZcLXf7Ds1paYks,11621
|
77
87
|
cartesia/tts/types/__init__.py,sha256=yV_-DY9EPNAFEfuIk3wgRLcc4Ta5igv0T5g-IIQ53v0,3251
|
@@ -80,7 +90,7 @@ cartesia/tts/types/context_id.py,sha256=UCEtq5xFGOeBCECcY6Y-gYVe_Peg1hFhH9YYOkpA
|
|
80
90
|
cartesia/tts/types/controls.py,sha256=H4CSu79mM1Ld4NZx_5uXw3EwRzTEMQRxKBRvFpcFb8Y,644
|
81
91
|
cartesia/tts/types/emotion.py,sha256=zocyDcHTiFFnNRgo2YLMi70iGyffa080B4mkg9lcqVc,764
|
82
92
|
cartesia/tts/types/flush_id.py,sha256=HCIKo9o8d7YWKtaSNU3TEvfUVBju93ckGQy01Z9wLcE,79
|
83
|
-
cartesia/tts/types/generation_request.py,sha256=
|
93
|
+
cartesia/tts/types/generation_request.py,sha256=HfMLj_HOCeKy5p_yLcltvrJly9WNkzoNCxarwlLE7Nw,3732
|
84
94
|
cartesia/tts/types/mp_3_output_format.py,sha256=0WGblkuDUL7pZO1aRuQ_mU2Z5gN9xIabRfRKkjtzms8,731
|
85
95
|
cartesia/tts/types/natural_specifier.py,sha256=K526P1RRuBGy80hyd_tX8tohPrE8DR9EgTCxS5wce0o,188
|
86
96
|
cartesia/tts/types/numerical_specifier.py,sha256=tJpIskWO545luCKMFM9JlVc7VVhBhSvqL1qurhzL9cI,92
|
@@ -90,7 +100,7 @@ cartesia/tts/types/raw_encoding.py,sha256=eyc2goiYOTxWcuKHAgYZ2SrnfePW22Fbmc-5fG
|
|
90
100
|
cartesia/tts/types/raw_output_format.py,sha256=jZGVaS0KIi9mU6trfskgA3HbMKJolhrwICnuDhF01ic,673
|
91
101
|
cartesia/tts/types/speed.py,sha256=4c5WdxocBw6WSMnundSaNnceUeooU0vikhy00FW6M-w,239
|
92
102
|
cartesia/tts/types/supported_language.py,sha256=riDRduThMbMWAq9i2uCfxhwVTpgaFwNDZ9LhEIl4zHY,237
|
93
|
-
cartesia/tts/types/tts_request.py,sha256=
|
103
|
+
cartesia/tts/types/tts_request.py,sha256=kUTOjNOZsZivSLbVvLA85EzPNsKCEOnY51NPfUmlDwM,1865
|
94
104
|
cartesia/tts/types/tts_request_embedding_specifier.py,sha256=eL_qCEr4pvWfy4qp9hZBuVdCincX5DBVqfv1vLt2_Vk,942
|
95
105
|
cartesia/tts/types/tts_request_id_specifier.py,sha256=ktGdkkTRQ9scA-lt8qJ2jn_E5WzoOK8AXMrVqi71gf0,906
|
96
106
|
cartesia/tts/types/tts_request_voice_specifier.py,sha256=p-3UQ62uFL1SgbX73Ex1D_V73Ef0wmT1ApOt1iLZmwE,307
|
@@ -107,7 +117,7 @@ cartesia/tts/types/web_socket_response.py,sha256=mHDECZ4K84QmN2s0IWuBsXBt83Yq7Qx
|
|
107
117
|
cartesia/tts/types/web_socket_stream_options.py,sha256=MhDSxBFqMuQeWjoyPqXVnTEzLjF8g6aojeigb5dQUgU,596
|
108
118
|
cartesia/tts/types/web_socket_timestamps_response.py,sha256=kuWXI82ncF1QapnaHEjwrL84qWob7ByQU-yh1e0IEmk,667
|
109
119
|
cartesia/tts/types/web_socket_tts_output.py,sha256=uvkv0smTBhdm18Rl17C0Ml4Inh79YBHNzAcKnZBs14Y,979
|
110
|
-
cartesia/tts/types/web_socket_tts_request.py,sha256=
|
120
|
+
cartesia/tts/types/web_socket_tts_request.py,sha256=PzdIyFcj6V9MLwr4rpuh_H3NfEnzq0dHlEv-bKKeTR0,1529
|
111
121
|
cartesia/tts/types/word_timestamps.py,sha256=XZ2Q0prdb3F9c3AiOKXu4s3A3jBxE-qIt1npHOf16R0,631
|
112
122
|
cartesia/tts/utils/constants.py,sha256=1CHa5flJf8--L_eYyOyOiWJNZ-Q81ufHZxDbJs8xYSk,418
|
113
123
|
cartesia/tts/utils/tts.py,sha256=u7PgPxlJs6fcQTfr-jqAvBCAaK3JWLhF5QF4s-PwoMo,2093
|
@@ -121,7 +131,7 @@ cartesia/voice_changer/types/__init__.py,sha256=qAiHsdRpnFeS0lBkYp_NRrhSJiRXCg5-
|
|
121
131
|
cartesia/voice_changer/types/output_format_container.py,sha256=RqLDELdgeOjYqNTJX1Le62qjiFiJGxf0cYnol88-LLM,166
|
122
132
|
cartesia/voice_changer/types/streaming_response.py,sha256=rQ4ZehtOHsCBKijyULz_ahGQYNj1yus6AM6u2wgcBsI,1963
|
123
133
|
cartesia/voices/__init__.py,sha256=2D58Bir45LvcvP08QMnPlFE8DD8BONTjPLkIDdKs7vg,1891
|
124
|
-
cartesia/voices/client.py,sha256=
|
134
|
+
cartesia/voices/client.py,sha256=B0T1YRjrAX7fssBw1hyq5qpQ2CCf-dC-E9F1-AvwvSs,38949
|
125
135
|
cartesia/voices/requests/__init__.py,sha256=XiBJbSYeQCgFMtwywKvQ0Nmp7Zf_0WskzRhgr9c8h38,1072
|
126
136
|
cartesia/voices/requests/create_voice_request.py,sha256=r6dKb9ga0ZsAi_6PXuE43u2lLgfQg2DIYjk2Neng7pI,617
|
127
137
|
cartesia/voices/requests/embedding_response.py,sha256=PGZkBD8UBcv2MYQbBXyD4T6lzaE9oSGGwXx-MoXCp0M,228
|
@@ -160,6 +170,6 @@ cartesia/voices/types/voice_expand_options.py,sha256=e4FroWdlxEE-LXQfT1RWlGHtswl
|
|
160
170
|
cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
|
161
171
|
cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
|
162
172
|
cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
|
163
|
-
cartesia-2.0.
|
164
|
-
cartesia-2.0.
|
165
|
-
cartesia-2.0.
|
173
|
+
cartesia-2.0.2.dist-info/METADATA,sha256=EB_DtN2AaHi0snXgpbisp2T1nsTQ7CQlR3S1rowO8L0,11206
|
174
|
+
cartesia-2.0.2.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
175
|
+
cartesia-2.0.2.dist-info/RECORD,,
|
File without changes
|