cartesia 2.0.11__py3-none-any.whl → 2.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartesia might be problematic. Click here for more details.

cartesia/__init__.py CHANGED
@@ -52,7 +52,10 @@ from .tts import (
52
52
  Controls,
53
53
  ControlsParams,
54
54
  Emotion,
55
+ EmotionDeprecated,
55
56
  FlushId,
57
+ GenerationConfig,
58
+ GenerationConfigParams,
56
59
  GenerationRequest,
57
60
  GenerationRequestParams,
58
61
  ModelSpeed,
@@ -211,6 +214,7 @@ __all__ = [
211
214
  "EmbeddingSpecifier",
212
215
  "EmbeddingSpecifierParams",
213
216
  "Emotion",
217
+ "EmotionDeprecated",
214
218
  "ErrorMessage",
215
219
  "ErrorMessageParams",
216
220
  "FilePurpose",
@@ -219,6 +223,8 @@ __all__ = [
219
223
  "FlushId",
220
224
  "Gender",
221
225
  "GenderPresentation",
226
+ "GenerationConfig",
227
+ "GenerationConfigParams",
222
228
  "GenerationRequest",
223
229
  "GenerationRequestParams",
224
230
  "GetVoicesResponse",
@@ -16,7 +16,7 @@ class BaseClientWrapper:
16
16
  headers: typing.Dict[str, str] = {
17
17
  "X-Fern-Language": "Python",
18
18
  "X-Fern-SDK-Name": "cartesia",
19
- "X-Fern-SDK-Version": "2.0.11",
19
+ "X-Fern-SDK-Version": "2.0.14",
20
20
  }
21
21
  headers["X-API-Key"] = self.api_key
22
22
  headers["Cartesia-Version"] = "2024-11-13"
cartesia/tts/__init__.py CHANGED
@@ -5,7 +5,9 @@ from .types import (
5
5
  ContextId,
6
6
  Controls,
7
7
  Emotion,
8
+ EmotionDeprecated,
8
9
  FlushId,
10
+ GenerationConfig,
9
11
  GenerationRequest,
10
12
  ModelSpeed,
11
13
  Mp3OutputFormat,
@@ -51,6 +53,7 @@ from .types import (
51
53
  from .requests import (
52
54
  CancelContextRequestParams,
53
55
  ControlsParams,
56
+ GenerationConfigParams,
54
57
  GenerationRequestParams,
55
58
  Mp3OutputFormatParams,
56
59
  OutputFormatParams,
@@ -96,7 +99,10 @@ __all__ = [
96
99
  "Controls",
97
100
  "ControlsParams",
98
101
  "Emotion",
102
+ "EmotionDeprecated",
99
103
  "FlushId",
104
+ "GenerationConfig",
105
+ "GenerationConfigParams",
100
106
  "GenerationRequest",
101
107
  "GenerationRequestParams",
102
108
  "ModelSpeed",
@@ -8,7 +8,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
8
8
 
9
9
  import aiohttp
10
10
 
11
- from cartesia.tts.requests import TtsRequestVoiceSpecifierParams
11
+ from cartesia.tts.requests import GenerationConfigParams, TtsRequestVoiceSpecifierParams
12
12
  from cartesia.tts.requests.output_format import OutputFormatParams
13
13
  from cartesia.tts.types import (
14
14
  WebSocketResponse,
@@ -61,6 +61,7 @@ class _AsyncTTSContext:
61
61
  model_id: str,
62
62
  transcript: str,
63
63
  output_format: OutputFormatParams,
64
+ generation_config: Optional[GenerationConfigParams] = None,
64
65
  voice: TtsRequestVoiceSpecifierParams,
65
66
  context_id: Optional[str] = None,
66
67
  duration: Optional[int] = None,
@@ -116,6 +117,12 @@ class _AsyncTTSContext:
116
117
  if flush:
117
118
  request_body["flush"] = flush
118
119
 
120
+ if generation_config is not None:
121
+ if isinstance(generation_config, dict):
122
+ request_body["generation_config"] = generation_config
123
+ else:
124
+ request_body["generation_config"] = generation_config.dict()
125
+
119
126
  if (
120
127
  "context_id" in request_body
121
128
  and request_body["context_id"] is not None
@@ -315,10 +322,10 @@ class AsyncTtsWebsocket(TtsWebsocket):
315
322
  # Extract status code if available
316
323
  status_code = None
317
324
  error_message = str(e)
318
-
325
+
319
326
  if hasattr(e, 'status') and e.status is not None:
320
327
  status_code = e.status
321
-
328
+
322
329
  # Create a meaningful error message based on status code
323
330
  if status_code == 402:
324
331
  error_message = "Payment required. Your API key may have insufficient credits or permissions."
@@ -328,7 +335,7 @@ class AsyncTtsWebsocket(TtsWebsocket):
328
335
  error_message = "Forbidden. You don't have permission to access this resource."
329
336
  elif status_code == 404:
330
337
  error_message = "Not found. The requested resource doesn't exist."
331
-
338
+
332
339
  raise RuntimeError(f"Failed to connect to WebSocket.\nStatus: {status_code}. Error message: {error_message}")
333
340
  else:
334
341
  raise RuntimeError(f"Failed to connect to WebSocket at {url}. {e}")
@@ -14,7 +14,7 @@ except ImportError:
14
14
 
15
15
  from iterators import TimeoutIterator # type: ignore
16
16
 
17
- from cartesia.tts.requests import TtsRequestVoiceSpecifierParams
17
+ from cartesia.tts.requests import GenerationConfigParams, TtsRequestVoiceSpecifierParams
18
18
  from cartesia.tts.requests.output_format import OutputFormatParams
19
19
  from cartesia.tts.types import (
20
20
  WebSocketResponse,
@@ -60,6 +60,7 @@ class _TTSContext:
60
60
  model_id: str,
61
61
  transcript: typing.Generator[str, None, None],
62
62
  output_format: OutputFormatParams,
63
+ generation_config: Optional[GenerationConfigParams] = None,
63
64
  voice: TtsRequestVoiceSpecifierParams,
64
65
  context_id: Optional[str] = None,
65
66
  max_buffer_delay_ms: Optional[int] = None,
@@ -111,6 +112,11 @@ class _TTSContext:
111
112
  if max_buffer_delay_ms:
112
113
  request_body["max_buffer_delay_ms"] = max_buffer_delay_ms
113
114
 
115
+ if generation_config is not None:
116
+ if isinstance(generation_config, dict):
117
+ request_body["generation_config"] = generation_config
118
+ else:
119
+ request_body["generation_config"] = generation_config.dict()
114
120
  if (
115
121
  "context_id" in request_body
116
122
  and request_body["context_id"] is not None
@@ -293,10 +299,10 @@ class TtsWebsocket:
293
299
  # Extract status code if available
294
300
  status_code = None
295
301
  error_message = str(e)
296
-
302
+
297
303
  if hasattr(e, 'status') and e.status is not None:
298
304
  status_code = e.status
299
-
305
+
300
306
  # Create a meaningful error message based on status code
301
307
  if status_code == 402:
302
308
  error_message = "Payment required. Your API key may have insufficient credits or permissions."
@@ -306,7 +312,7 @@ class TtsWebsocket:
306
312
  error_message = "Forbidden. You don't have permission to access this resource."
307
313
  elif status_code == 404:
308
314
  error_message = "Not found. The requested resource doesn't exist."
309
-
315
+
310
316
  raise RuntimeError(f"Failed to connect to WebSocket.\nStatus: {status_code}. Error message: {error_message}")
311
317
  else:
312
318
  raise RuntimeError(f"Failed to connect to WebSocket. {e}")
cartesia/tts/client.py CHANGED
@@ -5,6 +5,7 @@ from ..core.client_wrapper import SyncClientWrapper
5
5
  from .requests.tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
6
6
  from .requests.output_format import OutputFormatParams
7
7
  from .types.supported_language import SupportedLanguage
8
+ from .requests.generation_config import GenerationConfigParams
8
9
  from .types.model_speed import ModelSpeed
9
10
  from ..core.request_options import RequestOptions
10
11
  from ..core.serialization import convert_and_respect_annotation_metadata
@@ -34,6 +35,7 @@ class TtsClient:
34
35
  voice: TtsRequestVoiceSpecifierParams,
35
36
  output_format: OutputFormatParams,
36
37
  language: typing.Optional[SupportedLanguage] = OMIT,
38
+ generation_config: typing.Optional[GenerationConfigParams] = OMIT,
37
39
  duration: typing.Optional[float] = OMIT,
38
40
  speed: typing.Optional[ModelSpeed] = OMIT,
39
41
  request_options: typing.Optional[RequestOptions] = None,
@@ -52,6 +54,8 @@ class TtsClient:
52
54
 
53
55
  language : typing.Optional[SupportedLanguage]
54
56
 
57
+ generation_config : typing.Optional[GenerationConfigParams]
58
+
55
59
  duration : typing.Optional[float]
56
60
  The maximum duration of the audio in seconds. You do not usually need to specify this.
57
61
  If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -97,6 +101,9 @@ class TtsClient:
97
101
  "output_format": convert_and_respect_annotation_metadata(
98
102
  object_=output_format, annotation=OutputFormatParams, direction="write"
99
103
  ),
104
+ "generation_config": convert_and_respect_annotation_metadata(
105
+ object_=generation_config, annotation=GenerationConfigParams, direction="write"
106
+ ),
100
107
  "duration": duration,
101
108
  "speed": speed,
102
109
  },
@@ -123,6 +130,7 @@ class TtsClient:
123
130
  voice: TtsRequestVoiceSpecifierParams,
124
131
  output_format: SseOutputFormatParams,
125
132
  language: typing.Optional[SupportedLanguage] = OMIT,
133
+ generation_config: typing.Optional[GenerationConfigParams] = OMIT,
126
134
  duration: typing.Optional[float] = OMIT,
127
135
  speed: typing.Optional[ModelSpeed] = OMIT,
128
136
  add_timestamps: typing.Optional[bool] = OMIT,
@@ -145,6 +153,8 @@ class TtsClient:
145
153
 
146
154
  language : typing.Optional[SupportedLanguage]
147
155
 
156
+ generation_config : typing.Optional[GenerationConfigParams]
157
+
148
158
  duration : typing.Optional[float]
149
159
  The maximum duration of the audio in seconds. You do not usually need to specify this.
150
160
  If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -204,6 +214,9 @@ class TtsClient:
204
214
  "output_format": convert_and_respect_annotation_metadata(
205
215
  object_=output_format, annotation=SseOutputFormatParams, direction="write"
206
216
  ),
217
+ "generation_config": convert_and_respect_annotation_metadata(
218
+ object_=generation_config, annotation=GenerationConfigParams, direction="write"
219
+ ),
207
220
  "duration": duration,
208
221
  "speed": speed,
209
222
  "add_timestamps": add_timestamps,
@@ -248,6 +261,7 @@ class AsyncTtsClient:
248
261
  voice: TtsRequestVoiceSpecifierParams,
249
262
  output_format: OutputFormatParams,
250
263
  language: typing.Optional[SupportedLanguage] = OMIT,
264
+ generation_config: typing.Optional[GenerationConfigParams] = OMIT,
251
265
  duration: typing.Optional[float] = OMIT,
252
266
  speed: typing.Optional[ModelSpeed] = OMIT,
253
267
  request_options: typing.Optional[RequestOptions] = None,
@@ -266,6 +280,8 @@ class AsyncTtsClient:
266
280
 
267
281
  language : typing.Optional[SupportedLanguage]
268
282
 
283
+ generation_config : typing.Optional[GenerationConfigParams]
284
+
269
285
  duration : typing.Optional[float]
270
286
  The maximum duration of the audio in seconds. You do not usually need to specify this.
271
287
  If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -319,6 +335,9 @@ class AsyncTtsClient:
319
335
  "output_format": convert_and_respect_annotation_metadata(
320
336
  object_=output_format, annotation=OutputFormatParams, direction="write"
321
337
  ),
338
+ "generation_config": convert_and_respect_annotation_metadata(
339
+ object_=generation_config, annotation=GenerationConfigParams, direction="write"
340
+ ),
322
341
  "duration": duration,
323
342
  "speed": speed,
324
343
  },
@@ -345,6 +364,7 @@ class AsyncTtsClient:
345
364
  voice: TtsRequestVoiceSpecifierParams,
346
365
  output_format: SseOutputFormatParams,
347
366
  language: typing.Optional[SupportedLanguage] = OMIT,
367
+ generation_config: typing.Optional[GenerationConfigParams] = OMIT,
348
368
  duration: typing.Optional[float] = OMIT,
349
369
  speed: typing.Optional[ModelSpeed] = OMIT,
350
370
  add_timestamps: typing.Optional[bool] = OMIT,
@@ -367,6 +387,8 @@ class AsyncTtsClient:
367
387
 
368
388
  language : typing.Optional[SupportedLanguage]
369
389
 
390
+ generation_config : typing.Optional[GenerationConfigParams]
391
+
370
392
  duration : typing.Optional[float]
371
393
  The maximum duration of the audio in seconds. You do not usually need to specify this.
372
394
  If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
@@ -434,6 +456,9 @@ class AsyncTtsClient:
434
456
  "output_format": convert_and_respect_annotation_metadata(
435
457
  object_=output_format, annotation=SseOutputFormatParams, direction="write"
436
458
  ),
459
+ "generation_config": convert_and_respect_annotation_metadata(
460
+ object_=generation_config, annotation=GenerationConfigParams, direction="write"
461
+ ),
437
462
  "duration": duration,
438
463
  "speed": speed,
439
464
  "add_timestamps": add_timestamps,
@@ -2,6 +2,7 @@
2
2
 
3
3
  from .cancel_context_request import CancelContextRequestParams
4
4
  from .controls import ControlsParams
5
+ from .generation_config import GenerationConfigParams
5
6
  from .generation_request import GenerationRequestParams
6
7
  from .mp_3_output_format import Mp3OutputFormatParams
7
8
  from .output_format import OutputFormatParams, OutputFormat_Mp3Params, OutputFormat_RawParams, OutputFormat_WavParams
@@ -41,6 +42,7 @@ from .word_timestamps import WordTimestampsParams
41
42
  __all__ = [
42
43
  "CancelContextRequestParams",
43
44
  "ControlsParams",
45
+ "GenerationConfigParams",
44
46
  "GenerationRequestParams",
45
47
  "Mp3OutputFormatParams",
46
48
  "OutputFormatParams",
@@ -3,9 +3,9 @@
3
3
  import typing_extensions
4
4
  from .speed import SpeedParams
5
5
  import typing
6
- from ..types.emotion import Emotion
6
+ from ..types.emotion_deprecated import EmotionDeprecated
7
7
 
8
8
 
9
9
  class ControlsParams(typing_extensions.TypedDict):
10
10
  speed: SpeedParams
11
- emotion: typing.Sequence[Emotion]
11
+ emotion: typing.Sequence[EmotionDeprecated]
@@ -0,0 +1,26 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ import typing_extensions
5
+ from ..types.emotion import Emotion
6
+
7
+
8
+ class GenerationConfigParams(typing_extensions.TypedDict):
9
+ """
10
+ Configure the various attributes of the generated speech. These controls are only for `sonic-3` and have no effect on earlier models.
11
+ """
12
+
13
+ volume: typing_extensions.NotRequired[float]
14
+ """
15
+ Adjust the volume of the generated speech between 0.5x and 2.0x the original volume (default is 1.0x). Valid values are between 0.5 and 2.0 inclusive.
16
+ """
17
+
18
+ speed: typing_extensions.NotRequired[float]
19
+ """
20
+ Adjust the speed of the generated speech between 0.6x and 1.5x the original speed (default is 1.0x). Valid values are between 0.6 and 1.5 inclusive.
21
+ """
22
+
23
+ emotion: typing_extensions.NotRequired[Emotion]
24
+ """
25
+ Guide the emotion of the generated speech.
26
+ """
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
6
6
  import typing_extensions
7
7
  from ..types.supported_language import SupportedLanguage
8
8
  from .web_socket_raw_output_format import WebSocketRawOutputFormatParams
9
+ from .generation_config import GenerationConfigParams
9
10
  from ..types.model_speed import ModelSpeed
10
11
  from ..types.context_id import ContextId
11
12
  from ...core.serialization import FieldMetadata
@@ -25,6 +26,7 @@ class GenerationRequestParams(typing_extensions.TypedDict):
25
26
  voice: TtsRequestVoiceSpecifierParams
26
27
  language: typing_extensions.NotRequired[SupportedLanguage]
27
28
  output_format: WebSocketRawOutputFormatParams
29
+ generation_config: typing_extensions.NotRequired[GenerationConfigParams]
28
30
  duration: typing_extensions.NotRequired[float]
29
31
  """
30
32
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -5,6 +5,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
5
5
  import typing_extensions
6
6
  from ..types.supported_language import SupportedLanguage
7
7
  from .output_format import OutputFormatParams
8
+ from .generation_config import GenerationConfigParams
8
9
  from ..types.model_speed import ModelSpeed
9
10
 
10
11
 
@@ -18,6 +19,7 @@ class TtsRequestParams(typing_extensions.TypedDict):
18
19
  voice: TtsRequestVoiceSpecifierParams
19
20
  language: typing_extensions.NotRequired[SupportedLanguage]
20
21
  output_format: OutputFormatParams
22
+ generation_config: typing_extensions.NotRequired[GenerationConfigParams]
21
23
  duration: typing_extensions.NotRequired[float]
22
24
  """
23
25
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -5,6 +5,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
5
5
  import typing_extensions
6
6
  from ..types.supported_language import SupportedLanguage
7
7
  from .sse_output_format import SseOutputFormatParams
8
+ from .generation_config import GenerationConfigParams
8
9
  from ..types.model_speed import ModelSpeed
9
10
  from ..types.context_id import ContextId
10
11
 
@@ -19,6 +20,7 @@ class TtssseRequestParams(typing_extensions.TypedDict):
19
20
  voice: TtsRequestVoiceSpecifierParams
20
21
  language: typing_extensions.NotRequired[SupportedLanguage]
21
22
  output_format: SseOutputFormatParams
23
+ generation_config: typing_extensions.NotRequired[GenerationConfigParams]
22
24
  duration: typing_extensions.NotRequired[float]
23
25
  """
24
26
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -3,6 +3,7 @@
3
3
  import typing_extensions
4
4
  import typing_extensions
5
5
  from .output_format import OutputFormatParams
6
+ from .generation_config import GenerationConfigParams
6
7
  from .tts_request_voice_specifier import TtsRequestVoiceSpecifierParams
7
8
  from ...core.serialization import FieldMetadata
8
9
  from ..types.model_speed import ModelSpeed
@@ -15,6 +16,7 @@ class WebSocketTtsRequestParams(typing_extensions.TypedDict):
15
16
  """
16
17
 
17
18
  output_format: typing_extensions.NotRequired[OutputFormatParams]
19
+ generation_config: typing_extensions.NotRequired[GenerationConfigParams]
18
20
  transcript: typing_extensions.NotRequired[str]
19
21
  voice: TtsRequestVoiceSpecifierParams
20
22
  duration: typing_extensions.NotRequired[int]
@@ -4,7 +4,9 @@ from .cancel_context_request import CancelContextRequest
4
4
  from .context_id import ContextId
5
5
  from .controls import Controls
6
6
  from .emotion import Emotion
7
+ from .emotion_deprecated import EmotionDeprecated
7
8
  from .flush_id import FlushId
9
+ from .generation_config import GenerationConfig
8
10
  from .generation_request import GenerationRequest
9
11
  from .model_speed import ModelSpeed
10
12
  from .mp_3_output_format import Mp3OutputFormat
@@ -51,7 +53,9 @@ __all__ = [
51
53
  "ContextId",
52
54
  "Controls",
53
55
  "Emotion",
56
+ "EmotionDeprecated",
54
57
  "FlushId",
58
+ "GenerationConfig",
55
59
  "GenerationRequest",
56
60
  "ModelSpeed",
57
61
  "Mp3OutputFormat",
@@ -3,14 +3,14 @@
3
3
  from ...core.pydantic_utilities import UniversalBaseModel
4
4
  from .speed import Speed
5
5
  import typing
6
- from .emotion import Emotion
6
+ from .emotion_deprecated import EmotionDeprecated
7
7
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
8
8
  import pydantic
9
9
 
10
10
 
11
11
  class Controls(UniversalBaseModel):
12
12
  speed: Speed
13
- emotion: typing.List[Emotion]
13
+ emotion: typing.List[EmotionDeprecated]
14
14
 
15
15
  if IS_PYDANTIC_V2:
16
16
  model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
@@ -1,34 +1,3 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
- import typing
4
-
5
- Emotion = typing.Union[
6
- typing.Literal[
7
- "anger:lowest",
8
- "anger:low",
9
- "anger",
10
- "anger:high",
11
- "anger:highest",
12
- "positivity:lowest",
13
- "positivity:low",
14
- "positivity",
15
- "positivity:high",
16
- "positivity:highest",
17
- "surprise:lowest",
18
- "surprise:low",
19
- "surprise",
20
- "surprise:high",
21
- "surprise:highest",
22
- "sadness:lowest",
23
- "sadness:low",
24
- "sadness",
25
- "sadness:high",
26
- "sadness:highest",
27
- "curiosity:lowest",
28
- "curiosity:low",
29
- "curiosity",
30
- "curiosity:high",
31
- "curiosity:highest",
32
- ],
33
- typing.Any,
34
- ]
3
+ Emotion = str
@@ -0,0 +1,34 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ EmotionDeprecated = typing.Union[
6
+ typing.Literal[
7
+ "anger:lowest",
8
+ "anger:low",
9
+ "anger",
10
+ "anger:high",
11
+ "anger:highest",
12
+ "positivity:lowest",
13
+ "positivity:low",
14
+ "positivity",
15
+ "positivity:high",
16
+ "positivity:highest",
17
+ "surprise:lowest",
18
+ "surprise:low",
19
+ "surprise",
20
+ "surprise:high",
21
+ "surprise:highest",
22
+ "sadness:lowest",
23
+ "sadness:low",
24
+ "sadness",
25
+ "sadness:high",
26
+ "sadness:highest",
27
+ "curiosity:lowest",
28
+ "curiosity:low",
29
+ "curiosity",
30
+ "curiosity:high",
31
+ "curiosity:highest",
32
+ ],
33
+ typing.Any,
34
+ ]
@@ -0,0 +1,37 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from ...core.pydantic_utilities import UniversalBaseModel
4
+ import typing
5
+ import pydantic
6
+ from .emotion import Emotion
7
+ from ...core.pydantic_utilities import IS_PYDANTIC_V2
8
+
9
+
10
+ class GenerationConfig(UniversalBaseModel):
11
+ """
12
+ Configure the various attributes of the generated speech. These controls are only for `sonic-3` and have no effect on earlier models.
13
+ """
14
+
15
+ volume: typing.Optional[float] = pydantic.Field(default=None)
16
+ """
17
+ Adjust the volume of the generated speech between 0.5x and 2.0x the original volume (default is 1.0x). Valid values are between 0.5 and 2.0 inclusive.
18
+ """
19
+
20
+ speed: typing.Optional[float] = pydantic.Field(default=None)
21
+ """
22
+ Adjust the speed of the generated speech between 0.6x and 1.5x the original speed (default is 1.0x). Valid values are between 0.6 and 1.5 inclusive.
23
+ """
24
+
25
+ emotion: typing.Optional[Emotion] = pydantic.Field(default=None)
26
+ """
27
+ Guide the emotion of the generated speech.
28
+ """
29
+
30
+ if IS_PYDANTIC_V2:
31
+ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
32
+ else:
33
+
34
+ class Config:
35
+ frozen = True
36
+ smart_union = True
37
+ extra = pydantic.Extra.allow
@@ -6,6 +6,7 @@ import typing
6
6
  from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
7
7
  from .supported_language import SupportedLanguage
8
8
  from .web_socket_raw_output_format import WebSocketRawOutputFormat
9
+ from .generation_config import GenerationConfig
9
10
  from .model_speed import ModelSpeed
10
11
  from .context_id import ContextId
11
12
  import typing_extensions
@@ -27,6 +28,7 @@ class GenerationRequest(UniversalBaseModel):
27
28
  voice: TtsRequestVoiceSpecifier
28
29
  language: typing.Optional[SupportedLanguage] = None
29
30
  output_format: WebSocketRawOutputFormat
31
+ generation_config: typing.Optional[GenerationConfig] = None
30
32
  duration: typing.Optional[float] = pydantic.Field(default=None)
31
33
  """
32
34
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
6
6
  import typing
7
7
  from .supported_language import SupportedLanguage
8
8
  from .output_format import OutputFormat
9
+ from .generation_config import GenerationConfig
9
10
  from .model_speed import ModelSpeed
10
11
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
11
12
 
@@ -20,6 +21,7 @@ class TtsRequest(UniversalBaseModel):
20
21
  voice: TtsRequestVoiceSpecifier
21
22
  language: typing.Optional[SupportedLanguage] = None
22
23
  output_format: OutputFormat
24
+ generation_config: typing.Optional[GenerationConfig] = None
23
25
  duration: typing.Optional[float] = pydantic.Field(default=None)
24
26
  """
25
27
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -6,6 +6,7 @@ from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
6
6
  import typing
7
7
  from .supported_language import SupportedLanguage
8
8
  from .sse_output_format import SseOutputFormat
9
+ from .generation_config import GenerationConfig
9
10
  from .model_speed import ModelSpeed
10
11
  from .context_id import ContextId
11
12
  from ...core.pydantic_utilities import IS_PYDANTIC_V2
@@ -21,6 +22,7 @@ class TtssseRequest(UniversalBaseModel):
21
22
  voice: TtsRequestVoiceSpecifier
22
23
  language: typing.Optional[SupportedLanguage] = None
23
24
  output_format: SseOutputFormat
25
+ generation_config: typing.Optional[GenerationConfig] = None
24
26
  duration: typing.Optional[float] = pydantic.Field(default=None)
25
27
  """
26
28
  The maximum duration of the audio in seconds. You do not usually need to specify this.
@@ -4,6 +4,7 @@ from ...core.pydantic_utilities import UniversalBaseModel
4
4
  import pydantic
5
5
  import typing
6
6
  from .output_format import OutputFormat
7
+ from .generation_config import GenerationConfig
7
8
  from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
8
9
  import typing_extensions
9
10
  from ...core.serialization import FieldMetadata
@@ -18,6 +19,7 @@ class WebSocketTtsRequest(UniversalBaseModel):
18
19
  """
19
20
 
20
21
  output_format: typing.Optional[OutputFormat] = None
22
+ generation_config: typing.Optional[GenerationConfig] = None
21
23
  transcript: typing.Optional[str] = None
22
24
  voice: TtsRequestVoiceSpecifier
23
25
  duration: typing.Optional[int] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cartesia
3
- Version: 2.0.11
3
+ Version: 2.0.14
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -53,26 +53,36 @@ Instantiate and use the client with the following:
53
53
 
54
54
  ```python
55
55
  from cartesia import Cartesia
56
- from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
57
56
  import os
58
57
 
59
58
  client = Cartesia(
60
- api_key=os.getenv("CARTESIA_API_KEY"),
61
- )
62
- client.tts.bytes(
63
- model_id="sonic-2",
64
- transcript="Hello, world!",
65
- voice={
66
- "mode": "id",
67
- "id": "694f9389-aac1-45b6-b726-9d9369183238",
68
- },
69
- language="en",
70
- output_format={
71
- "container": "raw",
72
- "sample_rate": 44100,
73
- "encoding": "pcm_f32le",
74
- },
59
+ api_key=os.environ["CARTESIA_API_KEY"],
75
60
  )
61
+
62
+
63
+ def main():
64
+ with open("sonic.wav", "wb") as f:
65
+ bytes_iter = client.tts.bytes(
66
+ model_id="sonic-3",
67
+ transcript="Hello, world!",
68
+ voice={
69
+ "mode": "id",
70
+ "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
71
+ },
72
+ language="en",
73
+ output_format={
74
+ "container": "wav",
75
+ "sample_rate": 44100,
76
+ "encoding": "pcm_f32le",
77
+ },
78
+ )
79
+
80
+ for chunk in bytes_iter:
81
+ f.write(chunk)
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
76
86
  ```
77
87
 
78
88
  ## Async Client
@@ -81,31 +91,37 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
81
91
 
82
92
  ```python
83
93
  import asyncio
84
- import os
85
-
86
94
  from cartesia import AsyncCartesia
87
- from cartesia.tts import OutputFormat_Raw, TtsRequestIdSpecifier
95
+ import os
88
96
 
89
97
  client = AsyncCartesia(
90
- api_key=os.getenv("CARTESIA_API_KEY"),
98
+ api_key=os.environ["CARTESIA_API_KEY"],
91
99
  )
92
100
 
93
- async def main() -> None:
94
- async for output in client.tts.bytes(
95
- model_id="sonic-2",
96
- transcript="Hello, world!",
97
- voice={"id": "694f9389-aac1-45b6-b726-9d9369183238"},
98
- language="en",
99
- output_format={
100
- "container": "raw",
101
- "sample_rate": 44100,
102
- "encoding": "pcm_f32le",
103
- },
104
- ):
105
- print(f"Received chunk of size: {len(output)}")
106
101
 
102
+ async def main():
103
+ with open("sonic.wav", "wb") as f:
104
+ bytes_iter = client.tts.bytes(
105
+ model_id="sonic-3",
106
+ transcript="Hello, world!",
107
+ voice={
108
+ "mode": "id",
109
+ "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
110
+ },
111
+ language="en",
112
+ output_format={
113
+ "container": "wav",
114
+ "sample_rate": 44100,
115
+ "encoding": "pcm_f32le",
116
+ },
117
+ )
118
+
119
+ async for chunk in bytes_iter:
120
+ f.write(chunk)
107
121
 
108
- asyncio.run(main())
122
+
123
+ if __name__ == "__main__":
124
+ asyncio.run(main())
109
125
  ```
110
126
 
111
127
  ## Exception Handling
@@ -129,7 +145,6 @@ The SDK supports streaming responses as well, returning a generator that you can
129
145
 
130
146
  ```python
131
147
  from cartesia import Cartesia
132
- from cartesia.tts import Controls, OutputFormat_RawParams, TtsRequestIdSpecifierParams
133
148
  import os
134
149
 
135
150
  def get_tts_chunks():
@@ -137,14 +152,11 @@ def get_tts_chunks():
137
152
  api_key=os.getenv("CARTESIA_API_KEY"),
138
153
  )
139
154
  response = client.tts.sse(
140
- model_id="sonic-2",
155
+ model_id="sonic-3",
141
156
  transcript="Hello world!",
142
157
  voice={
158
+ "mode": "id",
143
159
  "id": "f9836c6e-a0bd-460e-9d3c-f7299fa60f94",
144
- "experimental_controls": {
145
- "speed": "normal",
146
- "emotion": [],
147
- },
148
160
  },
149
161
  language="en",
150
162
  output_format={
@@ -188,9 +200,9 @@ ws = client.tts.websocket()
188
200
 
189
201
  # Generate and stream audio using the websocket
190
202
  for output in ws.send(
191
- model_id="sonic-2", # see: https://docs.cartesia.ai/getting-started/available-models
203
+ model_id="sonic-3", # see: https://docs.cartesia.ai/build-with-cartesia/tts-models
192
204
  transcript=transcript,
193
- voice={"id": voice_id},
205
+ voice={"mode": "id", "id": voice_id},
194
206
  stream=True,
195
207
  output_format={
196
208
  "container": "raw",
@@ -252,7 +264,7 @@ ws.send("done")
252
264
  for result in ws.receive():
253
265
  if result['type'] == 'transcript':
254
266
  print(f"Transcription: {result['text']}")
255
-
267
+
256
268
  # Handle word-level timestamps if available
257
269
  if 'words' in result and result['words']:
258
270
  print("Word-level timestamps:")
@@ -261,7 +273,7 @@ for result in ws.receive():
261
273
  start = word_info['start']
262
274
  end = word_info['end']
263
275
  print(f" '{word}': {start:.2f}s - {end:.2f}s")
264
-
276
+
265
277
  if result['is_final']:
266
278
  print("Final result received")
267
279
  elif result['type'] == 'done':
@@ -286,7 +298,7 @@ async def streaming_stt_example():
286
298
  and demonstrates the new endpointing and word timestamp features.
287
299
  """
288
300
  client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
289
-
301
+
290
302
  try:
291
303
  # Create websocket connection with voice activity detection
292
304
  ws = await client.stt.websocket(
@@ -297,24 +309,24 @@ async def streaming_stt_example():
297
309
  min_volume=0.15, # Volume threshold for voice activity detection
298
310
  max_silence_duration_secs=0.3, # Maximum silence duration before endpointing
299
311
  )
300
-
312
+
301
313
  # Simulate streaming audio data (replace with your audio source)
302
314
  async def audio_stream():
303
315
  """Simulate real-time audio streaming - replace with actual audio capture"""
304
316
  # Load audio file for simulation
305
317
  with open("path/to/audio.wav", "rb") as f:
306
318
  audio_data = f.read()
307
-
319
+
308
320
  # Stream in 100ms chunks (realistic for real-time processing)
309
321
  chunk_size = int(16000 * 0.1 * 2) # 100ms at 16kHz, 16-bit
310
-
322
+
311
323
  for i in range(0, len(audio_data), chunk_size):
312
324
  chunk = audio_data[i:i + chunk_size]
313
325
  if chunk:
314
326
  yield chunk
315
327
  # Simulate real-time streaming delay
316
328
  await asyncio.sleep(0.1)
317
-
329
+
318
330
  # Send audio and receive results concurrently
319
331
  async def send_audio():
320
332
  """Send audio chunks to the STT websocket"""
@@ -324,31 +336,31 @@ async def streaming_stt_example():
324
336
  print(f"Sent audio chunk of {len(chunk)} bytes")
325
337
  # Small delay to simulate realtime applications
326
338
  await asyncio.sleep(0.02)
327
-
339
+
328
340
  # Signal end of audio stream
329
341
  await ws.send("finalize")
330
342
  await ws.send("done")
331
343
  print("Audio streaming completed")
332
-
344
+
333
345
  except Exception as e:
334
346
  print(f"Error sending audio: {e}")
335
-
347
+
336
348
  async def receive_transcripts():
337
349
  """Receive and process transcription results with word timestamps"""
338
350
  full_transcript = ""
339
351
  all_word_timestamps = []
340
-
352
+
341
353
  try:
342
354
  async for result in ws.receive():
343
355
  if result['type'] == 'transcript':
344
356
  text = result['text']
345
357
  is_final = result['is_final']
346
-
358
+
347
359
  # Handle word-level timestamps
348
360
  if 'words' in result and result['words']:
349
361
  word_timestamps = result['words']
350
362
  all_word_timestamps.extend(word_timestamps)
351
-
363
+
352
364
  if is_final:
353
365
  print("Word-level timestamps:")
354
366
  for word_info in word_timestamps:
@@ -356,7 +368,7 @@ async def streaming_stt_example():
356
368
  start = word_info['start']
357
369
  end = word_info['end']
358
370
  print(f" '{word}': {start:.2f}s - {end:.2f}s")
359
-
371
+
360
372
  if is_final:
361
373
  # Final result - this text won't change
362
374
  full_transcript += text + " "
@@ -364,30 +376,30 @@ async def streaming_stt_example():
364
376
  else:
365
377
  # Partial result - may change as more audio is processed
366
378
  print(f"PARTIAL: {text}")
367
-
379
+
368
380
  elif result['type'] == 'done':
369
381
  print("Transcription completed")
370
382
  break
371
-
383
+
372
384
  except Exception as e:
373
385
  print(f"Error receiving transcripts: {e}")
374
-
386
+
375
387
  return full_transcript.strip(), all_word_timestamps
376
-
388
+
377
389
  print("Starting streaming STT...")
378
-
390
+
379
391
  # Use asyncio.gather to run audio sending and transcript receiving concurrently
380
392
  _, (final_transcript, word_timestamps) = await asyncio.gather(
381
393
  send_audio(),
382
394
  receive_transcripts()
383
395
  )
384
-
396
+
385
397
  print(f"\nComplete transcript: {final_transcript}")
386
398
  print(f"Total words with timestamps: {len(word_timestamps)}")
387
-
399
+
388
400
  # Clean up
389
401
  await ws.close()
390
-
402
+
391
403
  except Exception as e:
392
404
  print(f"STT streaming error: {e}")
393
405
  finally:
@@ -442,7 +454,7 @@ import os
442
454
 
443
455
  async def transcribe_file():
444
456
  client = AsyncCartesia(api_key=os.getenv("CARTESIA_API_KEY"))
445
-
457
+
446
458
  with open("path/to/audio.wav", "rb") as audio_file:
447
459
  response = await client.stt.transcribe(
448
460
  file=audio_file,
@@ -450,14 +462,14 @@ async def transcribe_file():
450
462
  language="en",
451
463
  timestamp_granularities=["word"],
452
464
  )
453
-
465
+
454
466
  print(f"Transcribed text: {response.text}")
455
-
467
+
456
468
  # Process word timestamps
457
469
  if response.words:
458
470
  for word_info in response.words:
459
471
  print(f"'{word_info.word}': {word_info.start:.2f}s - {word_info.end:.2f}s")
460
-
472
+
461
473
  await client.close()
462
474
 
463
475
  asyncio.run(transcribe_file())
@@ -664,6 +676,3 @@ $ git commit --amend -m "manually regenerate from docs" # optional
664
676
 
665
677
  From https://github.com/cartesia-ai/docs click `Actions` then `Release Python SDK`. (Requires permissions.)
666
678
 
667
-
668
-
669
-
@@ -1,4 +1,4 @@
1
- cartesia/__init__.py,sha256=P8YXd1NsmEHQOF4p0MpPMGLOSy_0cIPHOnFe-iV94oU,10311
1
+ cartesia/__init__.py,sha256=bANRu5PeAnbf6O7MXltmngXPJa_G-xo7mck3sZy9B_Y,10463
2
2
  cartesia/api_status/__init__.py,sha256=_dHNLdknrBjxHtU2PvLumttJM-JTQhJQqhhAQkLqt_U,168
3
3
  cartesia/api_status/client.py,sha256=GJ9Dq8iCn3hn8vCIqc6k1fCGEhSz0T0kaPGcdFnbMDY,3146
4
4
  cartesia/api_status/requests/__init__.py,sha256=ilEMzEy1JEw484CuL92bX5lHGOznc62pjiDMgiZ0tKM,130
@@ -19,7 +19,7 @@ cartesia/base_client.py,sha256=igAZOMDXz2Nv58oXHa7I9UfgxVN48drqhEmfsCCQlg8,6701
19
19
  cartesia/client.py,sha256=LoJjlJW2kJA-hyDt-Wu7QuKQsiTiLQfLYZjsjtewPJM,6537
20
20
  cartesia/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
21
21
  cartesia/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
22
- cartesia/core/client_wrapper.py,sha256=Iww9Ft7JDV4a7w9mcWLjAOOkzJGRyla5eLGeQWtm0pQ,1855
22
+ cartesia/core/client_wrapper.py,sha256=0KSb2fMEKPh62KA7qLmVkQjJ20sJzkJEeU8QQqwh2Sk,1855
23
23
  cartesia/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
24
24
  cartesia/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
25
25
  cartesia/core/http_client.py,sha256=KL5RGa0y4n8nX0-07WRg4ZQUTq30sc-XJbWcP5vjBDg,19552
@@ -74,25 +74,26 @@ cartesia/stt/types/timestamp_granularity.py,sha256=Oe39JvLeMgR2BIJnx32abhvs05dJe
74
74
  cartesia/stt/types/transcript_message.py,sha256=J-MchlahI96nVBiMSLJrEOXFw2pBShbMXVocysQRnrY,1693
75
75
  cartesia/stt/types/transcription_response.py,sha256=QMcD6eLmp_Z2uaRLVyxYYIdoiRiVSGhBoxN3kjRTK2I,1190
76
76
  cartesia/stt/types/transcription_word.py,sha256=yxTndKXNmToPOM6_F_QfF-B0dE6Kx8-UwBpHLj2_zWk,803
77
- cartesia/tts/__init__.py,sha256=DwNzIilOcdNUbeIHIknngnW8WyZ6K5xZremSQQoo5VM,4927
78
- cartesia/tts/_async_websocket.py,sha256=YG0NJpfQU4j48Gy2riWu1ItelPFX-IUvSFD6eMBvfGM,19454
79
- cartesia/tts/_websocket.py,sha256=K93vHOdxhF4-Duk8xunNnIpvkAT_ztfAtaomD5im8c0,19247
80
- cartesia/tts/client.py,sha256=Oot_ctyaqBgRMpyBUaMwh3z1M62oPKVMXNvMkmo1fRw,18180
81
- cartesia/tts/requests/__init__.py,sha256=SeITRF5QSAjOE5pNxbD6VffwwttMnQwuv0Z5n9h7BKs,3418
77
+ cartesia/tts/__init__.py,sha256=KmlvJPusv7yRI3OmkEn3GlxqITbfewLVO5S0SJkqV5M,5079
78
+ cartesia/tts/_async_websocket.py,sha256=zcLNLWT7ndf1izMxKLkcyMBNM_nCcW5Nt14e0Z3DOEM,19754
79
+ cartesia/tts/_websocket.py,sha256=A9jTgP_PJhr9JMFk91ZlBQSrh9JSnoDzzwlhnuAuOTE,19546
80
+ cartesia/tts/client.py,sha256=mJXJG9JliWHw7UYCCd9evIW3gaSr3JYORW606E8lHzU,19607
81
+ cartesia/tts/requests/__init__.py,sha256=zS0ny3c6HXr2l6D9TiBmMuyp-tbVuszhBiOQ5RdcQyw,3502
82
82
  cartesia/tts/requests/cancel_context_request.py,sha256=Wl8g-o5vwl9ENm-H1wsLx441FkIR_4Wt5UYtuWce2Yw,431
83
- cartesia/tts/requests/controls.py,sha256=xzUJlfgqhaJ1A-JD0LTpoHYk4iEpCuGpSD7qE4YYsRg,285
84
- cartesia/tts/requests/generation_request.py,sha256=JQPumk0UMCHDQrcUvuqeDsdc8LCJAEolSs10LpJzK00,3083
83
+ cartesia/tts/requests/controls.py,sha256=TkywdstN4X9odGF_HfN25zYXcCxaJS8Q0H1HR0nv_rg,316
84
+ cartesia/tts/requests/generation_config.py,sha256=ZFed-oneBwyxkkI1DHmmvtYso7FjTYM01ApS1omr1ms,917
85
+ cartesia/tts/requests/generation_request.py,sha256=rZVpfwUzSea72b5gqPY47Fgunu_IJQM2PiVNHqCR9Jk,3214
85
86
  cartesia/tts/requests/mp_3_output_format.py,sha256=HBM6452KdWD9tGa9QXNyUZcH1OlJrXt_PIwo2Jt3l2Q,441
86
87
  cartesia/tts/requests/output_format.py,sha256=8TKu9AAeHCR5L4edzYch8FIYIldn4bM7ySrsCl8W_g8,842
87
88
  cartesia/tts/requests/phoneme_timestamps.py,sha256=ft81nmqElZAnvTBT27lY6YWfF18ZGsCx3Y1XHv9J7cM,267
88
89
  cartesia/tts/requests/raw_output_format.py,sha256=WigDQlM_YkLk_-GK1_pNseGq8g-_POO84Su7jqSLsHQ,441
89
90
  cartesia/tts/requests/speed.py,sha256=-YGBWwh7_VtCBnYlT5EVsnrmcHFMEBTxy9LathZhkMA,259
90
91
  cartesia/tts/requests/sse_output_format.py,sha256=dsRyxFCD3Qt3hTppxV7HJhphPx3jTkZhryMXUP-Soc8,417
91
- cartesia/tts/requests/tts_request.py,sha256=KBoahYfPbDENlEWsqnR4z1ZIhGIJwhLrzQIzkbtqtzE,1021
92
+ cartesia/tts/requests/tts_request.py,sha256=CUFMg_U2BhJQAxrqLAv4tfxAN326ItiCi0fQfJFi4lU,1152
92
93
  cartesia/tts/requests/tts_request_embedding_specifier.py,sha256=-M54ZjV0H5LPwcKtz0bOVqlkvO1pPiMbqMbVBMko3Ns,565
93
94
  cartesia/tts/requests/tts_request_id_specifier.py,sha256=-0ClfyJnnaH0uAcF5r84s3cM_cw2wT39dp6T4JYzOQ8,536
94
95
  cartesia/tts/requests/tts_request_voice_specifier.py,sha256=eGzL4aVGq4gKPxeglsV7-wuhxg8x33Qth3uFTTytgeI,337
95
- cartesia/tts/requests/ttssse_request.py,sha256=S8EkuEtveOetkcydinfLr5lS66PYpLQTNesyRIf_LwI,2007
96
+ cartesia/tts/requests/ttssse_request.py,sha256=IZ4Urm23VQBhuJmA8CqZegZnTVIBqfZWQ9ve2vy2gXc,2138
96
97
  cartesia/tts/requests/wav_output_format.py,sha256=qiipmT5hWsa8J-fwW1EH_rnUAX_zOUpGJUNzuLc65r4,181
97
98
  cartesia/tts/requests/web_socket_base_response.py,sha256=zCjHw-FaNJMOcHiAb2NQWrBBfrzU5rc95vqDp7y9RmA,315
98
99
  cartesia/tts/requests/web_socket_chunk_response.py,sha256=4fVPJH-ZZb8lJKwqyYGx5wyeYWzfuThGxMRXC6ku4bA,233
@@ -106,16 +107,18 @@ cartesia/tts/requests/web_socket_response.py,sha256=kS46YN94ilUn4qjpt1TpauZApe0N
106
107
  cartesia/tts/requests/web_socket_stream_options.py,sha256=VIvblFw9hGZvDzFpOnC11G0NvrFSVt-1-0sY5rpcZPI,232
107
108
  cartesia/tts/requests/web_socket_timestamps_response.py,sha256=MK3zN2Q_PVWJtX5DidNB0uXoF2o33rv6qCYPVaourxY,351
108
109
  cartesia/tts/requests/web_socket_tts_output.py,sha256=pX2uf0XVdziFhXCydwLlVOWb-LvBiuq-cBI6R1INiMg,760
109
- cartesia/tts/requests/web_socket_tts_request.py,sha256=1jdRjRAO7z-KLOyp8FcDoQh933RGt-ZPR3E8Vz3XPnQ,1795
110
+ cartesia/tts/requests/web_socket_tts_request.py,sha256=9IqZKwM8YSDoDqYNPQ6DrcRGfuaAExD0KIPC0Ptaq1U,1926
110
111
  cartesia/tts/requests/word_timestamps.py,sha256=WMfBJtETi6wTpES0pYZCFfFRfEbzWE-RtosDJ5seUWg,261
111
112
  cartesia/tts/socket_client.py,sha256=zTPayHbgy-yQQ50AE1HXN4GMyanisZcLXf7Ds1paYks,11621
112
- cartesia/tts/types/__init__.py,sha256=rXphJ9b9nSYYrepr2ssG6ghtQAOQBQcLegxbl-XG3tw,3438
113
+ cartesia/tts/types/__init__.py,sha256=VsVhynuJM_G3zHAzkAtB8M6eK_tq0Pa76FOAiulbRBc,3585
113
114
  cartesia/tts/types/cancel_context_request.py,sha256=zInhk3qRZsSc0F1aYJ-Q5BHJsosTrb22IJWhzue-eKE,856
114
115
  cartesia/tts/types/context_id.py,sha256=UCEtq5xFGOeBCECcY6Y-gYVe_Peg1hFhH9YYOkpApQg,81
115
- cartesia/tts/types/controls.py,sha256=H4CSu79mM1Ld4NZx_5uXw3EwRzTEMQRxKBRvFpcFb8Y,644
116
- cartesia/tts/types/emotion.py,sha256=zocyDcHTiFFnNRgo2YLMi70iGyffa080B4mkg9lcqVc,764
116
+ cartesia/tts/types/controls.py,sha256=SxeSPZ4KgvRiUawOUI9mycASv6ekQ11vZYKOMtZz5TU,675
117
+ cartesia/tts/types/emotion.py,sha256=N5E5Tf7L9tHcH-MB5fDPEFusotygu85ybEc-YeslVjc,79
118
+ cartesia/tts/types/emotion_deprecated.py,sha256=WQuI5pXbzgpNq4kT14NMfukCJPN58GbmTtPScMMLy4I,774
117
119
  cartesia/tts/types/flush_id.py,sha256=HCIKo9o8d7YWKtaSNU3TEvfUVBju93ckGQy01Z9wLcE,79
118
- cartesia/tts/types/generation_request.py,sha256=ZGVXmHZLaZg7kEg1cVGXLpr8uB3btr2eZt0NEJRZnSU,3582
120
+ cartesia/tts/types/generation_config.py,sha256=lIb52e8Ua777uvFnFTYn1NghxpzSTMC4QmDlV1cturU,1332
121
+ cartesia/tts/types/generation_request.py,sha256=qO7XKzvwIp8Foglv5_1DJL1pCZLVyea0fQ0oKJw0fGw,3694
119
122
  cartesia/tts/types/model_speed.py,sha256=iiTj8V0piFCX2FZh5B8EkgRhZDlj4z3VFcQhp66e7y8,160
120
123
  cartesia/tts/types/mp_3_output_format.py,sha256=LQ1-rEYjkK6XXWoj_Z7bezsguPpNI_SmprlIipsyNMI,875
121
124
  cartesia/tts/types/natural_specifier.py,sha256=K526P1RRuBGy80hyd_tX8tohPrE8DR9EgTCxS5wce0o,188
@@ -127,11 +130,11 @@ cartesia/tts/types/raw_output_format.py,sha256=ir5QxW986P8qB14pMD5PVsAgc0bdC37i7
127
130
  cartesia/tts/types/speed.py,sha256=4c5WdxocBw6WSMnundSaNnceUeooU0vikhy00FW6M-w,239
128
131
  cartesia/tts/types/sse_output_format.py,sha256=1_GB3rftQYAsXO6WrgQmzr-tsjCntHCVgKeTjay7M9g,819
129
132
  cartesia/tts/types/supported_language.py,sha256=riDRduThMbMWAq9i2uCfxhwVTpgaFwNDZ9LhEIl4zHY,237
130
- cartesia/tts/types/tts_request.py,sha256=FGcxW-siiQpEzJZSHMET3nDSYHSzRt3WSTO-cCEz9u4,1376
133
+ cartesia/tts/types/tts_request.py,sha256=TkngMxyGKnjQvIG5u4qFx9TKcohuLyjI1UeXv8xbj2U,1488
131
134
  cartesia/tts/types/tts_request_embedding_specifier.py,sha256=eL_qCEr4pvWfy4qp9hZBuVdCincX5DBVqfv1vLt2_Vk,942
132
135
  cartesia/tts/types/tts_request_id_specifier.py,sha256=ktGdkkTRQ9scA-lt8qJ2jn_E5WzoOK8AXMrVqi71gf0,906
133
136
  cartesia/tts/types/tts_request_voice_specifier.py,sha256=p-3UQ62uFL1SgbX73Ex1D_V73Ef0wmT1ApOt1iLZmwE,307
134
- cartesia/tts/types/ttssse_request.py,sha256=6KvDQYzetwbFOVvkMWDj94Biz08EZaiX6V1lChsy49U,2423
137
+ cartesia/tts/types/ttssse_request.py,sha256=QZa0LOwhtsxLFaTxCGA0EzMOYqp7tVu-ezmu-ibcmiA,2535
135
138
  cartesia/tts/types/wav_output_format.py,sha256=OTAgVn_gBMk252XO12kiNI9lKrbw3n38aBAiqlG5mdU,531
136
139
  cartesia/tts/types/web_socket_base_response.py,sha256=MWoTt1rGRqUQ8BOad1Zk2SA-i0E8a3JwPLSiehIbFj4,672
137
140
  cartesia/tts/types/web_socket_chunk_response.py,sha256=VOPXAlyGFdnfC69KxqDWDo1PPMydvQKmAypoWfbW8_s,593
@@ -145,7 +148,7 @@ cartesia/tts/types/web_socket_response.py,sha256=fUQbJ6yFzZbzUZPuQWgkFdzP8-FMiKT
145
148
  cartesia/tts/types/web_socket_stream_options.py,sha256=MhDSxBFqMuQeWjoyPqXVnTEzLjF8g6aojeigb5dQUgU,596
146
149
  cartesia/tts/types/web_socket_timestamps_response.py,sha256=kuWXI82ncF1QapnaHEjwrL84qWob7ByQU-yh1e0IEmk,667
147
150
  cartesia/tts/types/web_socket_tts_output.py,sha256=uvkv0smTBhdm18Rl17C0Ml4Inh79YBHNzAcKnZBs14Y,979
148
- cartesia/tts/types/web_socket_tts_request.py,sha256=Gx8kSINX__VhQ3In3R1-4fq0bfjaMe7iL-M8nDNt0fQ,2150
151
+ cartesia/tts/types/web_socket_tts_request.py,sha256=mBVFoOdZDlxm2cQbmPTHgQjENfM4xhm_DywlTm5OtGI,2262
149
152
  cartesia/tts/types/word_timestamps.py,sha256=XZ2Q0prdb3F9c3AiOKXu4s3A3jBxE-qIt1npHOf16R0,631
150
153
  cartesia/tts/utils/constants.py,sha256=1CHa5flJf8--L_eYyOyOiWJNZ-Q81ufHZxDbJs8xYSk,418
151
154
  cartesia/tts/utils/tts.py,sha256=u7PgPxlJs6fcQTfr-jqAvBCAaK3JWLhF5QF4s-PwoMo,2093
@@ -198,7 +201,7 @@ cartesia/voices/types/voice_expand_options.py,sha256=e4FroWdlxEE-LXQfT1RWlGHtswl
198
201
  cartesia/voices/types/voice_id.py,sha256=GDoXcRVeIm-V21R4suxG2zqLD3DLYkXE9kgizadzFKo,79
199
202
  cartesia/voices/types/voice_metadata.py,sha256=4KNGjXMUKm3niv-NvKIFVGtiilpH13heuzKcZYNQxk4,1181
200
203
  cartesia/voices/types/weight.py,sha256=XqDU7_JItNUb5QykIDqTbELlRYQdbt2SviRgW0w2LKo,80
201
- cartesia-2.0.11.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
202
- cartesia-2.0.11.dist-info/METADATA,sha256=gnwMfy2FMzDo87EdMntSM9RCPZB5a4c67ItBNT98EKg,20830
203
- cartesia-2.0.11.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
204
- cartesia-2.0.11.dist-info/RECORD,,
204
+ cartesia-2.0.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
205
+ cartesia-2.0.14.dist-info/METADATA,sha256=PVHHNLFx-PEOKbszJkawRJ6xxooIpJpxGNRM8wtBa8k,20671
206
+ cartesia-2.0.14.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
207
+ cartesia-2.0.14.dist-info/RECORD,,