cartesia 1.4.0__py3-none-any.whl → 2.0.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartesia/__init__.py +292 -3
- cartesia/api_status/__init__.py +6 -0
- cartesia/api_status/client.py +104 -0
- cartesia/api_status/requests/__init__.py +5 -0
- cartesia/api_status/requests/api_info.py +8 -0
- cartesia/api_status/types/__init__.py +5 -0
- cartesia/api_status/types/api_info.py +20 -0
- cartesia/base_client.py +160 -0
- cartesia/client.py +163 -40
- cartesia/core/__init__.py +47 -0
- cartesia/core/api_error.py +15 -0
- cartesia/core/client_wrapper.py +55 -0
- cartesia/core/datetime_utils.py +28 -0
- cartesia/core/file.py +67 -0
- cartesia/core/http_client.py +499 -0
- cartesia/core/jsonable_encoder.py +101 -0
- cartesia/core/pydantic_utilities.py +296 -0
- cartesia/core/query_encoder.py +58 -0
- cartesia/core/remove_none_from_dict.py +11 -0
- cartesia/core/request_options.py +35 -0
- cartesia/core/serialization.py +272 -0
- cartesia/datasets/__init__.py +24 -0
- cartesia/datasets/client.py +392 -0
- cartesia/datasets/requests/__init__.py +15 -0
- cartesia/datasets/requests/create_dataset_request.py +7 -0
- cartesia/datasets/requests/dataset.py +9 -0
- cartesia/datasets/requests/dataset_file.py +9 -0
- cartesia/datasets/requests/paginated_dataset_files.py +10 -0
- cartesia/datasets/requests/paginated_datasets.py +10 -0
- cartesia/datasets/types/__init__.py +17 -0
- cartesia/datasets/types/create_dataset_request.py +19 -0
- cartesia/datasets/types/dataset.py +21 -0
- cartesia/datasets/types/dataset_file.py +21 -0
- cartesia/datasets/types/file_purpose.py +5 -0
- cartesia/datasets/types/paginated_dataset_files.py +21 -0
- cartesia/datasets/types/paginated_datasets.py +21 -0
- cartesia/embedding/__init__.py +5 -0
- cartesia/embedding/types/__init__.py +5 -0
- cartesia/embedding/types/embedding.py +201 -0
- cartesia/environment.py +7 -0
- cartesia/infill/__init__.py +2 -0
- cartesia/infill/client.py +318 -0
- cartesia/tts/__init__.py +167 -0
- cartesia/{_async_websocket.py → tts/_async_websocket.py} +159 -84
- cartesia/tts/_websocket.py +430 -0
- cartesia/tts/client.py +407 -0
- cartesia/tts/requests/__init__.py +76 -0
- cartesia/tts/requests/cancel_context_request.py +17 -0
- cartesia/tts/requests/controls.py +11 -0
- cartesia/tts/requests/generation_request.py +53 -0
- cartesia/tts/requests/mp_3_output_format.py +11 -0
- cartesia/tts/requests/output_format.py +30 -0
- cartesia/tts/requests/phoneme_timestamps.py +10 -0
- cartesia/tts/requests/raw_output_format.py +11 -0
- cartesia/tts/requests/speed.py +7 -0
- cartesia/tts/requests/tts_request.py +24 -0
- cartesia/tts/requests/tts_request_embedding_specifier.py +16 -0
- cartesia/tts/requests/tts_request_id_specifier.py +16 -0
- cartesia/tts/requests/tts_request_voice_specifier.py +7 -0
- cartesia/tts/requests/wav_output_format.py +7 -0
- cartesia/tts/requests/web_socket_base_response.py +11 -0
- cartesia/tts/requests/web_socket_chunk_response.py +8 -0
- cartesia/tts/requests/web_socket_done_response.py +7 -0
- cartesia/tts/requests/web_socket_error_response.py +7 -0
- cartesia/tts/requests/web_socket_flush_done_response.py +9 -0
- cartesia/tts/requests/web_socket_phoneme_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_raw_output_format.py +11 -0
- cartesia/tts/requests/web_socket_request.py +7 -0
- cartesia/tts/requests/web_socket_response.py +69 -0
- cartesia/tts/requests/web_socket_stream_options.py +8 -0
- cartesia/tts/requests/web_socket_timestamps_response.py +9 -0
- cartesia/tts/requests/web_socket_tts_output.py +18 -0
- cartesia/tts/requests/web_socket_tts_request.py +24 -0
- cartesia/tts/requests/word_timestamps.py +10 -0
- cartesia/tts/socket_client.py +302 -0
- cartesia/tts/types/__init__.py +90 -0
- cartesia/tts/types/cancel_context_request.py +28 -0
- cartesia/tts/types/context_id.py +3 -0
- cartesia/tts/types/controls.py +22 -0
- cartesia/tts/types/emotion.py +29 -0
- cartesia/tts/types/flush_id.py +3 -0
- cartesia/tts/types/generation_request.py +66 -0
- cartesia/tts/types/mp_3_output_format.py +23 -0
- cartesia/tts/types/natural_specifier.py +5 -0
- cartesia/tts/types/numerical_specifier.py +3 -0
- cartesia/tts/types/output_format.py +58 -0
- cartesia/tts/types/phoneme_timestamps.py +21 -0
- cartesia/tts/types/raw_encoding.py +5 -0
- cartesia/tts/types/raw_output_format.py +22 -0
- cartesia/tts/types/speed.py +7 -0
- cartesia/tts/types/supported_language.py +7 -0
- cartesia/tts/types/tts_request.py +35 -0
- cartesia/tts/types/tts_request_embedding_specifier.py +27 -0
- cartesia/tts/types/tts_request_id_specifier.py +27 -0
- cartesia/tts/types/tts_request_voice_specifier.py +7 -0
- cartesia/tts/types/wav_output_format.py +17 -0
- cartesia/tts/types/web_socket_base_response.py +22 -0
- cartesia/tts/types/web_socket_chunk_response.py +20 -0
- cartesia/tts/types/web_socket_done_response.py +17 -0
- cartesia/tts/types/web_socket_error_response.py +19 -0
- cartesia/tts/types/web_socket_flush_done_response.py +21 -0
- cartesia/tts/types/web_socket_phoneme_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_raw_output_format.py +22 -0
- cartesia/tts/types/web_socket_request.py +7 -0
- cartesia/tts/types/web_socket_response.py +124 -0
- cartesia/tts/types/web_socket_stream_options.py +19 -0
- cartesia/tts/types/web_socket_timestamps_response.py +20 -0
- cartesia/tts/types/web_socket_tts_output.py +27 -0
- cartesia/tts/types/web_socket_tts_request.py +36 -0
- cartesia/tts/types/word_timestamps.py +21 -0
- cartesia/tts/utils/tts.py +64 -0
- cartesia/tts/utils/types.py +70 -0
- cartesia/version.py +3 -1
- cartesia/voice_changer/__init__.py +27 -0
- cartesia/voice_changer/client.py +395 -0
- cartesia/voice_changer/requests/__init__.py +15 -0
- cartesia/voice_changer/requests/streaming_response.py +36 -0
- cartesia/voice_changer/types/__init__.py +17 -0
- cartesia/voice_changer/types/output_format_container.py +5 -0
- cartesia/voice_changer/types/streaming_response.py +62 -0
- cartesia/voices/__init__.py +71 -0
- cartesia/voices/client.py +1053 -0
- cartesia/voices/requests/__init__.py +27 -0
- cartesia/voices/requests/create_voice_request.py +23 -0
- cartesia/voices/requests/embedding_response.py +8 -0
- cartesia/voices/requests/embedding_specifier.py +10 -0
- cartesia/voices/requests/id_specifier.py +10 -0
- cartesia/voices/requests/localize_dialect.py +8 -0
- cartesia/voices/requests/localize_voice_request.py +15 -0
- cartesia/voices/requests/mix_voice_specifier.py +7 -0
- cartesia/voices/requests/mix_voices_request.py +9 -0
- cartesia/voices/requests/update_voice_request.py +15 -0
- cartesia/voices/requests/voice.py +39 -0
- cartesia/voices/requests/voice_metadata.py +36 -0
- cartesia/voices/types/__init__.py +45 -0
- cartesia/voices/types/base_voice_id.py +5 -0
- cartesia/voices/types/clone_mode.py +5 -0
- cartesia/voices/types/create_voice_request.py +34 -0
- cartesia/voices/types/embedding_response.py +20 -0
- cartesia/voices/types/embedding_specifier.py +22 -0
- cartesia/voices/types/gender.py +5 -0
- cartesia/voices/types/id_specifier.py +22 -0
- cartesia/voices/types/localize_dialect.py +8 -0
- cartesia/voices/types/localize_english_dialect.py +5 -0
- cartesia/voices/types/localize_portuguese_dialect.py +5 -0
- cartesia/voices/types/localize_spanish_dialect.py +5 -0
- cartesia/voices/types/localize_target_language.py +7 -0
- cartesia/voices/types/localize_voice_request.py +26 -0
- cartesia/voices/types/mix_voice_specifier.py +7 -0
- cartesia/voices/types/mix_voices_request.py +20 -0
- cartesia/voices/types/update_voice_request.py +27 -0
- cartesia/voices/types/voice.py +50 -0
- cartesia/voices/types/voice_id.py +3 -0
- cartesia/voices/types/voice_metadata.py +48 -0
- cartesia/voices/types/weight.py +3 -0
- cartesia-2.0.0a2.dist-info/METADATA +307 -0
- cartesia-2.0.0a2.dist-info/RECORD +160 -0
- {cartesia-1.4.0.dist-info → cartesia-2.0.0a2.dist-info}/WHEEL +1 -1
- cartesia/_async_sse.py +0 -95
- cartesia/_logger.py +0 -3
- cartesia/_sse.py +0 -143
- cartesia/_types.py +0 -70
- cartesia/_websocket.py +0 -358
- cartesia/async_client.py +0 -82
- cartesia/async_tts.py +0 -176
- cartesia/resource.py +0 -44
- cartesia/tts.py +0 -292
- cartesia/utils/deprecated.py +0 -55
- cartesia/utils/retry.py +0 -87
- cartesia/utils/tts.py +0 -78
- cartesia/voices.py +0 -204
- cartesia-1.4.0.dist-info/METADATA +0 -663
- cartesia-1.4.0.dist-info/RECORD +0 -23
- cartesia-1.4.0.dist-info/licenses/LICENSE.md +0 -21
- /cartesia/{utils/__init__.py → py.typed} +0 -0
- /cartesia/{_constants.py → tts/utils/constants.py} +0 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
5
|
+
import typing
|
6
|
+
from .context_id import ContextId
|
7
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
8
|
+
import pydantic
|
9
|
+
from .flush_id import FlushId
|
10
|
+
from .word_timestamps import WordTimestamps
|
11
|
+
from .phoneme_timestamps import PhonemeTimestamps
|
12
|
+
|
13
|
+
|
14
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal "chunk".
class WebSocketResponse_Chunk(UniversalBaseModel):
    # Tag value for this variant; defaults to "chunk" so callers need not pass it.
    type: typing.Literal["chunk"] = "chunk"
    # Required string payload.
    # NOTE(review): presumably encoded audio for this chunk - confirm against the API docs.
    data: str
    # Required float. NOTE(review): units are not visible in this file - confirm.
    step_time: float
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Required on every variant of the union.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
30
|
+
|
31
|
+
|
32
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal "flush_done".
class WebSocketResponse_FlushDone(UniversalBaseModel):
    # Tag value for this variant; defaults to "flush_done".
    type: typing.Literal["flush_done"] = "flush_done"
    # Required identifier of the flush this message refers to.
    flush_id: FlushId
    # Required flag carried alongside the flush id.
    flush_done: bool
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Required on every variant of the union.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
48
|
+
|
49
|
+
|
50
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal "done".
# It declares only the fields that every variant of the union carries.
class WebSocketResponse_Done(UniversalBaseModel):
    # Tag value for this variant; defaults to "done".
    type: typing.Literal["done"] = "done"
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Both required.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
64
|
+
|
65
|
+
|
66
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal "timestamps".
class WebSocketResponse_Timestamps(UniversalBaseModel):
    # Tag value for this variant; defaults to "timestamps".
    type: typing.Literal["timestamps"] = "timestamps"
    # Optional word-level timing payload; None when absent.
    word_timestamps: typing.Optional[WordTimestamps] = None
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Required on every variant of the union.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
81
|
+
|
82
|
+
|
83
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal "error".
class WebSocketResponse_Error(UniversalBaseModel):
    # Tag value for this variant; defaults to "error".
    type: typing.Literal["error"] = "error"
    # Required error text.
    error: str
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Required on every variant of the union.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
98
|
+
|
99
|
+
|
100
|
+
# Member of the WebSocketResponse union whose `type` tag is the literal
# "phoneme_timestamps".
class WebSocketResponse_PhonemeTimestamps(UniversalBaseModel):
    # Tag value for this variant; defaults to "phoneme_timestamps".
    type: typing.Literal["phoneme_timestamps"] = "phoneme_timestamps"
    # Optional phoneme-level timing payload; None when absent.
    phoneme_timestamps: typing.Optional[PhonemeTimestamps] = None
    # Optional; None when no context id is supplied.
    context_id: typing.Optional[ContextId] = None
    # Required on every variant of the union.
    status_code: int
    done: bool

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
115
|
+
|
116
|
+
|
117
|
+
# Union of the six response variants defined in this module. Each variant
# declares a distinct `type` literal ("chunk", "flush_done", "done",
# "timestamps", "error", "phoneme_timestamps").
# NOTE(review): presumably that tag is what selects the variant when a message
# is parsed - confirm against the parsing code before reordering members.
WebSocketResponse = typing.Union[
    WebSocketResponse_Chunk,
    WebSocketResponse_FlushDone,
    WebSocketResponse_Done,
    WebSocketResponse_Timestamps,
    WebSocketResponse_Error,
    WebSocketResponse_PhonemeTimestamps,
]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import typing
|
5
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
6
|
+
import pydantic
|
7
|
+
|
8
|
+
|
9
|
+
# Options model with a single optional setting.
class WebSocketStreamOptions(UniversalBaseModel):
    # Optional float; None when no timeout is given.
    # NOTE(review): units (presumably seconds) are not stated in this file - confirm.
    timeout: typing.Optional[float] = None

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from .web_socket_base_response import WebSocketBaseResponse
|
4
|
+
import typing
|
5
|
+
from .word_timestamps import WordTimestamps
|
6
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
7
|
+
import pydantic
|
8
|
+
|
9
|
+
|
10
|
+
# Extends WebSocketBaseResponse (declared in web_socket_base_response) with an
# optional word-timestamps payload; all other fields come from the base class.
class WebSocketTimestampsResponse(WebSocketBaseResponse):
    # Optional word-level timing payload; None when absent.
    word_timestamps: typing.Optional[WordTimestamps] = None

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
import typing
|
4
|
+
|
5
|
+
import pydantic
|
6
|
+
|
7
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
8
|
+
from .context_id import ContextId
|
9
|
+
from .flush_id import FlushId
|
10
|
+
from .word_timestamps import WordTimestamps
|
11
|
+
|
12
|
+
|
13
|
+
# Output record in which every field is optional and defaults to None, so an
# instance may carry any subset of them.
class WebSocketTtsOutput(UniversalBaseModel):
    # Optional word-level timing payload.
    word_timestamps: typing.Optional[WordTimestamps] = None
    # Optional audio as raw bytes.
    audio: typing.Optional[bytes] = None
    # Optional context identifier.
    context_id: typing.Optional[ContextId] = None
    # Optional flush identifier and its accompanying flag.
    flush_id: typing.Optional[FlushId] = None
    flush_done: typing.Optional[bool] = None

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import pydantic
|
5
|
+
import typing
|
6
|
+
from .output_format import OutputFormat
|
7
|
+
from .tts_request_voice_specifier import TtsRequestVoiceSpecifier
|
8
|
+
import typing_extensions
|
9
|
+
from ...core.serialization import FieldMetadata
|
10
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
11
|
+
|
12
|
+
|
13
|
+
# Request model: `model_id` and `voice` are required, every other field is
# optional and defaults to None.
class WebSocketTtsRequest(UniversalBaseModel):
    model_id: str = pydantic.Field()
    """
    The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models.
    """

    # Optional output format description.
    output_format: typing.Optional[OutputFormat] = None
    # Optional text. NOTE(review): presumably the text to synthesize - confirm.
    transcript: typing.Optional[str] = None
    # Required voice specifier.
    voice: TtsRequestVoiceSpecifier
    # Optional integer. NOTE(review): units are not stated in this file - confirm.
    duration: typing.Optional[int] = None
    # Optional language, typed as a plain string here.
    language: typing.Optional[str] = None
    # Optional flags for the two kinds of timestamps.
    add_timestamps: typing.Optional[bool] = None
    add_phoneme_timestamps: typing.Optional[bool] = None
    # Carried under the alias "continue"; `continue` is a Python keyword and
    # cannot be used as the attribute name, hence the trailing underscore.
    continue_: typing_extensions.Annotated[typing.Optional[bool], FieldMetadata(alias="continue")] = None
    # Optional context identifier, typed as a plain string here.
    context_id: typing.Optional[str] = None

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from ...core.pydantic_utilities import UniversalBaseModel
|
4
|
+
import typing
|
5
|
+
from ...core.pydantic_utilities import IS_PYDANTIC_V2
|
6
|
+
import pydantic
|
7
|
+
|
8
|
+
|
9
|
+
# Word timing payload made of three required lists.
# NOTE(review): presumably the lists are parallel (one start/end pair per word)
# - nothing in this file enforces equal lengths; confirm against the API docs.
class WordTimestamps(UniversalBaseModel):
    # The words, as strings.
    words: typing.List[str]
    # Start and end values as floats. NOTE(review): units not stated here - confirm.
    start: typing.List[float]
    end: typing.List[float]

    # Same settings for either Pydantic major version: extra fields are allowed
    # and the model is frozen. The v1 Config additionally enables smart_union.
    if IS_PYDANTIC_V2:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
    else:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow
|
@@ -0,0 +1,64 @@
|
|
1
|
+
import io
|
2
|
+
import typing
|
3
|
+
|
4
|
+
from pydub import AudioSegment # type: ignore
|
5
|
+
|
6
|
+
from .types import OutputFormatMapping
|
7
|
+
|
8
|
+
|
9
|
+
def get_output_format(output_format_name: str) -> typing.Dict[str, typing.Any]:
    """Convenience method to get the output_format dictionary from a given output format name.

    Args:
        output_format_name (str): The name of the output format.

    Returns:
        OutputFormat: A dictionary containing the details of the output format to be passed into tts.sse() or tts.websocket().send()

    Raises:
        ValueError: If the output_format name is not supported
    """
    # get_format() already validates the name and raises the same
    # "Unsupported format: ..." ValueError, so there is no need to inspect the
    # private lookup table here as well.
    return OutputFormatMapping.get_format(output_format_name)
|
27
|
+
|
28
|
+
|
29
|
+
def concat_audio_segments(
    left_audio: typing.Optional[bytes],
    infill_audio: bytes,
    right_audio: typing.Optional[bytes],
    format: str = "wav",
) -> bytes:
    """Helper method to concatenate three audio segments while preserving audio format and headers.

    Args:
        left_audio: The audio segment that comes before the infill; skipped when None or empty
        infill_audio: The generated infill audio segment; always decoded
        right_audio: The audio segment that comes after the infill; skipped when None or empty
        format: The audio format (e.g., 'wav', 'mp3'), used both to decode every
            segment and to encode the result. Defaults to 'wav'

    Returns:
        bytes: The concatenated audio as bytes

    Raises:
        ValueError: If the audio segments cannot be loaded or concatenated. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        combined = AudioSegment.empty()
        if left_audio:
            combined += AudioSegment.from_file(io.BytesIO(left_audio), format=format)

        combined += AudioSegment.from_file(io.BytesIO(infill_audio), format=format)

        if right_audio:
            combined += AudioSegment.from_file(io.BytesIO(right_audio), format=format)

        output = io.BytesIO()
        combined.export(output, format=format)
        return output.getvalue()

    except Exception as e:
        # Any decode/export failure is reported as ValueError; chain explicitly
        # so the original error and its traceback stay reachable to callers.
        raise ValueError(f"Failed to concatenate audio segments: {e}") from e
|
@@ -0,0 +1,70 @@
|
|
1
|
+
def _raw_format(encoding, sample_rate):
    """Build one raw-container output-format entry."""
    return {
        "container": "raw",
        "encoding": encoding,
        "sample_rate": sample_rate,
    }


class OutputFormatMapping:
    """Lookup table from output-format names to output-format dictionaries.

    Every entry uses the "raw" container and differs only in its encoding
    and sample rate.
    """

    _format_mapping = {
        "raw_pcm_f32le_44100": _raw_format("pcm_f32le", 44100),
        "raw_pcm_s16le_44100": _raw_format("pcm_s16le", 44100),
        "raw_pcm_f32le_24000": _raw_format("pcm_f32le", 24000),
        "raw_pcm_s16le_24000": _raw_format("pcm_s16le", 24000),
        "raw_pcm_f32le_22050": _raw_format("pcm_f32le", 22050),
        "raw_pcm_s16le_22050": _raw_format("pcm_s16le", 22050),
        "raw_pcm_f32le_16000": _raw_format("pcm_f32le", 16000),
        "raw_pcm_s16le_16000": _raw_format("pcm_s16le", 16000),
        "raw_pcm_f32le_8000": _raw_format("pcm_f32le", 8000),
        "raw_pcm_s16le_8000": _raw_format("pcm_s16le", 8000),
        "raw_pcm_mulaw_8000": _raw_format("pcm_mulaw", 8000),
        "raw_pcm_alaw_8000": _raw_format("pcm_alaw", 8000),
    }

    @classmethod
    def get_format(cls, format_name):
        """Return the stored entry for ``format_name``.

        Raises:
            ValueError: If ``format_name`` is not a key of the table.
        """
        table = cls._format_mapping
        if format_name not in table:
            raise ValueError(f"Unsupported format: {format_name}")
        return table[format_name]
|
cartesia/version.py
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
2
|
+
|
3
|
+
from .types import (
|
4
|
+
OutputFormatContainer,
|
5
|
+
StreamingResponse,
|
6
|
+
StreamingResponse_Chunk,
|
7
|
+
StreamingResponse_Done,
|
8
|
+
StreamingResponse_Error,
|
9
|
+
)
|
10
|
+
from .requests import (
|
11
|
+
StreamingResponseParams,
|
12
|
+
StreamingResponse_ChunkParams,
|
13
|
+
StreamingResponse_DoneParams,
|
14
|
+
StreamingResponse_ErrorParams,
|
15
|
+
)
|
16
|
+
|
17
|
+
__all__ = [
|
18
|
+
"OutputFormatContainer",
|
19
|
+
"StreamingResponse",
|
20
|
+
"StreamingResponseParams",
|
21
|
+
"StreamingResponse_Chunk",
|
22
|
+
"StreamingResponse_ChunkParams",
|
23
|
+
"StreamingResponse_Done",
|
24
|
+
"StreamingResponse_DoneParams",
|
25
|
+
"StreamingResponse_Error",
|
26
|
+
"StreamingResponse_ErrorParams",
|
27
|
+
]
|