sarvamai 0.1.8rc6__py3-none-any.whl → 0.1.8rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +14 -14
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/__init__.py +4 -4
- sarvamai/requests/{initialize_connection.py → configure_connection.py} +3 -3
- sarvamai/requests/{initialize_connection_data.py → configure_connection_data.py} +7 -7
- sarvamai/text_to_speech_streaming/socket_client.py +80 -46
- sarvamai/types/__init__.py +10 -10
- sarvamai/types/{initialize_connection.py → configure_connection.py} +3 -3
- sarvamai/types/{initialize_connection_data.py → configure_connection_data.py} +7 -7
- sarvamai/types/{initialize_connection_data_output_audio_bitrate.py → configure_connection_data_output_audio_bitrate.py} +1 -1
- sarvamai/types/{initialize_connection_data_speaker.py → configure_connection_data_speaker.py} +1 -1
- sarvamai/types/{initialize_connection_data_target_language_code.py → configure_connection_data_target_language_code.py} +1 -1
- {sarvamai-0.1.8rc6.dist-info → sarvamai-0.1.8rc7.dist-info}/METADATA +1 -1
- {sarvamai-0.1.8rc6.dist-info → sarvamai-0.1.8rc7.dist-info}/RECORD +15 -15
- {sarvamai-0.1.8rc6.dist-info → sarvamai-0.1.8rc7.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -18,6 +18,11 @@ from .types import (
|
|
|
18
18
|
Choice,
|
|
19
19
|
CompletionUsage,
|
|
20
20
|
ConfigMessage,
|
|
21
|
+
ConfigureConnection,
|
|
22
|
+
ConfigureConnectionData,
|
|
23
|
+
ConfigureConnectionDataOutputAudioBitrate,
|
|
24
|
+
ConfigureConnectionDataSpeaker,
|
|
25
|
+
ConfigureConnectionDataTargetLanguageCode,
|
|
21
26
|
CreateChatCompletionResponse,
|
|
22
27
|
DiarizedEntry,
|
|
23
28
|
DiarizedTranscript,
|
|
@@ -31,11 +36,6 @@ from .types import (
|
|
|
31
36
|
FinishReason,
|
|
32
37
|
FlushSignal,
|
|
33
38
|
Format,
|
|
34
|
-
InitializeConnection,
|
|
35
|
-
InitializeConnectionData,
|
|
36
|
-
InitializeConnectionDataOutputAudioBitrate,
|
|
37
|
-
InitializeConnectionDataSpeaker,
|
|
38
|
-
InitializeConnectionDataTargetLanguageCode,
|
|
39
39
|
LanguageIdentificationResponse,
|
|
40
40
|
NumeralsFormat,
|
|
41
41
|
PingSignal,
|
|
@@ -112,6 +112,8 @@ from .requests import (
|
|
|
112
112
|
ChoiceParams,
|
|
113
113
|
CompletionUsageParams,
|
|
114
114
|
ConfigMessageParams,
|
|
115
|
+
ConfigureConnectionDataParams,
|
|
116
|
+
ConfigureConnectionParams,
|
|
115
117
|
CreateChatCompletionResponseParams,
|
|
116
118
|
DiarizedEntryParams,
|
|
117
119
|
DiarizedTranscriptParams,
|
|
@@ -122,8 +124,6 @@ from .requests import (
|
|
|
122
124
|
ErrorResponseParams,
|
|
123
125
|
EventsDataParams,
|
|
124
126
|
FlushSignalParams,
|
|
125
|
-
InitializeConnectionDataParams,
|
|
126
|
-
InitializeConnectionParams,
|
|
127
127
|
LanguageIdentificationResponseParams,
|
|
128
128
|
PingSignalParams,
|
|
129
129
|
SendTextDataParams,
|
|
@@ -189,6 +189,13 @@ __all__ = [
|
|
|
189
189
|
"CompletionUsageParams",
|
|
190
190
|
"ConfigMessage",
|
|
191
191
|
"ConfigMessageParams",
|
|
192
|
+
"ConfigureConnection",
|
|
193
|
+
"ConfigureConnectionData",
|
|
194
|
+
"ConfigureConnectionDataOutputAudioBitrate",
|
|
195
|
+
"ConfigureConnectionDataParams",
|
|
196
|
+
"ConfigureConnectionDataSpeaker",
|
|
197
|
+
"ConfigureConnectionDataTargetLanguageCode",
|
|
198
|
+
"ConfigureConnectionParams",
|
|
192
199
|
"CreateChatCompletionResponse",
|
|
193
200
|
"CreateChatCompletionResponseParams",
|
|
194
201
|
"DiarizedEntry",
|
|
@@ -213,13 +220,6 @@ __all__ = [
|
|
|
213
220
|
"FlushSignalParams",
|
|
214
221
|
"ForbiddenError",
|
|
215
222
|
"Format",
|
|
216
|
-
"InitializeConnection",
|
|
217
|
-
"InitializeConnectionData",
|
|
218
|
-
"InitializeConnectionDataOutputAudioBitrate",
|
|
219
|
-
"InitializeConnectionDataParams",
|
|
220
|
-
"InitializeConnectionDataSpeaker",
|
|
221
|
-
"InitializeConnectionDataTargetLanguageCode",
|
|
222
|
-
"InitializeConnectionParams",
|
|
223
223
|
"InternalServerError",
|
|
224
224
|
"LanguageIdentificationResponse",
|
|
225
225
|
"LanguageIdentificationResponseParams",
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -17,10 +17,10 @@ class BaseClientWrapper:
|
|
|
17
17
|
|
|
18
18
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
19
19
|
headers: typing.Dict[str, str] = {
|
|
20
|
-
"User-Agent": "sarvamai/0.1.8rc6",
|
|
20
|
+
"User-Agent": "sarvamai/0.1.8rc7",
|
|
21
21
|
"X-Fern-Language": "Python",
|
|
22
22
|
"X-Fern-SDK-Name": "sarvamai",
|
|
23
|
-
"X-Fern-SDK-Version": "0.1.8rc6",
|
|
23
|
+
"X-Fern-SDK-Version": "0.1.8rc7",
|
|
24
24
|
}
|
|
25
25
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
26
26
|
return headers
|
sarvamai/requests/__init__.py
CHANGED
|
@@ -19,6 +19,8 @@ from .chat_completion_response_message import ChatCompletionResponseMessageParam
|
|
|
19
19
|
from .choice import ChoiceParams
|
|
20
20
|
from .completion_usage import CompletionUsageParams
|
|
21
21
|
from .config_message import ConfigMessageParams
|
|
22
|
+
from .configure_connection import ConfigureConnectionParams
|
|
23
|
+
from .configure_connection_data import ConfigureConnectionDataParams
|
|
22
24
|
from .create_chat_completion_response import CreateChatCompletionResponseParams
|
|
23
25
|
from .diarized_entry import DiarizedEntryParams
|
|
24
26
|
from .diarized_transcript import DiarizedTranscriptParams
|
|
@@ -29,8 +31,6 @@ from .error_response import ErrorResponseParams
|
|
|
29
31
|
from .error_response_data import ErrorResponseDataParams
|
|
30
32
|
from .events_data import EventsDataParams
|
|
31
33
|
from .flush_signal import FlushSignalParams
|
|
32
|
-
from .initialize_connection import InitializeConnectionParams
|
|
33
|
-
from .initialize_connection_data import InitializeConnectionDataParams
|
|
34
34
|
from .language_identification_response import LanguageIdentificationResponseParams
|
|
35
35
|
from .ping_signal import PingSignalParams
|
|
36
36
|
from .send_text import SendTextParams
|
|
@@ -66,6 +66,8 @@ __all__ = [
|
|
|
66
66
|
"ChoiceParams",
|
|
67
67
|
"CompletionUsageParams",
|
|
68
68
|
"ConfigMessageParams",
|
|
69
|
+
"ConfigureConnectionDataParams",
|
|
70
|
+
"ConfigureConnectionParams",
|
|
69
71
|
"CreateChatCompletionResponseParams",
|
|
70
72
|
"DiarizedEntryParams",
|
|
71
73
|
"DiarizedTranscriptParams",
|
|
@@ -76,8 +78,6 @@ __all__ = [
|
|
|
76
78
|
"ErrorResponseParams",
|
|
77
79
|
"EventsDataParams",
|
|
78
80
|
"FlushSignalParams",
|
|
79
|
-
"InitializeConnectionDataParams",
|
|
80
|
-
"InitializeConnectionParams",
|
|
81
81
|
"LanguageIdentificationResponseParams",
|
|
82
82
|
"PingSignalParams",
|
|
83
83
|
"SendTextDataParams",
|
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
import typing_extensions
|
|
6
|
-
from .initialize_connection_data import InitializeConnectionDataParams
|
|
6
|
+
from .configure_connection_data import ConfigureConnectionDataParams
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class InitializeConnectionParams(typing_extensions.TypedDict):
|
|
9
|
+
class ConfigureConnectionParams(typing_extensions.TypedDict):
|
|
10
10
|
"""
|
|
11
11
|
Configuration message required as the first message after establishing the WebSocket connection.
|
|
12
12
|
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
@@ -15,4 +15,4 @@ class InitializeConnectionParams(typing_extensions.TypedDict):
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
type: typing.Literal["config"]
|
|
18
|
-
data: InitializeConnectionDataParams
|
|
18
|
+
data: ConfigureConnectionDataParams
|
|
@@ -3,18 +3,18 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
import typing_extensions
|
|
6
|
-
from ..types.initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
|
|
7
|
-
from ..types.initialize_connection_data_speaker import InitializeConnectionDataSpeaker
|
|
8
|
-
from ..types.initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
|
|
6
|
+
from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
|
|
7
|
+
from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
|
|
8
|
+
from ..types.configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
class InitializeConnectionDataParams(typing_extensions.TypedDict):
|
|
12
|
-
target_language_code: InitializeConnectionDataTargetLanguageCode
|
|
11
|
+
class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
12
|
+
target_language_code: ConfigureConnectionDataTargetLanguageCode
|
|
13
13
|
"""
|
|
14
14
|
The language of the text is BCP-47 format
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
speaker: InitializeConnectionDataSpeaker
|
|
17
|
+
speaker: ConfigureConnectionDataSpeaker
|
|
18
18
|
"""
|
|
19
19
|
The speaker voice to be used for the output audio.
|
|
20
20
|
|
|
@@ -67,7 +67,7 @@ class InitializeConnectionDataParams(typing_extensions.TypedDict):
|
|
|
67
67
|
Audio codec (currently supports MP3 only, optimized for real-time playback)
|
|
68
68
|
"""
|
|
69
69
|
|
|
70
|
-
output_audio_bitrate: typing_extensions.NotRequired[InitializeConnectionDataOutputAudioBitrate]
|
|
70
|
+
output_audio_bitrate: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioBitrate]
|
|
71
71
|
"""
|
|
72
72
|
Audio bitrate (choose from 5 supported bitrate options)
|
|
73
73
|
"""
|
|
@@ -10,8 +10,8 @@ from ..core.pydantic_utilities import parse_obj_as
|
|
|
10
10
|
from ..types.audio_output import AudioOutput
|
|
11
11
|
from ..types.flush_signal import FlushSignal
|
|
12
12
|
from ..types.error_response import ErrorResponse
|
|
13
|
-
from ..types.initialize_connection import InitializeConnection
|
|
14
|
-
from ..types.initialize_connection_data import InitializeConnectionData
|
|
13
|
+
from ..types.configure_connection import ConfigureConnection
|
|
14
|
+
from ..types.configure_connection_data import ConfigureConnectionData
|
|
15
15
|
from ..types.ping_signal import PingSignal
|
|
16
16
|
from ..types.send_text import SendText
|
|
17
17
|
from ..types.send_text_data import SendTextData
|
|
@@ -54,10 +54,10 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
54
54
|
finally:
|
|
55
55
|
self._emit(EventType.CLOSE, None)
|
|
56
56
|
|
|
57
|
-
async def
|
|
57
|
+
async def configure(
|
|
58
58
|
self,
|
|
59
59
|
target_language_code: str,
|
|
60
|
-
speaker: str,
|
|
60
|
+
speaker: str = "anushka",
|
|
61
61
|
pitch: float = 0.0,
|
|
62
62
|
pace: float = 1.0,
|
|
63
63
|
loudness: float = 1.0,
|
|
@@ -69,21 +69,35 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
69
69
|
max_chunk_length: int = 150,
|
|
70
70
|
) -> None:
|
|
71
71
|
"""
|
|
72
|
-
|
|
72
|
+
Configuration message required as the first message after establishing the WebSocket connection.
|
|
73
|
+
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
74
|
+
by sending a new config message. When a config update is sent, any text currently in the buffer
|
|
75
|
+
will be automatically flushed and processed before applying the new configuration.
|
|
73
76
|
|
|
74
|
-
:param target_language_code:
|
|
75
|
-
:param speaker:
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
:param
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
:param
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
:param
|
|
85
|
-
|
|
86
|
-
|
|
77
|
+
:param target_language_code: The language of the text is BCP-47 format
|
|
78
|
+
:param speaker: The speaker voice to be used for the output audio. Default: Anushka.
|
|
79
|
+
Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
|
|
80
|
+
Male: Abhilash, Karun, Hitesh
|
|
81
|
+
:param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
|
|
82
|
+
while higher values make it sharper. The suitable range is between -0.75
|
|
83
|
+
and 0.75. Default is 0.0.
|
|
84
|
+
:param pace: Controls the speed of the audio. Lower values result in slower speech,
|
|
85
|
+
while higher values make it faster. The suitable range is between 0.5
|
|
86
|
+
and 2.0. Default is 1.0.
|
|
87
|
+
:param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
|
|
88
|
+
while higher values make it louder. The suitable range is between 0.3
|
|
89
|
+
and 3.0. Default is 1.0.
|
|
90
|
+
:param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
|
|
91
|
+
8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
|
|
92
|
+
:param enable_preprocessing: Controls whether normalization of English words and numeric entities
|
|
93
|
+
(e.g., numbers, dates) is performed. Set to true for better handling
|
|
94
|
+
of mixed-language text. Default is false.
|
|
95
|
+
:param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
|
|
96
|
+
:param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
|
|
97
|
+
:param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
|
|
98
|
+
:param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
|
|
99
|
+
"""
|
|
100
|
+
data = ConfigureConnectionData(
|
|
87
101
|
target_language_code=target_language_code,
|
|
88
102
|
speaker=speaker,
|
|
89
103
|
pitch=pitch,
|
|
@@ -96,14 +110,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
96
110
|
min_buffer_size=min_buffer_size,
|
|
97
111
|
max_chunk_length=max_chunk_length,
|
|
98
112
|
)
|
|
99
|
-
message = InitializeConnection(data=data)
|
|
113
|
+
message = ConfigureConnection(data=data)
|
|
100
114
|
await self._send_model(message)
|
|
101
115
|
|
|
102
116
|
async def convert(self, text: str) -> None:
|
|
103
117
|
"""
|
|
104
|
-
Send text to be converted to speech.
|
|
118
|
+
Send text to be converted to speech. Text length should be 1-2500 characters.
|
|
119
|
+
Recommended: <500 characters for optimal streaming performance.
|
|
120
|
+
Real-time endpoints perform better with longer character counts.
|
|
105
121
|
|
|
106
|
-
:param text: Text to be synthesized (1-2500 characters)
|
|
122
|
+
:param text: Text to be synthesized (1-2500 characters, recommended <500)
|
|
107
123
|
"""
|
|
108
124
|
data = SendTextData(text=text)
|
|
109
125
|
message = SendText(data=data)
|
|
@@ -111,15 +127,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
111
127
|
|
|
112
128
|
async def flush(self) -> None:
|
|
113
129
|
"""
|
|
114
|
-
|
|
115
|
-
|
|
130
|
+
Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
|
|
131
|
+
Use this when you need to process remaining text that hasn't reached the minimum buffer size.
|
|
116
132
|
"""
|
|
117
133
|
message = FlushSignal()
|
|
118
134
|
await self._send_model(message)
|
|
119
135
|
|
|
120
136
|
async def ping(self) -> None:
|
|
121
137
|
"""
|
|
122
|
-
Send ping signal to keep the WebSocket connection alive.
|
|
138
|
+
Send ping signal to keep the WebSocket connection alive. The connection automatically
|
|
139
|
+
closes after one minute of inactivity.
|
|
123
140
|
"""
|
|
124
141
|
message = PingSignal()
|
|
125
142
|
await self._send_model(message)
|
|
@@ -182,10 +199,10 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
182
199
|
finally:
|
|
183
200
|
self._emit(EventType.CLOSE, None)
|
|
184
201
|
|
|
185
|
-
def
|
|
202
|
+
def configure(
|
|
186
203
|
self,
|
|
187
204
|
target_language_code: str,
|
|
188
|
-
speaker: str,
|
|
205
|
+
speaker: str = "anushka",
|
|
189
206
|
pitch: float = 0.0,
|
|
190
207
|
pace: float = 1.0,
|
|
191
208
|
loudness: float = 1.0,
|
|
@@ -197,21 +214,35 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
197
214
|
max_chunk_length: int = 150,
|
|
198
215
|
) -> None:
|
|
199
216
|
"""
|
|
200
|
-
|
|
217
|
+
Configuration message required as the first message after establishing the WebSocket connection.
|
|
218
|
+
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
219
|
+
by sending a new config message. When a config update is sent, any text currently in the buffer
|
|
220
|
+
will be automatically flushed and processed before applying the new configuration.
|
|
201
221
|
|
|
202
|
-
:param target_language_code:
|
|
203
|
-
:param speaker:
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
:param
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
:param
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
:param
|
|
213
|
-
|
|
214
|
-
|
|
222
|
+
:param target_language_code: The language of the text is BCP-47 format
|
|
223
|
+
:param speaker: The speaker voice to be used for the output audio. Default: Anushka.
|
|
224
|
+
Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
|
|
225
|
+
Male: Abhilash, Karun, Hitesh
|
|
226
|
+
:param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
|
|
227
|
+
while higher values make it sharper. The suitable range is between -0.75
|
|
228
|
+
and 0.75. Default is 0.0.
|
|
229
|
+
:param pace: Controls the speed of the audio. Lower values result in slower speech,
|
|
230
|
+
while higher values make it faster. The suitable range is between 0.5
|
|
231
|
+
and 2.0. Default is 1.0.
|
|
232
|
+
:param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
|
|
233
|
+
while higher values make it louder. The suitable range is between 0.3
|
|
234
|
+
and 3.0. Default is 1.0.
|
|
235
|
+
:param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
|
|
236
|
+
8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
|
|
237
|
+
:param enable_preprocessing: Controls whether normalization of English words and numeric entities
|
|
238
|
+
(e.g., numbers, dates) is performed. Set to true for better handling
|
|
239
|
+
of mixed-language text. Default is false.
|
|
240
|
+
:param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
|
|
241
|
+
:param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
|
|
242
|
+
:param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
|
|
243
|
+
:param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
|
|
244
|
+
"""
|
|
245
|
+
data = ConfigureConnectionData(
|
|
215
246
|
target_language_code=target_language_code,
|
|
216
247
|
speaker=speaker,
|
|
217
248
|
pitch=pitch,
|
|
@@ -224,14 +255,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
224
255
|
min_buffer_size=min_buffer_size,
|
|
225
256
|
max_chunk_length=max_chunk_length,
|
|
226
257
|
)
|
|
227
|
-
message = InitializeConnection(data=data)
|
|
258
|
+
message = ConfigureConnection(data=data)
|
|
228
259
|
self._send_model(message)
|
|
229
260
|
|
|
230
261
|
def convert(self, text: str) -> None:
|
|
231
262
|
"""
|
|
232
|
-
Send text to be converted to speech.
|
|
263
|
+
Send text to be converted to speech. Text length should be 1-2500 characters.
|
|
264
|
+
Recommended: <500 characters for optimal streaming performance.
|
|
265
|
+
Real-time endpoints perform better with longer character counts.
|
|
233
266
|
|
|
234
|
-
:param text: Text to be synthesized (1-2500 characters)
|
|
267
|
+
:param text: Text to be synthesized (1-2500 characters, recommended <500)
|
|
235
268
|
"""
|
|
236
269
|
data = SendTextData(text=text)
|
|
237
270
|
message = SendText(data=data)
|
|
@@ -239,15 +272,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
|
|
|
239
272
|
|
|
240
273
|
def flush(self) -> None:
|
|
241
274
|
"""
|
|
242
|
-
|
|
243
|
-
|
|
275
|
+
Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
|
|
276
|
+
Use this when you need to process remaining text that hasn't reached the minimum buffer size.
|
|
244
277
|
"""
|
|
245
278
|
message = FlushSignal()
|
|
246
279
|
self._send_model(message)
|
|
247
280
|
|
|
248
281
|
def ping(self) -> None:
|
|
249
282
|
"""
|
|
250
|
-
Send ping signal to keep the WebSocket connection alive.
|
|
283
|
+
Send ping signal to keep the WebSocket connection alive. The connection automatically
|
|
284
|
+
closes after one minute of inactivity.
|
|
251
285
|
"""
|
|
252
286
|
message = PingSignal()
|
|
253
287
|
self._send_model(message)
|
sarvamai/types/__init__.py
CHANGED
|
@@ -19,6 +19,11 @@ from .chat_completion_response_message import ChatCompletionResponseMessage
|
|
|
19
19
|
from .choice import Choice
|
|
20
20
|
from .completion_usage import CompletionUsage
|
|
21
21
|
from .config_message import ConfigMessage
|
|
22
|
+
from .configure_connection import ConfigureConnection
|
|
23
|
+
from .configure_connection_data import ConfigureConnectionData
|
|
24
|
+
from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
|
|
25
|
+
from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
|
|
26
|
+
from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
|
|
22
27
|
from .create_chat_completion_response import CreateChatCompletionResponse
|
|
23
28
|
from .diarized_entry import DiarizedEntry
|
|
24
29
|
from .diarized_transcript import DiarizedTranscript
|
|
@@ -32,11 +37,6 @@ from .events_data import EventsData
|
|
|
32
37
|
from .finish_reason import FinishReason
|
|
33
38
|
from .flush_signal import FlushSignal
|
|
34
39
|
from .format import Format
|
|
35
|
-
from .initialize_connection import InitializeConnection
|
|
36
|
-
from .initialize_connection_data import InitializeConnectionData
|
|
37
|
-
from .initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
|
|
38
|
-
from .initialize_connection_data_speaker import InitializeConnectionDataSpeaker
|
|
39
|
-
from .initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
|
|
40
40
|
from .language_identification_response import LanguageIdentificationResponse
|
|
41
41
|
from .numerals_format import NumeralsFormat
|
|
42
42
|
from .ping_signal import PingSignal
|
|
@@ -94,6 +94,11 @@ __all__ = [
|
|
|
94
94
|
"Choice",
|
|
95
95
|
"CompletionUsage",
|
|
96
96
|
"ConfigMessage",
|
|
97
|
+
"ConfigureConnection",
|
|
98
|
+
"ConfigureConnectionData",
|
|
99
|
+
"ConfigureConnectionDataOutputAudioBitrate",
|
|
100
|
+
"ConfigureConnectionDataSpeaker",
|
|
101
|
+
"ConfigureConnectionDataTargetLanguageCode",
|
|
97
102
|
"CreateChatCompletionResponse",
|
|
98
103
|
"DiarizedEntry",
|
|
99
104
|
"DiarizedTranscript",
|
|
@@ -107,11 +112,6 @@ __all__ = [
|
|
|
107
112
|
"FinishReason",
|
|
108
113
|
"FlushSignal",
|
|
109
114
|
"Format",
|
|
110
|
-
"InitializeConnection",
|
|
111
|
-
"InitializeConnectionData",
|
|
112
|
-
"InitializeConnectionDataOutputAudioBitrate",
|
|
113
|
-
"InitializeConnectionDataSpeaker",
|
|
114
|
-
"InitializeConnectionDataTargetLanguageCode",
|
|
115
115
|
"LanguageIdentificationResponse",
|
|
116
116
|
"NumeralsFormat",
|
|
117
117
|
"PingSignal",
|
|
@@ -4,10 +4,10 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .initialize_connection_data import InitializeConnectionData
|
|
7
|
+
from .configure_connection_data import ConfigureConnectionData
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class InitializeConnection(UniversalBaseModel):
|
|
10
|
+
class ConfigureConnection(UniversalBaseModel):
|
|
11
11
|
"""
|
|
12
12
|
Configuration message required as the first message after establishing the WebSocket connection.
|
|
13
13
|
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
@@ -16,7 +16,7 @@ class InitializeConnection(UniversalBaseModel):
|
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
type: typing.Literal["config"] = "config"
|
|
19
|
-
data: InitializeConnectionData
|
|
19
|
+
data: ConfigureConnectionData
|
|
20
20
|
|
|
21
21
|
if IS_PYDANTIC_V2:
|
|
22
22
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
@@ -4,18 +4,18 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
|
|
8
|
-
from .initialize_connection_data_speaker import InitializeConnectionDataSpeaker
|
|
9
|
-
from .initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
|
|
7
|
+
from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
|
|
8
|
+
from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
|
|
9
|
+
from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
class InitializeConnectionData(UniversalBaseModel):
|
|
13
|
-
target_language_code: InitializeConnectionDataTargetLanguageCode = pydantic.Field()
|
|
12
|
+
class ConfigureConnectionData(UniversalBaseModel):
|
|
13
|
+
target_language_code: ConfigureConnectionDataTargetLanguageCode = pydantic.Field()
|
|
14
14
|
"""
|
|
15
15
|
The language of the text is BCP-47 format
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
|
-
speaker: InitializeConnectionDataSpeaker = pydantic.Field()
|
|
18
|
+
speaker: ConfigureConnectionDataSpeaker = pydantic.Field()
|
|
19
19
|
"""
|
|
20
20
|
The speaker voice to be used for the output audio.
|
|
21
21
|
|
|
@@ -68,7 +68,7 @@ class InitializeConnectionData(UniversalBaseModel):
|
|
|
68
68
|
Audio codec (currently supports MP3 only, optimized for real-time playback)
|
|
69
69
|
"""
|
|
70
70
|
|
|
71
|
-
output_audio_bitrate: typing.Optional[InitializeConnectionDataOutputAudioBitrate] = pydantic.Field(default=None)
|
|
71
|
+
output_audio_bitrate: typing.Optional[ConfigureConnectionDataOutputAudioBitrate] = pydantic.Field(default=None)
|
|
72
72
|
"""
|
|
73
73
|
Audio bitrate (choose from 5 supported bitrate options)
|
|
74
74
|
"""
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
ConfigureConnectionDataTargetLanguageCode = typing.Union[
|
|
6
6
|
typing.Literal["bn-IN", "en-IN", "gu-IN", "hi-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN"],
|
|
7
7
|
typing.Any,
|
|
8
8
|
]
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sarvamai/__init__.py,sha256=
|
|
1
|
+
sarvamai/__init__.py,sha256=xkQNmsHue7UaEN7PgNfH2ExBGBdyszDgg5omMkSnZMM,8824
|
|
2
2
|
sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
3
3
|
sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
|
|
4
4
|
sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
|
|
5
5
|
sarvamai/client.py,sha256=aI1sw5LVGMjgukgZLDlUmA17ecK1yGsQxH-W_JiCrco,7177
|
|
6
6
|
sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
|
|
7
7
|
sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
|
8
|
-
sarvamai/core/client_wrapper.py,sha256
|
|
8
|
+
sarvamai/core/client_wrapper.py,sha256=-Vik8MO2fDeTSfXmt3KybXUHsipm3dqlcwxLaBwaqA8,2080
|
|
9
9
|
sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
|
10
10
|
sarvamai/core/events.py,sha256=j7VWXgMpOsjCXdzY22wIhI7Q-v5InZ4WchRzA88x_Sk,856
|
|
11
11
|
sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
|
@@ -28,7 +28,7 @@ sarvamai/errors/too_many_requests_error.py,sha256=Dl-_pfpboXJh-OtSbRaPQOB-UXvpVO
|
|
|
28
28
|
sarvamai/errors/unprocessable_entity_error.py,sha256=JqxtzIhvjkpQDqbT9Q-go1n-gyv9PsYqq0ng_ZYyBMo,347
|
|
29
29
|
sarvamai/play.py,sha256=4fh86zy8g8IPU2O8yPBY7QxXQOivv_nWQvPQsOa1arw,2183
|
|
30
30
|
sarvamai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
sarvamai/requests/__init__.py,sha256=
|
|
31
|
+
sarvamai/requests/__init__.py,sha256=AYpixS_3RLRqMW4U2OfeACJjD3u3NnethhdYr_V-P9M,4478
|
|
32
32
|
sarvamai/requests/audio_data.py,sha256=QI3SK5aiAg2yJ-m3l9CxOkONnH3CCKMFCl9kAdMs19o,410
|
|
33
33
|
sarvamai/requests/audio_message.py,sha256=ZBeogjGE6YFXXM-0g8zq9SoizDk21reR0YXSB-0fMjg,214
|
|
34
34
|
sarvamai/requests/audio_output.py,sha256=BnoX345rwoWgaMaj24u_19-SjmPV0xt7vlFEEDKRw20,280
|
|
@@ -41,6 +41,8 @@ sarvamai/requests/chat_completion_response_message.py,sha256=JFazj4zK-nj_wjdvNLD
|
|
|
41
41
|
sarvamai/requests/choice.py,sha256=uulX4MZUoThEMcD3a80o_3V5YpnpqN8DfPaNZWVz-1o,867
|
|
42
42
|
sarvamai/requests/completion_usage.py,sha256=LbZV-RxcxKdCAYqhCiaRtSFF3VwMJq71A989Z1rm-I8,428
|
|
43
43
|
sarvamai/requests/config_message.py,sha256=EpYioGvDhCXDMvGH7Q1F7448zJzoHmlkQ1owoNGbWAw,383
|
|
44
|
+
sarvamai/requests/configure_connection.py,sha256=a-foQtLxArL4CulvKEdeebbRqmS1GRmko3MZdnHVPEk,716
|
|
45
|
+
sarvamai/requests/configure_connection_data.py,sha256=Niil2OrVBzQEtmWFn1JC-StLVp6WzzRIsu2i_M_8_44,2908
|
|
44
46
|
sarvamai/requests/create_chat_completion_response.py,sha256=TqS9u5_WVWMok_NreT4TeOsLJQeybPkbJm45Q0Zxw30,857
|
|
45
47
|
sarvamai/requests/diarized_entry.py,sha256=gbXB4D_r5_Q8gs1arRKjxPeFcYg16dVDLcg2VhxmKQA,462
|
|
46
48
|
sarvamai/requests/diarized_transcript.py,sha256=X-znuJ45oqwXzVyJumBHSqVGLz6JnoYFZmluQlEpEAw,323
|
|
@@ -51,8 +53,6 @@ sarvamai/requests/error_response.py,sha256=A8j12JQ7JJkUcnt26k2M9uwXXkwyT-LNqG3BO
|
|
|
51
53
|
sarvamai/requests/error_response_data.py,sha256=l9tGTykaKZ8pKxdw9RKitpW49kKcs4aGibH7rKG2v7w,461
|
|
52
54
|
sarvamai/requests/events_data.py,sha256=3seSash8DysPUWX6mKPzoEzWZlsrK4Tann2GFSbQjZg,286
|
|
53
55
|
sarvamai/requests/flush_signal.py,sha256=Aj_PzphMNcHMMOaxvTi1uQ5y36ZTtKEsUGCprbWIOvw,406
|
|
54
|
-
sarvamai/requests/initialize_connection.py,sha256=H4SMx6-TkJ_oyDtQkUpbS8H-njItwNDfHkl9bppLuwk,720
|
|
55
|
-
sarvamai/requests/initialize_connection_data.py,sha256=NEvOWOO5UmIH3qxXsfBgjH2sLnG54HrR9x1AdYfBgCE,2918
|
|
56
56
|
sarvamai/requests/language_identification_response.py,sha256=BdS5U9Gic-71vb--ph6HGvd2hGNKDXERC7yrn8vFcvI,1098
|
|
57
57
|
sarvamai/requests/ping_signal.py,sha256=TSgmfz2k4X1L6TzvX8u2SKZ6XQY3bSf7nPZf8mUViaM,343
|
|
58
58
|
sarvamai/requests/send_text.py,sha256=DWzbNgeNN2xSIYgk2zEisgLqjwq5oleqJVHrtOnIqbE,267
|
|
@@ -100,8 +100,8 @@ sarvamai/text_to_speech/raw_client.py,sha256=3Zu6HN_FOY683Vm-EN-OL7YAbLsftjJlFm5
|
|
|
100
100
|
sarvamai/text_to_speech_streaming/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
101
101
|
sarvamai/text_to_speech_streaming/client.py,sha256=geTF5xy-batzO12XVt0sPw_XJCi7-m2sDFK_B7SL7qc,6088
|
|
102
102
|
sarvamai/text_to_speech_streaming/raw_client.py,sha256=asOcNw1WAViOiXDVWH4sxWSXGVoLwAOh9vUtq_xralA,5269
|
|
103
|
-
sarvamai/text_to_speech_streaming/socket_client.py,sha256=
|
|
104
|
-
sarvamai/types/__init__.py,sha256=
|
|
103
|
+
sarvamai/text_to_speech_streaming/socket_client.py,sha256=NEcijnvjuNcWfzqpBi-xWsXVkL0NPq6EGAkEjnaq9hw,13909
|
|
104
|
+
sarvamai/types/__init__.py,sha256=cEfGVQMYlbz13iS9v83_CB9160Cky4JOImmxM30fGhg,6305
|
|
105
105
|
sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
|
|
106
106
|
sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
|
|
107
107
|
sarvamai/types/audio_output.py,sha256=Eq-YUZa1mSDwt7bax2c4Vv2gBlyM_JBJWzHhTAhFSko,621
|
|
@@ -114,6 +114,11 @@ sarvamai/types/chat_completion_response_message.py,sha256=wz935eBnCkSIl0I0qMxBuH
|
|
|
114
114
|
sarvamai/types/choice.py,sha256=uXBCsjWP9VK3XWQWZUeI4EnU10w0G9nAfKn2tJZvxko,1244
|
|
115
115
|
sarvamai/types/completion_usage.py,sha256=xYQGlQUbKqsksuV73H-1ajjfT5M7w47eLfdWXSlrI5M,843
|
|
116
116
|
sarvamai/types/config_message.py,sha256=sGrT-qYTRqLVfIo5nRUuRlqPtPVmiAkUAnaMtlmQYCU,778
|
|
117
|
+
sarvamai/types/configure_connection.py,sha256=SnSNk02gQqP8e4VB4y88jjeFQ4ClpImjGLn2ANI8cZ4,1058
|
|
118
|
+
sarvamai/types/configure_connection_data.py,sha256=brMO-Z1TDq3oTJ22m1icBkkmnd9k67p_DzecnMcqNko,3421
|
|
119
|
+
sarvamai/types/configure_connection_data_output_audio_bitrate.py,sha256=h00YvKLxsZC8L3__rH4XH53nN_GY40UElW1EjysCwUs,208
|
|
120
|
+
sarvamai/types/configure_connection_data_speaker.py,sha256=SzyAiK5LynXwb9KniaO2qoOLY-II3-PMZbRuIsQ9shw,230
|
|
121
|
+
sarvamai/types/configure_connection_data_target_language_code.py,sha256=jrU1EblAtDYbybUO1KUkHhevmlSBj2AQxX13ii3QhAQ,275
|
|
117
122
|
sarvamai/types/create_chat_completion_response.py,sha256=4nEzeWzHGW1_BmRAtOuGsbRZ0ojNgnzJSMUFyYuYviw,1285
|
|
118
123
|
sarvamai/types/diarized_entry.py,sha256=kf9DLrcoMHZdTKNCAaF0z46q_iAe7CE-DFP4CNrZGTw,896
|
|
119
124
|
sarvamai/types/diarized_transcript.py,sha256=a491XmALLE7AQcByaaOYTew0BZoFTlewEMHLMJyj-Js,669
|
|
@@ -127,11 +132,6 @@ sarvamai/types/events_data.py,sha256=hDSOyODc8-lmpduJIQkps9kHlUZKYXGw3lETi8irHt0
|
|
|
127
132
|
sarvamai/types/finish_reason.py,sha256=PBWtBNkX4FMaODmlUehpF6qLB5uH_zR-Mw3M4uhIB6U,209
|
|
128
133
|
sarvamai/types/flush_signal.py,sha256=N7MJWb658KoxRpFN9cIbyQGY45zZcg8YCou3E1v--9o,759
|
|
129
134
|
sarvamai/types/format.py,sha256=57LicD0XLqW4D1QEnZWsWGifzRy1GV9P5utKPXLoxtg,144
|
|
130
|
-
sarvamai/types/initialize_connection.py,sha256=cXHmWJ1GrD1JX9fDMpQzUSRBXWTDAPekoEYtESRtQos,1062
|
|
131
|
-
sarvamai/types/initialize_connection_data.py,sha256=vfHqhN29dg6IwjYMsDKgoAaoYwi6JAljPOQTrCx5CEM,3431
|
|
132
|
-
sarvamai/types/initialize_connection_data_output_audio_bitrate.py,sha256=Wl2b6KR_shf6FzsBr1RJxBeVduS6AtO8skwQqOaffFc,209
|
|
133
|
-
sarvamai/types/initialize_connection_data_speaker.py,sha256=pdLyERHk0NtAKj-7lvebElF7QR1ac2T2jZ-keJsWba8,231
|
|
134
|
-
sarvamai/types/initialize_connection_data_target_language_code.py,sha256=iEuRFQJPsXO6lCwbNOoB7BzvoLCPIXL7UbIOKOMIjPs,276
|
|
135
135
|
sarvamai/types/language_identification_response.py,sha256=jG4ZQ6KQHCiEDqC51OniOwiRdW14Fbz22bbTsUDp_kc,1483
|
|
136
136
|
sarvamai/types/numerals_format.py,sha256=xg3lYiHcnzyFwuwRcaIteJLH_Pz6pJ9n9kTlYPEnCBU,165
|
|
137
137
|
sarvamai/types/ping_signal.py,sha256=cE53FRIXlc8bSo18z6jlAnOh6DhZEMX36huWEX6X3-A,695
|
|
@@ -173,6 +173,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
|
|
|
173
173
|
sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
|
|
174
174
|
sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
|
|
175
175
|
sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
|
|
176
|
-
sarvamai-0.1.
|
|
177
|
-
sarvamai-0.1.
|
|
178
|
-
sarvamai-0.1.
|
|
176
|
+
sarvamai-0.1.8rc7.dist-info/METADATA,sha256=byv0QyLj3QnQ13nxHgu9h8pKdiCy8e_hRmp7HL01tCU,26760
|
|
177
|
+
sarvamai-0.1.8rc7.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
178
|
+
sarvamai-0.1.8rc7.dist-info/RECORD,,
|
|
File without changes
|