sarvamai-0.1.23a2-py3-none-any.whl → sarvamai-0.1.23a4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +203 -405
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +34 -186
- sarvamai/core/__init__.py +21 -76
- sarvamai/core/client_wrapper.py +3 -19
- sarvamai/core/force_multipart.py +2 -4
- sarvamai/core/http_client.py +97 -217
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/jsonable_encoder.py +0 -8
- sarvamai/core/pydantic_utilities.py +4 -110
- sarvamai/errors/__init__.py +6 -40
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +62 -150
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_job_parameters.py +10 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -2
- sarvamai/speech_to_text/client.py +29 -2
- sarvamai/speech_to_text/raw_client.py +81 -56
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_job/raw_client.py +120 -120
- sarvamai/speech_to_text_streaming/__init__.py +10 -38
- sarvamai/speech_to_text_streaming/client.py +32 -6
- sarvamai/speech_to_text_streaming/raw_client.py +32 -6
- sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
- sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
- sarvamai/text/raw_client.py +60 -60
- sarvamai/text_to_speech/client.py +100 -16
- sarvamai/text_to_speech/raw_client.py +120 -36
- sarvamai/text_to_speech_streaming/__init__.py +2 -29
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +102 -222
- sarvamai/types/chat_completion_request_message.py +2 -6
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/mode.py +7 -0
- sarvamai/types/speech_to_text_job_parameters.py +10 -1
- sarvamai/types/speech_to_text_model.py +3 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -2
- sarvamai/types/speech_to_text_translate_model.py +1 -1
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
- sarvamai/core/http_sse/__init__.py +0 -42
- sarvamai/core/http_sse/_api.py +0 -112
- sarvamai/core/http_sse/_decoders.py +0 -61
- sarvamai/core/http_sse/_exceptions.py +0 -7
- sarvamai/core/http_sse/_models.py +0 -17
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
sarvamai/speech_to_text_streaming/__init__.py:

```diff
@@ -2,48 +2,20 @@
 
 # isort: skip_file
 
-import
-
-
-
-
-
-
-
-    SpeechToTextStreamingVadSignals,
-)
-_dynamic_imports: typing.Dict[str, str] = {
-    "SpeechToTextStreamingFlushSignal": ".types",
-    "SpeechToTextStreamingHighVadSensitivity": ".types",
-    "SpeechToTextStreamingLanguageCode": ".types",
-    "SpeechToTextStreamingVadSignals": ".types",
-}
-
-
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-
-
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
-
+from .types import (
+    SpeechToTextStreamingFlushSignal,
+    SpeechToTextStreamingHighVadSensitivity,
+    SpeechToTextStreamingLanguageCode,
+    SpeechToTextStreamingMode,
+    SpeechToTextStreamingModel,
+    SpeechToTextStreamingVadSignals,
+)
 
 __all__ = [
     "SpeechToTextStreamingFlushSignal",
     "SpeechToTextStreamingHighVadSensitivity",
     "SpeechToTextStreamingLanguageCode",
+    "SpeechToTextStreamingMode",
+    "SpeechToTextStreamingModel",
    "SpeechToTextStreamingVadSignals",
 ]
```
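The removed block above is the generated lazy-import machinery (`_dynamic_imports` plus module-level `__getattr__`/`__dir__`); 0.1.23a4 switches to eager imports and exports the two new names. A minimal sketch of the effect on callers, assuming only that the package is importable as `sarvamai.speech_to_text_streaming` (consistent with the file paths listed above):

```python
# With the eager imports in 0.1.23a4, these names resolve at import time
# instead of going through the removed __getattr__ hook.
from sarvamai.speech_to_text_streaming import (
    SpeechToTextStreamingMode,   # new export in 0.1.23a4
    SpeechToTextStreamingModel,  # new export in 0.1.23a4
)
```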
sarvamai/speech_to_text_streaming/client.py:

```diff
@@ -14,6 +14,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
 from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
 from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
 from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
 from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 try:
@@ -42,7 +44,8 @@ class SpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -60,11 +63,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -93,6 +105,8 @@ class SpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
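The hunks above add optional `model` and `mode` keyword arguments to `SpeechToTextStreamingClient.connect` and forward them as `model` and `mode` query parameters. A hedged usage sketch follows; the client constructor, the `api_subscription_key` argument, the `speech_to_text_streaming.connect(...)` attribute path, the context-manager usage, and the `"hi-IN"` language code are assumptions based on the package layout, not shown in this diff.

```python
from sarvamai import SarvamAI  # top-level client; constructor signature assumed

client = SarvamAI(api_subscription_key="...")

# New in 0.1.23a4: `model` and `mode`. Per the docstring, `mode` is only
# applicable when model is "saaras:v3".
with client.speech_to_text_streaming.connect(
    language_code="hi-IN",   # BCP-47 code (example value)
    model="saaras:v3",
    mode="transcribe",       # or "translate", "indic-en", "verbatim", "translit", "codemix"
    sample_rate="16000",     # optional; only 16kHz/8kHz accepted as a connection parameter
) as socket:
    ...  # send audio frames and read transcripts via the socket client
```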
sarvamai/speech_to_text_streaming/client.py (continued):

```diff
@@ -145,7 +159,8 @@ class AsyncSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -163,11 +178,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -196,6 +220,8 @@ class AsyncSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
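The asynchronous client gets the identical signature change. A sketch of the async path, with the same caveats as above (the `AsyncSarvamAI` name and async-context-manager semantics are assumed, not shown in this diff):

```python
import asyncio

from sarvamai import AsyncSarvamAI  # assumed async client name

async def main() -> None:
    client = AsyncSarvamAI(api_subscription_key="...")
    async with client.speech_to_text_streaming.connect(
        language_code="hi-IN",
        model="saaras:v3",
        mode="indic-en",  # translate Indic speech to English text
    ) as socket:
        ...  # stream audio and consume results

asyncio.run(main())
```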
sarvamai/speech_to_text_streaming/raw_client.py:

```diff
@@ -13,6 +13,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
 from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
 from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
 from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
 from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 try:
@@ -30,7 +32,8 @@ class RawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -48,11 +51,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -81,6 +93,8 @@ class RawSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -122,7 +136,8 @@ class AsyncRawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -140,11 +155,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -173,6 +197,8 @@ class AsyncRawSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
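In all four clients the new values are simply appended to the WebSocket query string via `query_params.add("mode", mode)`. As a rough illustration only (the real endpoint, path, and query-params helper are SDK internals not shown in this diff), the resulting connection URL carries the parameters like this:

```python
from urllib.parse import urlencode

# Keys mirror the diff: "language-code" is hyphenated while "sample_rate"
# uses an underscore; "model" and "mode" are passed through as-is.
params = {
    "language-code": "hi-IN",
    "model": "saaras:v3",
    "mode": "verbatim",
    "sample_rate": "16000",
}
print("wss://<streaming-endpoint>?" + urlencode(params))
# wss://<streaming-endpoint>?language-code=hi-IN&model=saaras%3Av3&mode=verbatim&sample_rate=16000
```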
sarvamai/speech_to_text_streaming/types/__init__.py:

```diff
@@ -2,46 +2,18 @@
 
 # isort: skip_file
 
-import
-from
-
-
-
-
-from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
-from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
-_dynamic_imports: typing.Dict[str, str] = {
-    "SpeechToTextStreamingFlushSignal": ".speech_to_text_streaming_flush_signal",
-    "SpeechToTextStreamingHighVadSensitivity": ".speech_to_text_streaming_high_vad_sensitivity",
-    "SpeechToTextStreamingLanguageCode": ".speech_to_text_streaming_language_code",
-    "SpeechToTextStreamingVadSignals": ".speech_to_text_streaming_vad_signals",
-}
-
-
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-
-
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
-
+from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
+from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
+from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .speech_to_text_streaming_model import SpeechToTextStreamingModel
+from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 __all__ = [
     "SpeechToTextStreamingFlushSignal",
     "SpeechToTextStreamingHighVadSensitivity",
     "SpeechToTextStreamingLanguageCode",
+    "SpeechToTextStreamingMode",
+    "SpeechToTextStreamingModel",
     "SpeechToTextStreamingVadSignals",
 ]
```