sarvamai 0.1.22a3__py3-none-any.whl → 0.1.23a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +405 -206
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +186 -34
- sarvamai/core/__init__.py +76 -21
- sarvamai/core/client_wrapper.py +19 -3
- sarvamai/core/force_multipart.py +4 -2
- sarvamai/core/http_client.py +217 -97
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/http_sse/__init__.py +42 -0
- sarvamai/core/http_sse/_api.py +112 -0
- sarvamai/core/http_sse/_decoders.py +61 -0
- sarvamai/core/http_sse/_exceptions.py +7 -0
- sarvamai/core/http_sse/_models.py +17 -0
- sarvamai/core/jsonable_encoder.py +8 -0
- sarvamai/core/pydantic_utilities.py +110 -4
- sarvamai/errors/__init__.py +40 -6
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +150 -62
- sarvamai/requests/audio_data.py +0 -6
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -8
- sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -6
- sarvamai/speech_to_text/raw_client.py +54 -52
- sarvamai/speech_to_text_job/job.py +100 -2
- sarvamai/speech_to_text_job/raw_client.py +134 -130
- sarvamai/speech_to_text_streaming/__init__.py +38 -10
- sarvamai/speech_to_text_streaming/client.py +0 -44
- sarvamai/speech_to_text_streaming/raw_client.py +0 -44
- sarvamai/speech_to_text_streaming/types/__init__.py +36 -8
- sarvamai/speech_to_text_translate_job/job.py +100 -2
- sarvamai/speech_to_text_translate_job/raw_client.py +134 -130
- sarvamai/speech_to_text_translate_streaming/__init__.py +36 -9
- sarvamai/speech_to_text_translate_streaming/client.py +0 -44
- sarvamai/speech_to_text_translate_streaming/raw_client.py +0 -44
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +36 -9
- sarvamai/text/client.py +0 -12
- sarvamai/text/raw_client.py +60 -72
- sarvamai/text_to_speech/client.py +18 -0
- sarvamai/text_to_speech/raw_client.py +38 -20
- sarvamai/text_to_speech_streaming/__init__.py +28 -1
- sarvamai/text_to_speech_streaming/types/__init__.py +30 -1
- sarvamai/types/__init__.py +222 -102
- sarvamai/types/audio_data.py +0 -6
- sarvamai/types/chat_completion_request_message.py +6 -2
- sarvamai/types/completion_event_flag.py +3 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -8
- sarvamai/types/speech_to_text_translate_transcription_data.py +0 -6
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/METADATA +2 -1
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/RECORD +58 -59
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +0 -33
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +0 -33
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/types/audio_data_input_audio_codec.py +0 -33
- sarvamai/types/response_speech_state.py +0 -7
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/WHEEL +0 -0
|
@@ -2,18 +2,45 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .types import (
|
|
10
|
+
SpeechToTextTranslateStreamingFlushSignal,
|
|
11
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
12
|
+
SpeechToTextTranslateStreamingVadSignals,
|
|
13
|
+
)
|
|
14
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
15
|
+
"SpeechToTextTranslateStreamingFlushSignal": ".types",
|
|
16
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity": ".types",
|
|
17
|
+
"SpeechToTextTranslateStreamingVadSignals": ".types",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
22
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
23
|
+
if module_name is None:
|
|
24
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
25
|
+
try:
|
|
26
|
+
module = import_module(module_name, __package__)
|
|
27
|
+
if module_name == f".{attr_name}":
|
|
28
|
+
return module
|
|
29
|
+
else:
|
|
30
|
+
return getattr(module, attr_name)
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
33
|
+
except AttributeError as e:
|
|
34
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __dir__():
|
|
38
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
39
|
+
return sorted(lazy_attrs)
|
|
40
|
+
|
|
12
41
|
|
|
13
42
|
__all__ = [
|
|
14
43
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
15
44
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
16
|
-
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
17
|
-
"SpeechToTextTranslateStreamingStreamOngoingSpeechResults",
|
|
18
45
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
19
46
|
]
|
|
@@ -15,10 +15,6 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
15
15
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
16
16
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
17
17
|
)
|
|
18
|
-
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
19
|
-
from .types.speech_to_text_translate_streaming_stream_ongoing_speech_results import (
|
|
20
|
-
SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
|
|
21
|
-
)
|
|
22
18
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
23
19
|
|
|
24
20
|
try:
|
|
@@ -47,13 +43,10 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
47
43
|
self,
|
|
48
44
|
*,
|
|
49
45
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
50
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
51
46
|
sample_rate: typing.Optional[str] = None,
|
|
52
47
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
53
48
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
54
49
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
55
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
|
|
56
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
57
50
|
api_subscription_key: typing.Optional[str] = None,
|
|
58
51
|
request_options: typing.Optional[RequestOptions] = None,
|
|
59
52
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -69,9 +62,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
69
62
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
70
63
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
71
64
|
|
|
72
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
73
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
74
|
-
|
|
75
65
|
sample_rate : typing.Optional[str]
|
|
76
66
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
77
67
|
|
|
@@ -84,12 +74,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
84
74
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
85
75
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
86
76
|
|
|
87
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
|
|
88
|
-
Enable streaming of ongoing speech results during active speech
|
|
89
|
-
|
|
90
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
91
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
92
|
-
|
|
93
77
|
api_subscription_key : typing.Optional[str]
|
|
94
78
|
API subscription key for authentication
|
|
95
79
|
|
|
@@ -104,8 +88,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
104
88
|
query_params = httpx.QueryParams()
|
|
105
89
|
if model is not None:
|
|
106
90
|
query_params = query_params.add("model", model)
|
|
107
|
-
if input_audio_codec is not None:
|
|
108
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
109
91
|
if sample_rate is not None:
|
|
110
92
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
111
93
|
if high_vad_sensitivity is not None:
|
|
@@ -114,12 +96,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
114
96
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
115
97
|
if flush_signal is not None:
|
|
116
98
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
117
|
-
if stream_ongoing_speech_results is not None:
|
|
118
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
119
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
120
|
-
query_params = query_params.add(
|
|
121
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
122
|
-
)
|
|
123
99
|
ws_url = ws_url + f"?{query_params}"
|
|
124
100
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
125
101
|
if api_subscription_key is not None:
|
|
@@ -164,13 +140,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
164
140
|
self,
|
|
165
141
|
*,
|
|
166
142
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
167
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
168
143
|
sample_rate: typing.Optional[str] = None,
|
|
169
144
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
170
145
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
171
146
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
172
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
|
|
173
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
174
147
|
api_subscription_key: typing.Optional[str] = None,
|
|
175
148
|
request_options: typing.Optional[RequestOptions] = None,
|
|
176
149
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -186,9 +159,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
186
159
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
187
160
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
188
161
|
|
|
189
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
190
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
191
|
-
|
|
192
162
|
sample_rate : typing.Optional[str]
|
|
193
163
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
194
164
|
|
|
@@ -201,12 +171,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
201
171
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
202
172
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
203
173
|
|
|
204
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
|
|
205
|
-
Enable streaming of ongoing speech results during active speech
|
|
206
|
-
|
|
207
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
208
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
209
|
-
|
|
210
174
|
api_subscription_key : typing.Optional[str]
|
|
211
175
|
API subscription key for authentication
|
|
212
176
|
|
|
@@ -221,8 +185,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
221
185
|
query_params = httpx.QueryParams()
|
|
222
186
|
if model is not None:
|
|
223
187
|
query_params = query_params.add("model", model)
|
|
224
|
-
if input_audio_codec is not None:
|
|
225
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
226
188
|
if sample_rate is not None:
|
|
227
189
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
228
190
|
if high_vad_sensitivity is not None:
|
|
@@ -231,12 +193,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
231
193
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
232
194
|
if flush_signal is not None:
|
|
233
195
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
234
|
-
if stream_ongoing_speech_results is not None:
|
|
235
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
236
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
237
|
-
query_params = query_params.add(
|
|
238
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
239
|
-
)
|
|
240
196
|
ws_url = ws_url + f"?{query_params}"
|
|
241
197
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
242
198
|
if api_subscription_key is not None:
|
|
@@ -14,10 +14,6 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
14
14
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
15
15
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
16
16
|
)
|
|
17
|
-
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
18
|
-
from .types.speech_to_text_translate_streaming_stream_ongoing_speech_results import (
|
|
19
|
-
SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
|
|
20
|
-
)
|
|
21
17
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
22
18
|
|
|
23
19
|
try:
|
|
@@ -35,13 +31,10 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
35
31
|
self,
|
|
36
32
|
*,
|
|
37
33
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
38
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
39
34
|
sample_rate: typing.Optional[str] = None,
|
|
40
35
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
41
36
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
42
37
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
43
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
|
|
44
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
45
38
|
api_subscription_key: typing.Optional[str] = None,
|
|
46
39
|
request_options: typing.Optional[RequestOptions] = None,
|
|
47
40
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -57,9 +50,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
57
50
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
58
51
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
59
52
|
|
|
60
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
61
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
62
|
-
|
|
63
53
|
sample_rate : typing.Optional[str]
|
|
64
54
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
65
55
|
|
|
@@ -72,12 +62,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
72
62
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
73
63
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
74
64
|
|
|
75
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
|
|
76
|
-
Enable streaming of ongoing speech results during active speech
|
|
77
|
-
|
|
78
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
79
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
80
|
-
|
|
81
65
|
api_subscription_key : typing.Optional[str]
|
|
82
66
|
API subscription key for authentication
|
|
83
67
|
|
|
@@ -92,8 +76,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
92
76
|
query_params = httpx.QueryParams()
|
|
93
77
|
if model is not None:
|
|
94
78
|
query_params = query_params.add("model", model)
|
|
95
|
-
if input_audio_codec is not None:
|
|
96
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
97
79
|
if sample_rate is not None:
|
|
98
80
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
99
81
|
if high_vad_sensitivity is not None:
|
|
@@ -102,12 +84,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
102
84
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
103
85
|
if flush_signal is not None:
|
|
104
86
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
105
|
-
if stream_ongoing_speech_results is not None:
|
|
106
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
107
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
108
|
-
query_params = query_params.add(
|
|
109
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
110
|
-
)
|
|
111
87
|
ws_url = ws_url + f"?{query_params}"
|
|
112
88
|
headers = self._client_wrapper.get_headers()
|
|
113
89
|
if api_subscription_key is not None:
|
|
@@ -141,13 +117,10 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
141
117
|
self,
|
|
142
118
|
*,
|
|
143
119
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
144
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
145
120
|
sample_rate: typing.Optional[str] = None,
|
|
146
121
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
147
122
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
148
123
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
149
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
|
|
150
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
151
124
|
api_subscription_key: typing.Optional[str] = None,
|
|
152
125
|
request_options: typing.Optional[RequestOptions] = None,
|
|
153
126
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -163,9 +136,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
163
136
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
164
137
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
165
138
|
|
|
166
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
167
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
168
|
-
|
|
169
139
|
sample_rate : typing.Optional[str]
|
|
170
140
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
171
141
|
|
|
@@ -178,12 +148,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
178
148
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
179
149
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
180
150
|
|
|
181
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
|
|
182
|
-
Enable streaming of ongoing speech results during active speech
|
|
183
|
-
|
|
184
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
185
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
186
|
-
|
|
187
151
|
api_subscription_key : typing.Optional[str]
|
|
188
152
|
API subscription key for authentication
|
|
189
153
|
|
|
@@ -198,8 +162,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
198
162
|
query_params = httpx.QueryParams()
|
|
199
163
|
if model is not None:
|
|
200
164
|
query_params = query_params.add("model", model)
|
|
201
|
-
if input_audio_codec is not None:
|
|
202
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
203
165
|
if sample_rate is not None:
|
|
204
166
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
205
167
|
if high_vad_sensitivity is not None:
|
|
@@ -208,12 +170,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
208
170
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
209
171
|
if flush_signal is not None:
|
|
210
172
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
211
|
-
if stream_ongoing_speech_results is not None:
|
|
212
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
213
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
214
|
-
query_params = query_params.add(
|
|
215
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
216
|
-
)
|
|
217
173
|
ws_url = ws_url + f"?{query_params}"
|
|
218
174
|
headers = self._client_wrapper.get_headers()
|
|
219
175
|
if api_subscription_key is not None:
|
|
@@ -2,18 +2,45 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
10
|
+
from .speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
11
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
12
|
+
)
|
|
13
|
+
from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
14
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
15
|
+
"SpeechToTextTranslateStreamingFlushSignal": ".speech_to_text_translate_streaming_flush_signal",
|
|
16
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity": ".speech_to_text_translate_streaming_high_vad_sensitivity",
|
|
17
|
+
"SpeechToTextTranslateStreamingVadSignals": ".speech_to_text_translate_streaming_vad_signals",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
22
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
23
|
+
if module_name is None:
|
|
24
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
25
|
+
try:
|
|
26
|
+
module = import_module(module_name, __package__)
|
|
27
|
+
if module_name == f".{attr_name}":
|
|
28
|
+
return module
|
|
29
|
+
else:
|
|
30
|
+
return getattr(module, attr_name)
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
33
|
+
except AttributeError as e:
|
|
34
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __dir__():
|
|
38
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
39
|
+
return sorted(lazy_attrs)
|
|
40
|
+
|
|
12
41
|
|
|
13
42
|
__all__ = [
|
|
14
43
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
15
44
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
16
|
-
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
17
|
-
"SpeechToTextTranslateStreamingStreamOngoingSpeechResults",
|
|
18
45
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
19
46
|
]
|
sarvamai/text/client.py
CHANGED
|
@@ -47,7 +47,6 @@ class TextClient:
|
|
|
47
47
|
speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
|
|
48
48
|
mode: typing.Optional[TranslateMode] = OMIT,
|
|
49
49
|
model: typing.Optional[TranslateModel] = OMIT,
|
|
50
|
-
enable_preprocessing: typing.Optional[bool] = OMIT,
|
|
51
50
|
output_script: typing.Optional[TransliterateMode] = OMIT,
|
|
52
51
|
numerals_format: typing.Optional[NumeralsFormat] = OMIT,
|
|
53
52
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -125,10 +124,6 @@ class TextClient:
|
|
|
125
124
|
- mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
|
|
126
125
|
- sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
|
|
127
126
|
|
|
128
|
-
enable_preprocessing : typing.Optional[bool]
|
|
129
|
-
This will enable custom preprocessing of the input text which can result in better translations.
|
|
130
|
-
Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
|
|
131
|
-
|
|
132
127
|
output_script : typing.Optional[TransliterateMode]
|
|
133
128
|
**output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
|
|
134
129
|
|
|
@@ -186,7 +181,6 @@ class TextClient:
|
|
|
186
181
|
speaker_gender=speaker_gender,
|
|
187
182
|
mode=mode,
|
|
188
183
|
model=model,
|
|
189
|
-
enable_preprocessing=enable_preprocessing,
|
|
190
184
|
output_script=output_script,
|
|
191
185
|
numerals_format=numerals_format,
|
|
192
186
|
request_options=request_options,
|
|
@@ -371,7 +365,6 @@ class AsyncTextClient:
|
|
|
371
365
|
speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
|
|
372
366
|
mode: typing.Optional[TranslateMode] = OMIT,
|
|
373
367
|
model: typing.Optional[TranslateModel] = OMIT,
|
|
374
|
-
enable_preprocessing: typing.Optional[bool] = OMIT,
|
|
375
368
|
output_script: typing.Optional[TransliterateMode] = OMIT,
|
|
376
369
|
numerals_format: typing.Optional[NumeralsFormat] = OMIT,
|
|
377
370
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -449,10 +442,6 @@ class AsyncTextClient:
|
|
|
449
442
|
- mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
|
|
450
443
|
- sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
|
|
451
444
|
|
|
452
|
-
enable_preprocessing : typing.Optional[bool]
|
|
453
|
-
This will enable custom preprocessing of the input text which can result in better translations.
|
|
454
|
-
Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
|
|
455
|
-
|
|
456
445
|
output_script : typing.Optional[TransliterateMode]
|
|
457
446
|
**output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
|
|
458
447
|
|
|
@@ -518,7 +507,6 @@ class AsyncTextClient:
|
|
|
518
507
|
speaker_gender=speaker_gender,
|
|
519
508
|
mode=mode,
|
|
520
509
|
model=model,
|
|
521
|
-
enable_preprocessing=enable_preprocessing,
|
|
522
510
|
output_script=output_script,
|
|
523
511
|
numerals_format=numerals_format,
|
|
524
512
|
request_options=request_options,
|