sarvamai 0.1.22a3__py3-none-any.whl → 0.1.23a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +405 -206
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +186 -34
- sarvamai/core/__init__.py +76 -21
- sarvamai/core/client_wrapper.py +19 -3
- sarvamai/core/force_multipart.py +4 -2
- sarvamai/core/http_client.py +217 -97
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/http_sse/__init__.py +42 -0
- sarvamai/core/http_sse/_api.py +112 -0
- sarvamai/core/http_sse/_decoders.py +61 -0
- sarvamai/core/http_sse/_exceptions.py +7 -0
- sarvamai/core/http_sse/_models.py +17 -0
- sarvamai/core/jsonable_encoder.py +8 -0
- sarvamai/core/pydantic_utilities.py +110 -4
- sarvamai/errors/__init__.py +40 -6
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +150 -62
- sarvamai/requests/audio_data.py +0 -6
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -8
- sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -6
- sarvamai/speech_to_text/raw_client.py +54 -52
- sarvamai/speech_to_text_job/job.py +100 -2
- sarvamai/speech_to_text_job/raw_client.py +134 -130
- sarvamai/speech_to_text_streaming/__init__.py +38 -10
- sarvamai/speech_to_text_streaming/client.py +0 -44
- sarvamai/speech_to_text_streaming/raw_client.py +0 -44
- sarvamai/speech_to_text_streaming/types/__init__.py +36 -8
- sarvamai/speech_to_text_translate_job/job.py +100 -2
- sarvamai/speech_to_text_translate_job/raw_client.py +134 -130
- sarvamai/speech_to_text_translate_streaming/__init__.py +36 -9
- sarvamai/speech_to_text_translate_streaming/client.py +0 -44
- sarvamai/speech_to_text_translate_streaming/raw_client.py +0 -44
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +36 -9
- sarvamai/text/client.py +0 -12
- sarvamai/text/raw_client.py +60 -72
- sarvamai/text_to_speech/client.py +18 -0
- sarvamai/text_to_speech/raw_client.py +38 -20
- sarvamai/text_to_speech_streaming/__init__.py +28 -1
- sarvamai/text_to_speech_streaming/types/__init__.py +30 -1
- sarvamai/types/__init__.py +222 -102
- sarvamai/types/audio_data.py +0 -6
- sarvamai/types/chat_completion_request_message.py +6 -2
- sarvamai/types/completion_event_flag.py +3 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -8
- sarvamai/types/speech_to_text_translate_transcription_data.py +0 -6
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/METADATA +2 -1
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/RECORD +58 -59
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +0 -33
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +0 -33
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/types/audio_data_input_audio_codec.py +0 -33
- sarvamai/types/response_speech_state.py +0 -7
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/WHEEL +0 -0
|
@@ -2,20 +2,48 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .types import (
|
|
10
|
+
SpeechToTextStreamingFlushSignal,
|
|
11
|
+
SpeechToTextStreamingHighVadSensitivity,
|
|
12
|
+
SpeechToTextStreamingLanguageCode,
|
|
13
|
+
SpeechToTextStreamingVadSignals,
|
|
14
|
+
)
|
|
15
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
16
|
+
"SpeechToTextStreamingFlushSignal": ".types",
|
|
17
|
+
"SpeechToTextStreamingHighVadSensitivity": ".types",
|
|
18
|
+
"SpeechToTextStreamingLanguageCode": ".types",
|
|
19
|
+
"SpeechToTextStreamingVadSignals": ".types",
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
24
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
25
|
+
if module_name is None:
|
|
26
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
27
|
+
try:
|
|
28
|
+
module = import_module(module_name, __package__)
|
|
29
|
+
if module_name == f".{attr_name}":
|
|
30
|
+
return module
|
|
31
|
+
else:
|
|
32
|
+
return getattr(module, attr_name)
|
|
33
|
+
except ImportError as e:
|
|
34
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
except AttributeError as e:
|
|
36
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def __dir__():
|
|
40
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
41
|
+
return sorted(lazy_attrs)
|
|
42
|
+
|
|
13
43
|
|
|
14
44
|
__all__ = [
|
|
15
45
|
"SpeechToTextStreamingFlushSignal",
|
|
16
46
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
17
|
-
"SpeechToTextStreamingInputAudioCodec",
|
|
18
47
|
"SpeechToTextStreamingLanguageCode",
|
|
19
|
-
"SpeechToTextStreamingStreamOngoingSpeechResults",
|
|
20
48
|
"SpeechToTextStreamingVadSignals",
|
|
21
49
|
]
|
|
@@ -13,11 +13,7 @@ from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStre
|
|
|
13
13
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
14
14
|
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
15
15
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
16
|
-
from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
|
|
17
16
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
18
|
-
from .types.speech_to_text_streaming_stream_ongoing_speech_results import (
|
|
19
|
-
SpeechToTextStreamingStreamOngoingSpeechResults,
|
|
20
|
-
)
|
|
21
17
|
from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
22
18
|
|
|
23
19
|
try:
|
|
@@ -47,13 +43,10 @@ class SpeechToTextStreamingClient:
|
|
|
47
43
|
*,
|
|
48
44
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
49
45
|
model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
|
|
50
|
-
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
51
46
|
sample_rate: typing.Optional[str] = None,
|
|
52
47
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
53
48
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
54
49
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
55
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
|
|
56
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
57
50
|
api_subscription_key: typing.Optional[str] = None,
|
|
58
51
|
request_options: typing.Optional[RequestOptions] = None,
|
|
59
52
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -72,9 +65,6 @@ class SpeechToTextStreamingClient:
|
|
|
72
65
|
model : typing.Optional[typing.Literal["saarika:v2.5"]]
|
|
73
66
|
Speech to text model to use
|
|
74
67
|
|
|
75
|
-
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
76
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
77
|
-
|
|
78
68
|
sample_rate : typing.Optional[str]
|
|
79
69
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
80
70
|
|
|
@@ -87,12 +77,6 @@ class SpeechToTextStreamingClient:
|
|
|
87
77
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
88
78
|
Signal to flush the audio buffer and finalize transcription
|
|
89
79
|
|
|
90
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
|
|
91
|
-
Enable streaming of ongoing speech results during active speech
|
|
92
|
-
|
|
93
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
94
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
95
|
-
|
|
96
80
|
api_subscription_key : typing.Optional[str]
|
|
97
81
|
API subscription key for authentication
|
|
98
82
|
|
|
@@ -109,8 +93,6 @@ class SpeechToTextStreamingClient:
|
|
|
109
93
|
query_params = query_params.add("language-code", language_code)
|
|
110
94
|
if model is not None:
|
|
111
95
|
query_params = query_params.add("model", model)
|
|
112
|
-
if input_audio_codec is not None:
|
|
113
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
114
96
|
if sample_rate is not None:
|
|
115
97
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
116
98
|
if high_vad_sensitivity is not None:
|
|
@@ -119,12 +101,6 @@ class SpeechToTextStreamingClient:
|
|
|
119
101
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
120
102
|
if flush_signal is not None:
|
|
121
103
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
122
|
-
if stream_ongoing_speech_results is not None:
|
|
123
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
124
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
125
|
-
query_params = query_params.add(
|
|
126
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
127
|
-
)
|
|
128
104
|
ws_url = ws_url + f"?{query_params}"
|
|
129
105
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
130
106
|
if api_subscription_key is not None:
|
|
@@ -170,13 +146,10 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
170
146
|
*,
|
|
171
147
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
172
148
|
model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
|
|
173
|
-
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
174
149
|
sample_rate: typing.Optional[str] = None,
|
|
175
150
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
176
151
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
177
152
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
178
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
|
|
179
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
180
153
|
api_subscription_key: typing.Optional[str] = None,
|
|
181
154
|
request_options: typing.Optional[RequestOptions] = None,
|
|
182
155
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -195,9 +168,6 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
195
168
|
model : typing.Optional[typing.Literal["saarika:v2.5"]]
|
|
196
169
|
Speech to text model to use
|
|
197
170
|
|
|
198
|
-
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
199
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
200
|
-
|
|
201
171
|
sample_rate : typing.Optional[str]
|
|
202
172
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
203
173
|
|
|
@@ -210,12 +180,6 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
210
180
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
211
181
|
Signal to flush the audio buffer and finalize transcription
|
|
212
182
|
|
|
213
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
|
|
214
|
-
Enable streaming of ongoing speech results during active speech
|
|
215
|
-
|
|
216
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
217
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
218
|
-
|
|
219
183
|
api_subscription_key : typing.Optional[str]
|
|
220
184
|
API subscription key for authentication
|
|
221
185
|
|
|
@@ -232,8 +196,6 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
232
196
|
query_params = query_params.add("language-code", language_code)
|
|
233
197
|
if model is not None:
|
|
234
198
|
query_params = query_params.add("model", model)
|
|
235
|
-
if input_audio_codec is not None:
|
|
236
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
237
199
|
if sample_rate is not None:
|
|
238
200
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
239
201
|
if high_vad_sensitivity is not None:
|
|
@@ -242,12 +204,6 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
242
204
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
243
205
|
if flush_signal is not None:
|
|
244
206
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
245
|
-
if stream_ongoing_speech_results is not None:
|
|
246
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
247
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
248
|
-
query_params = query_params.add(
|
|
249
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
250
|
-
)
|
|
251
207
|
ws_url = ws_url + f"?{query_params}"
|
|
252
208
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
253
209
|
if api_subscription_key is not None:
|
|
@@ -12,11 +12,7 @@ from ..core.request_options import RequestOptions
|
|
|
12
12
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
13
13
|
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
14
14
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
15
|
-
from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
|
|
16
15
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
17
|
-
from .types.speech_to_text_streaming_stream_ongoing_speech_results import (
|
|
18
|
-
SpeechToTextStreamingStreamOngoingSpeechResults,
|
|
19
|
-
)
|
|
20
16
|
from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
21
17
|
|
|
22
18
|
try:
|
|
@@ -35,13 +31,10 @@ class RawSpeechToTextStreamingClient:
|
|
|
35
31
|
*,
|
|
36
32
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
37
33
|
model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
|
|
38
|
-
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
39
34
|
sample_rate: typing.Optional[str] = None,
|
|
40
35
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
41
36
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
42
37
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
43
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
|
|
44
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
45
38
|
api_subscription_key: typing.Optional[str] = None,
|
|
46
39
|
request_options: typing.Optional[RequestOptions] = None,
|
|
47
40
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -60,9 +53,6 @@ class RawSpeechToTextStreamingClient:
|
|
|
60
53
|
model : typing.Optional[typing.Literal["saarika:v2.5"]]
|
|
61
54
|
Speech to text model to use
|
|
62
55
|
|
|
63
|
-
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
64
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
65
|
-
|
|
66
56
|
sample_rate : typing.Optional[str]
|
|
67
57
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
68
58
|
|
|
@@ -75,12 +65,6 @@ class RawSpeechToTextStreamingClient:
|
|
|
75
65
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
76
66
|
Signal to flush the audio buffer and finalize transcription
|
|
77
67
|
|
|
78
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
|
|
79
|
-
Enable streaming of ongoing speech results during active speech
|
|
80
|
-
|
|
81
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
82
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
83
|
-
|
|
84
68
|
api_subscription_key : typing.Optional[str]
|
|
85
69
|
API subscription key for authentication
|
|
86
70
|
|
|
@@ -97,8 +81,6 @@ class RawSpeechToTextStreamingClient:
|
|
|
97
81
|
query_params = query_params.add("language-code", language_code)
|
|
98
82
|
if model is not None:
|
|
99
83
|
query_params = query_params.add("model", model)
|
|
100
|
-
if input_audio_codec is not None:
|
|
101
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
102
84
|
if sample_rate is not None:
|
|
103
85
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
104
86
|
if high_vad_sensitivity is not None:
|
|
@@ -107,12 +89,6 @@ class RawSpeechToTextStreamingClient:
|
|
|
107
89
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
108
90
|
if flush_signal is not None:
|
|
109
91
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
110
|
-
if stream_ongoing_speech_results is not None:
|
|
111
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
112
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
113
|
-
query_params = query_params.add(
|
|
114
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
115
|
-
)
|
|
116
92
|
ws_url = ws_url + f"?{query_params}"
|
|
117
93
|
headers = self._client_wrapper.get_headers()
|
|
118
94
|
if api_subscription_key is not None:
|
|
@@ -147,13 +123,10 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
147
123
|
*,
|
|
148
124
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
149
125
|
model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
|
|
150
|
-
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
151
126
|
sample_rate: typing.Optional[str] = None,
|
|
152
127
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
153
128
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
154
129
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
155
|
-
stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
|
|
156
|
-
streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
|
|
157
130
|
api_subscription_key: typing.Optional[str] = None,
|
|
158
131
|
request_options: typing.Optional[RequestOptions] = None,
|
|
159
132
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -172,9 +145,6 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
172
145
|
model : typing.Optional[typing.Literal["saarika:v2.5"]]
|
|
173
146
|
Speech to text model to use
|
|
174
147
|
|
|
175
|
-
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
176
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
177
|
-
|
|
178
148
|
sample_rate : typing.Optional[str]
|
|
179
149
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
180
150
|
|
|
@@ -187,12 +157,6 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
187
157
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
188
158
|
Signal to flush the audio buffer and finalize transcription
|
|
189
159
|
|
|
190
|
-
stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
|
|
191
|
-
Enable streaming of ongoing speech results during active speech
|
|
192
|
-
|
|
193
|
-
streaming_ongoing_requests_frame_size : typing.Optional[str]
|
|
194
|
-
Frame size for streaming ongoing speech results (1-100)
|
|
195
|
-
|
|
196
160
|
api_subscription_key : typing.Optional[str]
|
|
197
161
|
API subscription key for authentication
|
|
198
162
|
|
|
@@ -209,8 +173,6 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
209
173
|
query_params = query_params.add("language-code", language_code)
|
|
210
174
|
if model is not None:
|
|
211
175
|
query_params = query_params.add("model", model)
|
|
212
|
-
if input_audio_codec is not None:
|
|
213
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
214
176
|
if sample_rate is not None:
|
|
215
177
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
216
178
|
if high_vad_sensitivity is not None:
|
|
@@ -219,12 +181,6 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
219
181
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
220
182
|
if flush_signal is not None:
|
|
221
183
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
222
|
-
if stream_ongoing_speech_results is not None:
|
|
223
|
-
query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
|
|
224
|
-
if streaming_ongoing_requests_frame_size is not None:
|
|
225
|
-
query_params = query_params.add(
|
|
226
|
-
"streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
|
|
227
|
-
)
|
|
228
184
|
ws_url = ws_url + f"?{query_params}"
|
|
229
185
|
headers = self._client_wrapper.get_headers()
|
|
230
186
|
if api_subscription_key is not None:
|
|
@@ -2,18 +2,46 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
from .
|
|
10
|
-
from .
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
10
|
+
from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
11
|
+
from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
12
|
+
from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
13
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
14
|
+
"SpeechToTextStreamingFlushSignal": ".speech_to_text_streaming_flush_signal",
|
|
15
|
+
"SpeechToTextStreamingHighVadSensitivity": ".speech_to_text_streaming_high_vad_sensitivity",
|
|
16
|
+
"SpeechToTextStreamingLanguageCode": ".speech_to_text_streaming_language_code",
|
|
17
|
+
"SpeechToTextStreamingVadSignals": ".speech_to_text_streaming_vad_signals",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
22
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
23
|
+
if module_name is None:
|
|
24
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
25
|
+
try:
|
|
26
|
+
module = import_module(module_name, __package__)
|
|
27
|
+
if module_name == f".{attr_name}":
|
|
28
|
+
return module
|
|
29
|
+
else:
|
|
30
|
+
return getattr(module, attr_name)
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
33
|
+
except AttributeError as e:
|
|
34
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __dir__():
|
|
38
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
39
|
+
return sorted(lazy_attrs)
|
|
40
|
+
|
|
11
41
|
|
|
12
42
|
__all__ = [
|
|
13
43
|
"SpeechToTextStreamingFlushSignal",
|
|
14
44
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
15
|
-
"SpeechToTextStreamingInputAudioCodec",
|
|
16
45
|
"SpeechToTextStreamingLanguageCode",
|
|
17
|
-
"SpeechToTextStreamingStreamOngoingSpeechResults",
|
|
18
46
|
"SpeechToTextStreamingVadSignals",
|
|
19
47
|
]
|
|
@@ -150,9 +150,58 @@ class AsyncSpeechToTextTranslateJob:
|
|
|
150
150
|
"output_file": detail.outputs[0].file_name,
|
|
151
151
|
}
|
|
152
152
|
for detail in (job_status.job_details or [])
|
|
153
|
-
if detail.inputs and detail.outputs
|
|
153
|
+
if detail.inputs and detail.outputs and detail.state == "Success"
|
|
154
154
|
]
|
|
155
155
|
|
|
156
|
+
async def get_file_results(
|
|
157
|
+
self,
|
|
158
|
+
) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
|
|
159
|
+
"""
|
|
160
|
+
Get detailed results for each file in the batch job.
|
|
161
|
+
|
|
162
|
+
Returns
|
|
163
|
+
-------
|
|
164
|
+
Dict[str, List[Dict[str, Any]]]
|
|
165
|
+
Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
|
|
166
|
+
Each file detail includes:
|
|
167
|
+
- 'file_name': Name of the input file
|
|
168
|
+
- 'status': Status of processing ('Success' or 'Failed')
|
|
169
|
+
- 'error_message': Error message if failed (None if successful)
|
|
170
|
+
- 'output_file': Name of output file if successful (None if failed)
|
|
171
|
+
"""
|
|
172
|
+
job_status = await self.get_status()
|
|
173
|
+
results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
|
|
174
|
+
"successful": [],
|
|
175
|
+
"failed": [],
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
for detail in job_status.job_details or []:
|
|
179
|
+
# Check for empty lists explicitly
|
|
180
|
+
if not detail.inputs or len(detail.inputs) == 0:
|
|
181
|
+
continue
|
|
182
|
+
|
|
183
|
+
try:
|
|
184
|
+
file_info = {
|
|
185
|
+
"file_name": detail.inputs[0].file_name,
|
|
186
|
+
"status": detail.state,
|
|
187
|
+
"error_message": detail.error_message,
|
|
188
|
+
"output_file": (
|
|
189
|
+
detail.outputs[0].file_name
|
|
190
|
+
if detail.outputs and len(detail.outputs) > 0
|
|
191
|
+
else None
|
|
192
|
+
),
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
if detail.state == "Success":
|
|
196
|
+
results["successful"].append(file_info)
|
|
197
|
+
else:
|
|
198
|
+
results["failed"].append(file_info)
|
|
199
|
+
except (IndexError, AttributeError):
|
|
200
|
+
# Skip malformed job details
|
|
201
|
+
continue
|
|
202
|
+
|
|
203
|
+
return results
|
|
204
|
+
|
|
156
205
|
async def download_outputs(self, output_dir: str) -> bool:
|
|
157
206
|
"""
|
|
158
207
|
Download output files to the specified directory.
|
|
@@ -395,9 +444,58 @@ class SpeechToTextTranslateJob:
|
|
|
395
444
|
"output_file": detail.outputs[0].file_name,
|
|
396
445
|
}
|
|
397
446
|
for detail in (job_status.job_details or [])
|
|
398
|
-
if detail.inputs and detail.outputs
|
|
447
|
+
if detail.inputs and detail.outputs and detail.state == "Success"
|
|
399
448
|
]
|
|
400
449
|
|
|
450
|
+
def get_file_results(
|
|
451
|
+
self,
|
|
452
|
+
) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
|
|
453
|
+
"""
|
|
454
|
+
Get detailed results for each file in the batch job.
|
|
455
|
+
|
|
456
|
+
Returns
|
|
457
|
+
-------
|
|
458
|
+
Dict[str, List[Dict[str, Any]]]
|
|
459
|
+
Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
|
|
460
|
+
Each file detail includes:
|
|
461
|
+
- 'file_name': Name of the input file
|
|
462
|
+
- 'status': Status of processing ('Success' or 'Failed')
|
|
463
|
+
- 'error_message': Error message if failed (None if successful)
|
|
464
|
+
- 'output_file': Name of output file if successful (None if failed)
|
|
465
|
+
"""
|
|
466
|
+
job_status = self.get_status()
|
|
467
|
+
results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
|
|
468
|
+
"successful": [],
|
|
469
|
+
"failed": [],
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
for detail in job_status.job_details or []:
|
|
473
|
+
# Check for empty lists explicitly
|
|
474
|
+
if not detail.inputs or len(detail.inputs) == 0:
|
|
475
|
+
continue
|
|
476
|
+
|
|
477
|
+
try:
|
|
478
|
+
file_info = {
|
|
479
|
+
"file_name": detail.inputs[0].file_name,
|
|
480
|
+
"status": detail.state,
|
|
481
|
+
"error_message": detail.error_message,
|
|
482
|
+
"output_file": (
|
|
483
|
+
detail.outputs[0].file_name
|
|
484
|
+
if detail.outputs and len(detail.outputs) > 0
|
|
485
|
+
else None
|
|
486
|
+
),
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
if detail.state == "Success":
|
|
490
|
+
results["successful"].append(file_info)
|
|
491
|
+
else:
|
|
492
|
+
results["failed"].append(file_info)
|
|
493
|
+
except (IndexError, AttributeError):
|
|
494
|
+
# Skip malformed job details
|
|
495
|
+
continue
|
|
496
|
+
|
|
497
|
+
return results
|
|
498
|
+
|
|
401
499
|
def download_outputs(self, output_dir: str) -> bool:
|
|
402
500
|
"""
|
|
403
501
|
Download output files to the specified directory.
|