sarvamai-0.1.22a3-py3-none-any.whl → sarvamai-0.1.23a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. sarvamai/__init__.py +405 -206
  2. sarvamai/chat/raw_client.py +20 -20
  3. sarvamai/client.py +186 -34
  4. sarvamai/core/__init__.py +76 -21
  5. sarvamai/core/client_wrapper.py +19 -3
  6. sarvamai/core/force_multipart.py +4 -2
  7. sarvamai/core/http_client.py +217 -97
  8. sarvamai/core/http_response.py +1 -1
  9. sarvamai/core/http_sse/__init__.py +42 -0
  10. sarvamai/core/http_sse/_api.py +112 -0
  11. sarvamai/core/http_sse/_decoders.py +61 -0
  12. sarvamai/core/http_sse/_exceptions.py +7 -0
  13. sarvamai/core/http_sse/_models.py +17 -0
  14. sarvamai/core/jsonable_encoder.py +8 -0
  15. sarvamai/core/pydantic_utilities.py +110 -4
  16. sarvamai/errors/__init__.py +40 -6
  17. sarvamai/errors/bad_request_error.py +1 -1
  18. sarvamai/errors/forbidden_error.py +1 -1
  19. sarvamai/errors/internal_server_error.py +1 -1
  20. sarvamai/errors/service_unavailable_error.py +1 -1
  21. sarvamai/errors/too_many_requests_error.py +1 -1
  22. sarvamai/errors/unprocessable_entity_error.py +1 -1
  23. sarvamai/requests/__init__.py +150 -62
  24. sarvamai/requests/audio_data.py +0 -6
  25. sarvamai/requests/error_response_data.py +1 -1
  26. sarvamai/requests/file_signed_url_details.py +1 -1
  27. sarvamai/requests/speech_to_text_transcription_data.py +2 -8
  28. sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -6
  29. sarvamai/speech_to_text/raw_client.py +54 -52
  30. sarvamai/speech_to_text_job/job.py +100 -2
  31. sarvamai/speech_to_text_job/raw_client.py +134 -130
  32. sarvamai/speech_to_text_streaming/__init__.py +38 -10
  33. sarvamai/speech_to_text_streaming/client.py +0 -44
  34. sarvamai/speech_to_text_streaming/raw_client.py +0 -44
  35. sarvamai/speech_to_text_streaming/types/__init__.py +36 -8
  36. sarvamai/speech_to_text_translate_job/job.py +100 -2
  37. sarvamai/speech_to_text_translate_job/raw_client.py +134 -130
  38. sarvamai/speech_to_text_translate_streaming/__init__.py +36 -9
  39. sarvamai/speech_to_text_translate_streaming/client.py +0 -44
  40. sarvamai/speech_to_text_translate_streaming/raw_client.py +0 -44
  41. sarvamai/speech_to_text_translate_streaming/types/__init__.py +36 -9
  42. sarvamai/text/client.py +0 -12
  43. sarvamai/text/raw_client.py +60 -72
  44. sarvamai/text_to_speech/client.py +18 -0
  45. sarvamai/text_to_speech/raw_client.py +38 -20
  46. sarvamai/text_to_speech_streaming/__init__.py +28 -1
  47. sarvamai/text_to_speech_streaming/types/__init__.py +30 -1
  48. sarvamai/types/__init__.py +222 -102
  49. sarvamai/types/audio_data.py +0 -6
  50. sarvamai/types/chat_completion_request_message.py +6 -2
  51. sarvamai/types/completion_event_flag.py +3 -1
  52. sarvamai/types/error_response_data.py +1 -1
  53. sarvamai/types/file_signed_url_details.py +1 -1
  54. sarvamai/types/speech_to_text_transcription_data.py +2 -8
  55. sarvamai/types/speech_to_text_translate_transcription_data.py +0 -6
  56. {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/METADATA +2 -1
  57. {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/RECORD +58 -59
  58. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +0 -33
  59. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_stream_ongoing_speech_results.py +0 -5
  60. sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +0 -33
  61. sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_stream_ongoing_speech_results.py +0 -5
  62. sarvamai/types/audio_data_input_audio_codec.py +0 -33
  63. sarvamai/types/response_speech_state.py +0 -7
  64. {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.23a1.dist-info}/WHEEL +0 -0
@@ -2,20 +2,48 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- from .types import (
6
- SpeechToTextStreamingFlushSignal,
7
- SpeechToTextStreamingHighVadSensitivity,
8
- SpeechToTextStreamingInputAudioCodec,
9
- SpeechToTextStreamingLanguageCode,
10
- SpeechToTextStreamingStreamOngoingSpeechResults,
11
- SpeechToTextStreamingVadSignals,
12
- )
5
+ import typing
6
+ from importlib import import_module
7
+
8
+ if typing.TYPE_CHECKING:
9
+ from .types import (
10
+ SpeechToTextStreamingFlushSignal,
11
+ SpeechToTextStreamingHighVadSensitivity,
12
+ SpeechToTextStreamingLanguageCode,
13
+ SpeechToTextStreamingVadSignals,
14
+ )
15
+ _dynamic_imports: typing.Dict[str, str] = {
16
+ "SpeechToTextStreamingFlushSignal": ".types",
17
+ "SpeechToTextStreamingHighVadSensitivity": ".types",
18
+ "SpeechToTextStreamingLanguageCode": ".types",
19
+ "SpeechToTextStreamingVadSignals": ".types",
20
+ }
21
+
22
+
23
+ def __getattr__(attr_name: str) -> typing.Any:
24
+ module_name = _dynamic_imports.get(attr_name)
25
+ if module_name is None:
26
+ raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
27
+ try:
28
+ module = import_module(module_name, __package__)
29
+ if module_name == f".{attr_name}":
30
+ return module
31
+ else:
32
+ return getattr(module, attr_name)
33
+ except ImportError as e:
34
+ raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
35
+ except AttributeError as e:
36
+ raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
37
+
38
+
39
+ def __dir__():
40
+ lazy_attrs = list(_dynamic_imports.keys())
41
+ return sorted(lazy_attrs)
42
+
13
43
 
14
44
  __all__ = [
15
45
  "SpeechToTextStreamingFlushSignal",
16
46
  "SpeechToTextStreamingHighVadSensitivity",
17
- "SpeechToTextStreamingInputAudioCodec",
18
47
  "SpeechToTextStreamingLanguageCode",
19
- "SpeechToTextStreamingStreamOngoingSpeechResults",
20
48
  "SpeechToTextStreamingVadSignals",
21
49
  ]
@@ -13,11 +13,7 @@ from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStre
13
13
  from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
14
14
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
15
15
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
16
- from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
17
16
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
18
- from .types.speech_to_text_streaming_stream_ongoing_speech_results import (
19
- SpeechToTextStreamingStreamOngoingSpeechResults,
20
- )
21
17
  from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
22
18
 
23
19
  try:
@@ -47,13 +43,10 @@ class SpeechToTextStreamingClient:
47
43
  *,
48
44
  language_code: SpeechToTextStreamingLanguageCode,
49
45
  model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
50
- input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
51
46
  sample_rate: typing.Optional[str] = None,
52
47
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
53
48
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
54
49
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
55
- stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
56
- streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
57
50
  api_subscription_key: typing.Optional[str] = None,
58
51
  request_options: typing.Optional[RequestOptions] = None,
59
52
  ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
@@ -72,9 +65,6 @@ class SpeechToTextStreamingClient:
72
65
  model : typing.Optional[typing.Literal["saarika:v2.5"]]
73
66
  Speech to text model to use
74
67
 
75
- input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
76
- Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
77
-
78
68
  sample_rate : typing.Optional[str]
79
69
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
80
70
 
@@ -87,12 +77,6 @@ class SpeechToTextStreamingClient:
87
77
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
88
78
  Signal to flush the audio buffer and finalize transcription
89
79
 
90
- stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
91
- Enable streaming of ongoing speech results during active speech
92
-
93
- streaming_ongoing_requests_frame_size : typing.Optional[str]
94
- Frame size for streaming ongoing speech results (1-100)
95
-
96
80
  api_subscription_key : typing.Optional[str]
97
81
  API subscription key for authentication
98
82
 
@@ -109,8 +93,6 @@ class SpeechToTextStreamingClient:
109
93
  query_params = query_params.add("language-code", language_code)
110
94
  if model is not None:
111
95
  query_params = query_params.add("model", model)
112
- if input_audio_codec is not None:
113
- query_params = query_params.add("input_audio_codec", input_audio_codec)
114
96
  if sample_rate is not None:
115
97
  query_params = query_params.add("sample_rate", sample_rate)
116
98
  if high_vad_sensitivity is not None:
@@ -119,12 +101,6 @@ class SpeechToTextStreamingClient:
119
101
  query_params = query_params.add("vad_signals", vad_signals)
120
102
  if flush_signal is not None:
121
103
  query_params = query_params.add("flush_signal", flush_signal)
122
- if stream_ongoing_speech_results is not None:
123
- query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
124
- if streaming_ongoing_requests_frame_size is not None:
125
- query_params = query_params.add(
126
- "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
127
- )
128
104
  ws_url = ws_url + f"?{query_params}"
129
105
  headers = self._raw_client._client_wrapper.get_headers()
130
106
  if api_subscription_key is not None:
@@ -170,13 +146,10 @@ class AsyncSpeechToTextStreamingClient:
170
146
  *,
171
147
  language_code: SpeechToTextStreamingLanguageCode,
172
148
  model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
173
- input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
174
149
  sample_rate: typing.Optional[str] = None,
175
150
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
176
151
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
177
152
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
178
- stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
179
- streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
180
153
  api_subscription_key: typing.Optional[str] = None,
181
154
  request_options: typing.Optional[RequestOptions] = None,
182
155
  ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
@@ -195,9 +168,6 @@ class AsyncSpeechToTextStreamingClient:
195
168
  model : typing.Optional[typing.Literal["saarika:v2.5"]]
196
169
  Speech to text model to use
197
170
 
198
- input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
199
- Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
200
-
201
171
  sample_rate : typing.Optional[str]
202
172
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
203
173
 
@@ -210,12 +180,6 @@ class AsyncSpeechToTextStreamingClient:
210
180
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
211
181
  Signal to flush the audio buffer and finalize transcription
212
182
 
213
- stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
214
- Enable streaming of ongoing speech results during active speech
215
-
216
- streaming_ongoing_requests_frame_size : typing.Optional[str]
217
- Frame size for streaming ongoing speech results (1-100)
218
-
219
183
  api_subscription_key : typing.Optional[str]
220
184
  API subscription key for authentication
221
185
 
@@ -232,8 +196,6 @@ class AsyncSpeechToTextStreamingClient:
232
196
  query_params = query_params.add("language-code", language_code)
233
197
  if model is not None:
234
198
  query_params = query_params.add("model", model)
235
- if input_audio_codec is not None:
236
- query_params = query_params.add("input_audio_codec", input_audio_codec)
237
199
  if sample_rate is not None:
238
200
  query_params = query_params.add("sample_rate", sample_rate)
239
201
  if high_vad_sensitivity is not None:
@@ -242,12 +204,6 @@ class AsyncSpeechToTextStreamingClient:
242
204
  query_params = query_params.add("vad_signals", vad_signals)
243
205
  if flush_signal is not None:
244
206
  query_params = query_params.add("flush_signal", flush_signal)
245
- if stream_ongoing_speech_results is not None:
246
- query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
247
- if streaming_ongoing_requests_frame_size is not None:
248
- query_params = query_params.add(
249
- "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
250
- )
251
207
  ws_url = ws_url + f"?{query_params}"
252
208
  headers = self._raw_client._client_wrapper.get_headers()
253
209
  if api_subscription_key is not None:
@@ -12,11 +12,7 @@ from ..core.request_options import RequestOptions
12
12
  from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
13
13
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
14
14
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
15
- from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
16
15
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
17
- from .types.speech_to_text_streaming_stream_ongoing_speech_results import (
18
- SpeechToTextStreamingStreamOngoingSpeechResults,
19
- )
20
16
  from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
21
17
 
22
18
  try:
@@ -35,13 +31,10 @@ class RawSpeechToTextStreamingClient:
35
31
  *,
36
32
  language_code: SpeechToTextStreamingLanguageCode,
37
33
  model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
38
- input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
39
34
  sample_rate: typing.Optional[str] = None,
40
35
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
41
36
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
42
37
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
43
- stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
44
- streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
45
38
  api_subscription_key: typing.Optional[str] = None,
46
39
  request_options: typing.Optional[RequestOptions] = None,
47
40
  ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
@@ -60,9 +53,6 @@ class RawSpeechToTextStreamingClient:
60
53
  model : typing.Optional[typing.Literal["saarika:v2.5"]]
61
54
  Speech to text model to use
62
55
 
63
- input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
64
- Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
65
-
66
56
  sample_rate : typing.Optional[str]
67
57
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
68
58
 
@@ -75,12 +65,6 @@ class RawSpeechToTextStreamingClient:
75
65
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
76
66
  Signal to flush the audio buffer and finalize transcription
77
67
 
78
- stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
79
- Enable streaming of ongoing speech results during active speech
80
-
81
- streaming_ongoing_requests_frame_size : typing.Optional[str]
82
- Frame size for streaming ongoing speech results (1-100)
83
-
84
68
  api_subscription_key : typing.Optional[str]
85
69
  API subscription key for authentication
86
70
 
@@ -97,8 +81,6 @@ class RawSpeechToTextStreamingClient:
97
81
  query_params = query_params.add("language-code", language_code)
98
82
  if model is not None:
99
83
  query_params = query_params.add("model", model)
100
- if input_audio_codec is not None:
101
- query_params = query_params.add("input_audio_codec", input_audio_codec)
102
84
  if sample_rate is not None:
103
85
  query_params = query_params.add("sample_rate", sample_rate)
104
86
  if high_vad_sensitivity is not None:
@@ -107,12 +89,6 @@ class RawSpeechToTextStreamingClient:
107
89
  query_params = query_params.add("vad_signals", vad_signals)
108
90
  if flush_signal is not None:
109
91
  query_params = query_params.add("flush_signal", flush_signal)
110
- if stream_ongoing_speech_results is not None:
111
- query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
112
- if streaming_ongoing_requests_frame_size is not None:
113
- query_params = query_params.add(
114
- "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
115
- )
116
92
  ws_url = ws_url + f"?{query_params}"
117
93
  headers = self._client_wrapper.get_headers()
118
94
  if api_subscription_key is not None:
@@ -147,13 +123,10 @@ class AsyncRawSpeechToTextStreamingClient:
147
123
  *,
148
124
  language_code: SpeechToTextStreamingLanguageCode,
149
125
  model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
150
- input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
151
126
  sample_rate: typing.Optional[str] = None,
152
127
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
153
128
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
154
129
  flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
155
- stream_ongoing_speech_results: typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults] = None,
156
- streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
157
130
  api_subscription_key: typing.Optional[str] = None,
158
131
  request_options: typing.Optional[RequestOptions] = None,
159
132
  ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
@@ -172,9 +145,6 @@ class AsyncRawSpeechToTextStreamingClient:
172
145
  model : typing.Optional[typing.Literal["saarika:v2.5"]]
173
146
  Speech to text model to use
174
147
 
175
- input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
176
- Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
177
-
178
148
  sample_rate : typing.Optional[str]
179
149
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
180
150
 
@@ -187,12 +157,6 @@ class AsyncRawSpeechToTextStreamingClient:
187
157
  flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
188
158
  Signal to flush the audio buffer and finalize transcription
189
159
 
190
- stream_ongoing_speech_results : typing.Optional[SpeechToTextStreamingStreamOngoingSpeechResults]
191
- Enable streaming of ongoing speech results during active speech
192
-
193
- streaming_ongoing_requests_frame_size : typing.Optional[str]
194
- Frame size for streaming ongoing speech results (1-100)
195
-
196
160
  api_subscription_key : typing.Optional[str]
197
161
  API subscription key for authentication
198
162
 
@@ -209,8 +173,6 @@ class AsyncRawSpeechToTextStreamingClient:
209
173
  query_params = query_params.add("language-code", language_code)
210
174
  if model is not None:
211
175
  query_params = query_params.add("model", model)
212
- if input_audio_codec is not None:
213
- query_params = query_params.add("input_audio_codec", input_audio_codec)
214
176
  if sample_rate is not None:
215
177
  query_params = query_params.add("sample_rate", sample_rate)
216
178
  if high_vad_sensitivity is not None:
@@ -219,12 +181,6 @@ class AsyncRawSpeechToTextStreamingClient:
219
181
  query_params = query_params.add("vad_signals", vad_signals)
220
182
  if flush_signal is not None:
221
183
  query_params = query_params.add("flush_signal", flush_signal)
222
- if stream_ongoing_speech_results is not None:
223
- query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
224
- if streaming_ongoing_requests_frame_size is not None:
225
- query_params = query_params.add(
226
- "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
227
- )
228
184
  ws_url = ws_url + f"?{query_params}"
229
185
  headers = self._client_wrapper.get_headers()
230
186
  if api_subscription_key is not None:
@@ -2,18 +2,46 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
6
- from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
7
- from .speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
8
- from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
9
- from .speech_to_text_streaming_stream_ongoing_speech_results import SpeechToTextStreamingStreamOngoingSpeechResults
10
- from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
5
+ import typing
6
+ from importlib import import_module
7
+
8
+ if typing.TYPE_CHECKING:
9
+ from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
10
+ from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
11
+ from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
12
+ from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
13
+ _dynamic_imports: typing.Dict[str, str] = {
14
+ "SpeechToTextStreamingFlushSignal": ".speech_to_text_streaming_flush_signal",
15
+ "SpeechToTextStreamingHighVadSensitivity": ".speech_to_text_streaming_high_vad_sensitivity",
16
+ "SpeechToTextStreamingLanguageCode": ".speech_to_text_streaming_language_code",
17
+ "SpeechToTextStreamingVadSignals": ".speech_to_text_streaming_vad_signals",
18
+ }
19
+
20
+
21
+ def __getattr__(attr_name: str) -> typing.Any:
22
+ module_name = _dynamic_imports.get(attr_name)
23
+ if module_name is None:
24
+ raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
25
+ try:
26
+ module = import_module(module_name, __package__)
27
+ if module_name == f".{attr_name}":
28
+ return module
29
+ else:
30
+ return getattr(module, attr_name)
31
+ except ImportError as e:
32
+ raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
33
+ except AttributeError as e:
34
+ raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
35
+
36
+
37
+ def __dir__():
38
+ lazy_attrs = list(_dynamic_imports.keys())
39
+ return sorted(lazy_attrs)
40
+
11
41
 
12
42
  __all__ = [
13
43
  "SpeechToTextStreamingFlushSignal",
14
44
  "SpeechToTextStreamingHighVadSensitivity",
15
- "SpeechToTextStreamingInputAudioCodec",
16
45
  "SpeechToTextStreamingLanguageCode",
17
- "SpeechToTextStreamingStreamOngoingSpeechResults",
18
46
  "SpeechToTextStreamingVadSignals",
19
47
  ]
@@ -150,9 +150,58 @@ class AsyncSpeechToTextTranslateJob:
150
150
  "output_file": detail.outputs[0].file_name,
151
151
  }
152
152
  for detail in (job_status.job_details or [])
153
- if detail.inputs and detail.outputs
153
+ if detail.inputs and detail.outputs and detail.state == "Success"
154
154
  ]
155
155
 
156
+ async def get_file_results(
157
+ self,
158
+ ) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
159
+ """
160
+ Get detailed results for each file in the batch job.
161
+
162
+ Returns
163
+ -------
164
+ Dict[str, List[Dict[str, Any]]]
165
+ Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
166
+ Each file detail includes:
167
+ - 'file_name': Name of the input file
168
+ - 'status': Status of processing ('Success' or 'Failed')
169
+ - 'error_message': Error message if failed (None if successful)
170
+ - 'output_file': Name of output file if successful (None if failed)
171
+ """
172
+ job_status = await self.get_status()
173
+ results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
174
+ "successful": [],
175
+ "failed": [],
176
+ }
177
+
178
+ for detail in job_status.job_details or []:
179
+ # Check for empty lists explicitly
180
+ if not detail.inputs or len(detail.inputs) == 0:
181
+ continue
182
+
183
+ try:
184
+ file_info = {
185
+ "file_name": detail.inputs[0].file_name,
186
+ "status": detail.state,
187
+ "error_message": detail.error_message,
188
+ "output_file": (
189
+ detail.outputs[0].file_name
190
+ if detail.outputs and len(detail.outputs) > 0
191
+ else None
192
+ ),
193
+ }
194
+
195
+ if detail.state == "Success":
196
+ results["successful"].append(file_info)
197
+ else:
198
+ results["failed"].append(file_info)
199
+ except (IndexError, AttributeError):
200
+ # Skip malformed job details
201
+ continue
202
+
203
+ return results
204
+
156
205
  async def download_outputs(self, output_dir: str) -> bool:
157
206
  """
158
207
  Download output files to the specified directory.
@@ -395,9 +444,58 @@ class SpeechToTextTranslateJob:
395
444
  "output_file": detail.outputs[0].file_name,
396
445
  }
397
446
  for detail in (job_status.job_details or [])
398
- if detail.inputs and detail.outputs
447
+ if detail.inputs and detail.outputs and detail.state == "Success"
399
448
  ]
400
449
 
450
+ def get_file_results(
451
+ self,
452
+ ) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
453
+ """
454
+ Get detailed results for each file in the batch job.
455
+
456
+ Returns
457
+ -------
458
+ Dict[str, List[Dict[str, Any]]]
459
+ Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
460
+ Each file detail includes:
461
+ - 'file_name': Name of the input file
462
+ - 'status': Status of processing ('Success' or 'Failed')
463
+ - 'error_message': Error message if failed (None if successful)
464
+ - 'output_file': Name of output file if successful (None if failed)
465
+ """
466
+ job_status = self.get_status()
467
+ results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
468
+ "successful": [],
469
+ "failed": [],
470
+ }
471
+
472
+ for detail in job_status.job_details or []:
473
+ # Check for empty lists explicitly
474
+ if not detail.inputs or len(detail.inputs) == 0:
475
+ continue
476
+
477
+ try:
478
+ file_info = {
479
+ "file_name": detail.inputs[0].file_name,
480
+ "status": detail.state,
481
+ "error_message": detail.error_message,
482
+ "output_file": (
483
+ detail.outputs[0].file_name
484
+ if detail.outputs and len(detail.outputs) > 0
485
+ else None
486
+ ),
487
+ }
488
+
489
+ if detail.state == "Success":
490
+ results["successful"].append(file_info)
491
+ else:
492
+ results["failed"].append(file_info)
493
+ except (IndexError, AttributeError):
494
+ # Skip malformed job details
495
+ continue
496
+
497
+ return results
498
+
401
499
  def download_outputs(self, output_dir: str) -> bool:
402
500
  """
403
501
  Download output files to the specified directory.