sarvamai 0.1.23a5__py3-none-any.whl → 0.1.23a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +4 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/speech_to_text_response.py +6 -14
- sarvamai/requests/speech_to_text_transcription_data.py +14 -0
- sarvamai/requests/speech_to_text_translate_response.py +6 -9
- sarvamai/requests/speech_to_text_translate_transcription_data.py +13 -0
- sarvamai/speech_to_text_streaming/__init__.py +2 -0
- sarvamai/speech_to_text_streaming/client.py +45 -2
- sarvamai/speech_to_text_streaming/raw_client.py +45 -2
- sarvamai/speech_to_text_streaming/types/__init__.py +2 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py +25 -1
- sarvamai/speech_to_text_translate_streaming/__init__.py +2 -0
- sarvamai/speech_to_text_translate_streaming/client.py +15 -0
- sarvamai/speech_to_text_translate_streaming/raw_client.py +15 -0
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +2 -0
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +7 -0
- sarvamai/types/speech_to_text_language.py +24 -1
- sarvamai/types/speech_to_text_response.py +6 -14
- sarvamai/types/speech_to_text_transcription_data.py +14 -0
- sarvamai/types/speech_to_text_translate_language.py +25 -1
- sarvamai/types/speech_to_text_translate_response.py +6 -9
- sarvamai/types/speech_to_text_translate_transcription_data.py +13 -0
- {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a6.dist-info}/METADATA +1 -1
- {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a6.dist-info}/RECORD +26 -24
- {sarvamai-0.1.23a5.dist-info → sarvamai-0.1.23a6.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -188,6 +188,7 @@ from .requests import (
|
|
|
188
188
|
from .speech_to_text_streaming import (
|
|
189
189
|
SpeechToTextStreamingFlushSignal,
|
|
190
190
|
SpeechToTextStreamingHighVadSensitivity,
|
|
191
|
+
SpeechToTextStreamingInputAudioCodec,
|
|
191
192
|
SpeechToTextStreamingLanguageCode,
|
|
192
193
|
SpeechToTextStreamingMode,
|
|
193
194
|
SpeechToTextStreamingModel,
|
|
@@ -196,6 +197,7 @@ from .speech_to_text_streaming import (
|
|
|
196
197
|
from .speech_to_text_translate_streaming import (
|
|
197
198
|
SpeechToTextTranslateStreamingFlushSignal,
|
|
198
199
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
200
|
+
SpeechToTextTranslateStreamingInputAudioCodec,
|
|
199
201
|
SpeechToTextTranslateStreamingVadSignals,
|
|
200
202
|
)
|
|
201
203
|
from .text_to_speech_streaming import TextToSpeechStreamingModel, TextToSpeechStreamingSendCompletionEvent
|
|
@@ -320,6 +322,7 @@ __all__ = [
|
|
|
320
322
|
"SpeechToTextResponseParams",
|
|
321
323
|
"SpeechToTextStreamingFlushSignal",
|
|
322
324
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
325
|
+
"SpeechToTextStreamingInputAudioCodec",
|
|
323
326
|
"SpeechToTextStreamingLanguageCode",
|
|
324
327
|
"SpeechToTextStreamingMode",
|
|
325
328
|
"SpeechToTextStreamingModel",
|
|
@@ -338,6 +341,7 @@ __all__ = [
|
|
|
338
341
|
"SpeechToTextTranslateResponseParams",
|
|
339
342
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
340
343
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
344
|
+
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
341
345
|
"SpeechToTextTranslateStreamingResponse",
|
|
342
346
|
"SpeechToTextTranslateStreamingResponseParams",
|
|
343
347
|
"SpeechToTextTranslateStreamingVadSignals",
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.23a5",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.23a6",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.23a5",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.23a6",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
@@ -1,28 +1,20 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
-
from .diarized_transcript import DiarizedTranscriptParams
|
|
5
|
-
from .timestamps_model import TimestampsModelParams
|
|
6
4
|
|
|
7
5
|
|
|
8
6
|
class SpeechToTextResponseParams(typing_extensions.TypedDict):
|
|
9
|
-
request_id:
|
|
10
|
-
transcript: str
|
|
7
|
+
request_id: str
|
|
11
8
|
"""
|
|
12
|
-
|
|
9
|
+
Unique identifier for the request
|
|
13
10
|
"""
|
|
14
11
|
|
|
15
|
-
|
|
16
|
-
"""
|
|
17
|
-
Contains timestamps for the transcribed text. This field is included only if with_timestamps is set to true
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
diarized_transcript: typing_extensions.NotRequired[DiarizedTranscriptParams]
|
|
12
|
+
transcript: str
|
|
21
13
|
"""
|
|
22
|
-
|
|
14
|
+
The transcribed text from the provided audio file.
|
|
23
15
|
"""
|
|
24
16
|
|
|
25
|
-
language_code:
|
|
17
|
+
language_code: str
|
|
26
18
|
"""
|
|
27
|
-
|
|
19
|
+
The BCP-47 code of language spoken in the input (e.g., hi-IN, en-IN). If multiple languages are detected, returns the most predominant spoken language.
|
|
28
20
|
"""
|
|
@@ -32,4 +32,18 @@ class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
|
|
|
32
32
|
BCP-47 code of detected language
|
|
33
33
|
"""
|
|
34
34
|
|
|
35
|
+
language_probability: typing_extensions.NotRequired[float]
|
|
36
|
+
"""
|
|
37
|
+
Float value (0.0 to 1.0) indicating the probability of the detected language being correct. Higher values indicate higher confidence.
|
|
38
|
+
|
|
39
|
+
**When it returns a value:**
|
|
40
|
+
- When `language_code` is not provided in the request
|
|
41
|
+
- When `language_code` is set to `unknown`
|
|
42
|
+
|
|
43
|
+
**When it returns null:**
|
|
44
|
+
- When a specific `language_code` is provided (language detection is skipped)
|
|
45
|
+
|
|
46
|
+
The parameter is always present in the response.
|
|
47
|
+
"""
|
|
48
|
+
|
|
35
49
|
metrics: TranscriptionMetricsParams
|
|
@@ -1,23 +1,20 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
-
from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
5
|
-
from .diarized_transcript import DiarizedTranscriptParams
|
|
6
4
|
|
|
7
5
|
|
|
8
6
|
class SpeechToTextTranslateResponseParams(typing_extensions.TypedDict):
|
|
9
|
-
request_id:
|
|
10
|
-
transcript: str
|
|
7
|
+
request_id: str
|
|
11
8
|
"""
|
|
12
|
-
|
|
9
|
+
Unique identifier for the request
|
|
13
10
|
"""
|
|
14
11
|
|
|
15
|
-
|
|
12
|
+
transcript: str
|
|
16
13
|
"""
|
|
17
|
-
|
|
14
|
+
English translation of the provided speech
|
|
18
15
|
"""
|
|
19
16
|
|
|
20
|
-
|
|
17
|
+
language_code: str
|
|
21
18
|
"""
|
|
22
|
-
|
|
19
|
+
The BCP-47 code of the detected source language spoken in the input (e.g., hi-IN, kn-IN).
|
|
23
20
|
"""
|
|
@@ -20,4 +20,17 @@ class SpeechToTextTranslateTranscriptionDataParams(typing_extensions.TypedDict):
|
|
|
20
20
|
BCP-47 code of detected source language (null when language detection is in progress)
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
language_probability: typing_extensions.NotRequired[float]
|
|
24
|
+
"""
|
|
25
|
+
Float value (0.0 to 1.0) indicating the probability of the detected source language being correct. Higher values indicate higher confidence.
|
|
26
|
+
|
|
27
|
+
**When it returns a value:**
|
|
28
|
+
- Always returns a value as source language is auto-detected for translation
|
|
29
|
+
|
|
30
|
+
**When it returns null:**
|
|
31
|
+
- When language detection confidence is unavailable
|
|
32
|
+
|
|
33
|
+
The parameter is always present in the response.
|
|
34
|
+
"""
|
|
35
|
+
|
|
23
36
|
metrics: TranscriptionMetricsParams
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
from .types import (
|
|
6
6
|
SpeechToTextStreamingFlushSignal,
|
|
7
7
|
SpeechToTextStreamingHighVadSensitivity,
|
|
8
|
+
SpeechToTextStreamingInputAudioCodec,
|
|
8
9
|
SpeechToTextStreamingLanguageCode,
|
|
9
10
|
SpeechToTextStreamingMode,
|
|
10
11
|
SpeechToTextStreamingModel,
|
|
@@ -14,6 +15,7 @@ from .types import (
|
|
|
14
15
|
__all__ = [
|
|
15
16
|
"SpeechToTextStreamingFlushSignal",
|
|
16
17
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
18
|
+
"SpeechToTextStreamingInputAudioCodec",
|
|
17
19
|
"SpeechToTextStreamingLanguageCode",
|
|
18
20
|
"SpeechToTextStreamingMode",
|
|
19
21
|
"SpeechToTextStreamingModel",
|
|
@@ -13,6 +13,7 @@ from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStre
|
|
|
13
13
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
14
14
|
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
15
15
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
16
|
+
from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
|
|
16
17
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
17
18
|
from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
|
|
18
19
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
@@ -50,6 +51,7 @@ class SpeechToTextStreamingClient:
|
|
|
50
51
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
51
52
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
52
53
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
54
|
+
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
53
55
|
api_subscription_key: typing.Optional[str] = None,
|
|
54
56
|
request_options: typing.Optional[RequestOptions] = None,
|
|
55
57
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -65,7 +67,7 @@ class SpeechToTextStreamingClient:
|
|
|
65
67
|
language_code : SpeechToTextStreamingLanguageCode
|
|
66
68
|
Specifies the language of the input audio in BCP-47 format.
|
|
67
69
|
|
|
68
|
-
**Available Options:**
|
|
70
|
+
**Available Options (saarika:v2.5):**
|
|
69
71
|
- `hi-IN`: Hindi
|
|
70
72
|
- `bn-IN`: Bengali
|
|
71
73
|
- `gu-IN`: Gujarati
|
|
@@ -78,6 +80,20 @@ class SpeechToTextStreamingClient:
|
|
|
78
80
|
- `te-IN`: Telugu
|
|
79
81
|
- `en-IN`: English
|
|
80
82
|
|
|
83
|
+
**Additional Options (saaras:v3 only):**
|
|
84
|
+
- `as-IN`: Assamese
|
|
85
|
+
- `ur-IN`: Urdu
|
|
86
|
+
- `ne-IN`: Nepali
|
|
87
|
+
- `kok-IN`: Konkani
|
|
88
|
+
- `ks-IN`: Kashmiri
|
|
89
|
+
- `sd-IN`: Sindhi
|
|
90
|
+
- `sa-IN`: Sanskrit
|
|
91
|
+
- `sat-IN`: Santali
|
|
92
|
+
- `mni-IN`: Manipuri
|
|
93
|
+
- `brx-IN`: Bodo
|
|
94
|
+
- `mai-IN`: Maithili
|
|
95
|
+
- `doi-IN`: Dogri
|
|
96
|
+
|
|
81
97
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
82
98
|
Specifies the model to use for speech-to-text conversion.
|
|
83
99
|
|
|
@@ -117,6 +133,10 @@ class SpeechToTextStreamingClient:
|
|
|
117
133
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
118
134
|
Signal to flush the audio buffer and finalize transcription
|
|
119
135
|
|
|
136
|
+
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
137
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
138
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
139
|
+
|
|
120
140
|
api_subscription_key : typing.Optional[str]
|
|
121
141
|
API subscription key for authentication
|
|
122
142
|
|
|
@@ -143,6 +163,8 @@ class SpeechToTextStreamingClient:
|
|
|
143
163
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
144
164
|
if flush_signal is not None:
|
|
145
165
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
166
|
+
if input_audio_codec is not None:
|
|
167
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
146
168
|
ws_url = ws_url + f"?{query_params}"
|
|
147
169
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
148
170
|
if api_subscription_key is not None:
|
|
@@ -193,6 +215,7 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
193
215
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
194
216
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
195
217
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
218
|
+
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
196
219
|
api_subscription_key: typing.Optional[str] = None,
|
|
197
220
|
request_options: typing.Optional[RequestOptions] = None,
|
|
198
221
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -208,7 +231,7 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
208
231
|
language_code : SpeechToTextStreamingLanguageCode
|
|
209
232
|
Specifies the language of the input audio in BCP-47 format.
|
|
210
233
|
|
|
211
|
-
**Available Options:**
|
|
234
|
+
**Available Options (saarika:v2.5):**
|
|
212
235
|
- `hi-IN`: Hindi
|
|
213
236
|
- `bn-IN`: Bengali
|
|
214
237
|
- `gu-IN`: Gujarati
|
|
@@ -221,6 +244,20 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
221
244
|
- `te-IN`: Telugu
|
|
222
245
|
- `en-IN`: English
|
|
223
246
|
|
|
247
|
+
**Additional Options (saaras:v3 only):**
|
|
248
|
+
- `as-IN`: Assamese
|
|
249
|
+
- `ur-IN`: Urdu
|
|
250
|
+
- `ne-IN`: Nepali
|
|
251
|
+
- `kok-IN`: Konkani
|
|
252
|
+
- `ks-IN`: Kashmiri
|
|
253
|
+
- `sd-IN`: Sindhi
|
|
254
|
+
- `sa-IN`: Sanskrit
|
|
255
|
+
- `sat-IN`: Santali
|
|
256
|
+
- `mni-IN`: Manipuri
|
|
257
|
+
- `brx-IN`: Bodo
|
|
258
|
+
- `mai-IN`: Maithili
|
|
259
|
+
- `doi-IN`: Dogri
|
|
260
|
+
|
|
224
261
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
225
262
|
Specifies the model to use for speech-to-text conversion.
|
|
226
263
|
|
|
@@ -260,6 +297,10 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
260
297
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
261
298
|
Signal to flush the audio buffer and finalize transcription
|
|
262
299
|
|
|
300
|
+
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
301
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
302
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
303
|
+
|
|
263
304
|
api_subscription_key : typing.Optional[str]
|
|
264
305
|
API subscription key for authentication
|
|
265
306
|
|
|
@@ -286,6 +327,8 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
286
327
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
287
328
|
if flush_signal is not None:
|
|
288
329
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
330
|
+
if input_audio_codec is not None:
|
|
331
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
289
332
|
ws_url = ws_url + f"?{query_params}"
|
|
290
333
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
291
334
|
if api_subscription_key is not None:
|
|
@@ -12,6 +12,7 @@ from ..core.request_options import RequestOptions
|
|
|
12
12
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
13
13
|
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
14
14
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
15
|
+
from .types.speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
|
|
15
16
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
16
17
|
from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
|
|
17
18
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
@@ -38,6 +39,7 @@ class RawSpeechToTextStreamingClient:
|
|
|
38
39
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
39
40
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
40
41
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
42
|
+
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
41
43
|
api_subscription_key: typing.Optional[str] = None,
|
|
42
44
|
request_options: typing.Optional[RequestOptions] = None,
|
|
43
45
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -53,7 +55,7 @@ class RawSpeechToTextStreamingClient:
|
|
|
53
55
|
language_code : SpeechToTextStreamingLanguageCode
|
|
54
56
|
Specifies the language of the input audio in BCP-47 format.
|
|
55
57
|
|
|
56
|
-
**Available Options:**
|
|
58
|
+
**Available Options (saarika:v2.5):**
|
|
57
59
|
- `hi-IN`: Hindi
|
|
58
60
|
- `bn-IN`: Bengali
|
|
59
61
|
- `gu-IN`: Gujarati
|
|
@@ -66,6 +68,20 @@ class RawSpeechToTextStreamingClient:
|
|
|
66
68
|
- `te-IN`: Telugu
|
|
67
69
|
- `en-IN`: English
|
|
68
70
|
|
|
71
|
+
**Additional Options (saaras:v3 only):**
|
|
72
|
+
- `as-IN`: Assamese
|
|
73
|
+
- `ur-IN`: Urdu
|
|
74
|
+
- `ne-IN`: Nepali
|
|
75
|
+
- `kok-IN`: Konkani
|
|
76
|
+
- `ks-IN`: Kashmiri
|
|
77
|
+
- `sd-IN`: Sindhi
|
|
78
|
+
- `sa-IN`: Sanskrit
|
|
79
|
+
- `sat-IN`: Santali
|
|
80
|
+
- `mni-IN`: Manipuri
|
|
81
|
+
- `brx-IN`: Bodo
|
|
82
|
+
- `mai-IN`: Maithili
|
|
83
|
+
- `doi-IN`: Dogri
|
|
84
|
+
|
|
69
85
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
70
86
|
Specifies the model to use for speech-to-text conversion.
|
|
71
87
|
|
|
@@ -105,6 +121,10 @@ class RawSpeechToTextStreamingClient:
|
|
|
105
121
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
106
122
|
Signal to flush the audio buffer and finalize transcription
|
|
107
123
|
|
|
124
|
+
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
125
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
126
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
127
|
+
|
|
108
128
|
api_subscription_key : typing.Optional[str]
|
|
109
129
|
API subscription key for authentication
|
|
110
130
|
|
|
@@ -131,6 +151,8 @@ class RawSpeechToTextStreamingClient:
|
|
|
131
151
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
132
152
|
if flush_signal is not None:
|
|
133
153
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
154
|
+
if input_audio_codec is not None:
|
|
155
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
134
156
|
ws_url = ws_url + f"?{query_params}"
|
|
135
157
|
headers = self._client_wrapper.get_headers()
|
|
136
158
|
if api_subscription_key is not None:
|
|
@@ -170,6 +192,7 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
170
192
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
171
193
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
172
194
|
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
195
|
+
input_audio_codec: typing.Optional[SpeechToTextStreamingInputAudioCodec] = None,
|
|
173
196
|
api_subscription_key: typing.Optional[str] = None,
|
|
174
197
|
request_options: typing.Optional[RequestOptions] = None,
|
|
175
198
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -185,7 +208,7 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
185
208
|
language_code : SpeechToTextStreamingLanguageCode
|
|
186
209
|
Specifies the language of the input audio in BCP-47 format.
|
|
187
210
|
|
|
188
|
-
**Available Options:**
|
|
211
|
+
**Available Options (saarika:v2.5):**
|
|
189
212
|
- `hi-IN`: Hindi
|
|
190
213
|
- `bn-IN`: Bengali
|
|
191
214
|
- `gu-IN`: Gujarati
|
|
@@ -198,6 +221,20 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
198
221
|
- `te-IN`: Telugu
|
|
199
222
|
- `en-IN`: English
|
|
200
223
|
|
|
224
|
+
**Additional Options (saaras:v3 only):**
|
|
225
|
+
- `as-IN`: Assamese
|
|
226
|
+
- `ur-IN`: Urdu
|
|
227
|
+
- `ne-IN`: Nepali
|
|
228
|
+
- `kok-IN`: Konkani
|
|
229
|
+
- `ks-IN`: Kashmiri
|
|
230
|
+
- `sd-IN`: Sindhi
|
|
231
|
+
- `sa-IN`: Sanskrit
|
|
232
|
+
- `sat-IN`: Santali
|
|
233
|
+
- `mni-IN`: Manipuri
|
|
234
|
+
- `brx-IN`: Bodo
|
|
235
|
+
- `mai-IN`: Maithili
|
|
236
|
+
- `doi-IN`: Dogri
|
|
237
|
+
|
|
201
238
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
202
239
|
Specifies the model to use for speech-to-text conversion.
|
|
203
240
|
|
|
@@ -237,6 +274,10 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
237
274
|
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
238
275
|
Signal to flush the audio buffer and finalize transcription
|
|
239
276
|
|
|
277
|
+
input_audio_codec : typing.Optional[SpeechToTextStreamingInputAudioCodec]
|
|
278
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
279
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
280
|
+
|
|
240
281
|
api_subscription_key : typing.Optional[str]
|
|
241
282
|
API subscription key for authentication
|
|
242
283
|
|
|
@@ -263,6 +304,8 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
263
304
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
264
305
|
if flush_signal is not None:
|
|
265
306
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
307
|
+
if input_audio_codec is not None:
|
|
308
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
266
309
|
ws_url = ws_url + f"?{query_params}"
|
|
267
310
|
headers = self._client_wrapper.get_headers()
|
|
268
311
|
if api_subscription_key is not None:
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
6
6
|
from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
7
|
+
from .speech_to_text_streaming_input_audio_codec import SpeechToTextStreamingInputAudioCodec
|
|
7
8
|
from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
8
9
|
from .speech_to_text_streaming_mode import SpeechToTextStreamingMode
|
|
9
10
|
from .speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
@@ -12,6 +13,7 @@ from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignal
|
|
|
12
13
|
__all__ = [
|
|
13
14
|
"SpeechToTextStreamingFlushSignal",
|
|
14
15
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
16
|
+
"SpeechToTextStreamingInputAudioCodec",
|
|
15
17
|
"SpeechToTextStreamingLanguageCode",
|
|
16
18
|
"SpeechToTextStreamingMode",
|
|
17
19
|
"SpeechToTextStreamingModel",
|
|
@@ -3,6 +3,30 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
SpeechToTextStreamingLanguageCode = typing.Union[
|
|
6
|
-
typing.Literal[
|
|
6
|
+
typing.Literal[
|
|
7
|
+
"en-IN",
|
|
8
|
+
"hi-IN",
|
|
9
|
+
"bn-IN",
|
|
10
|
+
"gu-IN",
|
|
11
|
+
"kn-IN",
|
|
12
|
+
"ml-IN",
|
|
13
|
+
"mr-IN",
|
|
14
|
+
"od-IN",
|
|
15
|
+
"pa-IN",
|
|
16
|
+
"ta-IN",
|
|
17
|
+
"te-IN",
|
|
18
|
+
"as-IN",
|
|
19
|
+
"ur-IN",
|
|
20
|
+
"ne-IN",
|
|
21
|
+
"kok-IN",
|
|
22
|
+
"ks-IN",
|
|
23
|
+
"sd-IN",
|
|
24
|
+
"sa-IN",
|
|
25
|
+
"sat-IN",
|
|
26
|
+
"mni-IN",
|
|
27
|
+
"brx-IN",
|
|
28
|
+
"mai-IN",
|
|
29
|
+
"doi-IN",
|
|
30
|
+
],
|
|
7
31
|
typing.Any,
|
|
8
32
|
]
|
|
@@ -5,11 +5,13 @@
|
|
|
5
5
|
from .types import (
|
|
6
6
|
SpeechToTextTranslateStreamingFlushSignal,
|
|
7
7
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
8
|
+
SpeechToTextTranslateStreamingInputAudioCodec,
|
|
8
9
|
SpeechToTextTranslateStreamingVadSignals,
|
|
9
10
|
)
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
13
14
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
15
|
+
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
14
16
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
15
17
|
]
|
|
@@ -15,6 +15,7 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
15
15
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
16
16
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
17
17
|
)
|
|
18
|
+
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
18
19
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
19
20
|
|
|
20
21
|
try:
|
|
@@ -47,6 +48,7 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
47
48
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
48
49
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
49
50
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
51
|
+
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
50
52
|
api_subscription_key: typing.Optional[str] = None,
|
|
51
53
|
request_options: typing.Optional[RequestOptions] = None,
|
|
52
54
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -77,6 +79,10 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
77
79
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
78
80
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
79
81
|
|
|
82
|
+
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
83
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
84
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
85
|
+
|
|
80
86
|
api_subscription_key : typing.Optional[str]
|
|
81
87
|
API subscription key for authentication
|
|
82
88
|
|
|
@@ -99,6 +105,8 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
99
105
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
100
106
|
if flush_signal is not None:
|
|
101
107
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
108
|
+
if input_audio_codec is not None:
|
|
109
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
102
110
|
ws_url = ws_url + f"?{query_params}"
|
|
103
111
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
104
112
|
if api_subscription_key is not None:
|
|
@@ -147,6 +155,7 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
147
155
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
148
156
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
149
157
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
158
|
+
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
150
159
|
api_subscription_key: typing.Optional[str] = None,
|
|
151
160
|
request_options: typing.Optional[RequestOptions] = None,
|
|
152
161
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -177,6 +186,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
177
186
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
178
187
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
179
188
|
|
|
189
|
+
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
190
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
191
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
192
|
+
|
|
180
193
|
api_subscription_key : typing.Optional[str]
|
|
181
194
|
API subscription key for authentication
|
|
182
195
|
|
|
@@ -199,6 +212,8 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
199
212
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
200
213
|
if flush_signal is not None:
|
|
201
214
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
215
|
+
if input_audio_codec is not None:
|
|
216
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
202
217
|
ws_url = ws_url + f"?{query_params}"
|
|
203
218
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
204
219
|
if api_subscription_key is not None:
|
|
@@ -14,6 +14,7 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
14
14
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
15
15
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
16
16
|
)
|
|
17
|
+
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
17
18
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
18
19
|
|
|
19
20
|
try:
|
|
@@ -35,6 +36,7 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
35
36
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
36
37
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
37
38
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
39
|
+
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
38
40
|
api_subscription_key: typing.Optional[str] = None,
|
|
39
41
|
request_options: typing.Optional[RequestOptions] = None,
|
|
40
42
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -65,6 +67,10 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
65
67
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
66
68
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
67
69
|
|
|
70
|
+
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
71
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
72
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
73
|
+
|
|
68
74
|
api_subscription_key : typing.Optional[str]
|
|
69
75
|
API subscription key for authentication
|
|
70
76
|
|
|
@@ -87,6 +93,8 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
87
93
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
88
94
|
if flush_signal is not None:
|
|
89
95
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
96
|
+
if input_audio_codec is not None:
|
|
97
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
90
98
|
ws_url = ws_url + f"?{query_params}"
|
|
91
99
|
headers = self._client_wrapper.get_headers()
|
|
92
100
|
if api_subscription_key is not None:
|
|
@@ -124,6 +132,7 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
124
132
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
125
133
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
126
134
|
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
135
|
+
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
127
136
|
api_subscription_key: typing.Optional[str] = None,
|
|
128
137
|
request_options: typing.Optional[RequestOptions] = None,
|
|
129
138
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -154,6 +163,10 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
154
163
|
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
155
164
|
Signal to flush the audio buffer and finalize transcription and translation
|
|
156
165
|
|
|
166
|
+
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
167
|
+
Audio codec/format of the input stream. Use this when sending raw PCM audio.
|
|
168
|
+
Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
|
|
169
|
+
|
|
157
170
|
api_subscription_key : typing.Optional[str]
|
|
158
171
|
API subscription key for authentication
|
|
159
172
|
|
|
@@ -176,6 +189,8 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
176
189
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
177
190
|
if flush_signal is not None:
|
|
178
191
|
query_params = query_params.add("flush_signal", flush_signal)
|
|
192
|
+
if input_audio_codec is not None:
|
|
193
|
+
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
179
194
|
ws_url = ws_url + f"?{query_params}"
|
|
180
195
|
headers = self._client_wrapper.get_headers()
|
|
181
196
|
if api_subscription_key is not None:
|
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
from .speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
6
6
|
from .speech_to_text_translate_streaming_high_vad_sensitivity import SpeechToTextTranslateStreamingHighVadSensitivity
|
|
7
|
+
from .speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
7
8
|
from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
10
11
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
11
12
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
13
|
+
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
12
14
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
13
15
|
]
|
|
@@ -4,7 +4,30 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
SpeechToTextLanguage = typing.Union[
|
|
6
6
|
typing.Literal[
|
|
7
|
-
"unknown",
|
|
7
|
+
"unknown",
|
|
8
|
+
"hi-IN",
|
|
9
|
+
"bn-IN",
|
|
10
|
+
"kn-IN",
|
|
11
|
+
"ml-IN",
|
|
12
|
+
"mr-IN",
|
|
13
|
+
"od-IN",
|
|
14
|
+
"pa-IN",
|
|
15
|
+
"ta-IN",
|
|
16
|
+
"te-IN",
|
|
17
|
+
"en-IN",
|
|
18
|
+
"gu-IN",
|
|
19
|
+
"as-IN",
|
|
20
|
+
"ur-IN",
|
|
21
|
+
"ne-IN",
|
|
22
|
+
"kok-IN",
|
|
23
|
+
"ks-IN",
|
|
24
|
+
"sd-IN",
|
|
25
|
+
"sa-IN",
|
|
26
|
+
"sat-IN",
|
|
27
|
+
"mni-IN",
|
|
28
|
+
"brx-IN",
|
|
29
|
+
"mai-IN",
|
|
30
|
+
"doi-IN",
|
|
8
31
|
],
|
|
9
32
|
typing.Any,
|
|
10
33
|
]
|
|
@@ -4,30 +4,22 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .diarized_transcript import DiarizedTranscript
|
|
8
|
-
from .timestamps_model import TimestampsModel
|
|
9
7
|
|
|
10
8
|
|
|
11
9
|
class SpeechToTextResponse(UniversalBaseModel):
|
|
12
|
-
request_id:
|
|
13
|
-
transcript: str = pydantic.Field()
|
|
10
|
+
request_id: str = pydantic.Field()
|
|
14
11
|
"""
|
|
15
|
-
|
|
12
|
+
Unique identifier for the request
|
|
16
13
|
"""
|
|
17
14
|
|
|
18
|
-
|
|
19
|
-
"""
|
|
20
|
-
Contains timestamps for the transcribed text. This field is included only if with_timestamps is set to true
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
diarized_transcript: typing.Optional[DiarizedTranscript] = pydantic.Field(default=None)
|
|
15
|
+
transcript: str = pydantic.Field()
|
|
24
16
|
"""
|
|
25
|
-
|
|
17
|
+
The transcribed text from the provided audio file.
|
|
26
18
|
"""
|
|
27
19
|
|
|
28
|
-
language_code:
|
|
20
|
+
language_code: str = pydantic.Field()
|
|
29
21
|
"""
|
|
30
|
-
|
|
22
|
+
The BCP-47 code of language spoken in the input (e.g., hi-IN, en-IN). If multiple languages are detected, returns the most predominant spoken language.
|
|
31
23
|
"""
|
|
32
24
|
|
|
33
25
|
if IS_PYDANTIC_V2:
|
|
@@ -33,6 +33,20 @@ class SpeechToTextTranscriptionData(UniversalBaseModel):
|
|
|
33
33
|
BCP-47 code of detected language
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
|
+
language_probability: typing.Optional[float] = pydantic.Field(default=None)
|
|
37
|
+
"""
|
|
38
|
+
Float value (0.0 to 1.0) indicating the probability of the detected language being correct. Higher values indicate higher confidence.
|
|
39
|
+
|
|
40
|
+
**When it returns a value:**
|
|
41
|
+
- When `language_code` is not provided in the request
|
|
42
|
+
- When `language_code` is set to `unknown`
|
|
43
|
+
|
|
44
|
+
**When it returns null:**
|
|
45
|
+
- When a specific `language_code` is provided (language detection is skipped)
|
|
46
|
+
|
|
47
|
+
The parameter is always present in the response.
|
|
48
|
+
"""
|
|
49
|
+
|
|
36
50
|
metrics: TranscriptionMetrics
|
|
37
51
|
|
|
38
52
|
if IS_PYDANTIC_V2:
|
|
@@ -3,6 +3,30 @@
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
SpeechToTextTranslateLanguage = typing.Union[
|
|
6
|
-
typing.Literal[
|
|
6
|
+
typing.Literal[
|
|
7
|
+
"hi-IN",
|
|
8
|
+
"bn-IN",
|
|
9
|
+
"kn-IN",
|
|
10
|
+
"ml-IN",
|
|
11
|
+
"mr-IN",
|
|
12
|
+
"od-IN",
|
|
13
|
+
"pa-IN",
|
|
14
|
+
"ta-IN",
|
|
15
|
+
"te-IN",
|
|
16
|
+
"gu-IN",
|
|
17
|
+
"en-IN",
|
|
18
|
+
"as-IN",
|
|
19
|
+
"ur-IN",
|
|
20
|
+
"ne-IN",
|
|
21
|
+
"kok-IN",
|
|
22
|
+
"ks-IN",
|
|
23
|
+
"sd-IN",
|
|
24
|
+
"sa-IN",
|
|
25
|
+
"sat-IN",
|
|
26
|
+
"mni-IN",
|
|
27
|
+
"brx-IN",
|
|
28
|
+
"mai-IN",
|
|
29
|
+
"doi-IN",
|
|
30
|
+
],
|
|
7
31
|
typing.Any,
|
|
8
32
|
]
|
|
@@ -4,25 +4,22 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .diarized_transcript import DiarizedTranscript
|
|
8
|
-
from .speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
9
7
|
|
|
10
8
|
|
|
11
9
|
class SpeechToTextTranslateResponse(UniversalBaseModel):
|
|
12
|
-
request_id:
|
|
13
|
-
transcript: str = pydantic.Field()
|
|
10
|
+
request_id: str = pydantic.Field()
|
|
14
11
|
"""
|
|
15
|
-
|
|
12
|
+
Unique identifier for the request
|
|
16
13
|
"""
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
transcript: str = pydantic.Field()
|
|
19
16
|
"""
|
|
20
|
-
|
|
17
|
+
English translation of the provided speech
|
|
21
18
|
"""
|
|
22
19
|
|
|
23
|
-
|
|
20
|
+
language_code: str = pydantic.Field()
|
|
24
21
|
"""
|
|
25
|
-
|
|
22
|
+
The BCP-47 code of the detected source language spoken in the input (e.g., hi-IN, kn-IN).
|
|
26
23
|
"""
|
|
27
24
|
|
|
28
25
|
if IS_PYDANTIC_V2:
|
|
@@ -23,6 +23,19 @@ class SpeechToTextTranslateTranscriptionData(UniversalBaseModel):
|
|
|
23
23
|
BCP-47 code of detected source language (null when language detection is in progress)
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
|
+
language_probability: typing.Optional[float] = pydantic.Field(default=None)
|
|
27
|
+
"""
|
|
28
|
+
Float value (0.0 to 1.0) indicating the probability of the detected source language being correct. Higher values indicate higher confidence.
|
|
29
|
+
|
|
30
|
+
**When it returns a value:**
|
|
31
|
+
- Always returns a value as source language is auto-detected for translation
|
|
32
|
+
|
|
33
|
+
**When it returns null:**
|
|
34
|
+
- When language detection confidence is unavailable
|
|
35
|
+
|
|
36
|
+
The parameter is always present in the response.
|
|
37
|
+
"""
|
|
38
|
+
|
|
26
39
|
metrics: TranscriptionMetrics
|
|
27
40
|
|
|
28
41
|
if IS_PYDANTIC_V2:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sarvamai/__init__.py,sha256=
|
|
1
|
+
sarvamai/__init__.py,sha256=YgHvuv6dxlTz4zYxWfRcgZKp1imFf0gmwvCQRclWOj0,11721
|
|
2
2
|
sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
3
3
|
sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
|
|
4
4
|
sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
|
|
5
5
|
sarvamai/client.py,sha256=J30X_os1lPf8Wml0KDFEf6p8VGHhgF_lf3nw1T2D3qo,8207
|
|
6
6
|
sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
|
|
7
7
|
sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
|
8
|
-
sarvamai/core/client_wrapper.py,sha256=
|
|
8
|
+
sarvamai/core/client_wrapper.py,sha256=Ijmv6KUlK_OtCy7p9OYCUSgVRv62-xOAyyAvR5uw3AY,2570
|
|
9
9
|
sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
|
10
10
|
sarvamai/core/events.py,sha256=HvKBdSoYcFetk7cgNXb7FxuY-FtY8NtUhZIN7mGVx8U,1159
|
|
11
11
|
sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
|
@@ -68,15 +68,15 @@ sarvamai/requests/ping_signal.py,sha256=TSgmfz2k4X1L6TzvX8u2SKZ6XQY3bSf7nPZf8mUV
|
|
|
68
68
|
sarvamai/requests/send_text.py,sha256=DWzbNgeNN2xSIYgk2zEisgLqjwq5oleqJVHrtOnIqbE,267
|
|
69
69
|
sarvamai/requests/send_text_data.py,sha256=2jds-xd77u-YTgIWQsTUBKE-_7tsrFshXXuC_Ld4ULo,161
|
|
70
70
|
sarvamai/requests/speech_to_text_job_parameters.py,sha256=C1dpp5IkylFXdSNfKDhSo8jbFUquFh3SURFVME9XXq4,2854
|
|
71
|
-
sarvamai/requests/speech_to_text_response.py,sha256=
|
|
71
|
+
sarvamai/requests/speech_to_text_response.py,sha256=5Le4BCjYQcf87seNWdrFhHi5YHRJ-ljMLv3nTwsNnWc,517
|
|
72
72
|
sarvamai/requests/speech_to_text_response_data.py,sha256=69fYRdL0tCKpgKQqwzcM4T4Nf_lRxJFh-VCFe_tN964,364
|
|
73
73
|
sarvamai/requests/speech_to_text_streaming_response.py,sha256=cN5tKE9wOWuyBna4wmrf-0LfkOULMpRaJ7qjLuu76V0,348
|
|
74
|
-
sarvamai/requests/speech_to_text_transcription_data.py,sha256=
|
|
74
|
+
sarvamai/requests/speech_to_text_transcription_data.py,sha256=6YjW2ySX-yIql9MGM02wMue4lNOX1rwif8eSg7jQWo0,1413
|
|
75
75
|
sarvamai/requests/speech_to_text_translate_job_parameters.py,sha256=Q1mL3ul3WYArryp-HP_wHc8WCTLq6ZFhCUFeH3NM80E,1147
|
|
76
|
-
sarvamai/requests/speech_to_text_translate_response.py,sha256=
|
|
76
|
+
sarvamai/requests/speech_to_text_translate_response.py,sha256=pVxKjb9DAtlXM1jXA94TmCAzOrmzIuMjcM-mUwTnDRA,456
|
|
77
77
|
sarvamai/requests/speech_to_text_translate_response_data.py,sha256=OmjunP9R2xertJKn4fmpyzjDdj1_B_Yh6ZjH1eOlR-Q,407
|
|
78
78
|
sarvamai/requests/speech_to_text_translate_streaming_response.py,sha256=KTjYZ0_oLapuM5Iiq7UwejMsrL1TGgFAW4k5l17TkZs,385
|
|
79
|
-
sarvamai/requests/speech_to_text_translate_transcription_data.py,sha256=
|
|
79
|
+
sarvamai/requests/speech_to_text_translate_transcription_data.py,sha256=AEECwgmzR_uInedR38jiksZN8wx_l3sQKQA2dLCi13c,1088
|
|
80
80
|
sarvamai/requests/stop_configuration.py,sha256=Xmp8zyUpnN65pH5A7NqefckB8wk53_BBzOUrgRm2gXs,146
|
|
81
81
|
sarvamai/requests/stt_flush_signal.py,sha256=Gb-SoPPAyVKFVPZKxebLgV4bAv21NjVgvfCl5cqcxrY,360
|
|
82
82
|
sarvamai/requests/task_detail_v_1.py,sha256=2rehl7dSDSgzaw13b9bODamhiN2uB-IK4cOksq8Vmqc,582
|
|
@@ -93,14 +93,15 @@ sarvamai/speech_to_text_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23
|
|
|
93
93
|
sarvamai/speech_to_text_job/client.py,sha256=nMhBJa1rf1qQdQhlB1WUtyzOGhsWxECCPAJwBvWnq8M,18930
|
|
94
94
|
sarvamai/speech_to_text_job/job.py,sha256=9AfVSp5nzrl-Cx_1n2AJZqTMzp6Dkz2cvmbdq78fCgM,18751
|
|
95
95
|
sarvamai/speech_to_text_job/raw_client.py,sha256=6MB82mSqAOi92mE8vUeNSTB0wuxLZYRwizt15R6r-wo,49394
|
|
96
|
-
sarvamai/speech_to_text_streaming/__init__.py,sha256=
|
|
97
|
-
sarvamai/speech_to_text_streaming/client.py,sha256
|
|
98
|
-
sarvamai/speech_to_text_streaming/raw_client.py,sha256=
|
|
96
|
+
sarvamai/speech_to_text_streaming/__init__.py,sha256=D_WTGMhL_12vOb4IazZpC3o91HKFgPRNjs2r0EEyPBk,665
|
|
97
|
+
sarvamai/speech_to_text_streaming/client.py,sha256=-6fmOpPZXu6iRHSrWCU6mrWUhE24_AIzyBj9yljpxfY,16686
|
|
98
|
+
sarvamai/speech_to_text_streaming/raw_client.py,sha256=jJIiKxj4QmU3HAu-ZVk7vZnPWO3kOXXUXVWT37o6hIg,15867
|
|
99
99
|
sarvamai/speech_to_text_streaming/socket_client.py,sha256=P6qXRN0s3UFAp6CP5lkqrW2KPK9me70ZVfWquxLB4wI,7538
|
|
100
|
-
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=
|
|
100
|
+
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=u6sdJI-GFD-CAKX7xFOeS64qwbDW5xuICovpYwBzwLY,962
|
|
101
101
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_flush_signal.py,sha256=dDJOBlzAjhuiSVqW2RHHY1f6xy0DU_Yoo9UV8-7MjnA,173
|
|
102
102
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_high_vad_sensitivity.py,sha256=OwPwffa8TkLPGMnOTn5S7d-HmV8QmN3B7fHz8I1-VT8,180
|
|
103
|
-
sarvamai/speech_to_text_streaming/types/
|
|
103
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py,sha256=dxnhjo9zo9WFk8CVUklubPhMaTxlaRvgFtuMmeQcAqo,208
|
|
104
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py,sha256=7JdW_xi7IljKpdpOfwgNqQnfpng8VwASVPJ4QuOMh24,572
|
|
104
105
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py,sha256=jce75h2M9dEMD_eC29zCLQZFbLfCy8sdxEIISxtfubQ,217
|
|
105
106
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py,sha256=CpWC1HmcPJKWCSHhTYSaC8_pMsfNTBA-EHq-sfCjS-A,179
|
|
106
107
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_vad_signals.py,sha256=8wiFOB7WDMbYCcMTYgNFJaIjEytYeXpJLwr_O_mH0TI,172
|
|
@@ -108,13 +109,14 @@ sarvamai/speech_to_text_translate_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3
|
|
|
108
109
|
sarvamai/speech_to_text_translate_job/client.py,sha256=xu8kYtCESDB7LzL8YKBUq5qhTPMIl3_H3XD2L_7y4UU,18969
|
|
109
110
|
sarvamai/speech_to_text_translate_job/job.py,sha256=tL1Zemsogb_AK9wqZwN4ooPaN176sFKduTH9g87y-WU,18938
|
|
110
111
|
sarvamai/speech_to_text_translate_job/raw_client.py,sha256=Emx14cRiAZXg1PqZkoJbDOKwyDmOgwxWlqPkAPZ9GPU,50797
|
|
111
|
-
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=
|
|
112
|
-
sarvamai/speech_to_text_translate_streaming/client.py,sha256=
|
|
113
|
-
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=
|
|
112
|
+
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=s6HPwrkABpkhDSsd_t6pVRiWfY4MfVE0lVj9b4V_fx4,527
|
|
113
|
+
sarvamai/speech_to_text_translate_streaming/client.py,sha256=QiKQWWjX4iVTLF6YwmeJKcqzU9TNFzENCJeI4xk2ndQ,11813
|
|
114
|
+
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=lUiORV21loTC3Fq5m8D_XJhxGdFFOfnmDNem3v2B64Y,10922
|
|
114
115
|
sarvamai/speech_to_text_translate_streaming/socket_client.py,sha256=ipEPSj5eHAyDpuEXfaP7JJL1rXJXGEo-IB888ReAFKs,8901
|
|
115
|
-
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=
|
|
116
|
+
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=nsKmvwkhcPekF9kcStDhTDilALFf2jT-wfCn25KVe7U,740
|
|
116
117
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_flush_signal.py,sha256=jkjvCGJ1pFKi3AOTkwMW-lo18WGgrgAhMpoe5P0AMzA,182
|
|
117
118
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_high_vad_sensitivity.py,sha256=r6MvTlkM0VEpb4dpnMHtINOZ-gYc22o0Fx_Xce2rjvo,189
|
|
119
|
+
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py,sha256=x8K3YSZ_mVkfiyhmRk8RvEInw3KTOwCapj5q2tOPggo,217
|
|
118
120
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_vad_signals.py,sha256=EV3xd9qyKMnMvA9rO-qFDDIac4b84roBu7n-maaPxG8,181
|
|
119
121
|
sarvamai/text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
120
122
|
sarvamai/text/client.py,sha256=jNOHjc3Pu7xsnuDMuf7xX3WHAIO7B-AgMgKj3pL3eZA,29657
|
|
@@ -188,19 +190,19 @@ sarvamai/types/send_text.py,sha256=kgST6V5NuURzgBpuiDi8fVwDg768ttDoeY9k1dSSb1Y,6
|
|
|
188
190
|
sarvamai/types/send_text_data.py,sha256=H8yfcvd4gvyN34RrZ9i4qQvieednNBhL7i7isX4asuY,519
|
|
189
191
|
sarvamai/types/speech_sample_rate.py,sha256=Hfi79KL2Y1W7OIvvrfWnt7EUvmU5i7bxYvXivrY_aUA,88
|
|
190
192
|
sarvamai/types/speech_to_text_job_parameters.py,sha256=JFKO2rIyBJE_PK4mrMMwY4e562l_6HbZtP2kd4CbKa8,3293
|
|
191
|
-
sarvamai/types/speech_to_text_language.py,sha256=
|
|
193
|
+
sarvamai/types/speech_to_text_language.py,sha256=dYmAo9lg7iC0w_GVjmnYmALTQgJjma2wFDXQY3_Q4pc,578
|
|
192
194
|
sarvamai/types/speech_to_text_model.py,sha256=hHC3aOXzdPt8i32qJw4ZLz5bdREWVQl4P7Y_lOURJu4,170
|
|
193
|
-
sarvamai/types/speech_to_text_response.py,sha256=
|
|
195
|
+
sarvamai/types/speech_to_text_response.py,sha256=0tXgHr-1zN54hA_6nUkMV5Jkt7pEmjl7zJQ8gOt5Nq8,932
|
|
194
196
|
sarvamai/types/speech_to_text_response_data.py,sha256=gbxZTBSjbN3ZIa10K6tWPYtymcpnQTFIaUnXkOmsmD4,322
|
|
195
197
|
sarvamai/types/speech_to_text_streaming_response.py,sha256=z6tVAHbVK9lC3w3lac__LEUfO8AAzEilkeGlaLskTtc,687
|
|
196
|
-
sarvamai/types/speech_to_text_transcription_data.py,sha256=
|
|
198
|
+
sarvamai/types/speech_to_text_transcription_data.py,sha256=nrGmNnA9Au5CpBTIyG9wto2PSl2I7T8Kll4KWOkCrFg,1850
|
|
197
199
|
sarvamai/types/speech_to_text_translate_job_parameters.py,sha256=-E85BoIBxW5Ck638aRFE0fC_f43RCoIkboAFu2QlBBs,1566
|
|
198
|
-
sarvamai/types/speech_to_text_translate_language.py,sha256=
|
|
200
|
+
sarvamai/types/speech_to_text_translate_language.py,sha256=lmWyAWMwSSDNPU5HrJtmhNYLRPhWo41ShMUCrXyEPoc,568
|
|
199
201
|
sarvamai/types/speech_to_text_translate_model.py,sha256=CVSz6gJBY82GhhEuWSdzRLJW9XTsAgweRnKd1tN6mXo,139
|
|
200
|
-
sarvamai/types/speech_to_text_translate_response.py,sha256=
|
|
202
|
+
sarvamai/types/speech_to_text_translate_response.py,sha256=v1xTwIva81c74hWy9-ipbJFQWvXlAvw_b7o9xvo6mgc,871
|
|
201
203
|
sarvamai/types/speech_to_text_translate_response_data.py,sha256=_NlLVp7oQU3em_4E47QVbIP9nromPE07Z9HtMpY1lrU,359
|
|
202
204
|
sarvamai/types/speech_to_text_translate_streaming_response.py,sha256=J6h3AGdAJxpODFs30bR-e6OaWKa__oVhwv_TrbPSO98,724
|
|
203
|
-
sarvamai/types/speech_to_text_translate_transcription_data.py,sha256
|
|
205
|
+
sarvamai/types/speech_to_text_translate_transcription_data.py,sha256=HR-7y3LKVt4e3FY028aEXQmORtjdpjBY29AWTIRsRVA,1506
|
|
204
206
|
sarvamai/types/spoken_form_numerals_format.py,sha256=soBly93wMkazIcp2GDM0Mf1MjY140Pe24hBlwNoWge0,169
|
|
205
207
|
sarvamai/types/stop_configuration.py,sha256=yA_q4s4BIrbl3FotZpg4ZcyL10C7gVI0s2dqvH32BNw,136
|
|
206
208
|
sarvamai/types/storage_container_type.py,sha256=DZXDiDj74lMmUq6jaZfIMW1zMXgoVdY6rs_FcyB9OGk,184
|
|
@@ -226,6 +228,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
|
|
|
226
228
|
sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
|
|
227
229
|
sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
|
|
228
230
|
sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
|
|
229
|
-
sarvamai-0.1.
|
|
230
|
-
sarvamai-0.1.
|
|
231
|
-
sarvamai-0.1.
|
|
231
|
+
sarvamai-0.1.23a6.dist-info/METADATA,sha256=5pdMsPKXwU2rQqBN-U7GmnWbAKVgSLmtfd7E0G8e6KU,26753
|
|
232
|
+
sarvamai-0.1.23a6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
233
|
+
sarvamai-0.1.23a6.dist-info/RECORD,,
|
|
File without changes
|