sarvamai 0.1.23a4__py3-none-any.whl → 0.1.23a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/speech_to_text_job_parameters.py +37 -5
- sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/speech_to_text/client.py +84 -26
- sarvamai/speech_to_text/raw_client.py +84 -26
- sarvamai/speech_to_text_streaming/client.py +74 -18
- sarvamai/speech_to_text_streaming/raw_client.py +74 -18
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +1 -1
- sarvamai/speech_to_text_translate_streaming/client.py +8 -2
- sarvamai/speech_to_text_translate_streaming/raw_client.py +8 -2
- sarvamai/types/mode.py +1 -3
- sarvamai/types/speech_to_text_job_parameters.py +37 -5
- sarvamai/types/speech_to_text_model.py +1 -3
- sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/types/speech_to_text_translate_model.py +1 -1
- {sarvamai-0.1.23a4.dist-info → sarvamai-0.1.23a5.dist-info}/METADATA +1 -1
- {sarvamai-0.1.23a4.dist-info → sarvamai-0.1.23a5.dist-info}/RECORD +18 -18
- {sarvamai-0.1.23a4.dist-info → sarvamai-0.1.23a5.dist-info}/WHEEL +0 -0
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.23a4",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.23a5",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.23a4",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.23a5",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
@@ -9,20 +9,52 @@ from ..types.speech_to_text_translate_language import SpeechToTextTranslateLangu
|
|
|
9
9
|
class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
|
|
10
10
|
language_code: typing_extensions.NotRequired[SpeechToTextTranslateLanguage]
|
|
11
11
|
"""
|
|
12
|
-
|
|
12
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
13
|
+
|
|
14
|
+
**Available Options:**
|
|
15
|
+
- `unknown` (default): Use when the language is not known; the API will auto-detect.
|
|
16
|
+
- `hi-IN`: Hindi
|
|
17
|
+
- `bn-IN`: Bengali
|
|
18
|
+
- `kn-IN`: Kannada
|
|
19
|
+
- `ml-IN`: Malayalam
|
|
20
|
+
- `mr-IN`: Marathi
|
|
21
|
+
- `od-IN`: Odia
|
|
22
|
+
- `pa-IN`: Punjabi
|
|
23
|
+
- `ta-IN`: Tamil
|
|
24
|
+
- `te-IN`: Telugu
|
|
25
|
+
- `en-IN`: English
|
|
26
|
+
- `gu-IN`: Gujarati
|
|
13
27
|
"""
|
|
14
28
|
|
|
15
29
|
model: typing_extensions.NotRequired[SpeechToTextModel]
|
|
16
30
|
"""
|
|
17
31
|
Model to be used for speech to text.
|
|
18
|
-
|
|
19
|
-
- **saarika:
|
|
20
|
-
|
|
32
|
+
|
|
33
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
34
|
+
|
|
35
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
21
36
|
"""
|
|
22
37
|
|
|
23
38
|
mode: typing_extensions.NotRequired[Mode]
|
|
24
39
|
"""
|
|
25
|
-
Mode of operation. Only applicable
|
|
40
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
41
|
+
|
|
42
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
43
|
+
|
|
44
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
45
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
46
|
+
|
|
47
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
48
|
+
- Output: `My phone number is 9840950950`
|
|
49
|
+
|
|
50
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
51
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
52
|
+
|
|
53
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
54
|
+
- Output: `mera phone number hai 9840950950`
|
|
55
|
+
|
|
56
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
57
|
+
- Output: `मेरा phone number है 9840950950`
|
|
26
58
|
"""
|
|
27
59
|
|
|
28
60
|
with_timestamps: typing_extensions.NotRequired[bool]
|
|
@@ -12,7 +12,10 @@ class SpeechToTextTranslateJobParametersParams(typing_extensions.TypedDict):
|
|
|
12
12
|
|
|
13
13
|
model: typing_extensions.NotRequired[SpeechToTextTranslateModel]
|
|
14
14
|
"""
|
|
15
|
-
Model to be used for
|
|
15
|
+
Model to be used for speech to text translation.
|
|
16
|
+
|
|
17
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
18
|
+
- Example: Hindi audio → English text output
|
|
16
19
|
"""
|
|
17
20
|
|
|
18
21
|
with_diarization: typing_extensions.NotRequired[bool]
|
|
@@ -65,23 +65,49 @@ class SpeechToTextClient:
|
|
|
65
65
|
|
|
66
66
|
model : typing.Optional[SpeechToTextModel]
|
|
67
67
|
Specifies the model to use for speech-to-text conversion.
|
|
68
|
-
|
|
69
|
-
- **saarika:
|
|
70
|
-
|
|
68
|
+
|
|
69
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
70
|
+
|
|
71
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
71
72
|
|
|
72
73
|
mode : typing.Optional[Mode]
|
|
73
74
|
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
- **
|
|
78
|
-
|
|
79
|
-
|
|
75
|
+
|
|
76
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
77
|
+
|
|
78
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
79
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
80
|
+
|
|
81
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
82
|
+
- Output: `My phone number is 9840950950`
|
|
83
|
+
|
|
84
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
85
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
86
|
+
|
|
87
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
88
|
+
- Output: `mera phone number hai 9840950950`
|
|
89
|
+
|
|
90
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
91
|
+
- Output: `मेरा phone number है 9840950950`
|
|
80
92
|
|
|
81
93
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
82
|
-
Specifies the language of the input audio.
|
|
83
|
-
|
|
84
|
-
|
|
94
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
95
|
+
|
|
96
|
+
**Note:** This parameter is optional for `saarika:v2.5` model.
|
|
97
|
+
|
|
98
|
+
**Available Options:**
|
|
99
|
+
- `unknown`: Use when the language is not known; the API will auto-detect.
|
|
100
|
+
- `hi-IN`: Hindi
|
|
101
|
+
- `bn-IN`: Bengali
|
|
102
|
+
- `kn-IN`: Kannada
|
|
103
|
+
- `ml-IN`: Malayalam
|
|
104
|
+
- `mr-IN`: Marathi
|
|
105
|
+
- `od-IN`: Odia
|
|
106
|
+
- `pa-IN`: Punjabi
|
|
107
|
+
- `ta-IN`: Tamil
|
|
108
|
+
- `te-IN`: Telugu
|
|
109
|
+
- `en-IN`: English
|
|
110
|
+
- `gu-IN`: Gujarati
|
|
85
111
|
|
|
86
112
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
87
113
|
Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.
|
|
@@ -146,7 +172,10 @@ class SpeechToTextClient:
|
|
|
146
172
|
Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.
|
|
147
173
|
|
|
148
174
|
model : typing.Optional[SpeechToTextTranslateModel]
|
|
149
|
-
Model to be used for
|
|
175
|
+
Model to be used for speech to text translation.
|
|
176
|
+
|
|
177
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
178
|
+
- Example: Hindi audio → English text output
|
|
150
179
|
|
|
151
180
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
152
181
|
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.
|
|
@@ -221,23 +250,49 @@ class AsyncSpeechToTextClient:
|
|
|
221
250
|
|
|
222
251
|
model : typing.Optional[SpeechToTextModel]
|
|
223
252
|
Specifies the model to use for speech-to-text conversion.
|
|
224
|
-
|
|
225
|
-
- **saarika:
|
|
226
|
-
|
|
253
|
+
|
|
254
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
255
|
+
|
|
256
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
227
257
|
|
|
228
258
|
mode : typing.Optional[Mode]
|
|
229
259
|
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
- **
|
|
234
|
-
|
|
235
|
-
|
|
260
|
+
|
|
261
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
262
|
+
|
|
263
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
264
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
265
|
+
|
|
266
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
267
|
+
- Output: `My phone number is 9840950950`
|
|
268
|
+
|
|
269
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
270
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
271
|
+
|
|
272
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
273
|
+
- Output: `mera phone number hai 9840950950`
|
|
274
|
+
|
|
275
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
276
|
+
- Output: `मेरा phone number है 9840950950`
|
|
236
277
|
|
|
237
278
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
238
|
-
Specifies the language of the input audio.
|
|
239
|
-
|
|
240
|
-
|
|
279
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
280
|
+
|
|
281
|
+
**Note:** This parameter is optional for `saarika:v2.5` model.
|
|
282
|
+
|
|
283
|
+
**Available Options:**
|
|
284
|
+
- `unknown`: Use when the language is not known; the API will auto-detect.
|
|
285
|
+
- `hi-IN`: Hindi
|
|
286
|
+
- `bn-IN`: Bengali
|
|
287
|
+
- `kn-IN`: Kannada
|
|
288
|
+
- `ml-IN`: Malayalam
|
|
289
|
+
- `mr-IN`: Marathi
|
|
290
|
+
- `od-IN`: Odia
|
|
291
|
+
- `pa-IN`: Punjabi
|
|
292
|
+
- `ta-IN`: Tamil
|
|
293
|
+
- `te-IN`: Telugu
|
|
294
|
+
- `en-IN`: English
|
|
295
|
+
- `gu-IN`: Gujarati
|
|
241
296
|
|
|
242
297
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
243
298
|
Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.
|
|
@@ -310,7 +365,10 @@ class AsyncSpeechToTextClient:
|
|
|
310
365
|
Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.
|
|
311
366
|
|
|
312
367
|
model : typing.Optional[SpeechToTextTranslateModel]
|
|
313
|
-
Model to be used for
|
|
368
|
+
Model to be used for speech to text translation.
|
|
369
|
+
|
|
370
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
371
|
+
- Example: Hindi audio → English text output
|
|
314
372
|
|
|
315
373
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
316
374
|
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.
|
|
@@ -63,23 +63,49 @@ class RawSpeechToTextClient:
|
|
|
63
63
|
|
|
64
64
|
model : typing.Optional[SpeechToTextModel]
|
|
65
65
|
Specifies the model to use for speech-to-text conversion.
|
|
66
|
-
|
|
67
|
-
- **saarika:
|
|
68
|
-
|
|
66
|
+
|
|
67
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
68
|
+
|
|
69
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
69
70
|
|
|
70
71
|
mode : typing.Optional[Mode]
|
|
71
72
|
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
- **
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
|
|
74
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
75
|
+
|
|
76
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
77
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
78
|
+
|
|
79
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
80
|
+
- Output: `My phone number is 9840950950`
|
|
81
|
+
|
|
82
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
83
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
84
|
+
|
|
85
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
86
|
+
- Output: `mera phone number hai 9840950950`
|
|
87
|
+
|
|
88
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
89
|
+
- Output: `मेरा phone number है 9840950950`
|
|
78
90
|
|
|
79
91
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
80
|
-
Specifies the language of the input audio.
|
|
81
|
-
|
|
82
|
-
|
|
92
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
93
|
+
|
|
94
|
+
**Note:** This parameter is optional for `saarika:v2.5` model.
|
|
95
|
+
|
|
96
|
+
**Available Options:**
|
|
97
|
+
- `unknown`: Use when the language is not known; the API will auto-detect.
|
|
98
|
+
- `hi-IN`: Hindi
|
|
99
|
+
- `bn-IN`: Bengali
|
|
100
|
+
- `kn-IN`: Kannada
|
|
101
|
+
- `ml-IN`: Malayalam
|
|
102
|
+
- `mr-IN`: Marathi
|
|
103
|
+
- `od-IN`: Odia
|
|
104
|
+
- `pa-IN`: Punjabi
|
|
105
|
+
- `ta-IN`: Tamil
|
|
106
|
+
- `te-IN`: Telugu
|
|
107
|
+
- `en-IN`: English
|
|
108
|
+
- `gu-IN`: Gujarati
|
|
83
109
|
|
|
84
110
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
85
111
|
Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.
|
|
@@ -223,7 +249,10 @@ class RawSpeechToTextClient:
|
|
|
223
249
|
Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.
|
|
224
250
|
|
|
225
251
|
model : typing.Optional[SpeechToTextTranslateModel]
|
|
226
|
-
Model to be used for
|
|
252
|
+
Model to be used for speech to text translation.
|
|
253
|
+
|
|
254
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
255
|
+
- Example: Hindi audio → English text output
|
|
227
256
|
|
|
228
257
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
229
258
|
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.
|
|
@@ -370,23 +399,49 @@ class AsyncRawSpeechToTextClient:
|
|
|
370
399
|
|
|
371
400
|
model : typing.Optional[SpeechToTextModel]
|
|
372
401
|
Specifies the model to use for speech-to-text conversion.
|
|
373
|
-
|
|
374
|
-
- **saarika:
|
|
375
|
-
|
|
402
|
+
|
|
403
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
404
|
+
|
|
405
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
376
406
|
|
|
377
407
|
mode : typing.Optional[Mode]
|
|
378
408
|
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
- **
|
|
383
|
-
|
|
384
|
-
|
|
409
|
+
|
|
410
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
411
|
+
|
|
412
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
413
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
414
|
+
|
|
415
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
416
|
+
- Output: `My phone number is 9840950950`
|
|
417
|
+
|
|
418
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
419
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
420
|
+
|
|
421
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
422
|
+
- Output: `mera phone number hai 9840950950`
|
|
423
|
+
|
|
424
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
425
|
+
- Output: `मेरा phone number है 9840950950`
|
|
385
426
|
|
|
386
427
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
387
|
-
Specifies the language of the input audio.
|
|
388
|
-
|
|
389
|
-
|
|
428
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
429
|
+
|
|
430
|
+
**Note:** This parameter is optional for `saarika:v2.5` model.
|
|
431
|
+
|
|
432
|
+
**Available Options:**
|
|
433
|
+
- `unknown`: Use when the language is not known; the API will auto-detect.
|
|
434
|
+
- `hi-IN`: Hindi
|
|
435
|
+
- `bn-IN`: Bengali
|
|
436
|
+
- `kn-IN`: Kannada
|
|
437
|
+
- `ml-IN`: Malayalam
|
|
438
|
+
- `mr-IN`: Marathi
|
|
439
|
+
- `od-IN`: Odia
|
|
440
|
+
- `pa-IN`: Punjabi
|
|
441
|
+
- `ta-IN`: Tamil
|
|
442
|
+
- `te-IN`: Telugu
|
|
443
|
+
- `en-IN`: English
|
|
444
|
+
- `gu-IN`: Gujarati
|
|
390
445
|
|
|
391
446
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
392
447
|
Input Audio codec/format of the input file. PCM files are supported only at 16kHz sample rate.
|
|
@@ -530,7 +585,10 @@ class AsyncRawSpeechToTextClient:
|
|
|
530
585
|
Conversation context can be passed as a prompt to boost model accuracy. However, the current system is at an experimentation stage and doesn't match the prompt performance of large language models.
|
|
531
586
|
|
|
532
587
|
model : typing.Optional[SpeechToTextTranslateModel]
|
|
533
|
-
Model to be used for
|
|
588
|
+
Model to be used for speech to text translation.
|
|
589
|
+
|
|
590
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
591
|
+
- Example: Hindi audio → English text output
|
|
534
592
|
|
|
535
593
|
input_audio_codec : typing.Optional[InputAudioCodec]
|
|
536
594
|
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files are supported only at 16kHz sample rate.
|
|
@@ -63,19 +63,47 @@ class SpeechToTextStreamingClient:
|
|
|
63
63
|
Parameters
|
|
64
64
|
----------
|
|
65
65
|
language_code : SpeechToTextStreamingLanguageCode
|
|
66
|
-
|
|
66
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
67
|
+
|
|
68
|
+
**Available Options:**
|
|
69
|
+
- `hi-IN`: Hindi
|
|
70
|
+
- `bn-IN`: Bengali
|
|
71
|
+
- `gu-IN`: Gujarati
|
|
72
|
+
- `kn-IN`: Kannada
|
|
73
|
+
- `ml-IN`: Malayalam
|
|
74
|
+
- `mr-IN`: Marathi
|
|
75
|
+
- `od-IN`: Odia
|
|
76
|
+
- `pa-IN`: Punjabi
|
|
77
|
+
- `ta-IN`: Tamil
|
|
78
|
+
- `te-IN`: Telugu
|
|
79
|
+
- `en-IN`: English
|
|
67
80
|
|
|
68
81
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
69
|
-
|
|
82
|
+
Specifies the model to use for speech-to-text conversion.
|
|
83
|
+
|
|
84
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
85
|
+
|
|
86
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
70
87
|
|
|
71
88
|
mode : typing.Optional[SpeechToTextStreamingMode]
|
|
72
|
-
Mode of operation
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
-
|
|
77
|
-
|
|
78
|
-
|
|
89
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
90
|
+
|
|
91
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
92
|
+
|
|
93
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
94
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
95
|
+
|
|
96
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
97
|
+
- Output: `My phone number is 9840950950`
|
|
98
|
+
|
|
99
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
100
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
101
|
+
|
|
102
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
103
|
+
- Output: `mera phone number hai 9840950950`
|
|
104
|
+
|
|
105
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
106
|
+
- Output: `मेरा phone number है 9840950950`
|
|
79
107
|
|
|
80
108
|
sample_rate : typing.Optional[str]
|
|
81
109
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -178,19 +206,47 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
178
206
|
Parameters
|
|
179
207
|
----------
|
|
180
208
|
language_code : SpeechToTextStreamingLanguageCode
|
|
181
|
-
|
|
209
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
210
|
+
|
|
211
|
+
**Available Options:**
|
|
212
|
+
- `hi-IN`: Hindi
|
|
213
|
+
- `bn-IN`: Bengali
|
|
214
|
+
- `gu-IN`: Gujarati
|
|
215
|
+
- `kn-IN`: Kannada
|
|
216
|
+
- `ml-IN`: Malayalam
|
|
217
|
+
- `mr-IN`: Marathi
|
|
218
|
+
- `od-IN`: Odia
|
|
219
|
+
- `pa-IN`: Punjabi
|
|
220
|
+
- `ta-IN`: Tamil
|
|
221
|
+
- `te-IN`: Telugu
|
|
222
|
+
- `en-IN`: English
|
|
182
223
|
|
|
183
224
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
184
|
-
|
|
225
|
+
Specifies the model to use for speech-to-text conversion.
|
|
226
|
+
|
|
227
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
228
|
+
|
|
229
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
185
230
|
|
|
186
231
|
mode : typing.Optional[SpeechToTextStreamingMode]
|
|
187
|
-
Mode of operation
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
-
|
|
192
|
-
|
|
193
|
-
|
|
232
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
233
|
+
|
|
234
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
235
|
+
|
|
236
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
237
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
238
|
+
|
|
239
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
240
|
+
- Output: `My phone number is 9840950950`
|
|
241
|
+
|
|
242
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
243
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
244
|
+
|
|
245
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
246
|
+
- Output: `mera phone number hai 9840950950`
|
|
247
|
+
|
|
248
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
249
|
+
- Output: `मेरा phone number है 9840950950`
|
|
194
250
|
|
|
195
251
|
sample_rate : typing.Optional[str]
|
|
196
252
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -51,19 +51,47 @@ class RawSpeechToTextStreamingClient:
|
|
|
51
51
|
Parameters
|
|
52
52
|
----------
|
|
53
53
|
language_code : SpeechToTextStreamingLanguageCode
|
|
54
|
-
|
|
54
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
55
|
+
|
|
56
|
+
**Available Options:**
|
|
57
|
+
- `hi-IN`: Hindi
|
|
58
|
+
- `bn-IN`: Bengali
|
|
59
|
+
- `gu-IN`: Gujarati
|
|
60
|
+
- `kn-IN`: Kannada
|
|
61
|
+
- `ml-IN`: Malayalam
|
|
62
|
+
- `mr-IN`: Marathi
|
|
63
|
+
- `od-IN`: Odia
|
|
64
|
+
- `pa-IN`: Punjabi
|
|
65
|
+
- `ta-IN`: Tamil
|
|
66
|
+
- `te-IN`: Telugu
|
|
67
|
+
- `en-IN`: English
|
|
55
68
|
|
|
56
69
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
57
|
-
|
|
70
|
+
Specifies the model to use for speech-to-text conversion.
|
|
71
|
+
|
|
72
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
73
|
+
|
|
74
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
58
75
|
|
|
59
76
|
mode : typing.Optional[SpeechToTextStreamingMode]
|
|
60
|
-
Mode of operation
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
-
|
|
65
|
-
|
|
66
|
-
|
|
77
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
78
|
+
|
|
79
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
80
|
+
|
|
81
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
82
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
83
|
+
|
|
84
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
85
|
+
- Output: `My phone number is 9840950950`
|
|
86
|
+
|
|
87
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
88
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
89
|
+
|
|
90
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
91
|
+
- Output: `mera phone number hai 9840950950`
|
|
92
|
+
|
|
93
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
94
|
+
- Output: `मेरा phone number है 9840950950`
|
|
67
95
|
|
|
68
96
|
sample_rate : typing.Optional[str]
|
|
69
97
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -155,19 +183,47 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
155
183
|
Parameters
|
|
156
184
|
----------
|
|
157
185
|
language_code : SpeechToTextStreamingLanguageCode
|
|
158
|
-
|
|
186
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
187
|
+
|
|
188
|
+
**Available Options:**
|
|
189
|
+
- `hi-IN`: Hindi
|
|
190
|
+
- `bn-IN`: Bengali
|
|
191
|
+
- `gu-IN`: Gujarati
|
|
192
|
+
- `kn-IN`: Kannada
|
|
193
|
+
- `ml-IN`: Malayalam
|
|
194
|
+
- `mr-IN`: Marathi
|
|
195
|
+
- `od-IN`: Odia
|
|
196
|
+
- `pa-IN`: Punjabi
|
|
197
|
+
- `ta-IN`: Tamil
|
|
198
|
+
- `te-IN`: Telugu
|
|
199
|
+
- `en-IN`: English
|
|
159
200
|
|
|
160
201
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
161
|
-
|
|
202
|
+
Specifies the model to use for speech-to-text conversion.
|
|
203
|
+
|
|
204
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
205
|
+
|
|
206
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
162
207
|
|
|
163
208
|
mode : typing.Optional[SpeechToTextStreamingMode]
|
|
164
|
-
Mode of operation
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
-
|
|
169
|
-
|
|
170
|
-
|
|
209
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
210
|
+
|
|
211
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
212
|
+
|
|
213
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
214
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
215
|
+
|
|
216
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
217
|
+
- Output: `My phone number is 9840950950`
|
|
218
|
+
|
|
219
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
220
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
221
|
+
|
|
222
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
223
|
+
- Output: `mera phone number hai 9840950950`
|
|
224
|
+
|
|
225
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
226
|
+
- Output: `मेरा phone number है 9840950950`
|
|
171
227
|
|
|
172
228
|
sample_rate : typing.Optional[str]
|
|
173
229
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -60,7 +60,10 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
60
60
|
Parameters
|
|
61
61
|
----------
|
|
62
62
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
63
|
-
|
|
63
|
+
Model to be used for speech to text translation.
|
|
64
|
+
|
|
65
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
66
|
+
- Example: Hindi audio → English text output
|
|
64
67
|
|
|
65
68
|
sample_rate : typing.Optional[str]
|
|
66
69
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -157,7 +160,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
157
160
|
Parameters
|
|
158
161
|
----------
|
|
159
162
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
160
|
-
|
|
163
|
+
Model to be used for speech to text translation.
|
|
164
|
+
|
|
165
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
166
|
+
- Example: Hindi audio → English text output
|
|
161
167
|
|
|
162
168
|
sample_rate : typing.Optional[str]
|
|
163
169
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -48,7 +48,10 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
48
48
|
Parameters
|
|
49
49
|
----------
|
|
50
50
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
51
|
-
|
|
51
|
+
Model to be used for speech to text translation.
|
|
52
|
+
|
|
53
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
54
|
+
- Example: Hindi audio → English text output
|
|
52
55
|
|
|
53
56
|
sample_rate : typing.Optional[str]
|
|
54
57
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
@@ -134,7 +137,10 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
134
137
|
Parameters
|
|
135
138
|
----------
|
|
136
139
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
137
|
-
|
|
140
|
+
Model to be used for speech to text translation.
|
|
141
|
+
|
|
142
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
143
|
+
- Example: Hindi audio → English text output
|
|
138
144
|
|
|
139
145
|
sample_rate : typing.Optional[str]
|
|
140
146
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
sarvamai/types/mode.py
CHANGED
|
@@ -12,20 +12,52 @@ from .speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
|
12
12
|
class SpeechToTextJobParameters(UniversalBaseModel):
|
|
13
13
|
language_code: typing.Optional[SpeechToTextTranslateLanguage] = pydantic.Field(default=None)
|
|
14
14
|
"""
|
|
15
|
-
|
|
15
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
16
|
+
|
|
17
|
+
**Available Options:**
|
|
18
|
+
- `unknown` (default): Use when the language is not known; the API will auto-detect.
|
|
19
|
+
- `hi-IN`: Hindi
|
|
20
|
+
- `bn-IN`: Bengali
|
|
21
|
+
- `kn-IN`: Kannada
|
|
22
|
+
- `ml-IN`: Malayalam
|
|
23
|
+
- `mr-IN`: Marathi
|
|
24
|
+
- `od-IN`: Odia
|
|
25
|
+
- `pa-IN`: Punjabi
|
|
26
|
+
- `ta-IN`: Tamil
|
|
27
|
+
- `te-IN`: Telugu
|
|
28
|
+
- `en-IN`: English
|
|
29
|
+
- `gu-IN`: Gujarati
|
|
16
30
|
"""
|
|
17
31
|
|
|
18
32
|
model: typing.Optional[SpeechToTextModel] = pydantic.Field(default=None)
|
|
19
33
|
"""
|
|
20
34
|
Model to be used for speech to text.
|
|
21
|
-
|
|
22
|
-
- **saarika:
|
|
23
|
-
|
|
35
|
+
|
|
36
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
37
|
+
|
|
38
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
24
39
|
"""
|
|
25
40
|
|
|
26
41
|
mode: typing.Optional[Mode] = pydantic.Field(default=None)
|
|
27
42
|
"""
|
|
28
|
-
Mode of operation. Only applicable
|
|
43
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
44
|
+
|
|
45
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
46
|
+
|
|
47
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
48
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
49
|
+
|
|
50
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
51
|
+
- Output: `My phone number is 9840950950`
|
|
52
|
+
|
|
53
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
54
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
55
|
+
|
|
56
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
57
|
+
- Output: `mera phone number hai 9840950950`
|
|
58
|
+
|
|
59
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
60
|
+
- Output: `मेरा phone number है 9840950950`
|
|
29
61
|
"""
|
|
30
62
|
|
|
31
63
|
with_timestamps: typing.Optional[bool] = pydantic.Field(default=None)
|
|
@@ -15,7 +15,10 @@ class SpeechToTextTranslateJobParameters(UniversalBaseModel):
|
|
|
15
15
|
|
|
16
16
|
model: typing.Optional[SpeechToTextTranslateModel] = pydantic.Field(default=None)
|
|
17
17
|
"""
|
|
18
|
-
Model to be used for
|
|
18
|
+
Model to be used for speech to text translation.
|
|
19
|
+
|
|
20
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
21
|
+
- Example: Hindi audio → English text output
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
24
|
with_diarization: typing.Optional[bool] = pydantic.Field(default=None)
|
|
@@ -5,7 +5,7 @@ sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,1
|
|
|
5
5
|
sarvamai/client.py,sha256=J30X_os1lPf8Wml0KDFEf6p8VGHhgF_lf3nw1T2D3qo,8207
|
|
6
6
|
sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
|
|
7
7
|
sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
|
8
|
-
sarvamai/core/client_wrapper.py,sha256=
|
|
8
|
+
sarvamai/core/client_wrapper.py,sha256=xycjM0SER509jhTOV9GZTxV42go3HQOVrskOdattRwM,2570
|
|
9
9
|
sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
|
10
10
|
sarvamai/core/events.py,sha256=HvKBdSoYcFetk7cgNXb7FxuY-FtY8NtUhZIN7mGVx8U,1159
|
|
11
11
|
sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
|
@@ -67,12 +67,12 @@ sarvamai/requests/language_identification_response.py,sha256=BdS5U9Gic-71vb--ph6
|
|
|
67
67
|
sarvamai/requests/ping_signal.py,sha256=TSgmfz2k4X1L6TzvX8u2SKZ6XQY3bSf7nPZf8mUViaM,343
|
|
68
68
|
sarvamai/requests/send_text.py,sha256=DWzbNgeNN2xSIYgk2zEisgLqjwq5oleqJVHrtOnIqbE,267
|
|
69
69
|
sarvamai/requests/send_text_data.py,sha256=2jds-xd77u-YTgIWQsTUBKE-_7tsrFshXXuC_Ld4ULo,161
|
|
70
|
-
sarvamai/requests/speech_to_text_job_parameters.py,sha256=
|
|
70
|
+
sarvamai/requests/speech_to_text_job_parameters.py,sha256=C1dpp5IkylFXdSNfKDhSo8jbFUquFh3SURFVME9XXq4,2854
|
|
71
71
|
sarvamai/requests/speech_to_text_response.py,sha256=GS3jNmHDOxqNZ7cvftD62khUMSBIQUu6zEPdCqk8zJk,1041
|
|
72
72
|
sarvamai/requests/speech_to_text_response_data.py,sha256=69fYRdL0tCKpgKQqwzcM4T4Nf_lRxJFh-VCFe_tN964,364
|
|
73
73
|
sarvamai/requests/speech_to_text_streaming_response.py,sha256=cN5tKE9wOWuyBna4wmrf-0LfkOULMpRaJ7qjLuu76V0,348
|
|
74
74
|
sarvamai/requests/speech_to_text_transcription_data.py,sha256=Vc65hXDq65d14cP-fDJm151bi7XEKgPItNGt1UL6cOY,877
|
|
75
|
-
sarvamai/requests/speech_to_text_translate_job_parameters.py,sha256=
|
|
75
|
+
sarvamai/requests/speech_to_text_translate_job_parameters.py,sha256=Q1mL3ul3WYArryp-HP_wHc8WCTLq6ZFhCUFeH3NM80E,1147
|
|
76
76
|
sarvamai/requests/speech_to_text_translate_response.py,sha256=xLV2F37PkGR0erRDfTBEPWvywR8eVSL9JbH5a0C9wkY,893
|
|
77
77
|
sarvamai/requests/speech_to_text_translate_response_data.py,sha256=OmjunP9R2xertJKn4fmpyzjDdj1_B_Yh6ZjH1eOlR-Q,407
|
|
78
78
|
sarvamai/requests/speech_to_text_translate_streaming_response.py,sha256=KTjYZ0_oLapuM5Iiq7UwejMsrL1TGgFAW4k5l17TkZs,385
|
|
@@ -87,21 +87,21 @@ sarvamai/requests/transcription_metrics.py,sha256=FDclX2Z9Z3azrDXxtZW8xbkxxWMZQX
|
|
|
87
87
|
sarvamai/requests/translation_response.py,sha256=8iwQeZB1purHY757bIQI-n9QeVRBItaAVcBJ_la-k1Y,414
|
|
88
88
|
sarvamai/requests/transliteration_response.py,sha256=KqRkqnegLmt7LjdVxjRePX6RoqaLm64KFGZ6q7mXyfw,426
|
|
89
89
|
sarvamai/speech_to_text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
90
|
-
sarvamai/speech_to_text/client.py,sha256=
|
|
91
|
-
sarvamai/speech_to_text/raw_client.py,sha256=
|
|
90
|
+
sarvamai/speech_to_text/client.py,sha256=K9lb57rQRfYwwAonj7BSP9aKI7io_fXhjn79M-Brwag,16483
|
|
91
|
+
sarvamai/speech_to_text/raw_client.py,sha256=Rw0x9ipXFWs6xF4aly4DgIN9tqdPZyLz5Powg2ZnQkA,30781
|
|
92
92
|
sarvamai/speech_to_text_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
93
93
|
sarvamai/speech_to_text_job/client.py,sha256=nMhBJa1rf1qQdQhlB1WUtyzOGhsWxECCPAJwBvWnq8M,18930
|
|
94
94
|
sarvamai/speech_to_text_job/job.py,sha256=9AfVSp5nzrl-Cx_1n2AJZqTMzp6Dkz2cvmbdq78fCgM,18751
|
|
95
95
|
sarvamai/speech_to_text_job/raw_client.py,sha256=6MB82mSqAOi92mE8vUeNSTB0wuxLZYRwizt15R6r-wo,49394
|
|
96
96
|
sarvamai/speech_to_text_streaming/__init__.py,sha256=5l81Q5goyVA8oC5cKaS9-Hv4_PR2nYC318VcmaUTpg8,579
|
|
97
|
-
sarvamai/speech_to_text_streaming/client.py,sha256=
|
|
98
|
-
sarvamai/speech_to_text_streaming/raw_client.py,sha256=
|
|
97
|
+
sarvamai/speech_to_text_streaming/client.py,sha256=KiJEcJuHYc7bfbOXIBOEras0KjM2zhgVYo4lUK4Hlek,14801
|
|
98
|
+
sarvamai/speech_to_text_streaming/raw_client.py,sha256=QzC3ytldXbH6b5-tpPT7tmie5VhaXSGd0J1RbvspnaY,13982
|
|
99
99
|
sarvamai/speech_to_text_streaming/socket_client.py,sha256=P6qXRN0s3UFAp6CP5lkqrW2KPK9me70ZVfWquxLB4wI,7538
|
|
100
100
|
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=M6FNnzx7E98J7AzP2oU_94qVsWoxaYZG9_Syc5p5qQg,825
|
|
101
101
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_flush_signal.py,sha256=dDJOBlzAjhuiSVqW2RHHY1f6xy0DU_Yoo9UV8-7MjnA,173
|
|
102
102
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_high_vad_sensitivity.py,sha256=OwPwffa8TkLPGMnOTn5S7d-HmV8QmN3B7fHz8I1-VT8,180
|
|
103
103
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py,sha256=LxgEifmgWTCFZn9U-f-TWKxRPng3a2J26Zt526QrA0Y,267
|
|
104
|
-
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py,sha256=
|
|
104
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py,sha256=jce75h2M9dEMD_eC29zCLQZFbLfCy8sdxEIISxtfubQ,217
|
|
105
105
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py,sha256=CpWC1HmcPJKWCSHhTYSaC8_pMsfNTBA-EHq-sfCjS-A,179
|
|
106
106
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_vad_signals.py,sha256=8wiFOB7WDMbYCcMTYgNFJaIjEytYeXpJLwr_O_mH0TI,172
|
|
107
107
|
sarvamai/speech_to_text_translate_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
@@ -109,8 +109,8 @@ sarvamai/speech_to_text_translate_job/client.py,sha256=xu8kYtCESDB7LzL8YKBUq5qhT
|
|
|
109
109
|
sarvamai/speech_to_text_translate_job/job.py,sha256=tL1Zemsogb_AK9wqZwN4ooPaN176sFKduTH9g87y-WU,18938
|
|
110
110
|
sarvamai/speech_to_text_translate_job/raw_client.py,sha256=Emx14cRiAZXg1PqZkoJbDOKwyDmOgwxWlqPkAPZ9GPU,50797
|
|
111
111
|
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=HYq3MzUyWa4Kt1ou6vgAkbMCHoIcXzoqn6V1kya5c6g,423
|
|
112
|
-
sarvamai/speech_to_text_translate_streaming/client.py,sha256=
|
|
113
|
-
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=
|
|
112
|
+
sarvamai/speech_to_text_translate_streaming/client.py,sha256=wFAn-mrcOUFFa5sAgjSkAx9OZiTNbQvyJwBlid3hJP8,10757
|
|
113
|
+
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=jDUuPdYWeh-sxeNHdEzLeMaAyCKJZDz-5mrVRUPoFZU,9866
|
|
114
114
|
sarvamai/speech_to_text_translate_streaming/socket_client.py,sha256=ipEPSj5eHAyDpuEXfaP7JJL1rXJXGEo-IB888ReAFKs,8901
|
|
115
115
|
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=NFfpVz1gILXj96rWMFw4MgSpni0Yvs8CHgwJ9Xry7OU,575
|
|
116
116
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_flush_signal.py,sha256=jkjvCGJ1pFKi3AOTkwMW-lo18WGgrgAhMpoe5P0AMzA,182
|
|
@@ -177,7 +177,7 @@ sarvamai/types/input_audio_codec.py,sha256=P3rz6lg-T34E7psQ1DX4e2yvPxVJDDsMVn4TS
|
|
|
177
177
|
sarvamai/types/job_state.py,sha256=H6Zph2mIcjsd3upEDt1VzIEORkEpnIDs0kH8BvIyrow,189
|
|
178
178
|
sarvamai/types/job_status_v_1_response.py,sha256=jusn-3XLCk8vCnu3q3GGSzz_yYgKB5PY_01Q32-1aJk,1605
|
|
179
179
|
sarvamai/types/language_identification_response.py,sha256=jG4ZQ6KQHCiEDqC51OniOwiRdW14Fbz22bbTsUDp_kc,1483
|
|
180
|
-
sarvamai/types/mode.py,sha256=
|
|
180
|
+
sarvamai/types/mode.py,sha256=ENoXfbrT2rt9ClT6QHC3xQHiSJwM0Q0dZK5iEKJp41k,190
|
|
181
181
|
sarvamai/types/numerals_format.py,sha256=xg3lYiHcnzyFwuwRcaIteJLH_Pz6pJ9n9kTlYPEnCBU,165
|
|
182
182
|
sarvamai/types/ping_signal.py,sha256=cE53FRIXlc8bSo18z6jlAnOh6DhZEMX36huWEX6X3-A,695
|
|
183
183
|
sarvamai/types/reasoning_effort.py,sha256=_TBLn3rQgzJAdnKqV2g0PETbrSBZl0fPLfQ5ZE9H4Pc,164
|
|
@@ -187,16 +187,16 @@ sarvamai/types/sarvam_model_ids.py,sha256=iYBMglf31KQ1iUZeAMQ-2PP9NDcyHRG7goz7O9
|
|
|
187
187
|
sarvamai/types/send_text.py,sha256=kgST6V5NuURzgBpuiDi8fVwDg768ttDoeY9k1dSSb1Y,607
|
|
188
188
|
sarvamai/types/send_text_data.py,sha256=H8yfcvd4gvyN34RrZ9i4qQvieednNBhL7i7isX4asuY,519
|
|
189
189
|
sarvamai/types/speech_sample_rate.py,sha256=Hfi79KL2Y1W7OIvvrfWnt7EUvmU5i7bxYvXivrY_aUA,88
|
|
190
|
-
sarvamai/types/speech_to_text_job_parameters.py,sha256=
|
|
190
|
+
sarvamai/types/speech_to_text_job_parameters.py,sha256=JFKO2rIyBJE_PK4mrMMwY4e562l_6HbZtP2kd4CbKa8,3293
|
|
191
191
|
sarvamai/types/speech_to_text_language.py,sha256=cq8FBOX0DfYB3v8jgNteQtHeJcqWqzKWJVyYGwwo_w0,279
|
|
192
|
-
sarvamai/types/speech_to_text_model.py,sha256=
|
|
192
|
+
sarvamai/types/speech_to_text_model.py,sha256=hHC3aOXzdPt8i32qJw4ZLz5bdREWVQl4P7Y_lOURJu4,170
|
|
193
193
|
sarvamai/types/speech_to_text_response.py,sha256=iWRGEJeHUFIOxeEhoCQu68njeA6lcqXbT2czV-O8Wx0,1438
|
|
194
194
|
sarvamai/types/speech_to_text_response_data.py,sha256=gbxZTBSjbN3ZIa10K6tWPYtymcpnQTFIaUnXkOmsmD4,322
|
|
195
195
|
sarvamai/types/speech_to_text_streaming_response.py,sha256=z6tVAHbVK9lC3w3lac__LEUfO8AAzEilkeGlaLskTtc,687
|
|
196
196
|
sarvamai/types/speech_to_text_transcription_data.py,sha256=EqwPAPSi98PwARaTj-ufzFUSHyN-NPoPla5vi_KERrU,1297
|
|
197
|
-
sarvamai/types/speech_to_text_translate_job_parameters.py,sha256
|
|
197
|
+
sarvamai/types/speech_to_text_translate_job_parameters.py,sha256=-E85BoIBxW5Ck638aRFE0fC_f43RCoIkboAFu2QlBBs,1566
|
|
198
198
|
sarvamai/types/speech_to_text_translate_language.py,sha256=yikNM-roIumVG-eqBWss93wLGudZdLPwd0i3VcXH5zo,263
|
|
199
|
-
sarvamai/types/speech_to_text_translate_model.py,sha256=
|
|
199
|
+
sarvamai/types/speech_to_text_translate_model.py,sha256=CVSz6gJBY82GhhEuWSdzRLJW9XTsAgweRnKd1tN6mXo,139
|
|
200
200
|
sarvamai/types/speech_to_text_translate_response.py,sha256=Z5Na7IQW2ok3TP21xd-jKkwioplEKfonNIMhoJQKkVw,1278
|
|
201
201
|
sarvamai/types/speech_to_text_translate_response_data.py,sha256=_NlLVp7oQU3em_4E47QVbIP9nromPE07Z9HtMpY1lrU,359
|
|
202
202
|
sarvamai/types/speech_to_text_translate_streaming_response.py,sha256=J6h3AGdAJxpODFs30bR-e6OaWKa__oVhwv_TrbPSO98,724
|
|
@@ -226,6 +226,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
|
|
|
226
226
|
sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
|
|
227
227
|
sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
|
|
228
228
|
sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
|
|
229
|
-
sarvamai-0.1.
|
|
230
|
-
sarvamai-0.1.
|
|
231
|
-
sarvamai-0.1.
|
|
229
|
+
sarvamai-0.1.23a5.dist-info/METADATA,sha256=nPZ4wzgXrEkNFWPm-2mlOxCW4n4xIFhbBehaDWxkTiE,26753
|
|
230
|
+
sarvamai-0.1.23a5.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
231
|
+
sarvamai-0.1.23a5.dist-info/RECORD,,
|
|
File without changes
|