sarvamai 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +203 -405
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +34 -186
- sarvamai/core/__init__.py +21 -76
- sarvamai/core/client_wrapper.py +3 -19
- sarvamai/core/force_multipart.py +2 -4
- sarvamai/core/http_client.py +97 -217
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/jsonable_encoder.py +0 -8
- sarvamai/core/pydantic_utilities.py +4 -110
- sarvamai/errors/__init__.py +6 -40
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +62 -150
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_job_parameters.py +10 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -2
- sarvamai/speech_to_text/client.py +29 -2
- sarvamai/speech_to_text/raw_client.py +81 -56
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_job/raw_client.py +120 -120
- sarvamai/speech_to_text_streaming/__init__.py +10 -38
- sarvamai/speech_to_text_streaming/client.py +32 -6
- sarvamai/speech_to_text_streaming/raw_client.py +32 -6
- sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
- sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
- sarvamai/text/raw_client.py +60 -60
- sarvamai/text_to_speech/client.py +100 -16
- sarvamai/text_to_speech/raw_client.py +120 -36
- sarvamai/text_to_speech_streaming/__init__.py +2 -29
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +102 -222
- sarvamai/types/chat_completion_request_message.py +2 -6
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/mode.py +7 -0
- sarvamai/types/speech_to_text_job_parameters.py +10 -1
- sarvamai/types/speech_to_text_model.py +3 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -2
- sarvamai/types/speech_to_text_translate_model.py +1 -1
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
- sarvamai/core/http_sse/__init__.py +0 -42
- sarvamai/core/http_sse/_api.py +0 -112
- sarvamai/core/http_sse/_decoders.py +0 -61
- sarvamai/core/http_sse/_exceptions.py +0 -7
- sarvamai/core/http_sse/_models.py +0 -17
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
import typing
|
|
5
4
|
from json.decoder import JSONDecodeError
|
|
6
5
|
|
|
@@ -8,7 +7,6 @@ from .. import core
|
|
|
8
7
|
from ..core.api_error import ApiError
|
|
9
8
|
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
10
9
|
from ..core.http_response import AsyncHttpResponse, HttpResponse
|
|
11
|
-
from ..core.jsonable_encoder import jsonable_encoder
|
|
12
10
|
from ..core.pydantic_utilities import parse_obj_as
|
|
13
11
|
from ..core.request_options import RequestOptions
|
|
14
12
|
from ..errors.bad_request_error import BadRequestError
|
|
@@ -18,6 +16,7 @@ from ..errors.service_unavailable_error import ServiceUnavailableError
|
|
|
18
16
|
from ..errors.too_many_requests_error import TooManyRequestsError
|
|
19
17
|
from ..errors.unprocessable_entity_error import UnprocessableEntityError
|
|
20
18
|
from ..types.input_audio_codec import InputAudioCodec
|
|
19
|
+
from ..types.mode import Mode
|
|
21
20
|
from ..types.speech_to_text_language import SpeechToTextLanguage
|
|
22
21
|
from ..types.speech_to_text_model import SpeechToTextModel
|
|
23
22
|
from ..types.speech_to_text_response import SpeechToTextResponse
|
|
@@ -37,6 +36,7 @@ class RawSpeechToTextClient:
|
|
|
37
36
|
*,
|
|
38
37
|
file: core.File,
|
|
39
38
|
model: typing.Optional[SpeechToTextModel] = OMIT,
|
|
39
|
+
mode: typing.Optional[Mode] = OMIT,
|
|
40
40
|
language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
|
|
41
41
|
input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
|
|
42
42
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -63,7 +63,18 @@ class RawSpeechToTextClient:
|
|
|
63
63
|
|
|
64
64
|
model : typing.Optional[SpeechToTextModel]
|
|
65
65
|
Specifies the model to use for speech-to-text conversion.
|
|
66
|
-
|
|
66
|
+
- **saarika:v2.5** (default): Standard transcription model
|
|
67
|
+
- **saarika:v3**: Advanced transcription model
|
|
68
|
+
- **saaras:v3**: Advanced model with multiple output modes
|
|
69
|
+
|
|
70
|
+
mode : typing.Optional[Mode]
|
|
71
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
72
|
+
- **transcribe** (default): Standard transcription
|
|
73
|
+
- **translate**: Translation to English
|
|
74
|
+
- **indic-en**: Indic to English translation
|
|
75
|
+
- **verbatim**: Exact transcription
|
|
76
|
+
- **translit**: Transliteration to Latin script
|
|
77
|
+
- **codemix**: Code-mixed output
|
|
67
78
|
|
|
68
79
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
69
80
|
Specifies the language of the input audio.
|
|
@@ -86,7 +97,8 @@ class RawSpeechToTextClient:
|
|
|
86
97
|
base_url=self._client_wrapper.get_environment().base,
|
|
87
98
|
method="POST",
|
|
88
99
|
data={
|
|
89
|
-
"model":
|
|
100
|
+
"model": model,
|
|
101
|
+
"mode": mode,
|
|
90
102
|
"language_code": language_code,
|
|
91
103
|
"input_audio_codec": input_audio_codec,
|
|
92
104
|
},
|
|
@@ -111,9 +123,9 @@ class RawSpeechToTextClient:
|
|
|
111
123
|
raise BadRequestError(
|
|
112
124
|
headers=dict(_response.headers),
|
|
113
125
|
body=typing.cast(
|
|
114
|
-
typing.Any,
|
|
126
|
+
typing.Optional[typing.Any],
|
|
115
127
|
parse_obj_as(
|
|
116
|
-
type_=typing.Any, # type: ignore
|
|
128
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
117
129
|
object_=_response.json(),
|
|
118
130
|
),
|
|
119
131
|
),
|
|
@@ -122,9 +134,9 @@ class RawSpeechToTextClient:
|
|
|
122
134
|
raise ForbiddenError(
|
|
123
135
|
headers=dict(_response.headers),
|
|
124
136
|
body=typing.cast(
|
|
125
|
-
typing.Any,
|
|
137
|
+
typing.Optional[typing.Any],
|
|
126
138
|
parse_obj_as(
|
|
127
|
-
type_=typing.Any, # type: ignore
|
|
139
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
128
140
|
object_=_response.json(),
|
|
129
141
|
),
|
|
130
142
|
),
|
|
@@ -133,9 +145,9 @@ class RawSpeechToTextClient:
|
|
|
133
145
|
raise UnprocessableEntityError(
|
|
134
146
|
headers=dict(_response.headers),
|
|
135
147
|
body=typing.cast(
|
|
136
|
-
typing.Any,
|
|
148
|
+
typing.Optional[typing.Any],
|
|
137
149
|
parse_obj_as(
|
|
138
|
-
type_=typing.Any, # type: ignore
|
|
150
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
139
151
|
object_=_response.json(),
|
|
140
152
|
),
|
|
141
153
|
),
|
|
@@ -144,9 +156,9 @@ class RawSpeechToTextClient:
|
|
|
144
156
|
raise TooManyRequestsError(
|
|
145
157
|
headers=dict(_response.headers),
|
|
146
158
|
body=typing.cast(
|
|
147
|
-
typing.Any,
|
|
159
|
+
typing.Optional[typing.Any],
|
|
148
160
|
parse_obj_as(
|
|
149
|
-
type_=typing.Any, # type: ignore
|
|
161
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
150
162
|
object_=_response.json(),
|
|
151
163
|
),
|
|
152
164
|
),
|
|
@@ -155,9 +167,9 @@ class RawSpeechToTextClient:
|
|
|
155
167
|
raise InternalServerError(
|
|
156
168
|
headers=dict(_response.headers),
|
|
157
169
|
body=typing.cast(
|
|
158
|
-
typing.Any,
|
|
170
|
+
typing.Optional[typing.Any],
|
|
159
171
|
parse_obj_as(
|
|
160
|
-
type_=typing.Any, # type: ignore
|
|
172
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
161
173
|
object_=_response.json(),
|
|
162
174
|
),
|
|
163
175
|
),
|
|
@@ -166,9 +178,9 @@ class RawSpeechToTextClient:
|
|
|
166
178
|
raise ServiceUnavailableError(
|
|
167
179
|
headers=dict(_response.headers),
|
|
168
180
|
body=typing.cast(
|
|
169
|
-
typing.Any,
|
|
181
|
+
typing.Optional[typing.Any],
|
|
170
182
|
parse_obj_as(
|
|
171
|
-
type_=typing.Any, # type: ignore
|
|
183
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
172
184
|
object_=_response.json(),
|
|
173
185
|
),
|
|
174
186
|
),
|
|
@@ -230,7 +242,7 @@ class RawSpeechToTextClient:
|
|
|
230
242
|
method="POST",
|
|
231
243
|
data={
|
|
232
244
|
"prompt": prompt,
|
|
233
|
-
"model":
|
|
245
|
+
"model": model,
|
|
234
246
|
"input_audio_codec": input_audio_codec,
|
|
235
247
|
},
|
|
236
248
|
files={
|
|
@@ -254,9 +266,9 @@ class RawSpeechToTextClient:
|
|
|
254
266
|
raise BadRequestError(
|
|
255
267
|
headers=dict(_response.headers),
|
|
256
268
|
body=typing.cast(
|
|
257
|
-
typing.Any,
|
|
269
|
+
typing.Optional[typing.Any],
|
|
258
270
|
parse_obj_as(
|
|
259
|
-
type_=typing.Any, # type: ignore
|
|
271
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
260
272
|
object_=_response.json(),
|
|
261
273
|
),
|
|
262
274
|
),
|
|
@@ -265,9 +277,9 @@ class RawSpeechToTextClient:
|
|
|
265
277
|
raise ForbiddenError(
|
|
266
278
|
headers=dict(_response.headers),
|
|
267
279
|
body=typing.cast(
|
|
268
|
-
typing.Any,
|
|
280
|
+
typing.Optional[typing.Any],
|
|
269
281
|
parse_obj_as(
|
|
270
|
-
type_=typing.Any, # type: ignore
|
|
282
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
271
283
|
object_=_response.json(),
|
|
272
284
|
),
|
|
273
285
|
),
|
|
@@ -276,9 +288,9 @@ class RawSpeechToTextClient:
|
|
|
276
288
|
raise UnprocessableEntityError(
|
|
277
289
|
headers=dict(_response.headers),
|
|
278
290
|
body=typing.cast(
|
|
279
|
-
typing.Any,
|
|
291
|
+
typing.Optional[typing.Any],
|
|
280
292
|
parse_obj_as(
|
|
281
|
-
type_=typing.Any, # type: ignore
|
|
293
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
282
294
|
object_=_response.json(),
|
|
283
295
|
),
|
|
284
296
|
),
|
|
@@ -287,9 +299,9 @@ class RawSpeechToTextClient:
|
|
|
287
299
|
raise TooManyRequestsError(
|
|
288
300
|
headers=dict(_response.headers),
|
|
289
301
|
body=typing.cast(
|
|
290
|
-
typing.Any,
|
|
302
|
+
typing.Optional[typing.Any],
|
|
291
303
|
parse_obj_as(
|
|
292
|
-
type_=typing.Any, # type: ignore
|
|
304
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
293
305
|
object_=_response.json(),
|
|
294
306
|
),
|
|
295
307
|
),
|
|
@@ -298,9 +310,9 @@ class RawSpeechToTextClient:
|
|
|
298
310
|
raise InternalServerError(
|
|
299
311
|
headers=dict(_response.headers),
|
|
300
312
|
body=typing.cast(
|
|
301
|
-
typing.Any,
|
|
313
|
+
typing.Optional[typing.Any],
|
|
302
314
|
parse_obj_as(
|
|
303
|
-
type_=typing.Any, # type: ignore
|
|
315
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
304
316
|
object_=_response.json(),
|
|
305
317
|
),
|
|
306
318
|
),
|
|
@@ -309,9 +321,9 @@ class RawSpeechToTextClient:
|
|
|
309
321
|
raise ServiceUnavailableError(
|
|
310
322
|
headers=dict(_response.headers),
|
|
311
323
|
body=typing.cast(
|
|
312
|
-
typing.Any,
|
|
324
|
+
typing.Optional[typing.Any],
|
|
313
325
|
parse_obj_as(
|
|
314
|
-
type_=typing.Any, # type: ignore
|
|
326
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
315
327
|
object_=_response.json(),
|
|
316
328
|
),
|
|
317
329
|
),
|
|
@@ -331,6 +343,7 @@ class AsyncRawSpeechToTextClient:
|
|
|
331
343
|
*,
|
|
332
344
|
file: core.File,
|
|
333
345
|
model: typing.Optional[SpeechToTextModel] = OMIT,
|
|
346
|
+
mode: typing.Optional[Mode] = OMIT,
|
|
334
347
|
language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
|
|
335
348
|
input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
|
|
336
349
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -357,7 +370,18 @@ class AsyncRawSpeechToTextClient:
|
|
|
357
370
|
|
|
358
371
|
model : typing.Optional[SpeechToTextModel]
|
|
359
372
|
Specifies the model to use for speech-to-text conversion.
|
|
360
|
-
|
|
373
|
+
- **saarika:v2.5** (default): Standard transcription model
|
|
374
|
+
- **saarika:v3**: Advanced transcription model
|
|
375
|
+
- **saaras:v3**: Advanced model with multiple output modes
|
|
376
|
+
|
|
377
|
+
mode : typing.Optional[Mode]
|
|
378
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
379
|
+
- **transcribe** (default): Standard transcription
|
|
380
|
+
- **translate**: Translation to English
|
|
381
|
+
- **indic-en**: Indic to English translation
|
|
382
|
+
- **verbatim**: Exact transcription
|
|
383
|
+
- **translit**: Transliteration to Latin script
|
|
384
|
+
- **codemix**: Code-mixed output
|
|
361
385
|
|
|
362
386
|
language_code : typing.Optional[SpeechToTextLanguage]
|
|
363
387
|
Specifies the language of the input audio.
|
|
@@ -380,7 +404,8 @@ class AsyncRawSpeechToTextClient:
|
|
|
380
404
|
base_url=self._client_wrapper.get_environment().base,
|
|
381
405
|
method="POST",
|
|
382
406
|
data={
|
|
383
|
-
"model":
|
|
407
|
+
"model": model,
|
|
408
|
+
"mode": mode,
|
|
384
409
|
"language_code": language_code,
|
|
385
410
|
"input_audio_codec": input_audio_codec,
|
|
386
411
|
},
|
|
@@ -405,9 +430,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
405
430
|
raise BadRequestError(
|
|
406
431
|
headers=dict(_response.headers),
|
|
407
432
|
body=typing.cast(
|
|
408
|
-
typing.Any,
|
|
433
|
+
typing.Optional[typing.Any],
|
|
409
434
|
parse_obj_as(
|
|
410
|
-
type_=typing.Any, # type: ignore
|
|
435
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
411
436
|
object_=_response.json(),
|
|
412
437
|
),
|
|
413
438
|
),
|
|
@@ -416,9 +441,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
416
441
|
raise ForbiddenError(
|
|
417
442
|
headers=dict(_response.headers),
|
|
418
443
|
body=typing.cast(
|
|
419
|
-
typing.Any,
|
|
444
|
+
typing.Optional[typing.Any],
|
|
420
445
|
parse_obj_as(
|
|
421
|
-
type_=typing.Any, # type: ignore
|
|
446
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
422
447
|
object_=_response.json(),
|
|
423
448
|
),
|
|
424
449
|
),
|
|
@@ -427,9 +452,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
427
452
|
raise UnprocessableEntityError(
|
|
428
453
|
headers=dict(_response.headers),
|
|
429
454
|
body=typing.cast(
|
|
430
|
-
typing.Any,
|
|
455
|
+
typing.Optional[typing.Any],
|
|
431
456
|
parse_obj_as(
|
|
432
|
-
type_=typing.Any, # type: ignore
|
|
457
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
433
458
|
object_=_response.json(),
|
|
434
459
|
),
|
|
435
460
|
),
|
|
@@ -438,9 +463,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
438
463
|
raise TooManyRequestsError(
|
|
439
464
|
headers=dict(_response.headers),
|
|
440
465
|
body=typing.cast(
|
|
441
|
-
typing.Any,
|
|
466
|
+
typing.Optional[typing.Any],
|
|
442
467
|
parse_obj_as(
|
|
443
|
-
type_=typing.Any, # type: ignore
|
|
468
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
444
469
|
object_=_response.json(),
|
|
445
470
|
),
|
|
446
471
|
),
|
|
@@ -449,9 +474,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
449
474
|
raise InternalServerError(
|
|
450
475
|
headers=dict(_response.headers),
|
|
451
476
|
body=typing.cast(
|
|
452
|
-
typing.Any,
|
|
477
|
+
typing.Optional[typing.Any],
|
|
453
478
|
parse_obj_as(
|
|
454
|
-
type_=typing.Any, # type: ignore
|
|
479
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
455
480
|
object_=_response.json(),
|
|
456
481
|
),
|
|
457
482
|
),
|
|
@@ -460,9 +485,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
460
485
|
raise ServiceUnavailableError(
|
|
461
486
|
headers=dict(_response.headers),
|
|
462
487
|
body=typing.cast(
|
|
463
|
-
typing.Any,
|
|
488
|
+
typing.Optional[typing.Any],
|
|
464
489
|
parse_obj_as(
|
|
465
|
-
type_=typing.Any, # type: ignore
|
|
490
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
466
491
|
object_=_response.json(),
|
|
467
492
|
),
|
|
468
493
|
),
|
|
@@ -524,7 +549,7 @@ class AsyncRawSpeechToTextClient:
|
|
|
524
549
|
method="POST",
|
|
525
550
|
data={
|
|
526
551
|
"prompt": prompt,
|
|
527
|
-
"model":
|
|
552
|
+
"model": model,
|
|
528
553
|
"input_audio_codec": input_audio_codec,
|
|
529
554
|
},
|
|
530
555
|
files={
|
|
@@ -548,9 +573,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
548
573
|
raise BadRequestError(
|
|
549
574
|
headers=dict(_response.headers),
|
|
550
575
|
body=typing.cast(
|
|
551
|
-
typing.Any,
|
|
576
|
+
typing.Optional[typing.Any],
|
|
552
577
|
parse_obj_as(
|
|
553
|
-
type_=typing.Any, # type: ignore
|
|
578
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
554
579
|
object_=_response.json(),
|
|
555
580
|
),
|
|
556
581
|
),
|
|
@@ -559,9 +584,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
559
584
|
raise ForbiddenError(
|
|
560
585
|
headers=dict(_response.headers),
|
|
561
586
|
body=typing.cast(
|
|
562
|
-
typing.Any,
|
|
587
|
+
typing.Optional[typing.Any],
|
|
563
588
|
parse_obj_as(
|
|
564
|
-
type_=typing.Any, # type: ignore
|
|
589
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
565
590
|
object_=_response.json(),
|
|
566
591
|
),
|
|
567
592
|
),
|
|
@@ -570,9 +595,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
570
595
|
raise UnprocessableEntityError(
|
|
571
596
|
headers=dict(_response.headers),
|
|
572
597
|
body=typing.cast(
|
|
573
|
-
typing.Any,
|
|
598
|
+
typing.Optional[typing.Any],
|
|
574
599
|
parse_obj_as(
|
|
575
|
-
type_=typing.Any, # type: ignore
|
|
600
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
576
601
|
object_=_response.json(),
|
|
577
602
|
),
|
|
578
603
|
),
|
|
@@ -581,9 +606,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
581
606
|
raise TooManyRequestsError(
|
|
582
607
|
headers=dict(_response.headers),
|
|
583
608
|
body=typing.cast(
|
|
584
|
-
typing.Any,
|
|
609
|
+
typing.Optional[typing.Any],
|
|
585
610
|
parse_obj_as(
|
|
586
|
-
type_=typing.Any, # type: ignore
|
|
611
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
587
612
|
object_=_response.json(),
|
|
588
613
|
),
|
|
589
614
|
),
|
|
@@ -592,9 +617,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
592
617
|
raise InternalServerError(
|
|
593
618
|
headers=dict(_response.headers),
|
|
594
619
|
body=typing.cast(
|
|
595
|
-
typing.Any,
|
|
620
|
+
typing.Optional[typing.Any],
|
|
596
621
|
parse_obj_as(
|
|
597
|
-
type_=typing.Any, # type: ignore
|
|
622
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
598
623
|
object_=_response.json(),
|
|
599
624
|
),
|
|
600
625
|
),
|
|
@@ -603,9 +628,9 @@ class AsyncRawSpeechToTextClient:
|
|
|
603
628
|
raise ServiceUnavailableError(
|
|
604
629
|
headers=dict(_response.headers),
|
|
605
630
|
body=typing.cast(
|
|
606
|
-
typing.Any,
|
|
631
|
+
typing.Optional[typing.Any],
|
|
607
632
|
parse_obj_as(
|
|
608
|
-
type_=typing.Any, # type: ignore
|
|
633
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
609
634
|
object_=_response.json(),
|
|
610
635
|
),
|
|
611
636
|
),
|
|
@@ -12,6 +12,7 @@ from ..types.files_upload_response import FilesUploadResponse
|
|
|
12
12
|
from ..types.job_status_v_1_response import JobStatusV1Response
|
|
13
13
|
from ..types.speech_to_text_model import SpeechToTextModel
|
|
14
14
|
from ..types.speech_to_text_language import SpeechToTextLanguage
|
|
15
|
+
from ..types.mode import Mode
|
|
15
16
|
from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
|
|
16
17
|
from .job import AsyncSpeechToTextJob, SpeechToTextJob
|
|
17
18
|
|
|
@@ -72,7 +73,9 @@ class SpeechToTextJobClient:
|
|
|
72
73
|
)
|
|
73
74
|
"""
|
|
74
75
|
_response = self._raw_client.initialise(
|
|
75
|
-
job_parameters=job_parameters,
|
|
76
|
+
job_parameters=job_parameters,
|
|
77
|
+
callback=callback,
|
|
78
|
+
request_options=request_options,
|
|
76
79
|
)
|
|
77
80
|
return _response.data
|
|
78
81
|
|
|
@@ -145,11 +148,17 @@ class SpeechToTextJobClient:
|
|
|
145
148
|
job_id="job_id",
|
|
146
149
|
)
|
|
147
150
|
"""
|
|
148
|
-
_response = self._raw_client.start(
|
|
151
|
+
_response = self._raw_client.start(
|
|
152
|
+
job_id, ptu_id=ptu_id, request_options=request_options
|
|
153
|
+
)
|
|
149
154
|
return _response.data
|
|
150
155
|
|
|
151
156
|
def get_upload_links(
|
|
152
|
-
self,
|
|
157
|
+
self,
|
|
158
|
+
*,
|
|
159
|
+
job_id: str,
|
|
160
|
+
files: typing.Sequence[str],
|
|
161
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
153
162
|
) -> FilesUploadResponse:
|
|
154
163
|
"""
|
|
155
164
|
Start a speech to text bulk job V1
|
|
@@ -180,11 +189,17 @@ class SpeechToTextJobClient:
|
|
|
180
189
|
files=["files"],
|
|
181
190
|
)
|
|
182
191
|
"""
|
|
183
|
-
_response = self._raw_client.get_upload_links(
|
|
192
|
+
_response = self._raw_client.get_upload_links(
|
|
193
|
+
job_id=job_id, files=files, request_options=request_options
|
|
194
|
+
)
|
|
184
195
|
return _response.data
|
|
185
196
|
|
|
186
197
|
def get_download_links(
|
|
187
|
-
self,
|
|
198
|
+
self,
|
|
199
|
+
*,
|
|
200
|
+
job_id: str,
|
|
201
|
+
files: typing.Sequence[str],
|
|
202
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
188
203
|
) -> FilesDownloadResponse:
|
|
189
204
|
"""
|
|
190
205
|
Start a speech to text bulk job V1
|
|
@@ -215,12 +230,15 @@ class SpeechToTextJobClient:
|
|
|
215
230
|
files=["files"],
|
|
216
231
|
)
|
|
217
232
|
"""
|
|
218
|
-
_response = self._raw_client.get_download_links(
|
|
233
|
+
_response = self._raw_client.get_download_links(
|
|
234
|
+
job_id=job_id, files=files, request_options=request_options
|
|
235
|
+
)
|
|
219
236
|
return _response.data
|
|
220
237
|
|
|
221
238
|
def create_job(
|
|
222
239
|
self,
|
|
223
240
|
model: SpeechToTextModel = "saarika:v2.5",
|
|
241
|
+
mode: typing.Optional[Mode] = None,
|
|
224
242
|
with_diarization: bool = False,
|
|
225
243
|
with_timestamps: bool = False,
|
|
226
244
|
language_code: typing.Optional[SpeechToTextLanguage] = None,
|
|
@@ -236,6 +254,10 @@ class SpeechToTextJobClient:
|
|
|
236
254
|
model : SpeechToTextModel, default="saarika:v2.5"
|
|
237
255
|
The model to use for transcription.
|
|
238
256
|
|
|
257
|
+
mode : typing.Optional[Mode], default=None
|
|
258
|
+
Mode of operation. Only applicable for saaras:v3 model.
|
|
259
|
+
Options: transcribe, translate, indic-en, verbatim, translit, codemix
|
|
260
|
+
|
|
239
261
|
with_diarization : typing.Optional[bool], default=False
|
|
240
262
|
Whether to enable speaker diarization (distinguishing who said what).
|
|
241
263
|
|
|
@@ -244,7 +266,7 @@ class SpeechToTextJobClient:
|
|
|
244
266
|
|
|
245
267
|
language_code : typing.Optional[SpeechToTextLanguage], default=None
|
|
246
268
|
The language code of the input audio (e.g., "hi-IN", "bn-IN").
|
|
247
|
-
|
|
269
|
+
|
|
248
270
|
num_speakers : typing.Optional[int], default=None
|
|
249
271
|
The number of distinct speakers in the audio, if known.
|
|
250
272
|
|
|
@@ -263,6 +285,7 @@ class SpeechToTextJobClient:
|
|
|
263
285
|
job_parameters=SpeechToTextJobParametersParams(
|
|
264
286
|
language_code=language_code,
|
|
265
287
|
model=model,
|
|
288
|
+
mode=mode, # type: ignore[typeddict-item]
|
|
266
289
|
num_speakers=num_speakers, # type: ignore[typeddict-item]
|
|
267
290
|
with_diarization=with_diarization,
|
|
268
291
|
with_timestamps=with_timestamps,
|
|
@@ -350,7 +373,9 @@ class AsyncSpeechToTextJobClient:
|
|
|
350
373
|
asyncio.run(main())
|
|
351
374
|
"""
|
|
352
375
|
_response = await self._raw_client.initialise(
|
|
353
|
-
job_parameters=job_parameters,
|
|
376
|
+
job_parameters=job_parameters,
|
|
377
|
+
callback=callback,
|
|
378
|
+
request_options=request_options,
|
|
354
379
|
)
|
|
355
380
|
return _response.data
|
|
356
381
|
|
|
@@ -392,7 +417,9 @@ class AsyncSpeechToTextJobClient:
|
|
|
392
417
|
|
|
393
418
|
asyncio.run(main())
|
|
394
419
|
"""
|
|
395
|
-
_response = await self._raw_client.get_status(
|
|
420
|
+
_response = await self._raw_client.get_status(
|
|
421
|
+
job_id, request_options=request_options
|
|
422
|
+
)
|
|
396
423
|
return _response.data
|
|
397
424
|
|
|
398
425
|
async def start(
|
|
@@ -439,11 +466,17 @@ class AsyncSpeechToTextJobClient:
|
|
|
439
466
|
|
|
440
467
|
asyncio.run(main())
|
|
441
468
|
"""
|
|
442
|
-
_response = await self._raw_client.start(
|
|
469
|
+
_response = await self._raw_client.start(
|
|
470
|
+
job_id, ptu_id=ptu_id, request_options=request_options
|
|
471
|
+
)
|
|
443
472
|
return _response.data
|
|
444
473
|
|
|
445
474
|
async def get_upload_links(
|
|
446
|
-
self,
|
|
475
|
+
self,
|
|
476
|
+
*,
|
|
477
|
+
job_id: str,
|
|
478
|
+
files: typing.Sequence[str],
|
|
479
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
447
480
|
) -> FilesUploadResponse:
|
|
448
481
|
"""
|
|
449
482
|
Start a speech to text bulk job V1
|
|
@@ -482,11 +515,17 @@ class AsyncSpeechToTextJobClient:
|
|
|
482
515
|
|
|
483
516
|
asyncio.run(main())
|
|
484
517
|
"""
|
|
485
|
-
_response = await self._raw_client.get_upload_links(
|
|
518
|
+
_response = await self._raw_client.get_upload_links(
|
|
519
|
+
job_id=job_id, files=files, request_options=request_options
|
|
520
|
+
)
|
|
486
521
|
return _response.data
|
|
487
522
|
|
|
488
523
|
async def get_download_links(
|
|
489
|
-
self,
|
|
524
|
+
self,
|
|
525
|
+
*,
|
|
526
|
+
job_id: str,
|
|
527
|
+
files: typing.Sequence[str],
|
|
528
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
490
529
|
) -> FilesDownloadResponse:
|
|
491
530
|
"""
|
|
492
531
|
Start a speech to text bulk job V1
|
|
@@ -533,6 +572,7 @@ class AsyncSpeechToTextJobClient:
|
|
|
533
572
|
async def create_job(
|
|
534
573
|
self,
|
|
535
574
|
model: SpeechToTextModel = "saarika:v2.5",
|
|
575
|
+
mode: typing.Optional[Mode] = None,
|
|
536
576
|
with_diarization: bool = False,
|
|
537
577
|
with_timestamps: bool = False,
|
|
538
578
|
language_code: typing.Optional[SpeechToTextLanguage] = None,
|
|
@@ -548,6 +588,10 @@ class AsyncSpeechToTextJobClient:
|
|
|
548
588
|
model : SpeechToTextModel, default="saarika:v2.5"
|
|
549
589
|
The model to use for transcription.
|
|
550
590
|
|
|
591
|
+
mode : typing.Optional[Mode], default=None
|
|
592
|
+
Mode of operation. Only applicable for saaras:v3 model.
|
|
593
|
+
Options: transcribe, translate, indic-en, verbatim, translit, codemix
|
|
594
|
+
|
|
551
595
|
with_diarization : typing.Optional[bool], default=False
|
|
552
596
|
Whether to enable speaker diarization (distinguishing who said what).
|
|
553
597
|
|
|
@@ -556,8 +600,8 @@ class AsyncSpeechToTextJobClient:
|
|
|
556
600
|
|
|
557
601
|
language_code : typing.Optional[SpeechToTextLanguage], default=None
|
|
558
602
|
The language code of the input audio (e.g., "hi-IN", "bn-IN").
|
|
559
|
-
|
|
560
|
-
num_speakers : typing.Optional[int]
|
|
603
|
+
|
|
604
|
+
num_speakers : typing.Optional[int] = None
|
|
561
605
|
The number of distinct speakers in the audio, if known.
|
|
562
606
|
|
|
563
607
|
callback : typing.Optional[BulkJobCallbackParams], default=OMIT
|
|
@@ -575,6 +619,7 @@ class AsyncSpeechToTextJobClient:
|
|
|
575
619
|
job_parameters=SpeechToTextJobParametersParams(
|
|
576
620
|
language_code=language_code,
|
|
577
621
|
model=model,
|
|
622
|
+
mode=mode, # type: ignore[typeddict-item]
|
|
578
623
|
with_diarization=with_diarization,
|
|
579
624
|
with_timestamps=with_timestamps,
|
|
580
625
|
num_speakers=num_speakers, # type: ignore[typeddict-item]
|