sarvamai-0.1.23a2-py3-none-any.whl → sarvamai-0.1.23a4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. sarvamai/__init__.py +203 -405
  2. sarvamai/chat/raw_client.py +20 -20
  3. sarvamai/client.py +34 -186
  4. sarvamai/core/__init__.py +21 -76
  5. sarvamai/core/client_wrapper.py +3 -19
  6. sarvamai/core/force_multipart.py +2 -4
  7. sarvamai/core/http_client.py +97 -217
  8. sarvamai/core/http_response.py +1 -1
  9. sarvamai/core/jsonable_encoder.py +0 -8
  10. sarvamai/core/pydantic_utilities.py +4 -110
  11. sarvamai/errors/__init__.py +6 -40
  12. sarvamai/errors/bad_request_error.py +1 -1
  13. sarvamai/errors/forbidden_error.py +1 -1
  14. sarvamai/errors/internal_server_error.py +1 -1
  15. sarvamai/errors/service_unavailable_error.py +1 -1
  16. sarvamai/errors/too_many_requests_error.py +1 -1
  17. sarvamai/errors/unprocessable_entity_error.py +1 -1
  18. sarvamai/requests/__init__.py +62 -150
  19. sarvamai/requests/configure_connection.py +4 -0
  20. sarvamai/requests/configure_connection_data.py +40 -11
  21. sarvamai/requests/error_response_data.py +1 -1
  22. sarvamai/requests/file_signed_url_details.py +1 -1
  23. sarvamai/requests/speech_to_text_job_parameters.py +10 -1
  24. sarvamai/requests/speech_to_text_transcription_data.py +2 -2
  25. sarvamai/speech_to_text/client.py +29 -2
  26. sarvamai/speech_to_text/raw_client.py +81 -56
  27. sarvamai/speech_to_text_job/client.py +60 -15
  28. sarvamai/speech_to_text_job/raw_client.py +120 -120
  29. sarvamai/speech_to_text_streaming/__init__.py +10 -38
  30. sarvamai/speech_to_text_streaming/client.py +32 -6
  31. sarvamai/speech_to_text_streaming/raw_client.py +32 -6
  32. sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
  33. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
  34. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
  35. sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
  36. sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
  37. sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
  38. sarvamai/text/raw_client.py +60 -60
  39. sarvamai/text_to_speech/client.py +100 -16
  40. sarvamai/text_to_speech/raw_client.py +120 -36
  41. sarvamai/text_to_speech_streaming/__init__.py +2 -29
  42. sarvamai/text_to_speech_streaming/client.py +19 -6
  43. sarvamai/text_to_speech_streaming/raw_client.py +19 -6
  44. sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
  45. sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
  46. sarvamai/types/__init__.py +102 -222
  47. sarvamai/types/chat_completion_request_message.py +2 -6
  48. sarvamai/types/configure_connection.py +4 -0
  49. sarvamai/types/configure_connection_data.py +40 -11
  50. sarvamai/types/configure_connection_data_model.py +5 -0
  51. sarvamai/types/configure_connection_data_speaker.py +35 -1
  52. sarvamai/types/error_response_data.py +1 -1
  53. sarvamai/types/file_signed_url_details.py +1 -1
  54. sarvamai/types/mode.py +7 -0
  55. sarvamai/types/speech_to_text_job_parameters.py +10 -1
  56. sarvamai/types/speech_to_text_model.py +3 -1
  57. sarvamai/types/speech_to_text_transcription_data.py +2 -2
  58. sarvamai/types/speech_to_text_translate_model.py +1 -1
  59. sarvamai/types/text_to_speech_model.py +1 -1
  60. sarvamai/types/text_to_speech_speaker.py +35 -1
  61. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
  62. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
  63. sarvamai/core/http_sse/__init__.py +0 -42
  64. sarvamai/core/http_sse/_api.py +0 -112
  65. sarvamai/core/http_sse/_decoders.py +0 -61
  66. sarvamai/core/http_sse/_exceptions.py +0 -7
  67. sarvamai/core/http_sse/_models.py +0 -17
  68. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
@@ -1,6 +1,5 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
- import json
4
3
  import typing
5
4
  from json.decoder import JSONDecodeError
6
5
 
@@ -8,7 +7,6 @@ from .. import core
8
7
  from ..core.api_error import ApiError
9
8
  from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
10
9
  from ..core.http_response import AsyncHttpResponse, HttpResponse
11
- from ..core.jsonable_encoder import jsonable_encoder
12
10
  from ..core.pydantic_utilities import parse_obj_as
13
11
  from ..core.request_options import RequestOptions
14
12
  from ..errors.bad_request_error import BadRequestError
@@ -18,6 +16,7 @@ from ..errors.service_unavailable_error import ServiceUnavailableError
18
16
  from ..errors.too_many_requests_error import TooManyRequestsError
19
17
  from ..errors.unprocessable_entity_error import UnprocessableEntityError
20
18
  from ..types.input_audio_codec import InputAudioCodec
19
+ from ..types.mode import Mode
21
20
  from ..types.speech_to_text_language import SpeechToTextLanguage
22
21
  from ..types.speech_to_text_model import SpeechToTextModel
23
22
  from ..types.speech_to_text_response import SpeechToTextResponse
@@ -37,6 +36,7 @@ class RawSpeechToTextClient:
37
36
  *,
38
37
  file: core.File,
39
38
  model: typing.Optional[SpeechToTextModel] = OMIT,
39
+ mode: typing.Optional[Mode] = OMIT,
40
40
  language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
41
41
  input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
42
42
  request_options: typing.Optional[RequestOptions] = None,
@@ -63,7 +63,18 @@ class RawSpeechToTextClient:
63
63
 
64
64
  model : typing.Optional[SpeechToTextModel]
65
65
  Specifies the model to use for speech-to-text conversion.
66
- Note:- Default model is `saarika:v2.5`
66
+ - **saarika:v2.5** (default): Standard transcription model
67
+ - **saarika:v3**: Advanced transcription model
68
+ - **saaras:v3**: Advanced model with multiple output modes
69
+
70
+ mode : typing.Optional[Mode]
71
+ Mode of operation. **Only applicable when using saaras:v3 model.**
72
+ - **transcribe** (default): Standard transcription
73
+ - **translate**: Translation to English
74
+ - **indic-en**: Indic to English translation
75
+ - **verbatim**: Exact transcription
76
+ - **translit**: Transliteration to Latin script
77
+ - **codemix**: Code-mixed output
67
78
 
68
79
  language_code : typing.Optional[SpeechToTextLanguage]
69
80
  Specifies the language of the input audio.
@@ -86,7 +97,8 @@ class RawSpeechToTextClient:
86
97
  base_url=self._client_wrapper.get_environment().base,
87
98
  method="POST",
88
99
  data={
89
- "model": json.dumps(jsonable_encoder(model)),
100
+ "model": model,
101
+ "mode": mode,
90
102
  "language_code": language_code,
91
103
  "input_audio_codec": input_audio_codec,
92
104
  },
@@ -111,9 +123,9 @@ class RawSpeechToTextClient:
111
123
  raise BadRequestError(
112
124
  headers=dict(_response.headers),
113
125
  body=typing.cast(
114
- typing.Any,
126
+ typing.Optional[typing.Any],
115
127
  parse_obj_as(
116
- type_=typing.Any, # type: ignore
128
+ type_=typing.Optional[typing.Any], # type: ignore
117
129
  object_=_response.json(),
118
130
  ),
119
131
  ),
@@ -122,9 +134,9 @@ class RawSpeechToTextClient:
122
134
  raise ForbiddenError(
123
135
  headers=dict(_response.headers),
124
136
  body=typing.cast(
125
- typing.Any,
137
+ typing.Optional[typing.Any],
126
138
  parse_obj_as(
127
- type_=typing.Any, # type: ignore
139
+ type_=typing.Optional[typing.Any], # type: ignore
128
140
  object_=_response.json(),
129
141
  ),
130
142
  ),
@@ -133,9 +145,9 @@ class RawSpeechToTextClient:
133
145
  raise UnprocessableEntityError(
134
146
  headers=dict(_response.headers),
135
147
  body=typing.cast(
136
- typing.Any,
148
+ typing.Optional[typing.Any],
137
149
  parse_obj_as(
138
- type_=typing.Any, # type: ignore
150
+ type_=typing.Optional[typing.Any], # type: ignore
139
151
  object_=_response.json(),
140
152
  ),
141
153
  ),
@@ -144,9 +156,9 @@ class RawSpeechToTextClient:
144
156
  raise TooManyRequestsError(
145
157
  headers=dict(_response.headers),
146
158
  body=typing.cast(
147
- typing.Any,
159
+ typing.Optional[typing.Any],
148
160
  parse_obj_as(
149
- type_=typing.Any, # type: ignore
161
+ type_=typing.Optional[typing.Any], # type: ignore
150
162
  object_=_response.json(),
151
163
  ),
152
164
  ),
@@ -155,9 +167,9 @@ class RawSpeechToTextClient:
155
167
  raise InternalServerError(
156
168
  headers=dict(_response.headers),
157
169
  body=typing.cast(
158
- typing.Any,
170
+ typing.Optional[typing.Any],
159
171
  parse_obj_as(
160
- type_=typing.Any, # type: ignore
172
+ type_=typing.Optional[typing.Any], # type: ignore
161
173
  object_=_response.json(),
162
174
  ),
163
175
  ),
@@ -166,9 +178,9 @@ class RawSpeechToTextClient:
166
178
  raise ServiceUnavailableError(
167
179
  headers=dict(_response.headers),
168
180
  body=typing.cast(
169
- typing.Any,
181
+ typing.Optional[typing.Any],
170
182
  parse_obj_as(
171
- type_=typing.Any, # type: ignore
183
+ type_=typing.Optional[typing.Any], # type: ignore
172
184
  object_=_response.json(),
173
185
  ),
174
186
  ),
@@ -230,7 +242,7 @@ class RawSpeechToTextClient:
230
242
  method="POST",
231
243
  data={
232
244
  "prompt": prompt,
233
- "model": json.dumps(jsonable_encoder(model)),
245
+ "model": model,
234
246
  "input_audio_codec": input_audio_codec,
235
247
  },
236
248
  files={
@@ -254,9 +266,9 @@ class RawSpeechToTextClient:
254
266
  raise BadRequestError(
255
267
  headers=dict(_response.headers),
256
268
  body=typing.cast(
257
- typing.Any,
269
+ typing.Optional[typing.Any],
258
270
  parse_obj_as(
259
- type_=typing.Any, # type: ignore
271
+ type_=typing.Optional[typing.Any], # type: ignore
260
272
  object_=_response.json(),
261
273
  ),
262
274
  ),
@@ -265,9 +277,9 @@ class RawSpeechToTextClient:
265
277
  raise ForbiddenError(
266
278
  headers=dict(_response.headers),
267
279
  body=typing.cast(
268
- typing.Any,
280
+ typing.Optional[typing.Any],
269
281
  parse_obj_as(
270
- type_=typing.Any, # type: ignore
282
+ type_=typing.Optional[typing.Any], # type: ignore
271
283
  object_=_response.json(),
272
284
  ),
273
285
  ),
@@ -276,9 +288,9 @@ class RawSpeechToTextClient:
276
288
  raise UnprocessableEntityError(
277
289
  headers=dict(_response.headers),
278
290
  body=typing.cast(
279
- typing.Any,
291
+ typing.Optional[typing.Any],
280
292
  parse_obj_as(
281
- type_=typing.Any, # type: ignore
293
+ type_=typing.Optional[typing.Any], # type: ignore
282
294
  object_=_response.json(),
283
295
  ),
284
296
  ),
@@ -287,9 +299,9 @@ class RawSpeechToTextClient:
287
299
  raise TooManyRequestsError(
288
300
  headers=dict(_response.headers),
289
301
  body=typing.cast(
290
- typing.Any,
302
+ typing.Optional[typing.Any],
291
303
  parse_obj_as(
292
- type_=typing.Any, # type: ignore
304
+ type_=typing.Optional[typing.Any], # type: ignore
293
305
  object_=_response.json(),
294
306
  ),
295
307
  ),
@@ -298,9 +310,9 @@ class RawSpeechToTextClient:
298
310
  raise InternalServerError(
299
311
  headers=dict(_response.headers),
300
312
  body=typing.cast(
301
- typing.Any,
313
+ typing.Optional[typing.Any],
302
314
  parse_obj_as(
303
- type_=typing.Any, # type: ignore
315
+ type_=typing.Optional[typing.Any], # type: ignore
304
316
  object_=_response.json(),
305
317
  ),
306
318
  ),
@@ -309,9 +321,9 @@ class RawSpeechToTextClient:
309
321
  raise ServiceUnavailableError(
310
322
  headers=dict(_response.headers),
311
323
  body=typing.cast(
312
- typing.Any,
324
+ typing.Optional[typing.Any],
313
325
  parse_obj_as(
314
- type_=typing.Any, # type: ignore
326
+ type_=typing.Optional[typing.Any], # type: ignore
315
327
  object_=_response.json(),
316
328
  ),
317
329
  ),
@@ -331,6 +343,7 @@ class AsyncRawSpeechToTextClient:
331
343
  *,
332
344
  file: core.File,
333
345
  model: typing.Optional[SpeechToTextModel] = OMIT,
346
+ mode: typing.Optional[Mode] = OMIT,
334
347
  language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
335
348
  input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
336
349
  request_options: typing.Optional[RequestOptions] = None,
@@ -357,7 +370,18 @@ class AsyncRawSpeechToTextClient:
357
370
 
358
371
  model : typing.Optional[SpeechToTextModel]
359
372
  Specifies the model to use for speech-to-text conversion.
360
- Note:- Default model is `saarika:v2.5`
373
+ - **saarika:v2.5** (default): Standard transcription model
374
+ - **saarika:v3**: Advanced transcription model
375
+ - **saaras:v3**: Advanced model with multiple output modes
376
+
377
+ mode : typing.Optional[Mode]
378
+ Mode of operation. **Only applicable when using saaras:v3 model.**
379
+ - **transcribe** (default): Standard transcription
380
+ - **translate**: Translation to English
381
+ - **indic-en**: Indic to English translation
382
+ - **verbatim**: Exact transcription
383
+ - **translit**: Transliteration to Latin script
384
+ - **codemix**: Code-mixed output
361
385
 
362
386
  language_code : typing.Optional[SpeechToTextLanguage]
363
387
  Specifies the language of the input audio.
@@ -380,7 +404,8 @@ class AsyncRawSpeechToTextClient:
380
404
  base_url=self._client_wrapper.get_environment().base,
381
405
  method="POST",
382
406
  data={
383
- "model": json.dumps(jsonable_encoder(model)),
407
+ "model": model,
408
+ "mode": mode,
384
409
  "language_code": language_code,
385
410
  "input_audio_codec": input_audio_codec,
386
411
  },
@@ -405,9 +430,9 @@ class AsyncRawSpeechToTextClient:
405
430
  raise BadRequestError(
406
431
  headers=dict(_response.headers),
407
432
  body=typing.cast(
408
- typing.Any,
433
+ typing.Optional[typing.Any],
409
434
  parse_obj_as(
410
- type_=typing.Any, # type: ignore
435
+ type_=typing.Optional[typing.Any], # type: ignore
411
436
  object_=_response.json(),
412
437
  ),
413
438
  ),
@@ -416,9 +441,9 @@ class AsyncRawSpeechToTextClient:
416
441
  raise ForbiddenError(
417
442
  headers=dict(_response.headers),
418
443
  body=typing.cast(
419
- typing.Any,
444
+ typing.Optional[typing.Any],
420
445
  parse_obj_as(
421
- type_=typing.Any, # type: ignore
446
+ type_=typing.Optional[typing.Any], # type: ignore
422
447
  object_=_response.json(),
423
448
  ),
424
449
  ),
@@ -427,9 +452,9 @@ class AsyncRawSpeechToTextClient:
427
452
  raise UnprocessableEntityError(
428
453
  headers=dict(_response.headers),
429
454
  body=typing.cast(
430
- typing.Any,
455
+ typing.Optional[typing.Any],
431
456
  parse_obj_as(
432
- type_=typing.Any, # type: ignore
457
+ type_=typing.Optional[typing.Any], # type: ignore
433
458
  object_=_response.json(),
434
459
  ),
435
460
  ),
@@ -438,9 +463,9 @@ class AsyncRawSpeechToTextClient:
438
463
  raise TooManyRequestsError(
439
464
  headers=dict(_response.headers),
440
465
  body=typing.cast(
441
- typing.Any,
466
+ typing.Optional[typing.Any],
442
467
  parse_obj_as(
443
- type_=typing.Any, # type: ignore
468
+ type_=typing.Optional[typing.Any], # type: ignore
444
469
  object_=_response.json(),
445
470
  ),
446
471
  ),
@@ -449,9 +474,9 @@ class AsyncRawSpeechToTextClient:
449
474
  raise InternalServerError(
450
475
  headers=dict(_response.headers),
451
476
  body=typing.cast(
452
- typing.Any,
477
+ typing.Optional[typing.Any],
453
478
  parse_obj_as(
454
- type_=typing.Any, # type: ignore
479
+ type_=typing.Optional[typing.Any], # type: ignore
455
480
  object_=_response.json(),
456
481
  ),
457
482
  ),
@@ -460,9 +485,9 @@ class AsyncRawSpeechToTextClient:
460
485
  raise ServiceUnavailableError(
461
486
  headers=dict(_response.headers),
462
487
  body=typing.cast(
463
- typing.Any,
488
+ typing.Optional[typing.Any],
464
489
  parse_obj_as(
465
- type_=typing.Any, # type: ignore
490
+ type_=typing.Optional[typing.Any], # type: ignore
466
491
  object_=_response.json(),
467
492
  ),
468
493
  ),
@@ -524,7 +549,7 @@ class AsyncRawSpeechToTextClient:
524
549
  method="POST",
525
550
  data={
526
551
  "prompt": prompt,
527
- "model": json.dumps(jsonable_encoder(model)),
552
+ "model": model,
528
553
  "input_audio_codec": input_audio_codec,
529
554
  },
530
555
  files={
@@ -548,9 +573,9 @@ class AsyncRawSpeechToTextClient:
548
573
  raise BadRequestError(
549
574
  headers=dict(_response.headers),
550
575
  body=typing.cast(
551
- typing.Any,
576
+ typing.Optional[typing.Any],
552
577
  parse_obj_as(
553
- type_=typing.Any, # type: ignore
578
+ type_=typing.Optional[typing.Any], # type: ignore
554
579
  object_=_response.json(),
555
580
  ),
556
581
  ),
@@ -559,9 +584,9 @@ class AsyncRawSpeechToTextClient:
559
584
  raise ForbiddenError(
560
585
  headers=dict(_response.headers),
561
586
  body=typing.cast(
562
- typing.Any,
587
+ typing.Optional[typing.Any],
563
588
  parse_obj_as(
564
- type_=typing.Any, # type: ignore
589
+ type_=typing.Optional[typing.Any], # type: ignore
565
590
  object_=_response.json(),
566
591
  ),
567
592
  ),
@@ -570,9 +595,9 @@ class AsyncRawSpeechToTextClient:
570
595
  raise UnprocessableEntityError(
571
596
  headers=dict(_response.headers),
572
597
  body=typing.cast(
573
- typing.Any,
598
+ typing.Optional[typing.Any],
574
599
  parse_obj_as(
575
- type_=typing.Any, # type: ignore
600
+ type_=typing.Optional[typing.Any], # type: ignore
576
601
  object_=_response.json(),
577
602
  ),
578
603
  ),
@@ -581,9 +606,9 @@ class AsyncRawSpeechToTextClient:
581
606
  raise TooManyRequestsError(
582
607
  headers=dict(_response.headers),
583
608
  body=typing.cast(
584
- typing.Any,
609
+ typing.Optional[typing.Any],
585
610
  parse_obj_as(
586
- type_=typing.Any, # type: ignore
611
+ type_=typing.Optional[typing.Any], # type: ignore
587
612
  object_=_response.json(),
588
613
  ),
589
614
  ),
@@ -592,9 +617,9 @@ class AsyncRawSpeechToTextClient:
592
617
  raise InternalServerError(
593
618
  headers=dict(_response.headers),
594
619
  body=typing.cast(
595
- typing.Any,
620
+ typing.Optional[typing.Any],
596
621
  parse_obj_as(
597
- type_=typing.Any, # type: ignore
622
+ type_=typing.Optional[typing.Any], # type: ignore
598
623
  object_=_response.json(),
599
624
  ),
600
625
  ),
@@ -603,9 +628,9 @@ class AsyncRawSpeechToTextClient:
603
628
  raise ServiceUnavailableError(
604
629
  headers=dict(_response.headers),
605
630
  body=typing.cast(
606
- typing.Any,
631
+ typing.Optional[typing.Any],
607
632
  parse_obj_as(
608
- type_=typing.Any, # type: ignore
633
+ type_=typing.Optional[typing.Any], # type: ignore
609
634
  object_=_response.json(),
610
635
  ),
611
636
  ),
@@ -12,6 +12,7 @@ from ..types.files_upload_response import FilesUploadResponse
12
12
  from ..types.job_status_v_1_response import JobStatusV1Response
13
13
  from ..types.speech_to_text_model import SpeechToTextModel
14
14
  from ..types.speech_to_text_language import SpeechToTextLanguage
15
+ from ..types.mode import Mode
15
16
  from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
16
17
  from .job import AsyncSpeechToTextJob, SpeechToTextJob
17
18
 
@@ -72,7 +73,9 @@ class SpeechToTextJobClient:
72
73
  )
73
74
  """
74
75
  _response = self._raw_client.initialise(
75
- job_parameters=job_parameters, callback=callback, request_options=request_options
76
+ job_parameters=job_parameters,
77
+ callback=callback,
78
+ request_options=request_options,
76
79
  )
77
80
  return _response.data
78
81
 
@@ -145,11 +148,17 @@ class SpeechToTextJobClient:
145
148
  job_id="job_id",
146
149
  )
147
150
  """
148
- _response = self._raw_client.start(job_id, ptu_id=ptu_id, request_options=request_options)
151
+ _response = self._raw_client.start(
152
+ job_id, ptu_id=ptu_id, request_options=request_options
153
+ )
149
154
  return _response.data
150
155
 
151
156
  def get_upload_links(
152
- self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
157
+ self,
158
+ *,
159
+ job_id: str,
160
+ files: typing.Sequence[str],
161
+ request_options: typing.Optional[RequestOptions] = None,
153
162
  ) -> FilesUploadResponse:
154
163
  """
155
164
  Start a speech to text bulk job V1
@@ -180,11 +189,17 @@ class SpeechToTextJobClient:
180
189
  files=["files"],
181
190
  )
182
191
  """
183
- _response = self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
192
+ _response = self._raw_client.get_upload_links(
193
+ job_id=job_id, files=files, request_options=request_options
194
+ )
184
195
  return _response.data
185
196
 
186
197
  def get_download_links(
187
- self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
198
+ self,
199
+ *,
200
+ job_id: str,
201
+ files: typing.Sequence[str],
202
+ request_options: typing.Optional[RequestOptions] = None,
188
203
  ) -> FilesDownloadResponse:
189
204
  """
190
205
  Start a speech to text bulk job V1
@@ -215,12 +230,15 @@ class SpeechToTextJobClient:
215
230
  files=["files"],
216
231
  )
217
232
  """
218
- _response = self._raw_client.get_download_links(job_id=job_id, files=files, request_options=request_options)
233
+ _response = self._raw_client.get_download_links(
234
+ job_id=job_id, files=files, request_options=request_options
235
+ )
219
236
  return _response.data
220
237
 
221
238
  def create_job(
222
239
  self,
223
240
  model: SpeechToTextModel = "saarika:v2.5",
241
+ mode: typing.Optional[Mode] = None,
224
242
  with_diarization: bool = False,
225
243
  with_timestamps: bool = False,
226
244
  language_code: typing.Optional[SpeechToTextLanguage] = None,
@@ -236,6 +254,10 @@ class SpeechToTextJobClient:
236
254
  model : SpeechToTextModel, default="saarika:v2.5"
237
255
  The model to use for transcription.
238
256
 
257
+ mode : typing.Optional[Mode], default=None
258
+ Mode of operation. Only applicable for saaras:v3 model.
259
+ Options: transcribe, translate, indic-en, verbatim, translit, codemix
260
+
239
261
  with_diarization : typing.Optional[bool], default=False
240
262
  Whether to enable speaker diarization (distinguishing who said what).
241
263
 
@@ -244,7 +266,7 @@ class SpeechToTextJobClient:
244
266
 
245
267
  language_code : typing.Optional[SpeechToTextLanguage], default=None
246
268
  The language code of the input audio (e.g., "hi-IN", "bn-IN").
247
-
269
+
248
270
  num_speakers : typing.Optional[int], default=None
249
271
  The number of distinct speakers in the audio, if known.
250
272
 
@@ -263,6 +285,7 @@ class SpeechToTextJobClient:
263
285
  job_parameters=SpeechToTextJobParametersParams(
264
286
  language_code=language_code,
265
287
  model=model,
288
+ mode=mode, # type: ignore[typeddict-item]
266
289
  num_speakers=num_speakers, # type: ignore[typeddict-item]
267
290
  with_diarization=with_diarization,
268
291
  with_timestamps=with_timestamps,
@@ -350,7 +373,9 @@ class AsyncSpeechToTextJobClient:
350
373
  asyncio.run(main())
351
374
  """
352
375
  _response = await self._raw_client.initialise(
353
- job_parameters=job_parameters, callback=callback, request_options=request_options
376
+ job_parameters=job_parameters,
377
+ callback=callback,
378
+ request_options=request_options,
354
379
  )
355
380
  return _response.data
356
381
 
@@ -392,7 +417,9 @@ class AsyncSpeechToTextJobClient:
392
417
 
393
418
  asyncio.run(main())
394
419
  """
395
- _response = await self._raw_client.get_status(job_id, request_options=request_options)
420
+ _response = await self._raw_client.get_status(
421
+ job_id, request_options=request_options
422
+ )
396
423
  return _response.data
397
424
 
398
425
  async def start(
@@ -439,11 +466,17 @@ class AsyncSpeechToTextJobClient:
439
466
 
440
467
  asyncio.run(main())
441
468
  """
442
- _response = await self._raw_client.start(job_id, ptu_id=ptu_id, request_options=request_options)
469
+ _response = await self._raw_client.start(
470
+ job_id, ptu_id=ptu_id, request_options=request_options
471
+ )
443
472
  return _response.data
444
473
 
445
474
  async def get_upload_links(
446
- self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
475
+ self,
476
+ *,
477
+ job_id: str,
478
+ files: typing.Sequence[str],
479
+ request_options: typing.Optional[RequestOptions] = None,
447
480
  ) -> FilesUploadResponse:
448
481
  """
449
482
  Start a speech to text bulk job V1
@@ -482,11 +515,17 @@ class AsyncSpeechToTextJobClient:
482
515
 
483
516
  asyncio.run(main())
484
517
  """
485
- _response = await self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
518
+ _response = await self._raw_client.get_upload_links(
519
+ job_id=job_id, files=files, request_options=request_options
520
+ )
486
521
  return _response.data
487
522
 
488
523
  async def get_download_links(
489
- self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
524
+ self,
525
+ *,
526
+ job_id: str,
527
+ files: typing.Sequence[str],
528
+ request_options: typing.Optional[RequestOptions] = None,
490
529
  ) -> FilesDownloadResponse:
491
530
  """
492
531
  Start a speech to text bulk job V1
@@ -533,6 +572,7 @@ class AsyncSpeechToTextJobClient:
533
572
  async def create_job(
534
573
  self,
535
574
  model: SpeechToTextModel = "saarika:v2.5",
575
+ mode: typing.Optional[Mode] = None,
536
576
  with_diarization: bool = False,
537
577
  with_timestamps: bool = False,
538
578
  language_code: typing.Optional[SpeechToTextLanguage] = None,
@@ -548,6 +588,10 @@ class AsyncSpeechToTextJobClient:
548
588
  model : SpeechToTextModel, default="saarika:v2.5"
549
589
  The model to use for transcription.
550
590
 
591
+ mode : typing.Optional[Mode], default=None
592
+ Mode of operation. Only applicable for saaras:v3 model.
593
+ Options: transcribe, translate, indic-en, verbatim, translit, codemix
594
+
551
595
  with_diarization : typing.Optional[bool], default=False
552
596
  Whether to enable speaker diarization (distinguishing who said what).
553
597
 
@@ -556,8 +600,8 @@ class AsyncSpeechToTextJobClient:
556
600
 
557
601
  language_code : typing.Optional[SpeechToTextLanguage], default=None
558
602
  The language code of the input audio (e.g., "hi-IN", "bn-IN").
559
-
560
- num_speakers : typing.Optional[int], default=None
603
+
604
+ num_speakers : typing.Optional[int] = None
561
605
  The number of distinct speakers in the audio, if known.
562
606
 
563
607
  callback : typing.Optional[BulkJobCallbackParams], default=OMIT
@@ -575,6 +619,7 @@ class AsyncSpeechToTextJobClient:
575
619
  job_parameters=SpeechToTextJobParametersParams(
576
620
  language_code=language_code,
577
621
  model=model,
622
+ mode=mode, # type: ignore[typeddict-item]
578
623
  with_diarization=with_diarization,
579
624
  with_timestamps=with_timestamps,
580
625
  num_speakers=num_speakers, # type: ignore[typeddict-item]