PyPI - sarvamai - Versions diffs - 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl - Mend

sarvamai 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

sarvamai/__init__.py +203 -405
sarvamai/chat/raw_client.py +20 -20
sarvamai/client.py +34 -186
sarvamai/core/__init__.py +21 -76
sarvamai/core/client_wrapper.py +3 -19
sarvamai/core/force_multipart.py +2 -4
sarvamai/core/http_client.py +97 -217
sarvamai/core/http_response.py +1 -1
sarvamai/core/jsonable_encoder.py +0 -8
sarvamai/core/pydantic_utilities.py +4 -110
sarvamai/errors/__init__.py +6 -40
sarvamai/errors/bad_request_error.py +1 -1
sarvamai/errors/forbidden_error.py +1 -1
sarvamai/errors/internal_server_error.py +1 -1
sarvamai/errors/service_unavailable_error.py +1 -1
sarvamai/errors/too_many_requests_error.py +1 -1
sarvamai/errors/unprocessable_entity_error.py +1 -1
sarvamai/requests/__init__.py +62 -150
sarvamai/requests/configure_connection.py +4 -0
sarvamai/requests/configure_connection_data.py +40 -11
sarvamai/requests/error_response_data.py +1 -1
sarvamai/requests/file_signed_url_details.py +1 -1
sarvamai/requests/speech_to_text_job_parameters.py +10 -1
sarvamai/requests/speech_to_text_transcription_data.py +2 -2
sarvamai/speech_to_text/client.py +29 -2
sarvamai/speech_to_text/raw_client.py +81 -56
sarvamai/speech_to_text_job/client.py +60 -15
sarvamai/speech_to_text_job/raw_client.py +120 -120
sarvamai/speech_to_text_streaming/__init__.py +10 -38
sarvamai/speech_to_text_streaming/client.py +32 -6
sarvamai/speech_to_text_streaming/raw_client.py +32 -6
sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
sarvamai/text/raw_client.py +60 -60
sarvamai/text_to_speech/client.py +100 -16
sarvamai/text_to_speech/raw_client.py +120 -36
sarvamai/text_to_speech_streaming/__init__.py +2 -29
sarvamai/text_to_speech_streaming/client.py +19 -6
sarvamai/text_to_speech_streaming/raw_client.py +19 -6
sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
sarvamai/types/__init__.py +102 -222
sarvamai/types/chat_completion_request_message.py +2 -6
sarvamai/types/configure_connection.py +4 -0
sarvamai/types/configure_connection_data.py +40 -11
sarvamai/types/configure_connection_data_model.py +5 -0
sarvamai/types/configure_connection_data_speaker.py +35 -1
sarvamai/types/error_response_data.py +1 -1
sarvamai/types/file_signed_url_details.py +1 -1
sarvamai/types/mode.py +7 -0
sarvamai/types/speech_to_text_job_parameters.py +10 -1
sarvamai/types/speech_to_text_model.py +3 -1
sarvamai/types/speech_to_text_transcription_data.py +2 -2
sarvamai/types/speech_to_text_translate_model.py +1 -1
sarvamai/types/text_to_speech_model.py +1 -1
sarvamai/types/text_to_speech_speaker.py +35 -1
{sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
{sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
sarvamai/core/http_sse/__init__.py +0 -42
sarvamai/core/http_sse/_api.py +0 -112
sarvamai/core/http_sse/_decoders.py +0 -61
sarvamai/core/http_sse/_exceptions.py +0 -7
sarvamai/core/http_sse/_models.py +0 -17
{sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0

sarvamai/speech_to_text/raw_client.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # This file was auto-generated by Fern from our API Definition.
-import json
 import typing
 from json.decoder import JSONDecodeError
@@ -8,7 +7,6 @@ from .. import core
 from ..core.api_error import ApiError
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.http_response import AsyncHttpResponse, HttpResponse
-from ..core.jsonable_encoder import jsonable_encoder
 from ..core.pydantic_utilities import parse_obj_as
 from ..core.request_options import RequestOptions
 from ..errors.bad_request_error import BadRequestError
@@ -18,6 +16,7 @@ from ..errors.service_unavailable_error import ServiceUnavailableError
 from ..errors.too_many_requests_error import TooManyRequestsError
 from ..errors.unprocessable_entity_error import UnprocessableEntityError
 from ..types.input_audio_codec import InputAudioCodec
+from ..types.mode import Mode
 from ..types.speech_to_text_language import SpeechToTextLanguage
 from ..types.speech_to_text_model import SpeechToTextModel
 from ..types.speech_to_text_response import SpeechToTextResponse
@@ -37,6 +36,7 @@ class RawSpeechToTextClient:
         *,
         file: core.File,
         model: typing.Optional[SpeechToTextModel] = OMIT,
+        mode: typing.Optional[Mode] = OMIT,
         language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
         input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -63,7 +63,18 @@ class RawSpeechToTextClient:
         model : typing.Optional[SpeechToTextModel]
             Specifies the model to use for speech-to-text conversion.
-            Note:- Default model is `saarika:v2.5`
+            - **saarika:v2.5** (default): Standard transcription model
+            - **saarika:v3**: Advanced transcription model
+            - **saaras:v3**: Advanced model with multiple output modes
+        mode : typing.Optional[Mode]
+            Mode of operation. **Only applicable when using saaras:v3 model.**
+            - **transcribe** (default): Standard transcription
+            - **translate**: Translation to English
+            - **indic-en**: Indic to English translation
+            - **verbatim**: Exact transcription
+            - **translit**: Transliteration to Latin script
+            - **codemix**: Code-mixed output
         language_code : typing.Optional[SpeechToTextLanguage]
             Specifies the language of the input audio.
@@ -86,7 +97,8 @@ class RawSpeechToTextClient:
             base_url=self._client_wrapper.get_environment().base,
             method="POST",
             data={
-                "model": json.dumps(jsonable_encoder(model)),
+                "model": model,
+                "mode": mode,
                 "language_code": language_code,
                 "input_audio_codec": input_audio_codec,
             },
@@ -111,9 +123,9 @@ class RawSpeechToTextClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -122,9 +134,9 @@ class RawSpeechToTextClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -133,9 +145,9 @@ class RawSpeechToTextClient:
                 raise UnprocessableEntityError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -144,9 +156,9 @@ class RawSpeechToTextClient:
                 raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -155,9 +167,9 @@ class RawSpeechToTextClient:
                 raise InternalServerError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -166,9 +178,9 @@ class RawSpeechToTextClient:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -230,7 +242,7 @@ class RawSpeechToTextClient:
             method="POST",
             data={
                 "prompt": prompt,
-                "model": json.dumps(jsonable_encoder(model)),
+                "model": model,
                 "input_audio_codec": input_audio_codec,
             },
             files={
@@ -254,9 +266,9 @@ class RawSpeechToTextClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -265,9 +277,9 @@ class RawSpeechToTextClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -276,9 +288,9 @@ class RawSpeechToTextClient:
                 raise UnprocessableEntityError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -287,9 +299,9 @@ class RawSpeechToTextClient:
                 raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -298,9 +310,9 @@ class RawSpeechToTextClient:
                 raise InternalServerError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -309,9 +321,9 @@ class RawSpeechToTextClient:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -331,6 +343,7 @@ class AsyncRawSpeechToTextClient:
         *,
         file: core.File,
         model: typing.Optional[SpeechToTextModel] = OMIT,
+        mode: typing.Optional[Mode] = OMIT,
         language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
         input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -357,7 +370,18 @@ class AsyncRawSpeechToTextClient:
         model : typing.Optional[SpeechToTextModel]
             Specifies the model to use for speech-to-text conversion.
-            Note:- Default model is `saarika:v2.5`
+            - **saarika:v2.5** (default): Standard transcription model
+            - **saarika:v3**: Advanced transcription model
+            - **saaras:v3**: Advanced model with multiple output modes
+        mode : typing.Optional[Mode]
+            Mode of operation. **Only applicable when using saaras:v3 model.**
+            - **transcribe** (default): Standard transcription
+            - **translate**: Translation to English
+            - **indic-en**: Indic to English translation
+            - **verbatim**: Exact transcription
+            - **translit**: Transliteration to Latin script
+            - **codemix**: Code-mixed output
         language_code : typing.Optional[SpeechToTextLanguage]
             Specifies the language of the input audio.
@@ -380,7 +404,8 @@ class AsyncRawSpeechToTextClient:
             base_url=self._client_wrapper.get_environment().base,
             method="POST",
             data={
-                "model": json.dumps(jsonable_encoder(model)),
+                "model": model,
+                "mode": mode,
                 "language_code": language_code,
                 "input_audio_codec": input_audio_codec,
             },
@@ -405,9 +430,9 @@ class AsyncRawSpeechToTextClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -416,9 +441,9 @@ class AsyncRawSpeechToTextClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -427,9 +452,9 @@ class AsyncRawSpeechToTextClient:
                 raise UnprocessableEntityError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -438,9 +463,9 @@ class AsyncRawSpeechToTextClient:
                 raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -449,9 +474,9 @@ class AsyncRawSpeechToTextClient:
                 raise InternalServerError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -460,9 +485,9 @@ class AsyncRawSpeechToTextClient:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -524,7 +549,7 @@ class AsyncRawSpeechToTextClient:
             method="POST",
             data={
                 "prompt": prompt,
-                "model": json.dumps(jsonable_encoder(model)),
+                "model": model,
                 "input_audio_codec": input_audio_codec,
             },
             files={
@@ -548,9 +573,9 @@ class AsyncRawSpeechToTextClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -559,9 +584,9 @@ class AsyncRawSpeechToTextClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -570,9 +595,9 @@ class AsyncRawSpeechToTextClient:
                 raise UnprocessableEntityError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -581,9 +606,9 @@ class AsyncRawSpeechToTextClient:
                 raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -592,9 +617,9 @@ class AsyncRawSpeechToTextClient:
                 raise InternalServerError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),
@@ -603,9 +628,9 @@ class AsyncRawSpeechToTextClient:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        typing.Any,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=typing.Any,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                             object_=_response.json(),
                         ),
                     ),

sarvamai/speech_to_text_job/client.py CHANGED Viewed

@@ -12,6 +12,7 @@ from ..types.files_upload_response import FilesUploadResponse
 from ..types.job_status_v_1_response import JobStatusV1Response
 from ..types.speech_to_text_model import SpeechToTextModel
 from ..types.speech_to_text_language import SpeechToTextLanguage
+from ..types.mode import Mode
 from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
 from .job import AsyncSpeechToTextJob, SpeechToTextJob
@@ -72,7 +73,9 @@ class SpeechToTextJobClient:
         )
         """
         _response = self._raw_client.initialise(
-            job_parameters=job_parameters, callback=callback, request_options=request_options
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
         )
         return _response.data
@@ -145,11 +148,17 @@ class SpeechToTextJobClient:
             job_id="job_id",
         )
         """
-        _response = self._raw_client.start(job_id, ptu_id=ptu_id, request_options=request_options)
+        _response = self._raw_client.start(
+            job_id, ptu_id=ptu_id, request_options=request_options
+        )
         return _response.data
     def get_upload_links(
-        self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesUploadResponse:
         """
         Start a speech to text bulk job V1
@@ -180,11 +189,17 @@ class SpeechToTextJobClient:
             files=["files"],
         )
         """
-        _response = self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
+        _response = self._raw_client.get_upload_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
     def get_download_links(
-        self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesDownloadResponse:
         """
         Start a speech to text bulk job V1
@@ -215,12 +230,15 @@ class SpeechToTextJobClient:
             files=["files"],
         )
         """
-        _response = self._raw_client.get_download_links(job_id=job_id, files=files, request_options=request_options)
+        _response = self._raw_client.get_download_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
     def create_job(
         self,
         model: SpeechToTextModel = "saarika:v2.5",
+        mode: typing.Optional[Mode] = None,
         with_diarization: bool = False,
         with_timestamps: bool = False,
         language_code: typing.Optional[SpeechToTextLanguage] = None,
@@ -236,6 +254,10 @@ class SpeechToTextJobClient:
         model : SpeechToTextModel, default="saarika:v2.5"
             The model to use for transcription.
+        mode : typing.Optional[Mode], default=None
+            Mode of operation. Only applicable for saaras:v3 model.
+            Options: transcribe, translate, indic-en, verbatim, translit, codemix
         with_diarization : typing.Optional[bool], default=False
             Whether to enable speaker diarization (distinguishing who said what).
@@ -244,7 +266,7 @@ class SpeechToTextJobClient:
         language_code : typing.Optional[SpeechToTextLanguage], default=None
             The language code of the input audio (e.g., "hi-IN", "bn-IN").
         num_speakers : typing.Optional[int], default=None
             The number of distinct speakers in the audio, if known.
@@ -263,6 +285,7 @@ class SpeechToTextJobClient:
             job_parameters=SpeechToTextJobParametersParams(
                 language_code=language_code,
                 model=model,
+                mode=mode,  # type: ignore[typeddict-item]
                 num_speakers=num_speakers,  # type: ignore[typeddict-item]
                 with_diarization=with_diarization,
                 with_timestamps=with_timestamps,
@@ -350,7 +373,9 @@ class AsyncSpeechToTextJobClient:
         asyncio.run(main())
         """
         _response = await self._raw_client.initialise(
-            job_parameters=job_parameters, callback=callback, request_options=request_options
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
         )
         return _response.data
@@ -392,7 +417,9 @@ class AsyncSpeechToTextJobClient:
         asyncio.run(main())
         """
-        _response = await self._raw_client.get_status(job_id, request_options=request_options)
+        _response = await self._raw_client.get_status(
+            job_id, request_options=request_options
+        )
         return _response.data
     async def start(
@@ -439,11 +466,17 @@ class AsyncSpeechToTextJobClient:
         asyncio.run(main())
         """
-        _response = await self._raw_client.start(job_id, ptu_id=ptu_id, request_options=request_options)
+        _response = await self._raw_client.start(
+            job_id, ptu_id=ptu_id, request_options=request_options
+        )
         return _response.data
     async def get_upload_links(
-        self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesUploadResponse:
         """
         Start a speech to text bulk job V1
@@ -482,11 +515,17 @@ class AsyncSpeechToTextJobClient:
         asyncio.run(main())
         """
-        _response = await self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
+        _response = await self._raw_client.get_upload_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
     async def get_download_links(
-        self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesDownloadResponse:
         """
         Start a speech to text bulk job V1
@@ -533,6 +572,7 @@ class AsyncSpeechToTextJobClient:
     async def create_job(
         self,
         model: SpeechToTextModel = "saarika:v2.5",
+        mode: typing.Optional[Mode] = None,
         with_diarization: bool = False,
         with_timestamps: bool = False,
         language_code: typing.Optional[SpeechToTextLanguage] = None,
@@ -548,6 +588,10 @@ class AsyncSpeechToTextJobClient:
         model : SpeechToTextModel, default="saarika:v2.5"
             The model to use for transcription.
+        mode : typing.Optional[Mode], default=None
+            Mode of operation. Only applicable for saaras:v3 model.
+            Options: transcribe, translate, indic-en, verbatim, translit, codemix
         with_diarization : typing.Optional[bool], default=False
             Whether to enable speaker diarization (distinguishing who said what).
@@ -556,8 +600,8 @@ class AsyncSpeechToTextJobClient:
         language_code : typing.Optional[SpeechToTextLanguage], default=None
             The language code of the input audio (e.g., "hi-IN", "bn-IN").
-        num_speakers : typing.Optional[int], default=None
+        num_speakers : typing.Optional[int] = None
             The number of distinct speakers in the audio, if known.
         callback : typing.Optional[BulkJobCallbackParams], default=OMIT
@@ -575,6 +619,7 @@ class AsyncSpeechToTextJobClient:
             job_parameters=SpeechToTextJobParametersParams(
                 language_code=language_code,
                 model=model,
+                mode=mode,  # type: ignore[typeddict-item]
                 with_diarization=with_diarization,
                 with_timestamps=with_timestamps,
                 num_speakers=num_speakers,  # type: ignore[typeddict-item]

sarvamai 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

sarvamai 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl