PyPI - sarvamai - Versions diffs - 0.1.22a3__py3-none-any.whl → 0.1.22a7__py3-none-any.whl - Mend

sarvamai 0.1.22a3-py3-none-any.whl → 0.1.22a7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

sarvamai/__init__.py +62 -9
sarvamai/client.py +3 -0
sarvamai/core/client_wrapper.py +2 -2
sarvamai/doc_digitization_job/__init__.py +4 -0
sarvamai/doc_digitization_job/client.py +776 -0
sarvamai/doc_digitization_job/job.py +496 -0
sarvamai/doc_digitization_job/raw_client.py +1176 -0
sarvamai/requests/__init__.py +20 -0
sarvamai/requests/audio_data.py +0 -6
sarvamai/requests/configure_connection.py +4 -0
sarvamai/requests/configure_connection_data.py +40 -11
sarvamai/requests/doc_digitization_create_job_response.py +25 -0
sarvamai/requests/doc_digitization_download_files_response.py +37 -0
sarvamai/requests/doc_digitization_error_details.py +21 -0
sarvamai/requests/doc_digitization_error_message.py +11 -0
sarvamai/requests/doc_digitization_job_detail.py +64 -0
sarvamai/requests/doc_digitization_job_parameters.py +21 -0
sarvamai/requests/doc_digitization_job_status_response.py +65 -0
sarvamai/requests/doc_digitization_page_error.py +24 -0
sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
sarvamai/requests/speech_to_text_job_parameters.py +43 -2
sarvamai/requests/speech_to_text_transcription_data.py +0 -6
sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -6
sarvamai/speech_to_text/client.py +95 -10
sarvamai/speech_to_text/raw_client.py +95 -10
sarvamai/speech_to_text_job/client.py +60 -15
sarvamai/speech_to_text_job/job.py +100 -2
sarvamai/speech_to_text_job/raw_client.py +14 -10
sarvamai/speech_to_text_streaming/__init__.py +4 -2
sarvamai/speech_to_text_streaming/client.py +100 -47
sarvamai/speech_to_text_streaming/raw_client.py +100 -47
sarvamai/speech_to_text_streaming/types/__init__.py +4 -2
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +1 -27
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
sarvamai/speech_to_text_translate_job/job.py +100 -2
sarvamai/speech_to_text_translate_job/raw_client.py +14 -10
sarvamai/speech_to_text_translate_streaming/__init__.py +0 -2
sarvamai/speech_to_text_translate_streaming/client.py +18 -41
sarvamai/speech_to_text_translate_streaming/raw_client.py +18 -41
sarvamai/speech_to_text_translate_streaming/types/__init__.py +0 -4
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +1 -27
sarvamai/text/client.py +0 -12
sarvamai/text/raw_client.py +0 -12
sarvamai/text_to_speech/client.py +116 -14
sarvamai/text_to_speech/raw_client.py +116 -14
sarvamai/text_to_speech_streaming/__init__.py +2 -2
sarvamai/text_to_speech_streaming/client.py +19 -6
sarvamai/text_to_speech_streaming/raw_client.py +19 -6
sarvamai/text_to_speech_streaming/types/__init__.py +2 -1
sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
sarvamai/types/__init__.py +34 -4
sarvamai/types/audio_data.py +0 -6
sarvamai/types/completion_event_flag.py +3 -1
sarvamai/types/configure_connection.py +4 -0
sarvamai/types/configure_connection_data.py +40 -11
sarvamai/types/configure_connection_data_model.py +5 -0
sarvamai/types/configure_connection_data_speaker.py +35 -1
sarvamai/types/doc_digitization_create_job_response.py +37 -0
sarvamai/types/doc_digitization_download_files_response.py +47 -0
sarvamai/types/doc_digitization_error_code.py +15 -0
sarvamai/types/doc_digitization_error_details.py +33 -0
sarvamai/types/doc_digitization_error_message.py +23 -0
sarvamai/types/doc_digitization_job_detail.py +74 -0
sarvamai/types/doc_digitization_job_detail_state.py +7 -0
sarvamai/types/doc_digitization_job_parameters.py +33 -0
sarvamai/types/doc_digitization_job_state.py +7 -0
sarvamai/types/doc_digitization_job_status_response.py +75 -0
sarvamai/types/doc_digitization_output_format.py +5 -0
sarvamai/types/doc_digitization_page_error.py +36 -0
sarvamai/types/doc_digitization_supported_language.py +32 -0
sarvamai/types/doc_digitization_upload_files_response.py +44 -0
sarvamai/types/doc_digitization_webhook_callback.py +31 -0
sarvamai/types/mode.py +5 -0
sarvamai/types/speech_to_text_job_parameters.py +43 -2
sarvamai/types/speech_to_text_model.py +1 -1
sarvamai/types/speech_to_text_transcription_data.py +0 -6
sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
sarvamai/types/speech_to_text_translate_transcription_data.py +0 -6
sarvamai/types/text_to_speech_model.py +1 -1
sarvamai/types/text_to_speech_speaker.py +35 -1
{sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/METADATA +1 -1
{sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/RECORD +86 -56
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_stream_ongoing_speech_results.py +0 -5
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_stream_ongoing_speech_results.py +0 -5
sarvamai/types/audio_data_input_audio_codec.py +0 -33
sarvamai/types/response_speech_state.py +0 -7
{sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/WHEEL +0 -0

sarvamai/speech_to_text_translate_job/job.py CHANGED Viewed

@@ -150,9 +150,58 @@ class AsyncSpeechToTextTranslateJob:
                 "output_file": detail.outputs[0].file_name,
             }
             for detail in (job_status.job_details or [])
-            if detail.inputs and detail.outputs
+            if detail.inputs and detail.outputs and detail.state == "Success"
         ]
+    async def get_file_results(
+        self,
+    ) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
+        """
+        Get detailed results for each file in the batch job.
+        Returns
+        -------
+        Dict[str, List[Dict[str, Any]]]
+            Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
+            Each file detail includes:
+            - 'file_name': Name of the input file
+            - 'status': Status of processing ('Success' or 'Failed')
+            - 'error_message': Error message if failed (None if successful)
+            - 'output_file': Name of output file if successful (None if failed)
+        """
+        job_status = await self.get_status()
+        results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
+            "successful": [],
+            "failed": [],
+        }
+        for detail in job_status.job_details or []:
+            # Check for empty lists explicitly
+            if not detail.inputs or len(detail.inputs) == 0:
+                continue
+            try:
+                file_info = {
+                    "file_name": detail.inputs[0].file_name,
+                    "status": detail.state,
+                    "error_message": detail.error_message,
+                    "output_file": (
+                        detail.outputs[0].file_name
+                        if detail.outputs and len(detail.outputs) > 0
+                        else None
+                    ),
+                }
+                if detail.state == "Success":
+                    results["successful"].append(file_info)
+                else:
+                    results["failed"].append(file_info)
+            except (IndexError, AttributeError):
+                # Skip malformed job details
+                continue
+        return results
     async def download_outputs(self, output_dir: str) -> bool:
         """
         Download output files to the specified directory.
@@ -395,9 +444,58 @@ class SpeechToTextTranslateJob:
                 "output_file": detail.outputs[0].file_name,
             }
             for detail in (job_status.job_details or [])
-            if detail.inputs and detail.outputs
+            if detail.inputs and detail.outputs and detail.state == "Success"
         ]
+    def get_file_results(
+        self,
+    ) -> typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]]:
+        """
+        Get detailed results for each file in the batch job.
+        Returns
+        -------
+        Dict[str, List[Dict[str, Any]]]
+            Dictionary with 'successful' and 'failed' keys, each containing a list of file details.
+            Each file detail includes:
+            - 'file_name': Name of the input file
+            - 'status': Status of processing ('Success' or 'Failed')
+            - 'error_message': Error message if failed (None if successful)
+            - 'output_file': Name of output file if successful (None if failed)
+        """
+        job_status = self.get_status()
+        results: typing.Dict[str, typing.List[typing.Dict[str, typing.Any]]] = {
+            "successful": [],
+            "failed": [],
+        }
+        for detail in job_status.job_details or []:
+            # Check for empty lists explicitly
+            if not detail.inputs or len(detail.inputs) == 0:
+                continue
+            try:
+                file_info = {
+                    "file_name": detail.inputs[0].file_name,
+                    "status": detail.state,
+                    "error_message": detail.error_message,
+                    "output_file": (
+                        detail.outputs[0].file_name
+                        if detail.outputs and len(detail.outputs) > 0
+                        else None
+                    ),
+                }
+                if detail.state == "Success":
+                    results["successful"].append(file_info)
+                else:
+                    results["failed"].append(file_info)
+            except (IndexError, AttributeError):
+                # Skip malformed job details
+                continue
+        return results
     def download_outputs(self, output_dir: str) -> bool:
         """
         Download output files to the specified directory.

sarvamai/speech_to_text_translate_job/raw_client.py CHANGED Viewed

@@ -40,7 +40,7 @@ class RawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[BulkJobInitResponseV1]:
         """
-        Get a job uuid, and storage folder details for speech to text tranlsate bulk job v1
+        Create a new speech to text translate bulk job and receive a job UUID and storage folder details for processing multiple audio files with translation
         Parameters
         ----------
@@ -166,7 +166,9 @@ class RawSpeechToTextTranslateJobClient:
         self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
     ) -> HttpResponse[JobStatusV1Response]:
         """
-        Get the status of a speech to text translate bulk job V1
+        Retrieve the current status and details of a speech to text translate bulk job, including progress and file-level information.
+        **Rate Limiting Best Practice:** To prevent rate limit errors and ensure optimal server performance, we recommend implementing a minimum 5-millisecond delay between consecutive status polling requests. This helps maintain system stability while still providing timely status updates.
         Parameters
         ----------
@@ -276,7 +278,7 @@ class RawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[JobStatusV1Response]:
         """
-        Start a speech to text translate bulk job V1
+        Start processing a speech to text translate bulk job after all audio files have been uploaded
         Parameters
         ----------
@@ -392,7 +394,7 @@ class RawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[FilesUploadResponse]:
         """
-        Start a speech to text bulk job V1
+        Generate presigned upload URLs for audio files that will be processed in a speech to text translate bulk job
         Parameters
         ----------
@@ -517,7 +519,7 @@ class RawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[FilesDownloadResponse]:
         """
-        Start a speech to text bulk job V1
+        Generate presigned download URLs for the translated transcription output files of a completed speech to text translate bulk job
         Parameters
         ----------
@@ -647,7 +649,7 @@ class AsyncRawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[BulkJobInitResponseV1]:
         """
-        Get a job uuid, and storage folder details for speech to text tranlsate bulk job v1
+        Create a new speech to text translate bulk job and receive a job UUID and storage folder details for processing multiple audio files with translation
         Parameters
         ----------
@@ -773,7 +775,9 @@ class AsyncRawSpeechToTextTranslateJobClient:
         self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
     ) -> AsyncHttpResponse[JobStatusV1Response]:
         """
-        Get the status of a speech to text translate bulk job V1
+        Retrieve the current status and details of a speech to text translate bulk job, including progress and file-level information.
+        **Rate Limiting Best Practice:** To prevent rate limit errors and ensure optimal server performance, we recommend implementing a minimum 5-millisecond delay between consecutive status polling requests. This helps maintain system stability while still providing timely status updates.
         Parameters
         ----------
@@ -883,7 +887,7 @@ class AsyncRawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[JobStatusV1Response]:
         """
-        Start a speech to text translate bulk job V1
+        Start processing a speech to text translate bulk job after all audio files have been uploaded
         Parameters
         ----------
@@ -999,7 +1003,7 @@ class AsyncRawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[FilesUploadResponse]:
         """
-        Start a speech to text bulk job V1
+        Generate presigned upload URLs for audio files that will be processed in a speech to text translate bulk job
         Parameters
         ----------
@@ -1124,7 +1128,7 @@ class AsyncRawSpeechToTextTranslateJobClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[FilesDownloadResponse]:
         """
-        Start a speech to text bulk job V1
+        Generate presigned download URLs for the translated transcription output files of a completed speech to text translate bulk job
         Parameters
         ----------

sarvamai/speech_to_text_translate_streaming/__init__.py CHANGED Viewed

@@ -6,7 +6,6 @@ from .types import (
     SpeechToTextTranslateStreamingFlushSignal,
     SpeechToTextTranslateStreamingHighVadSensitivity,
     SpeechToTextTranslateStreamingInputAudioCodec,
-    SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
     SpeechToTextTranslateStreamingVadSignals,
 )
@@ -14,6 +13,5 @@ __all__ = [
     "SpeechToTextTranslateStreamingFlushSignal",
     "SpeechToTextTranslateStreamingHighVadSensitivity",
     "SpeechToTextTranslateStreamingInputAudioCodec",
-    "SpeechToTextTranslateStreamingStreamOngoingSpeechResults",
     "SpeechToTextTranslateStreamingVadSignals",
 ]

sarvamai/speech_to_text_translate_streaming/client.py CHANGED Viewed

@@ -16,9 +16,6 @@ from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
     SpeechToTextTranslateStreamingHighVadSensitivity,
 )
 from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
-from .types.speech_to_text_translate_streaming_stream_ongoing_speech_results import (
-    SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
-)
 from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
 try:
@@ -47,13 +44,11 @@ class SpeechToTextTranslateStreamingClient:
         self,
         *,
         model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
-        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
         flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
-        stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
-        streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
+        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
@@ -67,10 +62,10 @@ class SpeechToTextTranslateStreamingClient:
         Parameters
         ----------
         model : typing.Optional[typing.Literal["saaras:v2.5"]]
-            Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+            Model to be used for speech to text translation.
-        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
-            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
+            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
+              - Example: Hindi audio → English text output
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
@@ -84,11 +79,9 @@ class SpeechToTextTranslateStreamingClient:
         flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
             Signal to flush the audio buffer and finalize transcription and translation
-        stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
-            Enable streaming of ongoing speech results during active speech
-        streaming_ongoing_requests_frame_size : typing.Optional[str]
-            Frame size for streaming ongoing speech results (1-100)
+        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
+            Audio codec/format of the input stream. Use this when sending raw PCM audio.
+            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
         api_subscription_key : typing.Optional[str]
             API subscription key for authentication
@@ -104,8 +97,6 @@ class SpeechToTextTranslateStreamingClient:
         query_params = httpx.QueryParams()
         if model is not None:
             query_params = query_params.add("model", model)
-        if input_audio_codec is not None:
-            query_params = query_params.add("input_audio_codec", input_audio_codec)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -114,12 +105,8 @@ class SpeechToTextTranslateStreamingClient:
             query_params = query_params.add("vad_signals", vad_signals)
         if flush_signal is not None:
             query_params = query_params.add("flush_signal", flush_signal)
-        if stream_ongoing_speech_results is not None:
-            query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
-        if streaming_ongoing_requests_frame_size is not None:
-            query_params = query_params.add(
-                "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
-            )
+        if input_audio_codec is not None:
+            query_params = query_params.add("input_audio_codec", input_audio_codec)
         ws_url = ws_url + f"?{query_params}"
         headers = self._raw_client._client_wrapper.get_headers()
         if api_subscription_key is not None:
@@ -164,13 +151,11 @@ class AsyncSpeechToTextTranslateStreamingClient:
         self,
         *,
         model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
-        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
         flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
-        stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
-        streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
+        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
@@ -184,10 +169,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
         Parameters
         ----------
         model : typing.Optional[typing.Literal["saaras:v2.5"]]
-            Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+            Model to be used for speech to text translation.
-        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
-            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
+            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
+              - Example: Hindi audio → English text output
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
@@ -201,11 +186,9 @@ class AsyncSpeechToTextTranslateStreamingClient:
         flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
             Signal to flush the audio buffer and finalize transcription and translation
-        stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
-            Enable streaming of ongoing speech results during active speech
-        streaming_ongoing_requests_frame_size : typing.Optional[str]
-            Frame size for streaming ongoing speech results (1-100)
+        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
+            Audio codec/format of the input stream. Use this when sending raw PCM audio.
+            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
         api_subscription_key : typing.Optional[str]
             API subscription key for authentication
@@ -221,8 +204,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
         query_params = httpx.QueryParams()
         if model is not None:
             query_params = query_params.add("model", model)
-        if input_audio_codec is not None:
-            query_params = query_params.add("input_audio_codec", input_audio_codec)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -231,12 +212,8 @@ class AsyncSpeechToTextTranslateStreamingClient:
             query_params = query_params.add("vad_signals", vad_signals)
         if flush_signal is not None:
             query_params = query_params.add("flush_signal", flush_signal)
-        if stream_ongoing_speech_results is not None:
-            query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
-        if streaming_ongoing_requests_frame_size is not None:
-            query_params = query_params.add(
-                "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
-            )
+        if input_audio_codec is not None:
+            query_params = query_params.add("input_audio_codec", input_audio_codec)
         ws_url = ws_url + f"?{query_params}"
         headers = self._raw_client._client_wrapper.get_headers()
         if api_subscription_key is not None:

sarvamai/speech_to_text_translate_streaming/raw_client.py CHANGED Viewed

@@ -15,9 +15,6 @@ from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
     SpeechToTextTranslateStreamingHighVadSensitivity,
 )
 from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
-from .types.speech_to_text_translate_streaming_stream_ongoing_speech_results import (
-    SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
-)
 from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
 try:
@@ -35,13 +32,11 @@ class RawSpeechToTextTranslateStreamingClient:
         self,
         *,
         model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
-        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
         flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
-        stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
-        streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
+        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
@@ -55,10 +50,10 @@ class RawSpeechToTextTranslateStreamingClient:
         Parameters
         ----------
         model : typing.Optional[typing.Literal["saaras:v2.5"]]
-            Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+            Model to be used for speech to text translation.
-        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
-            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
+            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
+              - Example: Hindi audio → English text output
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
@@ -72,11 +67,9 @@ class RawSpeechToTextTranslateStreamingClient:
         flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
             Signal to flush the audio buffer and finalize transcription and translation
-        stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
-            Enable streaming of ongoing speech results during active speech
-        streaming_ongoing_requests_frame_size : typing.Optional[str]
-            Frame size for streaming ongoing speech results (1-100)
+        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
+            Audio codec/format of the input stream. Use this when sending raw PCM audio.
+            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
         api_subscription_key : typing.Optional[str]
             API subscription key for authentication
@@ -92,8 +85,6 @@ class RawSpeechToTextTranslateStreamingClient:
         query_params = httpx.QueryParams()
         if model is not None:
             query_params = query_params.add("model", model)
-        if input_audio_codec is not None:
-            query_params = query_params.add("input_audio_codec", input_audio_codec)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -102,12 +93,8 @@ class RawSpeechToTextTranslateStreamingClient:
             query_params = query_params.add("vad_signals", vad_signals)
         if flush_signal is not None:
             query_params = query_params.add("flush_signal", flush_signal)
-        if stream_ongoing_speech_results is not None:
-            query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
-        if streaming_ongoing_requests_frame_size is not None:
-            query_params = query_params.add(
-                "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
-            )
+        if input_audio_codec is not None:
+            query_params = query_params.add("input_audio_codec", input_audio_codec)
         ws_url = ws_url + f"?{query_params}"
         headers = self._client_wrapper.get_headers()
         if api_subscription_key is not None:
@@ -141,13 +128,11 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
         self,
         *,
         model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
-        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
         flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
-        stream_ongoing_speech_results: typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults] = None,
-        streaming_ongoing_requests_frame_size: typing.Optional[str] = None,
+        input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
@@ -161,10 +146,10 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
         Parameters
         ----------
         model : typing.Optional[typing.Literal["saaras:v2.5"]]
-            Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+            Model to be used for speech to text translation.
-        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
-            Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
+            - **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
+              - Example: Hindi audio → English text output
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
@@ -178,11 +163,9 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
         flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
             Signal to flush the audio buffer and finalize transcription and translation
-        stream_ongoing_speech_results : typing.Optional[SpeechToTextTranslateStreamingStreamOngoingSpeechResults]
-            Enable streaming of ongoing speech results during active speech
-        streaming_ongoing_requests_frame_size : typing.Optional[str]
-            Frame size for streaming ongoing speech results (1-100)
+        input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
+            Audio codec/format of the input stream. Use this when sending raw PCM audio.
+            Supported values: wav, pcm_s16le, pcm_l16, pcm_raw.
         api_subscription_key : typing.Optional[str]
             API subscription key for authentication
@@ -198,8 +181,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
         query_params = httpx.QueryParams()
         if model is not None:
             query_params = query_params.add("model", model)
-        if input_audio_codec is not None:
-            query_params = query_params.add("input_audio_codec", input_audio_codec)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -208,12 +189,8 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
             query_params = query_params.add("vad_signals", vad_signals)
         if flush_signal is not None:
             query_params = query_params.add("flush_signal", flush_signal)
-        if stream_ongoing_speech_results is not None:
-            query_params = query_params.add("stream_ongoing_speech_results", stream_ongoing_speech_results)
-        if streaming_ongoing_requests_frame_size is not None:
-            query_params = query_params.add(
-                "streaming_ongoing_requests_frame_size", streaming_ongoing_requests_frame_size
-            )
+        if input_audio_codec is not None:
+            query_params = query_params.add("input_audio_codec", input_audio_codec)
         ws_url = ws_url + f"?{query_params}"
         headers = self._client_wrapper.get_headers()
         if api_subscription_key is not None:

sarvamai/speech_to_text_translate_streaming/types/__init__.py CHANGED Viewed

@@ -5,15 +5,11 @@
 from .speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
 from .speech_to_text_translate_streaming_high_vad_sensitivity import SpeechToTextTranslateStreamingHighVadSensitivity
 from .speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
-from .speech_to_text_translate_streaming_stream_ongoing_speech_results import (
-    SpeechToTextTranslateStreamingStreamOngoingSpeechResults,
-)
 from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
 __all__ = [
     "SpeechToTextTranslateStreamingFlushSignal",
     "SpeechToTextTranslateStreamingHighVadSensitivity",
     "SpeechToTextTranslateStreamingInputAudioCodec",
-    "SpeechToTextTranslateStreamingStreamOngoingSpeechResults",
     "SpeechToTextTranslateStreamingVadSignals",
 ]

sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py CHANGED Viewed

@@ -3,31 +3,5 @@
 import typing
 SpeechToTextTranslateStreamingInputAudioCodec = typing.Union[
-    typing.Literal[
-        "wav",
-        "x-wav",
-        "wave",
-        "mp3",
-        "mpeg",
-        "mpeg3",
-        "x-mp3",
-        "x-mpeg-3",
-        "aac",
-        "x-aac",
-        "aiff",
-        "x-aiff",
-        "ogg",
-        "opus",
-        "flac",
-        "x-flac",
-        "mp4",
-        "x-m4a",
-        "amr",
-        "x-ms-wma",
-        "webm",
-        "pcm_s16le",
-        "pcm_l16",
-        "pcm_raw",
-    ],
-    typing.Any,
+    typing.Literal["wav", "pcm_s16le", "pcm_l16", "pcm_raw"], typing.Any
 ]

sarvamai/text/client.py CHANGED Viewed

@@ -47,7 +47,6 @@ class TextClient:
         speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
         mode: typing.Optional[TranslateMode] = OMIT,
         model: typing.Optional[TranslateModel] = OMIT,
-        enable_preprocessing: typing.Optional[bool] = OMIT,
         output_script: typing.Optional[TransliterateMode] = OMIT,
         numerals_format: typing.Optional[NumeralsFormat] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -125,10 +124,6 @@ class TextClient:
             - mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
             - sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
-        enable_preprocessing : typing.Optional[bool]
-            This will enable custom preprocessing of the input text which can result in better translations.
-             Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
         output_script : typing.Optional[TransliterateMode]
             **output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
@@ -186,7 +181,6 @@ class TextClient:
             speaker_gender=speaker_gender,
             mode=mode,
             model=model,
-            enable_preprocessing=enable_preprocessing,
             output_script=output_script,
             numerals_format=numerals_format,
             request_options=request_options,
@@ -371,7 +365,6 @@ class AsyncTextClient:
         speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
         mode: typing.Optional[TranslateMode] = OMIT,
         model: typing.Optional[TranslateModel] = OMIT,
-        enable_preprocessing: typing.Optional[bool] = OMIT,
         output_script: typing.Optional[TransliterateMode] = OMIT,
         numerals_format: typing.Optional[NumeralsFormat] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -449,10 +442,6 @@ class AsyncTextClient:
             - mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
             - sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
-        enable_preprocessing : typing.Optional[bool]
-            This will enable custom preprocessing of the input text which can result in better translations.
-             Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
         output_script : typing.Optional[TransliterateMode]
             **output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
@@ -518,7 +507,6 @@ class AsyncTextClient:
             speaker_gender=speaker_gender,
             mode=mode,
             model=model,
-            enable_preprocessing=enable_preprocessing,
             output_script=output_script,
             numerals_format=numerals_format,
             request_options=request_options,

sarvamai 0.1.22a3__py3-none-any.whl → 0.1.22a7__py3-none-any.whl

sarvamai 0.1.22a3__py3-none-any.whl → 0.1.22a7__py3-none-any.whl