PyPI - sarvamai - Versions diffs - 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl - Mend

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

sarvamai/core/client_wrapper.py +2 -2
sarvamai/speech_to_text/client.py +22 -22
sarvamai/speech_to_text/raw_client.py +22 -22
sarvamai/speech_to_text_job/client.py +143 -0
sarvamai/speech_to_text_job/job.py +497 -0
sarvamai/speech_to_text_translate_job/client.py +133 -0
sarvamai/speech_to_text_translate_job/job.py +505 -0
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/METADATA +1 -1
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/RECORD +10 -8
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/WHEEL +0 -0

sarvamai/core/client_wrapper.py CHANGED Viewed

@@ -23,10 +23,10 @@ class BaseClientWrapper:
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "sarvamai/0.1.13a2",
+            "User-Agent": "sarvamai/0.1.15",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "sarvamai",
-            "X-Fern-SDK-Version": "0.1.13a2",
+            "X-Fern-SDK-Version": "0.1.15",
             **(self.get_custom_headers() or {}),
         }
         headers["api-subscription-key"] = self.api_subscription_key

sarvamai/speech_to_text/client.py CHANGED Viewed

@@ -40,19 +40,19 @@ class SpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> SpeechToTextResponse:
         """
-        ## Real-Time Speech to Text API
+        ## Speech to Text API
-        This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
+        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -99,19 +99,19 @@ class SpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> SpeechToTextTranslateResponse:
         """
-        ## Real-Time Speech to Text Translation API
+        ## Speech to Text Translation API
         This API automatically detects the input language, transcribes the speech, and translates the text to English.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -169,19 +169,19 @@ class AsyncSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> SpeechToTextResponse:
         """
-        ## Real-Time Speech to Text API
+        ## Speech to Text API
-        This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
+        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -236,19 +236,19 @@ class AsyncSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> SpeechToTextTranslateResponse:
         """
-        ## Real-Time Speech to Text Translation API
+        ## Speech to Text Translation API
         This API automatically detects the input language, transcribes the speech, and translates the text to English.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------

sarvamai/speech_to_text/raw_client.py CHANGED Viewed

@@ -38,19 +38,19 @@ class RawSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[SpeechToTextResponse]:
         """
-        ## Real-Time Speech to Text API
+        ## Speech to Text API
-        This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
+        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -179,19 +179,19 @@ class RawSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[SpeechToTextTranslateResponse]:
         """
-        ## Real-Time Speech to Text Translation API
+        ## Speech to Text Translation API
         This API automatically detects the input language, transcribes the speech, and translates the text to English.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -322,19 +322,19 @@ class AsyncRawSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[SpeechToTextResponse]:
         """
-        ## Real-Time Speech to Text API
+        ## Speech to Text API
-        This API transcribes speech to text in multiple Indian languages and English. Supports real-time transcription for interactive applications.
+        This API transcribes speech to text in multiple Indian languages and English. Supports transcription for interactive applications.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt/stt-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files, [Follow This Documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------
@@ -463,19 +463,19 @@ class AsyncRawSpeechToTextClient:
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[SpeechToTextTranslateResponse]:
         """
-        ## Real-Time Speech to Text Translation API
+        ## Speech to Text Translation API
         This API automatically detects the input language, transcribes the speech, and translates the text to English.
         ### Available Options:
-        - **Real-Time API** (Current Endpoint): For quick responses under 30 seconds with immediate results
-        - **Batch API**: For longer audio files, requires following a notebook script - [View Notebook](https://github.com/sarvamai/sarvam-ai-cookbook/tree/main/notebooks/stt-translate/stt-translate-batch-api)
+        - **REST API** (Current Endpoint): For quick responses under 30 seconds with immediate results
+        - **Batch API**: For longer audio files [Follow this documentation](https://docs.sarvam.ai/api-reference-docs/api-guides-tutorials/speech-to-text/batch-api)
           - Supports diarization (speaker identification)
         ### Note:
-        - Pricing differs for Real-Time and Batch APIs
+        - Pricing differs for REST and Batch APIs
         - Diarization is only available in Batch API with separate pricing
-        - Please refer to [dashboard.sarvam.ai](https://dashboard.sarvam.ai) for detailed pricing information
+        - Please refer to [here](https://docs.sarvam.ai/api-reference-docs/getting-started/pricing) for detailed pricing information
         Parameters
         ----------

sarvamai/speech_to_text_job/client.py CHANGED Viewed

@@ -10,7 +10,10 @@ from ..types.bulk_job_init_response_v_1 import BulkJobInitResponseV1
 from ..types.files_download_response import FilesDownloadResponse
 from ..types.files_upload_response import FilesUploadResponse
 from ..types.job_status_v_1_response import JobStatusV1Response
+from ..types.speech_to_text_model import SpeechToTextModel
+from ..types.speech_to_text_language import SpeechToTextLanguage
 from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
+from .job import AsyncSpeechToTextJob, SpeechToTextJob
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -215,6 +218,76 @@ class SpeechToTextJobClient:
         _response = self._raw_client.get_download_links(job_id=job_id, files=files, request_options=request_options)
         return _response.data
+    def create_job(
+        self,
+        model: SpeechToTextModel = "saarika:v2.5",
+        with_diarization: bool = False,
+        with_timestamps: bool = False,
+        language_code: typing.Optional[SpeechToTextLanguage] = None,
+        num_speakers: typing.Optional[int] = None,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> SpeechToTextJob:
+        """
+        Create a new Speech-to-Text bulk job.
+        Parameters
+        ----------
+        model : SpeechToTextModel, default="saarika:v2.5"
+            The model to use for transcription.
+        with_diarization : typing.Optional[bool], default=False
+            Whether to enable speaker diarization (distinguishing who said what).
+        with_timestamps : typing.Optional[bool], default=False
+            Whether to include word-level timestamps in the transcription output.
+        language_code : typing.Optional[SpeechToTextLanguage], default=None
+            The language code of the input audio (e.g., "hi-IN", "bn-IN").
+        num_speakers : typing.Optional[int], default=None
+            The number of distinct speakers in the audio, if known.
+        callback : typing.Optional[BulkJobCallbackParams], default=OMIT
+            Optional callback configuration to receive job completion events.
+        request_options : typing.Optional[RequestOptions], default=None
+            Request-specific configuration.
+        Returns
+        -------
+        SpeechToTextJob
+            A handle to the newly created Speech-to-Text job.
+        """
+        response = self.initialise(
+            job_parameters=SpeechToTextJobParametersParams(
+                language_code=language_code,
+                model=model,
+                num_speakers=num_speakers,  # type: ignore[typeddict-item]
+                with_diarization=with_diarization,
+                with_timestamps=with_timestamps,
+            ),
+            callback=callback,
+            request_options=request_options,
+        )
+        return SpeechToTextJob(job_id=response.job_id, client=self)
+    def get_job(self, job_id: str) -> SpeechToTextJob:
+        """
+        Get an existing Speech-to-Text job handle by job ID.
+        Parameters
+        ----------
+        job_id : str
+            The job ID of the previously created Speech-to-Text job.
+        Returns
+        -------
+        SpeechToTextJob
+            A job handle which can be used to check status or retrieve results.
+        """
+        return SpeechToTextJob(job_id=job_id, client=self)
 class AsyncSpeechToTextJobClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -456,3 +529,73 @@ class AsyncSpeechToTextJobClient:
             job_id=job_id, files=files, request_options=request_options
         )
         return _response.data
+    async def create_job(
+        self,
+        model: SpeechToTextModel = "saarika:v2.5",
+        with_diarization: bool = False,
+        with_timestamps: bool = False,
+        language_code: typing.Optional[SpeechToTextLanguage] = None,
+        num_speakers: typing.Optional[int] = None,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> "AsyncSpeechToTextJob":
+        """
+        Create a new Speech-to-Text bulk job.
+        Parameters
+        ----------
+        model : SpeechToTextModel, default="saarika:v2.5"
+            The model to use for transcription.
+        with_diarization : typing.Optional[bool], default=False
+            Whether to enable speaker diarization (distinguishing who said what).
+        with_timestamps : typing.Optional[bool], default=False
+            Whether to include word-level timestamps in the transcription output.
+        language_code : typing.Optional[SpeechToTextLanguage], default=None
+            The language code of the input audio (e.g., "hi-IN", "bn-IN").
+        num_speakers : typing.Optional[int], default=None
+            The number of distinct speakers in the audio, if known.
+        callback : typing.Optional[BulkJobCallbackParams], default=OMIT
+            Optional callback configuration to receive job completion events.
+        request_options : typing.Optional[RequestOptions], default=None
+            Request-specific configuration.
+        Returns
+        -------
+        AsyncSpeechToTextJob
+            A handle to the newly created job.
+        """
+        response = await self.initialise(
+            job_parameters=SpeechToTextJobParametersParams(
+                language_code=language_code,
+                model=model,
+                with_diarization=with_diarization,
+                with_timestamps=with_timestamps,
+                num_speakers=num_speakers,  # type: ignore[typeddict-item]
+            ),
+            callback=callback,
+            request_options=request_options,
+        )
+        return AsyncSpeechToTextJob(job_id=response.job_id, client=self)
+    async def get_job(self, job_id: str) -> "AsyncSpeechToTextJob":
+        """
+        Get an existing Speech-to-Text job handle by job ID.
+        Parameters
+        ----------
+        job_id : str
+            The job ID of the previously created speech-to-text job.
+        Returns
+        -------
+        AsyncSpeechToTextJob
+            A job handle which can be used to check status or retrieve results.
+        """
+        return AsyncSpeechToTextJob(job_id=job_id, client=self)

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl