PyPI - sarvamai - Versions diffs - 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl - Mend

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

sarvamai/core/client_wrapper.py +2 -2
sarvamai/speech_to_text/client.py +22 -22
sarvamai/speech_to_text/raw_client.py +22 -22
sarvamai/speech_to_text_job/client.py +143 -0
sarvamai/speech_to_text_job/job.py +497 -0
sarvamai/speech_to_text_translate_job/client.py +133 -0
sarvamai/speech_to_text_translate_job/job.py +505 -0
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/METADATA +1 -1
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/RECORD +10 -8
{sarvamai-0.1.13a2.dist-info → sarvamai-0.1.15.dist-info}/WHEEL +0 -0

sarvamai/speech_to_text_job/job.py ADDED Viewed

@@ -0,0 +1,497 @@
+import asyncio
+import mimetypes
+import os
+import time
+import typing
+import httpx
+from http import HTTPStatus
+from ..types import JobStatusV1Response
+if typing.TYPE_CHECKING:
+    from .client import AsyncSpeechToTextJobClient, SpeechToTextJobClient
+class AsyncSpeechToTextJob:
+    """
+    Handle for a single speech-to-text job driven through an async client.
+    The handle stores only the job ID and the client. Status, start and
+    link requests are delegated to the client; file transfers go through a
+    short-lived ``httpx.AsyncClient``.
+    """
+    def __init__(self, job_id: str, client: "AsyncSpeechToTextJobClient"):
+        """
+        Initialize the asynchronous speech-to-text job.
+        Parameters
+        ----------
+        job_id : str
+            The unique job identifier returned from a previous job initialization.
+        client : AsyncSpeechToTextJobClient
+            The async client instance used to create the job.
+            !!! important
+                This must be the **same client instance** that was used to initialize
+                the job originally, as it contains the subscription key and configuration
+                required to authenticate and manage the job.
+        """
+        self._job_id = job_id
+        self._client = client
+    @property
+    def job_id(self) -> str:
+        """
+        Returns the job ID associated with this job instance.
+        Returns
+        -------
+        str
+        """
+        return self._job_id
+    async def upload_files(
+        self, file_paths: typing.Sequence[str], timeout: float = 60.0
+    ) -> bool:
+        """
+        Upload input audio files for the speech-to-text job.
+        Upload links are requested for the base name of every path, then each
+        file is read fully into memory and sent with an HTTP PUT.
+        Parameters
+        ----------
+        file_paths : Sequence[str]
+            List of full paths to local audio files.
+        timeout : float, optional
+            Timeout in seconds handed to ``httpx.Timeout`` for the upload
+            client, by default 60.0
+        Returns
+        -------
+        bool
+            True if all files are uploaded successfully.
+        Raises
+        ------
+        RuntimeError
+            If an upload answers with a status code outside 200-226.
+        """
+        upload_links = await self._client.get_upload_links(
+            job_id=self._job_id,
+            files=[os.path.basename(p) for p in file_paths],
+        )
+        client_timeout = httpx.Timeout(timeout=timeout)
+        async with httpx.AsyncClient(timeout=client_timeout) as session:
+            for path in file_paths:
+                file_name = os.path.basename(path)
+                url = upload_links.upload_urls[file_name].file_url
+                # Read the payload first so the file handle is closed before
+                # the (potentially slow) network round trip starts.
+                with open(path, "rb") as f:
+                    payload = f.read()
+                content_type, _ = mimetypes.guess_type(path)
+                if content_type is None:
+                    # Unknown extension: fall back to WAV.
+                    content_type = "audio/wav"
+                response = await session.put(
+                    url,
+                    content=payload,
+                    headers={
+                        "x-ms-blob-type": "BlockBlob",
+                        "Content-Type": content_type,
+                    },
+                )
+                if not (HTTPStatus.OK <= response.status_code <= HTTPStatus.IM_USED):
+                    raise RuntimeError(
+                        f"Upload failed for {file_name}: {response.status_code}"
+                    )
+        return True
+    async def wait_until_complete(
+        self, poll_interval: int = 5, timeout: int = 600
+    ) -> JobStatusV1Response:
+        """
+        Polls job status until it completes or fails.
+        The status is always fetched at least once; the timeout is only
+        checked after a non-terminal status has been seen.
+        Parameters
+        ----------
+        poll_interval : int, optional
+            Time in seconds between polling attempts (default is 5).
+        timeout : int, optional
+            Maximum time to wait for completion in seconds (default is 600).
+        Returns
+        -------
+        JobStatusV1Response
+            Final job status.
+        Raises
+        ------
+        TimeoutError
+            If the job does not complete within the given timeout.
+        """
+        # Inside a coroutine the running loop is the one to ask for the time;
+        # get_running_loop() never creates a loop implicitly.
+        loop = asyncio.get_running_loop()
+        deadline = loop.time() + timeout
+        while True:
+            status = await self.get_status()
+            if status.job_state.lower() in {"completed", "failed"}:
+                return status
+            if loop.time() > deadline:
+                raise TimeoutError(
+                    f"Job {self._job_id} did not complete within {timeout} seconds."
+                )
+            await asyncio.sleep(poll_interval)
+    async def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
+        """
+        Get the mapping of input files to their corresponding output files.
+        Only the first input and first output of each job detail are used;
+        details lacking inputs or outputs are skipped.
+        Returns
+        -------
+        List[Dict[str, str]]
+            List of mappings with keys 'input_file' and 'output_file'.
+        """
+        job_status = await self.get_status()
+        mappings: typing.List[typing.Dict[str, str]] = []
+        for detail in job_status.job_details or []:
+            if not (detail.inputs and detail.outputs):
+                continue
+            mappings.append(
+                {
+                    "input_file": detail.inputs[0].file_name,
+                    "output_file": detail.outputs[0].file_name,
+                }
+            )
+        return mappings
+    async def download_outputs(self, output_dir: str) -> bool:
+        """
+        Download output files to the specified directory.
+        Each output is written to ``<output_dir>/<input_file>.json``. The
+        directory is created if it does not exist. The download client is
+        built without an explicit timeout.
+        Parameters
+        ----------
+        output_dir : str
+            Local directory where outputs will be saved.
+        Returns
+        -------
+        bool
+            True if all files downloaded successfully.
+        Raises
+        ------
+        RuntimeError
+            If a file fails to download.
+        """
+        mappings = await self.get_output_mappings()
+        download_links = await self._client.get_download_links(
+            job_id=self._job_id, files=[m["output_file"] for m in mappings]
+        )
+        os.makedirs(output_dir, exist_ok=True)
+        async with httpx.AsyncClient() as session:
+            for m in mappings:
+                output_name = m["output_file"]
+                url = download_links.download_urls[output_name].file_url
+                response = await session.get(url)
+                if not (HTTPStatus.OK <= response.status_code <= HTTPStatus.IM_USED):
+                    raise RuntimeError(
+                        f"Download failed for {output_name}: {response.status_code}"
+                    )
+                output_path = os.path.join(output_dir, f"{m['input_file']}.json")
+                with open(output_path, "wb") as f:
+                    f.write(response.content)
+        return True
+    async def get_status(self) -> JobStatusV1Response:
+        """
+        Retrieve the current status of the job.
+        Returns
+        -------
+        JobStatusV1Response
+        """
+        return await self._client.get_status(self._job_id)
+    async def start(self) -> JobStatusV1Response:
+        """
+        Start the speech-to-text job processing.
+        Returns
+        -------
+        JobStatusV1Response
+        """
+        return await self._client.start(job_id=self._job_id)
+    async def exists(self) -> bool:
+        """
+        Check if the job exists in the system.
+        A status request is issued; ``httpx.HTTPStatusError`` is treated as
+        "does not exist". Any other exception propagates to the caller.
+        Returns
+        -------
+        bool
+        """
+        try:
+            await self.get_status()
+        except httpx.HTTPStatusError:
+            return False
+        return True
+    async def is_complete(self) -> bool:
+        """
+        Check if the job is either completed or failed.
+        Returns
+        -------
+        bool
+        """
+        status = await self.get_status()
+        return status.job_state.lower() in {"completed", "failed"}
+    async def is_successful(self) -> bool:
+        """
+        Check if the job completed successfully.
+        Returns
+        -------
+        bool
+        """
+        status = await self.get_status()
+        return status.job_state.lower() == "completed"
+    async def is_failed(self) -> bool:
+        """
+        Check if the job has failed.
+        Returns
+        -------
+        bool
+        """
+        status = await self.get_status()
+        return status.job_state.lower() == "failed"
+class SpeechToTextJob:
+    """
+    Handle for a single speech-to-text job driven through a blocking client.
+    The handle stores only the job ID and the client. Status, start and
+    link requests are delegated to the client; file transfers go through a
+    short-lived ``httpx.Client``.
+    """
+    def __init__(self, job_id: str, client: "SpeechToTextJobClient"):
+        """
+        Initialize the synchronous speech-to-text job.
+        Parameters
+        ----------
+        job_id : str
+            The unique job identifier returned from a previous job initialization.
+        client : SpeechToTextJobClient
+            The client instance used to create the job.
+            !!! important
+                This must be the **same client instance** that was used to initialize
+                the job originally, as it contains the subscription key and configuration
+                required to authenticate and manage the job.
+        """
+        self._job_id = job_id
+        self._client = client
+    @property
+    def job_id(self) -> str:
+        """
+        Returns the job ID associated with this job instance.
+        Returns
+        -------
+        str
+        """
+        return self._job_id
+    def upload_files(
+        self, file_paths: typing.Sequence[str], timeout: float = 60.0
+    ) -> bool:
+        """
+        Upload input audio files for the speech-to-text job.
+        Upload links are requested for the base name of every path, then each
+        open file object is handed to httpx as the body of an HTTP PUT.
+        Parameters
+        ----------
+        file_paths : Sequence[str]
+            List of full paths to local audio files.
+        timeout : float, optional
+            Timeout in seconds handed to ``httpx.Timeout`` for the upload
+            client, by default 60.0
+        Returns
+        -------
+        bool
+            True if all files are uploaded successfully.
+        Raises
+        ------
+        RuntimeError
+            If an upload answers with a status code outside 200-226.
+        """
+        upload_links = self._client.get_upload_links(
+            job_id=self._job_id, files=[os.path.basename(p) for p in file_paths]
+        )
+        client_timeout = httpx.Timeout(timeout=timeout)
+        with httpx.Client(timeout=client_timeout) as client:
+            for path in file_paths:
+                file_name = os.path.basename(path)
+                url = upload_links.upload_urls[file_name].file_url
+                # Derive the Content-Type from the file name, as the async
+                # job does, instead of labelling every upload as WAV.
+                content_type, _ = mimetypes.guess_type(path)
+                if content_type is None:
+                    # Unknown extension: fall back to WAV.
+                    content_type = "audio/wav"
+                # The PUT stays inside the ``with`` block because the file
+                # object itself is the request body.
+                with open(path, "rb") as f:
+                    response = client.put(
+                        url,
+                        content=f,
+                        headers={
+                            "x-ms-blob-type": "BlockBlob",
+                            "Content-Type": content_type,
+                        },
+                    )
+                if not (HTTPStatus.OK <= response.status_code <= HTTPStatus.IM_USED):
+                    raise RuntimeError(
+                        f"Upload failed for {file_name}: {response.status_code}"
+                    )
+        return True
+    def wait_until_complete(
+        self, poll_interval: int = 5, timeout: int = 600
+    ) -> JobStatusV1Response:
+        """
+        Polls job status until it completes or fails.
+        The status is always fetched at least once; the timeout is only
+        checked after a non-terminal status has been seen.
+        Parameters
+        ----------
+        poll_interval : int, optional
+            Time in seconds between polling attempts (default is 5).
+        timeout : int, optional
+            Maximum time to wait for completion in seconds (default is 600).
+        Returns
+        -------
+        JobStatusV1Response
+            Final job status.
+        Raises
+        ------
+        TimeoutError
+            If the job does not complete within the given timeout.
+        """
+        deadline = time.monotonic() + timeout
+        while True:
+            status = self.get_status()
+            if status.job_state.lower() in {"completed", "failed"}:
+                return status
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Job {self._job_id} did not complete within {timeout} seconds."
+                )
+            time.sleep(poll_interval)
+    def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
+        """
+        Get the mapping of input files to their corresponding output files.
+        Only the first input and first output of each job detail are used;
+        details lacking inputs or outputs are skipped.
+        Returns
+        -------
+        List[Dict[str, str]]
+            List of mappings with keys 'input_file' and 'output_file'.
+        """
+        job_status = self.get_status()
+        mappings: typing.List[typing.Dict[str, str]] = []
+        for detail in job_status.job_details or []:
+            if not (detail.inputs and detail.outputs):
+                continue
+            mappings.append(
+                {
+                    "input_file": detail.inputs[0].file_name,
+                    "output_file": detail.outputs[0].file_name,
+                }
+            )
+        return mappings
+    def download_outputs(self, output_dir: str) -> bool:
+        """
+        Download output files to the specified directory.
+        Each output is written to ``<output_dir>/<input_file>.json``. The
+        directory is created if it does not exist. The download client is
+        built without an explicit timeout.
+        Parameters
+        ----------
+        output_dir : str
+            Local directory where outputs will be saved.
+        Returns
+        -------
+        bool
+            True if all files downloaded successfully.
+        Raises
+        ------
+        RuntimeError
+            If a file fails to download.
+        """
+        mappings = self.get_output_mappings()
+        download_links = self._client.get_download_links(
+            job_id=self._job_id, files=[m["output_file"] for m in mappings]
+        )
+        os.makedirs(output_dir, exist_ok=True)
+        with httpx.Client() as client:
+            for m in mappings:
+                output_name = m["output_file"]
+                url = download_links.download_urls[output_name].file_url
+                response = client.get(url)
+                if not (HTTPStatus.OK <= response.status_code <= HTTPStatus.IM_USED):
+                    raise RuntimeError(
+                        f"Download failed for {output_name}: {response.status_code}"
+                    )
+                output_path = os.path.join(output_dir, f"{m['input_file']}.json")
+                with open(output_path, "wb") as f:
+                    f.write(response.content)
+        return True
+    def get_status(self) -> JobStatusV1Response:
+        """
+        Retrieve the current status of the job.
+        Returns
+        -------
+        JobStatusV1Response
+        """
+        return self._client.get_status(self._job_id)
+    def start(self) -> JobStatusV1Response:
+        """
+        Start the speech-to-text job processing.
+        Returns
+        -------
+        JobStatusV1Response
+        """
+        return self._client.start(job_id=self._job_id)
+    def exists(self) -> bool:
+        """
+        Check if the job exists in the system.
+        A status request is issued; ``httpx.HTTPStatusError`` is treated as
+        "does not exist". Any other exception propagates to the caller.
+        Returns
+        -------
+        bool
+        """
+        try:
+            self.get_status()
+        except httpx.HTTPStatusError:
+            return False
+        return True
+    def is_complete(self) -> bool:
+        """
+        Check if the job is either completed or failed.
+        Returns
+        -------
+        bool
+        """
+        status = self.get_status()
+        return status.job_state.lower() in {"completed", "failed"}
+    def is_successful(self) -> bool:
+        """
+        Check if the job completed successfully.
+        Returns
+        -------
+        bool
+        """
+        status = self.get_status()
+        return status.job_state.lower() == "completed"
+    def is_failed(self) -> bool:
+        """
+        Check if the job has failed.
+        Returns
+        -------
+        bool
+        """
+        status = self.get_status()
+        return status.job_state.lower() == "failed"

sarvamai/speech_to_text_translate_job/client.py CHANGED Viewed

@@ -10,7 +10,9 @@ from ..types.bulk_job_init_response_v_1 import BulkJobInitResponseV1
 from ..types.files_download_response import FilesDownloadResponse
 from ..types.files_upload_response import FilesUploadResponse
 from ..types.job_status_v_1_response import JobStatusV1Response
+from ..types.speech_to_text_translate_model import SpeechToTextTranslateModel
 from .raw_client import AsyncRawSpeechToTextTranslateJobClient, RawSpeechToTextTranslateJobClient
+from .job import AsyncSpeechToTextTranslateJob, SpeechToTextTranslateJob
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -236,6 +238,72 @@ class SpeechToTextTranslateJobClient:
         )
         return _response.data
+    def create_job(
+        self,
+        model: SpeechToTextTranslateModel = "saaras:v2.5",
+        with_diarization: bool = False,
+        prompt: typing.Optional[str] = None,
+        num_speakers: typing.Optional[int] = None,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> SpeechToTextTranslateJob:
+        """
+        Create a new Speech-to-Text-Translate bulk job.
+        The arguments are packed into job parameters, passed to
+        ``self.initialise``, and the returned job ID is wrapped in a job
+        handle bound to this client.
+        Parameters
+        ----------
+        model : SpeechToTextTranslateModel, default="saaras:v2.5"
+            The model to use for speech-to-text translation.
+        with_diarization : bool, default=False
+            Whether to enable speaker diarization (i.e., distinguishing who is speaking).
+        prompt : typing.Optional[str], default=None
+            An optional prompt to guide the transcription and translation model.
+        num_speakers : typing.Optional[int], default=None
+            The number of distinct speakers in the input audio, if known.
+        callback : typing.Optional[BulkJobCallbackParams], default=OMIT
+            Optional callback configuration to receive job completion events via webhook.
+        request_options : typing.Optional[RequestOptions], default=None
+            Optional configuration for request timeout, retries, etc.
+        Returns
+        -------
+        SpeechToTextTranslateJob
+            A handle to the newly created Speech-to-Text-Translate job.
+        """
+        job_parameters = SpeechToTextTranslateJobParametersParams(
+            prompt=prompt,  # type: ignore[typeddict-item]
+            model=model,
+            with_diarization=with_diarization,
+            num_speakers=num_speakers,  # type: ignore[typeddict-item]
+        )
+        init_response = self.initialise(
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
+        )
+        return SpeechToTextTranslateJob(job_id=init_response.job_id, client=self)
+    def get_job(self, job_id: str) -> SpeechToTextTranslateJob:
+        """
+        Get an existing Speech-to-Text-Translate job handle by job ID.
+        Only a handle object is constructed here; this method itself sends
+        no request.
+        Parameters
+        ----------
+        job_id : str
+            The job ID of the previously created Speech-to-Text-Translate job.
+        Returns
+        -------
+        SpeechToTextTranslateJob
+            A job handle which can be used to check status or retrieve results.
+        """
+        job_handle = SpeechToTextTranslateJob(job_id=job_id, client=self)
+        return job_handle
 class AsyncSpeechToTextTranslateJobClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -496,3 +564,68 @@ class AsyncSpeechToTextTranslateJobClient:
             job_id=job_id, files=files, ptu_id=ptu_id, request_options=request_options
         )
         return _response.data
+    async def create_job(
+        self,
+        model: SpeechToTextTranslateModel = "saaras:v2.5",
+        with_diarization: bool = False,
+        prompt: typing.Optional[str] = None,
+        num_speakers: typing.Optional[int] = None,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> "AsyncSpeechToTextTranslateJob":
+        """
+        Create a new Speech-to-Text-Translate bulk job.
+        The arguments are packed into job parameters, passed to the awaited
+        ``self.initialise``, and the returned job ID is wrapped in an async
+        job handle bound to this client.
+        Parameters
+        ----------
+        model : SpeechToTextTranslateModel, default="saaras:v2.5"
+            The model to use for speech-to-text translation.
+        with_diarization : bool, default=False
+            Whether to enable speaker diarization (i.e., distinguishing who is speaking).
+        prompt : typing.Optional[str], default=None
+            An optional prompt to guide the transcription and translation model.
+        num_speakers : typing.Optional[int], default=None
+            The number of distinct speakers in the input audio, if known.
+        callback : typing.Optional[BulkJobCallbackParams], default=OMIT
+            Optional callback configuration to receive job completion events via webhook.
+        request_options : typing.Optional[RequestOptions], default=None
+            Optional configuration for request timeout, retries, etc.
+        Returns
+        -------
+        AsyncSpeechToTextTranslateJob
+            A handle to the newly created job.
+        """
+        job_parameters = SpeechToTextTranslateJobParametersParams(
+            prompt=prompt,  # type: ignore[typeddict-item]
+            model=model,
+            with_diarization=with_diarization,  # type: ignore[typeddict-item]
+            num_speakers=num_speakers,  # type: ignore[typeddict-item]
+        )
+        init_response = await self.initialise(
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
+        )
+        return AsyncSpeechToTextTranslateJob(job_id=init_response.job_id, client=self)
+    async def get_job(self, job_id: str) -> "AsyncSpeechToTextTranslateJob":
+        """
+        Get an existing Speech-to-Text-Translate job handle by job ID.
+        Only a handle object is constructed here; this method itself awaits
+        nothing and sends no request.
+        Parameters
+        ----------
+        job_id : str
+            The job ID of the previously created speech-to-text-translate job.
+        Returns
+        -------
+        AsyncSpeechToTextTranslateJob
+            A job handle which can be used to check status or retrieve results.
+        """
+        job_handle = AsyncSpeechToTextTranslateJob(job_id=job_id, client=self)
+        return job_handle

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl

sarvamai 0.1.13a2__py3-none-any.whl → 0.1.15__py3-none-any.whl