PyPI - sarvamai - Versions diffs - 0.1.5a10__tar.gz → 0.1.5a13__tar.gz - Mend

sarvamai 0.1.5a10__tar.gz → 0.1.5a13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarvamai
-Version: 0.1.5a10
+Version: 0.1.5a13
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "sarvamai"
 [tool.poetry]
 name = "sarvamai"
-version = "0.1.5a10"
+version = "0.1.5a13"
 description = ""
 readme = "README.md"
 authors = []

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/__init__.py RENAMED Viewed

@@ -16,6 +16,7 @@ from .types import (
     ChatCompletionResponseMessage,
     Choice,
     CompletionUsage,
+    ConfigMessage,
     CreateChatCompletionResponse,
     DiarizedEntry,
     DiarizedTranscript,
@@ -29,18 +30,22 @@ from .types import (
     LanguageIdentificationResponse,
     NumeralsFormat,
     ReasoningEffort,
+    ResponseType,
     Role,
     SarvamModelIds,
     SpeechSampleRate,
     SpeechToTextLanguage,
     SpeechToTextModel,
     SpeechToTextResponse,
+    SpeechToTextResponseData,
     SpeechToTextStreamingResponse,
-    SpeechToTextStreamingResponseData,
-    SpeechToTextStreamingResponseType,
+    SpeechToTextTranscriptionData,
     SpeechToTextTranslateLanguage,
     SpeechToTextTranslateModel,
     SpeechToTextTranslateResponse,
+    SpeechToTextTranslateResponseData,
+    SpeechToTextTranslateStreamingResponse,
+    SpeechToTextTranslateTranscriptionData,
     SpokenFormNumeralsFormat,
     StopConfiguration,
     TextToSpeechLanguage,
@@ -48,7 +53,6 @@ from .types import (
     TextToSpeechResponse,
     TextToSpeechSpeaker,
     TimestampsModel,
-    TranscriptionData,
     TranscriptionMetrics,
     TranslateMode,
     TranslateModel,
@@ -69,7 +73,7 @@ from .errors import (
     TooManyRequestsError,
     UnprocessableEntityError,
 )
-from . import chat, speech_to_text, speech_to_text_streaming, text, text_to_speech
+from . import chat, speech_to_text, speech_to_text_streaming, speech_to_text_translate_streaming, text, text_to_speech
 from .client import AsyncSarvamAI, SarvamAI
 from .environment import SarvamAIEnvironment
 from .requests import (
@@ -85,6 +89,7 @@ from .requests import (
     ChatCompletionResponseMessageParams,
     ChoiceParams,
     CompletionUsageParams,
+    ConfigMessageParams,
     CreateChatCompletionResponseParams,
     DiarizedEntryParams,
     DiarizedTranscriptParams,
@@ -93,19 +98,23 @@ from .requests import (
     ErrorMessageParams,
     EventsDataParams,
     LanguageIdentificationResponseParams,
+    SpeechToTextResponseDataParams,
     SpeechToTextResponseParams,
-    SpeechToTextStreamingResponseDataParams,
     SpeechToTextStreamingResponseParams,
+    SpeechToTextTranscriptionDataParams,
+    SpeechToTextTranslateResponseDataParams,
     SpeechToTextTranslateResponseParams,
+    SpeechToTextTranslateStreamingResponseParams,
+    SpeechToTextTranslateTranscriptionDataParams,
     StopConfigurationParams,
     TextToSpeechResponseParams,
     TimestampsModelParams,
-    TranscriptionDataParams,
     TranscriptionMetricsParams,
     TranslationResponseParams,
     TransliterationResponseParams,
 )
 from .speech_to_text_streaming import SpeechToTextStreamingLanguageCode, SpeechToTextStreamingModel
+from .speech_to_text_translate_streaming import SpeechToTextTranslateStreamingModel
 from .version import __version__
 __all__ = [
@@ -136,6 +145,8 @@ __all__ = [
     "ChoiceParams",
     "CompletionUsage",
     "CompletionUsageParams",
+    "ConfigMessage",
+    "ConfigMessageParams",
     "CreateChatCompletionResponse",
     "CreateChatCompletionResponseParams",
     "DiarizedEntry",
@@ -159,6 +170,7 @@ __all__ = [
     "LanguageIdentificationResponseParams",
     "NumeralsFormat",
     "ReasoningEffort",
+    "ResponseType",
     "Role",
     "SarvamAI",
     "SarvamAIEnvironment",
@@ -168,18 +180,26 @@ __all__ = [
     "SpeechToTextLanguage",
     "SpeechToTextModel",
     "SpeechToTextResponse",
+    "SpeechToTextResponseData",
+    "SpeechToTextResponseDataParams",
     "SpeechToTextResponseParams",
     "SpeechToTextStreamingLanguageCode",
     "SpeechToTextStreamingModel",
     "SpeechToTextStreamingResponse",
-    "SpeechToTextStreamingResponseData",
-    "SpeechToTextStreamingResponseDataParams",
     "SpeechToTextStreamingResponseParams",
-    "SpeechToTextStreamingResponseType",
+    "SpeechToTextTranscriptionData",
+    "SpeechToTextTranscriptionDataParams",
     "SpeechToTextTranslateLanguage",
     "SpeechToTextTranslateModel",
     "SpeechToTextTranslateResponse",
+    "SpeechToTextTranslateResponseData",
+    "SpeechToTextTranslateResponseDataParams",
     "SpeechToTextTranslateResponseParams",
+    "SpeechToTextTranslateStreamingModel",
+    "SpeechToTextTranslateStreamingResponse",
+    "SpeechToTextTranslateStreamingResponseParams",
+    "SpeechToTextTranslateTranscriptionData",
+    "SpeechToTextTranslateTranscriptionDataParams",
     "SpokenFormNumeralsFormat",
     "StopConfiguration",
     "StopConfigurationParams",
@@ -191,8 +211,6 @@ __all__ = [
     "TimestampsModel",
     "TimestampsModelParams",
     "TooManyRequestsError",
-    "TranscriptionData",
-    "TranscriptionDataParams",
     "TranscriptionMetrics",
     "TranscriptionMetricsParams",
     "TranslateMode",
@@ -212,6 +230,7 @@ __all__ = [
     "chat",
     "speech_to_text",
     "speech_to_text_streaming",
+    "speech_to_text_translate_streaming",
     "text",
     "text_to_speech",
 ]

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/client.py RENAMED Viewed

@@ -10,6 +10,10 @@ from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from .environment import SarvamAIEnvironment
 from .speech_to_text.client import AsyncSpeechToTextClient, SpeechToTextClient
 from .speech_to_text_streaming.client import AsyncSpeechToTextStreamingClient, SpeechToTextStreamingClient
+from .speech_to_text_translate_streaming.client import (
+    AsyncSpeechToTextTranslateStreamingClient,
+    SpeechToTextTranslateStreamingClient,
+)
 from .text.client import AsyncTextClient, TextClient
 from .text_to_speech.client import AsyncTextToSpeechClient, TextToSpeechClient
@@ -79,6 +83,9 @@ class SarvamAI:
         self.text_to_speech = TextToSpeechClient(client_wrapper=self._client_wrapper)
         self.chat = ChatClient(client_wrapper=self._client_wrapper)
         self.speech_to_text_streaming = SpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
+        self.speech_to_text_translate_streaming = SpeechToTextTranslateStreamingClient(
+            client_wrapper=self._client_wrapper
+        )
 class AsyncSarvamAI:
@@ -146,3 +153,6 @@ class AsyncSarvamAI:
         self.text_to_speech = AsyncTextToSpeechClient(client_wrapper=self._client_wrapper)
         self.chat = AsyncChatClient(client_wrapper=self._client_wrapper)
         self.speech_to_text_streaming = AsyncSpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
+        self.speech_to_text_translate_streaming = AsyncSpeechToTextTranslateStreamingClient(
+            client_wrapper=self._client_wrapper
+        )

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/core/client_wrapper.py RENAMED Viewed

@@ -17,10 +17,10 @@ class BaseClientWrapper:
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "sarvamai/0.1.5a10",
+            "User-Agent": "sarvamai/0.1.5a13",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "sarvamai",
-            "X-Fern-SDK-Version": "0.1.5a10",
+            "X-Fern-SDK-Version": "0.1.5a13",
         }
         headers["api-subscription-key"] = self.api_subscription_key
         return headers

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/requests/__init__.py RENAMED Viewed

@@ -16,6 +16,7 @@ from .chat_completion_request_user_message import ChatCompletionRequestUserMessa
 from .chat_completion_response_message import ChatCompletionResponseMessageParams
 from .choice import ChoiceParams
 from .completion_usage import CompletionUsageParams
+from .config_message import ConfigMessageParams
 from .create_chat_completion_response import CreateChatCompletionResponseParams
 from .diarized_entry import DiarizedEntryParams
 from .diarized_transcript import DiarizedTranscriptParams
@@ -25,13 +26,16 @@ from .error_message import ErrorMessageParams
 from .events_data import EventsDataParams
 from .language_identification_response import LanguageIdentificationResponseParams
 from .speech_to_text_response import SpeechToTextResponseParams
+from .speech_to_text_response_data import SpeechToTextResponseDataParams
 from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
-from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseDataParams
+from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
 from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
+from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
+from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
+from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
 from .stop_configuration import StopConfigurationParams
 from .text_to_speech_response import TextToSpeechResponseParams
 from .timestamps_model import TimestampsModelParams
-from .transcription_data import TranscriptionDataParams
 from .transcription_metrics import TranscriptionMetricsParams
 from .translation_response import TranslationResponseParams
 from .transliteration_response import TransliterationResponseParams
@@ -49,6 +53,7 @@ __all__ = [
     "ChatCompletionResponseMessageParams",
     "ChoiceParams",
     "CompletionUsageParams",
+    "ConfigMessageParams",
     "CreateChatCompletionResponseParams",
     "DiarizedEntryParams",
     "DiarizedTranscriptParams",
@@ -57,14 +62,17 @@ __all__ = [
     "ErrorMessageParams",
     "EventsDataParams",
     "LanguageIdentificationResponseParams",
+    "SpeechToTextResponseDataParams",
     "SpeechToTextResponseParams",
-    "SpeechToTextStreamingResponseDataParams",
     "SpeechToTextStreamingResponseParams",
+    "SpeechToTextTranscriptionDataParams",
+    "SpeechToTextTranslateResponseDataParams",
     "SpeechToTextTranslateResponseParams",
+    "SpeechToTextTranslateStreamingResponseParams",
+    "SpeechToTextTranslateTranscriptionDataParams",
     "StopConfigurationParams",
     "TextToSpeechResponseParams",
     "TimestampsModelParams",
-    "TranscriptionDataParams",
     "TranscriptionMetricsParams",
     "TranslationResponseParams",
     "TransliterationResponseParams",

sarvamai-0.1.5a13/src/sarvamai/requests/config_message.py ADDED Viewed

@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+import typing_extensions
+class ConfigMessageParams(typing_extensions.TypedDict):
+    type: typing.Literal["config"]
+    """
+    Message type identifier for configuration
+    """
+    prompt: typing_extensions.NotRequired[str]
+    """
+    Prompt for ASR model to improve transcription accuracy
+    """

sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_response_data.py ADDED Viewed

@@ -0,0 +1,9 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from .error_data import ErrorDataParams
+from .events_data import EventsDataParams
+from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
+SpeechToTextResponseDataParams = typing.Union[SpeechToTextTranscriptionDataParams, ErrorDataParams, EventsDataParams]

sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_streaming_response.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+from ..types.response_type import ResponseType
+from .speech_to_text_response_data import SpeechToTextResponseDataParams
+class SpeechToTextStreamingResponseParams(typing_extensions.TypedDict):
+    type: ResponseType
+    data: SpeechToTextResponseDataParams

sarvamai-0.1.5a10/src/sarvamai/requests/transcription_data.py → sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_transcription_data.py RENAMED Viewed

@@ -6,7 +6,7 @@ import typing_extensions
 from .transcription_metrics import TranscriptionMetricsParams
-class TranscriptionDataParams(typing_extensions.TypedDict):
+class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
     request_id: str
     """
     Unique identifier for the request
@@ -14,7 +14,7 @@ class TranscriptionDataParams(typing_extensions.TypedDict):
     transcript: str
     """
-    Transcript of the provided speech
+    Transcript of the provided speech in original language
     """
     timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]

sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_translate_response_data.py ADDED Viewed

@@ -0,0 +1,11 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+from .error_data import ErrorDataParams
+from .events_data import EventsDataParams
+from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
+SpeechToTextTranslateResponseDataParams = typing.Union[
+    SpeechToTextTranslateTranscriptionDataParams, ErrorDataParams, EventsDataParams
+]

sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_translate_streaming_response.py ADDED Viewed

@@ -0,0 +1,10 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+from ..types.response_type import ResponseType
+from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
+class SpeechToTextTranslateStreamingResponseParams(typing_extensions.TypedDict):
+    type: ResponseType
+    data: SpeechToTextTranslateResponseDataParams

sarvamai-0.1.5a13/src/sarvamai/requests/speech_to_text_translate_transcription_data.py ADDED Viewed

@@ -0,0 +1,23 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing_extensions
+from .transcription_metrics import TranscriptionMetricsParams
+class SpeechToTextTranslateTranscriptionDataParams(typing_extensions.TypedDict):
+    request_id: str
+    """
+    Unique identifier for the request
+    """
+    transcript: str
+    """
+    English translation of the provided speech
+    """
+    language_code: typing_extensions.NotRequired[str]
+    """
+    BCP-47 code of detected source language (null when language detection is in progress)
+    """
+    metrics: TranscriptionMetricsParams

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/speech_to_text_streaming/client.py RENAMED Viewed

@@ -35,7 +35,7 @@ class SpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: SpeechToTextStreamingModel,
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
         high_vad_sensitivity: typing.Optional[str] = None,
         vad_signals: typing.Optional[str] = None,
         api_subscription_key: typing.Optional[str] = None,
@@ -49,7 +49,7 @@ class SpeechToTextStreamingClient:
         language_code : SpeechToTextStreamingLanguageCode
             Language code for speech recognition
-        model : SpeechToTextStreamingModel
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
         high_vad_sensitivity : typing.Optional[str]
@@ -122,7 +122,7 @@ class AsyncSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: SpeechToTextStreamingModel,
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
         high_vad_sensitivity: typing.Optional[str] = None,
         vad_signals: typing.Optional[str] = None,
         api_subscription_key: typing.Optional[str] = None,
@@ -136,7 +136,7 @@ class AsyncSpeechToTextStreamingClient:
         language_code : SpeechToTextStreamingLanguageCode
             Language code for speech recognition
-        model : SpeechToTextStreamingModel
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
         high_vad_sensitivity : typing.Optional[str]

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/speech_to_text_streaming/raw_client.py RENAMED Viewed

@@ -23,7 +23,7 @@ class RawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: SpeechToTextStreamingModel,
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
         high_vad_sensitivity: typing.Optional[str] = None,
         vad_signals: typing.Optional[str] = None,
         api_subscription_key: typing.Optional[str] = None,
@@ -37,7 +37,7 @@ class RawSpeechToTextStreamingClient:
         language_code : SpeechToTextStreamingLanguageCode
             Language code for speech recognition
-        model : SpeechToTextStreamingModel
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
         high_vad_sensitivity : typing.Optional[str]
@@ -99,7 +99,7 @@ class AsyncRawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: SpeechToTextStreamingModel,
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
         high_vad_sensitivity: typing.Optional[str] = None,
         vad_signals: typing.Optional[str] = None,
         api_subscription_key: typing.Optional[str] = None,
@@ -113,7 +113,7 @@ class AsyncRawSpeechToTextStreamingClient:
         language_code : SpeechToTextStreamingLanguageCode
             Language code for speech recognition
-        model : SpeechToTextStreamingModel
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
         high_vad_sensitivity : typing.Optional[str]

{sarvamai-0.1.5a10 → sarvamai-0.1.5a13}/src/sarvamai/speech_to_text_streaming/socket_client.py RENAMED Viewed

@@ -7,6 +7,7 @@ import websockets
 import websockets.sync.connection as websockets_sync_connection
 from ..core.events import EventEmitterMixin, EventType
 from ..core.pydantic_utilities import parse_obj_as
+from ..types.audio_data import AudioData
 from ..types.audio_message import AudioMessage
 from ..types.speech_to_text_streaming_response import SpeechToTextStreamingResponse
@@ -44,7 +45,19 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
         finally:
             self._emit(EventType.CLOSE, None)
-    async def send_audio_message(self, message: AudioMessage) -> None:
+    async def transcribe(self, audio: str, encoding="audio/wav", sample_rate=16000):
+        """
+        Sends transcription request to the server.
+        :param audio: Base64 encoded audio data
+        :param encoding: Audio encoding format (default is "audio/wav")
+        :param sample_rate: Audio sample rate in Hz (default is 16000)
+        """
+        return await self._send_speech_to_text_streaming_audio_message(
+            message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
+        )
+    async def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
         """
         Send a message to the websocket connection.
         The message will be sent as a AudioMessage.
@@ -81,6 +94,7 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
     def __iter__(self):
         for message in self._websocket:
+            message = json.loads(message) if isinstance(message, str) else message
             yield parse_obj_as(SpeechToTextStreamingSocketClientResponse, message)  # type: ignore
     def start_listening(self):
@@ -104,12 +118,16 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
         finally:
             self._emit(EventType.CLOSE, None)
-    def send_audio_message(self, message: AudioMessage) -> None:
+    def transcribe(self, audio: str, encoding="audio/wav", sample_rate=16000) -> None:
         """
-        Send a message to the websocket connection.
-        The message will be sent as a AudioMessage.
+        Sends transcription request to the server.
+        :param audio: Base64 encoded audio data
+        :param encoding (Optional): Audio encoding format (default is "audio/wav")
+        :param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
         """
-        self._send_model(message)
+        return self._send_speech_to_text_streaming_audio_message(
+            message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
+        )
     def recv(self) -> SpeechToTextStreamingSocketClientResponse:
         """
@@ -119,6 +137,13 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
         data = json.loads(data) if isinstance(data, str) else data
         return parse_obj_as(SpeechToTextStreamingSocketClientResponse, data)  # type: ignore
+    def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
+        """
+        Send a message to the websocket connection.
+        The message will be sent as a AudioMessage.
+        """
+        self._send_model(message)
     def _send(self, data: typing.Any) -> None:
         """
         Send a message to the websocket connection.

sarvamai-0.1.5a13/src/sarvamai/speech_to_text_translate_streaming/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+# isort: skip_file
+from .types import SpeechToTextTranslateStreamingModel
+__all__ = ["SpeechToTextTranslateStreamingModel"]

sarvamai 0.1.5a10__tar.gz → 0.1.5a13__tar.gz

sarvamai 0.1.5a10__tar.gz → 0.1.5a13__tar.gz