sarvamai-0.1.23a2-py3-none-any.whl → sarvamai-0.1.23a4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +203 -405
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +34 -186
- sarvamai/core/__init__.py +21 -76
- sarvamai/core/client_wrapper.py +3 -19
- sarvamai/core/force_multipart.py +2 -4
- sarvamai/core/http_client.py +97 -217
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/jsonable_encoder.py +0 -8
- sarvamai/core/pydantic_utilities.py +4 -110
- sarvamai/errors/__init__.py +6 -40
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +62 -150
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_job_parameters.py +10 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -2
- sarvamai/speech_to_text/client.py +29 -2
- sarvamai/speech_to_text/raw_client.py +81 -56
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_job/raw_client.py +120 -120
- sarvamai/speech_to_text_streaming/__init__.py +10 -38
- sarvamai/speech_to_text_streaming/client.py +32 -6
- sarvamai/speech_to_text_streaming/raw_client.py +32 -6
- sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
- sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
- sarvamai/text/raw_client.py +60 -60
- sarvamai/text_to_speech/client.py +100 -16
- sarvamai/text_to_speech/raw_client.py +120 -36
- sarvamai/text_to_speech_streaming/__init__.py +2 -29
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +102 -222
- sarvamai/types/chat_completion_request_message.py +2 -6
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/mode.py +7 -0
- sarvamai/types/speech_to_text_job_parameters.py +10 -1
- sarvamai/types/speech_to_text_model.py +3 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -2
- sarvamai/types/speech_to_text_translate_model.py +1 -1
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
- sarvamai/core/http_sse/__init__.py +0 -42
- sarvamai/core/http_sse/_api.py +0 -112
- sarvamai/core/http_sse/_decoders.py +0 -61
- sarvamai/core/http_sse/_exceptions.py +0 -7
- sarvamai/core/http_sse/_models.py +0 -17
- {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
sarvamai/speech_to_text_streaming/__init__.py:

```diff
@@ -2,48 +2,20 @@
 
 # isort: skip_file
 
-import
-
-
-
-
-
-
-
-    SpeechToTextStreamingVadSignals,
-)
-_dynamic_imports: typing.Dict[str, str] = {
-    "SpeechToTextStreamingFlushSignal": ".types",
-    "SpeechToTextStreamingHighVadSensitivity": ".types",
-    "SpeechToTextStreamingLanguageCode": ".types",
-    "SpeechToTextStreamingVadSignals": ".types",
-}
-
-
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-
-
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
-
+from .types import (
+    SpeechToTextStreamingFlushSignal,
+    SpeechToTextStreamingHighVadSensitivity,
+    SpeechToTextStreamingLanguageCode,
+    SpeechToTextStreamingMode,
+    SpeechToTextStreamingModel,
+    SpeechToTextStreamingVadSignals,
+)
 
 __all__ = [
     "SpeechToTextStreamingFlushSignal",
     "SpeechToTextStreamingHighVadSensitivity",
     "SpeechToTextStreamingLanguageCode",
+    "SpeechToTextStreamingMode",
+    "SpeechToTextStreamingModel",
    "SpeechToTextStreamingVadSignals",
 ]
```
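The removed block above is the generated lazy-import machinery (`_dynamic_imports` plus module-level `__getattr__`/`__dir__`); 0.1.23a4 switches to eager imports and exports the two new names. A minimal sketch of the effect on callers, assuming only that the package is importable as `sarvamai.speech_to_text_streaming` (consistent with the file paths listed above):

```python
# With the eager imports in 0.1.23a4, these names resolve at import time
# instead of going through the removed __getattr__ hook.
from sarvamai.speech_to_text_streaming import (
    SpeechToTextStreamingMode,   # new export in 0.1.23a4
    SpeechToTextStreamingModel,  # new export in 0.1.23a4
)
```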
sarvamai/speech_to_text_streaming/client.py:

```diff
@@ -14,6 +14,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
 from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
 from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
 from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
 from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 try:
@@ -42,7 +44,8 @@ class SpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -60,11 +63,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -93,6 +105,8 @@ class SpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
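The hunks above add optional `model` and `mode` keyword arguments to `SpeechToTextStreamingClient.connect` and forward them as `model` and `mode` query parameters. A hedged usage sketch follows; the client constructor, the `api_subscription_key` argument, the `speech_to_text_streaming.connect(...)` attribute path, the context-manager usage, and the `"hi-IN"` language code are assumptions based on the package layout, not shown in this diff.

```python
from sarvamai import SarvamAI  # top-level client; constructor signature assumed

client = SarvamAI(api_subscription_key="...")

# New in 0.1.23a4: `model` and `mode`. Per the docstring, `mode` is only
# applicable when model is "saaras:v3".
with client.speech_to_text_streaming.connect(
    language_code="hi-IN",   # BCP-47 code (example value)
    model="saaras:v3",
    mode="transcribe",       # or "translate", "indic-en", "verbatim", "translit", "codemix"
    sample_rate="16000",     # optional; only 16kHz/8kHz accepted as a connection parameter
) as socket:
    ...  # send audio frames and read transcripts via the socket client
```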
sarvamai/speech_to_text_streaming/client.py (continued):

```diff
@@ -145,7 +159,8 @@ class AsyncSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -163,11 +178,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -196,6 +220,8 @@ class AsyncSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
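The asynchronous client gets the identical signature change. A sketch of the async path, with the same caveats as above (the `AsyncSarvamAI` name and async-context-manager semantics are assumed, not shown in this diff):

```python
import asyncio

from sarvamai import AsyncSarvamAI  # assumed async client name

async def main() -> None:
    client = AsyncSarvamAI(api_subscription_key="...")
    async with client.speech_to_text_streaming.connect(
        language_code="hi-IN",
        model="saaras:v3",
        mode="indic-en",  # translate Indic speech to English text
    ) as socket:
        ...  # stream audio and consume results

asyncio.run(main())
```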
sarvamai/speech_to_text_streaming/raw_client.py:

```diff
@@ -13,6 +13,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
 from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
 from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
 from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
 from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 try:
@@ -30,7 +32,8 @@ class RawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -48,11 +51,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -81,6 +93,8 @@ class RawSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
@@ -122,7 +136,8 @@ class AsyncRawSpeechToTextStreamingClient:
         self,
         *,
         language_code: SpeechToTextStreamingLanguageCode,
-        model: typing.Optional[
+        model: typing.Optional[SpeechToTextStreamingModel] = None,
+        mode: typing.Optional[SpeechToTextStreamingMode] = None,
         sample_rate: typing.Optional[str] = None,
         high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
         vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -140,11 +155,20 @@
         Parameters
         ----------
         language_code : SpeechToTextStreamingLanguageCode
-            Language code for speech recognition
+            Language code for speech recognition (BCP-47 format)
 
-        model : typing.Optional[
+        model : typing.Optional[SpeechToTextStreamingModel]
             Speech to text model to use
 
+        mode : typing.Optional[SpeechToTextStreamingMode]
+            Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
+            - transcribe: Standard Whisper transcription
+            - translate: Standard Whisper translation to English
+            - indic-en: Translate Indic languages to English
+            - verbatim: Exact transcription in original script
+            - translit: Transliteration to Latin script
+            - codemix: Code-mixed output (native + English)
+
         sample_rate : typing.Optional[str]
             Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
 
@@ -173,6 +197,8 @@ class AsyncRawSpeechToTextStreamingClient:
         query_params = query_params.add("language-code", language_code)
         if model is not None:
             query_params = query_params.add("model", model)
+        if mode is not None:
+            query_params = query_params.add("mode", mode)
         if sample_rate is not None:
             query_params = query_params.add("sample_rate", sample_rate)
         if high_vad_sensitivity is not None:
```
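In all four clients the new values are simply appended to the WebSocket query string via `query_params.add("mode", mode)`. As a rough illustration only (the real endpoint, path, and query-params helper are SDK internals not shown in this diff), the resulting connection URL carries the parameters like this:

```python
from urllib.parse import urlencode

# Keys mirror the diff: "language-code" is hyphenated while "sample_rate"
# uses an underscore; "model" and "mode" are passed through as-is.
params = {
    "language-code": "hi-IN",
    "model": "saaras:v3",
    "mode": "verbatim",
    "sample_rate": "16000",
}
print("wss://<streaming-endpoint>?" + urlencode(params))
# wss://<streaming-endpoint>?language-code=hi-IN&model=saaras%3Av3&mode=verbatim&sample_rate=16000
```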
sarvamai/speech_to_text_streaming/types/__init__.py:

```diff
@@ -2,46 +2,18 @@
 
 # isort: skip_file
 
-import
-from
-
-
-
-
-from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
-from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
-_dynamic_imports: typing.Dict[str, str] = {
-    "SpeechToTextStreamingFlushSignal": ".speech_to_text_streaming_flush_signal",
-    "SpeechToTextStreamingHighVadSensitivity": ".speech_to_text_streaming_high_vad_sensitivity",
-    "SpeechToTextStreamingLanguageCode": ".speech_to_text_streaming_language_code",
-    "SpeechToTextStreamingVadSignals": ".speech_to_text_streaming_vad_signals",
-}
-
-
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-
-
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
-
+from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
+from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
+from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
+from .speech_to_text_streaming_mode import SpeechToTextStreamingMode
+from .speech_to_text_streaming_model import SpeechToTextStreamingModel
+from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
 
 __all__ = [
     "SpeechToTextStreamingFlushSignal",
     "SpeechToTextStreamingHighVadSensitivity",
     "SpeechToTextStreamingLanguageCode",
+    "SpeechToTextStreamingMode",
+    "SpeechToTextStreamingModel",
     "SpeechToTextStreamingVadSignals",
 ]
```