PyPI - sarvamai - Versions diffs - 0.1.23a3__py3-none-any.whl → 0.1.23a4__py3-none-any.whl - Mend

sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

sarvamai/__init__.py +203 -405
sarvamai/chat/raw_client.py +20 -20
sarvamai/client.py +34 -186
sarvamai/core/__init__.py +21 -76
sarvamai/core/client_wrapper.py +3 -19
sarvamai/core/force_multipart.py +2 -4
sarvamai/core/http_client.py +97 -217
sarvamai/core/http_response.py +1 -1
sarvamai/core/jsonable_encoder.py +0 -8
sarvamai/core/pydantic_utilities.py +4 -110
sarvamai/errors/__init__.py +6 -40
sarvamai/errors/bad_request_error.py +1 -1
sarvamai/errors/forbidden_error.py +1 -1
sarvamai/errors/internal_server_error.py +1 -1
sarvamai/errors/service_unavailable_error.py +1 -1
sarvamai/errors/too_many_requests_error.py +1 -1
sarvamai/errors/unprocessable_entity_error.py +1 -1
sarvamai/requests/__init__.py +62 -150
sarvamai/requests/configure_connection.py +4 -0
sarvamai/requests/configure_connection_data.py +40 -11
sarvamai/requests/error_response_data.py +1 -1
sarvamai/requests/file_signed_url_details.py +1 -1
sarvamai/requests/speech_to_text_job_parameters.py +10 -1
sarvamai/requests/speech_to_text_transcription_data.py +2 -2
sarvamai/speech_to_text/client.py +29 -2
sarvamai/speech_to_text/raw_client.py +81 -56
sarvamai/speech_to_text_job/client.py +60 -15
sarvamai/speech_to_text_job/raw_client.py +120 -120
sarvamai/speech_to_text_streaming/__init__.py +10 -38
sarvamai/speech_to_text_streaming/client.py +32 -6
sarvamai/speech_to_text_streaming/raw_client.py +32 -6
sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
sarvamai/text/raw_client.py +60 -60
sarvamai/text_to_speech/client.py +100 -16
sarvamai/text_to_speech/raw_client.py +120 -36
sarvamai/text_to_speech_streaming/__init__.py +2 -29
sarvamai/text_to_speech_streaming/client.py +19 -6
sarvamai/text_to_speech_streaming/raw_client.py +19 -6
sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
sarvamai/types/__init__.py +102 -222
sarvamai/types/chat_completion_request_message.py +2 -6
sarvamai/types/configure_connection.py +4 -0
sarvamai/types/configure_connection_data.py +40 -11
sarvamai/types/configure_connection_data_model.py +5 -0
sarvamai/types/configure_connection_data_speaker.py +35 -1
sarvamai/types/error_response_data.py +1 -1
sarvamai/types/file_signed_url_details.py +1 -1
sarvamai/types/mode.py +7 -0
sarvamai/types/speech_to_text_job_parameters.py +10 -1
sarvamai/types/speech_to_text_model.py +3 -1
sarvamai/types/speech_to_text_transcription_data.py +2 -2
sarvamai/types/speech_to_text_translate_model.py +1 -1
sarvamai/types/text_to_speech_model.py +1 -1
sarvamai/types/text_to_speech_speaker.py +35 -1
{sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
{sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
sarvamai/core/http_sse/__init__.py +0 -42
sarvamai/core/http_sse/_api.py +0 -112
sarvamai/core/http_sse/_decoders.py +0 -61
sarvamai/core/http_sse/_exceptions.py +0 -7
sarvamai/core/http_sse/_models.py +0 -17
{sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0

sarvamai/errors/__init__.py CHANGED Viewed

@@ -2,46 +2,12 @@
 # isort: skip_file
-import typing
-from importlib import import_module
-if typing.TYPE_CHECKING:
-    from .bad_request_error import BadRequestError
-    from .forbidden_error import ForbiddenError
-    from .internal_server_error import InternalServerError
-    from .service_unavailable_error import ServiceUnavailableError
-    from .too_many_requests_error import TooManyRequestsError
-    from .unprocessable_entity_error import UnprocessableEntityError
-_dynamic_imports: typing.Dict[str, str] = {
-    "BadRequestError": ".bad_request_error",
-    "ForbiddenError": ".forbidden_error",
-    "InternalServerError": ".internal_server_error",
-    "ServiceUnavailableError": ".service_unavailable_error",
-    "TooManyRequestsError": ".too_many_requests_error",
-    "UnprocessableEntityError": ".unprocessable_entity_error",
-}
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
+from .bad_request_error import BadRequestError
+from .forbidden_error import ForbiddenError
+from .internal_server_error import InternalServerError
+from .service_unavailable_error import ServiceUnavailableError
+from .too_many_requests_error import TooManyRequestsError
+from .unprocessable_entity_error import UnprocessableEntityError
 __all__ = [
     "BadRequestError",

sarvamai/errors/bad_request_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class BadRequestError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=400, headers=headers, body=body)

sarvamai/errors/forbidden_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class ForbiddenError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=403, headers=headers, body=body)

sarvamai/errors/internal_server_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class InternalServerError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=500, headers=headers, body=body)

sarvamai/errors/service_unavailable_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class ServiceUnavailableError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=503, headers=headers, body=body)

sarvamai/errors/too_many_requests_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class TooManyRequestsError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=429, headers=headers, body=body)

sarvamai/errors/unprocessable_entity_error.py CHANGED Viewed

@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
 class UnprocessableEntityError(ApiError):
-    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
+    def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
         super().__init__(status_code=422, headers=headers, body=body)

sarvamai/requests/__init__.py CHANGED Viewed

@@ -2,156 +2,68 @@
 # isort: skip_file
-import typing
-from importlib import import_module
-if typing.TYPE_CHECKING:
-    from .audio_data import AudioDataParams
-    from .audio_message import AudioMessageParams
-    from .audio_output import AudioOutputParams
-    from .audio_output_data import AudioOutputDataParams
-    from .base_job_parameters import BaseJobParametersParams
-    from .bulk_job_callback import BulkJobCallbackParams
-    from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
-    from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
-    from .chat_completion_request_message import (
-        ChatCompletionRequestMessageParams,
-        ChatCompletionRequestMessage_AssistantParams,
-        ChatCompletionRequestMessage_SystemParams,
-        ChatCompletionRequestMessage_UserParams,
-    )
-    from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
-    from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
-    from .chat_completion_response_message import ChatCompletionResponseMessageParams
-    from .choice import ChoiceParams
-    from .completion_usage import CompletionUsageParams
-    from .config_message import ConfigMessageParams
-    from .configure_connection import ConfigureConnectionParams
-    from .configure_connection_data import ConfigureConnectionDataParams
-    from .create_chat_completion_response import CreateChatCompletionResponseParams
-    from .diarized_entry import DiarizedEntryParams
-    from .diarized_transcript import DiarizedTranscriptParams
-    from .error_data import ErrorDataParams
-    from .error_details import ErrorDetailsParams
-    from .error_message import ErrorMessageParams
-    from .error_response import ErrorResponseParams
-    from .error_response_data import ErrorResponseDataParams
-    from .event_response import EventResponseParams
-    from .event_response_data import EventResponseDataParams
-    from .events_data import EventsDataParams
-    from .file_signed_url_details import FileSignedUrlDetailsParams
-    from .files_download_response import FilesDownloadResponseParams
-    from .files_request import FilesRequestParams
-    from .files_upload_response import FilesUploadResponseParams
-    from .flush_signal import FlushSignalParams
-    from .job_status_v_1_response import JobStatusV1ResponseParams
-    from .language_identification_response import LanguageIdentificationResponseParams
-    from .ping_signal import PingSignalParams
-    from .send_text import SendTextParams
-    from .send_text_data import SendTextDataParams
-    from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
-    from .speech_to_text_response import SpeechToTextResponseParams
-    from .speech_to_text_response_data import SpeechToTextResponseDataParams
-    from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
-    from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
-    from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
-    from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
-    from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
-    from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
-    from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
-    from .stop_configuration import StopConfigurationParams
-    from .stt_flush_signal import SttFlushSignalParams
-    from .task_detail_v_1 import TaskDetailV1Params
-    from .task_file_details import TaskFileDetailsParams
-    from .text_to_speech_response import TextToSpeechResponseParams
-    from .timestamps_model import TimestampsModelParams
-    from .transcription_metrics import TranscriptionMetricsParams
-    from .translation_response import TranslationResponseParams
-    from .transliteration_response import TransliterationResponseParams
-_dynamic_imports: typing.Dict[str, str] = {
-    "AudioDataParams": ".audio_data",
-    "AudioMessageParams": ".audio_message",
-    "AudioOutputDataParams": ".audio_output_data",
-    "AudioOutputParams": ".audio_output",
-    "BaseJobParametersParams": ".base_job_parameters",
-    "BulkJobCallbackParams": ".bulk_job_callback",
-    "BulkJobInitResponseV1Params": ".bulk_job_init_response_v_1",
-    "ChatCompletionRequestAssistantMessageParams": ".chat_completion_request_assistant_message",
-    "ChatCompletionRequestMessageParams": ".chat_completion_request_message",
-    "ChatCompletionRequestMessage_AssistantParams": ".chat_completion_request_message",
-    "ChatCompletionRequestMessage_SystemParams": ".chat_completion_request_message",
-    "ChatCompletionRequestMessage_UserParams": ".chat_completion_request_message",
-    "ChatCompletionRequestSystemMessageParams": ".chat_completion_request_system_message",
-    "ChatCompletionRequestUserMessageParams": ".chat_completion_request_user_message",
-    "ChatCompletionResponseMessageParams": ".chat_completion_response_message",
-    "ChoiceParams": ".choice",
-    "CompletionUsageParams": ".completion_usage",
-    "ConfigMessageParams": ".config_message",
-    "ConfigureConnectionDataParams": ".configure_connection_data",
-    "ConfigureConnectionParams": ".configure_connection",
-    "CreateChatCompletionResponseParams": ".create_chat_completion_response",
-    "DiarizedEntryParams": ".diarized_entry",
-    "DiarizedTranscriptParams": ".diarized_transcript",
-    "ErrorDataParams": ".error_data",
-    "ErrorDetailsParams": ".error_details",
-    "ErrorMessageParams": ".error_message",
-    "ErrorResponseDataParams": ".error_response_data",
-    "ErrorResponseParams": ".error_response",
-    "EventResponseDataParams": ".event_response_data",
-    "EventResponseParams": ".event_response",
-    "EventsDataParams": ".events_data",
-    "FileSignedUrlDetailsParams": ".file_signed_url_details",
-    "FilesDownloadResponseParams": ".files_download_response",
-    "FilesRequestParams": ".files_request",
-    "FilesUploadResponseParams": ".files_upload_response",
-    "FlushSignalParams": ".flush_signal",
-    "JobStatusV1ResponseParams": ".job_status_v_1_response",
-    "LanguageIdentificationResponseParams": ".language_identification_response",
-    "PingSignalParams": ".ping_signal",
-    "SendTextDataParams": ".send_text_data",
-    "SendTextParams": ".send_text",
-    "SpeechToTextJobParametersParams": ".speech_to_text_job_parameters",
-    "SpeechToTextResponseDataParams": ".speech_to_text_response_data",
-    "SpeechToTextResponseParams": ".speech_to_text_response",
-    "SpeechToTextStreamingResponseParams": ".speech_to_text_streaming_response",
-    "SpeechToTextTranscriptionDataParams": ".speech_to_text_transcription_data",
-    "SpeechToTextTranslateJobParametersParams": ".speech_to_text_translate_job_parameters",
-    "SpeechToTextTranslateResponseDataParams": ".speech_to_text_translate_response_data",
-    "SpeechToTextTranslateResponseParams": ".speech_to_text_translate_response",
-    "SpeechToTextTranslateStreamingResponseParams": ".speech_to_text_translate_streaming_response",
-    "SpeechToTextTranslateTranscriptionDataParams": ".speech_to_text_translate_transcription_data",
-    "StopConfigurationParams": ".stop_configuration",
-    "SttFlushSignalParams": ".stt_flush_signal",
-    "TaskDetailV1Params": ".task_detail_v_1",
-    "TaskFileDetailsParams": ".task_file_details",
-    "TextToSpeechResponseParams": ".text_to_speech_response",
-    "TimestampsModelParams": ".timestamps_model",
-    "TranscriptionMetricsParams": ".transcription_metrics",
-    "TranslationResponseParams": ".translation_response",
-    "TransliterationResponseParams": ".transliteration_response",
-}
-def __getattr__(attr_name: str) -> typing.Any:
-    module_name = _dynamic_imports.get(attr_name)
-    if module_name is None:
-        raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
-    try:
-        module = import_module(module_name, __package__)
-        if module_name == f".{attr_name}":
-            return module
-        else:
-            return getattr(module, attr_name)
-    except ImportError as e:
-        raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
-    except AttributeError as e:
-        raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
-def __dir__():
-    lazy_attrs = list(_dynamic_imports.keys())
-    return sorted(lazy_attrs)
+from .audio_data import AudioDataParams
+from .audio_message import AudioMessageParams
+from .audio_output import AudioOutputParams
+from .audio_output_data import AudioOutputDataParams
+from .base_job_parameters import BaseJobParametersParams
+from .bulk_job_callback import BulkJobCallbackParams
+from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
+from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
+from .chat_completion_request_message import (
+    ChatCompletionRequestMessageParams,
+    ChatCompletionRequestMessage_AssistantParams,
+    ChatCompletionRequestMessage_SystemParams,
+    ChatCompletionRequestMessage_UserParams,
+)
+from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
+from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
+from .chat_completion_response_message import ChatCompletionResponseMessageParams
+from .choice import ChoiceParams
+from .completion_usage import CompletionUsageParams
+from .config_message import ConfigMessageParams
+from .configure_connection import ConfigureConnectionParams
+from .configure_connection_data import ConfigureConnectionDataParams
+from .create_chat_completion_response import CreateChatCompletionResponseParams
+from .diarized_entry import DiarizedEntryParams
+from .diarized_transcript import DiarizedTranscriptParams
+from .error_data import ErrorDataParams
+from .error_details import ErrorDetailsParams
+from .error_message import ErrorMessageParams
+from .error_response import ErrorResponseParams
+from .error_response_data import ErrorResponseDataParams
+from .event_response import EventResponseParams
+from .event_response_data import EventResponseDataParams
+from .events_data import EventsDataParams
+from .file_signed_url_details import FileSignedUrlDetailsParams
+from .files_download_response import FilesDownloadResponseParams
+from .files_request import FilesRequestParams
+from .files_upload_response import FilesUploadResponseParams
+from .flush_signal import FlushSignalParams
+from .job_status_v_1_response import JobStatusV1ResponseParams
+from .language_identification_response import LanguageIdentificationResponseParams
+from .ping_signal import PingSignalParams
+from .send_text import SendTextParams
+from .send_text_data import SendTextDataParams
+from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
+from .speech_to_text_response import SpeechToTextResponseParams
+from .speech_to_text_response_data import SpeechToTextResponseDataParams
+from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
+from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
+from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
+from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
+from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
+from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
+from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
+from .stop_configuration import StopConfigurationParams
+from .stt_flush_signal import SttFlushSignalParams
+from .task_detail_v_1 import TaskDetailV1Params
+from .task_file_details import TaskFileDetailsParams
+from .text_to_speech_response import TextToSpeechResponseParams
+from .timestamps_model import TimestampsModelParams
+from .transcription_metrics import TranscriptionMetricsParams
+from .translation_response import TranslationResponseParams
+from .transliteration_response import TransliterationResponseParams
 __all__ = [
     "AudioDataParams",

sarvamai/requests/configure_connection.py CHANGED Viewed

@@ -12,6 +12,10 @@ class ConfigureConnectionParams(typing_extensions.TypedDict):
     This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
     by sending a new config message. When a config update is sent, any text currently in the buffer
     will be automatically flushed and processed before applying the new configuration.
+    **Model-Specific Notes:**
+    - **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
+    - **bulbul:v3-beta:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
     """
     type: typing.Literal["config"]

sarvamai/requests/configure_connection_data.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # This file was auto-generated by Fern from our API Definition.
 import typing_extensions
+from ..types.configure_connection_data_model import ConfigureConnectionDataModel
 from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
 from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
 from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
@@ -8,21 +9,25 @@ from ..types.configure_connection_data_target_language_code import ConfigureConn
 class ConfigureConnectionDataParams(typing_extensions.TypedDict):
+    model: typing_extensions.NotRequired[ConfigureConnectionDataModel]
+    """
+    Specifies the model to use for text-to-speech conversion.
+    - **bulbul:v2** (default): Standard TTS model with pitch/loudness support
+    - **bulbul:v3-beta**: Advanced model with temperature control (no pitch/loudness)
+    """
     target_language_code: ConfigureConnectionDataTargetLanguageCode
     """
-    The language of the text is BCP-47 format
+    The language of the text in BCP-47 format
     """
     speaker: ConfigureConnectionDataSpeaker
     """
     The speaker voice to be used for the output audio.
-    **Default:** Anushka
-    **Model Compatibility (Speakers compatible with respective model):**
-    - **bulbul:v2:**
-      - Female: Anushka, Manisha, Vidya, Arya
-      - Male: Abhilash, Karun, Hitesh
+    **Model Compatibility:**
+    - **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
+    - **bulbul:v3-beta:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
     **Note:** Speaker selection must match the chosen model version.
     """
@@ -32,13 +37,18 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
     Controls the pitch of the audio. Lower values result in a deeper voice,
     while higher values make it sharper. The suitable range is between -0.75
     and 0.75. Default is 0.0.
+    **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
     """
     pace: typing_extensions.NotRequired[float]
     """
     Controls the speed of the audio. Lower values result in slower speech,
-    while higher values make it faster. The suitable range is between 0.5
-    and 2.0. Default is 1.0.
+    while higher values make it faster. Default is 1.0.
+    **Model-specific ranges:**
+    - **bulbul:v2:** 0.3 to 3.0
+    - **bulbul:v3-beta:** 0.5 to 2.0
     """
     loudness: typing_extensions.NotRequired[float]
@@ -46,19 +56,38 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
     Controls the loudness of the audio. Lower values result in quieter audio,
     while higher values make it louder. The suitable range is between 0.3
     and 3.0. Default is 1.0.
+    **Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
+    """
+    temperature: typing_extensions.NotRequired[float]
+    """
+    Controls the randomness of the output. Lower values make the output more
+    focused and deterministic, while higher values make it more random.
+    The suitable range is between 0.01 and 1.0. Default is 0.6.
+    **Note:** Only supported for bulbul:v3-beta. Will be ignored for bulbul:v2.
     """
     speech_sample_rate: typing_extensions.NotRequired[int]
     """
     Specifies the sample rate of the output audio. Supported values are
-    8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+    8000, 16000, 22050, 24000 Hz.
+    **Model-specific defaults:**
+    - **bulbul:v2:** 22050 Hz
+    - **bulbul:v3-beta:** 24000 Hz
     """
     enable_preprocessing: typing_extensions.NotRequired[bool]
     """
     Controls whether normalization of English words and numeric entities
     (e.g., numbers, dates) is performed. Set to true for better handling
-    of mixed-language text. Default is false.
+    of mixed-language text.
+    **Model-specific defaults:**
+    - **bulbul:v2:** false (optional)
+    - **bulbul:v3-beta:** Always enabled (cannot be disabled)
     """
     output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]

sarvamai/requests/error_response_data.py CHANGED Viewed

@@ -12,7 +12,7 @@ class ErrorResponseDataParams(typing_extensions.TypedDict):
     Optional error code for programmatic error handling
     """
-    details: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
+    details: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
     """
     Additional error details and context information
     """

sarvamai/requests/file_signed_url_details.py CHANGED Viewed

@@ -7,4 +7,4 @@ import typing_extensions
 class FileSignedUrlDetailsParams(typing_extensions.TypedDict):
     file_url: str
-    file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
+    file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]

sarvamai/requests/speech_to_text_job_parameters.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # This file was auto-generated by Fern from our API Definition.
 import typing_extensions
+from ..types.mode import Mode
 from ..types.speech_to_text_model import SpeechToTextModel
 from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
@@ -13,7 +14,15 @@ class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
     model: typing_extensions.NotRequired[SpeechToTextModel]
     """
-    Model to be used for speech to text
+    Model to be used for speech to text.
+    - **saarika:v2.5** (default)
+    - **saarika:v3**: Advanced transcription model
+    - **saaras:v3**: Advanced model with multiple modes
+    """
+    mode: typing_extensions.NotRequired[Mode]
+    """
+    Mode of operation. Only applicable for saaras:v3 model.
     """
     with_timestamps: typing_extensions.NotRequired[bool]

sarvamai/requests/speech_to_text_transcription_data.py CHANGED Viewed

@@ -17,12 +17,12 @@ class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
     Transcript of the provided speech in original language
     """
-    timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
+    timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
     """
     Timestamp information (if available)
     """
-    diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
+    diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
     """
     Diarized transcript of the provided speech
     """

sarvamai/speech_to_text/client.py CHANGED Viewed

@@ -6,6 +6,7 @@ from .. import core
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from ..types.input_audio_codec import InputAudioCodec
+from ..types.mode import Mode
 from ..types.speech_to_text_language import SpeechToTextLanguage
 from ..types.speech_to_text_model import SpeechToTextModel
 from ..types.speech_to_text_response import SpeechToTextResponse
@@ -37,6 +38,7 @@ class SpeechToTextClient:
         *,
         file: core.File,
         model: typing.Optional[SpeechToTextModel] = OMIT,
+        mode: typing.Optional[Mode] = OMIT,
         language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
         input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -63,7 +65,18 @@ class SpeechToTextClient:
         model : typing.Optional[SpeechToTextModel]
             Specifies the model to use for speech-to-text conversion.
-            Note:- Default model is `saarika:v2.5`
+            - **saarika:v2.5** (default): Standard transcription model
+            - **saarika:v3**: Advanced transcription model
+            - **saaras:v3**: Advanced model with multiple output modes
+        mode : typing.Optional[Mode]
+            Mode of operation. **Only applicable when using saaras:v3 model.**
+            - **transcribe** (default): Standard transcription
+            - **translate**: Translation to English
+            - **indic-en**: Indic to English translation
+            - **verbatim**: Exact transcription
+            - **translit**: Transliteration to Latin script
+            - **codemix**: Code-mixed output
         language_code : typing.Optional[SpeechToTextLanguage]
             Specifies the language of the input audio.
@@ -93,6 +106,7 @@ class SpeechToTextClient:
         _response = self._raw_client.transcribe(
             file=file,
             model=model,
+            mode=mode,
             language_code=language_code,
             input_audio_codec=input_audio_codec,
             request_options=request_options,
@@ -180,6 +194,7 @@ class AsyncSpeechToTextClient:
         *,
         file: core.File,
         model: typing.Optional[SpeechToTextModel] = OMIT,
+        mode: typing.Optional[Mode] = OMIT,
         language_code: typing.Optional[SpeechToTextLanguage] = OMIT,
         input_audio_codec: typing.Optional[InputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
@@ -206,7 +221,18 @@ class AsyncSpeechToTextClient:
         model : typing.Optional[SpeechToTextModel]
             Specifies the model to use for speech-to-text conversion.
-            Note:- Default model is `saarika:v2.5`
+            - **saarika:v2.5** (default): Standard transcription model
+            - **saarika:v3**: Advanced transcription model
+            - **saaras:v3**: Advanced model with multiple output modes
+        mode : typing.Optional[Mode]
+            Mode of operation. **Only applicable when using saaras:v3 model.**
+            - **transcribe** (default): Standard transcription
+            - **translate**: Translation to English
+            - **indic-en**: Indic to English translation
+            - **verbatim**: Exact transcription
+            - **translit**: Transliteration to Latin script
+            - **codemix**: Code-mixed output
         language_code : typing.Optional[SpeechToTextLanguage]
             Specifies the language of the input audio.
@@ -244,6 +270,7 @@ class AsyncSpeechToTextClient:
         _response = await self._raw_client.transcribe(
             file=file,
             model=model,
+            mode=mode,
             language_code=language_code,
             input_audio_codec=input_audio_codec,
             request_options=request_options,

sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a4__py3-none-any.whl