PyPI - sarvamai - Versions diffs - 0.1.8rc5__tar.gz → 0.1.8rc7__tar.gz - Mend

sarvamai 0.1.8rc5__tar.gz → 0.1.8rc7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187)

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarvamai
-Version: 0.1.8rc5
+Version: 0.1.8rc7
 Summary:
 Requires-Python: >=3.8,<4.0
 Classifier: Intended Audience :: Developers

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "sarvamai"
 [tool.poetry]
 name = "sarvamai"
-version = "0.1.8rc5"
+version = "0.1.8rc7"
 description = ""
 readme = "README.md"
 authors = []

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/__init__.py RENAMED Viewed

@@ -18,6 +18,11 @@ from .types import (
     Choice,
     CompletionUsage,
     ConfigMessage,
+    ConfigureConnection,
+    ConfigureConnectionData,
+    ConfigureConnectionDataOutputAudioBitrate,
+    ConfigureConnectionDataSpeaker,
+    ConfigureConnectionDataTargetLanguageCode,
     CreateChatCompletionResponse,
     DiarizedEntry,
     DiarizedTranscript,
@@ -31,11 +36,6 @@ from .types import (
     FinishReason,
     FlushSignal,
     Format,
-    InitializeConnection,
-    InitializeConnectionData,
-    InitializeConnectionDataOutputAudioBitrate,
-    InitializeConnectionDataSpeaker,
-    InitializeConnectionDataTargetLanguageCode,
     LanguageIdentificationResponse,
     NumeralsFormat,
     PingSignal,
@@ -112,6 +112,8 @@ from .requests import (
     ChoiceParams,
     CompletionUsageParams,
     ConfigMessageParams,
+    ConfigureConnectionDataParams,
+    ConfigureConnectionParams,
     CreateChatCompletionResponseParams,
     DiarizedEntryParams,
     DiarizedTranscriptParams,
@@ -122,8 +124,6 @@ from .requests import (
     ErrorResponseParams,
     EventsDataParams,
     FlushSignalParams,
-    InitializeConnectionDataParams,
-    InitializeConnectionParams,
     LanguageIdentificationResponseParams,
     PingSignalParams,
     SendTextDataParams,
@@ -154,7 +154,6 @@ from .speech_to_text_translate_streaming import (
     SpeechToTextTranslateStreamingModel,
     SpeechToTextTranslateStreamingVadSignals,
 )
-from .text_to_speech_streaming import TextToSpeechStreamingModel
 from .version import __version__
 __all__ = [
@@ -190,6 +189,13 @@ __all__ = [
     "CompletionUsageParams",
     "ConfigMessage",
     "ConfigMessageParams",
+    "ConfigureConnection",
+    "ConfigureConnectionData",
+    "ConfigureConnectionDataOutputAudioBitrate",
+    "ConfigureConnectionDataParams",
+    "ConfigureConnectionDataSpeaker",
+    "ConfigureConnectionDataTargetLanguageCode",
+    "ConfigureConnectionParams",
     "CreateChatCompletionResponse",
     "CreateChatCompletionResponseParams",
     "DiarizedEntry",
@@ -214,13 +220,6 @@ __all__ = [
     "FlushSignalParams",
     "ForbiddenError",
     "Format",
-    "InitializeConnection",
-    "InitializeConnectionData",
-    "InitializeConnectionDataOutputAudioBitrate",
-    "InitializeConnectionDataParams",
-    "InitializeConnectionDataSpeaker",
-    "InitializeConnectionDataTargetLanguageCode",
-    "InitializeConnectionParams",
     "InternalServerError",
     "LanguageIdentificationResponse",
     "LanguageIdentificationResponseParams",
@@ -274,7 +273,6 @@ __all__ = [
     "TextToSpeechResponse",
     "TextToSpeechResponseParams",
     "TextToSpeechSpeaker",
-    "TextToSpeechStreamingModel",
     "TimestampsModel",
     "TimestampsModelParams",
     "TooManyRequestsError",

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/core/client_wrapper.py RENAMED Viewed

@@ -17,10 +17,10 @@ class BaseClientWrapper:
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "sarvamai/0.1.8rc5",
+            "User-Agent": "sarvamai/0.1.8rc7",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "sarvamai",
-            "X-Fern-SDK-Version": "0.1.8rc5",
+            "X-Fern-SDK-Version": "0.1.8rc7",
         }
         headers["api-subscription-key"] = self.api_subscription_key
         return headers

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/requests/__init__.py RENAMED Viewed

@@ -19,6 +19,8 @@ from .chat_completion_response_message import ChatCompletionResponseMessageParam
 from .choice import ChoiceParams
 from .completion_usage import CompletionUsageParams
 from .config_message import ConfigMessageParams
+from .configure_connection import ConfigureConnectionParams
+from .configure_connection_data import ConfigureConnectionDataParams
 from .create_chat_completion_response import CreateChatCompletionResponseParams
 from .diarized_entry import DiarizedEntryParams
 from .diarized_transcript import DiarizedTranscriptParams
@@ -29,8 +31,6 @@ from .error_response import ErrorResponseParams
 from .error_response_data import ErrorResponseDataParams
 from .events_data import EventsDataParams
 from .flush_signal import FlushSignalParams
-from .initialize_connection import InitializeConnectionParams
-from .initialize_connection_data import InitializeConnectionDataParams
 from .language_identification_response import LanguageIdentificationResponseParams
 from .ping_signal import PingSignalParams
 from .send_text import SendTextParams
@@ -66,6 +66,8 @@ __all__ = [
     "ChoiceParams",
     "CompletionUsageParams",
     "ConfigMessageParams",
+    "ConfigureConnectionDataParams",
+    "ConfigureConnectionParams",
     "CreateChatCompletionResponseParams",
     "DiarizedEntryParams",
     "DiarizedTranscriptParams",
@@ -76,8 +78,6 @@ __all__ = [
     "ErrorResponseParams",
     "EventsDataParams",
     "FlushSignalParams",
-    "InitializeConnectionDataParams",
-    "InitializeConnectionParams",
     "LanguageIdentificationResponseParams",
     "PingSignalParams",
     "SendTextDataParams",

sarvamai-0.1.8rc7/src/sarvamai/requests/configure_connection.py ADDED Viewed

@@ -0,0 +1,18 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+import typing_extensions
+from .configure_connection_data import ConfigureConnectionDataParams
+class ConfigureConnectionParams(typing_extensions.TypedDict):
+    """
+    Configuration message required as the first message after establishing the WebSocket connection.
+    This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+    by sending a new config message. When a config update is sent, any text currently in the buffer
+    will be automatically flushed and processed before applying the new configuration.
+    """
+    type: typing.Literal["config"]
+    data: ConfigureConnectionDataParams

sarvamai-0.1.8rc7/src/sarvamai/requests/configure_connection_data.py ADDED Viewed

@@ -0,0 +1,83 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+import typing_extensions
+from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
+from ..types.configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
+class ConfigureConnectionDataParams(typing_extensions.TypedDict):
+    target_language_code: ConfigureConnectionDataTargetLanguageCode
+    """
+    The language of the text is BCP-47 format
+    """
+    speaker: ConfigureConnectionDataSpeaker
+    """
+    The speaker voice to be used for the output audio.
+    **Default:** Anushka
+    **Model Compatibility (Speakers compatible with respective model):**
+    - **bulbul:v2:**
+      - Female: Anushka, Manisha, Vidya, Arya
+      - Male: Abhilash, Karun, Hitesh
+    **Note:** Speaker selection must match the chosen model version.
+    """
+    pitch: typing_extensions.NotRequired[float]
+    """
+    Controls the pitch of the audio. Lower values result in a deeper voice,
+    while higher values make it sharper. The suitable range is between -0.75
+    and 0.75. Default is 0.0.
+    """
+    pace: typing_extensions.NotRequired[float]
+    """
+    Controls the speed of the audio. Lower values result in slower speech,
+    while higher values make it faster. The suitable range is between 0.5
+    and 2.0. Default is 1.0.
+    """
+    loudness: typing_extensions.NotRequired[float]
+    """
+    Controls the loudness of the audio. Lower values result in quieter audio,
+    while higher values make it louder. The suitable range is between 0.3
+    and 3.0. Default is 1.0.
+    """
+    speech_sample_rate: typing_extensions.NotRequired[int]
+    """
+    Specifies the sample rate of the output audio. Supported values are
+    8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+    """
+    enable_preprocessing: typing_extensions.NotRequired[bool]
+    """
+    Controls whether normalization of English words and numeric entities
+    (e.g., numbers, dates) is performed. Set to true for better handling
+    of mixed-language text. Default is false.
+    """
+    output_audio_codec: typing_extensions.NotRequired[typing.Literal["mp3"]]
+    """
+    Audio codec (currently supports MP3 only, optimized for real-time playback)
+    """
+    output_audio_bitrate: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioBitrate]
+    """
+    Audio bitrate (choose from 5 supported bitrate options)
+    """
+    min_buffer_size: typing_extensions.NotRequired[int]
+    """
+    Minimum character length that triggers buffer flushing for TTS model processing
+    """
+    max_chunk_length: typing_extensions.NotRequired[int]
+    """
+    Maximum length for sentence splitting (adjust based on content length)
+    """

sarvamai-0.1.8rc7/src/sarvamai/requests/flush_signal.py ADDED Viewed

@@ -0,0 +1,14 @@
+# This file was auto-generated by Fern from our API Definition.
+import typing
+import typing_extensions
+class FlushSignalParams(typing_extensions.TypedDict):
+    """
+    Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+    Use this when you need to process remaining text that hasn't reached the minimum buffer size.
+    """
+    type: typing.Literal["flush"]

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/requests/ping_signal.py RENAMED Viewed

@@ -6,4 +6,9 @@ import typing_extensions
 class PingSignalParams(typing_extensions.TypedDict):
+    """
+    Send ping signal to keep the WebSocket connection alive. The connection automatically
+    closes after one minute of inactivity.
+    """
     type: typing.Literal["ping"]

sarvamai-0.1.8rc7/src/sarvamai/text_to_speech_streaming/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+# This file was auto-generated by Fern from our API Definition.
+# isort: skip_file

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/text_to_speech_streaming/client.py RENAMED Viewed

@@ -11,7 +11,6 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from .raw_client import AsyncRawTextToSpeechStreamingClient, RawTextToSpeechStreamingClient
 from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
-from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel
 class TextToSpeechStreamingClient:
@@ -33,7 +32,7 @@ class TextToSpeechStreamingClient:
     def connect(
         self,
         *,
-        model: typing.Optional[TextToSpeechStreamingModel] = None,
+        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -43,7 +42,7 @@ class TextToSpeechStreamingClient:
         Parameters
         ----------
-        model : typing.Optional[TextToSpeechStreamingModel]
+        model : typing.Optional[typing.Literal["bulbul:v2"]]
             Text to speech model to use
         api_subscription_key : typing.Optional[str]
@@ -103,7 +102,7 @@ class AsyncTextToSpeechStreamingClient:
     async def connect(
         self,
         *,
-        model: typing.Optional[TextToSpeechStreamingModel] = None,
+        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -113,7 +112,7 @@ class AsyncTextToSpeechStreamingClient:
         Parameters
         ----------
-        model : typing.Optional[TextToSpeechStreamingModel]
+        model : typing.Optional[typing.Literal["bulbul:v2"]]
             Text to speech model to use
         api_subscription_key : typing.Optional[str]

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/text_to_speech_streaming/raw_client.py RENAMED Viewed

@@ -10,7 +10,6 @@ from ..core.api_error import ApiError
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
-from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel
 class RawTextToSpeechStreamingClient:
@@ -21,7 +20,7 @@ class RawTextToSpeechStreamingClient:
     def connect(
         self,
         *,
-        model: typing.Optional[TextToSpeechStreamingModel] = None,
+        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -31,7 +30,7 @@ class RawTextToSpeechStreamingClient:
         Parameters
         ----------
-        model : typing.Optional[TextToSpeechStreamingModel]
+        model : typing.Optional[typing.Literal["bulbul:v2"]]
             Text to speech model to use
         api_subscription_key : typing.Optional[str]
@@ -80,7 +79,7 @@ class AsyncRawTextToSpeechStreamingClient:
     async def connect(
         self,
         *,
-        model: typing.Optional[TextToSpeechStreamingModel] = None,
+        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
         api_subscription_key: typing.Optional[str] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -90,7 +89,7 @@ class AsyncRawTextToSpeechStreamingClient:
         Parameters
         ----------
-        model : typing.Optional[TextToSpeechStreamingModel]
+        model : typing.Optional[typing.Literal["bulbul:v2"]]
             Text to speech model to use
         api_subscription_key : typing.Optional[str]

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/text_to_speech_streaming/socket_client.py RENAMED Viewed

@@ -10,8 +10,8 @@ from ..core.pydantic_utilities import parse_obj_as
 from ..types.audio_output import AudioOutput
 from ..types.flush_signal import FlushSignal
 from ..types.error_response import ErrorResponse
-from ..types.initialize_connection import InitializeConnection
-from ..types.initialize_connection_data import InitializeConnectionData
+from ..types.configure_connection import ConfigureConnection
+from ..types.configure_connection_data import ConfigureConnectionData
 from ..types.ping_signal import PingSignal
 from ..types.send_text import SendText
 from ..types.send_text_data import SendTextData
@@ -54,10 +54,10 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
         finally:
             self._emit(EventType.CLOSE, None)
-    async def initialize_connection(
+    async def configure(
         self,
         target_language_code: str,
-        speaker: str,
+        speaker: str = "anushka",
         pitch: float = 0.0,
         pace: float = 1.0,
         loudness: float = 1.0,
@@ -69,21 +69,35 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
         max_chunk_length: int = 150,
     ) -> None:
         """
-        Initialize the TTS connection with configuration parameters.
+        Configuration message required as the first message after establishing the WebSocket connection.
+        This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+        by sending a new config message. When a config update is sent, any text currently in the buffer
+        will be automatically flushed and processed before applying the new configuration.
-        :param target_language_code: Target language code (e.g., 'hi-IN')
-        :param speaker: Voice speaker name (e.g., 'meera', 'arvind')
-        :param pitch: Voice pitch adjustment (-1.0 to 1.0, default: 0.0)
-        :param pace: Speech pace (0.3 to 3.0, default: 1.0)
-        :param loudness: Voice loudness (0.1 to 3.0, default: 1.0)
-        :param speech_sample_rate: Audio sample rate, default: 22050
-        :param enable_preprocessing: Enable text preprocessing, default: False
-        :param output_audio_codec: Audio codec, default: 'mp3'
-        :param output_audio_bitrate: Audio bitrate, default: '128k'
-        :param min_buffer_size: Minimum buffer size, default: 50
-        :param max_chunk_length: Maximum chunk length, default: 150
-        """
-        data = InitializeConnectionData(
+        :param target_language_code: The language of the text is BCP-47 format
+        :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
+            Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
+            Male: Abhilash, Karun, Hitesh
+        :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
+            while higher values make it sharper. The suitable range is between -0.75
+            and 0.75. Default is 0.0.
+        :param pace: Controls the speed of the audio. Lower values result in slower speech,
+            while higher values make it faster. The suitable range is between 0.5
+            and 2.0. Default is 1.0.
+        :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
+            while higher values make it louder. The suitable range is between 0.3
+            and 3.0. Default is 1.0.
+        :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
+            8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+        :param enable_preprocessing: Controls whether normalization of English words and numeric entities
+            (e.g., numbers, dates) is performed. Set to true for better handling
+            of mixed-language text. Default is false.
+        :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
+        :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
+        :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
+        :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
+        """
+        data = ConfigureConnectionData(
             target_language_code=target_language_code,
             speaker=speaker,
             pitch=pitch,
@@ -96,14 +110,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
             min_buffer_size=min_buffer_size,
             max_chunk_length=max_chunk_length,
         )
-        message = InitializeConnection(data=data)
+        message = ConfigureConnection(data=data)
         await self._send_model(message)
     async def convert(self, text: str) -> None:
         """
-        Send text to be converted to speech.
+        Send text to be converted to speech. Text length should be 1-2500 characters.
+        Recommended: <500 characters for optimal streaming performance.
+        Real-time endpoints perform better with longer character counts.
-        :param text: Text to be synthesized (1-2500 characters)
+        :param text: Text to be synthesized (1-2500 characters, recommended <500)
         """
         data = SendTextData(text=text)
         message = SendText(data=data)
@@ -111,15 +127,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
     async def flush(self) -> None:
         """
-        Signal to flush the buffer and finalize audio output.
-        This indicates the end of text input.
+        Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+        Use this when you need to process remaining text that hasn't reached the minimum buffer size.
         """
         message = FlushSignal()
         await self._send_model(message)
     async def ping(self) -> None:
         """
-        Send ping signal to keep the WebSocket connection alive.
+        Send ping signal to keep the WebSocket connection alive. The connection automatically
+        closes after one minute of inactivity.
         """
         message = PingSignal()
         await self._send_model(message)
@@ -182,10 +199,10 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
         finally:
             self._emit(EventType.CLOSE, None)
-    def initialize_connection(
+    def configure(
         self,
         target_language_code: str,
-        speaker: str,
+        speaker: str = "anushka",
         pitch: float = 0.0,
         pace: float = 1.0,
         loudness: float = 1.0,
@@ -197,21 +214,35 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
         max_chunk_length: int = 150,
     ) -> None:
         """
-        Initialize the TTS connection with configuration parameters.
+        Configuration message required as the first message after establishing the WebSocket connection.
+        This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+        by sending a new config message. When a config update is sent, any text currently in the buffer
+        will be automatically flushed and processed before applying the new configuration.
-        :param target_language_code: Target language code (e.g., 'hi-IN')
-        :param speaker: Voice speaker name (e.g., 'meera', 'arvind')
-        :param pitch: Voice pitch adjustment (-1.0 to 1.0, default: 0.0)
-        :param pace: Speech pace (0.3 to 3.0, default: 1.0)
-        :param loudness: Voice loudness (0.1 to 3.0, default: 1.0)
-        :param speech_sample_rate: Audio sample rate, default: 22050
-        :param enable_preprocessing: Enable text preprocessing, default: False
-        :param output_audio_codec: Audio codec, default: 'mp3'
-        :param output_audio_bitrate: Audio bitrate, default: '128k'
-        :param min_buffer_size: Minimum buffer size, default: 50
-        :param max_chunk_length: Maximum chunk length, default: 150
-        """
-        data = InitializeConnectionData(
+        :param target_language_code: The language of the text is BCP-47 format
+        :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
+            Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
+            Male: Abhilash, Karun, Hitesh
+        :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
+            while higher values make it sharper. The suitable range is between -0.75
+            and 0.75. Default is 0.0.
+        :param pace: Controls the speed of the audio. Lower values result in slower speech,
+            while higher values make it faster. The suitable range is between 0.5
+            and 2.0. Default is 1.0.
+        :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
+            while higher values make it louder. The suitable range is between 0.3
+            and 3.0. Default is 1.0.
+        :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
+            8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+        :param enable_preprocessing: Controls whether normalization of English words and numeric entities
+            (e.g., numbers, dates) is performed. Set to true for better handling
+            of mixed-language text. Default is false.
+        :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
+        :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
+        :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
+        :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
+        """
+        data = ConfigureConnectionData(
             target_language_code=target_language_code,
             speaker=speaker,
             pitch=pitch,
@@ -224,14 +255,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
             min_buffer_size=min_buffer_size,
             max_chunk_length=max_chunk_length,
         )
-        message = InitializeConnection(data=data)
+        message = ConfigureConnection(data=data)
         self._send_model(message)
     def convert(self, text: str) -> None:
         """
-        Send text to be converted to speech.
+        Send text to be converted to speech. Text length should be 1-2500 characters.
+        Recommended: <500 characters for optimal streaming performance.
+        Real-time endpoints perform better with longer character counts.
-        :param text: Text to be synthesized (1-2500 characters)
+        :param text: Text to be synthesized (1-2500 characters, recommended <500)
         """
         data = SendTextData(text=text)
         message = SendText(data=data)
@@ -239,15 +272,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
     def flush(self) -> None:
         """
-        Signal to flush the buffer and finalize audio output.
-        This indicates the end of text input.
+        Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+        Use this when you need to process remaining text that hasn't reached the minimum buffer size.
         """
         message = FlushSignal()
         self._send_model(message)
     def ping(self) -> None:
         """
-        Send ping signal to keep the WebSocket connection alive.
+        Send ping signal to keep the WebSocket connection alive. The connection automatically
+        closes after one minute of inactivity.
         """
         message = PingSignal()
         self._send_model(message)

{sarvamai-0.1.8rc5 → sarvamai-0.1.8rc7}/src/sarvamai/types/__init__.py RENAMED Viewed

@@ -19,6 +19,11 @@ from .chat_completion_response_message import ChatCompletionResponseMessage
 from .choice import Choice
 from .completion_usage import CompletionUsage
 from .config_message import ConfigMessage
+from .configure_connection import ConfigureConnection
+from .configure_connection_data import ConfigureConnectionData
+from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
+from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
 from .create_chat_completion_response import CreateChatCompletionResponse
 from .diarized_entry import DiarizedEntry
 from .diarized_transcript import DiarizedTranscript
@@ -32,11 +37,6 @@ from .events_data import EventsData
 from .finish_reason import FinishReason
 from .flush_signal import FlushSignal
 from .format import Format
-from .initialize_connection import InitializeConnection
-from .initialize_connection_data import InitializeConnectionData
-from .initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
-from .initialize_connection_data_speaker import InitializeConnectionDataSpeaker
-from .initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
 from .language_identification_response import LanguageIdentificationResponse
 from .numerals_format import NumeralsFormat
 from .ping_signal import PingSignal
@@ -94,6 +94,11 @@ __all__ = [
     "Choice",
     "CompletionUsage",
     "ConfigMessage",
+    "ConfigureConnection",
+    "ConfigureConnectionData",
+    "ConfigureConnectionDataOutputAudioBitrate",
+    "ConfigureConnectionDataSpeaker",
+    "ConfigureConnectionDataTargetLanguageCode",
     "CreateChatCompletionResponse",
     "DiarizedEntry",
     "DiarizedTranscript",
@@ -107,11 +112,6 @@ __all__ = [
     "FinishReason",
     "FlushSignal",
     "Format",
-    "InitializeConnection",
-    "InitializeConnectionData",
-    "InitializeConnectionDataOutputAudioBitrate",
-    "InitializeConnectionDataSpeaker",
-    "InitializeConnectionDataTargetLanguageCode",
     "LanguageIdentificationResponse",
     "NumeralsFormat",
     "PingSignal",

sarvamai 0.1.8rc5__tar.gz → 0.1.8rc7__tar.gz

sarvamai 0.1.8rc5__tar.gz → 0.1.8rc7__tar.gz