sarvamai 0.1.8rc4__py3-none-any.whl → 0.1.8rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sarvamai/__init__.py CHANGED
@@ -16,7 +16,6 @@ from .types import (
16
16
  ChatCompletionRequestUserMessage,
17
17
  ChatCompletionResponseMessage,
18
18
  Choice,
19
- CloseConnection,
20
19
  CompletionUsage,
21
20
  ConfigMessage,
22
21
  CreateChatCompletionResponse,
@@ -30,6 +29,7 @@ from .types import (
30
29
  ErrorResponseData,
31
30
  EventsData,
32
31
  FinishReason,
32
+ FlushSignal,
33
33
  Format,
34
34
  InitializeConnection,
35
35
  InitializeConnectionData,
@@ -110,7 +110,6 @@ from .requests import (
110
110
  ChatCompletionRequestUserMessageParams,
111
111
  ChatCompletionResponseMessageParams,
112
112
  ChoiceParams,
113
- CloseConnectionParams,
114
113
  CompletionUsageParams,
115
114
  ConfigMessageParams,
116
115
  CreateChatCompletionResponseParams,
@@ -122,6 +121,7 @@ from .requests import (
122
121
  ErrorResponseDataParams,
123
122
  ErrorResponseParams,
124
123
  EventsDataParams,
124
+ FlushSignalParams,
125
125
  InitializeConnectionDataParams,
126
126
  InitializeConnectionParams,
127
127
  LanguageIdentificationResponseParams,
@@ -154,7 +154,6 @@ from .speech_to_text_translate_streaming import (
154
154
  SpeechToTextTranslateStreamingModel,
155
155
  SpeechToTextTranslateStreamingVadSignals,
156
156
  )
157
- from .text_to_speech_streaming import TextToSpeechStreamingModel
158
157
  from .version import __version__
159
158
 
160
159
  __all__ = [
@@ -186,8 +185,6 @@ __all__ = [
186
185
  "ChatCompletionResponseMessageParams",
187
186
  "Choice",
188
187
  "ChoiceParams",
189
- "CloseConnection",
190
- "CloseConnectionParams",
191
188
  "CompletionUsage",
192
189
  "CompletionUsageParams",
193
190
  "ConfigMessage",
@@ -212,6 +209,8 @@ __all__ = [
212
209
  "EventsData",
213
210
  "EventsDataParams",
214
211
  "FinishReason",
212
+ "FlushSignal",
213
+ "FlushSignalParams",
215
214
  "ForbiddenError",
216
215
  "Format",
217
216
  "InitializeConnection",
@@ -274,7 +273,6 @@ __all__ = [
274
273
  "TextToSpeechResponse",
275
274
  "TextToSpeechResponseParams",
276
275
  "TextToSpeechSpeaker",
277
- "TextToSpeechStreamingModel",
278
276
  "TimestampsModel",
279
277
  "TimestampsModelParams",
280
278
  "TooManyRequestsError",
@@ -17,10 +17,10 @@ class BaseClientWrapper:
17
17
 
18
18
  def get_headers(self) -> typing.Dict[str, str]:
19
19
  headers: typing.Dict[str, str] = {
20
- "User-Agent": "sarvamai/0.1.8rc4",
20
+ "User-Agent": "sarvamai/0.1.8rc6",
21
21
  "X-Fern-Language": "Python",
22
22
  "X-Fern-SDK-Name": "sarvamai",
23
- "X-Fern-SDK-Version": "0.1.8rc4",
23
+ "X-Fern-SDK-Version": "0.1.8rc6",
24
24
  }
25
25
  headers["api-subscription-key"] = self.api_subscription_key
26
26
  return headers
@@ -17,7 +17,6 @@ from .chat_completion_request_system_message import ChatCompletionRequestSystemM
17
17
  from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
18
18
  from .chat_completion_response_message import ChatCompletionResponseMessageParams
19
19
  from .choice import ChoiceParams
20
- from .close_connection import CloseConnectionParams
21
20
  from .completion_usage import CompletionUsageParams
22
21
  from .config_message import ConfigMessageParams
23
22
  from .create_chat_completion_response import CreateChatCompletionResponseParams
@@ -29,6 +28,7 @@ from .error_message import ErrorMessageParams
29
28
  from .error_response import ErrorResponseParams
30
29
  from .error_response_data import ErrorResponseDataParams
31
30
  from .events_data import EventsDataParams
31
+ from .flush_signal import FlushSignalParams
32
32
  from .initialize_connection import InitializeConnectionParams
33
33
  from .initialize_connection_data import InitializeConnectionDataParams
34
34
  from .language_identification_response import LanguageIdentificationResponseParams
@@ -64,7 +64,6 @@ __all__ = [
64
64
  "ChatCompletionRequestUserMessageParams",
65
65
  "ChatCompletionResponseMessageParams",
66
66
  "ChoiceParams",
67
- "CloseConnectionParams",
68
67
  "CompletionUsageParams",
69
68
  "ConfigMessageParams",
70
69
  "CreateChatCompletionResponseParams",
@@ -76,6 +75,7 @@ __all__ = [
76
75
  "ErrorResponseDataParams",
77
76
  "ErrorResponseParams",
78
77
  "EventsDataParams",
78
+ "FlushSignalParams",
79
79
  "InitializeConnectionDataParams",
80
80
  "InitializeConnectionParams",
81
81
  "LanguageIdentificationResponseParams",
@@ -0,0 +1,14 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import typing_extensions
6
+
7
+
8
+ class FlushSignalParams(typing_extensions.TypedDict):
9
+ """
10
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
11
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
12
+ """
13
+
14
+ type: typing.Literal["flush"]
@@ -7,5 +7,12 @@ from .initialize_connection_data import InitializeConnectionDataParams
7
7
 
8
8
 
9
9
  class InitializeConnectionParams(typing_extensions.TypedDict):
10
+ """
11
+ Configuration message required as the first message after establishing the WebSocket connection.
12
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
13
+ by sending a new config message. When a config update is sent, any text currently in the buffer
14
+ will be automatically flushed and processed before applying the new configuration.
15
+ """
16
+
10
17
  type: typing.Literal["config"]
11
18
  data: InitializeConnectionDataParams
@@ -10,13 +10,74 @@ from ..types.initialize_connection_data_target_language_code import InitializeCo
10
10
 
11
11
  class InitializeConnectionDataParams(typing_extensions.TypedDict):
12
12
  target_language_code: InitializeConnectionDataTargetLanguageCode
13
+ """
14
+ The language of the text, in BCP-47 format
15
+ """
16
+
13
17
  speaker: InitializeConnectionDataSpeaker
18
+ """
19
+ The speaker voice to be used for the output audio.
20
+
21
+ **Default:** Anushka
22
+
23
+ **Model Compatibility (Speakers compatible with respective model):**
24
+ - **bulbul:v2:**
25
+ - Female: Anushka, Manisha, Vidya, Arya
26
+ - Male: Abhilash, Karun, Hitesh
27
+
28
+ **Note:** Speaker selection must match the chosen model version.
29
+ """
30
+
14
31
  pitch: typing_extensions.NotRequired[float]
32
+ """
33
+ Controls the pitch of the audio. Lower values result in a deeper voice,
34
+ while higher values make it sharper. The suitable range is between -0.75
35
+ and 0.75. Default is 0.0.
36
+ """
37
+
15
38
  pace: typing_extensions.NotRequired[float]
39
+ """
40
+ Controls the speed of the audio. Lower values result in slower speech,
41
+ while higher values make it faster. The suitable range is between 0.5
42
+ and 2.0. Default is 1.0.
43
+ """
44
+
16
45
  loudness: typing_extensions.NotRequired[float]
46
+ """
47
+ Controls the loudness of the audio. Lower values result in quieter audio,
48
+ while higher values make it louder. The suitable range is between 0.3
49
+ and 3.0. Default is 1.0.
50
+ """
51
+
17
52
  speech_sample_rate: typing_extensions.NotRequired[int]
53
+ """
54
+ Specifies the sample rate of the output audio. Supported values are
55
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
56
+ """
57
+
18
58
  enable_preprocessing: typing_extensions.NotRequired[bool]
59
+ """
60
+ Controls whether normalization of English words and numeric entities
61
+ (e.g., numbers, dates) is performed. Set to true for better handling
62
+ of mixed-language text. Default is false.
63
+ """
64
+
19
65
  output_audio_codec: typing_extensions.NotRequired[typing.Literal["mp3"]]
66
+ """
67
+ Audio codec (currently supports MP3 only, optimized for real-time playback)
68
+ """
69
+
20
70
  output_audio_bitrate: typing_extensions.NotRequired[InitializeConnectionDataOutputAudioBitrate]
71
+ """
72
+ Audio bitrate (choose from 5 supported bitrate options)
73
+ """
74
+
21
75
  min_buffer_size: typing_extensions.NotRequired[int]
76
+ """
77
+ Minimum character length that triggers buffer flushing for TTS model processing
78
+ """
79
+
22
80
  max_chunk_length: typing_extensions.NotRequired[int]
81
+ """
82
+ Maximum length for sentence splitting (adjust based on content length)
83
+ """
@@ -6,4 +6,9 @@ import typing_extensions
6
6
 
7
7
 
8
8
  class PingSignalParams(typing_extensions.TypedDict):
9
+ """
10
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
11
+ closes after one minute of inactivity.
12
+ """
13
+
9
14
  type: typing.Literal["ping"]
@@ -2,6 +2,3 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- from .types import TextToSpeechStreamingModel
6
-
7
- __all__ = ["TextToSpeechStreamingModel"]
@@ -11,7 +11,6 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
11
11
  from ..core.request_options import RequestOptions
12
12
  from .raw_client import AsyncRawTextToSpeechStreamingClient, RawTextToSpeechStreamingClient
13
13
  from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
14
- from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel
15
14
 
16
15
 
17
16
  class TextToSpeechStreamingClient:
@@ -33,7 +32,7 @@ class TextToSpeechStreamingClient:
33
32
  def connect(
34
33
  self,
35
34
  *,
36
- model: typing.Optional[TextToSpeechStreamingModel] = None,
35
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
37
36
  api_subscription_key: typing.Optional[str] = None,
38
37
  request_options: typing.Optional[RequestOptions] = None,
39
38
  ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -43,7 +42,7 @@ class TextToSpeechStreamingClient:
43
42
 
44
43
  Parameters
45
44
  ----------
46
- model : typing.Optional[TextToSpeechStreamingModel]
45
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
47
46
  Text to speech model to use
48
47
 
49
48
  api_subscription_key : typing.Optional[str]
@@ -103,7 +102,7 @@ class AsyncTextToSpeechStreamingClient:
103
102
  async def connect(
104
103
  self,
105
104
  *,
106
- model: typing.Optional[TextToSpeechStreamingModel] = None,
105
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
107
106
  api_subscription_key: typing.Optional[str] = None,
108
107
  request_options: typing.Optional[RequestOptions] = None,
109
108
  ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -113,7 +112,7 @@ class AsyncTextToSpeechStreamingClient:
113
112
 
114
113
  Parameters
115
114
  ----------
116
- model : typing.Optional[TextToSpeechStreamingModel]
115
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
117
116
  Text to speech model to use
118
117
 
119
118
  api_subscription_key : typing.Optional[str]
@@ -10,7 +10,6 @@ from ..core.api_error import ApiError
10
10
  from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
11
11
  from ..core.request_options import RequestOptions
12
12
  from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
13
- from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel
14
13
 
15
14
 
16
15
  class RawTextToSpeechStreamingClient:
@@ -21,7 +20,7 @@ class RawTextToSpeechStreamingClient:
21
20
  def connect(
22
21
  self,
23
22
  *,
24
- model: typing.Optional[TextToSpeechStreamingModel] = None,
23
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
25
24
  api_subscription_key: typing.Optional[str] = None,
26
25
  request_options: typing.Optional[RequestOptions] = None,
27
26
  ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -31,7 +30,7 @@ class RawTextToSpeechStreamingClient:
31
30
 
32
31
  Parameters
33
32
  ----------
34
- model : typing.Optional[TextToSpeechStreamingModel]
33
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
35
34
  Text to speech model to use
36
35
 
37
36
  api_subscription_key : typing.Optional[str]
@@ -80,7 +79,7 @@ class AsyncRawTextToSpeechStreamingClient:
80
79
  async def connect(
81
80
  self,
82
81
  *,
83
- model: typing.Optional[TextToSpeechStreamingModel] = None,
82
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
84
83
  api_subscription_key: typing.Optional[str] = None,
85
84
  request_options: typing.Optional[RequestOptions] = None,
86
85
  ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -90,7 +89,7 @@ class AsyncRawTextToSpeechStreamingClient:
90
89
 
91
90
  Parameters
92
91
  ----------
93
- model : typing.Optional[TextToSpeechStreamingModel]
92
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
94
93
  Text to speech model to use
95
94
 
96
95
  api_subscription_key : typing.Optional[str]
@@ -8,7 +8,7 @@ import websockets.sync.connection as websockets_sync_connection
8
8
  from ..core.events import EventEmitterMixin, EventType
9
9
  from ..core.pydantic_utilities import parse_obj_as
10
10
  from ..types.audio_output import AudioOutput
11
- from ..types.close_connection import CloseConnection
11
+ from ..types.flush_signal import FlushSignal
12
12
  from ..types.error_response import ErrorResponse
13
13
  from ..types.initialize_connection import InitializeConnection
14
14
  from ..types.initialize_connection_data import InitializeConnectionData
@@ -58,16 +58,43 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
58
58
  self,
59
59
  target_language_code: str,
60
60
  speaker: str,
61
+ pitch: float = 0.0,
62
+ pace: float = 1.0,
63
+ loudness: float = 1.0,
64
+ speech_sample_rate: int = 22050,
65
+ enable_preprocessing: bool = False,
66
+ output_audio_codec: str = "mp3",
67
+ output_audio_bitrate: str = "128k",
68
+ min_buffer_size: int = 50,
69
+ max_chunk_length: int = 150,
61
70
  ) -> None:
62
71
  """
63
72
  Initialize the TTS connection with configuration parameters.
64
73
 
65
74
  :param target_language_code: Target language code (e.g., 'hi-IN')
66
75
  :param speaker: Voice speaker name (e.g., 'anushka', 'abhilash')
76
+ :param pitch: Voice pitch adjustment (-0.75 to 0.75, default: 0.0)
77
+ :param pace: Speech pace (0.5 to 2.0, default: 1.0)
78
+ :param loudness: Voice loudness (0.3 to 3.0, default: 1.0)
79
+ :param speech_sample_rate: Audio sample rate, default: 22050
80
+ :param enable_preprocessing: Enable text preprocessing, default: False
81
+ :param output_audio_codec: Audio codec, default: 'mp3'
82
+ :param output_audio_bitrate: Audio bitrate, default: '128k'
83
+ :param min_buffer_size: Minimum buffer size, default: 50
84
+ :param max_chunk_length: Maximum chunk length, default: 150
67
85
  """
68
86
  data = InitializeConnectionData(
69
87
  target_language_code=target_language_code,
70
88
  speaker=speaker,
89
+ pitch=pitch,
90
+ pace=pace,
91
+ loudness=loudness,
92
+ speech_sample_rate=speech_sample_rate,
93
+ enable_preprocessing=enable_preprocessing,
94
+ output_audio_codec=output_audio_codec,
95
+ output_audio_bitrate=output_audio_bitrate,
96
+ min_buffer_size=min_buffer_size,
97
+ max_chunk_length=max_chunk_length,
71
98
  )
72
99
  message = InitializeConnection(data=data)
73
100
  await self._send_model(message)
@@ -87,7 +114,7 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
87
114
  Signal to flush the buffer and finalize audio output.
88
115
  This indicates the end of text input.
89
116
  """
90
- message = CloseConnection()
117
+ message = FlushSignal()
91
118
  await self._send_model(message)
92
119
 
93
120
  async def ping(self) -> None:
@@ -159,16 +186,43 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
159
186
  self,
160
187
  target_language_code: str,
161
188
  speaker: str,
189
+ pitch: float = 0.0,
190
+ pace: float = 1.0,
191
+ loudness: float = 1.0,
192
+ speech_sample_rate: int = 22050,
193
+ enable_preprocessing: bool = False,
194
+ output_audio_codec: str = "mp3",
195
+ output_audio_bitrate: str = "128k",
196
+ min_buffer_size: int = 50,
197
+ max_chunk_length: int = 150,
162
198
  ) -> None:
163
199
  """
164
200
  Initialize the TTS connection with configuration parameters.
165
201
 
166
202
  :param target_language_code: Target language code (e.g., 'hi-IN')
167
203
  :param speaker: Voice speaker name (e.g., 'anushka', 'abhilash')
204
+ :param pitch: Voice pitch adjustment (-0.75 to 0.75, default: 0.0)
205
+ :param pace: Speech pace (0.5 to 2.0, default: 1.0)
206
+ :param loudness: Voice loudness (0.3 to 3.0, default: 1.0)
207
+ :param speech_sample_rate: Audio sample rate, default: 22050
208
+ :param enable_preprocessing: Enable text preprocessing, default: False
209
+ :param output_audio_codec: Audio codec, default: 'mp3'
210
+ :param output_audio_bitrate: Audio bitrate, default: '128k'
211
+ :param min_buffer_size: Minimum buffer size, default: 50
212
+ :param max_chunk_length: Maximum chunk length, default: 150
168
213
  """
169
214
  data = InitializeConnectionData(
170
215
  target_language_code=target_language_code,
171
216
  speaker=speaker,
217
+ pitch=pitch,
218
+ pace=pace,
219
+ loudness=loudness,
220
+ speech_sample_rate=speech_sample_rate,
221
+ enable_preprocessing=enable_preprocessing,
222
+ output_audio_codec=output_audio_codec,
223
+ output_audio_bitrate=output_audio_bitrate,
224
+ min_buffer_size=min_buffer_size,
225
+ max_chunk_length=max_chunk_length,
172
226
  )
173
227
  message = InitializeConnection(data=data)
174
228
  self._send_model(message)
@@ -188,7 +242,7 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
188
242
  Signal to flush the buffer and finalize audio output.
189
243
  This indicates the end of text input.
190
244
  """
191
- message = CloseConnection()
245
+ message = FlushSignal()
192
246
  self._send_model(message)
193
247
 
194
248
  def ping(self) -> None:
@@ -17,7 +17,6 @@ from .chat_completion_request_system_message import ChatCompletionRequestSystemM
17
17
  from .chat_completion_request_user_message import ChatCompletionRequestUserMessage
18
18
  from .chat_completion_response_message import ChatCompletionResponseMessage
19
19
  from .choice import Choice
20
- from .close_connection import CloseConnection
21
20
  from .completion_usage import CompletionUsage
22
21
  from .config_message import ConfigMessage
23
22
  from .create_chat_completion_response import CreateChatCompletionResponse
@@ -31,6 +30,7 @@ from .error_response import ErrorResponse
31
30
  from .error_response_data import ErrorResponseData
32
31
  from .events_data import EventsData
33
32
  from .finish_reason import FinishReason
33
+ from .flush_signal import FlushSignal
34
34
  from .format import Format
35
35
  from .initialize_connection import InitializeConnection
36
36
  from .initialize_connection_data import InitializeConnectionData
@@ -92,7 +92,6 @@ __all__ = [
92
92
  "ChatCompletionRequestUserMessage",
93
93
  "ChatCompletionResponseMessage",
94
94
  "Choice",
95
- "CloseConnection",
96
95
  "CompletionUsage",
97
96
  "ConfigMessage",
98
97
  "CreateChatCompletionResponse",
@@ -106,6 +105,7 @@ __all__ = [
106
105
  "ErrorResponseData",
107
106
  "EventsData",
108
107
  "FinishReason",
108
+ "FlushSignal",
109
109
  "Format",
110
110
  "InitializeConnection",
111
111
  "InitializeConnectionData",
@@ -6,7 +6,12 @@ import pydantic
6
6
  from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
7
7
 
8
8
 
9
- class CloseConnection(UniversalBaseModel):
9
+ class FlushSignal(UniversalBaseModel):
10
+ """
11
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
12
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
13
+ """
14
+
10
15
  type: typing.Literal["flush"] = "flush"
11
16
 
12
17
  if IS_PYDANTIC_V2:
@@ -8,6 +8,13 @@ from .initialize_connection_data import InitializeConnectionData
8
8
 
9
9
 
10
10
  class InitializeConnection(UniversalBaseModel):
11
+ """
12
+ Configuration message required as the first message after establishing the WebSocket connection.
13
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
14
+ by sending a new config message. When a config update is sent, any text currently in the buffer
15
+ will be automatically flushed and processed before applying the new configuration.
16
+ """
17
+
11
18
  type: typing.Literal["config"] = "config"
12
19
  data: InitializeConnectionData
13
20
 
@@ -10,17 +10,78 @@ from .initialize_connection_data_target_language_code import InitializeConnectio
10
10
 
11
11
 
12
12
  class InitializeConnectionData(UniversalBaseModel):
13
- target_language_code: InitializeConnectionDataTargetLanguageCode
14
- speaker: InitializeConnectionDataSpeaker
15
- pitch: typing.Optional[float] = None
16
- pace: typing.Optional[float] = None
17
- loudness: typing.Optional[float] = None
18
- speech_sample_rate: typing.Optional[int] = None
19
- enable_preprocessing: typing.Optional[bool] = None
20
- output_audio_codec: typing.Optional[typing.Literal["mp3"]] = None
21
- output_audio_bitrate: typing.Optional[InitializeConnectionDataOutputAudioBitrate] = None
22
- min_buffer_size: typing.Optional[int] = None
23
- max_chunk_length: typing.Optional[int] = None
13
+ target_language_code: InitializeConnectionDataTargetLanguageCode = pydantic.Field()
14
+ """
15
+ The language of the text, in BCP-47 format
16
+ """
17
+
18
+ speaker: InitializeConnectionDataSpeaker = pydantic.Field()
19
+ """
20
+ The speaker voice to be used for the output audio.
21
+
22
+ **Default:** Anushka
23
+
24
+ **Model Compatibility (Speakers compatible with respective model):**
25
+ - **bulbul:v2:**
26
+ - Female: Anushka, Manisha, Vidya, Arya
27
+ - Male: Abhilash, Karun, Hitesh
28
+
29
+ **Note:** Speaker selection must match the chosen model version.
30
+ """
31
+
32
+ pitch: typing.Optional[float] = pydantic.Field(default=None)
33
+ """
34
+ Controls the pitch of the audio. Lower values result in a deeper voice,
35
+ while higher values make it sharper. The suitable range is between -0.75
36
+ and 0.75. Default is 0.0.
37
+ """
38
+
39
+ pace: typing.Optional[float] = pydantic.Field(default=None)
40
+ """
41
+ Controls the speed of the audio. Lower values result in slower speech,
42
+ while higher values make it faster. The suitable range is between 0.5
43
+ and 2.0. Default is 1.0.
44
+ """
45
+
46
+ loudness: typing.Optional[float] = pydantic.Field(default=None)
47
+ """
48
+ Controls the loudness of the audio. Lower values result in quieter audio,
49
+ while higher values make it louder. The suitable range is between 0.3
50
+ and 3.0. Default is 1.0.
51
+ """
52
+
53
+ speech_sample_rate: typing.Optional[int] = pydantic.Field(default=None)
54
+ """
55
+ Specifies the sample rate of the output audio. Supported values are
56
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
57
+ """
58
+
59
+ enable_preprocessing: typing.Optional[bool] = pydantic.Field(default=None)
60
+ """
61
+ Controls whether normalization of English words and numeric entities
62
+ (e.g., numbers, dates) is performed. Set to true for better handling
63
+ of mixed-language text. Default is false.
64
+ """
65
+
66
+ output_audio_codec: typing.Optional[typing.Literal["mp3"]] = pydantic.Field(default=None)
67
+ """
68
+ Audio codec (currently supports MP3 only, optimized for real-time playback)
69
+ """
70
+
71
+ output_audio_bitrate: typing.Optional[InitializeConnectionDataOutputAudioBitrate] = pydantic.Field(default=None)
72
+ """
73
+ Audio bitrate (choose from 5 supported bitrate options)
74
+ """
75
+
76
+ min_buffer_size: typing.Optional[int] = pydantic.Field(default=None)
77
+ """
78
+ Minimum character length that triggers buffer flushing for TTS model processing
79
+ """
80
+
81
+ max_chunk_length: typing.Optional[int] = pydantic.Field(default=None)
82
+ """
83
+ Maximum length for sentence splitting (adjust based on content length)
84
+ """
24
85
 
25
86
  if IS_PYDANTIC_V2:
26
87
  model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
@@ -3,26 +3,5 @@
3
3
  import typing
4
4
 
5
5
  InitializeConnectionDataSpeaker = typing.Union[
6
- typing.Literal[
7
- "meera",
8
- "pavithra",
9
- "maitreyi",
10
- "arvind",
11
- "amol",
12
- "amartya",
13
- "diya",
14
- "neel",
15
- "misha",
16
- "vian",
17
- "arjun",
18
- "maya",
19
- "anushka",
20
- "abhilash",
21
- "manisha",
22
- "vidya",
23
- "arya",
24
- "karun",
25
- "hitesh",
26
- ],
27
- typing.Any,
6
+ typing.Literal["anushka", "abhilash", "manisha", "vidya", "arya", "karun", "hitesh"], typing.Any
28
7
  ]
@@ -7,6 +7,11 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
7
7
 
8
8
 
9
9
  class PingSignal(UniversalBaseModel):
10
+ """
11
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
12
+ closes after one minute of inactivity.
13
+ """
14
+
10
15
  type: typing.Literal["ping"] = "ping"
11
16
 
12
17
  if IS_PYDANTIC_V2:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sarvamai
3
- Version: 0.1.8rc4
3
+ Version: 0.1.8rc6
4
4
  Summary:
5
5
  Requires-Python: >=3.8,<4.0
6
6
  Classifier: Intended Audience :: Developers
@@ -1,11 +1,11 @@
1
- sarvamai/__init__.py,sha256=j9fFu7SIJXzjAmYb9Majr2Ir-ojDhH2dHjxsUVOxgc4,8953
1
+ sarvamai/__init__.py,sha256=PHOfArF05nQpPgHVlAeWbzuU3fj_a_1nb4KTPV5OuoE,8838
2
2
  sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
3
3
  sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
4
4
  sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
5
5
  sarvamai/client.py,sha256=aI1sw5LVGMjgukgZLDlUmA17ecK1yGsQxH-W_JiCrco,7177
6
6
  sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
7
7
  sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
8
- sarvamai/core/client_wrapper.py,sha256=n37XVBNUAT0tusKUk_VaNpSUmSysyGgwv6sSK9hXikI,2080
8
+ sarvamai/core/client_wrapper.py,sha256=XLVb0UOmyM8ir1Ht42b7D13ojMz0QetLyGKaImhbMcI,2080
9
9
  sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
10
10
  sarvamai/core/events.py,sha256=j7VWXgMpOsjCXdzY22wIhI7Q-v5InZ4WchRzA88x_Sk,856
11
11
  sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
@@ -28,7 +28,7 @@ sarvamai/errors/too_many_requests_error.py,sha256=Dl-_pfpboXJh-OtSbRaPQOB-UXvpVO
28
28
  sarvamai/errors/unprocessable_entity_error.py,sha256=JqxtzIhvjkpQDqbT9Q-go1n-gyv9PsYqq0ng_ZYyBMo,347
29
29
  sarvamai/play.py,sha256=4fh86zy8g8IPU2O8yPBY7QxXQOivv_nWQvPQsOa1arw,2183
30
30
  sarvamai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- sarvamai/requests/__init__.py,sha256=9o0Quh-d3Pg5ybS95fzgR4aA5IpJahRbOY-Y-ebvarw,4496
31
+ sarvamai/requests/__init__.py,sha256=UmEj75u8yYiZYL0M0QVJoCuS8oduaH82eD3bmP-6myg,4484
32
32
  sarvamai/requests/audio_data.py,sha256=QI3SK5aiAg2yJ-m3l9CxOkONnH3CCKMFCl9kAdMs19o,410
33
33
  sarvamai/requests/audio_message.py,sha256=ZBeogjGE6YFXXM-0g8zq9SoizDk21reR0YXSB-0fMjg,214
34
34
  sarvamai/requests/audio_output.py,sha256=BnoX345rwoWgaMaj24u_19-SjmPV0xt7vlFEEDKRw20,280
@@ -39,7 +39,6 @@ sarvamai/requests/chat_completion_request_system_message.py,sha256=ZvjBuyL0oM7Vw
39
39
  sarvamai/requests/chat_completion_request_user_message.py,sha256=IqYy7K-qF9oQ8AUIvuH06EsLL-Wn6QufPMPpSR-VNGI,238
40
40
  sarvamai/requests/chat_completion_response_message.py,sha256=JFazj4zK-nj_wjdvNLDkcfIFxIlqw49Xf_P8o7d70aY,336
41
41
  sarvamai/requests/choice.py,sha256=uulX4MZUoThEMcD3a80o_3V5YpnpqN8DfPaNZWVz-1o,867
42
- sarvamai/requests/close_connection.py,sha256=p6_bsmzUCDRnIZN3cLN_UPXT7t-_22CYhVixi7pq90c,199
43
42
  sarvamai/requests/completion_usage.py,sha256=LbZV-RxcxKdCAYqhCiaRtSFF3VwMJq71A989Z1rm-I8,428
44
43
  sarvamai/requests/config_message.py,sha256=EpYioGvDhCXDMvGH7Q1F7448zJzoHmlkQ1owoNGbWAw,383
45
44
  sarvamai/requests/create_chat_completion_response.py,sha256=TqS9u5_WVWMok_NreT4TeOsLJQeybPkbJm45Q0Zxw30,857
@@ -51,10 +50,11 @@ sarvamai/requests/error_message.py,sha256=-J21pfEJghsms4pNe55O_9qkODNd-BKLMt96AO
51
50
  sarvamai/requests/error_response.py,sha256=A8j12JQ7JJkUcnt26k2M9uwXXkwyT-LNqG3BO3U8NIk,288
52
51
  sarvamai/requests/error_response_data.py,sha256=l9tGTykaKZ8pKxdw9RKitpW49kKcs4aGibH7rKG2v7w,461
53
52
  sarvamai/requests/events_data.py,sha256=3seSash8DysPUWX6mKPzoEzWZlsrK4Tann2GFSbQjZg,286
54
- sarvamai/requests/initialize_connection.py,sha256=QO4zyg2gEAeNRcxuew6Y5UW9l7OYG68sj_mL3c26GpA,317
55
- sarvamai/requests/initialize_connection_data.py,sha256=R6DA52P2Qlum2uz6Hld5QVSJgfY9GQGIcbvyyJOHK9A,1154
53
+ sarvamai/requests/flush_signal.py,sha256=Aj_PzphMNcHMMOaxvTi1uQ5y36ZTtKEsUGCprbWIOvw,406
54
+ sarvamai/requests/initialize_connection.py,sha256=H4SMx6-TkJ_oyDtQkUpbS8H-njItwNDfHkl9bppLuwk,720
55
+ sarvamai/requests/initialize_connection_data.py,sha256=NEvOWOO5UmIH3qxXsfBgjH2sLnG54HrR9x1AdYfBgCE,2918
56
56
  sarvamai/requests/language_identification_response.py,sha256=BdS5U9Gic-71vb--ph6HGvd2hGNKDXERC7yrn8vFcvI,1098
57
- sarvamai/requests/ping_signal.py,sha256=Auzf9pafJD9vQFFtMLM3iyLtIqsOjEzVd7Jt9q2LoKY,193
57
+ sarvamai/requests/ping_signal.py,sha256=TSgmfz2k4X1L6TzvX8u2SKZ6XQY3bSf7nPZf8mUViaM,343
58
58
  sarvamai/requests/send_text.py,sha256=DWzbNgeNN2xSIYgk2zEisgLqjwq5oleqJVHrtOnIqbE,267
59
59
  sarvamai/requests/send_text_data.py,sha256=2jds-xd77u-YTgIWQsTUBKE-_7tsrFshXXuC_Ld4ULo,161
60
60
  sarvamai/requests/speech_to_text_response.py,sha256=GS3jNmHDOxqNZ7cvftD62khUMSBIQUu6zEPdCqk8zJk,1041
@@ -97,13 +97,11 @@ sarvamai/text/raw_client.py,sha256=lQ7bV9aVqxjwEUHMPEZ4x0_Xs036_yFArMK9rnYT4ZI,4
97
97
  sarvamai/text_to_speech/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
98
98
  sarvamai/text_to_speech/client.py,sha256=aVvwdGTfgVUekALLpdyxKNsGUZQ2Ee2OZBLx6WP6g_E,8842
99
99
  sarvamai/text_to_speech/raw_client.py,sha256=3Zu6HN_FOY683Vm-EN-OL7YAbLsftjJlFm5OyRGNtYc,14780
100
- sarvamai/text_to_speech_streaming/__init__.py,sha256=AyHwl9te1mTfiz6IkMU20quN-0RP5njbchknXCz-oK8,173
101
- sarvamai/text_to_speech_streaming/client.py,sha256=sEC5mVNALldkZri3gHVxHcJCI9TmVwF1vPTL8vfatbY,6161
102
- sarvamai/text_to_speech_streaming/raw_client.py,sha256=DXsU8Rq27yZGINkRmyeyqPWLlSAGsR1RTJWJH81FlTI,5342
103
- sarvamai/text_to_speech_streaming/socket_client.py,sha256=rvlRi2wmyUzWdh2jBqrveD6Ck9dXA-IiCAf0ehZIREE,7799
104
- sarvamai/text_to_speech_streaming/types/__init__.py,sha256=DUsvIGTtST5N1v3Hnodq5aNKfPcTBlod28DSypc8NzA,198
105
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py,sha256=1uOMrJIaAxi_XzwCYmnG5XA-il66cq9uC4ZuiI7HCHo,176
106
- sarvamai/types/__init__.py,sha256=oCDY6p7Hfd26vLAvluWMrP-qpUd-BJdlVE-KipWXqR8,6332
100
+ sarvamai/text_to_speech_streaming/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
101
+ sarvamai/text_to_speech_streaming/client.py,sha256=geTF5xy-batzO12XVt0sPw_XJCi7-m2sDFK_B7SL7qc,6088
102
+ sarvamai/text_to_speech_streaming/raw_client.py,sha256=asOcNw1WAViOiXDVWH4sxWSXGVoLwAOh9vUtq_xralA,5269
103
+ sarvamai/text_to_speech_streaming/socket_client.py,sha256=qZpHFC1z1c6T_FaGOW6CPtWE15glmqClmdc5Iixa7FY,10429
104
+ sarvamai/types/__init__.py,sha256=_5KKlTtdY4kOQ3go9T60EPmSA53AOFGbF5VJOkcnros,6320
107
105
  sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
108
106
  sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
109
107
  sarvamai/types/audio_output.py,sha256=Eq-YUZa1mSDwt7bax2c4Vv2gBlyM_JBJWzHhTAhFSko,621
@@ -114,7 +112,6 @@ sarvamai/types/chat_completion_request_system_message.py,sha256=E7YhTk1zr4u7dj_y
114
112
  sarvamai/types/chat_completion_request_user_message.py,sha256=J3WhlrfOfCCe7ugmJIfP_L9st3OFtXkIjZTSuR8O9nQ,615
115
113
  sarvamai/types/chat_completion_response_message.py,sha256=wz935eBnCkSIl0I0qMxBuH4vAUCso1aHDGReMW1VHGE,744
116
114
  sarvamai/types/choice.py,sha256=uXBCsjWP9VK3XWQWZUeI4EnU10w0G9nAfKn2tJZvxko,1244
117
- sarvamai/types/close_connection.py,sha256=bsPGGh7yYJv0tikBRXmEe6ZC1GsUJy5kwwt30kiiqK4,552
118
115
  sarvamai/types/completion_usage.py,sha256=xYQGlQUbKqsksuV73H-1ajjfT5M7w47eLfdWXSlrI5M,843
119
116
  sarvamai/types/config_message.py,sha256=sGrT-qYTRqLVfIo5nRUuRlqPtPVmiAkUAnaMtlmQYCU,778
120
117
  sarvamai/types/create_chat_completion_response.py,sha256=4nEzeWzHGW1_BmRAtOuGsbRZ0ojNgnzJSMUFyYuYviw,1285
@@ -128,15 +125,16 @@ sarvamai/types/error_response.py,sha256=3m17Aj3GY_-mSY2lH-GkbuiyewTm-wNL9UQ1exLv
128
125
  sarvamai/types/error_response_data.py,sha256=hncN_zmPg8kqSHjNciYLn2QeNqzqHxKiZDlKOvjKdwA,838
129
126
  sarvamai/types/events_data.py,sha256=hDSOyODc8-lmpduJIQkps9kHlUZKYXGw3lETi8irHt0,681
130
127
  sarvamai/types/finish_reason.py,sha256=PBWtBNkX4FMaODmlUehpF6qLB5uH_zR-Mw3M4uhIB6U,209
128
+ sarvamai/types/flush_signal.py,sha256=N7MJWb658KoxRpFN9cIbyQGY45zZcg8YCou3E1v--9o,759
131
129
  sarvamai/types/format.py,sha256=57LicD0XLqW4D1QEnZWsWGifzRy1GV9P5utKPXLoxtg,144
132
- sarvamai/types/initialize_connection.py,sha256=qL9vhyZL7fJxV1UFHtCmFMYaVgZkCWMkP4v-TCREAow,659
133
- sarvamai/types/initialize_connection_data.py,sha256=iCl6fC3QpmIJXOkoC3TjVU4HmUQdbq95RJUHe0nRv1M,1413
130
+ sarvamai/types/initialize_connection.py,sha256=cXHmWJ1GrD1JX9fDMpQzUSRBXWTDAPekoEYtESRtQos,1062
131
+ sarvamai/types/initialize_connection_data.py,sha256=vfHqhN29dg6IwjYMsDKgoAaoYwi6JAljPOQTrCx5CEM,3431
134
132
  sarvamai/types/initialize_connection_data_output_audio_bitrate.py,sha256=Wl2b6KR_shf6FzsBr1RJxBeVduS6AtO8skwQqOaffFc,209
135
- sarvamai/types/initialize_connection_data_speaker.py,sha256=TW-tEGVALD-67FqWLzudeWtFmUPKjZBOy7X2_pjg9Y4,507
133
+ sarvamai/types/initialize_connection_data_speaker.py,sha256=pdLyERHk0NtAKj-7lvebElF7QR1ac2T2jZ-keJsWba8,231
136
134
  sarvamai/types/initialize_connection_data_target_language_code.py,sha256=iEuRFQJPsXO6lCwbNOoB7BzvoLCPIXL7UbIOKOMIjPs,276
137
135
  sarvamai/types/language_identification_response.py,sha256=jG4ZQ6KQHCiEDqC51OniOwiRdW14Fbz22bbTsUDp_kc,1483
138
136
  sarvamai/types/numerals_format.py,sha256=xg3lYiHcnzyFwuwRcaIteJLH_Pz6pJ9n9kTlYPEnCBU,165
139
- sarvamai/types/ping_signal.py,sha256=4X0E96-0Gf4VpJP3GDjzw4963LMxU0a_ykaknJwS_7o,545
137
+ sarvamai/types/ping_signal.py,sha256=cE53FRIXlc8bSo18z6jlAnOh6DhZEMX36huWEX6X3-A,695
140
138
  sarvamai/types/reasoning_effort.py,sha256=_TBLn3rQgzJAdnKqV2g0PETbrSBZl0fPLfQ5ZE9H4Pc,164
141
139
  sarvamai/types/response_type.py,sha256=yyk0QTIQlNa9W0Uoj_5_ey_Q3Bu8Jij5GkgR0Rt_WnU,163
142
140
  sarvamai/types/role.py,sha256=3eY01zZQKB8BSD4cFDeVjz-o2qnHJKz1vnToLqbExxs,115
@@ -175,6 +173,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
175
173
  sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
176
174
  sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
177
175
  sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
178
- sarvamai-0.1.8rc4.dist-info/METADATA,sha256=oq7Tpq7QybN8DkNwZe1pYJfxt1cuHCetcQxvCb1CO48,26760
179
- sarvamai-0.1.8rc4.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
180
- sarvamai-0.1.8rc4.dist-info/RECORD,,
176
+ sarvamai-0.1.8rc6.dist-info/METADATA,sha256=tSU-7crW7SJuPf0lcP4nigyvM2VN1rkBTe7KzaxmnGw,26760
177
+ sarvamai-0.1.8rc6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
178
+ sarvamai-0.1.8rc6.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- # This file was auto-generated by Fern from our API Definition.
2
-
3
- import typing
4
-
5
- import typing_extensions
6
-
7
-
8
- class CloseConnectionParams(typing_extensions.TypedDict):
9
- type: typing.Literal["flush"]
@@ -1,7 +0,0 @@
1
- # This file was auto-generated by Fern from our API Definition.
2
-
3
- # isort: skip_file
4
-
5
- from .text_to_speech_streaming_model import TextToSpeechStreamingModel
6
-
7
- __all__ = ["TextToSpeechStreamingModel"]
@@ -1,5 +0,0 @@
1
- # This file was auto-generated by Fern from our API Definition.
2
-
3
- import typing
4
-
5
- TextToSpeechStreamingModel = typing.Union[typing.Literal["bulbul:v1", "bulbul:v2"], typing.Any]