sarvamai 0.1.8rc5__py3-none-any.whl → 0.1.8rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. sarvamai/__init__.py +14 -16
  2. sarvamai/core/client_wrapper.py +2 -2
  3. sarvamai/requests/__init__.py +4 -4
  4. sarvamai/requests/configure_connection.py +18 -0
  5. sarvamai/requests/configure_connection_data.py +83 -0
  6. sarvamai/requests/flush_signal.py +5 -0
  7. sarvamai/requests/ping_signal.py +5 -0
  8. sarvamai/text_to_speech_streaming/__init__.py +0 -3
  9. sarvamai/text_to_speech_streaming/client.py +4 -5
  10. sarvamai/text_to_speech_streaming/raw_client.py +4 -5
  11. sarvamai/text_to_speech_streaming/socket_client.py +80 -46
  12. sarvamai/types/__init__.py +10 -10
  13. sarvamai/types/configure_connection.py +28 -0
  14. sarvamai/types/configure_connection_data.py +93 -0
  15. sarvamai/types/{initialize_connection_data_output_audio_bitrate.py → configure_connection_data_output_audio_bitrate.py} +1 -1
  16. sarvamai/types/configure_connection_data_speaker.py +7 -0
  17. sarvamai/types/{initialize_connection_data_target_language_code.py → configure_connection_data_target_language_code.py} +1 -1
  18. sarvamai/types/flush_signal.py +5 -0
  19. sarvamai/types/ping_signal.py +5 -0
  20. {sarvamai-0.1.8rc5.dist-info → sarvamai-0.1.8rc7.dist-info}/METADATA +1 -1
  21. {sarvamai-0.1.8rc5.dist-info → sarvamai-0.1.8rc7.dist-info}/RECORD +22 -24
  22. sarvamai/requests/initialize_connection.py +0 -11
  23. sarvamai/requests/initialize_connection_data.py +0 -22
  24. sarvamai/text_to_speech_streaming/types/__init__.py +0 -7
  25. sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +0 -5
  26. sarvamai/types/initialize_connection.py +0 -21
  27. sarvamai/types/initialize_connection_data.py +0 -32
  28. sarvamai/types/initialize_connection_data_speaker.py +0 -28
  29. {sarvamai-0.1.8rc5.dist-info → sarvamai-0.1.8rc7.dist-info}/WHEEL +0 -0
sarvamai/__init__.py CHANGED
@@ -18,6 +18,11 @@ from .types import (
  Choice,
  CompletionUsage,
  ConfigMessage,
+ ConfigureConnection,
+ ConfigureConnectionData,
+ ConfigureConnectionDataOutputAudioBitrate,
+ ConfigureConnectionDataSpeaker,
+ ConfigureConnectionDataTargetLanguageCode,
  CreateChatCompletionResponse,
  DiarizedEntry,
  DiarizedTranscript,
@@ -31,11 +36,6 @@ from .types import (
  FinishReason,
  FlushSignal,
  Format,
- InitializeConnection,
- InitializeConnectionData,
- InitializeConnectionDataOutputAudioBitrate,
- InitializeConnectionDataSpeaker,
- InitializeConnectionDataTargetLanguageCode,
  LanguageIdentificationResponse,
  NumeralsFormat,
  PingSignal,
@@ -112,6 +112,8 @@ from .requests import (
  ChoiceParams,
  CompletionUsageParams,
  ConfigMessageParams,
+ ConfigureConnectionDataParams,
+ ConfigureConnectionParams,
  CreateChatCompletionResponseParams,
  DiarizedEntryParams,
  DiarizedTranscriptParams,
@@ -122,8 +124,6 @@ from .requests import (
  ErrorResponseParams,
  EventsDataParams,
  FlushSignalParams,
- InitializeConnectionDataParams,
- InitializeConnectionParams,
  LanguageIdentificationResponseParams,
  PingSignalParams,
  SendTextDataParams,
@@ -154,7 +154,6 @@ from .speech_to_text_translate_streaming import (
  SpeechToTextTranslateStreamingModel,
  SpeechToTextTranslateStreamingVadSignals,
  )
- from .text_to_speech_streaming import TextToSpeechStreamingModel
  from .version import __version__

  __all__ = [
@@ -190,6 +189,13 @@ __all__ = [
  "CompletionUsageParams",
  "ConfigMessage",
  "ConfigMessageParams",
+ "ConfigureConnection",
+ "ConfigureConnectionData",
+ "ConfigureConnectionDataOutputAudioBitrate",
+ "ConfigureConnectionDataParams",
+ "ConfigureConnectionDataSpeaker",
+ "ConfigureConnectionDataTargetLanguageCode",
+ "ConfigureConnectionParams",
  "CreateChatCompletionResponse",
  "CreateChatCompletionResponseParams",
  "DiarizedEntry",
@@ -214,13 +220,6 @@ __all__ = [
  "FlushSignalParams",
  "ForbiddenError",
  "Format",
- "InitializeConnection",
- "InitializeConnectionData",
- "InitializeConnectionDataOutputAudioBitrate",
- "InitializeConnectionDataParams",
- "InitializeConnectionDataSpeaker",
- "InitializeConnectionDataTargetLanguageCode",
- "InitializeConnectionParams",
  "InternalServerError",
  "LanguageIdentificationResponse",
  "LanguageIdentificationResponseParams",
@@ -274,7 +273,6 @@ __all__ = [
  "TextToSpeechResponse",
  "TextToSpeechResponseParams",
  "TextToSpeechSpeaker",
- "TextToSpeechStreamingModel",
  "TimestampsModel",
  "TimestampsModelParams",
  "TooManyRequestsError",
sarvamai/core/client_wrapper.py CHANGED
@@ -17,10 +17,10 @@ class BaseClientWrapper:

  def get_headers(self) -> typing.Dict[str, str]:
  headers: typing.Dict[str, str] = {
- "User-Agent": "sarvamai/0.1.8rc5",
+ "User-Agent": "sarvamai/0.1.8rc7",
  "X-Fern-Language": "Python",
  "X-Fern-SDK-Name": "sarvamai",
- "X-Fern-SDK-Version": "0.1.8rc5",
+ "X-Fern-SDK-Version": "0.1.8rc7",
  }
  headers["api-subscription-key"] = self.api_subscription_key
  return headers
sarvamai/requests/__init__.py CHANGED
@@ -19,6 +19,8 @@ from .chat_completion_response_message import ChatCompletionResponseMessageParam
  from .choice import ChoiceParams
  from .completion_usage import CompletionUsageParams
  from .config_message import ConfigMessageParams
+ from .configure_connection import ConfigureConnectionParams
+ from .configure_connection_data import ConfigureConnectionDataParams
  from .create_chat_completion_response import CreateChatCompletionResponseParams
  from .diarized_entry import DiarizedEntryParams
  from .diarized_transcript import DiarizedTranscriptParams
@@ -29,8 +31,6 @@ from .error_response import ErrorResponseParams
  from .error_response_data import ErrorResponseDataParams
  from .events_data import EventsDataParams
  from .flush_signal import FlushSignalParams
- from .initialize_connection import InitializeConnectionParams
- from .initialize_connection_data import InitializeConnectionDataParams
  from .language_identification_response import LanguageIdentificationResponseParams
  from .ping_signal import PingSignalParams
  from .send_text import SendTextParams
@@ -66,6 +66,8 @@ __all__ = [
  "ChoiceParams",
  "CompletionUsageParams",
  "ConfigMessageParams",
+ "ConfigureConnectionDataParams",
+ "ConfigureConnectionParams",
  "CreateChatCompletionResponseParams",
  "DiarizedEntryParams",
  "DiarizedTranscriptParams",
@@ -76,8 +78,6 @@ __all__ = [
  "ErrorResponseParams",
  "EventsDataParams",
  "FlushSignalParams",
- "InitializeConnectionDataParams",
- "InitializeConnectionParams",
  "LanguageIdentificationResponseParams",
  "PingSignalParams",
  "SendTextDataParams",
sarvamai/requests/configure_connection.py ADDED
@@ -0,0 +1,18 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ import typing_extensions
+ from .configure_connection_data import ConfigureConnectionDataParams
+
+
+ class ConfigureConnectionParams(typing_extensions.TypedDict):
+ """
+ Configuration message required as the first message after establishing the WebSocket connection.
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+ by sending a new config message. When a config update is sent, any text currently in the buffer
+ will be automatically flushed and processed before applying the new configuration.
+ """
+
+ type: typing.Literal["config"]
+ data: ConfigureConnectionDataParams
sarvamai/requests/configure_connection_data.py ADDED
@@ -0,0 +1,83 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ import typing_extensions
+ from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+ from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
+ from ..types.configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
+
+
+ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
+ target_language_code: ConfigureConnectionDataTargetLanguageCode
+ """
+ The language of the text is BCP-47 format
+ """
+
+ speaker: ConfigureConnectionDataSpeaker
+ """
+ The speaker voice to be used for the output audio.
+
+ **Default:** Anushka
+
+ **Model Compatibility (Speakers compatible with respective model):**
+ - **bulbul:v2:**
+ - Female: Anushka, Manisha, Vidya, Arya
+ - Male: Abhilash, Karun, Hitesh
+
+ **Note:** Speaker selection must match the chosen model version.
+ """
+
+ pitch: typing_extensions.NotRequired[float]
+ """
+ Controls the pitch of the audio. Lower values result in a deeper voice,
+ while higher values make it sharper. The suitable range is between -0.75
+ and 0.75. Default is 0.0.
+ """
+
+ pace: typing_extensions.NotRequired[float]
+ """
+ Controls the speed of the audio. Lower values result in slower speech,
+ while higher values make it faster. The suitable range is between 0.5
+ and 2.0. Default is 1.0.
+ """
+
+ loudness: typing_extensions.NotRequired[float]
+ """
+ Controls the loudness of the audio. Lower values result in quieter audio,
+ while higher values make it louder. The suitable range is between 0.3
+ and 3.0. Default is 1.0.
+ """
+
+ speech_sample_rate: typing_extensions.NotRequired[int]
+ """
+ Specifies the sample rate of the output audio. Supported values are
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+ """
+
+ enable_preprocessing: typing_extensions.NotRequired[bool]
+ """
+ Controls whether normalization of English words and numeric entities
+ (e.g., numbers, dates) is performed. Set to true for better handling
+ of mixed-language text. Default is false.
+ """
+
+ output_audio_codec: typing_extensions.NotRequired[typing.Literal["mp3"]]
+ """
+ Audio codec (currently supports MP3 only, optimized for real-time playback)
+ """
+
+ output_audio_bitrate: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioBitrate]
+ """
+ Audio bitrate (choose from 5 supported bitrate options)
+ """
+
+ min_buffer_size: typing_extensions.NotRequired[int]
+ """
+ Minimum character length that triggers buffer flushing for TTS model processing
+ """
+
+ max_chunk_length: typing_extensions.NotRequired[int]
+ """
+ Maximum length for sentence splitting (adjust based on content length)
+ """
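A minimal sketch of filling in the new request-side TypedDict; the field names and literal values come from the definitions above, while the particular values chosen here are only illustrative:

```python
from sarvamai.requests import ConfigureConnectionDataParams, ConfigureConnectionParams

# Only target_language_code and speaker are required; the rest are NotRequired.
config_data: ConfigureConnectionDataParams = {
    "target_language_code": "hi-IN",   # BCP-47 code from ConfigureConnectionDataTargetLanguageCode
    "speaker": "anushka",              # one of the bulbul:v2 speaker literals
    "pace": 1.0,                       # optional, suitable range 0.5-2.0
    "output_audio_codec": "mp3",       # currently the only supported codec
    "output_audio_bitrate": "128k",    # "32k", "64k", "96k", "128k" or "192k"
}

# Wrapped in the top-level "config" message shape:
config_message: ConfigureConnectionParams = {"type": "config", "data": config_data}
```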
sarvamai/requests/flush_signal.py CHANGED
@@ -6,4 +6,9 @@ import typing_extensions


  class FlushSignalParams(typing_extensions.TypedDict):
+ """
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
+ """
+
  type: typing.Literal["flush"]
sarvamai/requests/ping_signal.py CHANGED
@@ -6,4 +6,9 @@ import typing_extensions


  class PingSignalParams(typing_extensions.TypedDict):
+ """
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
+ closes after one minute of inactivity.
+ """
+
  type: typing.Literal["ping"]
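For comparison, the two control signals are plain discriminated TypedDicts; a small sketch of constructing them (nothing beyond the fields shown above):

```python
from sarvamai.requests import FlushSignalParams, PingSignalParams

flush_msg: FlushSignalParams = {"type": "flush"}  # force buffered text to be processed now
ping_msg: PingSignalParams = {"type": "ping"}     # keep-alive; idle connections close after ~1 minute
```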
sarvamai/text_to_speech_streaming/__init__.py CHANGED
@@ -2,6 +2,3 @@

  # isort: skip_file

- from .types import TextToSpeechStreamingModel
-
- __all__ = ["TextToSpeechStreamingModel"]
sarvamai/text_to_speech_streaming/client.py CHANGED
@@ -11,7 +11,6 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
  from ..core.request_options import RequestOptions
  from .raw_client import AsyncRawTextToSpeechStreamingClient, RawTextToSpeechStreamingClient
  from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
- from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel


  class TextToSpeechStreamingClient:
@@ -33,7 +32,7 @@ class TextToSpeechStreamingClient:
  def connect(
  self,
  *,
- model: typing.Optional[TextToSpeechStreamingModel] = None,
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
  api_subscription_key: typing.Optional[str] = None,
  request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -43,7 +42,7 @@

  Parameters
  ----------
- model : typing.Optional[TextToSpeechStreamingModel]
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
  Text to speech model to use

  api_subscription_key : typing.Optional[str]
@@ -103,7 +102,7 @@ class AsyncTextToSpeechStreamingClient:
  async def connect(
  self,
  *,
- model: typing.Optional[TextToSpeechStreamingModel] = None,
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
  api_subscription_key: typing.Optional[str] = None,
  request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -113,7 +112,7 @@

  Parameters
  ----------
- model : typing.Optional[TextToSpeechStreamingModel]
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
  Text to speech model to use

  api_subscription_key : typing.Optional[str]
sarvamai/text_to_speech_streaming/raw_client.py CHANGED
@@ -10,7 +10,6 @@ from ..core.api_error import ApiError
  from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
  from ..core.request_options import RequestOptions
  from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
- from .types.text_to_speech_streaming_model import TextToSpeechStreamingModel


  class RawTextToSpeechStreamingClient:
@@ -21,7 +20,7 @@ class RawTextToSpeechStreamingClient:
  def connect(
  self,
  *,
- model: typing.Optional[TextToSpeechStreamingModel] = None,
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
  api_subscription_key: typing.Optional[str] = None,
  request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
@@ -31,7 +30,7 @@

  Parameters
  ----------
- model : typing.Optional[TextToSpeechStreamingModel]
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
  Text to speech model to use

  api_subscription_key : typing.Optional[str]
@@ -80,7 +79,7 @@ class AsyncRawTextToSpeechStreamingClient:
  async def connect(
  self,
  *,
- model: typing.Optional[TextToSpeechStreamingModel] = None,
+ model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
  api_subscription_key: typing.Optional[str] = None,
  request_options: typing.Optional[RequestOptions] = None,
  ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
@@ -90,7 +89,7 @@

  Parameters
  ----------
- model : typing.Optional[TextToSpeechStreamingModel]
+ model : typing.Optional[typing.Literal["bulbul:v2"]]
  Text to speech model to use

  api_subscription_key : typing.Optional[str]
sarvamai/text_to_speech_streaming/socket_client.py CHANGED
@@ -10,8 +10,8 @@ from ..core.pydantic_utilities import parse_obj_as
  from ..types.audio_output import AudioOutput
  from ..types.flush_signal import FlushSignal
  from ..types.error_response import ErrorResponse
- from ..types.initialize_connection import InitializeConnection
- from ..types.initialize_connection_data import InitializeConnectionData
+ from ..types.configure_connection import ConfigureConnection
+ from ..types.configure_connection_data import ConfigureConnectionData
  from ..types.ping_signal import PingSignal
  from ..types.send_text import SendText
  from ..types.send_text_data import SendTextData
@@ -54,10 +54,10 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
  finally:
  self._emit(EventType.CLOSE, None)

- async def initialize_connection(
+ async def configure(
  self,
  target_language_code: str,
- speaker: str,
+ speaker: str = "anushka",
  pitch: float = 0.0,
  pace: float = 1.0,
  loudness: float = 1.0,
@@ -69,21 +69,35 @@
  max_chunk_length: int = 150,
  ) -> None:
  """
- Initialize the TTS connection with configuration parameters.
+ Configuration message required as the first message after establishing the WebSocket connection.
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+ by sending a new config message. When a config update is sent, any text currently in the buffer
+ will be automatically flushed and processed before applying the new configuration.

- :param target_language_code: Target language code (e.g., 'hi-IN')
- :param speaker: Voice speaker name (e.g., 'meera', 'arvind')
- :param pitch: Voice pitch adjustment (-1.0 to 1.0, default: 0.0)
- :param pace: Speech pace (0.3 to 3.0, default: 1.0)
- :param loudness: Voice loudness (0.1 to 3.0, default: 1.0)
- :param speech_sample_rate: Audio sample rate, default: 22050
- :param enable_preprocessing: Enable text preprocessing, default: False
- :param output_audio_codec: Audio codec, default: 'mp3'
- :param output_audio_bitrate: Audio bitrate, default: '128k'
- :param min_buffer_size: Minimum buffer size, default: 50
- :param max_chunk_length: Maximum chunk length, default: 150
- """
- data = InitializeConnectionData(
+ :param target_language_code: The language of the text is BCP-47 format
+ :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
+ Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
+ Male: Abhilash, Karun, Hitesh
+ :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
+ while higher values make it sharper. The suitable range is between -0.75
+ and 0.75. Default is 0.0.
+ :param pace: Controls the speed of the audio. Lower values result in slower speech,
+ while higher values make it faster. The suitable range is between 0.5
+ and 2.0. Default is 1.0.
+ :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
+ while higher values make it louder. The suitable range is between 0.3
+ and 3.0. Default is 1.0.
+ :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+ :param enable_preprocessing: Controls whether normalization of English words and numeric entities
+ (e.g., numbers, dates) is performed. Set to true for better handling
+ of mixed-language text. Default is false.
+ :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
+ :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
+ :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
+ :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
+ """
+ data = ConfigureConnectionData(
  target_language_code=target_language_code,
  speaker=speaker,
  pitch=pitch,
@@ -96,14 +110,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
  min_buffer_size=min_buffer_size,
  max_chunk_length=max_chunk_length,
  )
- message = InitializeConnection(data=data)
+ message = ConfigureConnection(data=data)
  await self._send_model(message)

  async def convert(self, text: str) -> None:
  """
- Send text to be converted to speech.
+ Send text to be converted to speech. Text length should be 1-2500 characters.
+ Recommended: <500 characters for optimal streaming performance.
+ Real-time endpoints perform better with longer character counts.

- :param text: Text to be synthesized (1-2500 characters)
+ :param text: Text to be synthesized (1-2500 characters, recommended <500)
  """
  data = SendTextData(text=text)
  message = SendText(data=data)
@@ -111,15 +127,16 @@ class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):

  async def flush(self) -> None:
  """
- Signal to flush the buffer and finalize audio output.
- This indicates the end of text input.
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
  """
  message = FlushSignal()
  await self._send_model(message)

  async def ping(self) -> None:
  """
- Send ping signal to keep the WebSocket connection alive.
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
+ closes after one minute of inactivity.
  """
  message = PingSignal()
  await self._send_model(message)
@@ -182,10 +199,10 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
  finally:
  self._emit(EventType.CLOSE, None)

- def initialize_connection(
+ def configure(
  self,
  target_language_code: str,
- speaker: str,
+ speaker: str = "anushka",
  pitch: float = 0.0,
  pace: float = 1.0,
  loudness: float = 1.0,
@@ -197,21 +214,35 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
  max_chunk_length: int = 150,
  ) -> None:
  """
- Initialize the TTS connection with configuration parameters.
+ Configuration message required as the first message after establishing the WebSocket connection.
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+ by sending a new config message. When a config update is sent, any text currently in the buffer
+ will be automatically flushed and processed before applying the new configuration.

- :param target_language_code: Target language code (e.g., 'hi-IN')
- :param speaker: Voice speaker name (e.g., 'meera', 'arvind')
- :param pitch: Voice pitch adjustment (-1.0 to 1.0, default: 0.0)
- :param pace: Speech pace (0.3 to 3.0, default: 1.0)
- :param loudness: Voice loudness (0.1 to 3.0, default: 1.0)
- :param speech_sample_rate: Audio sample rate, default: 22050
- :param enable_preprocessing: Enable text preprocessing, default: False
- :param output_audio_codec: Audio codec, default: 'mp3'
- :param output_audio_bitrate: Audio bitrate, default: '128k'
- :param min_buffer_size: Minimum buffer size, default: 50
- :param max_chunk_length: Maximum chunk length, default: 150
- """
- data = InitializeConnectionData(
+ :param target_language_code: The language of the text is BCP-47 format
+ :param speaker: The speaker voice to be used for the output audio. Default: Anushka.
+ Model Compatibility (bulbul:v2): Female: Anushka, Manisha, Vidya, Arya;
+ Male: Abhilash, Karun, Hitesh
+ :param pitch: Controls the pitch of the audio. Lower values result in a deeper voice,
+ while higher values make it sharper. The suitable range is between -0.75
+ and 0.75. Default is 0.0.
+ :param pace: Controls the speed of the audio. Lower values result in slower speech,
+ while higher values make it faster. The suitable range is between 0.5
+ and 2.0. Default is 1.0.
+ :param loudness: Controls the loudness of the audio. Lower values result in quieter audio,
+ while higher values make it louder. The suitable range is between 0.3
+ and 3.0. Default is 1.0.
+ :param speech_sample_rate: Specifies the sample rate of the output audio. Supported values are
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+ :param enable_preprocessing: Controls whether normalization of English words and numeric entities
+ (e.g., numbers, dates) is performed. Set to true for better handling
+ of mixed-language text. Default is false.
+ :param output_audio_codec: Audio codec (currently supports MP3 only, optimized for real-time playback)
+ :param output_audio_bitrate: Audio bitrate (choose from 5 supported bitrate options)
+ :param min_buffer_size: Minimum character length that triggers buffer flushing for TTS model processing
+ :param max_chunk_length: Maximum length for sentence splitting (adjust based on content length)
+ """
+ data = ConfigureConnectionData(
  target_language_code=target_language_code,
  speaker=speaker,
  pitch=pitch,
@@ -224,14 +255,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):
  min_buffer_size=min_buffer_size,
  max_chunk_length=max_chunk_length,
  )
- message = InitializeConnection(data=data)
+ message = ConfigureConnection(data=data)
  self._send_model(message)

  def convert(self, text: str) -> None:
  """
- Send text to be converted to speech.
+ Send text to be converted to speech. Text length should be 1-2500 characters.
+ Recommended: <500 characters for optimal streaming performance.
+ Real-time endpoints perform better with longer character counts.

- :param text: Text to be synthesized (1-2500 characters)
+ :param text: Text to be synthesized (1-2500 characters, recommended <500)
  """
  data = SendTextData(text=text)
  message = SendText(data=data)
@@ -239,15 +272,16 @@ class TextToSpeechStreamingSocketClient(EventEmitterMixin):

  def flush(self) -> None:
  """
- Signal to flush the buffer and finalize audio output.
- This indicates the end of text input.
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
  """
  message = FlushSignal()
  self._send_model(message)

  def ping(self) -> None:
  """
- Send ping signal to keep the WebSocket connection alive.
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
+ closes after one minute of inactivity.
  """
  message = PingSignal()
  self._send_model(message)
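Taken together, the renamed socket-client API is used roughly as below. This is a hedged sketch: it assumes `socket` is an already-connected `TextToSpeechStreamingSocketClient` obtained from the streaming client's `connect(...)` helper, and the parameter values are only examples.

```python
from sarvamai.text_to_speech_streaming.socket_client import TextToSpeechStreamingSocketClient


def synthesize(socket: TextToSpeechStreamingSocketClient) -> None:
    # configure() (formerly initialize_connection) must be the first message after
    # connecting, and may be sent again later to update the TTS parameters.
    socket.configure(target_language_code="hi-IN", speaker="anushka", pace=1.0)
    socket.convert("Namaste! This is a streaming TTS test.")  # 1-2500 characters per call
    socket.flush()  # force any text below min_buffer_size to be processed
    socket.ping()   # optional keep-alive while idle
```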
sarvamai/types/__init__.py CHANGED
@@ -19,6 +19,11 @@ from .chat_completion_response_message import ChatCompletionResponseMessage
  from .choice import Choice
  from .completion_usage import CompletionUsage
  from .config_message import ConfigMessage
+ from .configure_connection import ConfigureConnection
+ from .configure_connection_data import ConfigureConnectionData
+ from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+ from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
+ from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
  from .create_chat_completion_response import CreateChatCompletionResponse
  from .diarized_entry import DiarizedEntry
  from .diarized_transcript import DiarizedTranscript
@@ -32,11 +37,6 @@ from .events_data import EventsData
  from .finish_reason import FinishReason
  from .flush_signal import FlushSignal
  from .format import Format
- from .initialize_connection import InitializeConnection
- from .initialize_connection_data import InitializeConnectionData
- from .initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
- from .initialize_connection_data_speaker import InitializeConnectionDataSpeaker
- from .initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
  from .language_identification_response import LanguageIdentificationResponse
  from .numerals_format import NumeralsFormat
  from .ping_signal import PingSignal
@@ -94,6 +94,11 @@ __all__ = [
  "Choice",
  "CompletionUsage",
  "ConfigMessage",
+ "ConfigureConnection",
+ "ConfigureConnectionData",
+ "ConfigureConnectionDataOutputAudioBitrate",
+ "ConfigureConnectionDataSpeaker",
+ "ConfigureConnectionDataTargetLanguageCode",
  "CreateChatCompletionResponse",
  "DiarizedEntry",
  "DiarizedTranscript",
@@ -107,11 +112,6 @@ __all__ = [
  "FinishReason",
  "FlushSignal",
  "Format",
- "InitializeConnection",
- "InitializeConnectionData",
- "InitializeConnectionDataOutputAudioBitrate",
- "InitializeConnectionDataSpeaker",
- "InitializeConnectionDataTargetLanguageCode",
  "LanguageIdentificationResponse",
  "NumeralsFormat",
  "PingSignal",
sarvamai/types/configure_connection.py ADDED
@@ -0,0 +1,28 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ import pydantic
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+ from .configure_connection_data import ConfigureConnectionData
+
+
+ class ConfigureConnection(UniversalBaseModel):
+ """
+ Configuration message required as the first message after establishing the WebSocket connection.
+ This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
+ by sending a new config message. When a config update is sent, any text currently in the buffer
+ will be automatically flushed and processed before applying the new configuration.
+ """
+
+ type: typing.Literal["config"] = "config"
+ data: ConfigureConnectionData
+
+ if IS_PYDANTIC_V2:
+ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
+ else:
+
+ class Config:
+ frozen = True
+ smart_union = True
+ extra = pydantic.Extra.allow
sarvamai/types/configure_connection_data.py ADDED
@@ -0,0 +1,93 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ import pydantic
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+ from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+ from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
+ from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
+
+
+ class ConfigureConnectionData(UniversalBaseModel):
+ target_language_code: ConfigureConnectionDataTargetLanguageCode = pydantic.Field()
+ """
+ The language of the text is BCP-47 format
+ """
+
+ speaker: ConfigureConnectionDataSpeaker = pydantic.Field()
+ """
+ The speaker voice to be used for the output audio.
+
+ **Default:** Anushka
+
+ **Model Compatibility (Speakers compatible with respective model):**
+ - **bulbul:v2:**
+ - Female: Anushka, Manisha, Vidya, Arya
+ - Male: Abhilash, Karun, Hitesh
+
+ **Note:** Speaker selection must match the chosen model version.
+ """
+
+ pitch: typing.Optional[float] = pydantic.Field(default=None)
+ """
+ Controls the pitch of the audio. Lower values result in a deeper voice,
+ while higher values make it sharper. The suitable range is between -0.75
+ and 0.75. Default is 0.0.
+ """
+
+ pace: typing.Optional[float] = pydantic.Field(default=None)
+ """
+ Controls the speed of the audio. Lower values result in slower speech,
+ while higher values make it faster. The suitable range is between 0.5
+ and 2.0. Default is 1.0.
+ """
+
+ loudness: typing.Optional[float] = pydantic.Field(default=None)
+ """
+ Controls the loudness of the audio. Lower values result in quieter audio,
+ while higher values make it louder. The suitable range is between 0.3
+ and 3.0. Default is 1.0.
+ """
+
+ speech_sample_rate: typing.Optional[int] = pydantic.Field(default=None)
+ """
+ Specifies the sample rate of the output audio. Supported values are
+ 8000, 16000, 22050, 24000 Hz. If not provided, the default is 22050 Hz.
+ """
+
+ enable_preprocessing: typing.Optional[bool] = pydantic.Field(default=None)
+ """
+ Controls whether normalization of English words and numeric entities
+ (e.g., numbers, dates) is performed. Set to true for better handling
+ of mixed-language text. Default is false.
+ """
+
+ output_audio_codec: typing.Optional[typing.Literal["mp3"]] = pydantic.Field(default=None)
+ """
+ Audio codec (currently supports MP3 only, optimized for real-time playback)
+ """
+
+ output_audio_bitrate: typing.Optional[ConfigureConnectionDataOutputAudioBitrate] = pydantic.Field(default=None)
+ """
+ Audio bitrate (choose from 5 supported bitrate options)
+ """
+
+ min_buffer_size: typing.Optional[int] = pydantic.Field(default=None)
+ """
+ Minimum character length that triggers buffer flushing for TTS model processing
+ """
+
+ max_chunk_length: typing.Optional[int] = pydantic.Field(default=None)
+ """
+ Maximum length for sentence splitting (adjust based on content length)
+ """
+
+ if IS_PYDANTIC_V2:
+ model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
+ else:
+
+ class Config:
+ frozen = True
+ smart_union = True
+ extra = pydantic.Extra.allow
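The Pydantic counterparts can also be constructed directly; this mirrors what `configure()` builds internally (the values shown are only illustrative):

```python
from sarvamai.types import ConfigureConnection, ConfigureConnectionData

message = ConfigureConnection(
    data=ConfigureConnectionData(
        target_language_code="en-IN",
        speaker="vidya",
        speech_sample_rate=22050,  # optional; 8000, 16000, 22050 or 24000 Hz
    )
)
assert message.type == "config"  # the discriminator defaults to the literal "config"
```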
sarvamai/types/{initialize_connection_data_output_audio_bitrate.py → configure_connection_data_output_audio_bitrate.py} RENAMED
@@ -2,6 +2,6 @@

  import typing

- InitializeConnectionDataOutputAudioBitrate = typing.Union[
+ ConfigureConnectionDataOutputAudioBitrate = typing.Union[
  typing.Literal["32k", "64k", "96k", "128k", "192k"], typing.Any
  ]
sarvamai/types/configure_connection_data_speaker.py ADDED
@@ -0,0 +1,7 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ ConfigureConnectionDataSpeaker = typing.Union[
+ typing.Literal["anushka", "abhilash", "manisha", "vidya", "arya", "karun", "hitesh"], typing.Any
+ ]
sarvamai/types/{initialize_connection_data_target_language_code.py → configure_connection_data_target_language_code.py} RENAMED
@@ -2,7 +2,7 @@

  import typing

- InitializeConnectionDataTargetLanguageCode = typing.Union[
+ ConfigureConnectionDataTargetLanguageCode = typing.Union[
  typing.Literal["bn-IN", "en-IN", "gu-IN", "hi-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN", "pa-IN", "ta-IN", "te-IN"],
  typing.Any,
  ]
sarvamai/types/flush_signal.py CHANGED
@@ -7,6 +7,11 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel


  class FlushSignal(UniversalBaseModel):
+ """
+ Forces the text buffer to process immediately, regardless of the min_buffer_size threshold.
+ Use this when you need to process remaining text that hasn't reached the minimum buffer size.
+ """
+
  type: typing.Literal["flush"] = "flush"

  if IS_PYDANTIC_V2:
sarvamai/types/ping_signal.py CHANGED
@@ -7,6 +7,11 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel


  class PingSignal(UniversalBaseModel):
+ """
+ Send ping signal to keep the WebSocket connection alive. The connection automatically
+ closes after one minute of inactivity.
+ """
+
  type: typing.Literal["ping"] = "ping"

  if IS_PYDANTIC_V2:
{sarvamai-0.1.8rc5.dist-info → sarvamai-0.1.8rc7.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sarvamai
- Version: 0.1.8rc5
+ Version: 0.1.8rc7
  Summary:
  Requires-Python: >=3.8,<4.0
  Classifier: Intended Audience :: Developers
{sarvamai-0.1.8rc5.dist-info → sarvamai-0.1.8rc7.dist-info}/RECORD CHANGED
@@ -1,11 +1,11 @@
- sarvamai/__init__.py,sha256=1T6EojO84La_t1nZ0AphCawAVhYQAlgMk8A7ZC0FH4M,8937
+ sarvamai/__init__.py,sha256=xkQNmsHue7UaEN7PgNfH2ExBGBdyszDgg5omMkSnZMM,8824
  sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
  sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
  sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
  sarvamai/client.py,sha256=aI1sw5LVGMjgukgZLDlUmA17ecK1yGsQxH-W_JiCrco,7177
  sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
  sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
- sarvamai/core/client_wrapper.py,sha256=ucQFDVPXC5Z3Tn-1T8MlxyL0QrswihfNYW1J7w8LJS0,2080
+ sarvamai/core/client_wrapper.py,sha256=-Vik8MO2fDeTSfXmt3KybXUHsipm3dqlcwxLaBwaqA8,2080
  sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
  sarvamai/core/events.py,sha256=j7VWXgMpOsjCXdzY22wIhI7Q-v5InZ4WchRzA88x_Sk,856
  sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
@@ -28,7 +28,7 @@ sarvamai/errors/too_many_requests_error.py,sha256=Dl-_pfpboXJh-OtSbRaPQOB-UXvpVO
  sarvamai/errors/unprocessable_entity_error.py,sha256=JqxtzIhvjkpQDqbT9Q-go1n-gyv9PsYqq0ng_ZYyBMo,347
  sarvamai/play.py,sha256=4fh86zy8g8IPU2O8yPBY7QxXQOivv_nWQvPQsOa1arw,2183
  sarvamai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sarvamai/requests/__init__.py,sha256=UmEj75u8yYiZYL0M0QVJoCuS8oduaH82eD3bmP-6myg,4484
+ sarvamai/requests/__init__.py,sha256=AYpixS_3RLRqMW4U2OfeACJjD3u3NnethhdYr_V-P9M,4478
  sarvamai/requests/audio_data.py,sha256=QI3SK5aiAg2yJ-m3l9CxOkONnH3CCKMFCl9kAdMs19o,410
  sarvamai/requests/audio_message.py,sha256=ZBeogjGE6YFXXM-0g8zq9SoizDk21reR0YXSB-0fMjg,214
  sarvamai/requests/audio_output.py,sha256=BnoX345rwoWgaMaj24u_19-SjmPV0xt7vlFEEDKRw20,280
@@ -41,6 +41,8 @@ sarvamai/requests/chat_completion_response_message.py,sha256=JFazj4zK-nj_wjdvNLD
  sarvamai/requests/choice.py,sha256=uulX4MZUoThEMcD3a80o_3V5YpnpqN8DfPaNZWVz-1o,867
  sarvamai/requests/completion_usage.py,sha256=LbZV-RxcxKdCAYqhCiaRtSFF3VwMJq71A989Z1rm-I8,428
  sarvamai/requests/config_message.py,sha256=EpYioGvDhCXDMvGH7Q1F7448zJzoHmlkQ1owoNGbWAw,383
+ sarvamai/requests/configure_connection.py,sha256=a-foQtLxArL4CulvKEdeebbRqmS1GRmko3MZdnHVPEk,716
+ sarvamai/requests/configure_connection_data.py,sha256=Niil2OrVBzQEtmWFn1JC-StLVp6WzzRIsu2i_M_8_44,2908
  sarvamai/requests/create_chat_completion_response.py,sha256=TqS9u5_WVWMok_NreT4TeOsLJQeybPkbJm45Q0Zxw30,857
  sarvamai/requests/diarized_entry.py,sha256=gbXB4D_r5_Q8gs1arRKjxPeFcYg16dVDLcg2VhxmKQA,462
  sarvamai/requests/diarized_transcript.py,sha256=X-znuJ45oqwXzVyJumBHSqVGLz6JnoYFZmluQlEpEAw,323
@@ -50,11 +52,9 @@ sarvamai/requests/error_message.py,sha256=-J21pfEJghsms4pNe55O_9qkODNd-BKLMt96AO
  sarvamai/requests/error_response.py,sha256=A8j12JQ7JJkUcnt26k2M9uwXXkwyT-LNqG3BO3U8NIk,288
  sarvamai/requests/error_response_data.py,sha256=l9tGTykaKZ8pKxdw9RKitpW49kKcs4aGibH7rKG2v7w,461
  sarvamai/requests/events_data.py,sha256=3seSash8DysPUWX6mKPzoEzWZlsrK4Tann2GFSbQjZg,286
- sarvamai/requests/flush_signal.py,sha256=k087oW96WFiC0j5opFMjytgqXPi37jYljhCEqLx9d5o,195
- sarvamai/requests/initialize_connection.py,sha256=QO4zyg2gEAeNRcxuew6Y5UW9l7OYG68sj_mL3c26GpA,317
- sarvamai/requests/initialize_connection_data.py,sha256=R6DA52P2Qlum2uz6Hld5QVSJgfY9GQGIcbvyyJOHK9A,1154
+ sarvamai/requests/flush_signal.py,sha256=Aj_PzphMNcHMMOaxvTi1uQ5y36ZTtKEsUGCprbWIOvw,406
  sarvamai/requests/language_identification_response.py,sha256=BdS5U9Gic-71vb--ph6HGvd2hGNKDXERC7yrn8vFcvI,1098
- sarvamai/requests/ping_signal.py,sha256=Auzf9pafJD9vQFFtMLM3iyLtIqsOjEzVd7Jt9q2LoKY,193
+ sarvamai/requests/ping_signal.py,sha256=TSgmfz2k4X1L6TzvX8u2SKZ6XQY3bSf7nPZf8mUViaM,343
  sarvamai/requests/send_text.py,sha256=DWzbNgeNN2xSIYgk2zEisgLqjwq5oleqJVHrtOnIqbE,267
  sarvamai/requests/send_text_data.py,sha256=2jds-xd77u-YTgIWQsTUBKE-_7tsrFshXXuC_Ld4ULo,161
  sarvamai/requests/speech_to_text_response.py,sha256=GS3jNmHDOxqNZ7cvftD62khUMSBIQUu6zEPdCqk8zJk,1041
@@ -97,13 +97,11 @@ sarvamai/text/raw_client.py,sha256=lQ7bV9aVqxjwEUHMPEZ4x0_Xs036_yFArMK9rnYT4ZI,4
  sarvamai/text_to_speech/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
  sarvamai/text_to_speech/client.py,sha256=aVvwdGTfgVUekALLpdyxKNsGUZQ2Ee2OZBLx6WP6g_E,8842
  sarvamai/text_to_speech/raw_client.py,sha256=3Zu6HN_FOY683Vm-EN-OL7YAbLsftjJlFm5OyRGNtYc,14780
- sarvamai/text_to_speech_streaming/__init__.py,sha256=AyHwl9te1mTfiz6IkMU20quN-0RP5njbchknXCz-oK8,173
- sarvamai/text_to_speech_streaming/client.py,sha256=sEC5mVNALldkZri3gHVxHcJCI9TmVwF1vPTL8vfatbY,6161
- sarvamai/text_to_speech_streaming/raw_client.py,sha256=DXsU8Rq27yZGINkRmyeyqPWLlSAGsR1RTJWJH81FlTI,5342
- sarvamai/text_to_speech_streaming/socket_client.py,sha256=qZpHFC1z1c6T_FaGOW6CPtWE15glmqClmdc5Iixa7FY,10429
- sarvamai/text_to_speech_streaming/types/__init__.py,sha256=DUsvIGTtST5N1v3Hnodq5aNKfPcTBlod28DSypc8NzA,198
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py,sha256=1uOMrJIaAxi_XzwCYmnG5XA-il66cq9uC4ZuiI7HCHo,176
- sarvamai/types/__init__.py,sha256=_5KKlTtdY4kOQ3go9T60EPmSA53AOFGbF5VJOkcnros,6320
+ sarvamai/text_to_speech_streaming/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
+ sarvamai/text_to_speech_streaming/client.py,sha256=geTF5xy-batzO12XVt0sPw_XJCi7-m2sDFK_B7SL7qc,6088
+ sarvamai/text_to_speech_streaming/raw_client.py,sha256=asOcNw1WAViOiXDVWH4sxWSXGVoLwAOh9vUtq_xralA,5269
+ sarvamai/text_to_speech_streaming/socket_client.py,sha256=NEcijnvjuNcWfzqpBi-xWsXVkL0NPq6EGAkEjnaq9hw,13909
+ sarvamai/types/__init__.py,sha256=cEfGVQMYlbz13iS9v83_CB9160Cky4JOImmxM30fGhg,6305
  sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
  sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
  sarvamai/types/audio_output.py,sha256=Eq-YUZa1mSDwt7bax2c4Vv2gBlyM_JBJWzHhTAhFSko,621
@@ -116,6 +114,11 @@ sarvamai/types/chat_completion_response_message.py,sha256=wz935eBnCkSIl0I0qMxBuH
  sarvamai/types/choice.py,sha256=uXBCsjWP9VK3XWQWZUeI4EnU10w0G9nAfKn2tJZvxko,1244
  sarvamai/types/completion_usage.py,sha256=xYQGlQUbKqsksuV73H-1ajjfT5M7w47eLfdWXSlrI5M,843
  sarvamai/types/config_message.py,sha256=sGrT-qYTRqLVfIo5nRUuRlqPtPVmiAkUAnaMtlmQYCU,778
+ sarvamai/types/configure_connection.py,sha256=SnSNk02gQqP8e4VB4y88jjeFQ4ClpImjGLn2ANI8cZ4,1058
+ sarvamai/types/configure_connection_data.py,sha256=brMO-Z1TDq3oTJ22m1icBkkmnd9k67p_DzecnMcqNko,3421
+ sarvamai/types/configure_connection_data_output_audio_bitrate.py,sha256=h00YvKLxsZC8L3__rH4XH53nN_GY40UElW1EjysCwUs,208
+ sarvamai/types/configure_connection_data_speaker.py,sha256=SzyAiK5LynXwb9KniaO2qoOLY-II3-PMZbRuIsQ9shw,230
+ sarvamai/types/configure_connection_data_target_language_code.py,sha256=jrU1EblAtDYbybUO1KUkHhevmlSBj2AQxX13ii3QhAQ,275
  sarvamai/types/create_chat_completion_response.py,sha256=4nEzeWzHGW1_BmRAtOuGsbRZ0ojNgnzJSMUFyYuYviw,1285
  sarvamai/types/diarized_entry.py,sha256=kf9DLrcoMHZdTKNCAaF0z46q_iAe7CE-DFP4CNrZGTw,896
  sarvamai/types/diarized_transcript.py,sha256=a491XmALLE7AQcByaaOYTew0BZoFTlewEMHLMJyj-Js,669
@@ -127,16 +130,11 @@ sarvamai/types/error_response.py,sha256=3m17Aj3GY_-mSY2lH-GkbuiyewTm-wNL9UQ1exLv
  sarvamai/types/error_response_data.py,sha256=hncN_zmPg8kqSHjNciYLn2QeNqzqHxKiZDlKOvjKdwA,838
  sarvamai/types/events_data.py,sha256=hDSOyODc8-lmpduJIQkps9kHlUZKYXGw3lETi8irHt0,681
  sarvamai/types/finish_reason.py,sha256=PBWtBNkX4FMaODmlUehpF6qLB5uH_zR-Mw3M4uhIB6U,209
- sarvamai/types/flush_signal.py,sha256=zX-jCXQixmellMi1-NKKPyqcRaxeOMycw90fKRpWWAQ,548
+ sarvamai/types/flush_signal.py,sha256=N7MJWb658KoxRpFN9cIbyQGY45zZcg8YCou3E1v--9o,759
  sarvamai/types/format.py,sha256=57LicD0XLqW4D1QEnZWsWGifzRy1GV9P5utKPXLoxtg,144
- sarvamai/types/initialize_connection.py,sha256=qL9vhyZL7fJxV1UFHtCmFMYaVgZkCWMkP4v-TCREAow,659
- sarvamai/types/initialize_connection_data.py,sha256=iCl6fC3QpmIJXOkoC3TjVU4HmUQdbq95RJUHe0nRv1M,1413
- sarvamai/types/initialize_connection_data_output_audio_bitrate.py,sha256=Wl2b6KR_shf6FzsBr1RJxBeVduS6AtO8skwQqOaffFc,209
- sarvamai/types/initialize_connection_data_speaker.py,sha256=TW-tEGVALD-67FqWLzudeWtFmUPKjZBOy7X2_pjg9Y4,507
- sarvamai/types/initialize_connection_data_target_language_code.py,sha256=iEuRFQJPsXO6lCwbNOoB7BzvoLCPIXL7UbIOKOMIjPs,276
  sarvamai/types/language_identification_response.py,sha256=jG4ZQ6KQHCiEDqC51OniOwiRdW14Fbz22bbTsUDp_kc,1483
  sarvamai/types/numerals_format.py,sha256=xg3lYiHcnzyFwuwRcaIteJLH_Pz6pJ9n9kTlYPEnCBU,165
- sarvamai/types/ping_signal.py,sha256=4X0E96-0Gf4VpJP3GDjzw4963LMxU0a_ykaknJwS_7o,545
+ sarvamai/types/ping_signal.py,sha256=cE53FRIXlc8bSo18z6jlAnOh6DhZEMX36huWEX6X3-A,695
  sarvamai/types/reasoning_effort.py,sha256=_TBLn3rQgzJAdnKqV2g0PETbrSBZl0fPLfQ5ZE9H4Pc,164
  sarvamai/types/response_type.py,sha256=yyk0QTIQlNa9W0Uoj_5_ey_Q3Bu8Jij5GkgR0Rt_WnU,163
  sarvamai/types/role.py,sha256=3eY01zZQKB8BSD4cFDeVjz-o2qnHJKz1vnToLqbExxs,115
@@ -175,6 +173,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
  sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
  sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
  sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
- sarvamai-0.1.8rc5.dist-info/METADATA,sha256=UJapTnAvJLabUeDXBxpUj9-KIZQeHYdoeoCNsHKLUvQ,26760
- sarvamai-0.1.8rc5.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
- sarvamai-0.1.8rc5.dist-info/RECORD,,
+ sarvamai-0.1.8rc7.dist-info/METADATA,sha256=byv0QyLj3QnQ13nxHgu9h8pKdiCy8e_hRmp7HL01tCU,26760
+ sarvamai-0.1.8rc7.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+ sarvamai-0.1.8rc7.dist-info/RECORD,,
sarvamai/requests/initialize_connection.py DELETED
@@ -1,11 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- import typing_extensions
- from .initialize_connection_data import InitializeConnectionDataParams
-
-
- class InitializeConnectionParams(typing_extensions.TypedDict):
- type: typing.Literal["config"]
- data: InitializeConnectionDataParams
sarvamai/requests/initialize_connection_data.py DELETED
@@ -1,22 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- import typing_extensions
- from ..types.initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
- from ..types.initialize_connection_data_speaker import InitializeConnectionDataSpeaker
- from ..types.initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
-
-
- class InitializeConnectionDataParams(typing_extensions.TypedDict):
- target_language_code: InitializeConnectionDataTargetLanguageCode
- speaker: InitializeConnectionDataSpeaker
- pitch: typing_extensions.NotRequired[float]
- pace: typing_extensions.NotRequired[float]
- loudness: typing_extensions.NotRequired[float]
- speech_sample_rate: typing_extensions.NotRequired[int]
- enable_preprocessing: typing_extensions.NotRequired[bool]
- output_audio_codec: typing_extensions.NotRequired[typing.Literal["mp3"]]
- output_audio_bitrate: typing_extensions.NotRequired[InitializeConnectionDataOutputAudioBitrate]
- min_buffer_size: typing_extensions.NotRequired[int]
- max_chunk_length: typing_extensions.NotRequired[int]
sarvamai/text_to_speech_streaming/types/__init__.py DELETED
@@ -1,7 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- # isort: skip_file
-
- from .text_to_speech_streaming_model import TextToSpeechStreamingModel
-
- __all__ = ["TextToSpeechStreamingModel"]
sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py DELETED
@@ -1,5 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- TextToSpeechStreamingModel = typing.Union[typing.Literal["bulbul:v1", "bulbul:v2"], typing.Any]
sarvamai/types/initialize_connection.py DELETED
@@ -1,21 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- import pydantic
- from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
- from .initialize_connection_data import InitializeConnectionData
-
-
- class InitializeConnection(UniversalBaseModel):
- type: typing.Literal["config"] = "config"
- data: InitializeConnectionData
-
- if IS_PYDANTIC_V2:
- model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
- else:
-
- class Config:
- frozen = True
- smart_union = True
- extra = pydantic.Extra.allow
sarvamai/types/initialize_connection_data.py DELETED
@@ -1,32 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- import pydantic
- from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
- from .initialize_connection_data_output_audio_bitrate import InitializeConnectionDataOutputAudioBitrate
- from .initialize_connection_data_speaker import InitializeConnectionDataSpeaker
- from .initialize_connection_data_target_language_code import InitializeConnectionDataTargetLanguageCode
-
-
- class InitializeConnectionData(UniversalBaseModel):
- target_language_code: InitializeConnectionDataTargetLanguageCode
- speaker: InitializeConnectionDataSpeaker
- pitch: typing.Optional[float] = None
- pace: typing.Optional[float] = None
- loudness: typing.Optional[float] = None
- speech_sample_rate: typing.Optional[int] = None
- enable_preprocessing: typing.Optional[bool] = None
- output_audio_codec: typing.Optional[typing.Literal["mp3"]] = None
- output_audio_bitrate: typing.Optional[InitializeConnectionDataOutputAudioBitrate] = None
- min_buffer_size: typing.Optional[int] = None
- max_chunk_length: typing.Optional[int] = None
-
- if IS_PYDANTIC_V2:
- model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
- else:
-
- class Config:
- frozen = True
- smart_union = True
- extra = pydantic.Extra.allow
sarvamai/types/initialize_connection_data_speaker.py DELETED
@@ -1,28 +0,0 @@
- # This file was auto-generated by Fern from our API Definition.
-
- import typing
-
- InitializeConnectionDataSpeaker = typing.Union[
- typing.Literal[
- "meera",
- "pavithra",
- "maitreyi",
- "arvind",
- "amol",
- "amartya",
- "diya",
- "neel",
- "misha",
- "vian",
- "arjun",
- "maya",
- "anushka",
- "abhilash",
- "manisha",
- "vidya",
- "arya",
- "karun",
- "hitesh",
- ],
- typing.Any,
- ]