sarvamai 0.1.7a0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. sarvamai/__init__.py +48 -52
  2. sarvamai/client.py +3 -3
  3. sarvamai/core/client_wrapper.py +2 -2
  4. sarvamai/errors/service_unavailable_error.py +2 -1
  5. sarvamai/requests/__init__.py +20 -22
  6. sarvamai/requests/audio_output.py +11 -0
  7. sarvamai/requests/audio_output_data.py +15 -0
  8. sarvamai/requests/configure_connection.py +18 -0
  9. sarvamai/requests/configure_connection_data.py +83 -0
  10. sarvamai/requests/error_response.py +11 -0
  11. sarvamai/requests/error_response_data.py +18 -0
  12. sarvamai/requests/flush_signal.py +14 -0
  13. sarvamai/requests/ping_signal.py +14 -0
  14. sarvamai/requests/send_text.py +11 -0
  15. sarvamai/requests/{base_job_parameters.py → send_text_data.py} +2 -2
  16. sarvamai/speech_to_text/raw_client.py +9 -8
  17. sarvamai/text_to_speech_streaming/client.py +153 -0
  18. sarvamai/text_to_speech_streaming/raw_client.py +130 -0
  19. sarvamai/text_to_speech_streaming/socket_client.py +309 -0
  20. sarvamai/types/__init__.py +26 -28
  21. sarvamai/types/{files_request.py → audio_output.py} +4 -3
  22. sarvamai/types/{bulk_job_callback.py → audio_output_data.py} +5 -5
  23. sarvamai/types/configure_connection.py +28 -0
  24. sarvamai/types/configure_connection_data.py +93 -0
  25. sarvamai/types/configure_connection_data_output_audio_bitrate.py +7 -0
  26. sarvamai/types/configure_connection_data_speaker.py +7 -0
  27. sarvamai/types/configure_connection_data_target_language_code.py +8 -0
  28. sarvamai/types/{file_signed_url_details.py → error_response.py} +4 -3
  29. sarvamai/types/{files_download_response.py → error_response_data.py} +11 -8
  30. sarvamai/types/flush_signal.py +24 -0
  31. sarvamai/types/ping_signal.py +24 -0
  32. sarvamai/types/{task_file_details.py → send_text.py} +4 -3
  33. sarvamai/types/{base_job_parameters.py → send_text_data.py} +3 -1
  34. {sarvamai-0.1.7a0.dist-info → sarvamai-0.1.8.dist-info}/METADATA +1 -1
  35. {sarvamai-0.1.7a0.dist-info → sarvamai-0.1.8.dist-info}/RECORD +37 -38
  36. sarvamai/requests/bulk_job_callback.py +0 -15
  37. sarvamai/requests/bulk_job_init_response_v_1.py +0 -27
  38. sarvamai/requests/file_signed_url_details.py +0 -10
  39. sarvamai/requests/files_download_response.py +0 -15
  40. sarvamai/requests/files_request.py +0 -10
  41. sarvamai/requests/files_upload_response.py +0 -15
  42. sarvamai/requests/job_status_v_1.py +0 -70
  43. sarvamai/requests/speech_to_text_job_parameters.py +0 -32
  44. sarvamai/requests/task_detail_v_1.py +0 -15
  45. sarvamai/requests/task_file_details.py +0 -8
  46. sarvamai/speech_to_text_job/client.py +0 -454
  47. sarvamai/speech_to_text_job/raw_client.py +0 -1189
  48. sarvamai/types/bulk_job_init_response_v_1.py +0 -39
  49. sarvamai/types/files_upload_response.py +0 -25
  50. sarvamai/types/job_state.py +0 -5
  51. sarvamai/types/job_status_v_1.py +0 -80
  52. sarvamai/types/speech_to_text_job_parameters.py +0 -44
  53. sarvamai/types/storage_container_type.py +0 -5
  54. sarvamai/types/task_detail_v_1.py +0 -25
  55. sarvamai/types/task_state.py +0 -5
  56. /sarvamai/{speech_to_text_job → text_to_speech_streaming}/__init__.py +0 -0
  57. {sarvamai-0.1.7a0.dist-info → sarvamai-0.1.8.dist-info}/WHEEL +0 -0
@@ -0,0 +1,153 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from contextlib import asynccontextmanager, contextmanager
5
+
6
+ import httpx
7
+ import websockets
8
+ import websockets.sync.client as websockets_sync_client
9
+ from ..core.api_error import ApiError
10
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
11
+ from ..core.request_options import RequestOptions
12
+ from .raw_client import AsyncRawTextToSpeechStreamingClient, RawTextToSpeechStreamingClient
13
+ from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
14
+
15
+
16
class TextToSpeechStreamingClient:
    """
    Synchronous client for the text-to-speech streaming WebSocket endpoint.

    Wraps a RawTextToSpeechStreamingClient built from the same client wrapper and
    reads the environment and default headers through it.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._raw_client = RawTextToSpeechStreamingClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawTextToSpeechStreamingClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawTextToSpeechStreamingClient
        """
        return self._raw_client

    @contextmanager
    def connect(
        self,
        *,
        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
        """
        Bidirectional WebSocket channel for real-time TTS synthesis.
        Supports streaming, flushing, config updates, and audio playback.

        Parameters
        ----------
        model : typing.Optional[typing.Literal["bulbul:v2"]]
            Text to speech model to use

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TextToSpeechStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status.
        """
        ws_url = self._raw_client._client_wrapper.get_environment().production + "/text-to-speech/ws"
        query_params = httpx.QueryParams()
        if model is not None:
            query_params = query_params.add("model", model)
        ws_url = ws_url + f"?{query_params}"
        headers = self._raw_client._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            # Caller-supplied headers take precedence over the defaults above.
            headers.update(request_options["additional_headers"])
        try:
            with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
                yield TextToSpeechStreamingSocketClient(websocket=protocol)
        except (
            websockets.exceptions.InvalidStatusCode,
            websockets.exceptions.InvalidStatus,
        ) as exc:
            # The websockets sync client reports a rejected handshake as InvalidStatus,
            # which carries the HTTP response; InvalidStatusCode is the legacy exception
            # exposing ``status_code`` directly. Handle both so the ApiError translation
            # below is actually reached.
            rejection = getattr(exc, "response", None)
            status_code: int = exc.status_code if rejection is None else rejection.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
84
+
85
+
86
class AsyncTextToSpeechStreamingClient:
    """
    Asynchronous client for the text-to-speech streaming WebSocket endpoint.

    Wraps an AsyncRawTextToSpeechStreamingClient built from the same client wrapper
    and reads the environment and default headers through it.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawTextToSpeechStreamingClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawTextToSpeechStreamingClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawTextToSpeechStreamingClient
        """
        return self._raw_client

    @asynccontextmanager
    async def connect(
        self,
        *,
        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
        """
        Bidirectional WebSocket channel for real-time TTS synthesis.
        Supports streaming, flushing, config updates, and audio playback.

        Parameters
        ----------
        model : typing.Optional[typing.Literal["bulbul:v2"]]
            Text to speech model to use

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncTextToSpeechStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status.
        """
        ws_url = self._raw_client._client_wrapper.get_environment().production + "/text-to-speech/ws"
        query_params = httpx.QueryParams()
        if model is not None:
            query_params = query_params.add("model", model)
        ws_url = ws_url + f"?{query_params}"
        headers = self._raw_client._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            # Caller-supplied headers take precedence over the defaults above.
            headers.update(request_options["additional_headers"])

        # ``extra_headers`` is the keyword of the legacy asyncio client. In websockets
        # releases where ``websockets.connect`` is the newer asyncio client the keyword
        # is ``additional_headers`` and ``extra_headers`` raises TypeError. Prefer the
        # legacy client (same behaviour as before on older releases) and fall back to
        # the newer client with its own keyword when the legacy module is unavailable.
        try:
            from websockets.legacy.client import connect as ws_connect

            header_kwargs: typing.Dict[str, typing.Any] = {"extra_headers": headers}
        except ImportError:
            ws_connect = websockets.connect
            header_kwargs = {"additional_headers": headers}

        try:
            async with ws_connect(ws_url, **header_kwargs) as protocol:
                yield AsyncTextToSpeechStreamingSocketClient(websocket=protocol)
        except (
            websockets.exceptions.InvalidStatusCode,
            websockets.exceptions.InvalidStatus,
        ) as exc:
            # The legacy client raises InvalidStatusCode (``status_code`` attribute); the
            # newer client raises InvalidStatus (status on ``exc.response``).
            rejection = getattr(exc, "response", None)
            status_code: int = exc.status_code if rejection is None else rejection.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
@@ -0,0 +1,130 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from contextlib import asynccontextmanager, contextmanager
5
+
6
+ import httpx
7
+ import websockets
8
+ import websockets.sync.client as websockets_sync_client
9
+ from ..core.api_error import ApiError
10
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
11
+ from ..core.request_options import RequestOptions
12
+ from .socket_client import AsyncTextToSpeechStreamingSocketClient, TextToSpeechStreamingSocketClient
13
+
14
+
15
class RawTextToSpeechStreamingClient:
    """
    Raw synchronous client for the text-to-speech streaming WebSocket endpoint.

    Reads the environment and default headers from the supplied client wrapper.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    @contextmanager
    def connect(
        self,
        *,
        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[TextToSpeechStreamingSocketClient]:
        """
        Bidirectional WebSocket channel for real-time TTS synthesis.
        Supports streaming, flushing, config updates, and audio playback.

        Parameters
        ----------
        model : typing.Optional[typing.Literal["bulbul:v2"]]
            Text to speech model to use

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        TextToSpeechStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status.
        """
        ws_url = self._client_wrapper.get_environment().production + "/text-to-speech/ws"
        query_params = httpx.QueryParams()
        if model is not None:
            query_params = query_params.add("model", model)
        ws_url = ws_url + f"?{query_params}"
        headers = self._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            # Caller-supplied headers take precedence over the defaults above.
            headers.update(request_options["additional_headers"])
        try:
            with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
                yield TextToSpeechStreamingSocketClient(websocket=protocol)
        except (
            websockets.exceptions.InvalidStatusCode,
            websockets.exceptions.InvalidStatus,
        ) as exc:
            # The websockets sync client reports a rejected handshake as InvalidStatus,
            # which carries the HTTP response; InvalidStatusCode is the legacy exception
            # exposing ``status_code`` directly. Handle both so the ApiError translation
            # below is actually reached.
            rejection = getattr(exc, "response", None)
            status_code: int = exc.status_code if rejection is None else rejection.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
72
+
73
+
74
class AsyncRawTextToSpeechStreamingClient:
    """
    Raw asynchronous client for the text-to-speech streaming WebSocket endpoint.

    Reads the environment and default headers from the supplied client wrapper.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    @asynccontextmanager
    async def connect(
        self,
        *,
        model: typing.Optional[typing.Literal["bulbul:v2"]] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncTextToSpeechStreamingSocketClient]:
        """
        Bidirectional WebSocket channel for real-time TTS synthesis.
        Supports streaming, flushing, config updates, and audio playback.

        Parameters
        ----------
        model : typing.Optional[typing.Literal["bulbul:v2"]]
            Text to speech model to use

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncTextToSpeechStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status.
        """
        ws_url = self._client_wrapper.get_environment().production + "/text-to-speech/ws"
        query_params = httpx.QueryParams()
        if model is not None:
            query_params = query_params.add("model", model)
        ws_url = ws_url + f"?{query_params}"
        headers = self._client_wrapper.get_headers()
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            # Caller-supplied headers take precedence over the defaults above.
            headers.update(request_options["additional_headers"])

        # ``extra_headers`` is the keyword of the legacy asyncio client. In websockets
        # releases where ``websockets.connect`` is the newer asyncio client the keyword
        # is ``additional_headers`` and ``extra_headers`` raises TypeError. Prefer the
        # legacy client (same behaviour as before on older releases) and fall back to
        # the newer client with its own keyword when the legacy module is unavailable.
        try:
            from websockets.legacy.client import connect as ws_connect

            header_kwargs: typing.Dict[str, typing.Any] = {"extra_headers": headers}
        except ImportError:
            ws_connect = websockets.connect
            header_kwargs = {"additional_headers": headers}

        try:
            async with ws_connect(ws_url, **header_kwargs) as protocol:
                yield AsyncTextToSpeechStreamingSocketClient(websocket=protocol)
        except (
            websockets.exceptions.InvalidStatusCode,
            websockets.exceptions.InvalidStatus,
        ) as exc:
            # The legacy client raises InvalidStatusCode (``status_code`` attribute); the
            # newer client raises InvalidStatus (status on ``exc.response``).
            rejection = getattr(exc, "response", None)
            status_code: int = exc.status_code if rejection is None else rejection.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
@@ -0,0 +1,309 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import json
4
+ import typing
5
+
6
+ import websockets
7
+ import websockets.sync.connection as websockets_sync_connection
8
+ from ..core.events import EventEmitterMixin, EventType
9
+ from ..core.pydantic_utilities import parse_obj_as
10
+ from ..types.audio_output import AudioOutput
11
+ from ..types.flush_signal import FlushSignal
12
+ from ..types.error_response import ErrorResponse
13
+ from ..types.configure_connection import ConfigureConnection
14
+ from ..types.configure_connection_data import ConfigureConnectionData
15
+ from ..types.ping_signal import PingSignal
16
+ from ..types.send_text import SendText
17
+ from ..types.send_text_data import SendTextData
18
+
19
# Target type that every incoming socket message is parsed into.
TextToSpeechStreamingSocketClientResponse = typing.Union[AudioOutput, ErrorResponse]


class AsyncTextToSpeechStreamingSocketClient(EventEmitterMixin):
    """
    Asynchronous wrapper around an open text-to-speech streaming websocket.

    Outgoing messages are built from the request models and sent as JSON;
    incoming messages are parsed into ``TextToSpeechStreamingSocketClientResponse``.
    """

    def __init__(self, *, websocket: websockets.WebSocketClientProtocol):
        super().__init__()
        self._websocket = websocket

    async def __aiter__(self):
        """
        Yield each incoming message, parsed into the response union.
        """
        async for frame in self._websocket:
            if isinstance(frame, str):
                frame = json.loads(frame)
            yield parse_obj_as(TextToSpeechStreamingSocketClientResponse, frame)  # type: ignore

    async def start_listening(self):
        """
        Consume the websocket and re-publish its traffic as events.

        Events are emitted in this order:
        - EventType.OPEN once, before the first message is read
        - EventType.MESSAGE for every parsed incoming message
        - EventType.ERROR if a websocket exception is raised while reading
        - EventType.CLOSE once, when reading stops for any reason
        """
        self._emit(EventType.OPEN, None)
        try:
            async for frame in self._websocket:
                payload = json.loads(frame) if isinstance(frame, str) else frame
                self._emit(
                    EventType.MESSAGE,
                    parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload),  # type: ignore
                )
        except websockets.WebSocketException as exc:
            self._emit(EventType.ERROR, exc)
        finally:
            self._emit(EventType.CLOSE, None)

    async def configure(
        self,
        target_language_code: str,
        speaker: str = "anushka",
        pitch: float = 0.0,
        pace: float = 1.0,
        loudness: float = 1.0,
        speech_sample_rate: int = 22050,
        enable_preprocessing: bool = False,
        output_audio_codec: str = "mp3",
        output_audio_bitrate: str = "128k",
        min_buffer_size: int = 50,
        max_chunk_length: int = 150,
    ) -> None:
        """
        Send the connection configuration for this TTS session.

        A configuration message must be the first message sent once the WebSocket is
        open. It may be sent again at any point to change settings; text still in the
        buffer is then flushed and processed before the new settings apply.

        :param target_language_code: Language of the text, in BCP-47 format.
        :param speaker: Voice used for the output audio (default ``"anushka"``).
            Voices compatible with bulbul:v2 - female: Anushka, Manisha, Vidya, Arya;
            male: Abhilash, Karun, Hitesh.
        :param pitch: Voice pitch; lower is deeper, higher is sharper.
            Suitable range is -0.75 to 0.75 (default 0.0).
        :param pace: Speaking speed; lower is slower, higher is faster.
            Suitable range is 0.5 to 2.0 (default 1.0).
        :param loudness: Output volume; lower is quieter, higher is louder.
            Suitable range is 0.3 to 3.0 (default 1.0).
        :param speech_sample_rate: Output sample rate in Hz; supported values are
            8000, 16000, 22050 and 24000 (default 22050).
        :param enable_preprocessing: Whether English words and numeric entities
            (e.g. numbers, dates) are normalized; enable for better handling of
            mixed-language text (default False).
        :param output_audio_codec: Audio codec; currently only MP3 is supported.
        :param output_audio_bitrate: Audio bitrate; one of the 5 supported options.
        :param min_buffer_size: Minimum character count that triggers a buffer flush
            for TTS model processing.
        :param max_chunk_length: Maximum length used when splitting sentences.
        """
        await self._send_model(
            ConfigureConnection(
                data=ConfigureConnectionData(
                    target_language_code=target_language_code,
                    speaker=speaker,
                    pitch=pitch,
                    pace=pace,
                    loudness=loudness,
                    speech_sample_rate=speech_sample_rate,
                    enable_preprocessing=enable_preprocessing,
                    output_audio_codec=output_audio_codec,
                    output_audio_bitrate=output_audio_bitrate,
                    min_buffer_size=min_buffer_size,
                    max_chunk_length=max_chunk_length,
                )
            )
        )

    async def convert(self, text: str) -> None:
        """
        Queue text for speech synthesis.

        Text should be 1-2500 characters long; under 500 characters is recommended
        for optimal streaming performance. Real-time endpoints perform better with
        longer character counts.

        :param text: Text to be synthesized (1-2500 characters, recommended <500)
        """
        await self._send_model(SendText(data=SendTextData(text=text)))

    async def flush(self) -> None:
        """
        Ask the server to process the buffered text immediately, even if it has not
        reached the ``min_buffer_size`` threshold. Useful for the tail of an input
        that is shorter than the minimum buffer size.
        """
        await self._send_model(FlushSignal())

    async def ping(self) -> None:
        """
        Send a keep-alive ping. The connection closes automatically after one minute
        of inactivity.
        """
        await self._send_model(PingSignal())

    async def recv(self) -> TextToSpeechStreamingSocketClientResponse:
        """
        Wait for the next message on the websocket and return it parsed.
        """
        frame = await self._websocket.recv()
        if isinstance(frame, str):
            frame = json.loads(frame)
        return parse_obj_as(TextToSpeechStreamingSocketClientResponse, frame)  # type: ignore

    async def _send(self, data: typing.Any) -> None:
        """
        Write a message to the websocket; dicts are serialized to JSON first.
        """
        outgoing = json.dumps(data) if isinstance(data, dict) else data
        await self._websocket.send(outgoing)

    async def _send_model(self, data: typing.Any) -> None:
        """
        Convert a Pydantic model to a dict and send it over the websocket.
        """
        as_dict = data.dict()
        await self._send(as_dict)
165
+
166
+
167
class TextToSpeechStreamingSocketClient(EventEmitterMixin):
    """
    Synchronous wrapper around an open text-to-speech streaming websocket.

    Outgoing messages are built from the request models and sent as JSON;
    incoming messages are parsed into ``TextToSpeechStreamingSocketClientResponse``.
    """

    def __init__(self, *, websocket: websockets_sync_connection.Connection):
        super().__init__()
        self._websocket = websocket

    def __iter__(self):
        """
        Yield each incoming message, parsed into the response union.
        """
        for frame in self._websocket:
            if isinstance(frame, str):
                frame = json.loads(frame)
            yield parse_obj_as(TextToSpeechStreamingSocketClientResponse, frame)  # type: ignore

    def start_listening(self):
        """
        Consume the websocket and re-publish its traffic as events.

        Events are emitted in this order:
        - EventType.OPEN once, before the first message is read
        - EventType.MESSAGE for every parsed incoming message
        - EventType.ERROR if a websocket exception is raised while reading
        - EventType.CLOSE once, when reading stops for any reason
        """
        self._emit(EventType.OPEN, None)
        try:
            for frame in self._websocket:
                payload = json.loads(frame) if isinstance(frame, str) else frame
                self._emit(
                    EventType.MESSAGE,
                    parse_obj_as(TextToSpeechStreamingSocketClientResponse, payload),  # type: ignore
                )
        except websockets.WebSocketException as exc:
            self._emit(EventType.ERROR, exc)
        finally:
            self._emit(EventType.CLOSE, None)

    def configure(
        self,
        target_language_code: str,
        speaker: str = "anushka",
        pitch: float = 0.0,
        pace: float = 1.0,
        loudness: float = 1.0,
        speech_sample_rate: int = 22050,
        enable_preprocessing: bool = False,
        output_audio_codec: str = "mp3",
        output_audio_bitrate: str = "128k",
        min_buffer_size: int = 50,
        max_chunk_length: int = 150,
    ) -> None:
        """
        Send the connection configuration for this TTS session.

        A configuration message must be the first message sent once the WebSocket is
        open. It may be sent again at any point to change settings; text still in the
        buffer is then flushed and processed before the new settings apply.

        :param target_language_code: Language of the text, in BCP-47 format.
        :param speaker: Voice used for the output audio (default ``"anushka"``).
            Voices compatible with bulbul:v2 - female: Anushka, Manisha, Vidya, Arya;
            male: Abhilash, Karun, Hitesh.
        :param pitch: Voice pitch; lower is deeper, higher is sharper.
            Suitable range is -0.75 to 0.75 (default 0.0).
        :param pace: Speaking speed; lower is slower, higher is faster.
            Suitable range is 0.5 to 2.0 (default 1.0).
        :param loudness: Output volume; lower is quieter, higher is louder.
            Suitable range is 0.3 to 3.0 (default 1.0).
        :param speech_sample_rate: Output sample rate in Hz; supported values are
            8000, 16000, 22050 and 24000 (default 22050).
        :param enable_preprocessing: Whether English words and numeric entities
            (e.g. numbers, dates) are normalized; enable for better handling of
            mixed-language text (default False).
        :param output_audio_codec: Audio codec; currently only MP3 is supported.
        :param output_audio_bitrate: Audio bitrate; one of the 5 supported options.
        :param min_buffer_size: Minimum character count that triggers a buffer flush
            for TTS model processing.
        :param max_chunk_length: Maximum length used when splitting sentences.
        """
        self._send_model(
            ConfigureConnection(
                data=ConfigureConnectionData(
                    target_language_code=target_language_code,
                    speaker=speaker,
                    pitch=pitch,
                    pace=pace,
                    loudness=loudness,
                    speech_sample_rate=speech_sample_rate,
                    enable_preprocessing=enable_preprocessing,
                    output_audio_codec=output_audio_codec,
                    output_audio_bitrate=output_audio_bitrate,
                    min_buffer_size=min_buffer_size,
                    max_chunk_length=max_chunk_length,
                )
            )
        )

    def convert(self, text: str) -> None:
        """
        Queue text for speech synthesis.

        Text should be 1-2500 characters long; under 500 characters is recommended
        for optimal streaming performance. Real-time endpoints perform better with
        longer character counts.

        :param text: Text to be synthesized (1-2500 characters, recommended <500)
        """
        self._send_model(SendText(data=SendTextData(text=text)))

    def flush(self) -> None:
        """
        Ask the server to process the buffered text immediately, even if it has not
        reached the ``min_buffer_size`` threshold. Useful for the tail of an input
        that is shorter than the minimum buffer size.
        """
        self._send_model(FlushSignal())

    def ping(self) -> None:
        """
        Send a keep-alive ping. The connection closes automatically after one minute
        of inactivity.
        """
        self._send_model(PingSignal())

    def recv(self) -> TextToSpeechStreamingSocketClientResponse:
        """
        Block until the next message arrives on the websocket and return it parsed.
        """
        frame = self._websocket.recv()
        if isinstance(frame, str):
            frame = json.loads(frame)
        return parse_obj_as(TextToSpeechStreamingSocketClientResponse, frame)  # type: ignore

    def _send(self, data: typing.Any) -> None:
        """
        Write a message to the websocket; dicts are serialized to JSON first.
        """
        outgoing = json.dumps(data) if isinstance(data, dict) else data
        self._websocket.send(outgoing)

    def _send_model(self, data: typing.Any) -> None:
        """
        Convert a Pydantic model to a dict and send it over the websocket.
        """
        as_dict = data.dict()
        self._send(as_dict)