sarvamai 0.1.5a10-py3-none-any.whl → 0.1.5a13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. sarvamai/__init__.py +30 -11
  2. sarvamai/client.py +10 -0
  3. sarvamai/core/client_wrapper.py +2 -2
  4. sarvamai/requests/__init__.py +12 -4
  5. sarvamai/requests/config_message.py +17 -0
  6. sarvamai/requests/speech_to_text_response_data.py +9 -0
  7. sarvamai/requests/speech_to_text_streaming_response.py +4 -4
  8. sarvamai/requests/{transcription_data.py → speech_to_text_transcription_data.py} +2 -2
  9. sarvamai/requests/speech_to_text_translate_response_data.py +11 -0
  10. sarvamai/requests/speech_to_text_translate_streaming_response.py +10 -0
  11. sarvamai/requests/speech_to_text_translate_transcription_data.py +23 -0
  12. sarvamai/speech_to_text_streaming/client.py +4 -4
  13. sarvamai/speech_to_text_streaming/raw_client.py +4 -4
  14. sarvamai/speech_to_text_streaming/socket_client.py +30 -5
  15. sarvamai/speech_to_text_translate_streaming/__init__.py +7 -0
  16. sarvamai/speech_to_text_translate_streaming/client.py +176 -0
  17. sarvamai/speech_to_text_translate_streaming/raw_client.py +153 -0
  18. sarvamai/speech_to_text_translate_streaming/socket_client.py +191 -0
  19. sarvamai/speech_to_text_translate_streaming/types/__init__.py +7 -0
  20. sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_model.py +5 -0
  21. sarvamai/types/__init__.py +14 -6
  22. sarvamai/types/config_message.py +27 -0
  23. sarvamai/types/response_type.py +5 -0
  24. sarvamai/types/speech_to_text_response_data.py +9 -0
  25. sarvamai/types/speech_to_text_streaming_response.py +4 -4
  26. sarvamai/types/{transcription_data.py → speech_to_text_transcription_data.py} +2 -2
  27. sarvamai/types/speech_to_text_translate_response_data.py +9 -0
  28. sarvamai/types/speech_to_text_translate_streaming_response.py +22 -0
  29. sarvamai/types/speech_to_text_translate_transcription_data.py +35 -0
  30. {sarvamai-0.1.5a10.dist-info → sarvamai-0.1.5a13.dist-info}/METADATA +1 -1
  31. {sarvamai-0.1.5a10.dist-info → sarvamai-0.1.5a13.dist-info}/RECORD +32 -18
  32. sarvamai/requests/speech_to_text_streaming_response_data.py +0 -9
  33. sarvamai/types/speech_to_text_streaming_response_data.py +0 -9
  34. sarvamai/types/speech_to_text_streaming_response_type.py +0 -5
  35. {sarvamai-0.1.5a10.dist-info → sarvamai-0.1.5a13.dist-info}/WHEEL +0 -0
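
The headline change in this release is the new speech_to_text_translate_streaming package and its supporting types (files 15–20 and 22–29 above). The following is a minimal, hedged usage sketch; it assumes the top-level SarvamAI client takes an api_subscription_key and exposes the new module as speech_to_text_translate_streaming, mirroring the existing speech_to_text_streaming attribute (neither is shown verbatim in this diff), and the key and audio file are placeholders.

import base64

from sarvamai import SarvamAI  # assumed export of the top-level client

client = SarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")  # placeholder key

# connect() yields a SpeechToTextTranslateStreamingSocketClient
# (defined in sarvamai/speech_to_text_translate_streaming/socket_client.py).
with client.speech_to_text_translate_streaming.connect(model="saaras:v2.5") as ws:
    with open("sample.wav", "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    # translate() wraps the payload in AudioMessage/AudioData and sends it over the socket.
    ws.translate(audio=audio_b64, encoding="audio/wav", sample_rate=16000)
    # recv() parses the next server message into a SpeechToTextTranslateStreamingResponse.
    print(ws.recv())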
@@ -0,0 +1,176 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+ from contextlib import asynccontextmanager, contextmanager
+
+ import httpx
+ import websockets
+ import websockets.sync.client as websockets_sync_client
+ from ..core.api_error import ApiError
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+ from ..core.request_options import RequestOptions
+ from .raw_client import AsyncRawSpeechToTextTranslateStreamingClient, RawSpeechToTextTranslateStreamingClient
+ from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
+ from .types.speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
+
+
+ class SpeechToTextTranslateStreamingClient:
+     def __init__(self, *, client_wrapper: SyncClientWrapper):
+         self._raw_client = RawSpeechToTextTranslateStreamingClient(client_wrapper=client_wrapper)
+
+     @property
+     def with_raw_response(self) -> RawSpeechToTextTranslateStreamingClient:
+         """
+         Retrieves a raw implementation of this client that returns raw responses.
+
+         Returns
+         -------
+         RawSpeechToTextTranslateStreamingClient
+         """
+         return self._raw_client
+
+     @contextmanager
+     def connect(
+         self,
+         *,
+         model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
+         high_vad_sensitivity: typing.Optional[str] = None,
+         vad_signals: typing.Optional[str] = None,
+         api_subscription_key: typing.Optional[str] = None,
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
+         """
+         WebSocket channel for real-time speech to text streaming with English translation
+
+         Parameters
+         ----------
+         model : typing.Optional[SpeechToTextTranslateStreamingModel]
+             Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+
+         high_vad_sensitivity : typing.Optional[str]
+             Enable high VAD (Voice Activity Detection) sensitivity
+
+         vad_signals : typing.Optional[str]
+             Enable VAD signals in response
+
+         api_subscription_key : typing.Optional[str]
+             API subscription key for authentication
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         SpeechToTextTranslateStreamingSocketClient
+         """
+         ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
+         query_params = httpx.QueryParams()
+         if model is not None:
+             query_params = query_params.add("model", model)
+         if high_vad_sensitivity is not None:
+             query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
+         if vad_signals is not None:
+             query_params = query_params.add("vad_signals", vad_signals)
+         ws_url = ws_url + f"?{query_params}"
+         headers = self._raw_client._client_wrapper.get_headers()
+         if api_subscription_key is not None:
+             headers["Api-Subscription-Key"] = str(api_subscription_key)
+         if request_options and "additional_headers" in request_options:
+             headers.update(request_options["additional_headers"])
+         try:
+             with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
+                 yield SpeechToTextTranslateStreamingSocketClient(websocket=protocol)
+         except websockets.exceptions.InvalidStatusCode as exc:
+             status_code: int = exc.status_code
+             if status_code == 401:
+                 raise ApiError(
+                     status_code=status_code,
+                     headers=dict(headers),
+                     body="Websocket initialized with invalid credentials.",
+                 )
+             raise ApiError(
+                 status_code=status_code,
+                 headers=dict(headers),
+                 body="Unexpected error when initializing websocket connection.",
+             )
+
+
+ class AsyncSpeechToTextTranslateStreamingClient:
+     def __init__(self, *, client_wrapper: AsyncClientWrapper):
+         self._raw_client = AsyncRawSpeechToTextTranslateStreamingClient(client_wrapper=client_wrapper)
+
+     @property
+     def with_raw_response(self) -> AsyncRawSpeechToTextTranslateStreamingClient:
+         """
+         Retrieves a raw implementation of this client that returns raw responses.
+
+         Returns
+         -------
+         AsyncRawSpeechToTextTranslateStreamingClient
+         """
+         return self._raw_client
+
+     @asynccontextmanager
+     async def connect(
+         self,
+         *,
+         model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
+         high_vad_sensitivity: typing.Optional[str] = None,
+         vad_signals: typing.Optional[str] = None,
+         api_subscription_key: typing.Optional[str] = None,
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
+         """
+         WebSocket channel for real-time speech to text streaming with English translation
+
+         Parameters
+         ----------
+         model : typing.Optional[SpeechToTextTranslateStreamingModel]
+             Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+
+         high_vad_sensitivity : typing.Optional[str]
+             Enable high VAD (Voice Activity Detection) sensitivity
+
+         vad_signals : typing.Optional[str]
+             Enable VAD signals in response
+
+         api_subscription_key : typing.Optional[str]
+             API subscription key for authentication
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         AsyncSpeechToTextTranslateStreamingSocketClient
+         """
+         ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
+         query_params = httpx.QueryParams()
+         if model is not None:
+             query_params = query_params.add("model", model)
+         if high_vad_sensitivity is not None:
+             query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
+         if vad_signals is not None:
+             query_params = query_params.add("vad_signals", vad_signals)
+         ws_url = ws_url + f"?{query_params}"
+         headers = self._raw_client._client_wrapper.get_headers()
+         if api_subscription_key is not None:
+             headers["Api-Subscription-Key"] = str(api_subscription_key)
+         if request_options and "additional_headers" in request_options:
+             headers.update(request_options["additional_headers"])
+         try:
+             async with websockets.connect(ws_url, extra_headers=headers) as protocol:
+                 yield AsyncSpeechToTextTranslateStreamingSocketClient(websocket=protocol)
+         except websockets.exceptions.InvalidStatusCode as exc:
+             status_code: int = exc.status_code
+             if status_code == 401:
+                 raise ApiError(
+                     status_code=status_code,
+                     headers=dict(headers),
+                     body="Websocket initialized with invalid credentials.",
+                 )
+             raise ApiError(
+                 status_code=status_code,
+                 headers=dict(headers),
+                 body="Unexpected error when initializing websocket connection.",
+             )
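
For the async client above, the same flow runs inside an async context manager. The sketch below assumes the package exports an AsyncSarvamAI client wired to AsyncSpeechToTextTranslateStreamingClient and that it uses the same attribute name as in the sync sketch; the key and file name are placeholders.

import asyncio
import base64

from sarvamai import AsyncSarvamAI  # assumed export

async def main() -> None:
    client = AsyncSarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")  # placeholder key
    async with client.speech_to_text_translate_streaming.connect(model="saaras:v2") as ws:
        with open("sample.wav", "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode("utf-8")
        await ws.translate(audio=audio_b64, encoding="audio/wav", sample_rate=16000)
        print(await ws.recv())

asyncio.run(main())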
@@ -0,0 +1,153 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+ from contextlib import asynccontextmanager, contextmanager
+
+ import httpx
+ import websockets
+ import websockets.sync.client as websockets_sync_client
+ from ..core.api_error import ApiError
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+ from ..core.request_options import RequestOptions
+ from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
+ from .types.speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
+
+
+ class RawSpeechToTextTranslateStreamingClient:
+     def __init__(self, *, client_wrapper: SyncClientWrapper):
+         self._client_wrapper = client_wrapper
+
+     @contextmanager
+     def connect(
+         self,
+         *,
+         model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
+         high_vad_sensitivity: typing.Optional[str] = None,
+         vad_signals: typing.Optional[str] = None,
+         api_subscription_key: typing.Optional[str] = None,
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
+         """
+         WebSocket channel for real-time speech to text streaming with English translation
+
+         Parameters
+         ----------
+         model : typing.Optional[SpeechToTextTranslateStreamingModel]
+             Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+
+         high_vad_sensitivity : typing.Optional[str]
+             Enable high VAD (Voice Activity Detection) sensitivity
+
+         vad_signals : typing.Optional[str]
+             Enable VAD signals in response
+
+         api_subscription_key : typing.Optional[str]
+             API subscription key for authentication
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         SpeechToTextTranslateStreamingSocketClient
+         """
+         ws_url = self._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
+         query_params = httpx.QueryParams()
+         if model is not None:
+             query_params = query_params.add("model", model)
+         if high_vad_sensitivity is not None:
+             query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
+         if vad_signals is not None:
+             query_params = query_params.add("vad_signals", vad_signals)
+         ws_url = ws_url + f"?{query_params}"
+         headers = self._client_wrapper.get_headers()
+         if api_subscription_key is not None:
+             headers["Api-Subscription-Key"] = str(api_subscription_key)
+         if request_options and "additional_headers" in request_options:
+             headers.update(request_options["additional_headers"])
+         try:
+             with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
+                 yield SpeechToTextTranslateStreamingSocketClient(websocket=protocol)
+         except websockets.exceptions.InvalidStatusCode as exc:
+             status_code: int = exc.status_code
+             if status_code == 401:
+                 raise ApiError(
+                     status_code=status_code,
+                     headers=dict(headers),
+                     body="Websocket initialized with invalid credentials.",
+                 )
+             raise ApiError(
+                 status_code=status_code,
+                 headers=dict(headers),
+                 body="Unexpected error when initializing websocket connection.",
+             )
+
+
+ class AsyncRawSpeechToTextTranslateStreamingClient:
+     def __init__(self, *, client_wrapper: AsyncClientWrapper):
+         self._client_wrapper = client_wrapper
+
+     @asynccontextmanager
+     async def connect(
+         self,
+         *,
+         model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
+         high_vad_sensitivity: typing.Optional[str] = None,
+         vad_signals: typing.Optional[str] = None,
+         api_subscription_key: typing.Optional[str] = None,
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
+         """
+         WebSocket channel for real-time speech to text streaming with English translation
+
+         Parameters
+         ----------
+         model : typing.Optional[SpeechToTextTranslateStreamingModel]
+             Speech to text model to use (defaults to "saaras:v2.5" if not specified)
+
+         high_vad_sensitivity : typing.Optional[str]
+             Enable high VAD (Voice Activity Detection) sensitivity
+
+         vad_signals : typing.Optional[str]
+             Enable VAD signals in response
+
+         api_subscription_key : typing.Optional[str]
+             API subscription key for authentication
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         AsyncSpeechToTextTranslateStreamingSocketClient
+         """
+         ws_url = self._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
+         query_params = httpx.QueryParams()
+         if model is not None:
+             query_params = query_params.add("model", model)
+         if high_vad_sensitivity is not None:
+             query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
+         if vad_signals is not None:
+             query_params = query_params.add("vad_signals", vad_signals)
+         ws_url = ws_url + f"?{query_params}"
+         headers = self._client_wrapper.get_headers()
+         if api_subscription_key is not None:
+             headers["Api-Subscription-Key"] = str(api_subscription_key)
+         if request_options and "additional_headers" in request_options:
+             headers.update(request_options["additional_headers"])
+         try:
+             async with websockets.connect(ws_url, extra_headers=headers) as protocol:
+                 yield AsyncSpeechToTextTranslateStreamingSocketClient(websocket=protocol)
+         except websockets.exceptions.InvalidStatusCode as exc:
+             status_code: int = exc.status_code
+             if status_code == 401:
+                 raise ApiError(
+                     status_code=status_code,
+                     headers=dict(headers),
+                     body="Websocket initialized with invalid credentials.",
+                 )
+             raise ApiError(
+                 status_code=status_code,
+                 headers=dict(headers),
+                 body="Unexpected error when initializing websocket connection.",
+             )
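
Both raw clients build the websocket URL and headers the same way as the wrapping clients. The snippet below is a standalone illustration of that construction using only httpx; the hostname and key are placeholders. httpx.QueryParams is immutable, which is why each .add() result is reassigned.

import httpx

query_params = httpx.QueryParams()
query_params = query_params.add("model", "saaras:v2.5")
query_params = query_params.add("vad_signals", "true")

ws_url = "wss://api.example.com/speech-to-text-translate/ws" + f"?{query_params}"
headers = {"Api-Subscription-Key": "YOUR_API_SUBSCRIPTION_KEY"}
print(ws_url)  # ...?model=saaras%3Av2.5&vad_signals=true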
@@ -0,0 +1,191 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import json
+ import typing
+
+ import websockets
+ import websockets.sync.connection as websockets_sync_connection
+ from ..core.events import EventEmitterMixin, EventType
+ from ..core.pydantic_utilities import parse_obj_as
+ from ..types.audio_data import AudioData
+ from ..types.audio_message import AudioMessage
+ from ..types.config_message import ConfigMessage
+ from ..types.speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
+
+ SpeechToTextTranslateStreamingSocketClientResponse = typing.Union[SpeechToTextTranslateStreamingResponse]
+
+
+ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
+     def __init__(self, *, websocket: websockets.WebSocketClientProtocol):
+         super().__init__()
+         self._websocket = websocket
+
+     async def __aiter__(self):
+         async for message in self._websocket:
+             message = json.loads(message) if isinstance(message, str) else message
+             yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
+
+     async def start_listening(self):
+         """
+         Start listening for messages on the websocket connection.
+
+         Emits events in the following order:
+         - EventType.OPEN when connection is established
+         - EventType.MESSAGE for each message received
+         - EventType.ERROR if an error occurs
+         - EventType.CLOSE when connection is closed
+         """
+         self._emit(EventType.OPEN, None)
+         try:
+             async for raw_message in self._websocket:
+                 raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
+                 parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
+                 self._emit(EventType.MESSAGE, parsed)
+         except websockets.WebSocketException as exc:
+             self._emit(EventType.ERROR, exc)
+         finally:
+             self._emit(EventType.CLOSE, None)
+
+     async def translate(self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000):
+         """
+         Sends audio translation request to the server.
+         :param audio: Base64 encoded audio data
+         :param encoding (Optional): Audio encoding format (default is "audio/wav")
+         :param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
+         """
+         return await self._send_speech_to_text_translate_streaming_audio_message(
+             message=AudioMessage(audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate))
+         )
+
+     async def set_prompt(self, prompt: str) -> None:
+         """
+         Set the prompt for the translation.
+         The prompt is useful for detecting hotwords in the audio stream.
+         This will send a ConfigMessage to the websocket connection.
+         """
+         message = ConfigMessage(prompt=prompt)
+         await self._send_config_message(message)
+
+     async def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
+         """
+         Receive a message from the websocket connection.
+         """
+         data = await self._websocket.recv()
+         data = json.loads(data) if isinstance(data, str) else data
+         return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
+
+     async def _send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
+         """
+         Send a message to the websocket connection.
+         The message will be sent as a AudioMessage.
+         """
+         await self._send_model(message)
+
+     async def _send_config_message(self, message: ConfigMessage) -> None:
+         """
+         Send a message to the websocket connection.
+         The message will be sent as a ConfigMessage.
+         """
+         await self._send_model(message)
+
+     async def _send(self, data: typing.Any) -> None:
+         """
+         Send a message to the websocket connection.
+         """
+         if isinstance(data, dict):
+             data = json.dumps(data)
+         await self._websocket.send(data)
+
+     async def _send_model(self, data: typing.Any) -> None:
+         """
+         Send a Pydantic model to the websocket connection.
+         """
+         await self._send(data.dict())
+
+
+ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
+     def __init__(self, *, websocket: websockets_sync_connection.Connection):
+         super().__init__()
+         self._websocket = websocket
+
+     def __iter__(self):
+         for message in self._websocket:
+             message = json.loads(message) if isinstance(message, str) else message
+             yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
+
+     def start_listening(self):
+         """
+         Start listening for messages on the websocket connection.
+
+         Emits events in the following order:
+         - EventType.OPEN when connection is established
+         - EventType.MESSAGE for each message received
+         - EventType.ERROR if an error occurs
+         - EventType.CLOSE when connection is closed
+         """
+         self._emit(EventType.OPEN, None)
+         try:
+             for raw_message in self._websocket:
+                 raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
+                 parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
+                 self._emit(EventType.MESSAGE, parsed)
+         except websockets.WebSocketException as exc:
+             self._emit(EventType.ERROR, exc)
+         finally:
+             self._emit(EventType.CLOSE, None)
+
+     def translate(self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000):
+         """
+         Sends audio translation request to the server.
+         :param audio: Base64 encoded audio data
+         :param encoding: Audio encoding format (default is "audio/wav")
+         :param sample_rate: Audio sample rate in Hz (default is 16000)
+         """
+         return self._send_speech_to_text_translate_streaming_audio_message(
+             message=AudioMessage(audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate))
+         )
+
+     def set_prompt(self, prompt: str) -> None:
+         """
+         Set the prompt for the translation.
+         The prompt is useful for detecting hotwords in the audio stream.
+         This will send a ConfigMessage to the websocket connection.
+         """
+         message = ConfigMessage(prompt=prompt)
+         self._send_config_message(message)
+
+     def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
+         """
+         Receive a message from the websocket connection.
+         """
+         data = self._websocket.recv()
+         data = json.loads(data) if isinstance(data, str) else data
+         return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
+
+     def _send_config_message(self, message: ConfigMessage) -> None:
+         """
+         Send a message to the websocket connection.
+         The message will be sent as a ConfigMessage.
+         """
+         self._send_model(message)
+
+     def _send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
+         """
+         Send a message to the websocket connection.
+         The message will be sent as a AudioMessage.
+         """
+         self._send_model(message)
+
+     def _send(self, data: typing.Any) -> None:
+         """
+         Send a message to the websocket connection.
+         """
+         if isinstance(data, dict):
+             data = json.dumps(data)
+         self._websocket.send(data)
+
+     def _send_model(self, data: typing.Any) -> None:
+         """
+         Send a Pydantic model to the websocket connection.
+         """
+         self._send(data.dict())
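
A sketch of driving the sync socket client defined above: set a hotword prompt, push base64-encoded audio chunks, and iterate over parsed responses. The ws argument stands for a SpeechToTextTranslateStreamingSocketClient obtained from connect(); the chunk list and prompt text are illustrative.

import typing

from sarvamai.speech_to_text_translate_streaming.socket_client import (
    SpeechToTextTranslateStreamingSocketClient,
)

def stream_and_collect(
    ws: SpeechToTextTranslateStreamingSocketClient, audio_chunks: typing.List[str]
) -> typing.List[typing.Any]:
    # set_prompt() sends a ConfigMessage; useful for domain-specific hotwords.
    ws.set_prompt("invoice number, GSTIN, Sarvam")
    for chunk_b64 in audio_chunks:
        # Each call wraps the chunk in AudioMessage(AudioData(...)) and sends it.
        ws.translate(audio=chunk_b64, encoding="audio/wav", sample_rate=16000)
    # Iterating the client yields parsed SpeechToTextTranslateStreamingResponse
    # objects until the server closes the connection.
    return [response for response in ws]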
@@ -0,0 +1,7 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ # isort: skip_file
+
+ from .speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
+
+ __all__ = ["SpeechToTextTranslateStreamingModel"]
@@ -0,0 +1,5 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ SpeechToTextTranslateStreamingModel = typing.Union[typing.Literal["saaras:v2.5", "saaras:v2"], typing.Any]
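
The typing.Any arm makes this an open union: the two known model ids get literal-level type checking, while ids added server-side later still pass. A small illustration (the second id is hypothetical):

from sarvamai.speech_to_text_translate_streaming.types import SpeechToTextTranslateStreamingModel

known: SpeechToTextTranslateStreamingModel = "saaras:v2.5"  # declared literal
future: SpeechToTextTranslateStreamingModel = "saaras:v3"   # hypothetical future id, accepted via typing.Any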
@@ -17,6 +17,7 @@ from .chat_completion_request_user_message import ChatCompletionRequestUserMessa
  from .chat_completion_response_message import ChatCompletionResponseMessage
  from .choice import Choice
  from .completion_usage import CompletionUsage
+ from .config_message import ConfigMessage
  from .create_chat_completion_response import CreateChatCompletionResponse
  from .diarized_entry import DiarizedEntry
  from .diarized_transcript import DiarizedTranscript
@@ -30,18 +31,22 @@ from .format import Format
  from .language_identification_response import LanguageIdentificationResponse
  from .numerals_format import NumeralsFormat
  from .reasoning_effort import ReasoningEffort
+ from .response_type import ResponseType
  from .role import Role
  from .sarvam_model_ids import SarvamModelIds
  from .speech_sample_rate import SpeechSampleRate
  from .speech_to_text_language import SpeechToTextLanguage
  from .speech_to_text_model import SpeechToTextModel
  from .speech_to_text_response import SpeechToTextResponse
+ from .speech_to_text_response_data import SpeechToTextResponseData
  from .speech_to_text_streaming_response import SpeechToTextStreamingResponse
- from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseData
- from .speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
+ from .speech_to_text_transcription_data import SpeechToTextTranscriptionData
  from .speech_to_text_translate_language import SpeechToTextTranslateLanguage
  from .speech_to_text_translate_model import SpeechToTextTranslateModel
  from .speech_to_text_translate_response import SpeechToTextTranslateResponse
+ from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseData
+ from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
+ from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionData
  from .spoken_form_numerals_format import SpokenFormNumeralsFormat
  from .stop_configuration import StopConfiguration
  from .text_to_speech_language import TextToSpeechLanguage
@@ -49,7 +54,6 @@ from .text_to_speech_model import TextToSpeechModel
  from .text_to_speech_response import TextToSpeechResponse
  from .text_to_speech_speaker import TextToSpeechSpeaker
  from .timestamps_model import TimestampsModel
- from .transcription_data import TranscriptionData
  from .transcription_metrics import TranscriptionMetrics
  from .translate_mode import TranslateMode
  from .translate_model import TranslateModel
@@ -76,6 +80,7 @@ __all__ = [
  "ChatCompletionResponseMessage",
  "Choice",
  "CompletionUsage",
+ "ConfigMessage",
  "CreateChatCompletionResponse",
  "DiarizedEntry",
  "DiarizedTranscript",
@@ -89,18 +94,22 @@ __all__ = [
  "LanguageIdentificationResponse",
  "NumeralsFormat",
  "ReasoningEffort",
+ "ResponseType",
  "Role",
  "SarvamModelIds",
  "SpeechSampleRate",
  "SpeechToTextLanguage",
  "SpeechToTextModel",
  "SpeechToTextResponse",
+ "SpeechToTextResponseData",
  "SpeechToTextStreamingResponse",
- "SpeechToTextStreamingResponseData",
- "SpeechToTextStreamingResponseType",
+ "SpeechToTextTranscriptionData",
  "SpeechToTextTranslateLanguage",
  "SpeechToTextTranslateModel",
  "SpeechToTextTranslateResponse",
+ "SpeechToTextTranslateResponseData",
+ "SpeechToTextTranslateStreamingResponse",
+ "SpeechToTextTranslateTranscriptionData",
  "SpokenFormNumeralsFormat",
  "StopConfiguration",
  "TextToSpeechLanguage",
@@ -108,7 +117,6 @@ __all__ = [
  "TextToSpeechResponse",
  "TextToSpeechSpeaker",
  "TimestampsModel",
- "TranscriptionData",
  "TranscriptionMetrics",
  "TranslateMode",
  "TranslateModel",
@@ -0,0 +1,27 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ import pydantic
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+
+
+ class ConfigMessage(UniversalBaseModel):
+     type: typing.Literal["config"] = pydantic.Field(default="config")
+     """
+     Message type identifier for configuration
+     """
+
+     prompt: typing.Optional[str] = pydantic.Field(default=None)
+     """
+     Prompt for ASR model to improve transcription accuracy
+     """
+
+     if IS_PYDANTIC_V2:
+         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
+     else:
+
+         class Config:
+             frozen = True
+             smart_union = True
+             extra = pydantic.Extra.allow
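
As a reference for what the socket clients put on the wire: they call .dict() on this model and JSON-encode the result, so a prompt configuration serializes roughly as shown below (the prompt text is just an example).

import json

from sarvamai.types import ConfigMessage

msg = ConfigMessage(prompt="order id, SKU codes")
print(json.dumps(msg.dict()))  # {"type": "config", "prompt": "order id, SKU codes"}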
@@ -0,0 +1,5 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ ResponseType = typing.Union[typing.Literal["data", "error", "events"], typing.Any]
@@ -0,0 +1,9 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+
+ from .error_data import ErrorData
+ from .events_data import EventsData
+ from .speech_to_text_transcription_data import SpeechToTextTranscriptionData
+
+ SpeechToTextResponseData = typing.Union[SpeechToTextTranscriptionData, ErrorData, EventsData]
@@ -4,13 +4,13 @@ import typing

  import pydantic
  from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
- from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseData
- from .speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
+ from .response_type import ResponseType
+ from .speech_to_text_response_data import SpeechToTextResponseData


  class SpeechToTextStreamingResponse(UniversalBaseModel):
-     type: SpeechToTextStreamingResponseType
-     data: SpeechToTextStreamingResponseData
+     type: ResponseType
+     data: SpeechToTextResponseData

      if IS_PYDANTIC_V2:
          model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
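
With the renamed pieces in place, a streaming response handler branches on the shared ResponseType tag and the SpeechToTextResponseData union. The field names on the data variants are not part of this diff, so the hedged sketch below only dispatches on the tag.

from sarvamai.types import SpeechToTextStreamingResponse

def handle(response: SpeechToTextStreamingResponse) -> None:
    if response.type == "data":
        # SpeechToTextTranscriptionData payload
        print("transcription:", response.data)
    elif response.type == "events":
        # EventsData payload, e.g. VAD signals when vad_signals is enabled
        print("event:", response.data)
    elif response.type == "error":
        # ErrorData payload
        raise RuntimeError(f"stream error: {response.data}")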