sarvamai 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +8 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/__init__.py +2 -0
- sarvamai/requests/config_message.py +1 -1
- sarvamai/requests/stt_flush_signal.py +16 -0
- sarvamai/speech_to_text_streaming/__init__.py +2 -0
- sarvamai/speech_to_text_streaming/client.py +13 -0
- sarvamai/speech_to_text_streaming/raw_client.py +13 -0
- sarvamai/speech_to_text_streaming/socket_client.py +62 -13
- sarvamai/speech_to_text_streaming/types/__init__.py +2 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_flush_signal.py +5 -0
- sarvamai/speech_to_text_translate_streaming/__init__.py +2 -0
- sarvamai/speech_to_text_translate_streaming/client.py +13 -0
- sarvamai/speech_to_text_translate_streaming/raw_client.py +13 -0
- sarvamai/speech_to_text_translate_streaming/socket_client.py +56 -11
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +2 -0
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_flush_signal.py +5 -0
- sarvamai/types/__init__.py +2 -0
- sarvamai/types/config_message.py +1 -1
- sarvamai/types/stt_flush_signal.py +26 -0
- {sarvamai-0.1.18.dist-info → sarvamai-0.1.19.dist-info}/METADATA +1 -1
- {sarvamai-0.1.18.dist-info → sarvamai-0.1.19.dist-info}/RECORD +23 -19
- {sarvamai-0.1.18.dist-info → sarvamai-0.1.19.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -74,6 +74,7 @@ from .types import (
|
|
|
74
74
|
SpokenFormNumeralsFormat,
|
|
75
75
|
StopConfiguration,
|
|
76
76
|
StorageContainerType,
|
|
77
|
+
SttFlushSignal,
|
|
77
78
|
TaskDetailV1,
|
|
78
79
|
TaskFileDetails,
|
|
79
80
|
TaskState,
|
|
@@ -167,6 +168,7 @@ from .requests import (
|
|
|
167
168
|
SpeechToTextTranslateStreamingResponseParams,
|
|
168
169
|
SpeechToTextTranslateTranscriptionDataParams,
|
|
169
170
|
StopConfigurationParams,
|
|
171
|
+
SttFlushSignalParams,
|
|
170
172
|
TaskDetailV1Params,
|
|
171
173
|
TaskFileDetailsParams,
|
|
172
174
|
TextToSpeechResponseParams,
|
|
@@ -176,12 +178,14 @@ from .requests import (
|
|
|
176
178
|
TransliterationResponseParams,
|
|
177
179
|
)
|
|
178
180
|
from .speech_to_text_streaming import (
|
|
181
|
+
SpeechToTextStreamingFlushSignal,
|
|
179
182
|
SpeechToTextStreamingHighVadSensitivity,
|
|
180
183
|
SpeechToTextStreamingLanguageCode,
|
|
181
184
|
SpeechToTextStreamingModel,
|
|
182
185
|
SpeechToTextStreamingVadSignals,
|
|
183
186
|
)
|
|
184
187
|
from .speech_to_text_translate_streaming import (
|
|
188
|
+
SpeechToTextTranslateStreamingFlushSignal,
|
|
185
189
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
186
190
|
SpeechToTextTranslateStreamingModel,
|
|
187
191
|
SpeechToTextTranslateStreamingVadSignals,
|
|
@@ -297,6 +301,7 @@ __all__ = [
|
|
|
297
301
|
"SpeechToTextResponseData",
|
|
298
302
|
"SpeechToTextResponseDataParams",
|
|
299
303
|
"SpeechToTextResponseParams",
|
|
304
|
+
"SpeechToTextStreamingFlushSignal",
|
|
300
305
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
301
306
|
"SpeechToTextStreamingLanguageCode",
|
|
302
307
|
"SpeechToTextStreamingModel",
|
|
@@ -313,6 +318,7 @@ __all__ = [
|
|
|
313
318
|
"SpeechToTextTranslateResponseData",
|
|
314
319
|
"SpeechToTextTranslateResponseDataParams",
|
|
315
320
|
"SpeechToTextTranslateResponseParams",
|
|
321
|
+
"SpeechToTextTranslateStreamingFlushSignal",
|
|
316
322
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
317
323
|
"SpeechToTextTranslateStreamingModel",
|
|
318
324
|
"SpeechToTextTranslateStreamingResponse",
|
|
@@ -324,6 +330,8 @@ __all__ = [
|
|
|
324
330
|
"StopConfiguration",
|
|
325
331
|
"StopConfigurationParams",
|
|
326
332
|
"StorageContainerType",
|
|
333
|
+
"SttFlushSignal",
|
|
334
|
+
"SttFlushSignalParams",
|
|
327
335
|
"TaskDetailV1",
|
|
328
336
|
"TaskDetailV1Params",
|
|
329
337
|
"TaskFileDetails",
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.18",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.19",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.18",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.19",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
sarvamai/requests/__init__.py
CHANGED
|
@@ -54,6 +54,7 @@ from .speech_to_text_translate_response_data import SpeechToTextTranslateRespons
|
|
|
54
54
|
from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
|
|
55
55
|
from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
|
|
56
56
|
from .stop_configuration import StopConfigurationParams
|
|
57
|
+
from .stt_flush_signal import SttFlushSignalParams
|
|
57
58
|
from .task_detail_v_1 import TaskDetailV1Params
|
|
58
59
|
from .task_file_details import TaskFileDetailsParams
|
|
59
60
|
from .text_to_speech_response import TextToSpeechResponseParams
|
|
@@ -113,6 +114,7 @@ __all__ = [
|
|
|
113
114
|
"SpeechToTextTranslateStreamingResponseParams",
|
|
114
115
|
"SpeechToTextTranslateTranscriptionDataParams",
|
|
115
116
|
"StopConfigurationParams",
|
|
117
|
+
"SttFlushSignalParams",
|
|
116
118
|
"TaskDetailV1Params",
|
|
117
119
|
"TaskFileDetailsParams",
|
|
118
120
|
"TextToSpeechResponseParams",
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SttFlushSignalParams(typing_extensions.TypedDict):
|
|
9
|
+
"""
|
|
10
|
+
Signal to flush the audio buffer and force finalize partial transcriptions/translations
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
type: typing.Literal["flush"]
|
|
14
|
+
"""
|
|
15
|
+
Type identifier for flush signal
|
|
16
|
+
"""
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
5
|
from .types import (
|
|
6
|
+
SpeechToTextStreamingFlushSignal,
|
|
6
7
|
SpeechToTextStreamingHighVadSensitivity,
|
|
7
8
|
SpeechToTextStreamingLanguageCode,
|
|
8
9
|
SpeechToTextStreamingModel,
|
|
@@ -10,6 +11,7 @@ from .types import (
|
|
|
10
11
|
)
|
|
11
12
|
|
|
12
13
|
__all__ = [
|
|
14
|
+
"SpeechToTextStreamingFlushSignal",
|
|
13
15
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
14
16
|
"SpeechToTextStreamingLanguageCode",
|
|
15
17
|
"SpeechToTextStreamingModel",
|
|
@@ -11,6 +11,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStreamingClient
|
|
13
13
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
14
|
+
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
14
15
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
15
16
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
16
17
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
@@ -45,6 +46,7 @@ class SpeechToTextStreamingClient:
|
|
|
45
46
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
46
47
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
47
48
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
49
|
+
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
48
50
|
api_subscription_key: typing.Optional[str] = None,
|
|
49
51
|
request_options: typing.Optional[RequestOptions] = None,
|
|
50
52
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -65,6 +67,9 @@ class SpeechToTextStreamingClient:
|
|
|
65
67
|
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
66
68
|
Enable VAD signals in response
|
|
67
69
|
|
|
70
|
+
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
71
|
+
Signal to flush the audio buffer and finalize transcription
|
|
72
|
+
|
|
68
73
|
api_subscription_key : typing.Optional[str]
|
|
69
74
|
API subscription key for authentication
|
|
70
75
|
|
|
@@ -85,6 +90,8 @@ class SpeechToTextStreamingClient:
|
|
|
85
90
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
86
91
|
if vad_signals is not None:
|
|
87
92
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
93
|
+
if flush_signal is not None:
|
|
94
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
88
95
|
ws_url = ws_url + f"?{query_params}"
|
|
89
96
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
90
97
|
if api_subscription_key is not None:
|
|
@@ -132,6 +139,7 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
132
139
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
133
140
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
134
141
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
142
|
+
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
135
143
|
api_subscription_key: typing.Optional[str] = None,
|
|
136
144
|
request_options: typing.Optional[RequestOptions] = None,
|
|
137
145
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -152,6 +160,9 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
152
160
|
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
153
161
|
Enable VAD signals in response
|
|
154
162
|
|
|
163
|
+
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
164
|
+
Signal to flush the audio buffer and finalize transcription
|
|
165
|
+
|
|
155
166
|
api_subscription_key : typing.Optional[str]
|
|
156
167
|
API subscription key for authentication
|
|
157
168
|
|
|
@@ -172,6 +183,8 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
172
183
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
173
184
|
if vad_signals is not None:
|
|
174
185
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
186
|
+
if flush_signal is not None:
|
|
187
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
175
188
|
ws_url = ws_url + f"?{query_params}"
|
|
176
189
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
177
190
|
if api_subscription_key is not None:
|
|
@@ -10,6 +10,7 @@ from ..core.api_error import ApiError
|
|
|
10
10
|
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
13
|
+
from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
13
14
|
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
14
15
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
15
16
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
@@ -33,6 +34,7 @@ class RawSpeechToTextStreamingClient:
|
|
|
33
34
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
34
35
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
35
36
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
37
|
+
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
36
38
|
api_subscription_key: typing.Optional[str] = None,
|
|
37
39
|
request_options: typing.Optional[RequestOptions] = None,
|
|
38
40
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -53,6 +55,9 @@ class RawSpeechToTextStreamingClient:
|
|
|
53
55
|
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
54
56
|
Enable VAD signals in response
|
|
55
57
|
|
|
58
|
+
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
59
|
+
Signal to flush the audio buffer and finalize transcription
|
|
60
|
+
|
|
56
61
|
api_subscription_key : typing.Optional[str]
|
|
57
62
|
API subscription key for authentication
|
|
58
63
|
|
|
@@ -73,6 +78,8 @@ class RawSpeechToTextStreamingClient:
|
|
|
73
78
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
74
79
|
if vad_signals is not None:
|
|
75
80
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
81
|
+
if flush_signal is not None:
|
|
82
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
76
83
|
ws_url = ws_url + f"?{query_params}"
|
|
77
84
|
headers = self._client_wrapper.get_headers()
|
|
78
85
|
if api_subscription_key is not None:
|
|
@@ -109,6 +116,7 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
109
116
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
110
117
|
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
111
118
|
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
119
|
+
flush_signal: typing.Optional[SpeechToTextStreamingFlushSignal] = None,
|
|
112
120
|
api_subscription_key: typing.Optional[str] = None,
|
|
113
121
|
request_options: typing.Optional[RequestOptions] = None,
|
|
114
122
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -129,6 +137,9 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
129
137
|
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
130
138
|
Enable VAD signals in response
|
|
131
139
|
|
|
140
|
+
flush_signal : typing.Optional[SpeechToTextStreamingFlushSignal]
|
|
141
|
+
Signal to flush the audio buffer and finalize transcription
|
|
142
|
+
|
|
132
143
|
api_subscription_key : typing.Optional[str]
|
|
133
144
|
API subscription key for authentication
|
|
134
145
|
|
|
@@ -149,6 +160,8 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
149
160
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
150
161
|
if vad_signals is not None:
|
|
151
162
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
163
|
+
if flush_signal is not None:
|
|
164
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
152
165
|
ws_url = ws_url + f"?{query_params}"
|
|
153
166
|
headers = self._client_wrapper.get_headers()
|
|
154
167
|
if api_subscription_key is not None:
|
|
@@ -9,7 +9,10 @@ from ..core.events import EventEmitterMixin, EventType
|
|
|
9
9
|
from ..core.pydantic_utilities import parse_obj_as
|
|
10
10
|
from ..types.audio_data import AudioData
|
|
11
11
|
from ..types.audio_message import AudioMessage
|
|
12
|
-
from ..types.speech_to_text_streaming_response import SpeechToTextStreamingResponse
|
|
12
|
+
from ..types.speech_to_text_streaming_response import (
|
|
13
|
+
SpeechToTextStreamingResponse,
|
|
14
|
+
)
|
|
15
|
+
from ..types.stt_flush_signal import SttFlushSignal
|
|
13
16
|
|
|
14
17
|
SpeechToTextStreamingSocketClientResponse = typing.Union[SpeechToTextStreamingResponse]
|
|
15
18
|
|
|
@@ -22,7 +25,9 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
22
25
|
async def __aiter__(self):
|
|
23
26
|
async for message in self._websocket:
|
|
24
27
|
message = json.loads(message) if isinstance(message, str) else message
|
|
25
|
-
yield parse_obj_as(SpeechToTextStreamingSocketClientResponse, message) # type: ignore
|
|
28
|
+
yield parse_obj_as(
|
|
29
|
+
SpeechToTextStreamingSocketClientResponse, message
|
|
30
|
+
) # type: ignore
|
|
26
31
|
|
|
27
32
|
async def start_listening(self):
|
|
28
33
|
"""
|
|
@@ -37,8 +42,14 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
37
42
|
self._emit(EventType.OPEN, None)
|
|
38
43
|
try:
|
|
39
44
|
async for raw_message in self._websocket:
|
|
40
|
-
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
41
|
-
parsed = parse_obj_as(SpeechToTextStreamingSocketClientResponse, raw_message) # type: ignore
|
|
45
|
+
raw_message = (
|
|
46
|
+
json.loads(raw_message)
|
|
47
|
+
if isinstance(raw_message, str)
|
|
48
|
+
else raw_message
|
|
49
|
+
)
|
|
50
|
+
parsed = parse_obj_as(
|
|
51
|
+
SpeechToTextStreamingSocketClientResponse, raw_message
|
|
52
|
+
) # type: ignore
|
|
42
53
|
self._emit(EventType.MESSAGE, parsed)
|
|
43
54
|
except websockets.WebSocketException as exc:
|
|
44
55
|
self._emit(EventType.ERROR, exc)
|
|
@@ -54,10 +65,23 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
54
65
|
"""
|
|
55
66
|
|
|
56
67
|
return await self._send_speech_to_text_streaming_audio_message(
|
|
57
|
-
message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
|
|
68
|
+
message=AudioMessage(
|
|
69
|
+
audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding)
|
|
70
|
+
)
|
|
58
71
|
)
|
|
59
72
|
|
|
60
|
-
async def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
73
|
+
async def flush(self) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Signal to flush the audio buffer and force finalize partial transcriptions.
|
|
76
|
+
Use this to force processing of any remaining audio that hasn't been
|
|
77
|
+
transcribed yet.
|
|
78
|
+
"""
|
|
79
|
+
message = SttFlushSignal()
|
|
80
|
+
await self._send_model(message)
|
|
81
|
+
|
|
82
|
+
async def _send_speech_to_text_streaming_audio_message(
|
|
83
|
+
self, message: AudioMessage
|
|
84
|
+
) -> None:
|
|
61
85
|
"""
|
|
62
86
|
Send a message to the websocket connection.
|
|
63
87
|
The message will be sent as a AudioMessage.
|
|
@@ -70,7 +94,9 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
70
94
|
"""
|
|
71
95
|
data = await self._websocket.recv()
|
|
72
96
|
data = json.loads(data) if isinstance(data, str) else data
|
|
73
|
-
return parse_obj_as(SpeechToTextStreamingSocketClientResponse, data) # type: ignore
|
|
97
|
+
return parse_obj_as(
|
|
98
|
+
SpeechToTextStreamingSocketClientResponse, data
|
|
99
|
+
) # type: ignore
|
|
74
100
|
|
|
75
101
|
async def _send(self, data: typing.Any) -> None:
|
|
76
102
|
"""
|
|
@@ -95,7 +121,9 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
95
121
|
def __iter__(self):
|
|
96
122
|
for message in self._websocket:
|
|
97
123
|
message = json.loads(message) if isinstance(message, str) else message
|
|
98
|
-
yield parse_obj_as(SpeechToTextStreamingSocketClientResponse, message) # type: ignore
|
|
124
|
+
yield parse_obj_as(
|
|
125
|
+
SpeechToTextStreamingSocketClientResponse, message
|
|
126
|
+
) # type: ignore
|
|
99
127
|
|
|
100
128
|
def start_listening(self):
|
|
101
129
|
"""
|
|
@@ -110,8 +138,14 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
110
138
|
self._emit(EventType.OPEN, None)
|
|
111
139
|
try:
|
|
112
140
|
for raw_message in self._websocket:
|
|
113
|
-
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
114
|
-
parsed = parse_obj_as(SpeechToTextStreamingSocketClientResponse, raw_message) # type: ignore
|
|
141
|
+
raw_message = (
|
|
142
|
+
json.loads(raw_message)
|
|
143
|
+
if isinstance(raw_message, str)
|
|
144
|
+
else raw_message
|
|
145
|
+
)
|
|
146
|
+
parsed = parse_obj_as(
|
|
147
|
+
SpeechToTextStreamingSocketClientResponse, raw_message
|
|
148
|
+
) # type: ignore
|
|
115
149
|
self._emit(EventType.MESSAGE, parsed)
|
|
116
150
|
except websockets.WebSocketException as exc:
|
|
117
151
|
self._emit(EventType.ERROR, exc)
|
|
@@ -126,18 +160,33 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
126
160
|
:param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
|
|
127
161
|
"""
|
|
128
162
|
return self._send_speech_to_text_streaming_audio_message(
|
|
129
|
-
message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
|
|
163
|
+
message=AudioMessage(
|
|
164
|
+
audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding)
|
|
165
|
+
)
|
|
130
166
|
)
|
|
131
167
|
|
|
168
|
+
def flush(self) -> None:
|
|
169
|
+
"""
|
|
170
|
+
Signal to flush the audio buffer and force finalize partial transcriptions.
|
|
171
|
+
Use this to force processing of any remaining audio that hasn't been
|
|
172
|
+
transcribed yet.
|
|
173
|
+
"""
|
|
174
|
+
message = SttFlushSignal()
|
|
175
|
+
self._send_model(message)
|
|
176
|
+
|
|
132
177
|
def recv(self) -> SpeechToTextStreamingSocketClientResponse:
|
|
133
178
|
"""
|
|
134
179
|
Receive a message from the websocket connection.
|
|
135
180
|
"""
|
|
136
181
|
data = self._websocket.recv()
|
|
137
182
|
data = json.loads(data) if isinstance(data, str) else data
|
|
138
|
-
return parse_obj_as(SpeechToTextStreamingSocketClientResponse, data) # type: ignore
|
|
183
|
+
return parse_obj_as(
|
|
184
|
+
SpeechToTextStreamingSocketClientResponse, data
|
|
185
|
+
) # type: ignore
|
|
139
186
|
|
|
140
|
-
def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
187
|
+
def _send_speech_to_text_streaming_audio_message(
|
|
188
|
+
self, message: AudioMessage
|
|
189
|
+
) -> None:
|
|
141
190
|
"""
|
|
142
191
|
Send a message to the websocket connection.
|
|
143
192
|
The message will be sent as a AudioMessage.
|
|
@@ -2,12 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
+
from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
|
|
5
6
|
from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
6
7
|
from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
7
8
|
from .speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
8
9
|
from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
9
10
|
|
|
10
11
|
__all__ = [
|
|
12
|
+
"SpeechToTextStreamingFlushSignal",
|
|
11
13
|
"SpeechToTextStreamingHighVadSensitivity",
|
|
12
14
|
"SpeechToTextStreamingLanguageCode",
|
|
13
15
|
"SpeechToTextStreamingModel",
|
|
@@ -3,12 +3,14 @@
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
5
|
from .types import (
|
|
6
|
+
SpeechToTextTranslateStreamingFlushSignal,
|
|
6
7
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
7
8
|
SpeechToTextTranslateStreamingModel,
|
|
8
9
|
SpeechToTextTranslateStreamingVadSignals,
|
|
9
10
|
)
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
13
|
+
"SpeechToTextTranslateStreamingFlushSignal",
|
|
12
14
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
13
15
|
"SpeechToTextTranslateStreamingModel",
|
|
14
16
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
@@ -11,6 +11,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .raw_client import AsyncRawSpeechToTextTranslateStreamingClient, RawSpeechToTextTranslateStreamingClient
|
|
13
13
|
from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
|
|
14
|
+
from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
14
15
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
15
16
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
16
17
|
)
|
|
@@ -45,6 +46,7 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
45
46
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
46
47
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
47
48
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
49
|
+
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
48
50
|
api_subscription_key: typing.Optional[str] = None,
|
|
49
51
|
request_options: typing.Optional[RequestOptions] = None,
|
|
50
52
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -62,6 +64,9 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
62
64
|
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
63
65
|
Enable VAD signals in response
|
|
64
66
|
|
|
67
|
+
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
68
|
+
Signal to flush the audio buffer and finalize transcription and translation
|
|
69
|
+
|
|
65
70
|
api_subscription_key : typing.Optional[str]
|
|
66
71
|
API subscription key for authentication
|
|
67
72
|
|
|
@@ -80,6 +85,8 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
80
85
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
81
86
|
if vad_signals is not None:
|
|
82
87
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
88
|
+
if flush_signal is not None:
|
|
89
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
83
90
|
ws_url = ws_url + f"?{query_params}"
|
|
84
91
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
85
92
|
if api_subscription_key is not None:
|
|
@@ -126,6 +133,7 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
126
133
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
127
134
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
128
135
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
136
|
+
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
129
137
|
api_subscription_key: typing.Optional[str] = None,
|
|
130
138
|
request_options: typing.Optional[RequestOptions] = None,
|
|
131
139
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -143,6 +151,9 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
143
151
|
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
144
152
|
Enable VAD signals in response
|
|
145
153
|
|
|
154
|
+
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
155
|
+
Signal to flush the audio buffer and finalize transcription and translation
|
|
156
|
+
|
|
146
157
|
api_subscription_key : typing.Optional[str]
|
|
147
158
|
API subscription key for authentication
|
|
148
159
|
|
|
@@ -161,6 +172,8 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
161
172
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
162
173
|
if vad_signals is not None:
|
|
163
174
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
175
|
+
if flush_signal is not None:
|
|
176
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
164
177
|
ws_url = ws_url + f"?{query_params}"
|
|
165
178
|
headers = self._raw_client._client_wrapper.get_headers()
|
|
166
179
|
if api_subscription_key is not None:
|
|
@@ -10,6 +10,7 @@ from ..core.api_error import ApiError
|
|
|
10
10
|
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
|
|
13
|
+
from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
13
14
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
14
15
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
15
16
|
)
|
|
@@ -33,6 +34,7 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
33
34
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
34
35
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
35
36
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
37
|
+
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
36
38
|
api_subscription_key: typing.Optional[str] = None,
|
|
37
39
|
request_options: typing.Optional[RequestOptions] = None,
|
|
38
40
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -50,6 +52,9 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
50
52
|
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
51
53
|
Enable VAD signals in response
|
|
52
54
|
|
|
55
|
+
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
56
|
+
Signal to flush the audio buffer and finalize transcription and translation
|
|
57
|
+
|
|
53
58
|
api_subscription_key : typing.Optional[str]
|
|
54
59
|
API subscription key for authentication
|
|
55
60
|
|
|
@@ -68,6 +73,8 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
68
73
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
69
74
|
if vad_signals is not None:
|
|
70
75
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
76
|
+
if flush_signal is not None:
|
|
77
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
71
78
|
ws_url = ws_url + f"?{query_params}"
|
|
72
79
|
headers = self._client_wrapper.get_headers()
|
|
73
80
|
if api_subscription_key is not None:
|
|
@@ -103,6 +110,7 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
103
110
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
104
111
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
105
112
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
113
|
+
flush_signal: typing.Optional[SpeechToTextTranslateStreamingFlushSignal] = None,
|
|
106
114
|
api_subscription_key: typing.Optional[str] = None,
|
|
107
115
|
request_options: typing.Optional[RequestOptions] = None,
|
|
108
116
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -120,6 +128,9 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
120
128
|
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
121
129
|
Enable VAD signals in response
|
|
122
130
|
|
|
131
|
+
flush_signal : typing.Optional[SpeechToTextTranslateStreamingFlushSignal]
|
|
132
|
+
Signal to flush the audio buffer and finalize transcription and translation
|
|
133
|
+
|
|
123
134
|
api_subscription_key : typing.Optional[str]
|
|
124
135
|
API subscription key for authentication
|
|
125
136
|
|
|
@@ -138,6 +149,8 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
138
149
|
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
139
150
|
if vad_signals is not None:
|
|
140
151
|
query_params = query_params.add("vad_signals", vad_signals)
|
|
152
|
+
if flush_signal is not None:
|
|
153
|
+
query_params = query_params.add("flush_signal", flush_signal)
|
|
141
154
|
ws_url = ws_url + f"?{query_params}"
|
|
142
155
|
headers = self._client_wrapper.get_headers()
|
|
143
156
|
if api_subscription_key is not None:
|
|
@@ -10,9 +10,14 @@ from ..core.pydantic_utilities import parse_obj_as
|
|
|
10
10
|
from ..types.audio_data import AudioData
|
|
11
11
|
from ..types.audio_message import AudioMessage
|
|
12
12
|
from ..types.config_message import ConfigMessage
|
|
13
|
-
from ..types.speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
|
|
13
|
+
from ..types.speech_to_text_translate_streaming_response import (
|
|
14
|
+
SpeechToTextTranslateStreamingResponse,
|
|
15
|
+
)
|
|
16
|
+
from ..types.stt_flush_signal import SttFlushSignal
|
|
14
17
|
|
|
15
|
-
SpeechToTextTranslateStreamingSocketClientResponse = typing.Union[SpeechToTextTranslateStreamingResponse]
|
|
18
|
+
SpeechToTextTranslateStreamingSocketClientResponse = typing.Union[
|
|
19
|
+
SpeechToTextTranslateStreamingResponse
|
|
20
|
+
]
|
|
16
21
|
|
|
17
22
|
|
|
18
23
|
class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
@@ -38,7 +43,11 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
38
43
|
self._emit(EventType.OPEN, None)
|
|
39
44
|
try:
|
|
40
45
|
async for raw_message in self._websocket:
|
|
41
|
-
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
46
|
+
raw_message = (
|
|
47
|
+
json.loads(raw_message)
|
|
48
|
+
if isinstance(raw_message, str)
|
|
49
|
+
else raw_message
|
|
50
|
+
)
|
|
42
51
|
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
43
52
|
self._emit(EventType.MESSAGE, parsed)
|
|
44
53
|
except websockets.WebSocketException as exc:
|
|
@@ -46,7 +55,9 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
46
55
|
finally:
|
|
47
56
|
self._emit(EventType.CLOSE, None)
|
|
48
57
|
|
|
49
|
-
async def translate(
|
|
58
|
+
async def translate(
|
|
59
|
+
self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000
|
|
60
|
+
):
|
|
50
61
|
"""
|
|
51
62
|
Sends audio translation request to the server.
|
|
52
63
|
:param audio: Base64 encoded audio data
|
|
@@ -54,7 +65,9 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
54
65
|
:param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
|
|
55
66
|
"""
|
|
56
67
|
return await self._send_speech_to_text_translate_streaming_audio_message(
|
|
57
|
-
message=AudioMessage(
|
|
68
|
+
message=AudioMessage(
|
|
69
|
+
audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate)
|
|
70
|
+
)
|
|
58
71
|
)
|
|
59
72
|
|
|
60
73
|
async def set_prompt(self, prompt: str) -> None:
|
|
@@ -66,15 +79,28 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
66
79
|
message = ConfigMessage(prompt=prompt)
|
|
67
80
|
await self._send_config_message(message)
|
|
68
81
|
|
|
82
|
+
async def flush(self) -> None:
|
|
83
|
+
"""
|
|
84
|
+
Signal to flush the audio buffer and force finalize partial
|
|
85
|
+
transcriptions and translations. Use this to force processing of any
|
|
86
|
+
remaining audio that hasn't been transcribed and translated yet.
|
|
87
|
+
"""
|
|
88
|
+
message = SttFlushSignal()
|
|
89
|
+
await self._send_model(message)
|
|
90
|
+
|
|
69
91
|
async def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
70
92
|
"""
|
|
71
93
|
Receive a message from the websocket connection.
|
|
72
94
|
"""
|
|
73
95
|
data = await self._websocket.recv()
|
|
74
96
|
data = json.loads(data) if isinstance(data, str) else data
|
|
75
|
-
return parse_obj_as(
|
|
97
|
+
return parse_obj_as(
|
|
98
|
+
SpeechToTextTranslateStreamingSocketClientResponse, data
|
|
99
|
+
) # type: ignore
|
|
76
100
|
|
|
77
|
-
async def _send_speech_to_text_translate_streaming_audio_message(
|
|
101
|
+
async def _send_speech_to_text_translate_streaming_audio_message(
|
|
102
|
+
self, message: AudioMessage
|
|
103
|
+
) -> None:
|
|
78
104
|
"""
|
|
79
105
|
Send a message to the websocket connection.
|
|
80
106
|
The message will be sent as a AudioMessage.
|
|
@@ -126,7 +152,11 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
126
152
|
self._emit(EventType.OPEN, None)
|
|
127
153
|
try:
|
|
128
154
|
for raw_message in self._websocket:
|
|
129
|
-
raw_message =
|
|
155
|
+
raw_message = (
|
|
156
|
+
json.loads(raw_message)
|
|
157
|
+
if isinstance(raw_message, str)
|
|
158
|
+
else raw_message
|
|
159
|
+
)
|
|
130
160
|
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
131
161
|
self._emit(EventType.MESSAGE, parsed)
|
|
132
162
|
except websockets.WebSocketException as exc:
|
|
@@ -134,7 +164,9 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
134
164
|
finally:
|
|
135
165
|
self._emit(EventType.CLOSE, None)
|
|
136
166
|
|
|
137
|
-
def translate(
|
|
167
|
+
def translate(
|
|
168
|
+
self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000
|
|
169
|
+
):
|
|
138
170
|
"""
|
|
139
171
|
Sends audio translation request to the server.
|
|
140
172
|
:param audio: Base64 encoded audio data
|
|
@@ -142,7 +174,9 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
142
174
|
:param sample_rate: Audio sample rate in Hz (default is 16000)
|
|
143
175
|
"""
|
|
144
176
|
return self._send_speech_to_text_translate_streaming_audio_message(
|
|
145
|
-
message=AudioMessage(
|
|
177
|
+
message=AudioMessage(
|
|
178
|
+
audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate)
|
|
179
|
+
)
|
|
146
180
|
)
|
|
147
181
|
|
|
148
182
|
def set_prompt(self, prompt: str) -> None:
|
|
@@ -154,6 +188,15 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
154
188
|
message = ConfigMessage(prompt=prompt)
|
|
155
189
|
self._send_config_message(message)
|
|
156
190
|
|
|
191
|
+
def flush(self) -> None:
|
|
192
|
+
"""
|
|
193
|
+
Signal to flush the audio buffer and force finalize partial transcriptions and translations.
|
|
194
|
+
Use this to force processing of any remaining audio that hasn't been
|
|
195
|
+
transcribed and translated yet.
|
|
196
|
+
"""
|
|
197
|
+
message = SttFlushSignal()
|
|
198
|
+
self._send_model(message)
|
|
199
|
+
|
|
157
200
|
def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
158
201
|
"""
|
|
159
202
|
Receive a message from the websocket connection.
|
|
@@ -169,7 +212,9 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
169
212
|
"""
|
|
170
213
|
self._send_model(message)
|
|
171
214
|
|
|
172
|
-
def _send_speech_to_text_translate_streaming_audio_message(
|
|
215
|
+
def _send_speech_to_text_translate_streaming_audio_message(
|
|
216
|
+
self, message: AudioMessage
|
|
217
|
+
) -> None:
|
|
173
218
|
"""
|
|
174
219
|
Send a message to the websocket connection.
|
|
175
220
|
The message will be sent as a AudioMessage.
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
+
from .speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
5
6
|
from .speech_to_text_translate_streaming_high_vad_sensitivity import SpeechToTextTranslateStreamingHighVadSensitivity
|
|
6
7
|
from .speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
|
|
7
8
|
from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
11
|
+
"SpeechToTextTranslateStreamingFlushSignal",
|
|
10
12
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
11
13
|
"SpeechToTextTranslateStreamingModel",
|
|
12
14
|
"SpeechToTextTranslateStreamingVadSignals",
|
sarvamai/types/__init__.py
CHANGED
|
@@ -75,6 +75,7 @@ from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTr
|
|
|
75
75
|
from .spoken_form_numerals_format import SpokenFormNumeralsFormat
|
|
76
76
|
from .stop_configuration import StopConfiguration
|
|
77
77
|
from .storage_container_type import StorageContainerType
|
|
78
|
+
from .stt_flush_signal import SttFlushSignal
|
|
78
79
|
from .task_detail_v_1 import TaskDetailV1
|
|
79
80
|
from .task_file_details import TaskFileDetails
|
|
80
81
|
from .task_state import TaskState
|
|
@@ -168,6 +169,7 @@ __all__ = [
|
|
|
168
169
|
"SpokenFormNumeralsFormat",
|
|
169
170
|
"StopConfiguration",
|
|
170
171
|
"StorageContainerType",
|
|
172
|
+
"SttFlushSignal",
|
|
171
173
|
"TaskDetailV1",
|
|
172
174
|
"TaskFileDetails",
|
|
173
175
|
"TaskState",
|
sarvamai/types/config_message.py
CHANGED
|
@@ -14,7 +14,7 @@ class ConfigMessage(UniversalBaseModel):
|
|
|
14
14
|
|
|
15
15
|
prompt: typing.Optional[str] = pydantic.Field(default=None)
|
|
16
16
|
"""
|
|
17
|
-
Prompt for ASR model to improve transcription accuracy
|
|
17
|
+
Prompt for ASR model to improve transcription accuracy.
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
20
|
if IS_PYDANTIC_V2:
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SttFlushSignal(UniversalBaseModel):
|
|
10
|
+
"""
|
|
11
|
+
Signal to flush the audio buffer and force finalize partial transcriptions/translations
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
type: typing.Literal["flush"] = pydantic.Field(default="flush")
|
|
15
|
+
"""
|
|
16
|
+
Type identifier for flush signal
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
if IS_PYDANTIC_V2:
|
|
20
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
21
|
+
else:
|
|
22
|
+
|
|
23
|
+
class Config:
|
|
24
|
+
frozen = True
|
|
25
|
+
smart_union = True
|
|
26
|
+
extra = pydantic.Extra.allow
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sarvamai/__init__.py,sha256=
|
|
1
|
+
sarvamai/__init__.py,sha256=R2X_oWI35aSTjAskLMeKysc5rt17-ldLPc8vhkfGku4,10964
|
|
2
2
|
sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
3
3
|
sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
|
|
4
4
|
sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
|
|
5
5
|
sarvamai/client.py,sha256=J30X_os1lPf8Wml0KDFEf6p8VGHhgF_lf3nw1T2D3qo,8207
|
|
6
6
|
sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
|
|
7
7
|
sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
|
8
|
-
sarvamai/core/client_wrapper.py,sha256=
|
|
8
|
+
sarvamai/core/client_wrapper.py,sha256=ucCg-OwVhAiBDQkscKXALdYEmd9pgEBrfGW0Slq_Ae0,2566
|
|
9
9
|
sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
|
10
10
|
sarvamai/core/events.py,sha256=HvKBdSoYcFetk7cgNXb7FxuY-FtY8NtUhZIN7mGVx8U,1159
|
|
11
11
|
sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
|
@@ -28,7 +28,7 @@ sarvamai/errors/too_many_requests_error.py,sha256=Dl-_pfpboXJh-OtSbRaPQOB-UXvpVO
|
|
|
28
28
|
sarvamai/errors/unprocessable_entity_error.py,sha256=JqxtzIhvjkpQDqbT9Q-go1n-gyv9PsYqq0ng_ZYyBMo,347
|
|
29
29
|
sarvamai/play.py,sha256=4fh86zy8g8IPU2O8yPBY7QxXQOivv_nWQvPQsOa1arw,2183
|
|
30
30
|
sarvamai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
sarvamai/requests/__init__.py,sha256=
|
|
31
|
+
sarvamai/requests/__init__.py,sha256=nAXyZKbjwhLTxvfb1kaxOxZpyFvWwQu30Yr5HqgOZXw,5702
|
|
32
32
|
sarvamai/requests/audio_data.py,sha256=QI3SK5aiAg2yJ-m3l9CxOkONnH3CCKMFCl9kAdMs19o,410
|
|
33
33
|
sarvamai/requests/audio_message.py,sha256=ZBeogjGE6YFXXM-0g8zq9SoizDk21reR0YXSB-0fMjg,214
|
|
34
34
|
sarvamai/requests/audio_output.py,sha256=BnoX345rwoWgaMaj24u_19-SjmPV0xt7vlFEEDKRw20,280
|
|
@@ -43,7 +43,7 @@ sarvamai/requests/chat_completion_request_user_message.py,sha256=IqYy7K-qF9oQ8AU
|
|
|
43
43
|
sarvamai/requests/chat_completion_response_message.py,sha256=JFazj4zK-nj_wjdvNLDkcfIFxIlqw49Xf_P8o7d70aY,336
|
|
44
44
|
sarvamai/requests/choice.py,sha256=uulX4MZUoThEMcD3a80o_3V5YpnpqN8DfPaNZWVz-1o,867
|
|
45
45
|
sarvamai/requests/completion_usage.py,sha256=LbZV-RxcxKdCAYqhCiaRtSFF3VwMJq71A989Z1rm-I8,428
|
|
46
|
-
sarvamai/requests/config_message.py,sha256=
|
|
46
|
+
sarvamai/requests/config_message.py,sha256=SeWZB5OEVzgL3aAtLehpGZzLdSEkxr-6RI3372j_Blg,384
|
|
47
47
|
sarvamai/requests/configure_connection.py,sha256=a-foQtLxArL4CulvKEdeebbRqmS1GRmko3MZdnHVPEk,716
|
|
48
48
|
sarvamai/requests/configure_connection_data.py,sha256=lRk_4rYPQLLlwS2HXjQ9Abxdf98_DuOOja-VkrIR44Q,3016
|
|
49
49
|
sarvamai/requests/create_chat_completion_response.py,sha256=TqS9u5_WVWMok_NreT4TeOsLJQeybPkbJm45Q0Zxw30,857
|
|
@@ -76,6 +76,7 @@ sarvamai/requests/speech_to_text_translate_response_data.py,sha256=OmjunP9R2xert
|
|
|
76
76
|
sarvamai/requests/speech_to_text_translate_streaming_response.py,sha256=KTjYZ0_oLapuM5Iiq7UwejMsrL1TGgFAW4k5l17TkZs,385
|
|
77
77
|
sarvamai/requests/speech_to_text_translate_transcription_data.py,sha256=oAmW5ihTd301IJYN2u2KrZxB0j3EMacFBfvIhtOSjFI,595
|
|
78
78
|
sarvamai/requests/stop_configuration.py,sha256=Xmp8zyUpnN65pH5A7NqefckB8wk53_BBzOUrgRm2gXs,146
|
|
79
|
+
sarvamai/requests/stt_flush_signal.py,sha256=Gb-SoPPAyVKFVPZKxebLgV4bAv21NjVgvfCl5cqcxrY,360
|
|
79
80
|
sarvamai/requests/task_detail_v_1.py,sha256=2rehl7dSDSgzaw13b9bODamhiN2uB-IK4cOksq8Vmqc,582
|
|
80
81
|
sarvamai/requests/task_file_details.py,sha256=yf6NVXymizG9wTsCGl-xolsxDA_nDOzM3qeabiqMs60,186
|
|
81
82
|
sarvamai/requests/text_to_speech_response.py,sha256=-FNMERV6zrwIH27htwBy3QiPY8WhbTmlmQ06RegYQaw,391
|
|
@@ -90,11 +91,12 @@ sarvamai/speech_to_text_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23
|
|
|
90
91
|
sarvamai/speech_to_text_job/client.py,sha256=WSGBJxYcNxl77Zd1X6VVWjg4zshqecXf6WCyhfLXVlI,18007
|
|
91
92
|
sarvamai/speech_to_text_job/job.py,sha256=K8HOmwrYd6l82-MZfWDBmNkZeeERyg9YOihnFfvl-Js,15021
|
|
92
93
|
sarvamai/speech_to_text_job/raw_client.py,sha256=OZTPzMhAn-ckE_xKzfZ9QLsEX5EZVOJS0Pf-PBa19jM,48200
|
|
93
|
-
sarvamai/speech_to_text_streaming/__init__.py,sha256=
|
|
94
|
-
sarvamai/speech_to_text_streaming/client.py,sha256=
|
|
95
|
-
sarvamai/speech_to_text_streaming/raw_client.py,sha256=
|
|
96
|
-
sarvamai/speech_to_text_streaming/socket_client.py,sha256=
|
|
97
|
-
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=
|
|
94
|
+
sarvamai/speech_to_text_streaming/__init__.py,sha256=aVcI8kSlUpwLrPtA-4oUkuJ_D9Onha_qU49lA2vtEqU,515
|
|
95
|
+
sarvamai/speech_to_text_streaming/client.py,sha256=15k-UH2ROia2EymXia0BxVKbzBz7k1Rh9k7L9hgO5Fg,9206
|
|
96
|
+
sarvamai/speech_to_text_streaming/raw_client.py,sha256=69hAAD9mcpplo3gR7B5JXbo6Yys8bPT3DfSk1FjyI8Q,8387
|
|
97
|
+
sarvamai/speech_to_text_streaming/socket_client.py,sha256=P6qXRN0s3UFAp6CP5lkqrW2KPK9me70ZVfWquxLB4wI,7538
|
|
98
|
+
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=A99VJwG5fkik5SFTMDMKPZCjvKSaU2aQ02EbA5rrDlM,723
|
|
99
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_flush_signal.py,sha256=dDJOBlzAjhuiSVqW2RHHY1f6xy0DU_Yoo9UV8-7MjnA,173
|
|
98
100
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_high_vad_sensitivity.py,sha256=OwPwffa8TkLPGMnOTn5S7d-HmV8QmN3B7fHz8I1-VT8,180
|
|
99
101
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py,sha256=LxgEifmgWTCFZn9U-f-TWKxRPng3a2J26Zt526QrA0Y,267
|
|
100
102
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py,sha256=b6F4ymgz4got6KVDqrweYvkET8itze63wUwWyjqDlO4,180
|
|
@@ -103,11 +105,12 @@ sarvamai/speech_to_text_translate_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3
|
|
|
103
105
|
sarvamai/speech_to_text_translate_job/client.py,sha256=xu8kYtCESDB7LzL8YKBUq5qhTPMIl3_H3XD2L_7y4UU,18969
|
|
104
106
|
sarvamai/speech_to_text_translate_job/job.py,sha256=DU4k3eB28V8N16M_QEchakVng4IOul6_Qrdn3FumgHA,15208
|
|
105
107
|
sarvamai/speech_to_text_translate_job/raw_client.py,sha256=dAitbu2B9afPK6iT9zNjUJnE5BIr5-lrAlwrfwFxdkU,49507
|
|
106
|
-
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=
|
|
107
|
-
sarvamai/speech_to_text_translate_streaming/client.py,sha256=
|
|
108
|
-
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=
|
|
109
|
-
sarvamai/speech_to_text_translate_streaming/socket_client.py,sha256=
|
|
110
|
-
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=
|
|
108
|
+
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=sDeWLagKUTg34tL7vpNNwr2gX4AznEn6NAy9jY-Znf4,507
|
|
109
|
+
sarvamai/speech_to_text_translate_streaming/client.py,sha256=41FAJekotqq08tDdxWqhT966B5Ofx9MuZgHAJ9xmwRQ,9137
|
|
110
|
+
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=P-p7pNJNLrgra4HoUx1vG34RcLADYfQEpoVMYe-aMa0,8246
|
|
111
|
+
sarvamai/speech_to_text_translate_streaming/socket_client.py,sha256=ipEPSj5eHAyDpuEXfaP7JJL1rXJXGEo-IB888ReAFKs,8901
|
|
112
|
+
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=sK4Zv64ZcV33opFFaVd1kgnQnzjfXrSDhUwiEM85sZc,708
|
|
113
|
+
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_flush_signal.py,sha256=jkjvCGJ1pFKi3AOTkwMW-lo18WGgrgAhMpoe5P0AMzA,182
|
|
111
114
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_high_vad_sensitivity.py,sha256=r6MvTlkM0VEpb4dpnMHtINOZ-gYc22o0Fx_Xce2rjvo,189
|
|
112
115
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_model.py,sha256=6B8VxkpJG_pNprCSctseDtJb_ULVdKrPaeENkQ6Jvjg,187
|
|
113
116
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_vad_signals.py,sha256=EV3xd9qyKMnMvA9rO-qFDDIac4b84roBu7n-maaPxG8,181
|
|
@@ -121,7 +124,7 @@ sarvamai/text_to_speech_streaming/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7
|
|
|
121
124
|
sarvamai/text_to_speech_streaming/client.py,sha256=tcYTUmWGseGQkLOsJdO4S3eZrFstjaLCPi5OcKflfus,6306
|
|
122
125
|
sarvamai/text_to_speech_streaming/raw_client.py,sha256=VdL5HYpUotIm5HaOqTwRkyZqoV0BuzwCtAfcB0g1y10,5487
|
|
123
126
|
sarvamai/text_to_speech_streaming/socket_client.py,sha256=NEcijnvjuNcWfzqpBi-xWsXVkL0NPq6EGAkEjnaq9hw,13909
|
|
124
|
-
sarvamai/types/__init__.py,sha256=
|
|
127
|
+
sarvamai/types/__init__.py,sha256=Zydymb6llnD7V1vFWM42RrXvyxvtE84e5ayeXL5iV30,7929
|
|
125
128
|
sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
|
|
126
129
|
sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
|
|
127
130
|
sarvamai/types/audio_output.py,sha256=Eq-YUZa1mSDwt7bax2c4Vv2gBlyM_JBJWzHhTAhFSko,621
|
|
@@ -136,7 +139,7 @@ sarvamai/types/chat_completion_request_user_message.py,sha256=J3WhlrfOfCCe7ugmJI
|
|
|
136
139
|
sarvamai/types/chat_completion_response_message.py,sha256=wz935eBnCkSIl0I0qMxBuH4vAUCso1aHDGReMW1VHGE,744
|
|
137
140
|
sarvamai/types/choice.py,sha256=uXBCsjWP9VK3XWQWZUeI4EnU10w0G9nAfKn2tJZvxko,1244
|
|
138
141
|
sarvamai/types/completion_usage.py,sha256=xYQGlQUbKqsksuV73H-1ajjfT5M7w47eLfdWXSlrI5M,843
|
|
139
|
-
sarvamai/types/config_message.py,sha256=
|
|
142
|
+
sarvamai/types/config_message.py,sha256=xLD2wZcXejYrmREMd-cn38da4hKfsNPKRtyAGCW0Zcg,779
|
|
140
143
|
sarvamai/types/configure_connection.py,sha256=SnSNk02gQqP8e4VB4y88jjeFQ4ClpImjGLn2ANI8cZ4,1058
|
|
141
144
|
sarvamai/types/configure_connection_data.py,sha256=uXC7fhNJWCpaKc2Vrz2DNpUxx1gN3PwAoDL-H8L401A,3537
|
|
142
145
|
sarvamai/types/configure_connection_data_output_audio_bitrate.py,sha256=h00YvKLxsZC8L3__rH4XH53nN_GY40UElW1EjysCwUs,208
|
|
@@ -190,6 +193,7 @@ sarvamai/types/speech_to_text_translate_transcription_data.py,sha256=-cZZm21um6e
|
|
|
190
193
|
sarvamai/types/spoken_form_numerals_format.py,sha256=soBly93wMkazIcp2GDM0Mf1MjY140Pe24hBlwNoWge0,169
|
|
191
194
|
sarvamai/types/stop_configuration.py,sha256=yA_q4s4BIrbl3FotZpg4ZcyL10C7gVI0s2dqvH32BNw,136
|
|
192
195
|
sarvamai/types/storage_container_type.py,sha256=DZXDiDj74lMmUq6jaZfIMW1zMXgoVdY6rs_FcyB9OGk,184
|
|
196
|
+
sarvamai/types/stt_flush_signal.py,sha256=7rw7DHOw1uAJQQk_RuqhAzg6x5Z0TezgixnndK6BbWI,737
|
|
193
197
|
sarvamai/types/task_detail_v_1.py,sha256=Z-RotwB9BzegZqur--4HkCafyTcDvr0CosZ52St0Eeo,857
|
|
194
198
|
sarvamai/types/task_file_details.py,sha256=oJV7zaUVrbqqw-If-2_V1aLk28qW0ZbeIDtIpn5-xUM,544
|
|
195
199
|
sarvamai/types/task_state.py,sha256=fSrmD00Goi0J6s9hzqcFqz3Fkh37diBYpxnz4FkwHdU,182
|
|
@@ -211,6 +215,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
|
|
|
211
215
|
sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
|
|
212
216
|
sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
|
|
213
217
|
sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
|
|
214
|
-
sarvamai-0.1.
|
|
215
|
-
sarvamai-0.1.
|
|
216
|
-
sarvamai-0.1.
|
|
218
|
+
sarvamai-0.1.19.dist-info/METADATA,sha256=PgY_RDYe0C98s8lgxI_1PyKV06kCWYcebpgjOrgJe5U,26751
|
|
219
|
+
sarvamai-0.1.19.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
220
|
+
sarvamai-0.1.19.dist-info/RECORD,,
|
|
File without changes
|