sarvamai 0.1.5a12__py3-none-any.whl → 0.1.5a14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +15 -4
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/audio_data.py +3 -2
- sarvamai/speech_to_text_streaming/__init__.py +12 -2
- sarvamai/speech_to_text_streaming/client.py +10 -8
- sarvamai/speech_to_text_streaming/raw_client.py +10 -8
- sarvamai/speech_to_text_streaming/socket_client.py +35 -5
- sarvamai/speech_to_text_streaming/types/__init__.py +8 -1
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_high_vad_sensitivity.py +5 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_vad_signals.py +5 -0
- sarvamai/speech_to_text_translate_streaming/__init__.py +10 -2
- sarvamai/speech_to_text_translate_streaming/client.py +12 -8
- sarvamai/speech_to_text_translate_streaming/raw_client.py +12 -8
- sarvamai/speech_to_text_translate_streaming/socket_client.py +63 -16
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +7 -1
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_high_vad_sensitivity.py +5 -0
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_vad_signals.py +5 -0
- sarvamai/types/__init__.py +0 -2
- sarvamai/types/audio_data.py +1 -2
- {sarvamai-0.1.5a12.dist-info → sarvamai-0.1.5a14.dist-info}/METADATA +1 -1
- {sarvamai-0.1.5a12.dist-info → sarvamai-0.1.5a14.dist-info}/RECORD +22 -19
- sarvamai/types/audio_data_encoding.py +0 -5
- {sarvamai-0.1.5a12.dist-info → sarvamai-0.1.5a14.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
from .types import (
|
|
6
6
|
AudioData,
|
|
7
|
-
AudioDataEncoding,
|
|
8
7
|
AudioMessage,
|
|
9
8
|
ChatCompletionRequestAssistantMessage,
|
|
10
9
|
ChatCompletionRequestMessage,
|
|
@@ -113,14 +112,22 @@ from .requests import (
|
|
|
113
112
|
TranslationResponseParams,
|
|
114
113
|
TransliterationResponseParams,
|
|
115
114
|
)
|
|
116
|
-
from .speech_to_text_streaming import
|
|
117
|
-
|
|
115
|
+
from .speech_to_text_streaming import (
|
|
116
|
+
SpeechToTextStreamingHighVadSensitivity,
|
|
117
|
+
SpeechToTextStreamingLanguageCode,
|
|
118
|
+
SpeechToTextStreamingModel,
|
|
119
|
+
SpeechToTextStreamingVadSignals,
|
|
120
|
+
)
|
|
121
|
+
from .speech_to_text_translate_streaming import (
|
|
122
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
123
|
+
SpeechToTextTranslateStreamingModel,
|
|
124
|
+
SpeechToTextTranslateStreamingVadSignals,
|
|
125
|
+
)
|
|
118
126
|
from .version import __version__
|
|
119
127
|
|
|
120
128
|
__all__ = [
|
|
121
129
|
"AsyncSarvamAI",
|
|
122
130
|
"AudioData",
|
|
123
|
-
"AudioDataEncoding",
|
|
124
131
|
"AudioDataParams",
|
|
125
132
|
"AudioMessage",
|
|
126
133
|
"AudioMessageParams",
|
|
@@ -183,10 +190,12 @@ __all__ = [
|
|
|
183
190
|
"SpeechToTextResponseData",
|
|
184
191
|
"SpeechToTextResponseDataParams",
|
|
185
192
|
"SpeechToTextResponseParams",
|
|
193
|
+
"SpeechToTextStreamingHighVadSensitivity",
|
|
186
194
|
"SpeechToTextStreamingLanguageCode",
|
|
187
195
|
"SpeechToTextStreamingModel",
|
|
188
196
|
"SpeechToTextStreamingResponse",
|
|
189
197
|
"SpeechToTextStreamingResponseParams",
|
|
198
|
+
"SpeechToTextStreamingVadSignals",
|
|
190
199
|
"SpeechToTextTranscriptionData",
|
|
191
200
|
"SpeechToTextTranscriptionDataParams",
|
|
192
201
|
"SpeechToTextTranslateLanguage",
|
|
@@ -195,9 +204,11 @@ __all__ = [
|
|
|
195
204
|
"SpeechToTextTranslateResponseData",
|
|
196
205
|
"SpeechToTextTranslateResponseDataParams",
|
|
197
206
|
"SpeechToTextTranslateResponseParams",
|
|
207
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
198
208
|
"SpeechToTextTranslateStreamingModel",
|
|
199
209
|
"SpeechToTextTranslateStreamingResponse",
|
|
200
210
|
"SpeechToTextTranslateStreamingResponseParams",
|
|
211
|
+
"SpeechToTextTranslateStreamingVadSignals",
|
|
201
212
|
"SpeechToTextTranslateTranscriptionData",
|
|
202
213
|
"SpeechToTextTranslateTranscriptionDataParams",
|
|
203
214
|
"SpokenFormNumeralsFormat",
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -17,10 +17,10 @@ class BaseClientWrapper:
|
|
|
17
17
|
|
|
18
18
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
19
19
|
headers: typing.Dict[str, str] = {
|
|
20
|
-
"User-Agent": "sarvamai/0.1.5a12",
|
|
20
|
+
"User-Agent": "sarvamai/0.1.5a14",
|
|
21
21
|
"X-Fern-Language": "Python",
|
|
22
22
|
"X-Fern-SDK-Name": "sarvamai",
|
|
23
|
-
"X-Fern-SDK-Version": "0.1.5a12",
|
|
23
|
+
"X-Fern-SDK-Version": "0.1.5a14",
|
|
24
24
|
}
|
|
25
25
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
26
26
|
return headers
|
sarvamai/requests/audio_data.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
|
+
import typing
|
|
4
|
+
|
|
3
5
|
import typing_extensions
|
|
4
|
-
from ..types.audio_data_encoding import AudioDataEncoding
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class AudioDataParams(typing_extensions.TypedDict):
|
|
@@ -15,7 +16,7 @@ class AudioDataParams(typing_extensions.TypedDict):
|
|
|
15
16
|
Audio sample rate in Hz (16kHz preferred, 8kHz least preferred)
|
|
16
17
|
"""
|
|
17
18
|
|
|
18
|
-
encoding:
|
|
19
|
+
encoding: typing.Literal["audio/wav"]
|
|
19
20
|
"""
|
|
20
21
|
Audio encoding format
|
|
21
22
|
"""
|
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
from .types import
|
|
5
|
+
from .types import (
|
|
6
|
+
SpeechToTextStreamingHighVadSensitivity,
|
|
7
|
+
SpeechToTextStreamingLanguageCode,
|
|
8
|
+
SpeechToTextStreamingModel,
|
|
9
|
+
SpeechToTextStreamingVadSignals,
|
|
10
|
+
)
|
|
6
11
|
|
|
7
|
-
__all__ = [
|
|
12
|
+
__all__ = [
|
|
13
|
+
"SpeechToTextStreamingHighVadSensitivity",
|
|
14
|
+
"SpeechToTextStreamingLanguageCode",
|
|
15
|
+
"SpeechToTextStreamingModel",
|
|
16
|
+
"SpeechToTextStreamingVadSignals",
|
|
17
|
+
]
|
|
@@ -11,8 +11,10 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStreamingClient
|
|
13
13
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
14
|
+
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
14
15
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
15
16
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
17
|
+
from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
class SpeechToTextStreamingClient:
|
|
@@ -36,8 +38,8 @@ class SpeechToTextStreamingClient:
|
|
|
36
38
|
*,
|
|
37
39
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
38
40
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
39
|
-
high_vad_sensitivity: typing.Optional[
|
|
40
|
-
vad_signals: typing.Optional[
|
|
41
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
42
|
+
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
41
43
|
api_subscription_key: typing.Optional[str] = None,
|
|
42
44
|
request_options: typing.Optional[RequestOptions] = None,
|
|
43
45
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -52,10 +54,10 @@ class SpeechToTextStreamingClient:
|
|
|
52
54
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
53
55
|
Speech to text model to use
|
|
54
56
|
|
|
55
|
-
high_vad_sensitivity : typing.Optional[
|
|
57
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
|
|
56
58
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
57
59
|
|
|
58
|
-
vad_signals : typing.Optional[
|
|
60
|
+
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
59
61
|
Enable VAD signals in response
|
|
60
62
|
|
|
61
63
|
api_subscription_key : typing.Optional[str]
|
|
@@ -123,8 +125,8 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
123
125
|
*,
|
|
124
126
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
125
127
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
126
|
-
high_vad_sensitivity: typing.Optional[
|
|
127
|
-
vad_signals: typing.Optional[
|
|
128
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
129
|
+
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
128
130
|
api_subscription_key: typing.Optional[str] = None,
|
|
129
131
|
request_options: typing.Optional[RequestOptions] = None,
|
|
130
132
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -139,10 +141,10 @@ class AsyncSpeechToTextStreamingClient:
|
|
|
139
141
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
140
142
|
Speech to text model to use
|
|
141
143
|
|
|
142
|
-
high_vad_sensitivity : typing.Optional[
|
|
144
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
|
|
143
145
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
144
146
|
|
|
145
|
-
vad_signals : typing.Optional[
|
|
147
|
+
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
146
148
|
Enable VAD signals in response
|
|
147
149
|
|
|
148
150
|
api_subscription_key : typing.Optional[str]
|
|
@@ -10,8 +10,10 @@ from ..core.api_error import ApiError
|
|
|
10
10
|
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
13
|
+
from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
13
14
|
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
14
15
|
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
16
|
+
from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class RawSpeechToTextStreamingClient:
|
|
@@ -24,8 +26,8 @@ class RawSpeechToTextStreamingClient:
|
|
|
24
26
|
*,
|
|
25
27
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
26
28
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
27
|
-
high_vad_sensitivity: typing.Optional[
|
|
28
|
-
vad_signals: typing.Optional[
|
|
29
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
30
|
+
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
29
31
|
api_subscription_key: typing.Optional[str] = None,
|
|
30
32
|
request_options: typing.Optional[RequestOptions] = None,
|
|
31
33
|
) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
|
|
@@ -40,10 +42,10 @@ class RawSpeechToTextStreamingClient:
|
|
|
40
42
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
41
43
|
Speech to text model to use
|
|
42
44
|
|
|
43
|
-
high_vad_sensitivity : typing.Optional[
|
|
45
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
|
|
44
46
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
45
47
|
|
|
46
|
-
vad_signals : typing.Optional[
|
|
48
|
+
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
47
49
|
Enable VAD signals in response
|
|
48
50
|
|
|
49
51
|
api_subscription_key : typing.Optional[str]
|
|
@@ -100,8 +102,8 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
100
102
|
*,
|
|
101
103
|
language_code: SpeechToTextStreamingLanguageCode,
|
|
102
104
|
model: typing.Optional[SpeechToTextStreamingModel] = None,
|
|
103
|
-
high_vad_sensitivity: typing.Optional[
|
|
104
|
-
vad_signals: typing.Optional[
|
|
105
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
|
|
106
|
+
vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
|
|
105
107
|
api_subscription_key: typing.Optional[str] = None,
|
|
106
108
|
request_options: typing.Optional[RequestOptions] = None,
|
|
107
109
|
) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
|
|
@@ -116,10 +118,10 @@ class AsyncRawSpeechToTextStreamingClient:
|
|
|
116
118
|
model : typing.Optional[SpeechToTextStreamingModel]
|
|
117
119
|
Speech to text model to use
|
|
118
120
|
|
|
119
|
-
high_vad_sensitivity : typing.Optional[
|
|
121
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextStreamingHighVadSensitivity]
|
|
120
122
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
121
123
|
|
|
122
|
-
vad_signals : typing.Optional[
|
|
124
|
+
vad_signals : typing.Optional[SpeechToTextStreamingVadSignals]
|
|
123
125
|
Enable VAD signals in response
|
|
124
126
|
|
|
125
127
|
api_subscription_key : typing.Optional[str]
|
|
@@ -7,6 +7,7 @@ import websockets
|
|
|
7
7
|
import websockets.sync.connection as websockets_sync_connection
|
|
8
8
|
from ..core.events import EventEmitterMixin, EventType
|
|
9
9
|
from ..core.pydantic_utilities import parse_obj_as
|
|
10
|
+
from ..types.audio_data import AudioData
|
|
10
11
|
from ..types.audio_message import AudioMessage
|
|
11
12
|
from ..types.speech_to_text_streaming_response import SpeechToTextStreamingResponse
|
|
12
13
|
|
|
@@ -20,6 +21,7 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
20
21
|
|
|
21
22
|
async def __aiter__(self):
|
|
22
23
|
async for message in self._websocket:
|
|
24
|
+
message = json.loads(message) if isinstance(message, str) else message
|
|
23
25
|
yield parse_obj_as(SpeechToTextStreamingSocketClientResponse, message) # type: ignore
|
|
24
26
|
|
|
25
27
|
async def start_listening(self):
|
|
@@ -35,6 +37,7 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
35
37
|
self._emit(EventType.OPEN, None)
|
|
36
38
|
try:
|
|
37
39
|
async for raw_message in self._websocket:
|
|
40
|
+
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
38
41
|
parsed = parse_obj_as(SpeechToTextStreamingSocketClientResponse, raw_message) # type: ignore
|
|
39
42
|
self._emit(EventType.MESSAGE, parsed)
|
|
40
43
|
except websockets.WebSocketException as exc:
|
|
@@ -42,7 +45,19 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
42
45
|
finally:
|
|
43
46
|
self._emit(EventType.CLOSE, None)
|
|
44
47
|
|
|
45
|
-
async def
|
|
48
|
+
async def transcribe(self, audio: str, encoding="audio/wav", sample_rate=16000):
|
|
49
|
+
"""
|
|
50
|
+
Sends transcription request to the server.
|
|
51
|
+
:param audio: Base64 encoded audio data
|
|
52
|
+
:param encoding: Audio encoding format (default is "audio/wav")
|
|
53
|
+
:param sample_rate: Audio sample rate in Hz (default is 16000)
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
return await self._send_speech_to_text_streaming_audio_message(
|
|
57
|
+
message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
async def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
46
61
|
"""
|
|
47
62
|
Send a message to the websocket connection.
|
|
48
63
|
The message will be sent as a AudioMessage.
|
|
@@ -54,6 +69,7 @@ class AsyncSpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
54
69
|
Receive a message from the websocket connection.
|
|
55
70
|
"""
|
|
56
71
|
data = await self._websocket.recv()
|
|
72
|
+
data = json.loads(data) if isinstance(data, str) else data
|
|
57
73
|
return parse_obj_as(SpeechToTextStreamingSocketClientResponse, data) # type: ignore
|
|
58
74
|
|
|
59
75
|
async def _send(self, data: typing.Any) -> None:
|
|
@@ -78,6 +94,7 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
78
94
|
|
|
79
95
|
def __iter__(self):
|
|
80
96
|
for message in self._websocket:
|
|
97
|
+
message = json.loads(message) if isinstance(message, str) else message
|
|
81
98
|
yield parse_obj_as(SpeechToTextStreamingSocketClientResponse, message) # type: ignore
|
|
82
99
|
|
|
83
100
|
def start_listening(self):
|
|
@@ -93,6 +110,7 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
93
110
|
self._emit(EventType.OPEN, None)
|
|
94
111
|
try:
|
|
95
112
|
for raw_message in self._websocket:
|
|
113
|
+
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
96
114
|
parsed = parse_obj_as(SpeechToTextStreamingSocketClientResponse, raw_message) # type: ignore
|
|
97
115
|
self._emit(EventType.MESSAGE, parsed)
|
|
98
116
|
except websockets.WebSocketException as exc:
|
|
@@ -100,20 +118,32 @@ class SpeechToTextStreamingSocketClient(EventEmitterMixin):
|
|
|
100
118
|
finally:
|
|
101
119
|
self._emit(EventType.CLOSE, None)
|
|
102
120
|
|
|
103
|
-
def
|
|
121
|
+
def transcribe(self, audio: str, encoding="audio/wav", sample_rate=16000) -> None:
|
|
104
122
|
"""
|
|
105
|
-
|
|
106
|
-
|
|
123
|
+
Sends transcription request to the server.
|
|
124
|
+
:param audio: Base64 encoded audio data
|
|
125
|
+
:param encoding (Optional): Audio encoding format (default is "audio/wav")
|
|
126
|
+
:param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
|
|
107
127
|
"""
|
|
108
|
-
self.
|
|
128
|
+
return self._send_speech_to_text_streaming_audio_message(
|
|
129
|
+
message=AudioMessage(audio=AudioData(data=audio, sample_rate=sample_rate, encoding=encoding))
|
|
130
|
+
)
|
|
109
131
|
|
|
110
132
|
def recv(self) -> SpeechToTextStreamingSocketClientResponse:
|
|
111
133
|
"""
|
|
112
134
|
Receive a message from the websocket connection.
|
|
113
135
|
"""
|
|
114
136
|
data = self._websocket.recv()
|
|
137
|
+
data = json.loads(data) if isinstance(data, str) else data
|
|
115
138
|
return parse_obj_as(SpeechToTextStreamingSocketClientResponse, data) # type: ignore
|
|
116
139
|
|
|
140
|
+
def _send_speech_to_text_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
141
|
+
"""
|
|
142
|
+
Send a message to the websocket connection.
|
|
143
|
+
The message will be sent as a AudioMessage.
|
|
144
|
+
"""
|
|
145
|
+
self._send_model(message)
|
|
146
|
+
|
|
117
147
|
def _send(self, data: typing.Any) -> None:
|
|
118
148
|
"""
|
|
119
149
|
Send a message to the websocket connection.
|
|
@@ -2,7 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
+
from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
|
|
5
6
|
from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
6
7
|
from .speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
8
|
+
from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
|
|
7
9
|
|
|
8
|
-
__all__ = [
|
|
10
|
+
__all__ = [
|
|
11
|
+
"SpeechToTextStreamingHighVadSensitivity",
|
|
12
|
+
"SpeechToTextStreamingLanguageCode",
|
|
13
|
+
"SpeechToTextStreamingModel",
|
|
14
|
+
"SpeechToTextStreamingVadSignals",
|
|
15
|
+
]
|
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
from .types import
|
|
5
|
+
from .types import (
|
|
6
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
7
|
+
SpeechToTextTranslateStreamingModel,
|
|
8
|
+
SpeechToTextTranslateStreamingVadSignals,
|
|
9
|
+
)
|
|
6
10
|
|
|
7
|
-
__all__ = [
|
|
11
|
+
__all__ = [
|
|
12
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
13
|
+
"SpeechToTextTranslateStreamingModel",
|
|
14
|
+
"SpeechToTextTranslateStreamingVadSignals",
|
|
15
|
+
]
|
|
@@ -11,7 +11,11 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .raw_client import AsyncRawSpeechToTextTranslateStreamingClient, RawSpeechToTextTranslateStreamingClient
|
|
13
13
|
from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
|
|
14
|
+
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
15
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
16
|
+
)
|
|
14
17
|
from .types.speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
|
|
18
|
+
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class SpeechToTextTranslateStreamingClient:
|
|
@@ -34,8 +38,8 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
34
38
|
self,
|
|
35
39
|
*,
|
|
36
40
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
37
|
-
high_vad_sensitivity: typing.Optional[
|
|
38
|
-
vad_signals: typing.Optional[
|
|
41
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
42
|
+
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
39
43
|
api_subscription_key: typing.Optional[str] = None,
|
|
40
44
|
request_options: typing.Optional[RequestOptions] = None,
|
|
41
45
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -47,10 +51,10 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
47
51
|
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
48
52
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
49
53
|
|
|
50
|
-
high_vad_sensitivity : typing.Optional[
|
|
54
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity]
|
|
51
55
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
52
56
|
|
|
53
|
-
vad_signals : typing.Optional[
|
|
57
|
+
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
54
58
|
Enable VAD signals in response
|
|
55
59
|
|
|
56
60
|
api_subscription_key : typing.Optional[str]
|
|
@@ -115,8 +119,8 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
115
119
|
self,
|
|
116
120
|
*,
|
|
117
121
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
118
|
-
high_vad_sensitivity: typing.Optional[
|
|
119
|
-
vad_signals: typing.Optional[
|
|
122
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
123
|
+
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
120
124
|
api_subscription_key: typing.Optional[str] = None,
|
|
121
125
|
request_options: typing.Optional[RequestOptions] = None,
|
|
122
126
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -128,10 +132,10 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
128
132
|
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
129
133
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
130
134
|
|
|
131
|
-
high_vad_sensitivity : typing.Optional[
|
|
135
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity]
|
|
132
136
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
133
137
|
|
|
134
|
-
vad_signals : typing.Optional[
|
|
138
|
+
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
135
139
|
Enable VAD signals in response
|
|
136
140
|
|
|
137
141
|
api_subscription_key : typing.Optional[str]
|
|
@@ -10,7 +10,11 @@ from ..core.api_error import ApiError
|
|
|
10
10
|
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
11
|
from ..core.request_options import RequestOptions
|
|
12
12
|
from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
|
|
13
|
+
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
14
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
15
|
+
)
|
|
13
16
|
from .types.speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
|
|
17
|
+
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
class RawSpeechToTextTranslateStreamingClient:
|
|
@@ -22,8 +26,8 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
22
26
|
self,
|
|
23
27
|
*,
|
|
24
28
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
25
|
-
high_vad_sensitivity: typing.Optional[
|
|
26
|
-
vad_signals: typing.Optional[
|
|
29
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
30
|
+
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
27
31
|
api_subscription_key: typing.Optional[str] = None,
|
|
28
32
|
request_options: typing.Optional[RequestOptions] = None,
|
|
29
33
|
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -35,10 +39,10 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
35
39
|
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
36
40
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
37
41
|
|
|
38
|
-
high_vad_sensitivity : typing.Optional[
|
|
42
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity]
|
|
39
43
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
40
44
|
|
|
41
|
-
vad_signals : typing.Optional[
|
|
45
|
+
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
42
46
|
Enable VAD signals in response
|
|
43
47
|
|
|
44
48
|
api_subscription_key : typing.Optional[str]
|
|
@@ -92,8 +96,8 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
92
96
|
self,
|
|
93
97
|
*,
|
|
94
98
|
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
95
|
-
high_vad_sensitivity: typing.Optional[
|
|
96
|
-
vad_signals: typing.Optional[
|
|
99
|
+
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
100
|
+
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
97
101
|
api_subscription_key: typing.Optional[str] = None,
|
|
98
102
|
request_options: typing.Optional[RequestOptions] = None,
|
|
99
103
|
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
@@ -105,10 +109,10 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
105
109
|
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
106
110
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
107
111
|
|
|
108
|
-
high_vad_sensitivity : typing.Optional[
|
|
112
|
+
high_vad_sensitivity : typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity]
|
|
109
113
|
Enable high VAD (Voice Activity Detection) sensitivity
|
|
110
114
|
|
|
111
|
-
vad_signals : typing.Optional[
|
|
115
|
+
vad_signals : typing.Optional[SpeechToTextTranslateStreamingVadSignals]
|
|
112
116
|
Enable VAD signals in response
|
|
113
117
|
|
|
114
118
|
api_subscription_key : typing.Optional[str]
|
|
@@ -7,6 +7,7 @@ import websockets
|
|
|
7
7
|
import websockets.sync.connection as websockets_sync_connection
|
|
8
8
|
from ..core.events import EventEmitterMixin, EventType
|
|
9
9
|
from ..core.pydantic_utilities import parse_obj_as
|
|
10
|
+
from ..types.audio_data import AudioData
|
|
10
11
|
from ..types.audio_message import AudioMessage
|
|
11
12
|
from ..types.config_message import ConfigMessage
|
|
12
13
|
from ..types.speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
|
|
@@ -21,6 +22,7 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
21
22
|
|
|
22
23
|
async def __aiter__(self):
|
|
23
24
|
async for message in self._websocket:
|
|
25
|
+
message = json.loads(message) if isinstance(message, str) else message
|
|
24
26
|
yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
|
|
25
27
|
|
|
26
28
|
async def start_listening(self):
|
|
@@ -36,6 +38,7 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
36
38
|
self._emit(EventType.OPEN, None)
|
|
37
39
|
try:
|
|
38
40
|
async for raw_message in self._websocket:
|
|
41
|
+
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
39
42
|
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
40
43
|
self._emit(EventType.MESSAGE, parsed)
|
|
41
44
|
except websockets.WebSocketException as exc:
|
|
@@ -43,27 +46,48 @@ class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
43
46
|
finally:
|
|
44
47
|
self._emit(EventType.CLOSE, None)
|
|
45
48
|
|
|
46
|
-
async def
|
|
49
|
+
async def translate(self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000):
|
|
47
50
|
"""
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
Sends audio translation request to the server.
|
|
52
|
+
:param audio: Base64 encoded audio data
|
|
53
|
+
:param encoding (Optional): Audio encoding format (default is "audio/wav")
|
|
54
|
+
:param sample_rate (Optional): Audio sample rate in Hz (default is 16000)
|
|
50
55
|
"""
|
|
51
|
-
await self.
|
|
56
|
+
return await self._send_speech_to_text_translate_streaming_audio_message(
|
|
57
|
+
message=AudioMessage(audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate))
|
|
58
|
+
)
|
|
52
59
|
|
|
53
|
-
async def
|
|
60
|
+
async def set_prompt(self, prompt: str) -> None:
|
|
54
61
|
"""
|
|
55
|
-
|
|
56
|
-
The
|
|
62
|
+
Set the prompt for the translation.
|
|
63
|
+
The prompt is useful for detecting hotwords in the audio stream.
|
|
64
|
+
This will send a ConfigMessage to the websocket connection.
|
|
57
65
|
"""
|
|
58
|
-
|
|
66
|
+
message = ConfigMessage(prompt=prompt)
|
|
67
|
+
await self._send_config_message(message)
|
|
59
68
|
|
|
60
69
|
async def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
61
70
|
"""
|
|
62
71
|
Receive a message from the websocket connection.
|
|
63
72
|
"""
|
|
64
73
|
data = await self._websocket.recv()
|
|
74
|
+
data = json.loads(data) if isinstance(data, str) else data
|
|
65
75
|
return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
|
|
66
76
|
|
|
77
|
+
async def _send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
78
|
+
"""
|
|
79
|
+
Send a message to the websocket connection.
|
|
80
|
+
The message will be sent as a AudioMessage.
|
|
81
|
+
"""
|
|
82
|
+
await self._send_model(message)
|
|
83
|
+
|
|
84
|
+
async def _send_config_message(self, message: ConfigMessage) -> None:
|
|
85
|
+
"""
|
|
86
|
+
Send a message to the websocket connection.
|
|
87
|
+
The message will be sent as a ConfigMessage.
|
|
88
|
+
"""
|
|
89
|
+
await self._send_model(message)
|
|
90
|
+
|
|
67
91
|
async def _send(self, data: typing.Any) -> None:
|
|
68
92
|
"""
|
|
69
93
|
Send a message to the websocket connection.
|
|
@@ -86,6 +110,7 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
86
110
|
|
|
87
111
|
def __iter__(self):
|
|
88
112
|
for message in self._websocket:
|
|
113
|
+
message = json.loads(message) if isinstance(message, str) else message
|
|
89
114
|
yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
|
|
90
115
|
|
|
91
116
|
def start_listening(self):
|
|
@@ -101,6 +126,7 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
101
126
|
self._emit(EventType.OPEN, None)
|
|
102
127
|
try:
|
|
103
128
|
for raw_message in self._websocket:
|
|
129
|
+
raw_message = json.loads(raw_message) if isinstance(raw_message, str) else raw_message
|
|
104
130
|
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
105
131
|
self._emit(EventType.MESSAGE, parsed)
|
|
106
132
|
except websockets.WebSocketException as exc:
|
|
@@ -108,27 +134,48 @@ class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
|
108
134
|
finally:
|
|
109
135
|
self._emit(EventType.CLOSE, None)
|
|
110
136
|
|
|
111
|
-
def
|
|
137
|
+
def translate(self, audio: str, encoding: str = "audio/wav", sample_rate: int = 16000):
|
|
112
138
|
"""
|
|
113
|
-
|
|
114
|
-
|
|
139
|
+
Sends audio translation request to the server.
|
|
140
|
+
:param audio: Base64 encoded audio data
|
|
141
|
+
:param encoding: Audio encoding format (default is "audio/wav")
|
|
142
|
+
:param sample_rate: Audio sample rate in Hz (default is 16000)
|
|
115
143
|
"""
|
|
116
|
-
self.
|
|
144
|
+
return self._send_speech_to_text_translate_streaming_audio_message(
|
|
145
|
+
message=AudioMessage(audio=AudioData(data=audio, encoding=encoding, sample_rate=sample_rate))
|
|
146
|
+
)
|
|
117
147
|
|
|
118
|
-
def
|
|
148
|
+
def set_prompt(self, prompt: str) -> None:
|
|
119
149
|
"""
|
|
120
|
-
|
|
121
|
-
The
|
|
150
|
+
Set the prompt for the translation.
|
|
151
|
+
The prompt is useful for detecting hotwords in the audio stream.
|
|
152
|
+
This will send a ConfigMessage to the websocket connection.
|
|
122
153
|
"""
|
|
123
|
-
|
|
154
|
+
message = ConfigMessage(prompt=prompt)
|
|
155
|
+
self._send_config_message(message)
|
|
124
156
|
|
|
125
157
|
def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
126
158
|
"""
|
|
127
159
|
Receive a message from the websocket connection.
|
|
128
160
|
"""
|
|
129
161
|
data = self._websocket.recv()
|
|
162
|
+
data = json.loads(data) if isinstance(data, str) else data
|
|
130
163
|
return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
|
|
131
164
|
|
|
165
|
+
def _send_config_message(self, message: ConfigMessage) -> None:
|
|
166
|
+
"""
|
|
167
|
+
Send a message to the websocket connection.
|
|
168
|
+
The message will be sent as a ConfigMessage.
|
|
169
|
+
"""
|
|
170
|
+
self._send_model(message)
|
|
171
|
+
|
|
172
|
+
def _send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
173
|
+
"""
|
|
174
|
+
Send a message to the websocket connection.
|
|
175
|
+
The message will be sent as a AudioMessage.
|
|
176
|
+
"""
|
|
177
|
+
self._send_model(message)
|
|
178
|
+
|
|
132
179
|
def _send(self, data: typing.Any) -> None:
|
|
133
180
|
"""
|
|
134
181
|
Send a message to the websocket connection.
|
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
+
from .speech_to_text_translate_streaming_high_vad_sensitivity import SpeechToTextTranslateStreamingHighVadSensitivity
|
|
5
6
|
from .speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
|
|
7
|
+
from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
6
8
|
|
|
7
|
-
__all__ = [
|
|
9
|
+
__all__ = [
|
|
10
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
11
|
+
"SpeechToTextTranslateStreamingModel",
|
|
12
|
+
"SpeechToTextTranslateStreamingVadSignals",
|
|
13
|
+
]
|
sarvamai/types/__init__.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
5
|
from .audio_data import AudioData
|
|
6
|
-
from .audio_data_encoding import AudioDataEncoding
|
|
7
6
|
from .audio_message import AudioMessage
|
|
8
7
|
from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessage
|
|
9
8
|
from .chat_completion_request_message import (
|
|
@@ -68,7 +67,6 @@ from .transliteration_response import TransliterationResponse
|
|
|
68
67
|
|
|
69
68
|
__all__ = [
|
|
70
69
|
"AudioData",
|
|
71
|
-
"AudioDataEncoding",
|
|
72
70
|
"AudioMessage",
|
|
73
71
|
"ChatCompletionRequestAssistantMessage",
|
|
74
72
|
"ChatCompletionRequestMessage",
|
sarvamai/types/audio_data.py
CHANGED
|
@@ -4,7 +4,6 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .audio_data_encoding import AudioDataEncoding
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
class AudioData(UniversalBaseModel):
|
|
@@ -18,7 +17,7 @@ class AudioData(UniversalBaseModel):
|
|
|
18
17
|
Audio sample rate in Hz (16kHz preferred, 8kHz least preferred)
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
|
-
encoding:
|
|
20
|
+
encoding: typing.Literal["audio/wav"] = pydantic.Field(default="audio/wav")
|
|
22
21
|
"""
|
|
23
22
|
Audio encoding format
|
|
24
23
|
"""
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sarvamai/__init__.py,sha256
|
|
1
|
+
sarvamai/__init__.py,sha256=-XV5i09uHn5nMVvhydUPG84_lryvCgcEmOwzOgCEsMk,7511
|
|
2
2
|
sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
3
3
|
sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
|
|
4
4
|
sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
|
|
5
5
|
sarvamai/client.py,sha256=5-fW9679vlfqw6hQCtFIG0gnqd6tdz2W8BWx2ypFUHE,6855
|
|
6
6
|
sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
|
|
7
7
|
sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
|
|
8
|
-
sarvamai/core/client_wrapper.py,sha256=
|
|
8
|
+
sarvamai/core/client_wrapper.py,sha256=a9EClnhAIjINM2aTSKgrG-q3k7M8Z85hiQhB3X1KytE,2080
|
|
9
9
|
sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
|
|
10
10
|
sarvamai/core/events.py,sha256=j7VWXgMpOsjCXdzY22wIhI7Q-v5InZ4WchRzA88x_Sk,856
|
|
11
11
|
sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
|
|
@@ -29,7 +29,7 @@ sarvamai/errors/unprocessable_entity_error.py,sha256=JqxtzIhvjkpQDqbT9Q-go1n-gyv
|
|
|
29
29
|
sarvamai/play.py,sha256=4fh86zy8g8IPU2O8yPBY7QxXQOivv_nWQvPQsOa1arw,2183
|
|
30
30
|
sarvamai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
31
|
sarvamai/requests/__init__.py,sha256=orINJGeCXuK0ep5LLDhoiGp0kCYICrfJiLC6S-JmYNw,3697
|
|
32
|
-
sarvamai/requests/audio_data.py,sha256=
|
|
32
|
+
sarvamai/requests/audio_data.py,sha256=QI3SK5aiAg2yJ-m3l9CxOkONnH3CCKMFCl9kAdMs19o,410
|
|
33
33
|
sarvamai/requests/audio_message.py,sha256=ZBeogjGE6YFXXM-0g8zq9SoizDk21reR0YXSB-0fMjg,214
|
|
34
34
|
sarvamai/requests/chat_completion_request_assistant_message.py,sha256=xI6nqqY2t4j56DGEAt2aasDnI7no_mxxCBk_ChxNQjg,247
|
|
35
35
|
sarvamai/requests/chat_completion_request_message.py,sha256=B5tOPGNdSaMOJRl0k26uuXaqvpTrftiu-99CDDBTnSI,736
|
|
@@ -64,28 +64,31 @@ sarvamai/requests/transliteration_response.py,sha256=KqRkqnegLmt7LjdVxjRePX6Roqa
|
|
|
64
64
|
sarvamai/speech_to_text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
65
65
|
sarvamai/speech_to_text/client.py,sha256=E76V1BZ236AN37cvm7c-NTRTnDleiYQ4_hKHE3wfevY,11435
|
|
66
66
|
sarvamai/speech_to_text/raw_client.py,sha256=rencVGZ5cVX1eHhYoVMhcrlkqVmpI4aAGTrSC7kcYUI,25690
|
|
67
|
-
sarvamai/speech_to_text_streaming/__init__.py,sha256=
|
|
68
|
-
sarvamai/speech_to_text_streaming/client.py,sha256=
|
|
69
|
-
sarvamai/speech_to_text_streaming/raw_client.py,sha256=
|
|
70
|
-
sarvamai/speech_to_text_streaming/socket_client.py,sha256=
|
|
71
|
-
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=
|
|
67
|
+
sarvamai/speech_to_text_streaming/__init__.py,sha256=q7QygMmZCHJ-4FMhhL_6XNV_dsqlIFRCO1iSxoyxaaY,437
|
|
68
|
+
sarvamai/speech_to_text_streaming/client.py,sha256=WdkzZxKMdnQ2hHv9hzJlfSNggRJLKFljRiC7695Jcog,8224
|
|
69
|
+
sarvamai/speech_to_text_streaming/raw_client.py,sha256=7zcgVw7CXA2TySkJKQkS-NdnJOMRudmV_m4NaigICN0,7405
|
|
70
|
+
sarvamai/speech_to_text_streaming/socket_client.py,sha256=QI0vEjDvNTG9-EH99NolIuARYwD-P8Fms8rqKOD9HJQ,6419
|
|
71
|
+
sarvamai/speech_to_text_streaming/types/__init__.py,sha256=hzEDbcyy6K0Q9-Zx5k5lxAHxIgeHnAoPpvTLrL13YT0,599
|
|
72
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_high_vad_sensitivity.py,sha256=OwPwffa8TkLPGMnOTn5S7d-HmV8QmN3B7fHz8I1-VT8,180
|
|
72
73
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py,sha256=LxgEifmgWTCFZn9U-f-TWKxRPng3a2J26Zt526QrA0Y,267
|
|
73
74
|
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py,sha256=b6F4ymgz4got6KVDqrweYvkET8itze63wUwWyjqDlO4,180
|
|
74
|
-
sarvamai/
|
|
75
|
-
sarvamai/speech_to_text_translate_streaming/
|
|
76
|
-
sarvamai/speech_to_text_translate_streaming/
|
|
77
|
-
sarvamai/speech_to_text_translate_streaming/
|
|
78
|
-
sarvamai/speech_to_text_translate_streaming/
|
|
75
|
+
sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_vad_signals.py,sha256=8wiFOB7WDMbYCcMTYgNFJaIjEytYeXpJLwr_O_mH0TI,172
|
|
76
|
+
sarvamai/speech_to_text_translate_streaming/__init__.py,sha256=_hmlce1Zs1grylysZhBUdtKfkaUROwVydtwz6l-1qqg,411
|
|
77
|
+
sarvamai/speech_to_text_translate_streaming/client.py,sha256=TnHCcspbbYFaimcEk8km3QNrNkm8JlX7e2ydpeHL9EE,8068
|
|
78
|
+
sarvamai/speech_to_text_translate_streaming/raw_client.py,sha256=_TlUro1NwWc1dAvC0SHcOoTnsnAqRRXxzxKHxw8BGTs,7177
|
|
79
|
+
sarvamai/speech_to_text_translate_streaming/socket_client.py,sha256=Xy86G9Mv1kGLf8xwFo3OBf98vqGQ50iigUAoOaApLdI,7842
|
|
80
|
+
sarvamai/speech_to_text_translate_streaming/types/__init__.py,sha256=zyKoGAbKW4d0-Zi56F0RfPUqtk_xUjWjF_RjTxbXXW4,556
|
|
81
|
+
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_high_vad_sensitivity.py,sha256=r6MvTlkM0VEpb4dpnMHtINOZ-gYc22o0Fx_Xce2rjvo,189
|
|
79
82
|
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_model.py,sha256=6B8VxkpJG_pNprCSctseDtJb_ULVdKrPaeENkQ6Jvjg,187
|
|
83
|
+
sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_vad_signals.py,sha256=EV3xd9qyKMnMvA9rO-qFDDIac4b84roBu7n-maaPxG8,181
|
|
80
84
|
sarvamai/text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
81
85
|
sarvamai/text/client.py,sha256=CMwDVtMsPor08e8F9a7yhHtgrdLzd1__samwm4S9CGM,30525
|
|
82
86
|
sarvamai/text/raw_client.py,sha256=lQ7bV9aVqxjwEUHMPEZ4x0_Xs036_yFArMK9rnYT4ZI,48625
|
|
83
87
|
sarvamai/text_to_speech/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
|
|
84
88
|
sarvamai/text_to_speech/client.py,sha256=dyJlmHhVHUiXsi_5uh8Oged-ynN_NcDzD7JC_MByzV4,9164
|
|
85
89
|
sarvamai/text_to_speech/raw_client.py,sha256=dwVPzx1kjXjuYGuNUjB1ibxHrYHXQJOJ6TnMGpDZOdo,15102
|
|
86
|
-
sarvamai/types/__init__.py,sha256=
|
|
87
|
-
sarvamai/types/audio_data.py,sha256=
|
|
88
|
-
sarvamai/types/audio_data_encoding.py,sha256=bfXb83yGdbLChm-XoN73PW-ak-iFPR24lVti07Ej00A,194
|
|
90
|
+
sarvamai/types/__init__.py,sha256=OWVmsa_5dWjmCtLRsCpSlXrlTPd1ZLNAxqfAYvxAF80,5226
|
|
91
|
+
sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
|
|
89
92
|
sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
|
|
90
93
|
sarvamai/types/chat_completion_request_assistant_message.py,sha256=pFSONJ6CBsv3frcteid66SOKMkFwQ1UJs_e0XwwbKis,624
|
|
91
94
|
sarvamai/types/chat_completion_request_message.py,sha256=4ZfaNvaphnPdJqYSeSXMZwBhkrAqBz3aob6j-4Cklho,1638
|
|
@@ -143,6 +146,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
|
|
|
143
146
|
sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
|
|
144
147
|
sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
|
|
145
148
|
sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
|
|
146
|
-
sarvamai-0.1.
|
|
147
|
-
sarvamai-0.1.
|
|
148
|
-
sarvamai-0.1.
|
|
149
|
+
sarvamai-0.1.5a14.dist-info/METADATA,sha256=lNBU8MkacRV__lZfNW49jdBk1-rMfRkeGQA7o4fKb0o,1038
|
|
150
|
+
sarvamai-0.1.5a14.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
151
|
+
sarvamai-0.1.5a14.dist-info/RECORD,,
|
|
File without changes
|