sarvamai 0.1.22a4__py3-none-any.whl → 0.1.23a2__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +405 -200
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +186 -34
- sarvamai/core/__init__.py +76 -21
- sarvamai/core/client_wrapper.py +19 -3
- sarvamai/core/force_multipart.py +4 -2
- sarvamai/core/http_client.py +217 -97
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/http_sse/__init__.py +42 -0
- sarvamai/core/http_sse/_api.py +112 -0
- sarvamai/core/http_sse/_decoders.py +61 -0
- sarvamai/core/http_sse/_exceptions.py +7 -0
- sarvamai/core/http_sse/_models.py +17 -0
- sarvamai/core/jsonable_encoder.py +8 -0
- sarvamai/core/pydantic_utilities.py +110 -4
- sarvamai/errors/__init__.py +40 -6
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +150 -62
- sarvamai/requests/audio_data.py +0 -6
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_transcription_data.py +2 -2
- sarvamai/speech_to_text/raw_client.py +54 -52
- sarvamai/speech_to_text_job/raw_client.py +120 -120
- sarvamai/speech_to_text_streaming/__init__.py +38 -8
- sarvamai/speech_to_text_streaming/client.py +0 -13
- sarvamai/speech_to_text_streaming/raw_client.py +0 -13
- sarvamai/speech_to_text_streaming/types/__init__.py +36 -6
- sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
- sarvamai/speech_to_text_translate_streaming/__init__.py +36 -7
- sarvamai/speech_to_text_translate_streaming/client.py +0 -13
- sarvamai/speech_to_text_translate_streaming/raw_client.py +0 -13
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +36 -5
- sarvamai/text/client.py +0 -12
- sarvamai/text/raw_client.py +60 -72
- sarvamai/text_to_speech/client.py +18 -0
- sarvamai/text_to_speech/raw_client.py +38 -20
- sarvamai/text_to_speech_streaming/__init__.py +28 -1
- sarvamai/text_to_speech_streaming/types/__init__.py +30 -1
- sarvamai/types/__init__.py +222 -100
- sarvamai/types/audio_data.py +0 -6
- sarvamai/types/chat_completion_request_message.py +6 -2
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -2
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.23a2.dist-info}/METADATA +2 -1
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.23a2.dist-info}/RECORD +53 -51
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +0 -33
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +0 -33
- sarvamai/types/audio_data_input_audio_codec.py +0 -33
- {sarvamai-0.1.22a4.dist-info → sarvamai-0.1.23a2.dist-info}/WHEEL +0 -0
|
@@ -2,16 +2,45 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .types import (
|
|
10
|
+
SpeechToTextTranslateStreamingFlushSignal,
|
|
11
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
12
|
+
SpeechToTextTranslateStreamingVadSignals,
|
|
13
|
+
)
|
|
14
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
15
|
+
"SpeechToTextTranslateStreamingFlushSignal": ".types",
|
|
16
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity": ".types",
|
|
17
|
+
"SpeechToTextTranslateStreamingVadSignals": ".types",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
22
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
23
|
+
if module_name is None:
|
|
24
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
25
|
+
try:
|
|
26
|
+
module = import_module(module_name, __package__)
|
|
27
|
+
if module_name == f".{attr_name}":
|
|
28
|
+
return module
|
|
29
|
+
else:
|
|
30
|
+
return getattr(module, attr_name)
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
33
|
+
except AttributeError as e:
|
|
34
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __dir__():
|
|
38
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
39
|
+
return sorted(lazy_attrs)
|
|
40
|
+
|
|
11
41
|
|
|
12
42
|
__all__ = [
|
|
13
43
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
14
44
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
15
|
-
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
16
45
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
17
46
|
]
|
|
@@ -15,7 +15,6 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
15
15
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
16
16
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
17
17
|
)
|
|
18
|
-
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
19
18
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
20
19
|
|
|
21
20
|
try:
|
|
@@ -44,7 +43,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
44
43
|
self,
|
|
45
44
|
*,
|
|
46
45
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
47
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
48
46
|
sample_rate: typing.Optional[str] = None,
|
|
49
47
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
50
48
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
@@ -64,9 +62,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
64
62
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
65
63
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
66
64
|
|
|
67
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
68
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
69
|
-
|
|
70
65
|
sample_rate : typing.Optional[str]
|
|
71
66
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
72
67
|
|
|
@@ -93,8 +88,6 @@ class SpeechToTextTranslateStreamingClient:
|
|
|
93
88
|
query_params = httpx.QueryParams()
|
|
94
89
|
if model is not None:
|
|
95
90
|
query_params = query_params.add("model", model)
|
|
96
|
-
if input_audio_codec is not None:
|
|
97
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
98
91
|
if sample_rate is not None:
|
|
99
92
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
100
93
|
if high_vad_sensitivity is not None:
|
|
@@ -147,7 +140,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
147
140
|
self,
|
|
148
141
|
*,
|
|
149
142
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
150
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
151
143
|
sample_rate: typing.Optional[str] = None,
|
|
152
144
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
153
145
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
@@ -167,9 +159,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
167
159
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
168
160
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
169
161
|
|
|
170
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
171
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
172
|
-
|
|
173
162
|
sample_rate : typing.Optional[str]
|
|
174
163
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
175
164
|
|
|
@@ -196,8 +185,6 @@ class AsyncSpeechToTextTranslateStreamingClient:
|
|
|
196
185
|
query_params = httpx.QueryParams()
|
|
197
186
|
if model is not None:
|
|
198
187
|
query_params = query_params.add("model", model)
|
|
199
|
-
if input_audio_codec is not None:
|
|
200
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
201
188
|
if sample_rate is not None:
|
|
202
189
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
203
190
|
if high_vad_sensitivity is not None:
|
|
@@ -14,7 +14,6 @@ from .types.speech_to_text_translate_streaming_flush_signal import SpeechToTextT
|
|
|
14
14
|
from .types.speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
15
15
|
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
16
16
|
)
|
|
17
|
-
from .types.speech_to_text_translate_streaming_input_audio_codec import SpeechToTextTranslateStreamingInputAudioCodec
|
|
18
17
|
from .types.speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
19
18
|
|
|
20
19
|
try:
|
|
@@ -32,7 +31,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
32
31
|
self,
|
|
33
32
|
*,
|
|
34
33
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
35
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
36
34
|
sample_rate: typing.Optional[str] = None,
|
|
37
35
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
38
36
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
@@ -52,9 +50,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
52
50
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
53
51
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
54
52
|
|
|
55
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
56
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
57
|
-
|
|
58
53
|
sample_rate : typing.Optional[str]
|
|
59
54
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
60
55
|
|
|
@@ -81,8 +76,6 @@ class RawSpeechToTextTranslateStreamingClient:
|
|
|
81
76
|
query_params = httpx.QueryParams()
|
|
82
77
|
if model is not None:
|
|
83
78
|
query_params = query_params.add("model", model)
|
|
84
|
-
if input_audio_codec is not None:
|
|
85
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
86
79
|
if sample_rate is not None:
|
|
87
80
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
88
81
|
if high_vad_sensitivity is not None:
|
|
@@ -124,7 +117,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
124
117
|
self,
|
|
125
118
|
*,
|
|
126
119
|
model: typing.Optional[typing.Literal["saaras:v2.5"]] = None,
|
|
127
|
-
input_audio_codec: typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec] = None,
|
|
128
120
|
sample_rate: typing.Optional[str] = None,
|
|
129
121
|
high_vad_sensitivity: typing.Optional[SpeechToTextTranslateStreamingHighVadSensitivity] = None,
|
|
130
122
|
vad_signals: typing.Optional[SpeechToTextTranslateStreamingVadSignals] = None,
|
|
@@ -144,9 +136,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
144
136
|
model : typing.Optional[typing.Literal["saaras:v2.5"]]
|
|
145
137
|
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
146
138
|
|
|
147
|
-
input_audio_codec : typing.Optional[SpeechToTextTranslateStreamingInputAudioCodec]
|
|
148
|
-
Audio codec/format of the input file. Our API automatically detects all codec formats, but for PCM files specifically (pcm_s16le, pcm_l16, pcm_raw), you must pass this parameter. PCM files supports sample rate 16000 and 8000.
|
|
149
|
-
|
|
150
139
|
sample_rate : typing.Optional[str]
|
|
151
140
|
Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
|
|
152
141
|
|
|
@@ -173,8 +162,6 @@ class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
|
173
162
|
query_params = httpx.QueryParams()
|
|
174
163
|
if model is not None:
|
|
175
164
|
query_params = query_params.add("model", model)
|
|
176
|
-
if input_audio_codec is not None:
|
|
177
|
-
query_params = query_params.add("input_audio_codec", input_audio_codec)
|
|
178
165
|
if sample_rate is not None:
|
|
179
166
|
query_params = query_params.add("sample_rate", sample_rate)
|
|
180
167
|
if high_vad_sensitivity is not None:
|
|
@@ -2,14 +2,45 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
import typing
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from .speech_to_text_translate_streaming_flush_signal import SpeechToTextTranslateStreamingFlushSignal
|
|
10
|
+
from .speech_to_text_translate_streaming_high_vad_sensitivity import (
|
|
11
|
+
SpeechToTextTranslateStreamingHighVadSensitivity,
|
|
12
|
+
)
|
|
13
|
+
from .speech_to_text_translate_streaming_vad_signals import SpeechToTextTranslateStreamingVadSignals
|
|
14
|
+
_dynamic_imports: typing.Dict[str, str] = {
|
|
15
|
+
"SpeechToTextTranslateStreamingFlushSignal": ".speech_to_text_translate_streaming_flush_signal",
|
|
16
|
+
"SpeechToTextTranslateStreamingHighVadSensitivity": ".speech_to_text_translate_streaming_high_vad_sensitivity",
|
|
17
|
+
"SpeechToTextTranslateStreamingVadSignals": ".speech_to_text_translate_streaming_vad_signals",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(attr_name: str) -> typing.Any:
|
|
22
|
+
module_name = _dynamic_imports.get(attr_name)
|
|
23
|
+
if module_name is None:
|
|
24
|
+
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
25
|
+
try:
|
|
26
|
+
module = import_module(module_name, __package__)
|
|
27
|
+
if module_name == f".{attr_name}":
|
|
28
|
+
return module
|
|
29
|
+
else:
|
|
30
|
+
return getattr(module, attr_name)
|
|
31
|
+
except ImportError as e:
|
|
32
|
+
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
33
|
+
except AttributeError as e:
|
|
34
|
+
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def __dir__():
|
|
38
|
+
lazy_attrs = list(_dynamic_imports.keys())
|
|
39
|
+
return sorted(lazy_attrs)
|
|
40
|
+
|
|
9
41
|
|
|
10
42
|
__all__ = [
|
|
11
43
|
"SpeechToTextTranslateStreamingFlushSignal",
|
|
12
44
|
"SpeechToTextTranslateStreamingHighVadSensitivity",
|
|
13
|
-
"SpeechToTextTranslateStreamingInputAudioCodec",
|
|
14
45
|
"SpeechToTextTranslateStreamingVadSignals",
|
|
15
46
|
]
|
sarvamai/text/client.py
CHANGED
|
@@ -47,7 +47,6 @@ class TextClient:
|
|
|
47
47
|
speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
|
|
48
48
|
mode: typing.Optional[TranslateMode] = OMIT,
|
|
49
49
|
model: typing.Optional[TranslateModel] = OMIT,
|
|
50
|
-
enable_preprocessing: typing.Optional[bool] = OMIT,
|
|
51
50
|
output_script: typing.Optional[TransliterateMode] = OMIT,
|
|
52
51
|
numerals_format: typing.Optional[NumeralsFormat] = OMIT,
|
|
53
52
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -125,10 +124,6 @@ class TextClient:
|
|
|
125
124
|
- mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
|
|
126
125
|
- sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
|
|
127
126
|
|
|
128
|
-
enable_preprocessing : typing.Optional[bool]
|
|
129
|
-
This will enable custom preprocessing of the input text which can result in better translations.
|
|
130
|
-
Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
|
|
131
|
-
|
|
132
127
|
output_script : typing.Optional[TransliterateMode]
|
|
133
128
|
**output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
|
|
134
129
|
|
|
@@ -186,7 +181,6 @@ class TextClient:
|
|
|
186
181
|
speaker_gender=speaker_gender,
|
|
187
182
|
mode=mode,
|
|
188
183
|
model=model,
|
|
189
|
-
enable_preprocessing=enable_preprocessing,
|
|
190
184
|
output_script=output_script,
|
|
191
185
|
numerals_format=numerals_format,
|
|
192
186
|
request_options=request_options,
|
|
@@ -371,7 +365,6 @@ class AsyncTextClient:
|
|
|
371
365
|
speaker_gender: typing.Optional[TranslateSpeakerGender] = OMIT,
|
|
372
366
|
mode: typing.Optional[TranslateMode] = OMIT,
|
|
373
367
|
model: typing.Optional[TranslateModel] = OMIT,
|
|
374
|
-
enable_preprocessing: typing.Optional[bool] = OMIT,
|
|
375
368
|
output_script: typing.Optional[TransliterateMode] = OMIT,
|
|
376
369
|
numerals_format: typing.Optional[NumeralsFormat] = OMIT,
|
|
377
370
|
request_options: typing.Optional[RequestOptions] = None,
|
|
@@ -449,10 +442,6 @@ class AsyncTextClient:
|
|
|
449
442
|
- mayura:v1: Supports 12 languages with all modes, output scripts, and automatic language detection.
|
|
450
443
|
- sarvam-translate:v1: Supports all 22 scheduled languages of India, formal mode only.
|
|
451
444
|
|
|
452
|
-
enable_preprocessing : typing.Optional[bool]
|
|
453
|
-
This will enable custom preprocessing of the input text which can result in better translations.
|
|
454
|
-
Recommendation- You can switch on whenever there is some complex text with difficult vocabulary and sentences, for which you want simple translations that people can understand.
|
|
455
|
-
|
|
456
445
|
output_script : typing.Optional[TransliterateMode]
|
|
457
446
|
**output_script**: This is an optional parameter which controls the transliteration style applied to the output text.
|
|
458
447
|
|
|
@@ -518,7 +507,6 @@ class AsyncTextClient:
|
|
|
518
507
|
speaker_gender=speaker_gender,
|
|
519
508
|
mode=mode,
|
|
520
509
|
model=model,
|
|
521
|
-
enable_preprocessing=enable_preprocessing,
|
|
522
510
|
output_script=output_script,
|
|
523
511
|
numerals_format=numerals_format,
|
|
524
512
|
request_options=request_options,
|