sarvamai 0.1.23a2__py3-none-any.whl → 0.1.23a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. sarvamai/__init__.py +203 -405
  2. sarvamai/chat/raw_client.py +20 -20
  3. sarvamai/client.py +34 -186
  4. sarvamai/core/__init__.py +21 -76
  5. sarvamai/core/client_wrapper.py +3 -19
  6. sarvamai/core/force_multipart.py +2 -4
  7. sarvamai/core/http_client.py +97 -217
  8. sarvamai/core/http_response.py +1 -1
  9. sarvamai/core/jsonable_encoder.py +0 -8
  10. sarvamai/core/pydantic_utilities.py +4 -110
  11. sarvamai/errors/__init__.py +6 -40
  12. sarvamai/errors/bad_request_error.py +1 -1
  13. sarvamai/errors/forbidden_error.py +1 -1
  14. sarvamai/errors/internal_server_error.py +1 -1
  15. sarvamai/errors/service_unavailable_error.py +1 -1
  16. sarvamai/errors/too_many_requests_error.py +1 -1
  17. sarvamai/errors/unprocessable_entity_error.py +1 -1
  18. sarvamai/requests/__init__.py +62 -150
  19. sarvamai/requests/configure_connection.py +4 -0
  20. sarvamai/requests/configure_connection_data.py +40 -11
  21. sarvamai/requests/error_response_data.py +1 -1
  22. sarvamai/requests/file_signed_url_details.py +1 -1
  23. sarvamai/requests/speech_to_text_job_parameters.py +10 -1
  24. sarvamai/requests/speech_to_text_transcription_data.py +2 -2
  25. sarvamai/speech_to_text/client.py +29 -2
  26. sarvamai/speech_to_text/raw_client.py +81 -56
  27. sarvamai/speech_to_text_job/client.py +60 -15
  28. sarvamai/speech_to_text_job/raw_client.py +120 -120
  29. sarvamai/speech_to_text_streaming/__init__.py +10 -38
  30. sarvamai/speech_to_text_streaming/client.py +32 -6
  31. sarvamai/speech_to_text_streaming/raw_client.py +32 -6
  32. sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
  33. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
  34. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
  35. sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
  36. sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
  37. sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
  38. sarvamai/text/raw_client.py +60 -60
  39. sarvamai/text_to_speech/client.py +100 -16
  40. sarvamai/text_to_speech/raw_client.py +120 -36
  41. sarvamai/text_to_speech_streaming/__init__.py +2 -29
  42. sarvamai/text_to_speech_streaming/client.py +19 -6
  43. sarvamai/text_to_speech_streaming/raw_client.py +19 -6
  44. sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
  45. sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
  46. sarvamai/types/__init__.py +102 -222
  47. sarvamai/types/chat_completion_request_message.py +2 -6
  48. sarvamai/types/configure_connection.py +4 -0
  49. sarvamai/types/configure_connection_data.py +40 -11
  50. sarvamai/types/configure_connection_data_model.py +5 -0
  51. sarvamai/types/configure_connection_data_speaker.py +35 -1
  52. sarvamai/types/error_response_data.py +1 -1
  53. sarvamai/types/file_signed_url_details.py +1 -1
  54. sarvamai/types/mode.py +7 -0
  55. sarvamai/types/speech_to_text_job_parameters.py +10 -1
  56. sarvamai/types/speech_to_text_model.py +3 -1
  57. sarvamai/types/speech_to_text_transcription_data.py +2 -2
  58. sarvamai/types/speech_to_text_translate_model.py +1 -1
  59. sarvamai/types/text_to_speech_model.py +1 -1
  60. sarvamai/types/text_to_speech_speaker.py +35 -1
  61. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/METADATA +1 -2
  62. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/RECORD +63 -63
  63. sarvamai/core/http_sse/__init__.py +0 -42
  64. sarvamai/core/http_sse/_api.py +0 -112
  65. sarvamai/core/http_sse/_decoders.py +0 -61
  66. sarvamai/core/http_sse/_exceptions.py +0 -7
  67. sarvamai/core/http_sse/_models.py +0 -17
  68. {sarvamai-0.1.23a2.dist-info → sarvamai-0.1.23a4.dist-info}/WHEEL +0 -0
@@ -2,48 +2,20 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .types import (
10
- SpeechToTextStreamingFlushSignal,
11
- SpeechToTextStreamingHighVadSensitivity,
12
- SpeechToTextStreamingLanguageCode,
13
- SpeechToTextStreamingVadSignals,
14
- )
15
- _dynamic_imports: typing.Dict[str, str] = {
16
- "SpeechToTextStreamingFlushSignal": ".types",
17
- "SpeechToTextStreamingHighVadSensitivity": ".types",
18
- "SpeechToTextStreamingLanguageCode": ".types",
19
- "SpeechToTextStreamingVadSignals": ".types",
20
- }
21
-
22
-
23
- def __getattr__(attr_name: str) -> typing.Any:
24
- module_name = _dynamic_imports.get(attr_name)
25
- if module_name is None:
26
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
27
- try:
28
- module = import_module(module_name, __package__)
29
- if module_name == f".{attr_name}":
30
- return module
31
- else:
32
- return getattr(module, attr_name)
33
- except ImportError as e:
34
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
35
- except AttributeError as e:
36
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
37
-
38
-
39
- def __dir__():
40
- lazy_attrs = list(_dynamic_imports.keys())
41
- return sorted(lazy_attrs)
42
-
5
+ from .types import (
6
+ SpeechToTextStreamingFlushSignal,
7
+ SpeechToTextStreamingHighVadSensitivity,
8
+ SpeechToTextStreamingLanguageCode,
9
+ SpeechToTextStreamingMode,
10
+ SpeechToTextStreamingModel,
11
+ SpeechToTextStreamingVadSignals,
12
+ )
43
13
 
44
14
  __all__ = [
45
15
  "SpeechToTextStreamingFlushSignal",
46
16
  "SpeechToTextStreamingHighVadSensitivity",
47
17
  "SpeechToTextStreamingLanguageCode",
18
+ "SpeechToTextStreamingMode",
19
+ "SpeechToTextStreamingModel",
48
20
  "SpeechToTextStreamingVadSignals",
49
21
  ]
@@ -14,6 +14,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
14
14
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
15
15
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
16
16
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
17
+ from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
18
+ from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
17
19
  from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
18
20
 
19
21
  try:
@@ -42,7 +44,8 @@ class SpeechToTextStreamingClient:
42
44
  self,
43
45
  *,
44
46
  language_code: SpeechToTextStreamingLanguageCode,
45
- model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
47
+ model: typing.Optional[SpeechToTextStreamingModel] = None,
48
+ mode: typing.Optional[SpeechToTextStreamingMode] = None,
46
49
  sample_rate: typing.Optional[str] = None,
47
50
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
48
51
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -60,11 +63,20 @@ class SpeechToTextStreamingClient:
60
63
  Parameters
61
64
  ----------
62
65
  language_code : SpeechToTextStreamingLanguageCode
63
- Language code for speech recognition
66
+ Language code for speech recognition (BCP-47 format)
64
67
 
65
- model : typing.Optional[typing.Literal["saarika:v2.5"]]
68
+ model : typing.Optional[SpeechToTextStreamingModel]
66
69
  Speech to text model to use
67
70
 
71
+ mode : typing.Optional[SpeechToTextStreamingMode]
72
+ Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
73
+ - transcribe: Standard Whisper transcription
74
+ - translate: Standard Whisper translation to English
75
+ - indic-en: Translate Indic languages to English
76
+ - verbatim: Exact transcription in original script
77
+ - translit: Transliteration to Latin script
78
+ - codemix: Code-mixed output (native + English)
79
+
68
80
  sample_rate : typing.Optional[str]
69
81
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
70
82
 
@@ -93,6 +105,8 @@ class SpeechToTextStreamingClient:
93
105
  query_params = query_params.add("language-code", language_code)
94
106
  if model is not None:
95
107
  query_params = query_params.add("model", model)
108
+ if mode is not None:
109
+ query_params = query_params.add("mode", mode)
96
110
  if sample_rate is not None:
97
111
  query_params = query_params.add("sample_rate", sample_rate)
98
112
  if high_vad_sensitivity is not None:
@@ -145,7 +159,8 @@ class AsyncSpeechToTextStreamingClient:
145
159
  self,
146
160
  *,
147
161
  language_code: SpeechToTextStreamingLanguageCode,
148
- model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
162
+ model: typing.Optional[SpeechToTextStreamingModel] = None,
163
+ mode: typing.Optional[SpeechToTextStreamingMode] = None,
149
164
  sample_rate: typing.Optional[str] = None,
150
165
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
151
166
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -163,11 +178,20 @@ class AsyncSpeechToTextStreamingClient:
163
178
  Parameters
164
179
  ----------
165
180
  language_code : SpeechToTextStreamingLanguageCode
166
- Language code for speech recognition
181
+ Language code for speech recognition (BCP-47 format)
167
182
 
168
- model : typing.Optional[typing.Literal["saarika:v2.5"]]
183
+ model : typing.Optional[SpeechToTextStreamingModel]
169
184
  Speech to text model to use
170
185
 
186
+ mode : typing.Optional[SpeechToTextStreamingMode]
187
+ Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
188
+ - transcribe: Standard Whisper transcription
189
+ - translate: Standard Whisper translation to English
190
+ - indic-en: Translate Indic languages to English
191
+ - verbatim: Exact transcription in original script
192
+ - translit: Transliteration to Latin script
193
+ - codemix: Code-mixed output (native + English)
194
+
171
195
  sample_rate : typing.Optional[str]
172
196
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
173
197
 
@@ -196,6 +220,8 @@ class AsyncSpeechToTextStreamingClient:
196
220
  query_params = query_params.add("language-code", language_code)
197
221
  if model is not None:
198
222
  query_params = query_params.add("model", model)
223
+ if mode is not None:
224
+ query_params = query_params.add("mode", mode)
199
225
  if sample_rate is not None:
200
226
  query_params = query_params.add("sample_rate", sample_rate)
201
227
  if high_vad_sensitivity is not None:
@@ -13,6 +13,8 @@ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextS
13
13
  from .types.speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
14
14
  from .types.speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
15
15
  from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
16
+ from .types.speech_to_text_streaming_mode import SpeechToTextStreamingMode
17
+ from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
16
18
  from .types.speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
17
19
 
18
20
  try:
@@ -30,7 +32,8 @@ class RawSpeechToTextStreamingClient:
30
32
  self,
31
33
  *,
32
34
  language_code: SpeechToTextStreamingLanguageCode,
33
- model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
35
+ model: typing.Optional[SpeechToTextStreamingModel] = None,
36
+ mode: typing.Optional[SpeechToTextStreamingMode] = None,
34
37
  sample_rate: typing.Optional[str] = None,
35
38
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
36
39
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -48,11 +51,20 @@ class RawSpeechToTextStreamingClient:
48
51
  Parameters
49
52
  ----------
50
53
  language_code : SpeechToTextStreamingLanguageCode
51
- Language code for speech recognition
54
+ Language code for speech recognition (BCP-47 format)
52
55
 
53
- model : typing.Optional[typing.Literal["saarika:v2.5"]]
56
+ model : typing.Optional[SpeechToTextStreamingModel]
54
57
  Speech to text model to use
55
58
 
59
+ mode : typing.Optional[SpeechToTextStreamingMode]
60
+ Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
61
+ - transcribe: Standard Whisper transcription
62
+ - translate: Standard Whisper translation to English
63
+ - indic-en: Translate Indic languages to English
64
+ - verbatim: Exact transcription in original script
65
+ - translit: Transliteration to Latin script
66
+ - codemix: Code-mixed output (native + English)
67
+
56
68
  sample_rate : typing.Optional[str]
57
69
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
58
70
 
@@ -81,6 +93,8 @@ class RawSpeechToTextStreamingClient:
81
93
  query_params = query_params.add("language-code", language_code)
82
94
  if model is not None:
83
95
  query_params = query_params.add("model", model)
96
+ if mode is not None:
97
+ query_params = query_params.add("mode", mode)
84
98
  if sample_rate is not None:
85
99
  query_params = query_params.add("sample_rate", sample_rate)
86
100
  if high_vad_sensitivity is not None:
@@ -122,7 +136,8 @@ class AsyncRawSpeechToTextStreamingClient:
122
136
  self,
123
137
  *,
124
138
  language_code: SpeechToTextStreamingLanguageCode,
125
- model: typing.Optional[typing.Literal["saarika:v2.5"]] = None,
139
+ model: typing.Optional[SpeechToTextStreamingModel] = None,
140
+ mode: typing.Optional[SpeechToTextStreamingMode] = None,
126
141
  sample_rate: typing.Optional[str] = None,
127
142
  high_vad_sensitivity: typing.Optional[SpeechToTextStreamingHighVadSensitivity] = None,
128
143
  vad_signals: typing.Optional[SpeechToTextStreamingVadSignals] = None,
@@ -140,11 +155,20 @@ class AsyncRawSpeechToTextStreamingClient:
140
155
  Parameters
141
156
  ----------
142
157
  language_code : SpeechToTextStreamingLanguageCode
143
- Language code for speech recognition
158
+ Language code for speech recognition (BCP-47 format)
144
159
 
145
- model : typing.Optional[typing.Literal["saarika:v2.5"]]
160
+ model : typing.Optional[SpeechToTextStreamingModel]
146
161
  Speech to text model to use
147
162
 
163
+ mode : typing.Optional[SpeechToTextStreamingMode]
164
+ Mode of operation for saaras:v3 model. Only applicable when model is 'saaras:v3'.
165
+ - transcribe: Standard Whisper transcription
166
+ - translate: Standard Whisper translation to English
167
+ - indic-en: Translate Indic languages to English
168
+ - verbatim: Exact transcription in original script
169
+ - translit: Transliteration to Latin script
170
+ - codemix: Code-mixed output (native + English)
171
+
148
172
  sample_rate : typing.Optional[str]
149
173
  Audio sample rate for the WebSocket connection. When specified as a connection parameter, only 16kHz and 8kHz are supported. 8kHz is only available via this connection parameter. If not specified, defaults to 16kHz.
150
174
 
@@ -173,6 +197,8 @@ class AsyncRawSpeechToTextStreamingClient:
173
197
  query_params = query_params.add("language-code", language_code)
174
198
  if model is not None:
175
199
  query_params = query_params.add("model", model)
200
+ if mode is not None:
201
+ query_params = query_params.add("mode", mode)
176
202
  if sample_rate is not None:
177
203
  query_params = query_params.add("sample_rate", sample_rate)
178
204
  if high_vad_sensitivity is not None:
@@ -2,46 +2,18 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
- import typing
6
- from importlib import import_module
7
-
8
- if typing.TYPE_CHECKING:
9
- from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
10
- from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
11
- from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
12
- from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
13
- _dynamic_imports: typing.Dict[str, str] = {
14
- "SpeechToTextStreamingFlushSignal": ".speech_to_text_streaming_flush_signal",
15
- "SpeechToTextStreamingHighVadSensitivity": ".speech_to_text_streaming_high_vad_sensitivity",
16
- "SpeechToTextStreamingLanguageCode": ".speech_to_text_streaming_language_code",
17
- "SpeechToTextStreamingVadSignals": ".speech_to_text_streaming_vad_signals",
18
- }
19
-
20
-
21
- def __getattr__(attr_name: str) -> typing.Any:
22
- module_name = _dynamic_imports.get(attr_name)
23
- if module_name is None:
24
- raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
25
- try:
26
- module = import_module(module_name, __package__)
27
- if module_name == f".{attr_name}":
28
- return module
29
- else:
30
- return getattr(module, attr_name)
31
- except ImportError as e:
32
- raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
33
- except AttributeError as e:
34
- raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
35
-
36
-
37
- def __dir__():
38
- lazy_attrs = list(_dynamic_imports.keys())
39
- return sorted(lazy_attrs)
40
-
5
+ from .speech_to_text_streaming_flush_signal import SpeechToTextStreamingFlushSignal
6
+ from .speech_to_text_streaming_high_vad_sensitivity import SpeechToTextStreamingHighVadSensitivity
7
+ from .speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
8
+ from .speech_to_text_streaming_mode import SpeechToTextStreamingMode
9
+ from .speech_to_text_streaming_model import SpeechToTextStreamingModel
10
+ from .speech_to_text_streaming_vad_signals import SpeechToTextStreamingVadSignals
41
11
 
42
12
  __all__ = [
43
13
  "SpeechToTextStreamingFlushSignal",
44
14
  "SpeechToTextStreamingHighVadSensitivity",
45
15
  "SpeechToTextStreamingLanguageCode",
16
+ "SpeechToTextStreamingMode",
17
+ "SpeechToTextStreamingModel",
46
18
  "SpeechToTextStreamingVadSignals",
47
19
  ]
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ SpeechToTextStreamingMode = typing.Union[
6
+ typing.Literal["transcribe", "translate", "indic-en", "verbatim", "translit", "codemix"], typing.Any
7
+ ]
@@ -0,0 +1,5 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ SpeechToTextStreamingModel = typing.Union[typing.Literal["saarika:v2.5", "saaras:v3"], typing.Any]