sarvamai 0.1.5a3__py3-none-any.whl → 0.1.5a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. sarvamai/__init__.py +45 -3
  2. sarvamai/chat/client.py +0 -2
  3. sarvamai/chat/raw_client.py +0 -2
  4. sarvamai/client.py +3 -0
  5. sarvamai/core/client_wrapper.py +2 -2
  6. sarvamai/requests/__init__.py +16 -0
  7. sarvamai/requests/audio_data.py +21 -0
  8. sarvamai/requests/audio_message.py +8 -0
  9. sarvamai/requests/error_data.py +15 -0
  10. sarvamai/requests/events_data.py +17 -0
  11. sarvamai/requests/speech_to_text_streaming_response.py +10 -0
  12. sarvamai/requests/speech_to_text_streaming_response_data.py +9 -0
  13. sarvamai/requests/transcription_data.py +35 -0
  14. sarvamai/requests/transcription_metrics.py +15 -0
  15. sarvamai/speech_to_text_streaming/__init__.py +7 -0
  16. sarvamai/speech_to_text_streaming/client.py +189 -0
  17. sarvamai/speech_to_text_streaming/raw_client.py +166 -0
  18. sarvamai/speech_to_text_streaming/socket_client.py +129 -0
  19. sarvamai/speech_to_text_streaming/types/__init__.py +8 -0
  20. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py +8 -0
  21. sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
  22. sarvamai/text/client.py +89 -47
  23. sarvamai/text/raw_client.py +85 -43
  24. sarvamai/types/__init__.py +24 -2
  25. sarvamai/types/audio_data.py +33 -0
  26. sarvamai/types/audio_data_encoding.py +5 -0
  27. sarvamai/types/audio_message.py +20 -0
  28. sarvamai/types/error_data.py +27 -0
  29. sarvamai/types/events_data.py +28 -0
  30. sarvamai/types/format.py +5 -0
  31. sarvamai/types/speech_to_text_model.py +3 -1
  32. sarvamai/types/speech_to_text_streaming_response.py +22 -0
  33. sarvamai/types/speech_to_text_streaming_response_data.py +9 -0
  34. sarvamai/types/speech_to_text_streaming_response_type.py +5 -0
  35. sarvamai/types/speech_to_text_translate_model.py +1 -1
  36. sarvamai/types/transcription_data.py +45 -0
  37. sarvamai/types/transcription_metrics.py +27 -0
  38. sarvamai/types/translate_model.py +1 -1
  39. sarvamai/types/translate_source_language.py +24 -1
  40. sarvamai/types/translate_target_language.py +25 -1
  41. sarvamai/types/translatiterate_target_language.py +1 -1
  42. sarvamai/types/transliterate_mode.py +5 -0
  43. sarvamai/types/transliterate_source_language.py +1 -1
  44. {sarvamai-0.1.5a3.dist-info → sarvamai-0.1.5a5.dist-info}/METADATA +2 -1
  45. {sarvamai-0.1.5a3.dist-info → sarvamai-0.1.5a5.dist-info}/RECORD +46 -20
  46. sarvamai/types/translate_postprocessing.py +0 -5
  47. {sarvamai-0.1.5a3.dist-info → sarvamai-0.1.5a5.dist-info}/WHEEL +0 -0
sarvamai/__init__.py CHANGED
@@ -3,6 +3,9 @@
3
3
  # isort: skip_file
4
4
 
5
5
  from .types import (
6
+ AudioData,
7
+ AudioDataEncoding,
8
+ AudioMessage,
6
9
  ChatCompletionRequestAssistantMessage,
7
10
  ChatCompletionRequestMessage,
8
11
  ChatCompletionRequestMessage_Assistant,
@@ -17,9 +20,12 @@ from .types import (
17
20
  DiarizedEntry,
18
21
  DiarizedTranscript,
19
22
  ErrorCode,
23
+ ErrorData,
20
24
  ErrorDetails,
21
25
  ErrorMessage,
26
+ EventsData,
22
27
  FinishReason,
28
+ Format,
23
29
  LanguageIdentificationResponse,
24
30
  NumeralsFormat,
25
31
  ReasoningEffort,
@@ -29,6 +35,9 @@ from .types import (
29
35
  SpeechToTextLanguage,
30
36
  SpeechToTextModel,
31
37
  SpeechToTextResponse,
38
+ SpeechToTextStreamingResponse,
39
+ SpeechToTextStreamingResponseData,
40
+ SpeechToTextStreamingResponseType,
32
41
  SpeechToTextTranslateLanguage,
33
42
  SpeechToTextTranslateModel,
34
43
  SpeechToTextTranslateResponse,
@@ -39,14 +48,16 @@ from .types import (
39
48
  TextToSpeechResponse,
40
49
  TextToSpeechSpeaker,
41
50
  TimestampsModel,
51
+ TranscriptionData,
52
+ TranscriptionMetrics,
42
53
  TranslateMode,
43
54
  TranslateModel,
44
- TranslatePostprocessing,
45
55
  TranslateSourceLanguage,
46
56
  TranslateSpeakerGender,
47
57
  TranslateTargetLanguage,
48
58
  TranslationResponse,
49
59
  TranslatiterateTargetLanguage,
60
+ TransliterateMode,
50
61
  TransliterateSourceLanguage,
51
62
  TransliterationResponse,
52
63
  )
@@ -58,10 +69,12 @@ from .errors import (
58
69
  TooManyRequestsError,
59
70
  UnprocessableEntityError,
60
71
  )
61
- from . import chat, speech_to_text, text, text_to_speech
72
+ from . import chat, speech_to_text, speech_to_text_streaming, text, text_to_speech
62
73
  from .client import AsyncSarvamAI, SarvamAI
63
74
  from .environment import SarvamAIEnvironment
64
75
  from .requests import (
76
+ AudioDataParams,
77
+ AudioMessageParams,
65
78
  ChatCompletionRequestAssistantMessageParams,
66
79
  ChatCompletionRequestMessageParams,
67
80
  ChatCompletionRequestMessage_AssistantParams,
@@ -75,21 +88,33 @@ from .requests import (
75
88
  CreateChatCompletionResponseParams,
76
89
  DiarizedEntryParams,
77
90
  DiarizedTranscriptParams,
91
+ ErrorDataParams,
78
92
  ErrorDetailsParams,
79
93
  ErrorMessageParams,
94
+ EventsDataParams,
80
95
  LanguageIdentificationResponseParams,
81
96
  SpeechToTextResponseParams,
97
+ SpeechToTextStreamingResponseDataParams,
98
+ SpeechToTextStreamingResponseParams,
82
99
  SpeechToTextTranslateResponseParams,
83
100
  StopConfigurationParams,
84
101
  TextToSpeechResponseParams,
85
102
  TimestampsModelParams,
103
+ TranscriptionDataParams,
104
+ TranscriptionMetricsParams,
86
105
  TranslationResponseParams,
87
106
  TransliterationResponseParams,
88
107
  )
108
+ from .speech_to_text_streaming import SpeechToTextStreamingLanguageCode, SpeechToTextStreamingModel
89
109
  from .version import __version__
90
110
 
91
111
  __all__ = [
92
112
  "AsyncSarvamAI",
113
+ "AudioData",
114
+ "AudioDataEncoding",
115
+ "AudioDataParams",
116
+ "AudioMessage",
117
+ "AudioMessageParams",
93
118
  "BadRequestError",
94
119
  "ChatCompletionRequestAssistantMessage",
95
120
  "ChatCompletionRequestAssistantMessageParams",
@@ -118,12 +143,17 @@ __all__ = [
118
143
  "DiarizedTranscript",
119
144
  "DiarizedTranscriptParams",
120
145
  "ErrorCode",
146
+ "ErrorData",
147
+ "ErrorDataParams",
121
148
  "ErrorDetails",
122
149
  "ErrorDetailsParams",
123
150
  "ErrorMessage",
124
151
  "ErrorMessageParams",
152
+ "EventsData",
153
+ "EventsDataParams",
125
154
  "FinishReason",
126
155
  "ForbiddenError",
156
+ "Format",
127
157
  "InternalServerError",
128
158
  "LanguageIdentificationResponse",
129
159
  "LanguageIdentificationResponseParams",
@@ -139,6 +169,13 @@ __all__ = [
139
169
  "SpeechToTextModel",
140
170
  "SpeechToTextResponse",
141
171
  "SpeechToTextResponseParams",
172
+ "SpeechToTextStreamingLanguageCode",
173
+ "SpeechToTextStreamingModel",
174
+ "SpeechToTextStreamingResponse",
175
+ "SpeechToTextStreamingResponseData",
176
+ "SpeechToTextStreamingResponseDataParams",
177
+ "SpeechToTextStreamingResponseParams",
178
+ "SpeechToTextStreamingResponseType",
142
179
  "SpeechToTextTranslateLanguage",
143
180
  "SpeechToTextTranslateModel",
144
181
  "SpeechToTextTranslateResponse",
@@ -154,15 +191,19 @@ __all__ = [
154
191
  "TimestampsModel",
155
192
  "TimestampsModelParams",
156
193
  "TooManyRequestsError",
194
+ "TranscriptionData",
195
+ "TranscriptionDataParams",
196
+ "TranscriptionMetrics",
197
+ "TranscriptionMetricsParams",
157
198
  "TranslateMode",
158
199
  "TranslateModel",
159
- "TranslatePostprocessing",
160
200
  "TranslateSourceLanguage",
161
201
  "TranslateSpeakerGender",
162
202
  "TranslateTargetLanguage",
163
203
  "TranslationResponse",
164
204
  "TranslationResponseParams",
165
205
  "TranslatiterateTargetLanguage",
206
+ "TransliterateMode",
166
207
  "TransliterateSourceLanguage",
167
208
  "TransliterationResponse",
168
209
  "TransliterationResponseParams",
@@ -170,6 +211,7 @@ __all__ = [
170
211
  "__version__",
171
212
  "chat",
172
213
  "speech_to_text",
214
+ "speech_to_text_streaming",
173
215
  "text",
174
216
  "text_to_speech",
175
217
  ]
sarvamai/chat/client.py CHANGED
@@ -83,7 +83,6 @@ class ChatClient:
83
83
  seed : typing.Optional[int]
84
84
  This feature is in Beta.
85
85
  If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
86
- Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
87
86
 
88
87
  frequency_penalty : typing.Optional[float]
89
88
  Number between -2.0 and 2.0. Positive values penalize new tokens based on
@@ -204,7 +203,6 @@ class AsyncChatClient:
204
203
  seed : typing.Optional[int]
205
204
  This feature is in Beta.
206
205
  If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
207
- Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
208
206
 
209
207
  frequency_penalty : typing.Optional[float]
210
208
  Number between -2.0 and 2.0. Positive values penalize new tokens based on
sarvamai/chat/raw_client.py CHANGED
@@ -81,7 +81,6 @@ class RawChatClient:
81
81
  seed : typing.Optional[int]
82
82
  This feature is in Beta.
83
83
  If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
84
- Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
85
84
 
86
85
  frequency_penalty : typing.Optional[float]
87
86
  Number between -2.0 and 2.0. Positive values penalize new tokens based on
@@ -262,7 +261,6 @@ class AsyncRawChatClient:
262
261
  seed : typing.Optional[int]
263
262
  This feature is in Beta.
264
263
  If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
265
- Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
266
264
 
267
265
  frequency_penalty : typing.Optional[float]
268
266
  Number between -2.0 and 2.0. Positive values penalize new tokens based on
sarvamai/client.py CHANGED
@@ -9,6 +9,7 @@ from .core.api_error import ApiError
9
9
  from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
10
10
  from .environment import SarvamAIEnvironment
11
11
  from .speech_to_text.client import AsyncSpeechToTextClient, SpeechToTextClient
12
+ from .speech_to_text_streaming.client import AsyncSpeechToTextStreamingClient, SpeechToTextStreamingClient
12
13
  from .text.client import AsyncTextClient, TextClient
13
14
  from .text_to_speech.client import AsyncTextToSpeechClient, TextToSpeechClient
14
15
 
@@ -77,6 +78,7 @@ class SarvamAI:
77
78
  self.speech_to_text = SpeechToTextClient(client_wrapper=self._client_wrapper)
78
79
  self.text_to_speech = TextToSpeechClient(client_wrapper=self._client_wrapper)
79
80
  self.chat = ChatClient(client_wrapper=self._client_wrapper)
81
+ self.speech_to_text_streaming = SpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
80
82
 
81
83
 
82
84
  class AsyncSarvamAI:
@@ -143,3 +145,4 @@ class AsyncSarvamAI:
143
145
  self.speech_to_text = AsyncSpeechToTextClient(client_wrapper=self._client_wrapper)
144
146
  self.text_to_speech = AsyncTextToSpeechClient(client_wrapper=self._client_wrapper)
145
147
  self.chat = AsyncChatClient(client_wrapper=self._client_wrapper)
148
+ self.speech_to_text_streaming = AsyncSpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
sarvamai/core/client_wrapper.py CHANGED
@@ -17,10 +17,10 @@ class BaseClientWrapper:
17
17
 
18
18
  def get_headers(self) -> typing.Dict[str, str]:
19
19
  headers: typing.Dict[str, str] = {
20
- "User-Agent": "sarvamai/0.1.5a3",
20
+ "User-Agent": "sarvamai/0.1.5a5",
21
21
  "X-Fern-Language": "Python",
22
22
  "X-Fern-SDK-Name": "sarvamai",
23
- "X-Fern-SDK-Version": "0.1.5a3",
23
+ "X-Fern-SDK-Version": "0.1.5a5",
24
24
  }
25
25
  headers["api-subscription-key"] = self.api_subscription_key
26
26
  return headers
sarvamai/requests/__init__.py CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  # isort: skip_file
4
4
 
5
+ from .audio_data import AudioDataParams
6
+ from .audio_message import AudioMessageParams
5
7
  from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
6
8
  from .chat_completion_request_message import (
7
9
  ChatCompletionRequestMessageParams,
@@ -17,18 +19,26 @@ from .completion_usage import CompletionUsageParams
17
19
  from .create_chat_completion_response import CreateChatCompletionResponseParams
18
20
  from .diarized_entry import DiarizedEntryParams
19
21
  from .diarized_transcript import DiarizedTranscriptParams
22
+ from .error_data import ErrorDataParams
20
23
  from .error_details import ErrorDetailsParams
21
24
  from .error_message import ErrorMessageParams
25
+ from .events_data import EventsDataParams
22
26
  from .language_identification_response import LanguageIdentificationResponseParams
23
27
  from .speech_to_text_response import SpeechToTextResponseParams
28
+ from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
29
+ from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseDataParams
24
30
  from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
25
31
  from .stop_configuration import StopConfigurationParams
26
32
  from .text_to_speech_response import TextToSpeechResponseParams
27
33
  from .timestamps_model import TimestampsModelParams
34
+ from .transcription_data import TranscriptionDataParams
35
+ from .transcription_metrics import TranscriptionMetricsParams
28
36
  from .translation_response import TranslationResponseParams
29
37
  from .transliteration_response import TransliterationResponseParams
30
38
 
31
39
  __all__ = [
40
+ "AudioDataParams",
41
+ "AudioMessageParams",
32
42
  "ChatCompletionRequestAssistantMessageParams",
33
43
  "ChatCompletionRequestMessageParams",
34
44
  "ChatCompletionRequestMessage_AssistantParams",
@@ -42,14 +52,20 @@ __all__ = [
42
52
  "CreateChatCompletionResponseParams",
43
53
  "DiarizedEntryParams",
44
54
  "DiarizedTranscriptParams",
55
+ "ErrorDataParams",
45
56
  "ErrorDetailsParams",
46
57
  "ErrorMessageParams",
58
+ "EventsDataParams",
47
59
  "LanguageIdentificationResponseParams",
48
60
  "SpeechToTextResponseParams",
61
+ "SpeechToTextStreamingResponseDataParams",
62
+ "SpeechToTextStreamingResponseParams",
49
63
  "SpeechToTextTranslateResponseParams",
50
64
  "StopConfigurationParams",
51
65
  "TextToSpeechResponseParams",
52
66
  "TimestampsModelParams",
67
+ "TranscriptionDataParams",
68
+ "TranscriptionMetricsParams",
53
69
  "TranslationResponseParams",
54
70
  "TransliterationResponseParams",
55
71
  ]
sarvamai/requests/audio_data.py ADDED
@@ -0,0 +1,21 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.audio_data_encoding import AudioDataEncoding
5
+
6
+
7
+ class AudioDataParams(typing_extensions.TypedDict):
8
+ data: str
9
+ """
10
+ Base64 encoded audio data
11
+ """
12
+
13
+ sample_rate: int
14
+ """
15
+ Audio sample rate in Hz (16kHz preferred, 8kHz least preferred)
16
+ """
17
+
18
+ encoding: AudioDataEncoding
19
+ """
20
+ Audio encoding format
21
+ """
sarvamai/requests/audio_message.py ADDED
@@ -0,0 +1,8 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from .audio_data import AudioDataParams
5
+
6
+
7
+ class AudioMessageParams(typing_extensions.TypedDict):
8
+ audio: AudioDataParams
sarvamai/requests/error_data.py ADDED
@@ -0,0 +1,15 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
+ class ErrorDataParams(typing_extensions.TypedDict):
7
+ error: str
8
+ """
9
+ Error message
10
+ """
11
+
12
+ code: str
13
+ """
14
+ Error code
15
+ """
sarvamai/requests/events_data.py ADDED
@@ -0,0 +1,17 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+
5
+ import typing_extensions
6
+
7
+
8
+ class EventsDataParams(typing_extensions.TypedDict):
9
+ event_type: str
10
+ """
11
+ Type of event
12
+ """
13
+
14
+ timestamp: dt.datetime
15
+ """
16
+ Event timestamp
17
+ """
sarvamai/requests/speech_to_text_streaming_response.py ADDED
@@ -0,0 +1,10 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+ from ..types.speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
5
+ from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseDataParams
6
+
7
+
8
+ class SpeechToTextStreamingResponseParams(typing_extensions.TypedDict):
9
+ type: SpeechToTextStreamingResponseType
10
+ data: SpeechToTextStreamingResponseDataParams
sarvamai/requests/speech_to_text_streaming_response_data.py ADDED
@@ -0,0 +1,9 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ from .error_data import ErrorDataParams
6
+ from .events_data import EventsDataParams
7
+ from .transcription_data import TranscriptionDataParams
8
+
9
+ SpeechToTextStreamingResponseDataParams = typing.Union[TranscriptionDataParams, ErrorDataParams, EventsDataParams]
sarvamai/requests/transcription_data.py ADDED
@@ -0,0 +1,35 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ import typing_extensions
6
+ from .transcription_metrics import TranscriptionMetricsParams
7
+
8
+
9
+ class TranscriptionDataParams(typing_extensions.TypedDict):
10
+ request_id: str
11
+ """
12
+ Unique identifier for the request
13
+ """
14
+
15
+ transcript: str
16
+ """
17
+ Transcript of the provided speech
18
+ """
19
+
20
+ timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
21
+ """
22
+ Timestamp information (if available)
23
+ """
24
+
25
+ diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
26
+ """
27
+ Diarized transcript of the provided speech
28
+ """
29
+
30
+ language_code: typing_extensions.NotRequired[str]
31
+ """
32
+ BCP-47 code of detected language
33
+ """
34
+
35
+ metrics: TranscriptionMetricsParams
sarvamai/requests/transcription_metrics.py ADDED
@@ -0,0 +1,15 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing_extensions
4
+
5
+
6
+ class TranscriptionMetricsParams(typing_extensions.TypedDict):
7
+ audio_duration: float
8
+ """
9
+ Duration of processed audio in seconds
10
+ """
11
+
12
+ processing_latency: float
13
+ """
14
+ Processing latency in seconds
15
+ """
sarvamai/speech_to_text_streaming/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ # isort: skip_file
4
+
5
+ from .types import SpeechToTextStreamingLanguageCode, SpeechToTextStreamingModel
6
+
7
+ __all__ = ["SpeechToTextStreamingLanguageCode", "SpeechToTextStreamingModel"]
sarvamai/speech_to_text_streaming/client.py ADDED
@@ -0,0 +1,189 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from contextlib import asynccontextmanager, contextmanager
5
+
6
+ import httpx
7
+ import websockets
8
+ import websockets.sync.client as websockets_sync_client
9
+ from ..core.api_error import ApiError
10
+ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
11
+ from ..core.request_options import RequestOptions
12
+ from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStreamingClient
13
+ from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
14
+ from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
15
+ from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
16
+
17
+
18
+ class SpeechToTextStreamingClient:
19
+ def __init__(self, *, client_wrapper: SyncClientWrapper):
20
+ self._raw_client = RawSpeechToTextStreamingClient(client_wrapper=client_wrapper)
21
+
22
+ @property
23
+ def with_raw_response(self) -> RawSpeechToTextStreamingClient:
24
+ """
25
+ Retrieves a raw implementation of this client that returns raw responses.
26
+
27
+ Returns
28
+ -------
29
+ RawSpeechToTextStreamingClient
30
+ """
31
+ return self._raw_client
32
+
33
+ @contextmanager
34
+ def connect(
35
+ self,
36
+ *,
37
+ language_code: SpeechToTextStreamingLanguageCode,
38
+ model: SpeechToTextStreamingModel,
39
+ high_vad_sensitivity: typing.Optional[str] = None,
40
+ vad_signals: typing.Optional[str] = None,
41
+ api_subscription_key: typing.Optional[str] = None,
42
+ request_options: typing.Optional[RequestOptions] = None,
43
+ ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
44
+ """
45
+ WebSocket channel for real-time speech to text streaming
46
+
47
+ Parameters
48
+ ----------
49
+ language_code : SpeechToTextStreamingLanguageCode
50
+ Language code for speech recognition
51
+
52
+ model : SpeechToTextStreamingModel
53
+ Speech to text model to use
54
+
55
+ high_vad_sensitivity : typing.Optional[str]
56
+ Enable high VAD (Voice Activity Detection) sensitivity
57
+
58
+ vad_signals : typing.Optional[str]
59
+ Enable VAD signals in response
60
+
61
+ api_subscription_key : typing.Optional[str]
62
+ API subscription key for authentication
63
+
64
+ request_options : typing.Optional[RequestOptions]
65
+ Request-specific configuration.
66
+
67
+ Returns
68
+ -------
69
+ SpeechToTextStreamingSocketClient
70
+ """
71
+ ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text/ws"
72
+ query_params = httpx.QueryParams()
73
+ if language_code is not None:
74
+ query_params = query_params.add("language-code", language_code)
75
+ if model is not None:
76
+ query_params = query_params.add("model", model)
77
+ if high_vad_sensitivity is not None:
78
+ query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
79
+ if vad_signals is not None:
80
+ query_params = query_params.add("vad_signals", vad_signals)
81
+ ws_url = ws_url + f"?{query_params}"
82
+ headers = self._raw_client._client_wrapper.get_headers()
83
+ if api_subscription_key is not None:
84
+ headers["Api-Subscription-Key"] = str(api_subscription_key)
85
+ if request_options and "additional_headers" in request_options:
86
+ headers.update(request_options["additional_headers"])
87
+ try:
88
+ with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
89
+ yield SpeechToTextStreamingSocketClient(websocket=protocol)
90
+ except websockets.exceptions.InvalidStatusCode as exc:
91
+ status_code: int = exc.status_code
92
+ if status_code == 401:
93
+ raise ApiError(
94
+ status_code=status_code,
95
+ headers=dict(headers),
96
+ body="Websocket initialized with invalid credentials.",
97
+ )
98
+ raise ApiError(
99
+ status_code=status_code,
100
+ headers=dict(headers),
101
+ body="Unexpected error when initializing websocket connection.",
102
+ )
103
+
104
+
105
+ class AsyncSpeechToTextStreamingClient:
106
+ def __init__(self, *, client_wrapper: AsyncClientWrapper):
107
+ self._raw_client = AsyncRawSpeechToTextStreamingClient(client_wrapper=client_wrapper)
108
+
109
+ @property
110
+ def with_raw_response(self) -> AsyncRawSpeechToTextStreamingClient:
111
+ """
112
+ Retrieves a raw implementation of this client that returns raw responses.
113
+
114
+ Returns
115
+ -------
116
+ AsyncRawSpeechToTextStreamingClient
117
+ """
118
+ return self._raw_client
119
+
120
+ @asynccontextmanager
121
+ async def connect(
122
+ self,
123
+ *,
124
+ language_code: SpeechToTextStreamingLanguageCode,
125
+ model: SpeechToTextStreamingModel,
126
+ high_vad_sensitivity: typing.Optional[str] = None,
127
+ vad_signals: typing.Optional[str] = None,
128
+ api_subscription_key: typing.Optional[str] = None,
129
+ request_options: typing.Optional[RequestOptions] = None,
130
+ ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
131
+ """
132
+ WebSocket channel for real-time speech to text streaming
133
+
134
+ Parameters
135
+ ----------
136
+ language_code : SpeechToTextStreamingLanguageCode
137
+ Language code for speech recognition
138
+
139
+ model : SpeechToTextStreamingModel
140
+ Speech to text model to use
141
+
142
+ high_vad_sensitivity : typing.Optional[str]
143
+ Enable high VAD (Voice Activity Detection) sensitivity
144
+
145
+ vad_signals : typing.Optional[str]
146
+ Enable VAD signals in response
147
+
148
+ api_subscription_key : typing.Optional[str]
149
+ API subscription key for authentication
150
+
151
+ request_options : typing.Optional[RequestOptions]
152
+ Request-specific configuration.
153
+
154
+ Returns
155
+ -------
156
+ AsyncSpeechToTextStreamingSocketClient
157
+ """
158
+ ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text/ws"
159
+ query_params = httpx.QueryParams()
160
+ if language_code is not None:
161
+ query_params = query_params.add("language-code", language_code)
162
+ if model is not None:
163
+ query_params = query_params.add("model", model)
164
+ if high_vad_sensitivity is not None:
165
+ query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
166
+ if vad_signals is not None:
167
+ query_params = query_params.add("vad_signals", vad_signals)
168
+ ws_url = ws_url + f"?{query_params}"
169
+ headers = self._raw_client._client_wrapper.get_headers()
170
+ if api_subscription_key is not None:
171
+ headers["Api-Subscription-Key"] = str(api_subscription_key)
172
+ if request_options and "additional_headers" in request_options:
173
+ headers.update(request_options["additional_headers"])
174
+ try:
175
+ async with websockets.connect(ws_url, extra_headers=headers) as protocol:
176
+ yield AsyncSpeechToTextStreamingSocketClient(websocket=protocol)
177
+ except websockets.exceptions.InvalidStatusCode as exc:
178
+ status_code: int = exc.status_code
179
+ if status_code == 401:
180
+ raise ApiError(
181
+ status_code=status_code,
182
+ headers=dict(headers),
183
+ body="Websocket initialized with invalid credentials.",
184
+ )
185
+ raise ApiError(
186
+ status_code=status_code,
187
+ headers=dict(headers),
188
+ body="Unexpected error when initializing websocket connection.",
189
+ )