sarvamai 0.1.5a3__tar.gz → 0.1.5a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/PKG-INFO +2 -1
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/pyproject.toml +2 -1
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/__init__.py +45 -3
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/chat/client.py +0 -2
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/chat/raw_client.py +0 -2
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/client.py +3 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/client_wrapper.py +2 -2
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/__init__.py +16 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/audio_data.py +21 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/audio_message.py +8 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/error_data.py +15 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/events_data.py +17 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/speech_to_text_streaming_response.py +10 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/speech_to_text_streaming_response_data.py +9 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/transcription_data.py +35 -0
- sarvamai-0.1.5a5/src/sarvamai/requests/transcription_metrics.py +15 -0
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/__init__.py +7 -0
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/client.py +189 -0
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/raw_client.py +166 -0
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/socket_client.py +129 -0
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/types/__init__.py +8 -0
- sarvamai-0.1.5a3/src/sarvamai/types/translate_target_language.py → sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py +1 -1
- sarvamai-0.1.5a5/src/sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text/client.py +89 -47
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text/raw_client.py +85 -43
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/__init__.py +24 -2
- sarvamai-0.1.5a5/src/sarvamai/types/audio_data.py +33 -0
- sarvamai-0.1.5a5/src/sarvamai/types/audio_data_encoding.py +5 -0
- sarvamai-0.1.5a5/src/sarvamai/types/audio_message.py +20 -0
- sarvamai-0.1.5a5/src/sarvamai/types/error_data.py +27 -0
- sarvamai-0.1.5a5/src/sarvamai/types/events_data.py +28 -0
- sarvamai-0.1.5a3/src/sarvamai/types/translate_model.py → sarvamai-0.1.5a5/src/sarvamai/types/format.py +1 -1
- sarvamai-0.1.5a5/src/sarvamai/types/speech_to_text_model.py +7 -0
- sarvamai-0.1.5a5/src/sarvamai/types/speech_to_text_streaming_response.py +22 -0
- sarvamai-0.1.5a5/src/sarvamai/types/speech_to_text_streaming_response_data.py +9 -0
- sarvamai-0.1.5a5/src/sarvamai/types/speech_to_text_streaming_response_type.py +5 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_to_text_translate_model.py +1 -1
- sarvamai-0.1.5a5/src/sarvamai/types/transcription_data.py +45 -0
- sarvamai-0.1.5a5/src/sarvamai/types/transcription_metrics.py +27 -0
- sarvamai-0.1.5a5/src/sarvamai/types/translate_model.py +5 -0
- sarvamai-0.1.5a5/src/sarvamai/types/translate_source_language.py +33 -0
- sarvamai-0.1.5a5/src/sarvamai/types/translate_target_language.py +32 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/translatiterate_target_language.py +1 -1
- sarvamai-0.1.5a5/src/sarvamai/types/transliterate_mode.py +5 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/transliterate_source_language.py +1 -1
- sarvamai-0.1.5a3/src/sarvamai/types/speech_to_text_model.py +0 -5
- sarvamai-0.1.5a3/src/sarvamai/types/translate_postprocessing.py +0 -5
- sarvamai-0.1.5a3/src/sarvamai/types/translate_source_language.py +0 -10
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/README.md +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/chat/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/api_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/datetime_utils.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/events.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/file.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/force_multipart.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/http_client.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/http_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/jsonable_encoder.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/pydantic_utilities.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/query_encoder.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/remove_none_from_dict.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/request_options.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/core/serialization.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/environment.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/bad_request_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/forbidden_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/internal_server_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/service_unavailable_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/too_many_requests_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/errors/unprocessable_entity_error.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/play.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/py.typed +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/chat_completion_request_assistant_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/chat_completion_request_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/chat_completion_request_system_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/chat_completion_request_user_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/chat_completion_response_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/choice.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/completion_usage.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/create_chat_completion_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/diarized_entry.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/diarized_transcript.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/error_details.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/error_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/language_identification_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/speech_to_text_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/speech_to_text_translate_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/stop_configuration.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/text_to_speech_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/timestamps_model.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/translation_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/requests/transliteration_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/speech_to_text/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/speech_to_text/client.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/speech_to_text/raw_client.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text_to_speech/__init__.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text_to_speech/client.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/text_to_speech/raw_client.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/chat_completion_request_assistant_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/chat_completion_request_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/chat_completion_request_system_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/chat_completion_request_user_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/chat_completion_response_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/choice.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/completion_usage.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/create_chat_completion_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/diarized_entry.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/diarized_transcript.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/error_code.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/error_details.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/error_message.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/finish_reason.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/language_identification_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/numerals_format.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/reasoning_effort.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/role.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/sarvam_model_ids.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_sample_rate.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_to_text_language.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_to_text_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_to_text_translate_language.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/speech_to_text_translate_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/spoken_form_numerals_format.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/stop_configuration.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/text_to_speech_language.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/text_to_speech_model.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/text_to_speech_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/text_to_speech_speaker.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/timestamps_model.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/translate_mode.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/translate_speaker_gender.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/translation_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/types/transliteration_response.py +0 -0
- {sarvamai-0.1.5a3 → sarvamai-0.1.5a5}/src/sarvamai/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sarvamai
|
|
3
|
-
Version: 0.1.5a3
|
|
3
|
+
Version: 0.1.5a5
|
|
4
4
|
Summary:
|
|
5
5
|
Requires-Python: >=3.8,<4.0
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -22,6 +22,7 @@ Requires-Dist: httpx (>=0.21.2)
|
|
|
22
22
|
Requires-Dist: pydantic (>=1.9.2)
|
|
23
23
|
Requires-Dist: pydantic-core (>=2.18.2,<3.0.0)
|
|
24
24
|
Requires-Dist: typing_extensions (>=4.0.0)
|
|
25
|
+
Requires-Dist: websockets (==12.0)
|
|
25
26
|
Description-Content-Type: text/markdown
|
|
26
27
|
|
|
27
28
|
|
|
@@ -3,7 +3,7 @@ name = "sarvamai"
|
|
|
3
3
|
|
|
4
4
|
[tool.poetry]
|
|
5
5
|
name = "sarvamai"
|
|
6
|
-
version = "0.1.5a3"
|
|
6
|
+
version = "0.1.5a5"
|
|
7
7
|
description = ""
|
|
8
8
|
readme = "README.md"
|
|
9
9
|
authors = []
|
|
@@ -39,6 +39,7 @@ httpx = ">=0.21.2"
|
|
|
39
39
|
pydantic = ">= 1.9.2"
|
|
40
40
|
pydantic-core = "^2.18.2"
|
|
41
41
|
typing_extensions = ">= 4.0.0"
|
|
42
|
+
websockets = "12.0"
|
|
42
43
|
|
|
43
44
|
[tool.poetry.group.dev.dependencies]
|
|
44
45
|
mypy = "==1.13.0"
|
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
5
|
from .types import (
|
|
6
|
+
AudioData,
|
|
7
|
+
AudioDataEncoding,
|
|
8
|
+
AudioMessage,
|
|
6
9
|
ChatCompletionRequestAssistantMessage,
|
|
7
10
|
ChatCompletionRequestMessage,
|
|
8
11
|
ChatCompletionRequestMessage_Assistant,
|
|
@@ -17,9 +20,12 @@ from .types import (
|
|
|
17
20
|
DiarizedEntry,
|
|
18
21
|
DiarizedTranscript,
|
|
19
22
|
ErrorCode,
|
|
23
|
+
ErrorData,
|
|
20
24
|
ErrorDetails,
|
|
21
25
|
ErrorMessage,
|
|
26
|
+
EventsData,
|
|
22
27
|
FinishReason,
|
|
28
|
+
Format,
|
|
23
29
|
LanguageIdentificationResponse,
|
|
24
30
|
NumeralsFormat,
|
|
25
31
|
ReasoningEffort,
|
|
@@ -29,6 +35,9 @@ from .types import (
|
|
|
29
35
|
SpeechToTextLanguage,
|
|
30
36
|
SpeechToTextModel,
|
|
31
37
|
SpeechToTextResponse,
|
|
38
|
+
SpeechToTextStreamingResponse,
|
|
39
|
+
SpeechToTextStreamingResponseData,
|
|
40
|
+
SpeechToTextStreamingResponseType,
|
|
32
41
|
SpeechToTextTranslateLanguage,
|
|
33
42
|
SpeechToTextTranslateModel,
|
|
34
43
|
SpeechToTextTranslateResponse,
|
|
@@ -39,14 +48,16 @@ from .types import (
|
|
|
39
48
|
TextToSpeechResponse,
|
|
40
49
|
TextToSpeechSpeaker,
|
|
41
50
|
TimestampsModel,
|
|
51
|
+
TranscriptionData,
|
|
52
|
+
TranscriptionMetrics,
|
|
42
53
|
TranslateMode,
|
|
43
54
|
TranslateModel,
|
|
44
|
-
TranslatePostprocessing,
|
|
45
55
|
TranslateSourceLanguage,
|
|
46
56
|
TranslateSpeakerGender,
|
|
47
57
|
TranslateTargetLanguage,
|
|
48
58
|
TranslationResponse,
|
|
49
59
|
TranslatiterateTargetLanguage,
|
|
60
|
+
TransliterateMode,
|
|
50
61
|
TransliterateSourceLanguage,
|
|
51
62
|
TransliterationResponse,
|
|
52
63
|
)
|
|
@@ -58,10 +69,12 @@ from .errors import (
|
|
|
58
69
|
TooManyRequestsError,
|
|
59
70
|
UnprocessableEntityError,
|
|
60
71
|
)
|
|
61
|
-
from . import chat, speech_to_text, text, text_to_speech
|
|
72
|
+
from . import chat, speech_to_text, speech_to_text_streaming, text, text_to_speech
|
|
62
73
|
from .client import AsyncSarvamAI, SarvamAI
|
|
63
74
|
from .environment import SarvamAIEnvironment
|
|
64
75
|
from .requests import (
|
|
76
|
+
AudioDataParams,
|
|
77
|
+
AudioMessageParams,
|
|
65
78
|
ChatCompletionRequestAssistantMessageParams,
|
|
66
79
|
ChatCompletionRequestMessageParams,
|
|
67
80
|
ChatCompletionRequestMessage_AssistantParams,
|
|
@@ -75,21 +88,33 @@ from .requests import (
|
|
|
75
88
|
CreateChatCompletionResponseParams,
|
|
76
89
|
DiarizedEntryParams,
|
|
77
90
|
DiarizedTranscriptParams,
|
|
91
|
+
ErrorDataParams,
|
|
78
92
|
ErrorDetailsParams,
|
|
79
93
|
ErrorMessageParams,
|
|
94
|
+
EventsDataParams,
|
|
80
95
|
LanguageIdentificationResponseParams,
|
|
81
96
|
SpeechToTextResponseParams,
|
|
97
|
+
SpeechToTextStreamingResponseDataParams,
|
|
98
|
+
SpeechToTextStreamingResponseParams,
|
|
82
99
|
SpeechToTextTranslateResponseParams,
|
|
83
100
|
StopConfigurationParams,
|
|
84
101
|
TextToSpeechResponseParams,
|
|
85
102
|
TimestampsModelParams,
|
|
103
|
+
TranscriptionDataParams,
|
|
104
|
+
TranscriptionMetricsParams,
|
|
86
105
|
TranslationResponseParams,
|
|
87
106
|
TransliterationResponseParams,
|
|
88
107
|
)
|
|
108
|
+
from .speech_to_text_streaming import SpeechToTextStreamingLanguageCode, SpeechToTextStreamingModel
|
|
89
109
|
from .version import __version__
|
|
90
110
|
|
|
91
111
|
__all__ = [
|
|
92
112
|
"AsyncSarvamAI",
|
|
113
|
+
"AudioData",
|
|
114
|
+
"AudioDataEncoding",
|
|
115
|
+
"AudioDataParams",
|
|
116
|
+
"AudioMessage",
|
|
117
|
+
"AudioMessageParams",
|
|
93
118
|
"BadRequestError",
|
|
94
119
|
"ChatCompletionRequestAssistantMessage",
|
|
95
120
|
"ChatCompletionRequestAssistantMessageParams",
|
|
@@ -118,12 +143,17 @@ __all__ = [
|
|
|
118
143
|
"DiarizedTranscript",
|
|
119
144
|
"DiarizedTranscriptParams",
|
|
120
145
|
"ErrorCode",
|
|
146
|
+
"ErrorData",
|
|
147
|
+
"ErrorDataParams",
|
|
121
148
|
"ErrorDetails",
|
|
122
149
|
"ErrorDetailsParams",
|
|
123
150
|
"ErrorMessage",
|
|
124
151
|
"ErrorMessageParams",
|
|
152
|
+
"EventsData",
|
|
153
|
+
"EventsDataParams",
|
|
125
154
|
"FinishReason",
|
|
126
155
|
"ForbiddenError",
|
|
156
|
+
"Format",
|
|
127
157
|
"InternalServerError",
|
|
128
158
|
"LanguageIdentificationResponse",
|
|
129
159
|
"LanguageIdentificationResponseParams",
|
|
@@ -139,6 +169,13 @@ __all__ = [
|
|
|
139
169
|
"SpeechToTextModel",
|
|
140
170
|
"SpeechToTextResponse",
|
|
141
171
|
"SpeechToTextResponseParams",
|
|
172
|
+
"SpeechToTextStreamingLanguageCode",
|
|
173
|
+
"SpeechToTextStreamingModel",
|
|
174
|
+
"SpeechToTextStreamingResponse",
|
|
175
|
+
"SpeechToTextStreamingResponseData",
|
|
176
|
+
"SpeechToTextStreamingResponseDataParams",
|
|
177
|
+
"SpeechToTextStreamingResponseParams",
|
|
178
|
+
"SpeechToTextStreamingResponseType",
|
|
142
179
|
"SpeechToTextTranslateLanguage",
|
|
143
180
|
"SpeechToTextTranslateModel",
|
|
144
181
|
"SpeechToTextTranslateResponse",
|
|
@@ -154,15 +191,19 @@ __all__ = [
|
|
|
154
191
|
"TimestampsModel",
|
|
155
192
|
"TimestampsModelParams",
|
|
156
193
|
"TooManyRequestsError",
|
|
194
|
+
"TranscriptionData",
|
|
195
|
+
"TranscriptionDataParams",
|
|
196
|
+
"TranscriptionMetrics",
|
|
197
|
+
"TranscriptionMetricsParams",
|
|
157
198
|
"TranslateMode",
|
|
158
199
|
"TranslateModel",
|
|
159
|
-
"TranslatePostprocessing",
|
|
160
200
|
"TranslateSourceLanguage",
|
|
161
201
|
"TranslateSpeakerGender",
|
|
162
202
|
"TranslateTargetLanguage",
|
|
163
203
|
"TranslationResponse",
|
|
164
204
|
"TranslationResponseParams",
|
|
165
205
|
"TranslatiterateTargetLanguage",
|
|
206
|
+
"TransliterateMode",
|
|
166
207
|
"TransliterateSourceLanguage",
|
|
167
208
|
"TransliterationResponse",
|
|
168
209
|
"TransliterationResponseParams",
|
|
@@ -170,6 +211,7 @@ __all__ = [
|
|
|
170
211
|
"__version__",
|
|
171
212
|
"chat",
|
|
172
213
|
"speech_to_text",
|
|
214
|
+
"speech_to_text_streaming",
|
|
173
215
|
"text",
|
|
174
216
|
"text_to_speech",
|
|
175
217
|
]
|
|
@@ -83,7 +83,6 @@ class ChatClient:
|
|
|
83
83
|
seed : typing.Optional[int]
|
|
84
84
|
This feature is in Beta.
|
|
85
85
|
If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
|
|
86
|
-
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
|
|
87
86
|
|
|
88
87
|
frequency_penalty : typing.Optional[float]
|
|
89
88
|
Number between -2.0 and 2.0. Positive values penalize new tokens based on
|
|
@@ -204,7 +203,6 @@ class AsyncChatClient:
|
|
|
204
203
|
seed : typing.Optional[int]
|
|
205
204
|
This feature is in Beta.
|
|
206
205
|
If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
|
|
207
|
-
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
|
|
208
206
|
|
|
209
207
|
frequency_penalty : typing.Optional[float]
|
|
210
208
|
Number between -2.0 and 2.0. Positive values penalize new tokens based on
|
|
@@ -81,7 +81,6 @@ class RawChatClient:
|
|
|
81
81
|
seed : typing.Optional[int]
|
|
82
82
|
This feature is in Beta.
|
|
83
83
|
If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
|
|
84
|
-
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
|
|
85
84
|
|
|
86
85
|
frequency_penalty : typing.Optional[float]
|
|
87
86
|
Number between -2.0 and 2.0. Positive values penalize new tokens based on
|
|
@@ -262,7 +261,6 @@ class AsyncRawChatClient:
|
|
|
262
261
|
seed : typing.Optional[int]
|
|
263
262
|
This feature is in Beta.
|
|
264
263
|
If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
|
|
265
|
-
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
|
|
266
264
|
|
|
267
265
|
frequency_penalty : typing.Optional[float]
|
|
268
266
|
Number between -2.0 and 2.0. Positive values penalize new tokens based on
|
|
@@ -9,6 +9,7 @@ from .core.api_error import ApiError
|
|
|
9
9
|
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
10
10
|
from .environment import SarvamAIEnvironment
|
|
11
11
|
from .speech_to_text.client import AsyncSpeechToTextClient, SpeechToTextClient
|
|
12
|
+
from .speech_to_text_streaming.client import AsyncSpeechToTextStreamingClient, SpeechToTextStreamingClient
|
|
12
13
|
from .text.client import AsyncTextClient, TextClient
|
|
13
14
|
from .text_to_speech.client import AsyncTextToSpeechClient, TextToSpeechClient
|
|
14
15
|
|
|
@@ -77,6 +78,7 @@ class SarvamAI:
|
|
|
77
78
|
self.speech_to_text = SpeechToTextClient(client_wrapper=self._client_wrapper)
|
|
78
79
|
self.text_to_speech = TextToSpeechClient(client_wrapper=self._client_wrapper)
|
|
79
80
|
self.chat = ChatClient(client_wrapper=self._client_wrapper)
|
|
81
|
+
self.speech_to_text_streaming = SpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
80
82
|
|
|
81
83
|
|
|
82
84
|
class AsyncSarvamAI:
|
|
@@ -143,3 +145,4 @@ class AsyncSarvamAI:
|
|
|
143
145
|
self.speech_to_text = AsyncSpeechToTextClient(client_wrapper=self._client_wrapper)
|
|
144
146
|
self.text_to_speech = AsyncTextToSpeechClient(client_wrapper=self._client_wrapper)
|
|
145
147
|
self.chat = AsyncChatClient(client_wrapper=self._client_wrapper)
|
|
148
|
+
self.speech_to_text_streaming = AsyncSpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
@@ -17,10 +17,10 @@ class BaseClientWrapper:
|
|
|
17
17
|
|
|
18
18
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
19
19
|
headers: typing.Dict[str, str] = {
|
|
20
|
-
"User-Agent": "sarvamai/0.1.5a3",
|
|
20
|
+
"User-Agent": "sarvamai/0.1.5a5",
|
|
21
21
|
"X-Fern-Language": "Python",
|
|
22
22
|
"X-Fern-SDK-Name": "sarvamai",
|
|
23
|
-
"X-Fern-SDK-Version": "0.1.5a3",
|
|
23
|
+
"X-Fern-SDK-Version": "0.1.5a5",
|
|
24
24
|
}
|
|
25
25
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
26
26
|
return headers
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
+
from .audio_data import AudioDataParams
|
|
6
|
+
from .audio_message import AudioMessageParams
|
|
5
7
|
from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
|
|
6
8
|
from .chat_completion_request_message import (
|
|
7
9
|
ChatCompletionRequestMessageParams,
|
|
@@ -17,18 +19,26 @@ from .completion_usage import CompletionUsageParams
|
|
|
17
19
|
from .create_chat_completion_response import CreateChatCompletionResponseParams
|
|
18
20
|
from .diarized_entry import DiarizedEntryParams
|
|
19
21
|
from .diarized_transcript import DiarizedTranscriptParams
|
|
22
|
+
from .error_data import ErrorDataParams
|
|
20
23
|
from .error_details import ErrorDetailsParams
|
|
21
24
|
from .error_message import ErrorMessageParams
|
|
25
|
+
from .events_data import EventsDataParams
|
|
22
26
|
from .language_identification_response import LanguageIdentificationResponseParams
|
|
23
27
|
from .speech_to_text_response import SpeechToTextResponseParams
|
|
28
|
+
from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
|
|
29
|
+
from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseDataParams
|
|
24
30
|
from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
|
|
25
31
|
from .stop_configuration import StopConfigurationParams
|
|
26
32
|
from .text_to_speech_response import TextToSpeechResponseParams
|
|
27
33
|
from .timestamps_model import TimestampsModelParams
|
|
34
|
+
from .transcription_data import TranscriptionDataParams
|
|
35
|
+
from .transcription_metrics import TranscriptionMetricsParams
|
|
28
36
|
from .translation_response import TranslationResponseParams
|
|
29
37
|
from .transliteration_response import TransliterationResponseParams
|
|
30
38
|
|
|
31
39
|
__all__ = [
|
|
40
|
+
"AudioDataParams",
|
|
41
|
+
"AudioMessageParams",
|
|
32
42
|
"ChatCompletionRequestAssistantMessageParams",
|
|
33
43
|
"ChatCompletionRequestMessageParams",
|
|
34
44
|
"ChatCompletionRequestMessage_AssistantParams",
|
|
@@ -42,14 +52,20 @@ __all__ = [
|
|
|
42
52
|
"CreateChatCompletionResponseParams",
|
|
43
53
|
"DiarizedEntryParams",
|
|
44
54
|
"DiarizedTranscriptParams",
|
|
55
|
+
"ErrorDataParams",
|
|
45
56
|
"ErrorDetailsParams",
|
|
46
57
|
"ErrorMessageParams",
|
|
58
|
+
"EventsDataParams",
|
|
47
59
|
"LanguageIdentificationResponseParams",
|
|
48
60
|
"SpeechToTextResponseParams",
|
|
61
|
+
"SpeechToTextStreamingResponseDataParams",
|
|
62
|
+
"SpeechToTextStreamingResponseParams",
|
|
49
63
|
"SpeechToTextTranslateResponseParams",
|
|
50
64
|
"StopConfigurationParams",
|
|
51
65
|
"TextToSpeechResponseParams",
|
|
52
66
|
"TimestampsModelParams",
|
|
67
|
+
"TranscriptionDataParams",
|
|
68
|
+
"TranscriptionMetricsParams",
|
|
53
69
|
"TranslationResponseParams",
|
|
54
70
|
"TransliterationResponseParams",
|
|
55
71
|
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.audio_data_encoding import AudioDataEncoding
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AudioDataParams(typing_extensions.TypedDict):
|
|
8
|
+
data: str
|
|
9
|
+
"""
|
|
10
|
+
Base64 encoded audio data
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
sample_rate: int
|
|
14
|
+
"""
|
|
15
|
+
Audio sample rate in Hz (16kHz preferred, 8kHz least preferred)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
encoding: AudioDataEncoding
|
|
19
|
+
"""
|
|
20
|
+
Audio encoding format
|
|
21
|
+
"""
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EventsDataParams(typing_extensions.TypedDict):
|
|
9
|
+
event_type: str
|
|
10
|
+
"""
|
|
11
|
+
Type of event
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
timestamp: dt.datetime
|
|
15
|
+
"""
|
|
16
|
+
Event timestamp
|
|
17
|
+
"""
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
from ..types.speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
|
|
5
|
+
from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseDataParams
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SpeechToTextStreamingResponseParams(typing_extensions.TypedDict):
|
|
9
|
+
type: SpeechToTextStreamingResponseType
|
|
10
|
+
data: SpeechToTextStreamingResponseDataParams
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from .error_data import ErrorDataParams
|
|
6
|
+
from .events_data import EventsDataParams
|
|
7
|
+
from .transcription_data import TranscriptionDataParams
|
|
8
|
+
|
|
9
|
+
SpeechToTextStreamingResponseDataParams = typing.Union[TranscriptionDataParams, ErrorDataParams, EventsDataParams]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import typing_extensions
|
|
6
|
+
from .transcription_metrics import TranscriptionMetricsParams
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TranscriptionDataParams(typing_extensions.TypedDict):
|
|
10
|
+
request_id: str
|
|
11
|
+
"""
|
|
12
|
+
Unique identifier for the request
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
transcript: str
|
|
16
|
+
"""
|
|
17
|
+
Transcript of the provided speech
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
21
|
+
"""
|
|
22
|
+
Timestamp information (if available)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
26
|
+
"""
|
|
27
|
+
Diarized transcript of the provided speech
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
language_code: typing_extensions.NotRequired[str]
|
|
31
|
+
"""
|
|
32
|
+
BCP-47 code of detected language
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
metrics: TranscriptionMetricsParams
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing_extensions
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TranscriptionMetricsParams(typing_extensions.TypedDict):
|
|
7
|
+
audio_duration: float
|
|
8
|
+
"""
|
|
9
|
+
Duration of processed audio in seconds
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
processing_latency: float
|
|
13
|
+
"""
|
|
14
|
+
Processing latency in seconds
|
|
15
|
+
"""
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import websockets
|
|
8
|
+
import websockets.sync.client as websockets_sync_client
|
|
9
|
+
from ..core.api_error import ApiError
|
|
10
|
+
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
|
+
from ..core.request_options import RequestOptions
|
|
12
|
+
from .raw_client import AsyncRawSpeechToTextStreamingClient, RawSpeechToTextStreamingClient
|
|
13
|
+
from .socket_client import AsyncSpeechToTextStreamingSocketClient, SpeechToTextStreamingSocketClient
|
|
14
|
+
from .types.speech_to_text_streaming_language_code import SpeechToTextStreamingLanguageCode
|
|
15
|
+
from .types.speech_to_text_streaming_model import SpeechToTextStreamingModel
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SpeechToTextStreamingClient:
    """
    Synchronous entry point for the speech-to-text streaming WebSocket.

    Wraps a ``RawSpeechToTextStreamingClient`` and exposes ``connect`` as a
    context manager yielding a ``SpeechToTextStreamingSocketClient``.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._raw_client = RawSpeechToTextStreamingClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawSpeechToTextStreamingClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawSpeechToTextStreamingClient
        """
        return self._raw_client

    @contextmanager
    def connect(
        self,
        *,
        language_code: SpeechToTextStreamingLanguageCode,
        model: SpeechToTextStreamingModel,
        high_vad_sensitivity: typing.Optional[str] = None,
        vad_signals: typing.Optional[str] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[SpeechToTextStreamingSocketClient]:
        """
        WebSocket channel for real-time speech to text streaming

        Parameters
        ----------
        language_code : SpeechToTextStreamingLanguageCode
            Language code for speech recognition

        model : SpeechToTextStreamingModel
            Speech to text model to use

        high_vad_sensitivity : typing.Optional[str]
            Enable high VAD (Voice Activity Detection) sensitivity

        vad_signals : typing.Optional[str]
            Enable VAD signals in response

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        SpeechToTextStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status
            (a dedicated message is used for 401).
        """
        ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text/ws"
        query_params = httpx.QueryParams()
        if language_code is not None:
            query_params = query_params.add("language-code", language_code)
        if model is not None:
            query_params = query_params.add("model", model)
        if high_vad_sensitivity is not None:
            query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
        if vad_signals is not None:
            query_params = query_params.add("vad_signals", vad_signals)
        ws_url = ws_url + f"?{query_params}"
        headers = self._raw_client._client_wrapper.get_headers()
        # An explicitly passed key overrides whatever the wrapper supplied.
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            headers.update(request_options["additional_headers"])
        try:
            with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
                yield SpeechToTextStreamingSocketClient(websocket=protocol)
        except (websockets.exceptions.InvalidStatus, websockets.exceptions.InvalidStatusCode) as exc:
            # The sync client signals a rejected handshake with InvalidStatus,
            # which carries the status on ``exc.response``. The legacy
            # InvalidStatusCode (status on ``exc.status_code``) is still caught
            # so previously handled cases keep being translated.
            if isinstance(exc, websockets.exceptions.InvalidStatus):
                status_code: int = exc.response.status_code
            else:
                status_code = exc.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class AsyncSpeechToTextStreamingClient:
    """
    Asynchronous entry point for the speech-to-text streaming WebSocket.

    Wraps an ``AsyncRawSpeechToTextStreamingClient`` and exposes ``connect`` as
    an async context manager yielding an ``AsyncSpeechToTextStreamingSocketClient``.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawSpeechToTextStreamingClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawSpeechToTextStreamingClient:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawSpeechToTextStreamingClient
        """
        return self._raw_client

    @asynccontextmanager
    async def connect(
        self,
        *,
        language_code: SpeechToTextStreamingLanguageCode,
        model: SpeechToTextStreamingModel,
        high_vad_sensitivity: typing.Optional[str] = None,
        vad_signals: typing.Optional[str] = None,
        api_subscription_key: typing.Optional[str] = None,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[AsyncSpeechToTextStreamingSocketClient]:
        """
        WebSocket channel for real-time speech to text streaming

        Parameters
        ----------
        language_code : SpeechToTextStreamingLanguageCode
            Language code for speech recognition

        model : SpeechToTextStreamingModel
            Speech to text model to use

        high_vad_sensitivity : typing.Optional[str]
            Enable high VAD (Voice Activity Detection) sensitivity

        vad_signals : typing.Optional[str]
            Enable VAD signals in response

        api_subscription_key : typing.Optional[str]
            API subscription key for authentication

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncSpeechToTextStreamingSocketClient

        Raises
        ------
        ApiError
            If the server rejects the WebSocket handshake with an HTTP status
            (a dedicated message is used for 401).
        """
        # ``extra_headers`` and ``InvalidStatusCode`` both belong to the legacy
        # asyncio client. Resolve that implementation explicitly so the call
        # does not depend on what the top-level ``websockets.connect`` points to
        # in the installed release; fall back to it only if the legacy module
        # is unavailable.
        try:
            from websockets.legacy.client import connect as websockets_client_connect  # type: ignore
        except ImportError:
            websockets_client_connect = websockets.connect  # type: ignore

        ws_url = self._raw_client._client_wrapper.get_environment().production + "/speech-to-text/ws"
        query_params = httpx.QueryParams()
        if language_code is not None:
            query_params = query_params.add("language-code", language_code)
        if model is not None:
            query_params = query_params.add("model", model)
        if high_vad_sensitivity is not None:
            query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
        if vad_signals is not None:
            query_params = query_params.add("vad_signals", vad_signals)
        ws_url = ws_url + f"?{query_params}"
        headers = self._raw_client._client_wrapper.get_headers()
        # An explicitly passed key overrides whatever the wrapper supplied.
        if api_subscription_key is not None:
            headers["Api-Subscription-Key"] = str(api_subscription_key)
        if request_options and "additional_headers" in request_options:
            headers.update(request_options["additional_headers"])
        try:
            async with websockets_client_connect(ws_url, extra_headers=headers) as protocol:
                yield AsyncSpeechToTextStreamingSocketClient(websocket=protocol)
        except websockets.exceptions.InvalidStatusCode as exc:
            status_code: int = exc.status_code
            if status_code == 401:
                raise ApiError(
                    status_code=status_code,
                    headers=dict(headers),
                    body="Websocket initialized with invalid credentials.",
                ) from exc
            raise ApiError(
                status_code=status_code,
                headers=dict(headers),
                body="Unexpected error when initializing websocket connection.",
            ) from exc
|