sarvamai 0.1.23a3__py3-none-any.whl → 0.1.23a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +203 -405
- sarvamai/chat/raw_client.py +20 -20
- sarvamai/client.py +34 -186
- sarvamai/core/__init__.py +21 -76
- sarvamai/core/client_wrapper.py +3 -19
- sarvamai/core/force_multipart.py +2 -4
- sarvamai/core/http_client.py +97 -217
- sarvamai/core/http_response.py +1 -1
- sarvamai/core/jsonable_encoder.py +0 -8
- sarvamai/core/pydantic_utilities.py +4 -110
- sarvamai/errors/__init__.py +6 -40
- sarvamai/errors/bad_request_error.py +1 -1
- sarvamai/errors/forbidden_error.py +1 -1
- sarvamai/errors/internal_server_error.py +1 -1
- sarvamai/errors/service_unavailable_error.py +1 -1
- sarvamai/errors/too_many_requests_error.py +1 -1
- sarvamai/errors/unprocessable_entity_error.py +1 -1
- sarvamai/requests/__init__.py +62 -150
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/error_response_data.py +1 -1
- sarvamai/requests/file_signed_url_details.py +1 -1
- sarvamai/requests/speech_to_text_job_parameters.py +43 -2
- sarvamai/requests/speech_to_text_transcription_data.py +2 -2
- sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/speech_to_text/client.py +95 -10
- sarvamai/speech_to_text/raw_client.py +147 -64
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_job/raw_client.py +120 -120
- sarvamai/speech_to_text_streaming/__init__.py +10 -38
- sarvamai/speech_to_text_streaming/client.py +90 -8
- sarvamai/speech_to_text_streaming/raw_client.py +90 -8
- sarvamai/speech_to_text_streaming/types/__init__.py +8 -36
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_job/raw_client.py +120 -120
- sarvamai/speech_to_text_translate_streaming/__init__.py +5 -36
- sarvamai/speech_to_text_translate_streaming/client.py +8 -2
- sarvamai/speech_to_text_translate_streaming/raw_client.py +8 -2
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +3 -36
- sarvamai/text/raw_client.py +60 -60
- sarvamai/text_to_speech/client.py +100 -16
- sarvamai/text_to_speech/raw_client.py +120 -36
- sarvamai/text_to_speech_streaming/__init__.py +2 -29
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +3 -31
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +102 -222
- sarvamai/types/chat_completion_request_message.py +2 -6
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/error_response_data.py +1 -1
- sarvamai/types/file_signed_url_details.py +1 -1
- sarvamai/types/mode.py +5 -0
- sarvamai/types/speech_to_text_job_parameters.py +43 -2
- sarvamai/types/speech_to_text_model.py +1 -1
- sarvamai/types/speech_to_text_transcription_data.py +2 -2
- sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/METADATA +1 -2
- {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/RECORD +66 -66
- sarvamai/core/http_sse/__init__.py +0 -42
- sarvamai/core/http_sse/_api.py +0 -112
- sarvamai/core/http_sse/_decoders.py +0 -61
- sarvamai/core/http_sse/_exceptions.py +0 -7
- sarvamai/core/http_sse/_models.py +0 -17
- {sarvamai-0.1.23a3.dist-info → sarvamai-0.1.23a5.dist-info}/WHEEL +0 -0
sarvamai/errors/__init__.py
CHANGED
|
@@ -2,46 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
import typing
|
|
6
|
-
from importlib import import_module
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
from .internal_server_error import InternalServerError
|
|
12
|
-
from .service_unavailable_error import ServiceUnavailableError
|
|
13
|
-
from .too_many_requests_error import TooManyRequestsError
|
|
14
|
-
from .unprocessable_entity_error import UnprocessableEntityError
|
|
15
|
-
_dynamic_imports: typing.Dict[str, str] = {
|
|
16
|
-
"BadRequestError": ".bad_request_error",
|
|
17
|
-
"ForbiddenError": ".forbidden_error",
|
|
18
|
-
"InternalServerError": ".internal_server_error",
|
|
19
|
-
"ServiceUnavailableError": ".service_unavailable_error",
|
|
20
|
-
"TooManyRequestsError": ".too_many_requests_error",
|
|
21
|
-
"UnprocessableEntityError": ".unprocessable_entity_error",
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def __getattr__(attr_name: str) -> typing.Any:
|
|
26
|
-
module_name = _dynamic_imports.get(attr_name)
|
|
27
|
-
if module_name is None:
|
|
28
|
-
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
29
|
-
try:
|
|
30
|
-
module = import_module(module_name, __package__)
|
|
31
|
-
if module_name == f".{attr_name}":
|
|
32
|
-
return module
|
|
33
|
-
else:
|
|
34
|
-
return getattr(module, attr_name)
|
|
35
|
-
except ImportError as e:
|
|
36
|
-
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
37
|
-
except AttributeError as e:
|
|
38
|
-
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def __dir__():
|
|
42
|
-
lazy_attrs = list(_dynamic_imports.keys())
|
|
43
|
-
return sorted(lazy_attrs)
|
|
44
|
-
|
|
5
|
+
from .bad_request_error import BadRequestError
|
|
6
|
+
from .forbidden_error import ForbiddenError
|
|
7
|
+
from .internal_server_error import InternalServerError
|
|
8
|
+
from .service_unavailable_error import ServiceUnavailableError
|
|
9
|
+
from .too_many_requests_error import TooManyRequestsError
|
|
10
|
+
from .unprocessable_entity_error import UnprocessableEntityError
|
|
45
11
|
|
|
46
12
|
__all__ = [
|
|
47
13
|
"BadRequestError",
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class BadRequestError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=400, headers=headers, body=body)
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ForbiddenError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=403, headers=headers, body=body)
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class InternalServerError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=500, headers=headers, body=body)
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ServiceUnavailableError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=503, headers=headers, body=body)
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class TooManyRequestsError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=429, headers=headers, body=body)
|
|
@@ -6,5 +6,5 @@ from ..core.api_error import ApiError
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class UnprocessableEntityError(ApiError):
|
|
9
|
-
def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
9
|
+
def __init__(self, body: typing.Optional[typing.Any], headers: typing.Optional[typing.Dict[str, str]] = None):
|
|
10
10
|
super().__init__(status_code=422, headers=headers, body=body)
|
sarvamai/requests/__init__.py
CHANGED
|
@@ -2,156 +2,68 @@
|
|
|
2
2
|
|
|
3
3
|
# isort: skip_file
|
|
4
4
|
|
|
5
|
-
import typing
|
|
6
|
-
from importlib import import_module
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
from .timestamps_model import TimestampsModelParams
|
|
68
|
-
from .transcription_metrics import TranscriptionMetricsParams
|
|
69
|
-
from .translation_response import TranslationResponseParams
|
|
70
|
-
from .transliteration_response import TransliterationResponseParams
|
|
71
|
-
_dynamic_imports: typing.Dict[str, str] = {
|
|
72
|
-
"AudioDataParams": ".audio_data",
|
|
73
|
-
"AudioMessageParams": ".audio_message",
|
|
74
|
-
"AudioOutputDataParams": ".audio_output_data",
|
|
75
|
-
"AudioOutputParams": ".audio_output",
|
|
76
|
-
"BaseJobParametersParams": ".base_job_parameters",
|
|
77
|
-
"BulkJobCallbackParams": ".bulk_job_callback",
|
|
78
|
-
"BulkJobInitResponseV1Params": ".bulk_job_init_response_v_1",
|
|
79
|
-
"ChatCompletionRequestAssistantMessageParams": ".chat_completion_request_assistant_message",
|
|
80
|
-
"ChatCompletionRequestMessageParams": ".chat_completion_request_message",
|
|
81
|
-
"ChatCompletionRequestMessage_AssistantParams": ".chat_completion_request_message",
|
|
82
|
-
"ChatCompletionRequestMessage_SystemParams": ".chat_completion_request_message",
|
|
83
|
-
"ChatCompletionRequestMessage_UserParams": ".chat_completion_request_message",
|
|
84
|
-
"ChatCompletionRequestSystemMessageParams": ".chat_completion_request_system_message",
|
|
85
|
-
"ChatCompletionRequestUserMessageParams": ".chat_completion_request_user_message",
|
|
86
|
-
"ChatCompletionResponseMessageParams": ".chat_completion_response_message",
|
|
87
|
-
"ChoiceParams": ".choice",
|
|
88
|
-
"CompletionUsageParams": ".completion_usage",
|
|
89
|
-
"ConfigMessageParams": ".config_message",
|
|
90
|
-
"ConfigureConnectionDataParams": ".configure_connection_data",
|
|
91
|
-
"ConfigureConnectionParams": ".configure_connection",
|
|
92
|
-
"CreateChatCompletionResponseParams": ".create_chat_completion_response",
|
|
93
|
-
"DiarizedEntryParams": ".diarized_entry",
|
|
94
|
-
"DiarizedTranscriptParams": ".diarized_transcript",
|
|
95
|
-
"ErrorDataParams": ".error_data",
|
|
96
|
-
"ErrorDetailsParams": ".error_details",
|
|
97
|
-
"ErrorMessageParams": ".error_message",
|
|
98
|
-
"ErrorResponseDataParams": ".error_response_data",
|
|
99
|
-
"ErrorResponseParams": ".error_response",
|
|
100
|
-
"EventResponseDataParams": ".event_response_data",
|
|
101
|
-
"EventResponseParams": ".event_response",
|
|
102
|
-
"EventsDataParams": ".events_data",
|
|
103
|
-
"FileSignedUrlDetailsParams": ".file_signed_url_details",
|
|
104
|
-
"FilesDownloadResponseParams": ".files_download_response",
|
|
105
|
-
"FilesRequestParams": ".files_request",
|
|
106
|
-
"FilesUploadResponseParams": ".files_upload_response",
|
|
107
|
-
"FlushSignalParams": ".flush_signal",
|
|
108
|
-
"JobStatusV1ResponseParams": ".job_status_v_1_response",
|
|
109
|
-
"LanguageIdentificationResponseParams": ".language_identification_response",
|
|
110
|
-
"PingSignalParams": ".ping_signal",
|
|
111
|
-
"SendTextDataParams": ".send_text_data",
|
|
112
|
-
"SendTextParams": ".send_text",
|
|
113
|
-
"SpeechToTextJobParametersParams": ".speech_to_text_job_parameters",
|
|
114
|
-
"SpeechToTextResponseDataParams": ".speech_to_text_response_data",
|
|
115
|
-
"SpeechToTextResponseParams": ".speech_to_text_response",
|
|
116
|
-
"SpeechToTextStreamingResponseParams": ".speech_to_text_streaming_response",
|
|
117
|
-
"SpeechToTextTranscriptionDataParams": ".speech_to_text_transcription_data",
|
|
118
|
-
"SpeechToTextTranslateJobParametersParams": ".speech_to_text_translate_job_parameters",
|
|
119
|
-
"SpeechToTextTranslateResponseDataParams": ".speech_to_text_translate_response_data",
|
|
120
|
-
"SpeechToTextTranslateResponseParams": ".speech_to_text_translate_response",
|
|
121
|
-
"SpeechToTextTranslateStreamingResponseParams": ".speech_to_text_translate_streaming_response",
|
|
122
|
-
"SpeechToTextTranslateTranscriptionDataParams": ".speech_to_text_translate_transcription_data",
|
|
123
|
-
"StopConfigurationParams": ".stop_configuration",
|
|
124
|
-
"SttFlushSignalParams": ".stt_flush_signal",
|
|
125
|
-
"TaskDetailV1Params": ".task_detail_v_1",
|
|
126
|
-
"TaskFileDetailsParams": ".task_file_details",
|
|
127
|
-
"TextToSpeechResponseParams": ".text_to_speech_response",
|
|
128
|
-
"TimestampsModelParams": ".timestamps_model",
|
|
129
|
-
"TranscriptionMetricsParams": ".transcription_metrics",
|
|
130
|
-
"TranslationResponseParams": ".translation_response",
|
|
131
|
-
"TransliterationResponseParams": ".transliteration_response",
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def __getattr__(attr_name: str) -> typing.Any:
|
|
136
|
-
module_name = _dynamic_imports.get(attr_name)
|
|
137
|
-
if module_name is None:
|
|
138
|
-
raise AttributeError(f"No {attr_name} found in _dynamic_imports for module name -> {__name__}")
|
|
139
|
-
try:
|
|
140
|
-
module = import_module(module_name, __package__)
|
|
141
|
-
if module_name == f".{attr_name}":
|
|
142
|
-
return module
|
|
143
|
-
else:
|
|
144
|
-
return getattr(module, attr_name)
|
|
145
|
-
except ImportError as e:
|
|
146
|
-
raise ImportError(f"Failed to import {attr_name} from {module_name}: {e}") from e
|
|
147
|
-
except AttributeError as e:
|
|
148
|
-
raise AttributeError(f"Failed to get {attr_name} from {module_name}: {e}") from e
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def __dir__():
|
|
152
|
-
lazy_attrs = list(_dynamic_imports.keys())
|
|
153
|
-
return sorted(lazy_attrs)
|
|
154
|
-
|
|
5
|
+
from .audio_data import AudioDataParams
|
|
6
|
+
from .audio_message import AudioMessageParams
|
|
7
|
+
from .audio_output import AudioOutputParams
|
|
8
|
+
from .audio_output_data import AudioOutputDataParams
|
|
9
|
+
from .base_job_parameters import BaseJobParametersParams
|
|
10
|
+
from .bulk_job_callback import BulkJobCallbackParams
|
|
11
|
+
from .bulk_job_init_response_v_1 import BulkJobInitResponseV1Params
|
|
12
|
+
from .chat_completion_request_assistant_message import ChatCompletionRequestAssistantMessageParams
|
|
13
|
+
from .chat_completion_request_message import (
|
|
14
|
+
ChatCompletionRequestMessageParams,
|
|
15
|
+
ChatCompletionRequestMessage_AssistantParams,
|
|
16
|
+
ChatCompletionRequestMessage_SystemParams,
|
|
17
|
+
ChatCompletionRequestMessage_UserParams,
|
|
18
|
+
)
|
|
19
|
+
from .chat_completion_request_system_message import ChatCompletionRequestSystemMessageParams
|
|
20
|
+
from .chat_completion_request_user_message import ChatCompletionRequestUserMessageParams
|
|
21
|
+
from .chat_completion_response_message import ChatCompletionResponseMessageParams
|
|
22
|
+
from .choice import ChoiceParams
|
|
23
|
+
from .completion_usage import CompletionUsageParams
|
|
24
|
+
from .config_message import ConfigMessageParams
|
|
25
|
+
from .configure_connection import ConfigureConnectionParams
|
|
26
|
+
from .configure_connection_data import ConfigureConnectionDataParams
|
|
27
|
+
from .create_chat_completion_response import CreateChatCompletionResponseParams
|
|
28
|
+
from .diarized_entry import DiarizedEntryParams
|
|
29
|
+
from .diarized_transcript import DiarizedTranscriptParams
|
|
30
|
+
from .error_data import ErrorDataParams
|
|
31
|
+
from .error_details import ErrorDetailsParams
|
|
32
|
+
from .error_message import ErrorMessageParams
|
|
33
|
+
from .error_response import ErrorResponseParams
|
|
34
|
+
from .error_response_data import ErrorResponseDataParams
|
|
35
|
+
from .event_response import EventResponseParams
|
|
36
|
+
from .event_response_data import EventResponseDataParams
|
|
37
|
+
from .events_data import EventsDataParams
|
|
38
|
+
from .file_signed_url_details import FileSignedUrlDetailsParams
|
|
39
|
+
from .files_download_response import FilesDownloadResponseParams
|
|
40
|
+
from .files_request import FilesRequestParams
|
|
41
|
+
from .files_upload_response import FilesUploadResponseParams
|
|
42
|
+
from .flush_signal import FlushSignalParams
|
|
43
|
+
from .job_status_v_1_response import JobStatusV1ResponseParams
|
|
44
|
+
from .language_identification_response import LanguageIdentificationResponseParams
|
|
45
|
+
from .ping_signal import PingSignalParams
|
|
46
|
+
from .send_text import SendTextParams
|
|
47
|
+
from .send_text_data import SendTextDataParams
|
|
48
|
+
from .speech_to_text_job_parameters import SpeechToTextJobParametersParams
|
|
49
|
+
from .speech_to_text_response import SpeechToTextResponseParams
|
|
50
|
+
from .speech_to_text_response_data import SpeechToTextResponseDataParams
|
|
51
|
+
from .speech_to_text_streaming_response import SpeechToTextStreamingResponseParams
|
|
52
|
+
from .speech_to_text_transcription_data import SpeechToTextTranscriptionDataParams
|
|
53
|
+
from .speech_to_text_translate_job_parameters import SpeechToTextTranslateJobParametersParams
|
|
54
|
+
from .speech_to_text_translate_response import SpeechToTextTranslateResponseParams
|
|
55
|
+
from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseDataParams
|
|
56
|
+
from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponseParams
|
|
57
|
+
from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionDataParams
|
|
58
|
+
from .stop_configuration import StopConfigurationParams
|
|
59
|
+
from .stt_flush_signal import SttFlushSignalParams
|
|
60
|
+
from .task_detail_v_1 import TaskDetailV1Params
|
|
61
|
+
from .task_file_details import TaskFileDetailsParams
|
|
62
|
+
from .text_to_speech_response import TextToSpeechResponseParams
|
|
63
|
+
from .timestamps_model import TimestampsModelParams
|
|
64
|
+
from .transcription_metrics import TranscriptionMetricsParams
|
|
65
|
+
from .translation_response import TranslationResponseParams
|
|
66
|
+
from .transliteration_response import TransliterationResponseParams
|
|
155
67
|
|
|
156
68
|
__all__ = [
|
|
157
69
|
"AudioDataParams",
|
|
@@ -12,6 +12,10 @@ class ConfigureConnectionParams(typing_extensions.TypedDict):
|
|
|
12
12
|
This initializes TTS parameters and can be updated at any time during the WebSocket lifecycle
|
|
13
13
|
by sending a new config message. When a config update is sent, any text currently in the buffer
|
|
14
14
|
will be automatically flushed and processed before applying the new configuration.
|
|
15
|
+
|
|
16
|
+
**Model-Specific Notes:**
|
|
17
|
+
- **bulbul:v2:** Supports pitch, loudness, pace (0.3-3.0). Default sample rate: 22050 Hz.
|
|
18
|
+
- **bulbul:v3-beta:** Does NOT support pitch/loudness. Pace range: 0.5-2.0. Supports temperature. Default sample rate: 24000 Hz.
|
|
15
19
|
"""
|
|
16
20
|
|
|
17
21
|
type: typing.Literal["config"]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
+
from ..types.configure_connection_data_model import ConfigureConnectionDataModel
|
|
4
5
|
from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
|
|
5
6
|
from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
|
|
6
7
|
from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
|
|
@@ -8,21 +9,25 @@ from ..types.configure_connection_data_target_language_code import ConfigureConn
|
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
12
|
+
model: typing_extensions.NotRequired[ConfigureConnectionDataModel]
|
|
13
|
+
"""
|
|
14
|
+
Specifies the model to use for text-to-speech conversion.
|
|
15
|
+
- **bulbul:v2** (default): Standard TTS model with pitch/loudness support
|
|
16
|
+
- **bulbul:v3-beta**: Advanced model with temperature control (no pitch/loudness)
|
|
17
|
+
"""
|
|
18
|
+
|
|
11
19
|
target_language_code: ConfigureConnectionDataTargetLanguageCode
|
|
12
20
|
"""
|
|
13
|
-
The language of the text
|
|
21
|
+
The language of the text in BCP-47 format
|
|
14
22
|
"""
|
|
15
23
|
|
|
16
24
|
speaker: ConfigureConnectionDataSpeaker
|
|
17
25
|
"""
|
|
18
26
|
The speaker voice to be used for the output audio.
|
|
19
27
|
|
|
20
|
-
**
|
|
21
|
-
|
|
22
|
-
**
|
|
23
|
-
- **bulbul:v2:**
|
|
24
|
-
- Female: Anushka, Manisha, Vidya, Arya
|
|
25
|
-
- Male: Abhilash, Karun, Hitesh
|
|
28
|
+
**Model Compatibility:**
|
|
29
|
+
- **bulbul:v2:** anushka (default), abhilash, manisha, vidya, arya, karun, hitesh
|
|
30
|
+
- **bulbul:v3-beta:** aditya (default), ritu, priya, neha, rahul, pooja, rohan, simran, kavya, amit, dev, ishita, shreya, ratan, varun, manan, sumit, roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
|
|
26
31
|
|
|
27
32
|
**Note:** Speaker selection must match the chosen model version.
|
|
28
33
|
"""
|
|
@@ -32,13 +37,18 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
|
32
37
|
Controls the pitch of the audio. Lower values result in a deeper voice,
|
|
33
38
|
while higher values make it sharper. The suitable range is between -0.75
|
|
34
39
|
and 0.75. Default is 0.0.
|
|
40
|
+
|
|
41
|
+
**Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
|
|
35
42
|
"""
|
|
36
43
|
|
|
37
44
|
pace: typing_extensions.NotRequired[float]
|
|
38
45
|
"""
|
|
39
46
|
Controls the speed of the audio. Lower values result in slower speech,
|
|
40
|
-
while higher values make it faster.
|
|
41
|
-
|
|
47
|
+
while higher values make it faster. Default is 1.0.
|
|
48
|
+
|
|
49
|
+
**Model-specific ranges:**
|
|
50
|
+
- **bulbul:v2:** 0.3 to 3.0
|
|
51
|
+
- **bulbul:v3-beta:** 0.5 to 2.0
|
|
42
52
|
"""
|
|
43
53
|
|
|
44
54
|
loudness: typing_extensions.NotRequired[float]
|
|
@@ -46,19 +56,38 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
|
|
|
46
56
|
Controls the loudness of the audio. Lower values result in quieter audio,
|
|
47
57
|
while higher values make it louder. The suitable range is between 0.3
|
|
48
58
|
and 3.0. Default is 1.0.
|
|
59
|
+
|
|
60
|
+
**Note:** NOT supported for bulbul:v3-beta. Will be ignored if provided.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
temperature: typing_extensions.NotRequired[float]
|
|
64
|
+
"""
|
|
65
|
+
Controls the randomness of the output. Lower values make the output more
|
|
66
|
+
focused and deterministic, while higher values make it more random.
|
|
67
|
+
The suitable range is between 0.01 and 1.0. Default is 0.6.
|
|
68
|
+
|
|
69
|
+
**Note:** Only supported for bulbul:v3-beta. Will be ignored for bulbul:v2.
|
|
49
70
|
"""
|
|
50
71
|
|
|
51
72
|
speech_sample_rate: typing_extensions.NotRequired[int]
|
|
52
73
|
"""
|
|
53
74
|
Specifies the sample rate of the output audio. Supported values are
|
|
54
|
-
8000, 16000, 22050, 24000 Hz.
|
|
75
|
+
8000, 16000, 22050, 24000 Hz.
|
|
76
|
+
|
|
77
|
+
**Model-specific defaults:**
|
|
78
|
+
- **bulbul:v2:** 22050 Hz
|
|
79
|
+
- **bulbul:v3-beta:** 24000 Hz
|
|
55
80
|
"""
|
|
56
81
|
|
|
57
82
|
enable_preprocessing: typing_extensions.NotRequired[bool]
|
|
58
83
|
"""
|
|
59
84
|
Controls whether normalization of English words and numeric entities
|
|
60
85
|
(e.g., numbers, dates) is performed. Set to true for better handling
|
|
61
|
-
of mixed-language text.
|
|
86
|
+
of mixed-language text.
|
|
87
|
+
|
|
88
|
+
**Model-specific defaults:**
|
|
89
|
+
- **bulbul:v2:** false (optional)
|
|
90
|
+
- **bulbul:v3-beta:** Always enabled (cannot be disabled)
|
|
62
91
|
"""
|
|
63
92
|
|
|
64
93
|
output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]
|
|
@@ -12,7 +12,7 @@ class ErrorResponseDataParams(typing_extensions.TypedDict):
|
|
|
12
12
|
Optional error code for programmatic error handling
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
details: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
|
|
15
|
+
details: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
16
16
|
"""
|
|
17
17
|
Additional error details and context information
|
|
18
18
|
"""
|
|
@@ -7,4 +7,4 @@ import typing_extensions
|
|
|
7
7
|
|
|
8
8
|
class FileSignedUrlDetailsParams(typing_extensions.TypedDict):
|
|
9
9
|
file_url: str
|
|
10
|
-
file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
|
|
10
|
+
file_metadata: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# This file was auto-generated by Fern from our API Definition.
|
|
2
2
|
|
|
3
3
|
import typing_extensions
|
|
4
|
+
from ..types.mode import Mode
|
|
4
5
|
from ..types.speech_to_text_model import SpeechToTextModel
|
|
5
6
|
from ..types.speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
6
7
|
|
|
@@ -8,12 +9,52 @@ from ..types.speech_to_text_translate_language import SpeechToTextTranslateLangu
|
|
|
8
9
|
class SpeechToTextJobParametersParams(typing_extensions.TypedDict):
|
|
9
10
|
language_code: typing_extensions.NotRequired[SpeechToTextTranslateLanguage]
|
|
10
11
|
"""
|
|
11
|
-
|
|
12
|
+
Specifies the language of the input audio in BCP-47 format.
|
|
13
|
+
|
|
14
|
+
**Available Options:**
|
|
15
|
+
- `unknown` (default): Use when the language is not known; the API will auto-detect.
|
|
16
|
+
- `hi-IN`: Hindi
|
|
17
|
+
- `bn-IN`: Bengali
|
|
18
|
+
- `kn-IN`: Kannada
|
|
19
|
+
- `ml-IN`: Malayalam
|
|
20
|
+
- `mr-IN`: Marathi
|
|
21
|
+
- `od-IN`: Odia
|
|
22
|
+
- `pa-IN`: Punjabi
|
|
23
|
+
- `ta-IN`: Tamil
|
|
24
|
+
- `te-IN`: Telugu
|
|
25
|
+
- `en-IN`: English
|
|
26
|
+
- `gu-IN`: Gujarati
|
|
12
27
|
"""
|
|
13
28
|
|
|
14
29
|
model: typing_extensions.NotRequired[SpeechToTextModel]
|
|
15
30
|
"""
|
|
16
|
-
Model to be used for speech to text
|
|
31
|
+
Model to be used for speech to text.
|
|
32
|
+
|
|
33
|
+
- **saarika:v2.5** (default): Transcribes audio in the spoken language.
|
|
34
|
+
|
|
35
|
+
- **saaras:v3**: State-of-the-art model with flexible output formats. Supports multiple modes via the `mode` parameter: transcribe, translate, verbatim, translit, codemix.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
mode: typing_extensions.NotRequired[Mode]
|
|
39
|
+
"""
|
|
40
|
+
Mode of operation. **Only applicable when using saaras:v3 model.**
|
|
41
|
+
|
|
42
|
+
Example audio: 'मेरा फोन नंबर है 9840950950'
|
|
43
|
+
|
|
44
|
+
- **transcribe** (default): Standard transcription in the original language with proper formatting and number normalization.
|
|
45
|
+
- Output: `मेरा फोन नंबर है 9840950950`
|
|
46
|
+
|
|
47
|
+
- **translate**: Translates speech from any supported Indic language to English.
|
|
48
|
+
- Output: `My phone number is 9840950950`
|
|
49
|
+
|
|
50
|
+
- **verbatim**: Exact word-for-word transcription without normalization, preserving filler words and spoken numbers as-is.
|
|
51
|
+
- Output: `मेरा फोन नंबर है नौ आठ चार zero नौ पांच zero नौ पांच zero`
|
|
52
|
+
|
|
53
|
+
- **translit**: Romanization - Transliterates speech to Latin/Roman script only.
|
|
54
|
+
- Output: `mera phone number hai 9840950950`
|
|
55
|
+
|
|
56
|
+
- **codemix**: Code-mixed text with English words in English and Indic words in native script.
|
|
57
|
+
- Output: `मेरा phone number है 9840950950`
|
|
17
58
|
"""
|
|
18
59
|
|
|
19
60
|
with_timestamps: typing_extensions.NotRequired[bool]
|
|
@@ -17,12 +17,12 @@ class SpeechToTextTranscriptionDataParams(typing_extensions.TypedDict):
|
|
|
17
17
|
Transcript of the provided speech in original language
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
-
timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
|
|
20
|
+
timestamps: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
21
21
|
"""
|
|
22
22
|
Timestamp information (if available)
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Any]]
|
|
25
|
+
diarized_transcript: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[typing.Any]]]
|
|
26
26
|
"""
|
|
27
27
|
Diarized transcript of the provided speech
|
|
28
28
|
"""
|
|
@@ -12,7 +12,10 @@ class SpeechToTextTranslateJobParametersParams(typing_extensions.TypedDict):
|
|
|
12
12
|
|
|
13
13
|
model: typing_extensions.NotRequired[SpeechToTextTranslateModel]
|
|
14
14
|
"""
|
|
15
|
-
Model to be used for
|
|
15
|
+
Model to be used for speech to text translation.
|
|
16
|
+
|
|
17
|
+
- **saaras:v2.5** (default): Translation model that translates audio from any spoken Indic language to English.
|
|
18
|
+
- Example: Hindi audio → English text output
|
|
16
19
|
"""
|
|
17
20
|
|
|
18
21
|
with_diarization: typing_extensions.NotRequired[bool]
|