sarvamai 0.1.5a8__py3-none-any.whl → 0.1.5a12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +30 -11
- sarvamai/client.py +10 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/__init__.py +12 -4
- sarvamai/requests/config_message.py +17 -0
- sarvamai/requests/speech_to_text_response_data.py +9 -0
- sarvamai/requests/speech_to_text_streaming_response.py +4 -4
- sarvamai/requests/{transcription_data.py → speech_to_text_transcription_data.py} +2 -2
- sarvamai/requests/speech_to_text_translate_response_data.py +11 -0
- sarvamai/requests/speech_to_text_translate_streaming_response.py +10 -0
- sarvamai/requests/speech_to_text_translate_transcription_data.py +23 -0
- sarvamai/speech_to_text_streaming/client.py +4 -4
- sarvamai/speech_to_text_streaming/raw_client.py +4 -4
- sarvamai/speech_to_text_streaming/socket_client.py +2 -6
- sarvamai/speech_to_text_translate_streaming/__init__.py +7 -0
- sarvamai/speech_to_text_translate_streaming/client.py +176 -0
- sarvamai/speech_to_text_translate_streaming/raw_client.py +153 -0
- sarvamai/speech_to_text_translate_streaming/socket_client.py +144 -0
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +7 -0
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_model.py +5 -0
- sarvamai/types/__init__.py +14 -6
- sarvamai/types/config_message.py +27 -0
- sarvamai/types/response_type.py +5 -0
- sarvamai/types/speech_to_text_response_data.py +9 -0
- sarvamai/types/speech_to_text_streaming_response.py +4 -4
- sarvamai/types/{transcription_data.py → speech_to_text_transcription_data.py} +2 -2
- sarvamai/types/speech_to_text_translate_response_data.py +9 -0
- sarvamai/types/speech_to_text_translate_streaming_response.py +22 -0
- sarvamai/types/speech_to_text_translate_transcription_data.py +35 -0
- {sarvamai-0.1.5a8.dist-info → sarvamai-0.1.5a12.dist-info}/METADATA +1 -1
- {sarvamai-0.1.5a8.dist-info → sarvamai-0.1.5a12.dist-info}/RECORD +32 -18
- sarvamai/requests/speech_to_text_streaming_response_data.py +0 -9
- sarvamai/types/speech_to_text_streaming_response_data.py +0 -9
- sarvamai/types/speech_to_text_streaming_response_type.py +0 -5
- {sarvamai-0.1.5a8.dist-info → sarvamai-0.1.5a12.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import websockets
|
|
8
|
+
import websockets.sync.client as websockets_sync_client
|
|
9
|
+
from ..core.api_error import ApiError
|
|
10
|
+
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
11
|
+
from ..core.request_options import RequestOptions
|
|
12
|
+
from .socket_client import AsyncSpeechToTextTranslateStreamingSocketClient, SpeechToTextTranslateStreamingSocketClient
|
|
13
|
+
from .types.speech_to_text_translate_streaming_model import SpeechToTextTranslateStreamingModel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RawSpeechToTextTranslateStreamingClient:
|
|
17
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
18
|
+
self._client_wrapper = client_wrapper
|
|
19
|
+
|
|
20
|
+
@contextmanager
|
|
21
|
+
def connect(
|
|
22
|
+
self,
|
|
23
|
+
*,
|
|
24
|
+
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
25
|
+
high_vad_sensitivity: typing.Optional[str] = None,
|
|
26
|
+
vad_signals: typing.Optional[str] = None,
|
|
27
|
+
api_subscription_key: typing.Optional[str] = None,
|
|
28
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
29
|
+
) -> typing.Iterator[SpeechToTextTranslateStreamingSocketClient]:
|
|
30
|
+
"""
|
|
31
|
+
WebSocket channel for real-time speech to text streaming with English translation
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
36
|
+
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
37
|
+
|
|
38
|
+
high_vad_sensitivity : typing.Optional[str]
|
|
39
|
+
Enable high VAD (Voice Activity Detection) sensitivity
|
|
40
|
+
|
|
41
|
+
vad_signals : typing.Optional[str]
|
|
42
|
+
Enable VAD signals in response
|
|
43
|
+
|
|
44
|
+
api_subscription_key : typing.Optional[str]
|
|
45
|
+
API subscription key for authentication
|
|
46
|
+
|
|
47
|
+
request_options : typing.Optional[RequestOptions]
|
|
48
|
+
Request-specific configuration.
|
|
49
|
+
|
|
50
|
+
Returns
|
|
51
|
+
-------
|
|
52
|
+
SpeechToTextTranslateStreamingSocketClient
|
|
53
|
+
"""
|
|
54
|
+
ws_url = self._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
|
|
55
|
+
query_params = httpx.QueryParams()
|
|
56
|
+
if model is not None:
|
|
57
|
+
query_params = query_params.add("model", model)
|
|
58
|
+
if high_vad_sensitivity is not None:
|
|
59
|
+
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
60
|
+
if vad_signals is not None:
|
|
61
|
+
query_params = query_params.add("vad_signals", vad_signals)
|
|
62
|
+
ws_url = ws_url + f"?{query_params}"
|
|
63
|
+
headers = self._client_wrapper.get_headers()
|
|
64
|
+
if api_subscription_key is not None:
|
|
65
|
+
headers["Api-Subscription-Key"] = str(api_subscription_key)
|
|
66
|
+
if request_options and "additional_headers" in request_options:
|
|
67
|
+
headers.update(request_options["additional_headers"])
|
|
68
|
+
try:
|
|
69
|
+
with websockets_sync_client.connect(ws_url, additional_headers=headers) as protocol:
|
|
70
|
+
yield SpeechToTextTranslateStreamingSocketClient(websocket=protocol)
|
|
71
|
+
except websockets.exceptions.InvalidStatusCode as exc:
|
|
72
|
+
status_code: int = exc.status_code
|
|
73
|
+
if status_code == 401:
|
|
74
|
+
raise ApiError(
|
|
75
|
+
status_code=status_code,
|
|
76
|
+
headers=dict(headers),
|
|
77
|
+
body="Websocket initialized with invalid credentials.",
|
|
78
|
+
)
|
|
79
|
+
raise ApiError(
|
|
80
|
+
status_code=status_code,
|
|
81
|
+
headers=dict(headers),
|
|
82
|
+
body="Unexpected error when initializing websocket connection.",
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class AsyncRawSpeechToTextTranslateStreamingClient:
|
|
87
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
88
|
+
self._client_wrapper = client_wrapper
|
|
89
|
+
|
|
90
|
+
@asynccontextmanager
|
|
91
|
+
async def connect(
|
|
92
|
+
self,
|
|
93
|
+
*,
|
|
94
|
+
model: typing.Optional[SpeechToTextTranslateStreamingModel] = None,
|
|
95
|
+
high_vad_sensitivity: typing.Optional[str] = None,
|
|
96
|
+
vad_signals: typing.Optional[str] = None,
|
|
97
|
+
api_subscription_key: typing.Optional[str] = None,
|
|
98
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
99
|
+
) -> typing.AsyncIterator[AsyncSpeechToTextTranslateStreamingSocketClient]:
|
|
100
|
+
"""
|
|
101
|
+
WebSocket channel for real-time speech to text streaming with English translation
|
|
102
|
+
|
|
103
|
+
Parameters
|
|
104
|
+
----------
|
|
105
|
+
model : typing.Optional[SpeechToTextTranslateStreamingModel]
|
|
106
|
+
Speech to text model to use (defaults to "saaras:v2.5" if not specified)
|
|
107
|
+
|
|
108
|
+
high_vad_sensitivity : typing.Optional[str]
|
|
109
|
+
Enable high VAD (Voice Activity Detection) sensitivity
|
|
110
|
+
|
|
111
|
+
vad_signals : typing.Optional[str]
|
|
112
|
+
Enable VAD signals in response
|
|
113
|
+
|
|
114
|
+
api_subscription_key : typing.Optional[str]
|
|
115
|
+
API subscription key for authentication
|
|
116
|
+
|
|
117
|
+
request_options : typing.Optional[RequestOptions]
|
|
118
|
+
Request-specific configuration.
|
|
119
|
+
|
|
120
|
+
Returns
|
|
121
|
+
-------
|
|
122
|
+
AsyncSpeechToTextTranslateStreamingSocketClient
|
|
123
|
+
"""
|
|
124
|
+
ws_url = self._client_wrapper.get_environment().production + "/speech-to-text-translate/ws"
|
|
125
|
+
query_params = httpx.QueryParams()
|
|
126
|
+
if model is not None:
|
|
127
|
+
query_params = query_params.add("model", model)
|
|
128
|
+
if high_vad_sensitivity is not None:
|
|
129
|
+
query_params = query_params.add("high_vad_sensitivity", high_vad_sensitivity)
|
|
130
|
+
if vad_signals is not None:
|
|
131
|
+
query_params = query_params.add("vad_signals", vad_signals)
|
|
132
|
+
ws_url = ws_url + f"?{query_params}"
|
|
133
|
+
headers = self._client_wrapper.get_headers()
|
|
134
|
+
if api_subscription_key is not None:
|
|
135
|
+
headers["Api-Subscription-Key"] = str(api_subscription_key)
|
|
136
|
+
if request_options and "additional_headers" in request_options:
|
|
137
|
+
headers.update(request_options["additional_headers"])
|
|
138
|
+
try:
|
|
139
|
+
async with websockets.connect(ws_url, extra_headers=headers) as protocol:
|
|
140
|
+
yield AsyncSpeechToTextTranslateStreamingSocketClient(websocket=protocol)
|
|
141
|
+
except websockets.exceptions.InvalidStatusCode as exc:
|
|
142
|
+
status_code: int = exc.status_code
|
|
143
|
+
if status_code == 401:
|
|
144
|
+
raise ApiError(
|
|
145
|
+
status_code=status_code,
|
|
146
|
+
headers=dict(headers),
|
|
147
|
+
body="Websocket initialized with invalid credentials.",
|
|
148
|
+
)
|
|
149
|
+
raise ApiError(
|
|
150
|
+
status_code=status_code,
|
|
151
|
+
headers=dict(headers),
|
|
152
|
+
body="Unexpected error when initializing websocket connection.",
|
|
153
|
+
)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
import websockets
|
|
7
|
+
import websockets.sync.connection as websockets_sync_connection
|
|
8
|
+
from ..core.events import EventEmitterMixin, EventType
|
|
9
|
+
from ..core.pydantic_utilities import parse_obj_as
|
|
10
|
+
from ..types.audio_message import AudioMessage
|
|
11
|
+
from ..types.config_message import ConfigMessage
|
|
12
|
+
from ..types.speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
|
|
13
|
+
|
|
14
|
+
SpeechToTextTranslateStreamingSocketClientResponse = typing.Union[SpeechToTextTranslateStreamingResponse]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AsyncSpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
18
|
+
def __init__(self, *, websocket: websockets.WebSocketClientProtocol):
|
|
19
|
+
super().__init__()
|
|
20
|
+
self._websocket = websocket
|
|
21
|
+
|
|
22
|
+
async def __aiter__(self):
|
|
23
|
+
async for message in self._websocket:
|
|
24
|
+
yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
|
|
25
|
+
|
|
26
|
+
async def start_listening(self):
|
|
27
|
+
"""
|
|
28
|
+
Start listening for messages on the websocket connection.
|
|
29
|
+
|
|
30
|
+
Emits events in the following order:
|
|
31
|
+
- EventType.OPEN when connection is established
|
|
32
|
+
- EventType.MESSAGE for each message received
|
|
33
|
+
- EventType.ERROR if an error occurs
|
|
34
|
+
- EventType.CLOSE when connection is closed
|
|
35
|
+
"""
|
|
36
|
+
self._emit(EventType.OPEN, None)
|
|
37
|
+
try:
|
|
38
|
+
async for raw_message in self._websocket:
|
|
39
|
+
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
40
|
+
self._emit(EventType.MESSAGE, parsed)
|
|
41
|
+
except websockets.WebSocketException as exc:
|
|
42
|
+
self._emit(EventType.ERROR, exc)
|
|
43
|
+
finally:
|
|
44
|
+
self._emit(EventType.CLOSE, None)
|
|
45
|
+
|
|
46
|
+
async def send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
47
|
+
"""
|
|
48
|
+
Send a message to the websocket connection.
|
|
49
|
+
The message will be sent as a AudioMessage.
|
|
50
|
+
"""
|
|
51
|
+
await self._send_model(message)
|
|
52
|
+
|
|
53
|
+
async def send_config_message(self, message: ConfigMessage) -> None:
|
|
54
|
+
"""
|
|
55
|
+
Send a message to the websocket connection.
|
|
56
|
+
The message will be sent as a ConfigMessage.
|
|
57
|
+
"""
|
|
58
|
+
await self._send_model(message)
|
|
59
|
+
|
|
60
|
+
async def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
61
|
+
"""
|
|
62
|
+
Receive a message from the websocket connection.
|
|
63
|
+
"""
|
|
64
|
+
data = await self._websocket.recv()
|
|
65
|
+
return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
|
|
66
|
+
|
|
67
|
+
async def _send(self, data: typing.Any) -> None:
|
|
68
|
+
"""
|
|
69
|
+
Send a message to the websocket connection.
|
|
70
|
+
"""
|
|
71
|
+
if isinstance(data, dict):
|
|
72
|
+
data = json.dumps(data)
|
|
73
|
+
await self._websocket.send(data)
|
|
74
|
+
|
|
75
|
+
async def _send_model(self, data: typing.Any) -> None:
|
|
76
|
+
"""
|
|
77
|
+
Send a Pydantic model to the websocket connection.
|
|
78
|
+
"""
|
|
79
|
+
await self._send(data.dict())
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class SpeechToTextTranslateStreamingSocketClient(EventEmitterMixin):
|
|
83
|
+
def __init__(self, *, websocket: websockets_sync_connection.Connection):
|
|
84
|
+
super().__init__()
|
|
85
|
+
self._websocket = websocket
|
|
86
|
+
|
|
87
|
+
def __iter__(self):
|
|
88
|
+
for message in self._websocket:
|
|
89
|
+
yield parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, message) # type: ignore
|
|
90
|
+
|
|
91
|
+
def start_listening(self):
|
|
92
|
+
"""
|
|
93
|
+
Start listening for messages on the websocket connection.
|
|
94
|
+
|
|
95
|
+
Emits events in the following order:
|
|
96
|
+
- EventType.OPEN when connection is established
|
|
97
|
+
- EventType.MESSAGE for each message received
|
|
98
|
+
- EventType.ERROR if an error occurs
|
|
99
|
+
- EventType.CLOSE when connection is closed
|
|
100
|
+
"""
|
|
101
|
+
self._emit(EventType.OPEN, None)
|
|
102
|
+
try:
|
|
103
|
+
for raw_message in self._websocket:
|
|
104
|
+
parsed = parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, raw_message) # type: ignore
|
|
105
|
+
self._emit(EventType.MESSAGE, parsed)
|
|
106
|
+
except websockets.WebSocketException as exc:
|
|
107
|
+
self._emit(EventType.ERROR, exc)
|
|
108
|
+
finally:
|
|
109
|
+
self._emit(EventType.CLOSE, None)
|
|
110
|
+
|
|
111
|
+
def send_speech_to_text_translate_streaming_audio_message(self, message: AudioMessage) -> None:
|
|
112
|
+
"""
|
|
113
|
+
Send a message to the websocket connection.
|
|
114
|
+
The message will be sent as a AudioMessage.
|
|
115
|
+
"""
|
|
116
|
+
self._send_model(message)
|
|
117
|
+
|
|
118
|
+
def send_config_message(self, message: ConfigMessage) -> None:
|
|
119
|
+
"""
|
|
120
|
+
Send a message to the websocket connection.
|
|
121
|
+
The message will be sent as a ConfigMessage.
|
|
122
|
+
"""
|
|
123
|
+
self._send_model(message)
|
|
124
|
+
|
|
125
|
+
def recv(self) -> SpeechToTextTranslateStreamingSocketClientResponse:
|
|
126
|
+
"""
|
|
127
|
+
Receive a message from the websocket connection.
|
|
128
|
+
"""
|
|
129
|
+
data = self._websocket.recv()
|
|
130
|
+
return parse_obj_as(SpeechToTextTranslateStreamingSocketClientResponse, data) # type: ignore
|
|
131
|
+
|
|
132
|
+
def _send(self, data: typing.Any) -> None:
|
|
133
|
+
"""
|
|
134
|
+
Send a message to the websocket connection.
|
|
135
|
+
"""
|
|
136
|
+
if isinstance(data, dict):
|
|
137
|
+
data = json.dumps(data)
|
|
138
|
+
self._websocket.send(data)
|
|
139
|
+
|
|
140
|
+
def _send_model(self, data: typing.Any) -> None:
|
|
141
|
+
"""
|
|
142
|
+
Send a Pydantic model to the websocket connection.
|
|
143
|
+
"""
|
|
144
|
+
self._send(data.dict())
|
sarvamai/types/__init__.py
CHANGED
|
@@ -17,6 +17,7 @@ from .chat_completion_request_user_message import ChatCompletionRequestUserMessa
|
|
|
17
17
|
from .chat_completion_response_message import ChatCompletionResponseMessage
|
|
18
18
|
from .choice import Choice
|
|
19
19
|
from .completion_usage import CompletionUsage
|
|
20
|
+
from .config_message import ConfigMessage
|
|
20
21
|
from .create_chat_completion_response import CreateChatCompletionResponse
|
|
21
22
|
from .diarized_entry import DiarizedEntry
|
|
22
23
|
from .diarized_transcript import DiarizedTranscript
|
|
@@ -30,18 +31,22 @@ from .format import Format
|
|
|
30
31
|
from .language_identification_response import LanguageIdentificationResponse
|
|
31
32
|
from .numerals_format import NumeralsFormat
|
|
32
33
|
from .reasoning_effort import ReasoningEffort
|
|
34
|
+
from .response_type import ResponseType
|
|
33
35
|
from .role import Role
|
|
34
36
|
from .sarvam_model_ids import SarvamModelIds
|
|
35
37
|
from .speech_sample_rate import SpeechSampleRate
|
|
36
38
|
from .speech_to_text_language import SpeechToTextLanguage
|
|
37
39
|
from .speech_to_text_model import SpeechToTextModel
|
|
38
40
|
from .speech_to_text_response import SpeechToTextResponse
|
|
41
|
+
from .speech_to_text_response_data import SpeechToTextResponseData
|
|
39
42
|
from .speech_to_text_streaming_response import SpeechToTextStreamingResponse
|
|
40
|
-
from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseData
|
|
41
|
-
from .speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
|
|
43
|
+
from .speech_to_text_transcription_data import SpeechToTextTranscriptionData
|
|
42
44
|
from .speech_to_text_translate_language import SpeechToTextTranslateLanguage
|
|
43
45
|
from .speech_to_text_translate_model import SpeechToTextTranslateModel
|
|
44
46
|
from .speech_to_text_translate_response import SpeechToTextTranslateResponse
|
|
47
|
+
from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseData
|
|
48
|
+
from .speech_to_text_translate_streaming_response import SpeechToTextTranslateStreamingResponse
|
|
49
|
+
from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionData
|
|
45
50
|
from .spoken_form_numerals_format import SpokenFormNumeralsFormat
|
|
46
51
|
from .stop_configuration import StopConfiguration
|
|
47
52
|
from .text_to_speech_language import TextToSpeechLanguage
|
|
@@ -49,7 +54,6 @@ from .text_to_speech_model import TextToSpeechModel
|
|
|
49
54
|
from .text_to_speech_response import TextToSpeechResponse
|
|
50
55
|
from .text_to_speech_speaker import TextToSpeechSpeaker
|
|
51
56
|
from .timestamps_model import TimestampsModel
|
|
52
|
-
from .transcription_data import TranscriptionData
|
|
53
57
|
from .transcription_metrics import TranscriptionMetrics
|
|
54
58
|
from .translate_mode import TranslateMode
|
|
55
59
|
from .translate_model import TranslateModel
|
|
@@ -76,6 +80,7 @@ __all__ = [
|
|
|
76
80
|
"ChatCompletionResponseMessage",
|
|
77
81
|
"Choice",
|
|
78
82
|
"CompletionUsage",
|
|
83
|
+
"ConfigMessage",
|
|
79
84
|
"CreateChatCompletionResponse",
|
|
80
85
|
"DiarizedEntry",
|
|
81
86
|
"DiarizedTranscript",
|
|
@@ -89,18 +94,22 @@ __all__ = [
|
|
|
89
94
|
"LanguageIdentificationResponse",
|
|
90
95
|
"NumeralsFormat",
|
|
91
96
|
"ReasoningEffort",
|
|
97
|
+
"ResponseType",
|
|
92
98
|
"Role",
|
|
93
99
|
"SarvamModelIds",
|
|
94
100
|
"SpeechSampleRate",
|
|
95
101
|
"SpeechToTextLanguage",
|
|
96
102
|
"SpeechToTextModel",
|
|
97
103
|
"SpeechToTextResponse",
|
|
104
|
+
"SpeechToTextResponseData",
|
|
98
105
|
"SpeechToTextStreamingResponse",
|
|
99
|
-
"SpeechToTextStreamingResponseData",
|
|
100
|
-
"SpeechToTextStreamingResponseType",
|
|
106
|
+
"SpeechToTextTranscriptionData",
|
|
101
107
|
"SpeechToTextTranslateLanguage",
|
|
102
108
|
"SpeechToTextTranslateModel",
|
|
103
109
|
"SpeechToTextTranslateResponse",
|
|
110
|
+
"SpeechToTextTranslateResponseData",
|
|
111
|
+
"SpeechToTextTranslateStreamingResponse",
|
|
112
|
+
"SpeechToTextTranslateTranscriptionData",
|
|
104
113
|
"SpokenFormNumeralsFormat",
|
|
105
114
|
"StopConfiguration",
|
|
106
115
|
"TextToSpeechLanguage",
|
|
@@ -108,7 +117,6 @@ __all__ = [
|
|
|
108
117
|
"TextToSpeechResponse",
|
|
109
118
|
"TextToSpeechSpeaker",
|
|
110
119
|
"TimestampsModel",
|
|
111
|
-
"TranscriptionData",
|
|
112
120
|
"TranscriptionMetrics",
|
|
113
121
|
"TranslateMode",
|
|
114
122
|
"TranslateModel",
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConfigMessage(UniversalBaseModel):
|
|
10
|
+
type: typing.Literal["config"] = pydantic.Field(default="config")
|
|
11
|
+
"""
|
|
12
|
+
Message type identifier for configuration
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
prompt: typing.Optional[str] = pydantic.Field(default=None)
|
|
16
|
+
"""
|
|
17
|
+
Prompt for ASR model to improve transcription accuracy
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
if IS_PYDANTIC_V2:
|
|
21
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
22
|
+
else:
|
|
23
|
+
|
|
24
|
+
class Config:
|
|
25
|
+
frozen = True
|
|
26
|
+
smart_union = True
|
|
27
|
+
extra = pydantic.Extra.allow
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from .error_data import ErrorData
|
|
6
|
+
from .events_data import EventsData
|
|
7
|
+
from .speech_to_text_transcription_data import SpeechToTextTranscriptionData
|
|
8
|
+
|
|
9
|
+
SpeechToTextResponseData = typing.Union[SpeechToTextTranscriptionData, ErrorData, EventsData]
|
|
@@ -4,13 +4,13 @@ import typing
|
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
-
from .speech_to_text_streaming_response_data import SpeechToTextStreamingResponseData
|
|
8
|
-
from .speech_to_text_streaming_response_type import SpeechToTextStreamingResponseType
|
|
7
|
+
from .response_type import ResponseType
|
|
8
|
+
from .speech_to_text_response_data import SpeechToTextResponseData
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class SpeechToTextStreamingResponse(UniversalBaseModel):
|
|
12
|
-
type: SpeechToTextStreamingResponseType
|
|
13
|
-
data: SpeechToTextStreamingResponseData
|
|
12
|
+
type: ResponseType
|
|
13
|
+
data: SpeechToTextResponseData
|
|
14
14
|
|
|
15
15
|
if IS_PYDANTIC_V2:
|
|
16
16
|
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
@@ -7,7 +7,7 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
|
7
7
|
from .transcription_metrics import TranscriptionMetrics
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class TranscriptionData(UniversalBaseModel):
|
|
10
|
+
class SpeechToTextTranscriptionData(UniversalBaseModel):
|
|
11
11
|
request_id: str = pydantic.Field()
|
|
12
12
|
"""
|
|
13
13
|
Unique identifier for the request
|
|
@@ -15,7 +15,7 @@ class TranscriptionData(UniversalBaseModel):
|
|
|
15
15
|
|
|
16
16
|
transcript: str = pydantic.Field()
|
|
17
17
|
"""
|
|
18
|
-
Transcript of the provided speech
|
|
18
|
+
Transcript of the provided speech in original language
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
timestamps: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = pydantic.Field(default=None)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from .error_data import ErrorData
|
|
6
|
+
from .events_data import EventsData
|
|
7
|
+
from .speech_to_text_translate_transcription_data import SpeechToTextTranslateTranscriptionData
|
|
8
|
+
|
|
9
|
+
SpeechToTextTranslateResponseData = typing.Union[SpeechToTextTranslateTranscriptionData, ErrorData, EventsData]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
from .response_type import ResponseType
|
|
8
|
+
from .speech_to_text_translate_response_data import SpeechToTextTranslateResponseData
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SpeechToTextTranslateStreamingResponse(UniversalBaseModel):
|
|
12
|
+
type: ResponseType
|
|
13
|
+
data: SpeechToTextTranslateResponseData
|
|
14
|
+
|
|
15
|
+
if IS_PYDANTIC_V2:
|
|
16
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
17
|
+
else:
|
|
18
|
+
|
|
19
|
+
class Config:
|
|
20
|
+
frozen = True
|
|
21
|
+
smart_union = True
|
|
22
|
+
extra = pydantic.Extra.allow
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
import pydantic
|
|
6
|
+
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|
7
|
+
from .transcription_metrics import TranscriptionMetrics
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SpeechToTextTranslateTranscriptionData(UniversalBaseModel):
|
|
11
|
+
request_id: str = pydantic.Field()
|
|
12
|
+
"""
|
|
13
|
+
Unique identifier for the request
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
transcript: str = pydantic.Field()
|
|
17
|
+
"""
|
|
18
|
+
English translation of the provided speech
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
language_code: typing.Optional[str] = pydantic.Field(default=None)
|
|
22
|
+
"""
|
|
23
|
+
BCP-47 code of detected source language (null when language detection is in progress)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
metrics: TranscriptionMetrics
|
|
27
|
+
|
|
28
|
+
if IS_PYDANTIC_V2:
|
|
29
|
+
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
|
30
|
+
else:
|
|
31
|
+
|
|
32
|
+
class Config:
|
|
33
|
+
frozen = True
|
|
34
|
+
smart_union = True
|
|
35
|
+
extra = pydantic.Extra.allow
|