sarvamai 0.1.11a0__py3-none-any.whl → 0.1.11a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +4 -4
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/configure_connection_data.py +2 -3
- sarvamai/speech_to_text_job/client.py +89 -14
- sarvamai/speech_to_text_job/job.py +468 -0
- sarvamai/text_to_speech/client.py +7 -7
- sarvamai/text_to_speech/raw_client.py +7 -7
- sarvamai/types/__init__.py +4 -4
- sarvamai/types/configure_connection_data.py +2 -1
- sarvamai/types/configure_connection_data_output_audio_codec.py +7 -0
- sarvamai/types/text_to_speech_output_audio_codec.py +7 -0
- {sarvamai-0.1.11a0.dist-info → sarvamai-0.1.11a2.dist-info}/METADATA +1 -1
- {sarvamai-0.1.11a0.dist-info → sarvamai-0.1.11a2.dist-info}/RECORD +14 -13
- sarvamai/types/audio_codec.py +0 -5
- sarvamai/types/format.py +0 -5
- {sarvamai-0.1.11a0.dist-info → sarvamai-0.1.11a2.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
@@ -3,7 +3,6 @@
 # isort: skip_file
 
 from .types import (
-    AudioCodec,
     AudioData,
     AudioMessage,
     AudioOutput,
@@ -25,6 +24,7 @@ from .types import (
     ConfigureConnection,
     ConfigureConnectionData,
     ConfigureConnectionDataOutputAudioBitrate,
+    ConfigureConnectionDataOutputAudioCodec,
     ConfigureConnectionDataSpeaker,
     ConfigureConnectionDataTargetLanguageCode,
     CreateChatCompletionResponse,
@@ -43,7 +43,6 @@ from .types import (
     FilesUploadResponse,
     FinishReason,
     FlushSignal,
-    Format,
     JobState,
     JobStatusV1,
     LanguageIdentificationResponse,
@@ -77,6 +76,7 @@ from .types import (
     TaskState,
     TextToSpeechLanguage,
     TextToSpeechModel,
+    TextToSpeechOutputAudioCodec,
     TextToSpeechResponse,
     TextToSpeechSpeaker,
     TimestampsModel,
@@ -185,7 +185,6 @@ from .version import __version__
 
 __all__ = [
     "AsyncSarvamAI",
-    "AudioCodec",
     "AudioData",
     "AudioDataParams",
     "AudioMessage",
@@ -226,6 +225,7 @@ __all__ = [
     "ConfigureConnection",
     "ConfigureConnectionData",
     "ConfigureConnectionDataOutputAudioBitrate",
+    "ConfigureConnectionDataOutputAudioCodec",
     "ConfigureConnectionDataParams",
     "ConfigureConnectionDataSpeaker",
     "ConfigureConnectionDataTargetLanguageCode",
@@ -261,7 +261,6 @@ __all__ = [
     "FlushSignal",
     "FlushSignalParams",
     "ForbiddenError",
-    "Format",
    "InternalServerError",
     "JobState",
     "JobStatusV1",
@@ -323,6 +322,7 @@ __all__ = [
     "TaskState",
     "TextToSpeechLanguage",
     "TextToSpeechModel",
+    "TextToSpeechOutputAudioCodec",
     "TextToSpeechResponse",
     "TextToSpeechResponseParams",
     "TextToSpeechSpeaker",
sarvamai/core/client_wrapper.py
CHANGED
@@ -23,10 +23,10 @@ class BaseClientWrapper:
 
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "sarvamai/0.1.11a0",
+            "User-Agent": "sarvamai/0.1.11a2",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "sarvamai",
-            "X-Fern-SDK-Version": "0.1.11a0",
+            "X-Fern-SDK-Version": "0.1.11a2",
             **(self.get_custom_headers() or {}),
         }
         headers["api-subscription-key"] = self.api_subscription_key

sarvamai/requests/configure_connection_data.py
CHANGED

@@ -1,9 +1,8 @@
 # This file was auto-generated by Fern from our API Definition.
 
-import typing
-
 import typing_extensions
 from ..types.configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+from ..types.configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
 from ..types.configure_connection_data_speaker import ConfigureConnectionDataSpeaker
 from ..types.configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
 
@@ -62,7 +61,7 @@ class ConfigureConnectionDataParams(typing_extensions.TypedDict):
     of mixed-language text. Default is false.
     """
 
-    output_audio_codec: typing_extensions.NotRequired[…
+    output_audio_codec: typing_extensions.NotRequired[ConfigureConnectionDataOutputAudioCodec]
     """
     Audio codec (currently supports MP3 only, optimized for real-time playback)
     """

sarvamai/speech_to_text_job/client.py
CHANGED

@@ -11,6 +11,7 @@ from ..types.files_download_response import FilesDownloadResponse
 from ..types.files_upload_response import FilesUploadResponse
 from ..types.job_status_v_1 import JobStatusV1
 from .raw_client import AsyncRawSpeechToTextJobClient, RawSpeechToTextJobClient
+from .job import AsyncSpeechToTextJob, SpeechToTextJob
 
 # this is used as the default value for optional parameters
 OMIT = typing.cast(typing.Any, ...)
@@ -69,11 +70,15 @@ class SpeechToTextJobClient:
         )
         """
         _response = self._raw_client.initialise(
-            job_parameters=job_parameters, …
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
         )
         return _response.data
 
-    def get_status(…
+    def get_status(
+        self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> JobStatusV1:
         """
         Get the status of a speech to text bulk job V1
 
@@ -140,11 +145,17 @@ class SpeechToTextJobClient:
             job_id="job_id",
         )
         """
-        _response = self._raw_client.start(…
+        _response = self._raw_client.start(
+            job_id, ptu_id=ptu_id, request_options=request_options
+        )
         return _response.data
 
     def get_upload_links(
-        self, …
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesUploadResponse:
         """
         Start a speech to text bulk job V1
@@ -175,11 +186,17 @@ class SpeechToTextJobClient:
             files=["files"],
         )
         """
-        _response = self._raw_client.get_upload_links(…
+        _response = self._raw_client.get_upload_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
 
     def get_download_links(
-        self, …
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesDownloadResponse:
         """
         Start a speech to text bulk job V1
@@ -210,9 +227,30 @@
             files=["files"],
         )
         """
-        _response = self._raw_client.get_download_links(…
+        _response = self._raw_client.get_download_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
 
+    def create_job(
+        self,
+        job_parameters: SpeechToTextJobParametersParams,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> SpeechToTextJob:
+        response = self.initialise(
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
+        )
+        return SpeechToTextJob(job_id=response.job_id, client=self)
+
+    def get_job(self, job_id: str) -> SpeechToTextJob:
+        """
+        Return a job handle for an existing Speech-to-Text job.
+        """
+        return SpeechToTextJob(job_id=job_id, client=self)
+
 
 class AsyncSpeechToTextJobClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -275,11 +313,15 @@ class AsyncSpeechToTextJobClient:
         asyncio.run(main())
         """
         _response = await self._raw_client.initialise(
-            job_parameters=job_parameters, …
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
         )
         return _response.data
 
-    async def get_status(…
+    async def get_status(
+        self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
+    ) -> JobStatusV1:
         """
         Get the status of a speech to text bulk job V1
 
@@ -315,7 +357,9 @@ class AsyncSpeechToTextJobClient:
 
         asyncio.run(main())
         """
-        _response = await self._raw_client.get_status(…
+        _response = await self._raw_client.get_status(
+            job_id, request_options=request_options
+        )
         return _response.data
 
     async def start(
@@ -362,11 +406,17 @@ class AsyncSpeechToTextJobClient:
 
         asyncio.run(main())
         """
-        _response = await self._raw_client.start(…
+        _response = await self._raw_client.start(
+            job_id, ptu_id=ptu_id, request_options=request_options
+        )
         return _response.data
 
     async def get_upload_links(
-        self, …
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesUploadResponse:
         """
         Start a speech to text bulk job V1
@@ -405,11 +455,17 @@ class AsyncSpeechToTextJobClient:
 
         asyncio.run(main())
         """
-        _response = await self._raw_client.get_upload_links(…
+        _response = await self._raw_client.get_upload_links(
+            job_id=job_id, files=files, request_options=request_options
+        )
         return _response.data
 
     async def get_download_links(
-        self, …
+        self,
+        *,
+        job_id: str,
+        files: typing.Sequence[str],
+        request_options: typing.Optional[RequestOptions] = None,
     ) -> FilesDownloadResponse:
         """
         Start a speech to text bulk job V1
@@ -452,3 +508,22 @@ class AsyncSpeechToTextJobClient:
             job_id=job_id, files=files, request_options=request_options
         )
         return _response.data
+
+    async def create_job(
+        self,
+        job_parameters: SpeechToTextJobParametersParams,
+        callback: typing.Optional[BulkJobCallbackParams] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> "AsyncSpeechToTextJob":
+        response = await self.initialise(
+            job_parameters=job_parameters,
+            callback=callback,
+            request_options=request_options,
+        )
+        return AsyncSpeechToTextJob(job_id=response.job_id, client=self)
+
+    async def get_job(self, job_id: str) -> "AsyncSpeechToTextJob":
+        """
+        Return a job handle for an existing Speech-to-Text job.
+        """
+        return AsyncSpeechToTextJob(job_id=job_id, client=self)
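For orientation, here is a minimal sketch of how the new create_job()/get_job() helpers might be called from the top-level client. The client attribute name, the subscription-key argument, and the job-parameter keys are assumptions for illustration, not taken from this diff:

```python
# Hypothetical usage sketch (key and parameter values are placeholders).
from sarvamai import SarvamAI

client = SarvamAI(api_subscription_key="YOUR_SUBSCRIPTION_KEY")  # assumed constructor argument

# create_job() wraps initialise() and returns a SpeechToTextJob handle
job = client.speech_to_text_job.create_job(
    job_parameters={"model": "saarika:v2"},  # assumed TypedDict keys
)

# get_job() re-attaches a handle to an already-created job by its id
same_job = client.speech_to_text_job.get_job(job_id=job.job_id)
```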

sarvamai/speech_to_text_job/job.py
ADDED

import asyncio
import os
import time
import typing
import httpx

from ..types import JobStatusV1

if typing.TYPE_CHECKING:
    from .client import AsyncSpeechToTextJobClient, SpeechToTextJobClient


class AsyncSpeechToTextJob:
    def __init__(self, job_id: str, client: "AsyncSpeechToTextJobClient"):
        """
        Initialize the asynchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : AsyncSpeechToTextJobClient
            The async client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.

        """
        self._job_id = job_id
        self._client = client

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    async def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.
        """
        upload_links = await self._client.get_upload_links(
            job_id=self._job_id,
            files=[os.path.basename(p) for p in file_paths],
        )
        async with httpx.AsyncClient() as session:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                with open(path, "rb") as f:
                    response = await session.put(
                        url,
                        content=f.read(),
                        headers={
                            "x-ms-blob-type": "BlockBlob",
                            "Content-Type": "audio/wav",
                        },
                    )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    async def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        start = asyncio.get_event_loop().time()
        while True:
            status = await self.get_status()
            state = status.job_state.lower()
            if state in {"completed", "failed"}:
                return status
            if asyncio.get_event_loop().time() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            await asyncio.sleep(poll_interval)

    async def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = await self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    async def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = await self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = await self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        async with httpx.AsyncClient() as session:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = await session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = os.path.join(output_dir, f"{m['input_file']}.json")
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    async def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.get_status(self._job_id)

    async def start(self) -> JobStatusV1:
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.start(job_id=self._job_id)

    async def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
        """
        try:
            await self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    async def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        state = (await self.get_status()).job_state.lower()
        return state in {"completed", "failed"}

    async def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "completed"

    async def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "failed"


class SpeechToTextJob:
    def __init__(self, job_id: str, client: "SpeechToTextJobClient"):
        """
        Initialize the synchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : SpeechToTextJobClient
            The client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.

        """
        self._job_id = job_id
        self._client = client

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.
        """
        upload_links = self._client.get_upload_links(
            job_id=self._job_id, files=[os.path.basename(p) for p in file_paths]
        )
        with httpx.Client() as client:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                with open(path, "rb") as f:
                    response = client.put(
                        url,
                        content=f,
                        headers={
                            "x-ms-blob-type": "BlockBlob",
                            "Content-Type": "audio/wav",
                        },
                    )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        start = time.monotonic()
        while True:
            status = self.get_status()
            state = status.job_state.lower()
            if state in {"completed", "failed"}:
                return status
            if time.monotonic() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            time.sleep(poll_interval)

    def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        with httpx.Client() as client:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = client.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = os.path.join(output_dir, f"{m['input_file']}.json")
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return self._client.get_status(self._job_id)

    def start(self) -> JobStatusV1:
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return self._client.start(job_id=self._job_id)

    def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
        """
        try:
            self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() in {"completed", "failed"}

    def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "completed"

    def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "failed"
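The job handle above wraps the raw endpoints into a file-centric workflow. A minimal end-to-end sketch, assuming `job` is a `SpeechToTextJob` obtained from `create_job()`/`get_job()` and that the local paths are placeholders:

```python
# Upload local audio, start processing, poll to completion, then fetch results.
job.upload_files(["./audio/call_01.wav", "./audio/call_02.wav"])
job.start()                                    # begins server-side processing
final_status = job.wait_until_complete(poll_interval=5, timeout=600)

if job.is_successful():
    job.download_outputs("./transcripts")      # writes one <input_file>.json per input
else:
    print(f"Job {job.job_id} finished in state {final_status.job_state}")
```

The async variant is identical except that every call is awaited on an `AsyncSpeechToTextJob`.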

sarvamai/text_to_speech/client.py
CHANGED

@@ -4,10 +4,10 @@ import typing
 
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
-from ..types.audio_codec import AudioCodec
 from ..types.speech_sample_rate import SpeechSampleRate
 from ..types.text_to_speech_language import TextToSpeechLanguage
 from ..types.text_to_speech_model import TextToSpeechModel
+from ..types.text_to_speech_output_audio_codec import TextToSpeechOutputAudioCodec
 from ..types.text_to_speech_response import TextToSpeechResponse
 from ..types.text_to_speech_speaker import TextToSpeechSpeaker
 from .raw_client import AsyncRawTextToSpeechClient, RawTextToSpeechClient
@@ -43,7 +43,7 @@ class TextToSpeechClient:
         speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
         enable_preprocessing: typing.Optional[bool] = OMIT,
         model: typing.Optional[TextToSpeechModel] = OMIT,
-        …
+        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> TextToSpeechResponse:
         """
@@ -87,7 +87,7 @@ class TextToSpeechClient:
         model : typing.Optional[TextToSpeechModel]
             Specifies the model to use for text-to-speech conversion. Default is bulbul:v2.
 
-        …
+        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
             Specifies the audio codec for the output audio file. Different codecs offer various compression and quality characteristics.
 
         request_options : typing.Optional[RequestOptions]
@@ -120,7 +120,7 @@ class TextToSpeechClient:
             speech_sample_rate=speech_sample_rate,
             enable_preprocessing=enable_preprocessing,
             model=model,
-            …
+            output_audio_codec=output_audio_codec,
             request_options=request_options,
         )
         return _response.data
@@ -153,7 +153,7 @@ class AsyncTextToSpeechClient:
         speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
         enable_preprocessing: typing.Optional[bool] = OMIT,
         model: typing.Optional[TextToSpeechModel] = OMIT,
-        …
+        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> TextToSpeechResponse:
         """
@@ -197,7 +197,7 @@ class AsyncTextToSpeechClient:
         model : typing.Optional[TextToSpeechModel]
             Specifies the model to use for text-to-speech conversion. Default is bulbul:v2.
 
-        …
+        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
             Specifies the audio codec for the output audio file. Different codecs offer various compression and quality characteristics.
 
         request_options : typing.Optional[RequestOptions]
@@ -238,7 +238,7 @@ class AsyncTextToSpeechClient:
             speech_sample_rate=speech_sample_rate,
             enable_preprocessing=enable_preprocessing,
             model=model,
-            …
+            output_audio_codec=output_audio_codec,
             request_options=request_options,
         )
         return _response.data
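A hedged sketch of the new parameter in use. The method name `convert`, the `text`/`target_language_code` arguments, and the `"mp3"` literal are assumptions based on earlier releases and the codec docstring, not confirmed by this diff:

```python
# Illustrative only: request MP3-encoded output from the text-to-speech client.
audio = client.text_to_speech.convert(
    text="Hello from Sarvam",        # assumed parameter
    target_language_code="en-IN",    # assumed parameter
    output_audio_codec="mp3",        # new optional parameter in 0.1.11a2
)
```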

sarvamai/text_to_speech/raw_client.py
CHANGED

@@ -13,10 +13,10 @@ from ..errors.forbidden_error import ForbiddenError
 from ..errors.internal_server_error import InternalServerError
 from ..errors.too_many_requests_error import TooManyRequestsError
 from ..errors.unprocessable_entity_error import UnprocessableEntityError
-from ..types.audio_codec import AudioCodec
 from ..types.speech_sample_rate import SpeechSampleRate
 from ..types.text_to_speech_language import TextToSpeechLanguage
 from ..types.text_to_speech_model import TextToSpeechModel
+from ..types.text_to_speech_output_audio_codec import TextToSpeechOutputAudioCodec
 from ..types.text_to_speech_response import TextToSpeechResponse
 from ..types.text_to_speech_speaker import TextToSpeechSpeaker
 
@@ -40,7 +40,7 @@ class RawTextToSpeechClient:
         speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
         enable_preprocessing: typing.Optional[bool] = OMIT,
         model: typing.Optional[TextToSpeechModel] = OMIT,
-        …
+        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> HttpResponse[TextToSpeechResponse]:
         """
@@ -84,7 +84,7 @@ class RawTextToSpeechClient:
         model : typing.Optional[TextToSpeechModel]
             Specifies the model to use for text-to-speech conversion. Default is bulbul:v2.
 
-        …
+        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
             Specifies the audio codec for the output audio file. Different codecs offer various compression and quality characteristics.
 
         request_options : typing.Optional[RequestOptions]
@@ -109,7 +109,7 @@ class RawTextToSpeechClient:
                 "speech_sample_rate": speech_sample_rate,
                 "enable_preprocessing": enable_preprocessing,
                 "model": model,
-                "…
+                "output_audio_codec": output_audio_codec,
             },
             headers={
                 "content-type": "application/json",
@@ -204,7 +204,7 @@ class AsyncRawTextToSpeechClient:
         speech_sample_rate: typing.Optional[SpeechSampleRate] = OMIT,
         enable_preprocessing: typing.Optional[bool] = OMIT,
         model: typing.Optional[TextToSpeechModel] = OMIT,
-        …
+        output_audio_codec: typing.Optional[TextToSpeechOutputAudioCodec] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> AsyncHttpResponse[TextToSpeechResponse]:
         """
@@ -248,7 +248,7 @@ class AsyncRawTextToSpeechClient:
         model : typing.Optional[TextToSpeechModel]
             Specifies the model to use for text-to-speech conversion. Default is bulbul:v2.
 
-        …
+        output_audio_codec : typing.Optional[TextToSpeechOutputAudioCodec]
             Specifies the audio codec for the output audio file. Different codecs offer various compression and quality characteristics.
 
         request_options : typing.Optional[RequestOptions]
@@ -273,7 +273,7 @@ class AsyncRawTextToSpeechClient:
                 "speech_sample_rate": speech_sample_rate,
                 "enable_preprocessing": enable_preprocessing,
                 "model": model,
-                "…
+                "output_audio_codec": output_audio_codec,
             },
             headers={
                 "content-type": "application/json",
sarvamai/types/__init__.py
CHANGED
@@ -2,7 +2,6 @@
 
 # isort: skip_file
 
-from .audio_codec import AudioCodec
 from .audio_data import AudioData
 from .audio_message import AudioMessage
 from .audio_output import AudioOutput
@@ -26,6 +25,7 @@ from .config_message import ConfigMessage
 from .configure_connection import ConfigureConnection
 from .configure_connection_data import ConfigureConnectionData
 from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+from .configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
 from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
 from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
 from .create_chat_completion_response import CreateChatCompletionResponse
@@ -44,7 +44,6 @@ from .files_request import FilesRequest
 from .files_upload_response import FilesUploadResponse
 from .finish_reason import FinishReason
 from .flush_signal import FlushSignal
-from .format import Format
 from .job_state import JobState
 from .job_status_v_1 import JobStatusV1
 from .language_identification_response import LanguageIdentificationResponse
@@ -78,6 +77,7 @@ from .task_file_details import TaskFileDetails
 from .task_state import TaskState
 from .text_to_speech_language import TextToSpeechLanguage
 from .text_to_speech_model import TextToSpeechModel
+from .text_to_speech_output_audio_codec import TextToSpeechOutputAudioCodec
 from .text_to_speech_response import TextToSpeechResponse
 from .text_to_speech_speaker import TextToSpeechSpeaker
 from .timestamps_model import TimestampsModel
@@ -94,7 +94,6 @@ from .transliterate_source_language import TransliterateSourceLanguage
 from .transliteration_response import TransliterationResponse
 
 __all__ = [
-    "AudioCodec",
     "AudioData",
     "AudioMessage",
     "AudioOutput",
@@ -116,6 +115,7 @@ __all__ = [
     "ConfigureConnection",
     "ConfigureConnectionData",
     "ConfigureConnectionDataOutputAudioBitrate",
+    "ConfigureConnectionDataOutputAudioCodec",
     "ConfigureConnectionDataSpeaker",
     "ConfigureConnectionDataTargetLanguageCode",
     "CreateChatCompletionResponse",
@@ -134,7 +134,6 @@ __all__ = [
     "FilesUploadResponse",
     "FinishReason",
     "FlushSignal",
-    "Format",
     "JobState",
     "JobStatusV1",
     "LanguageIdentificationResponse",
@@ -168,6 +167,7 @@ __all__ = [
     "TaskState",
     "TextToSpeechLanguage",
     "TextToSpeechModel",
+    "TextToSpeechOutputAudioCodec",
     "TextToSpeechResponse",
     "TextToSpeechSpeaker",
     "TimestampsModel",

sarvamai/types/configure_connection_data.py
CHANGED

@@ -5,6 +5,7 @@ import typing
 import pydantic
 from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
 from .configure_connection_data_output_audio_bitrate import ConfigureConnectionDataOutputAudioBitrate
+from .configure_connection_data_output_audio_codec import ConfigureConnectionDataOutputAudioCodec
 from .configure_connection_data_speaker import ConfigureConnectionDataSpeaker
 from .configure_connection_data_target_language_code import ConfigureConnectionDataTargetLanguageCode
 
@@ -63,7 +64,7 @@ class ConfigureConnectionData(UniversalBaseModel):
     of mixed-language text. Default is false.
     """
 
-    output_audio_codec: typing.Optional[…
+    output_audio_codec: typing.Optional[ConfigureConnectionDataOutputAudioCodec] = pydantic.Field(default=None)
     """
     Audio codec (currently supports MP3 only, optimized for real-time playback)
     """
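Illustrative only: the new field on the pydantic model can be set directly. The `"mp3"` value follows the field docstring ("currently supports MP3 only"), and the sketch assumes the remaining fields can stay at their defaults:

```python
from sarvamai.types import ConfigureConnectionData

# Hypothetical construction; all other fields left at their defaults.
config = ConfigureConnectionData(output_audio_codec="mp3")
```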

{sarvamai-0.1.11a0.dist-info → sarvamai-0.1.11a2.dist-info}/RECORD
CHANGED

@@ -1,11 +1,11 @@
-sarvamai/__init__.py,sha256=…
+sarvamai/__init__.py,sha256=StGifG-VL1O3Xam5IR6bQa_8QfRA46pDQ0BgidTQx4U,10318
 sarvamai/chat/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
 sarvamai/chat/client.py,sha256=xOSj83Gr6Q7eY2qUeATiuXYQqBqWqSCQlIEopK5fKus,11022
 sarvamai/chat/raw_client.py,sha256=A2kRuZcVWlJhyYCD7YKgqNkZEp3cYa1731KhRkhirU0,17885
 sarvamai/client.py,sha256=5YC2fxVENOxQXoY-t3n8qZ0aQ9UasDjFRzBZw8ce9OQ,7861
 sarvamai/core/__init__.py,sha256=YE2CtXeASe1RAbaI39twKWYKCuT4tW5is9HWHhJjR_g,1653
 sarvamai/core/api_error.py,sha256=44vPoTyWN59gonCIZMdzw7M1uspygiLnr3GNFOoVL2Q,614
-sarvamai/core/client_wrapper.py,sha256=…
+sarvamai/core/client_wrapper.py,sha256=iqRsEvtadYxKdwKY3YUlCec4a0SppCuIqnrLItNLO2c,2570
 sarvamai/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
 sarvamai/core/events.py,sha256=j7VWXgMpOsjCXdzY22wIhI7Q-v5InZ4WchRzA88x_Sk,856
 sarvamai/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
@@ -45,7 +45,7 @@ sarvamai/requests/choice.py,sha256=uulX4MZUoThEMcD3a80o_3V5YpnpqN8DfPaNZWVz-1o,8
 sarvamai/requests/completion_usage.py,sha256=LbZV-RxcxKdCAYqhCiaRtSFF3VwMJq71A989Z1rm-I8,428
 sarvamai/requests/config_message.py,sha256=EpYioGvDhCXDMvGH7Q1F7448zJzoHmlkQ1owoNGbWAw,383
 sarvamai/requests/configure_connection.py,sha256=a-foQtLxArL4CulvKEdeebbRqmS1GRmko3MZdnHVPEk,716
-sarvamai/requests/configure_connection_data.py,sha256=…
+sarvamai/requests/configure_connection_data.py,sha256=lRk_4rYPQLLlwS2HXjQ9Abxdf98_DuOOja-VkrIR44Q,3016
 sarvamai/requests/create_chat_completion_response.py,sha256=TqS9u5_WVWMok_NreT4TeOsLJQeybPkbJm45Q0Zxw30,857
 sarvamai/requests/diarized_entry.py,sha256=gbXB4D_r5_Q8gs1arRKjxPeFcYg16dVDLcg2VhxmKQA,462
 sarvamai/requests/diarized_transcript.py,sha256=X-znuJ45oqwXzVyJumBHSqVGLz6JnoYFZmluQlEpEAw,323
@@ -86,7 +86,8 @@ sarvamai/speech_to_text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUz
 sarvamai/speech_to_text/client.py,sha256=lp2G2fI9SUbeOBBE1S5tjcp-Xb8wIhAuVadLKwXveh8,11003
 sarvamai/speech_to_text/raw_client.py,sha256=A_56vEVeJdyttVJRiFxTMJ4n-s4l_PS8rI1DiLZlOmc,25331
 sarvamai/speech_to_text_job/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
-sarvamai/speech_to_text_job/client.py,sha256=…
+sarvamai/speech_to_text_job/client.py,sha256=DJhCmYQ1EYbmrhLalsqEv_3rZYJWTsT-W2wQrU6IaXU,14275
+sarvamai/speech_to_text_job/job.py,sha256=6h9hrpsKX21ql-umxEThtx8gU38cZj25WuBY9iBdnbI,13744
 sarvamai/speech_to_text_job/raw_client.py,sha256=v14drcQLAmpqozRUNKmw1F9j3omieMPC8R88Th1BID8,48055
 sarvamai/speech_to_text_streaming/__init__.py,sha256=q7QygMmZCHJ-4FMhhL_6XNV_dsqlIFRCO1iSxoyxaaY,437
 sarvamai/speech_to_text_streaming/client.py,sha256=WdkzZxKMdnQ2hHv9hzJlfSNggRJLKFljRiC7695Jcog,8224
@@ -109,14 +110,13 @@ sarvamai/text/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
 sarvamai/text/client.py,sha256=2kA0Gxfi-r52zMQdqRRD811014alzlHB_FANkp3Kn_c,30595
 sarvamai/text/raw_client.py,sha256=7xYmJA50kTKy_gj8tkAPckKp2djHB37zOdm0_icbMb8,48695
 sarvamai/text_to_speech/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
-sarvamai/text_to_speech/client.py,sha256=…
-sarvamai/text_to_speech/raw_client.py,sha256=…
+sarvamai/text_to_speech/client.py,sha256=iwrQNfoMgCSOgvztTIXtLHQmSmn0RInwt5RSo9TwdtA,9617
+sarvamai/text_to_speech/raw_client.py,sha256=si_aSjMR7SocIpKZFoVYqBmaIDuRm_6vxTM0dJ73PEo,15569
 sarvamai/text_to_speech_streaming/__init__.py,sha256=_VhToAyIt_5axN6CLJwtxg3-CO7THa_23pbUzqhXJa4,85
 sarvamai/text_to_speech_streaming/client.py,sha256=geTF5xy-batzO12XVt0sPw_XJCi7-m2sDFK_B7SL7qc,6088
 sarvamai/text_to_speech_streaming/raw_client.py,sha256=asOcNw1WAViOiXDVWH4sxWSXGVoLwAOh9vUtq_xralA,5269
 sarvamai/text_to_speech_streaming/socket_client.py,sha256=NEcijnvjuNcWfzqpBi-xWsXVkL0NPq6EGAkEjnaq9hw,13909
-sarvamai/types/__init__.py,sha256=…
-sarvamai/types/audio_codec.py,sha256=9qNJc1SdIP4IKHYwclqqtU9810qTaJtsSGx22AqKhKY,200
+sarvamai/types/__init__.py,sha256=gedZvTQHvAgl07VqBNMFY-su7Vnys_wBvCanZXD6Ews,7551
 sarvamai/types/audio_data.py,sha256=rgOukLkLNJ_HBBVE2g5dfEL2CWjRoGiMvCtpq0qTB1Y,829
 sarvamai/types/audio_message.py,sha256=sB4EgkWkWJzipYXobkmM9AYZTTZtCpg_ySKssUeznUE,560
 sarvamai/types/audio_output.py,sha256=Eq-YUZa1mSDwt7bax2c4Vv2gBlyM_JBJWzHhTAhFSko,621
@@ -133,8 +133,9 @@ sarvamai/types/choice.py,sha256=uXBCsjWP9VK3XWQWZUeI4EnU10w0G9nAfKn2tJZvxko,1244
 sarvamai/types/completion_usage.py,sha256=xYQGlQUbKqsksuV73H-1ajjfT5M7w47eLfdWXSlrI5M,843
 sarvamai/types/config_message.py,sha256=sGrT-qYTRqLVfIo5nRUuRlqPtPVmiAkUAnaMtlmQYCU,778
 sarvamai/types/configure_connection.py,sha256=SnSNk02gQqP8e4VB4y88jjeFQ4ClpImjGLn2ANI8cZ4,1058
-sarvamai/types/configure_connection_data.py,sha256=…
+sarvamai/types/configure_connection_data.py,sha256=uXC7fhNJWCpaKc2Vrz2DNpUxx1gN3PwAoDL-H8L401A,3537
 sarvamai/types/configure_connection_data_output_audio_bitrate.py,sha256=h00YvKLxsZC8L3__rH4XH53nN_GY40UElW1EjysCwUs,208
+sarvamai/types/configure_connection_data_output_audio_codec.py,sha256=ddd-MjgmKE0e5-TPgPKclBu4h9WLC5g3kL8Ap_91i50,228
 sarvamai/types/configure_connection_data_speaker.py,sha256=SzyAiK5LynXwb9KniaO2qoOLY-II3-PMZbRuIsQ9shw,230
 sarvamai/types/configure_connection_data_target_language_code.py,sha256=jrU1EblAtDYbybUO1KUkHhevmlSBj2AQxX13ii3QhAQ,275
 sarvamai/types/create_chat_completion_response.py,sha256=4nEzeWzHGW1_BmRAtOuGsbRZ0ojNgnzJSMUFyYuYviw,1285
@@ -153,7 +154,6 @@ sarvamai/types/files_request.py,sha256=Jh8xPjoOTjY7DOE2EieoRqtkWkYxz9j-BP8TvWxuR
 sarvamai/types/files_upload_response.py,sha256=wRntZyh1-LGpo4-x_986Nv2A9rv9asDx93pqoQxNpTY,804
 sarvamai/types/finish_reason.py,sha256=PBWtBNkX4FMaODmlUehpF6qLB5uH_zR-Mw3M4uhIB6U,209
 sarvamai/types/flush_signal.py,sha256=N7MJWb658KoxRpFN9cIbyQGY45zZcg8YCou3E1v--9o,759
-sarvamai/types/format.py,sha256=57LicD0XLqW4D1QEnZWsWGifzRy1GV9P5utKPXLoxtg,144
 sarvamai/types/job_state.py,sha256=H6Zph2mIcjsd3upEDt1VzIEORkEpnIDs0kH8BvIyrow,189
 sarvamai/types/job_status_v_1.py,sha256=i1xopAptPVbGGIUcjKWgjAzSKwLVy6y4oGVEYcOA1P0,1798
 sarvamai/types/language_identification_response.py,sha256=jG4ZQ6KQHCiEDqC51OniOwiRdW14Fbz22bbTsUDp_kc,1483
@@ -187,6 +187,7 @@ sarvamai/types/task_file_details.py,sha256=oJV7zaUVrbqqw-If-2_V1aLk28qW0ZbeIDtIp
 sarvamai/types/task_state.py,sha256=fSrmD00Goi0J6s9hzqcFqz3Fkh37diBYpxnz4FkwHdU,182
 sarvamai/types/text_to_speech_language.py,sha256=T5-rP93WyJwkdJTmNZuvNkkoVtgU0G25a8R2F3OwRZ4,254
 sarvamai/types/text_to_speech_model.py,sha256=qRkpGCcfrLD45l499cBUcBgZDo_qKPZtFxA7wPbp1NQ,128
+sarvamai/types/text_to_speech_output_audio_codec.py,sha256=lW-w0RIQiLZGdVmUgg20yYEblvaZ9AvvqzE6o7xRhWQ,224
 sarvamai/types/text_to_speech_response.py,sha256=Yzvwvwm65IR2vUzxZws9OLBW0GgB6bbmvjClqSuZzdg,742
 sarvamai/types/text_to_speech_speaker.py,sha256=300mXxDKnWV9O7ccuO8EkLooAu2-y2ZdqDynrckaHew,219
 sarvamai/types/timestamps_model.py,sha256=ZlqcxYNtAcm2c61NIwTcS2nGYMeM-T7hfhI0BMnnhI0,852
@@ -202,6 +203,6 @@ sarvamai/types/transliterate_mode.py,sha256=1jSEMlGcoLkWuk12TgoOpSgwifa4rThGKZ1h
 sarvamai/types/transliterate_source_language.py,sha256=bSY9wJszF0sg-Cgg6F-YcWC8ly1mIlj9rqa15-jBtx8,283
 sarvamai/types/transliteration_response.py,sha256=yt-lzTbDeJ_ZL4I8kQa6oESxA9ebeJJY7LfFHpdEsmM,815
 sarvamai/version.py,sha256=Qkp3Ee9YH-O9RTix90e0i7iNrFAGN-QDt2AFwGA4n8k,75
-sarvamai-0.1.11a0.dist-info/METADATA,sha256=…
-sarvamai-0.1.11a0.dist-info/WHEEL,sha256=…
-sarvamai-0.1.11a0.dist-info/RECORD,,
+sarvamai-0.1.11a2.dist-info/METADATA,sha256=r4vET9Ft9Lgl9wbRLlh3gR7Hd1LdyaQJjfUKd59n0lY,26753
+sarvamai-0.1.11a2.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+sarvamai-0.1.11a2.dist-info/RECORD,,
sarvamai/types/audio_codec.py
DELETED
sarvamai/types/format.py
DELETED
{sarvamai-0.1.11a0.dist-info → sarvamai-0.1.11a2.dist-info}/WHEEL
File without changes