sarvamai 0.1.12a1__py3-none-any.whl → 0.1.13a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +4 -4
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/requests/__init__.py +2 -2
- sarvamai/requests/bulk_job_init_response_v_1.py +0 -6
- sarvamai/requests/{job_status_v_1.py → job_status_v_1_response.py} +1 -11
- sarvamai/speech_to_text_job/client.py +21 -196
- sarvamai/speech_to_text_job/raw_client.py +17 -17
- sarvamai/speech_to_text_translate_job/client.py +18 -171
- sarvamai/speech_to_text_translate_job/raw_client.py +17 -17
- sarvamai/types/__init__.py +2 -2
- sarvamai/types/bulk_job_init_response_v_1.py +0 -6
- sarvamai/types/{job_status_v_1.py → job_status_v_1_response.py} +1 -11
- {sarvamai-0.1.12a1.dist-info → sarvamai-0.1.13a2.dist-info}/METADATA +1 -1
- {sarvamai-0.1.12a1.dist-info → sarvamai-0.1.13a2.dist-info}/RECORD +15 -17
- sarvamai/speech_to_text_job/job.py +0 -472
- sarvamai/speech_to_text_translate_job/job.py +0 -479
- {sarvamai-0.1.12a1.dist-info → sarvamai-0.1.13a2.dist-info}/WHEEL +0 -0
|
@@ -1,472 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import mimetypes
|
|
3
|
-
import os
|
|
4
|
-
import time
|
|
5
|
-
import typing
|
|
6
|
-
import httpx
|
|
7
|
-
|
|
8
|
-
from ..types import JobStatusV1
|
|
9
|
-
|
|
10
|
-
if typing.TYPE_CHECKING:
|
|
11
|
-
from .client import AsyncSpeechToTextJobClient, SpeechToTextJobClient
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class AsyncSpeechToTextJob:
    """
    Handle for one speech-to-text job, driven through an async client.

    Status, start and link lookups are delegated to the client passed to
    the constructor. File uploads and downloads are sent with a separate
    ``httpx.AsyncClient`` to the URLs that client returns.
    """

    # Lower-cased job states that stop polling in ``wait_until_complete``
    # and count as "complete" in ``is_complete``.
    _TERMINAL_STATES = frozenset({"completed", "failed"})

    # Content type sent when the file extension is not recognised.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    def __init__(self, job_id: str, client: "AsyncSpeechToTextJobClient"):
        """
        Initialize the asynchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : AsyncSpeechToTextJobClient
            The async client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.
        """
        self._job_id = job_id
        self._client = client

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the MIME type guessed from ``path``, or ``audio/wav`` if unknown."""
        guessed, _ = mimetypes.guess_type(path)
        if guessed is None:
            return AsyncSpeechToTextJob._DEFAULT_CONTENT_TYPE
        return guessed

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """
        Build the local path ``<output_dir>/<input file name>.json``.

        Only the final path component of ``input_file`` is used, so a name
        containing directories (or an absolute path) cannot place the file
        outside ``output_dir``.
        """
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    async def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Upload URLs are requested for the base name of every path, then each
        file is sent with an HTTP PUT. The content type is guessed from the
        file name and falls back to ``audio/wav``.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If any upload answers with a status code other than 201.
        """
        upload_links = await self._client.get_upload_links(
            job_id=self._job_id,
            files=[os.path.basename(p) for p in file_paths],
        )
        async with httpx.AsyncClient() as session:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                # Read the whole file and close it before awaiting the
                # network call, so no file handle stays open during the PUT.
                with open(path, "rb") as f:
                    payload = f.read()
                response = await session.put(
                    url,
                    content=payload,
                    headers={
                        "x-ms-blob-type": "BlockBlob",
                        "Content-Type": self._guess_content_type(path),
                    },
                )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    async def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> "JobStatusV1":
        """
        Polls job status until it completes or fails.

        The status is always fetched at least once; the timeout is only
        checked after a non-terminal status has been seen.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        # get_running_loop() is the documented way to reach the loop from
        # inside a coroutine; its clock is monotonic.
        loop = asyncio.get_running_loop()
        start = loop.time()
        while True:
            status = await self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            if loop.time() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            await asyncio.sleep(poll_interval)

    async def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details with no inputs or no outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = await self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    async def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        ``output_dir`` is created if missing. Each output is stored as
        ``<input file name>.json`` inside it.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = await self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = await self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        async with httpx.AsyncClient() as session:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = await session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                # The file name comes from the job status response, so it is
                # reduced to its base name before touching the filesystem.
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    async def get_status(self) -> "JobStatusV1":
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.get_status(self._job_id)

    async def start(self) -> "JobStatusV1":
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.start(job_id=self._job_id)

    async def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status lookup succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): only httpx.HTTPStatusError is mapped to False.
        # Confirm this is the exception type the client raises for a
        # missing job; otherwise this method raises instead.
        try:
            await self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    async def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        state = (await self.get_status()).job_state.lower()
        return state in self._TERMINAL_STATES

    async def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "completed"

    async def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "failed"
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
class SpeechToTextJob:
    """
    Handle for one speech-to-text job, driven through a synchronous client.

    Status, start and link lookups are delegated to the client passed to
    the constructor. File uploads and downloads are sent with a separate
    ``httpx.Client`` to the URLs that client returns.
    """

    # Lower-cased job states that stop polling in ``wait_until_complete``
    # and count as "complete" in ``is_complete``.
    _TERMINAL_STATES = frozenset({"completed", "failed"})

    # Content type sent when the file extension is not recognised.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    def __init__(self, job_id: str, client: "SpeechToTextJobClient"):
        """
        Initialize the synchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : SpeechToTextJobClient
            The client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.
        """
        self._job_id = job_id
        self._client = client

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the MIME type guessed from ``path``, or ``audio/wav`` if unknown."""
        guessed, _ = mimetypes.guess_type(path)
        if guessed is None:
            return SpeechToTextJob._DEFAULT_CONTENT_TYPE
        return guessed

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """
        Build the local path ``<output_dir>/<input file name>.json``.

        Only the final path component of ``input_file`` is used, so a name
        containing directories (or an absolute path) cannot place the file
        outside ``output_dir``.
        """
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Upload URLs are requested for the base name of every path, then each
        file is sent with an HTTP PUT. The content type is guessed from the
        file name and falls back to ``audio/wav``, matching the async job
        class instead of labelling every file as WAV.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If any upload answers with a status code other than 201.
        """
        upload_links = self._client.get_upload_links(
            job_id=self._job_id, files=[os.path.basename(p) for p in file_paths]
        )
        with httpx.Client() as client:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                with open(path, "rb") as f:
                    # The open file object is handed to httpx as the body.
                    response = client.put(
                        url,
                        content=f,
                        headers={
                            "x-ms-blob-type": "BlockBlob",
                            "Content-Type": self._guess_content_type(path),
                        },
                    )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> "JobStatusV1":
        """
        Polls job status until it completes or fails.

        The status is always fetched at least once; the timeout is only
        checked after a non-terminal status has been seen.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        start = time.monotonic()
        while True:
            status = self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            if time.monotonic() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            time.sleep(poll_interval)

    def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details with no inputs or no outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        ``output_dir`` is created if missing. Each output is stored as
        ``<input file name>.json`` inside it.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        with httpx.Client() as client:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = client.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                # The file name comes from the job status response, so it is
                # reduced to its base name before touching the filesystem.
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    def get_status(self) -> "JobStatusV1":
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return self._client.get_status(self._job_id)

    def start(self) -> "JobStatusV1":
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return self._client.start(job_id=self._job_id)

    def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status lookup succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): only httpx.HTTPStatusError is mapped to False.
        # Confirm this is the exception type the client raises for a
        # missing job; otherwise this method raises instead.
        try:
            self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() in self._TERMINAL_STATES

    def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "completed"

    def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "failed"
|