sarvamai 0.1.11a2__py3-none-any.whl → 0.1.11a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,479 @@
1
+ import asyncio
2
+ import mimetypes
3
+ import os
4
+ import time
5
+ import typing
6
+ import httpx
7
+
8
+ from ..types import JobStatusV1
9
+
10
+ if typing.TYPE_CHECKING:
11
+ from .client import (
12
+ AsyncSpeechToTextTranslateJobClient,
13
+ SpeechToTextTranslateJobClient,
14
+ )
15
+
16
+
17
class AsyncSpeechToTextTranslateJob:
    """
    Handle for a single speech-to-text-translate job, driven by an async client.

    Job management calls (status, start, upload/download links) are delegated
    to the client passed to the constructor; the file transfers themselves are
    performed with ``httpx`` against the URLs that the client returns.
    """

    # Content type sent when the type cannot be guessed from the file name.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    # Lower-cased job states after which the job is no longer polled.
    _TERMINAL_STATES = frozenset({"completed", "failed"})

    def __init__(
        self,
        job_id: str,
        client: "AsyncSpeechToTextTranslateJobClient",
    ):
        """
        Initialise the asynchronous speech-to-text-translate job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialisation.

        client : AsyncSpeechToTextTranslateJobClient
            The async client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialise
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.

        """
        self._job_id = job_id
        self._client = client

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the MIME type guessed from ``path``, or ``audio/wav`` if unknown."""
        content_type, _ = mimetypes.guess_type(path)
        if content_type is None:
            return AsyncSpeechToTextTranslateJob._DEFAULT_CONTENT_TYPE
        return content_type

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """Return ``<output_dir>/<basename of input_file>.json``."""
        # Only the final path component of the reported name is used, so a name
        # containing directory parts (e.g. "../x") cannot leave ``output_dir``.
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    async def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text-translate job.

        Upload URLs are requested from the client by file base name, then each
        file is sent with an HTTP PUT. The ``Content-Type`` header is guessed
        from the file name and falls back to ``audio/wav``.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If the upload of a file does not return HTTP status 201.
        """
        upload_links = await self._client.get_upload_links(
            job_id=self._job_id,
            files=[os.path.basename(p) for p in file_paths],
        )
        async with httpx.AsyncClient() as session:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                # Read the whole file up front; the handle is closed before the
                # upload is awaited.
                with open(path, "rb") as f:
                    payload = f.read()
                response = await session.put(
                    url,
                    content=payload,
                    headers={
                        # NOTE(review): header name suggests the upload URLs
                        # point at Azure Blob storage - confirm.
                        "x-ms-blob-type": "BlockBlob",
                        "Content-Type": self._guess_content_type(path),
                    },
                )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    async def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        The status is always fetched at least once; the timeout is only
        checked after a fetch that did not report a terminal state.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        # We are inside a coroutine, so a running loop is guaranteed to exist.
        loop = asyncio.get_running_loop()
        start = loop.time()
        while True:
            status = await self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            if loop.time() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            await asyncio.sleep(poll_interval)

    async def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details that lack inputs or outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = await self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    async def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Each output is written to ``<output_dir>/<input file name>.json``.
        The directory is created if it does not exist.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = await self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = await self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        async with httpx.AsyncClient() as session:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = await session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    async def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.get_status(self._job_id)

    async def start(self) -> JobStatusV1:
        """
        Start the speech-to-text-translate job processing.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.start(job_id=self._job_id)

    async def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status request succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): confirm the client reports a missing job as
        # httpx.HTTPStatusError; other exception types are not caught here.
        try:
            await self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    async def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        state = (await self.get_status()).job_state.lower()
        return state in self._TERMINAL_STATES

    async def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "completed"

    async def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "failed"
249
+
250
+
251
class SpeechToTextTranslateJob:
    """
    Handle for a single speech-to-text-translate job, driven by a sync client.

    Job management calls (status, start, upload/download links) are delegated
    to the client passed to the constructor; the file transfers themselves are
    performed with ``httpx`` against the URLs that the client returns.
    """

    # Content type sent when the type cannot be guessed from the file name.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    # Lower-cased job states after which the job is no longer polled.
    _TERMINAL_STATES = frozenset({"completed", "failed"})

    def __init__(self, job_id: str, client: "SpeechToTextTranslateJobClient"):
        """
        Initialise the synchronous speech-to-text-translate job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialisation.

        client : SpeechToTextTranslateJobClient
            The client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialise
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.

        """
        self._job_id = job_id
        self._client = client

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the MIME type guessed from ``path``, or ``audio/wav`` if unknown."""
        content_type, _ = mimetypes.guess_type(path)
        if content_type is None:
            return SpeechToTextTranslateJob._DEFAULT_CONTENT_TYPE
        return content_type

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """Return ``<output_dir>/<basename of input_file>.json``."""
        # Only the final path component of the reported name is used, so a name
        # containing directory parts (e.g. "../x") cannot leave ``output_dir``.
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text-translate job.

        Upload URLs are requested from the client by file base name, then each
        file is sent with an HTTP PUT. The ``Content-Type`` header is guessed
        from the file name and falls back to ``audio/wav``.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If the upload of a file does not return HTTP status 201.
        """
        upload_links = self._client.get_upload_links(
            job_id=self._job_id, files=[os.path.basename(p) for p in file_paths]
        )
        # Named ``session`` to keep it distinct from the job client in
        # ``self._client``.
        with httpx.Client() as session:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                # The open file object is handed to httpx as the request body,
                # so it must stay open until the PUT has returned.
                with open(path, "rb") as f:
                    response = session.put(
                        url,
                        content=f,
                        headers={
                            # NOTE(review): header name suggests the upload
                            # URLs point at Azure Blob storage - confirm.
                            "x-ms-blob-type": "BlockBlob",
                            "Content-Type": self._guess_content_type(path),
                        },
                    )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        The status is always fetched at least once; the timeout is only
        checked after a fetch that did not report a terminal state. This call
        blocks the calling thread between polls.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        start = time.monotonic()
        while True:
            status = self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            if time.monotonic() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            time.sleep(poll_interval)

    def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details that lack inputs or outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Each output is written to ``<output_dir>/<input file name>.json``.
        The directory is created if it does not exist.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        with httpx.Client() as session:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return self._client.get_status(self._job_id)

    def start(self) -> JobStatusV1:
        """
        Start the speech-to-text-translate job processing.

        Returns
        -------
        JobStatusV1
        """
        return self._client.start(job_id=self._job_id)

    def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status request succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): confirm the client reports a missing job as
        # httpx.HTTPStatusError; other exception types are not caught here.
        try:
            self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() in self._TERMINAL_STATES

    def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "completed"

    def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "failed"