sarvamai 0.1.10__py3-none-any.whl → 0.1.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. sarvamai/__init__.py +52 -0
  2. sarvamai/client.py +3 -0
  3. sarvamai/core/client_wrapper.py +2 -2
  4. sarvamai/errors/service_unavailable_error.py +1 -2
  5. sarvamai/requests/__init__.py +22 -0
  6. sarvamai/requests/base_job_parameters.py +7 -0
  7. sarvamai/requests/bulk_job_callback.py +15 -0
  8. sarvamai/requests/bulk_job_init_response_v_1.py +27 -0
  9. sarvamai/requests/file_signed_url_details.py +10 -0
  10. sarvamai/requests/files_download_response.py +15 -0
  11. sarvamai/requests/files_request.py +10 -0
  12. sarvamai/requests/files_upload_response.py +15 -0
  13. sarvamai/requests/job_status_v_1.py +70 -0
  14. sarvamai/requests/speech_to_text_job_parameters.py +32 -0
  15. sarvamai/requests/task_detail_v_1.py +15 -0
  16. sarvamai/requests/task_file_details.py +8 -0
  17. sarvamai/speech_to_text/raw_client.py +8 -9
  18. sarvamai/speech_to_text_job/__init__.py +4 -0
  19. sarvamai/speech_to_text_job/client.py +529 -0
  20. sarvamai/speech_to_text_job/job.py +468 -0
  21. sarvamai/speech_to_text_job/raw_client.py +1189 -0
  22. sarvamai/types/__init__.py +28 -0
  23. sarvamai/types/base_job_parameters.py +17 -0
  24. sarvamai/types/bulk_job_callback.py +27 -0
  25. sarvamai/types/bulk_job_init_response_v_1.py +39 -0
  26. sarvamai/types/file_signed_url_details.py +20 -0
  27. sarvamai/types/files_download_response.py +25 -0
  28. sarvamai/types/files_request.py +20 -0
  29. sarvamai/types/files_upload_response.py +25 -0
  30. sarvamai/types/job_state.py +5 -0
  31. sarvamai/types/job_status_v_1.py +80 -0
  32. sarvamai/types/speech_to_text_job_parameters.py +44 -0
  33. sarvamai/types/storage_container_type.py +5 -0
  34. sarvamai/types/task_detail_v_1.py +25 -0
  35. sarvamai/types/task_file_details.py +20 -0
  36. sarvamai/types/task_state.py +5 -0
  37. {sarvamai-0.1.10.dist-info → sarvamai-0.1.11a1.dist-info}/METADATA +1 -1
  38. {sarvamai-0.1.10.dist-info → sarvamai-0.1.11a1.dist-info}/RECORD +39 -10
  39. {sarvamai-0.1.10.dist-info → sarvamai-0.1.11a1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,468 @@
1
+ import asyncio
2
+ import os
3
+ import time
4
+ import typing
5
+ import httpx
6
+
7
+ from ..types import JobStatusV1
8
+
9
+ if typing.TYPE_CHECKING:
10
+ from .client import AsyncSpeechToTextJobClient, SpeechToTextJobClient
11
+
12
+
13
class AsyncSpeechToTextJob:
    """
    Handle for one speech-to-text job, driven through an async client.

    Wraps a job ID together with the client that owns it and offers
    helpers to upload inputs, start the job, poll its status and download
    the outputs.
    """

    # Lower-cased job states after which polling stops.
    _TERMINAL_STATES = frozenset({"completed", "failed"})
    # Content type used for WAV files and for files whose type is unknown.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    def __init__(self, job_id: str, client: "AsyncSpeechToTextJobClient"):
        """
        Initialize the asynchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : AsyncSpeechToTextJobClient
            The async client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.
        """
        self._job_id = job_id
        self._client = client

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the Content-Type header value to send for the file at ``path``."""
        # Function-scope import keeps this class self-contained.
        import mimetypes

        # WAV files keep the label this method has always sent, whatever the
        # platform's MIME table would call them.
        if os.path.splitext(path)[1].lower() == ".wav":
            return AsyncSpeechToTextJob._DEFAULT_CONTENT_TYPE
        guessed, _ = mimetypes.guess_type(path)
        return guessed or AsyncSpeechToTextJob._DEFAULT_CONTENT_TYPE

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """Return the local path where the result for ``input_file`` is saved."""
        # The name comes from the job status response; keep only its final
        # component so path separators in it cannot redirect the write to a
        # location outside ``output_dir``.
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    async def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Upload URLs are looked up by base file name, so two paths that share
        a base name resolve to the same upload URL.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If an upload answers with a status code other than 201.
        """
        upload_links = await self._client.get_upload_links(
            job_id=self._job_id,
            files=[os.path.basename(p) for p in file_paths],
        )
        async with httpx.AsyncClient() as session:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                # The whole file is read into memory before it is sent.
                with open(path, "rb") as f:
                    payload = f.read()
                response = await session.put(
                    url,
                    content=payload,
                    headers={
                        "x-ms-blob-type": "BlockBlob",
                        "Content-Type": self._guess_content_type(path),
                    },
                )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    async def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        # This coroutine always runs inside a loop, so ask for the running
        # loop directly instead of going through get_event_loop().
        loop = asyncio.get_running_loop()
        start = loop.time()
        while True:
            status = await self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            # The deadline is only checked after a status call, so the last
            # poll may finish slightly after ``timeout`` seconds.
            if loop.time() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            await asyncio.sleep(poll_interval)

    async def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details lacking inputs or outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = await self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    async def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Each output is saved as ``<input file name>.json`` inside
        ``output_dir``, which is created if it does not exist.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = await self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = await self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        async with httpx.AsyncClient() as session:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = await session.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    async def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.get_status(self._job_id)

    async def start(self) -> JobStatusV1:
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return await self._client.start(job_id=self._job_id)

    async def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status call succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): confirm the client really raises httpx.HTTPStatusError
        # for an unknown job; if it raises its own error type instead, this
        # method never returns False.
        try:
            await self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    async def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        state = (await self.get_status()).job_state.lower()
        return state in self._TERMINAL_STATES

    async def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "completed"

    async def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return (await self.get_status()).job_state.lower() == "failed"
241
+
242
+
243
class SpeechToTextJob:
    """
    Handle for one speech-to-text job, driven through a synchronous client.

    Wraps a job ID together with the client that owns it and offers
    helpers to upload inputs, start the job, poll its status and download
    the outputs.
    """

    # Lower-cased job states after which polling stops.
    _TERMINAL_STATES = frozenset({"completed", "failed"})
    # Content type used for WAV files and for files whose type is unknown.
    _DEFAULT_CONTENT_TYPE = "audio/wav"

    def __init__(self, job_id: str, client: "SpeechToTextJobClient"):
        """
        Initialize the synchronous speech-to-text job.

        Parameters
        ----------
        job_id : str
            The unique job identifier returned from a previous job initialization.

        client : SpeechToTextJobClient
            The client instance used to create the job.

            !!! important
                This must be the **same client instance** that was used to initialize
                the job originally, as it contains the subscription key and configuration
                required to authenticate and manage the job.
        """
        self._job_id = job_id
        self._client = client

    @staticmethod
    def _guess_content_type(path: str) -> str:
        """Return the Content-Type header value to send for the file at ``path``."""
        # Function-scope import keeps this class self-contained.
        import mimetypes

        # WAV files keep the label this method has always sent, whatever the
        # platform's MIME table would call them.
        if os.path.splitext(path)[1].lower() == ".wav":
            return SpeechToTextJob._DEFAULT_CONTENT_TYPE
        guessed, _ = mimetypes.guess_type(path)
        return guessed or SpeechToTextJob._DEFAULT_CONTENT_TYPE

    @staticmethod
    def _output_path(output_dir: str, input_file: str) -> str:
        """Return the local path where the result for ``input_file`` is saved."""
        # The name comes from the job status response; keep only its final
        # component so path separators in it cannot redirect the write to a
        # location outside ``output_dir``.
        return os.path.join(output_dir, f"{os.path.basename(input_file)}.json")

    @property
    def job_id(self) -> str:
        """
        Returns the job ID associated with this job instance.

        Returns
        -------
        str
        """
        return self._job_id

    def upload_files(self, file_paths: typing.Sequence[str]) -> bool:
        """
        Upload input audio files for the speech-to-text job.

        Upload URLs are looked up by base file name, so two paths that share
        a base name resolve to the same upload URL.

        Parameters
        ----------
        file_paths : Sequence[str]
            List of full paths to local audio files.

        Returns
        -------
        bool
            True if all files are uploaded successfully.

        Raises
        ------
        RuntimeError
            If an upload answers with a status code other than 201.
        """
        upload_links = self._client.get_upload_links(
            job_id=self._job_id, files=[os.path.basename(p) for p in file_paths]
        )
        with httpx.Client() as client:
            for path in file_paths:
                file_name = os.path.basename(path)
                url = upload_links.upload_urls[file_name].file_url
                # The open file object is handed to httpx as the request
                # body, so it must stay open until the PUT has returned.
                with open(path, "rb") as f:
                    response = client.put(
                        url,
                        content=f,
                        headers={
                            "x-ms-blob-type": "BlockBlob",
                            "Content-Type": self._guess_content_type(path),
                        },
                    )
                if response.status_code != 201:
                    raise RuntimeError(
                        f"Upload failed for {file_name}: {response.status_code}"
                    )
        return True

    def wait_until_complete(
        self, poll_interval: int = 5, timeout: int = 600
    ) -> JobStatusV1:
        """
        Polls job status until it completes or fails.

        Parameters
        ----------
        poll_interval : int, optional
            Time in seconds between polling attempts (default is 5).

        timeout : int, optional
            Maximum time to wait for completion in seconds (default is 600).

        Returns
        -------
        JobStatusV1
            Final job status.

        Raises
        ------
        TimeoutError
            If the job does not complete within the given timeout.
        """
        start = time.monotonic()
        while True:
            status = self.get_status()
            if status.job_state.lower() in self._TERMINAL_STATES:
                return status
            # The deadline is only checked after a status call, so the last
            # poll may finish slightly after ``timeout`` seconds.
            if time.monotonic() - start > timeout:
                raise TimeoutError(
                    f"Job {self._job_id} did not complete within {timeout} seconds."
                )
            time.sleep(poll_interval)

    def get_output_mappings(self) -> typing.List[typing.Dict[str, str]]:
        """
        Get the mapping of input files to their corresponding output files.

        Only the first input and first output of each job detail are used;
        details lacking inputs or outputs are skipped.

        Returns
        -------
        List[Dict[str, str]]
            List of mappings with keys 'input_file' and 'output_file'.
        """
        job_status = self.get_status()
        return [
            {
                "input_file": detail.inputs[0].file_name,
                "output_file": detail.outputs[0].file_name,
            }
            for detail in (job_status.job_details or [])
            if detail.inputs and detail.outputs
        ]

    def download_outputs(self, output_dir: str) -> bool:
        """
        Download output files to the specified directory.

        Each output is saved as ``<input file name>.json`` inside
        ``output_dir``, which is created if it does not exist.

        Parameters
        ----------
        output_dir : str
            Local directory where outputs will be saved.

        Returns
        -------
        bool
            True if all files downloaded successfully.

        Raises
        ------
        RuntimeError
            If a file fails to download.
        """
        mappings = self.get_output_mappings()
        file_names = [m["output_file"] for m in mappings]
        download_links = self._client.get_download_links(
            job_id=self._job_id, files=file_names
        )

        os.makedirs(output_dir, exist_ok=True)
        with httpx.Client() as client:
            for m in mappings:
                url = download_links.download_urls[m["output_file"]].file_url
                response = client.get(url)
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Download failed for {m['output_file']}: {response.status_code}"
                    )
                output_path = self._output_path(output_dir, m["input_file"])
                with open(output_path, "wb") as f:
                    f.write(response.content)
        return True

    def get_status(self) -> JobStatusV1:
        """
        Retrieve the current status of the job.

        Returns
        -------
        JobStatusV1
        """
        return self._client.get_status(self._job_id)

    def start(self) -> JobStatusV1:
        """
        Start the speech-to-text job processing.

        Returns
        -------
        JobStatusV1
        """
        return self._client.start(job_id=self._job_id)

    def exists(self) -> bool:
        """
        Check if the job exists in the system.

        Returns
        -------
        bool
            True if the status call succeeds, False if it raises
            ``httpx.HTTPStatusError``. Any other exception propagates.
        """
        # NOTE(review): confirm the client really raises httpx.HTTPStatusError
        # for an unknown job; if it raises its own error type instead, this
        # method never returns False.
        try:
            self.get_status()
            return True
        except httpx.HTTPStatusError:
            return False

    def is_complete(self) -> bool:
        """
        Check if the job is either completed or failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() in self._TERMINAL_STATES

    def is_successful(self) -> bool:
        """
        Check if the job completed successfully.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "completed"

    def is_failed(self) -> bool:
        """
        Check if the job has failed.

        Returns
        -------
        bool
        """
        return self.get_status().job_state.lower() == "failed"