sarvamai 0.1.23a6__py3-none-any.whl → 0.1.23a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +52 -0
- sarvamai/client.py +3 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/doc_digitization_job/__init__.py +4 -0
- sarvamai/doc_digitization_job/client.py +776 -0
- sarvamai/doc_digitization_job/job.py +496 -0
- sarvamai/doc_digitization_job/raw_client.py +1176 -0
- sarvamai/requests/__init__.py +20 -0
- sarvamai/requests/doc_digitization_create_job_response.py +25 -0
- sarvamai/requests/doc_digitization_download_files_response.py +37 -0
- sarvamai/requests/doc_digitization_error_details.py +21 -0
- sarvamai/requests/doc_digitization_error_message.py +11 -0
- sarvamai/requests/doc_digitization_job_detail.py +64 -0
- sarvamai/requests/doc_digitization_job_parameters.py +21 -0
- sarvamai/requests/doc_digitization_job_status_response.py +65 -0
- sarvamai/requests/doc_digitization_page_error.py +24 -0
- sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
- sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
- sarvamai/requests/speech_to_text_response.py +14 -6
- sarvamai/requests/speech_to_text_transcription_data.py +0 -14
- sarvamai/requests/speech_to_text_translate_response.py +9 -6
- sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -13
- sarvamai/speech_to_text_streaming/client.py +2 -30
- sarvamai/speech_to_text_streaming/raw_client.py +2 -30
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py +1 -25
- sarvamai/types/__init__.py +30 -0
- sarvamai/types/doc_digitization_create_job_response.py +37 -0
- sarvamai/types/doc_digitization_download_files_response.py +47 -0
- sarvamai/types/doc_digitization_error_code.py +15 -0
- sarvamai/types/doc_digitization_error_details.py +33 -0
- sarvamai/types/doc_digitization_error_message.py +23 -0
- sarvamai/types/doc_digitization_job_detail.py +74 -0
- sarvamai/types/doc_digitization_job_detail_state.py +7 -0
- sarvamai/types/doc_digitization_job_parameters.py +33 -0
- sarvamai/types/doc_digitization_job_state.py +7 -0
- sarvamai/types/doc_digitization_job_status_response.py +75 -0
- sarvamai/types/doc_digitization_output_format.py +5 -0
- sarvamai/types/doc_digitization_page_error.py +36 -0
- sarvamai/types/doc_digitization_supported_language.py +32 -0
- sarvamai/types/doc_digitization_upload_files_response.py +44 -0
- sarvamai/types/doc_digitization_webhook_callback.py +31 -0
- sarvamai/types/speech_to_text_language.py +1 -24
- sarvamai/types/speech_to_text_response.py +14 -6
- sarvamai/types/speech_to_text_transcription_data.py +0 -14
- sarvamai/types/speech_to_text_translate_language.py +1 -25
- sarvamai/types/speech_to_text_translate_response.py +9 -6
- sarvamai/types/speech_to_text_translate_transcription_data.py +0 -13
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a8.dist-info}/METADATA +1 -1
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a8.dist-info}/RECORD +50 -21
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a8.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
3
|
+
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
7
|
+
from ..core.request_options import RequestOptions
|
|
8
|
+
from ..requests.doc_digitization_job_parameters import (
|
|
9
|
+
DocDigitizationJobParametersParams,
|
|
10
|
+
)
|
|
11
|
+
from ..requests.doc_digitization_webhook_callback import (
|
|
12
|
+
DocDigitizationWebhookCallbackParams,
|
|
13
|
+
)
|
|
14
|
+
from ..types.doc_digitization_create_job_response import (
|
|
15
|
+
DocDigitizationCreateJobResponse,
|
|
16
|
+
)
|
|
17
|
+
from ..types.doc_digitization_download_files_response import (
|
|
18
|
+
DocDigitizationDownloadFilesResponse,
|
|
19
|
+
)
|
|
20
|
+
from ..types.doc_digitization_job_status_response import (
|
|
21
|
+
DocDigitizationJobStatusResponse,
|
|
22
|
+
)
|
|
23
|
+
from ..types.doc_digitization_upload_files_response import (
|
|
24
|
+
DocDigitizationUploadFilesResponse,
|
|
25
|
+
)
|
|
26
|
+
from ..types.doc_digitization_supported_language import DocDigitizationSupportedLanguage
|
|
27
|
+
from ..types.doc_digitization_output_format import DocDigitizationOutputFormat
|
|
28
|
+
from .raw_client import AsyncRawDocDigitizationJobClient, RawDocDigitizationJobClient
|
|
29
|
+
from .job import DocDigitizationJob, AsyncDocDigitizationJob
|
|
30
|
+
|
|
31
|
+
# this is used as the default value for optional parameters
|
|
32
|
+
# OMIT is the Ellipsis object cast to typing.Any, so it can be used as the
# default for a parameter of any annotated type.
# NOTE(review): presumably this lets the raw client tell "argument not
# supplied" apart from an explicit None -- confirm in raw_client.py.
OMIT = typing.cast(typing.Any, ...)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DocDigitizationJobClient:
    """
    Synchronous client for the document digitization job endpoints.

    Each endpoint method forwards its arguments to a
    ``RawDocDigitizationJobClient`` and returns the ``data`` attribute of the
    raw response. ``create_job`` and ``get_job`` additionally wrap a job ID in
    a ``DocDigitizationJob`` handle bound to this client.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        raw_client = RawDocDigitizationJobClient(client_wrapper=client_wrapper)
        self._raw_client = raw_client

    @property
    def with_raw_response(self) -> RawDocDigitizationJobClient:
        """
        Expose the underlying raw client, whose methods return raw responses
        rather than only the parsed ``data``.

        Returns
        -------
        RawDocDigitizationJobClient
        """
        return self._raw_client

    def initialise(
        self,
        *,
        job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
        callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationCreateJobResponse:
        """
        Create a new document digitization job.

        **Supported language codes:**
        `hi` (Hindi), `en` (English), `bn` (Bengali), `gu` (Gujarati),
        `kn` (Kannada), `ml` (Malayalam), `mr` (Marathi), `or` (Odia),
        `pa` (Punjabi), `ta` (Tamil), `te` (Telugu), `ur` (Urdu),
        `as` (Assamese), `bodo` (Bodo), `doi` (Dogri), `ks` (Kashmiri),
        `kok` (Konkani), `mai` (Maithili), `mni` (Manipuri), `ne` (Nepali),
        `sa` (Sanskrit), `sat` (Santali), `sd` (Sindhi)

        **Supported output formats:**
        - `html`: structured HTML that preserves layout (the default)
        - `md`: Markdown

        **Webhook callback:**
        A callback URL may be supplied to be notified when processing
        completes.

        Parameters
        ----------
        job_parameters : typing.Optional[DocDigitizationJobParametersParams]
            Job configuration parameters. Omit the request body to use defaults.

        callback : typing.Optional[DocDigitizationWebhookCallbackParams]
            Optional webhook to notify on completion.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationCreateJobResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.doc_digitization_job.initialise()
        """
        return self._raw_client.initialise(
            job_parameters=job_parameters,
            callback=callback,
            request_options=request_options,
        ).data

    def get_upload_links(
        self,
        *,
        job_id: str,
        files: typing.Sequence[str],
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationUploadFilesResponse:
        """
        Fetch presigned URLs for uploading a job's input file.

        **File constraints:**
        - Exactly one file is required, either a PDF or a ZIP
        - A PDF must use the `.pdf` extension
        - A ZIP must use the `.zip` extension

        Parameters
        ----------
        job_id : str
            Job identifier returned from Create Job.

        files : typing.Sequence[str]
            Filenames to upload (exactly 1 file: PDF or ZIP).

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationUploadFilesResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.doc_digitization_job.get_upload_links(
            job_id="job_id",
            files=["document.pdf"],
        )
        """
        return self._raw_client.get_upload_links(
            job_id=job_id,
            files=files,
            request_options=request_options,
        ).data

    def start(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationJobStatusResponse:
        """
        Validate the uploaded file and begin processing the job.

        **Validation checks:**
        - A file has been uploaded before the job is started
        - The file is no larger than 200 MB
        - A PDF can be parsed by the PDF parser
        - A ZIP contains only JPEG/PNG images
        - A ZIP is flat (no nested folders beyond one level)
        - A ZIP contains at least one valid image
        - The page/image count is no more than 500
        - The user has sufficient credits

        **Processing:**
        The job runs asynchronously. Poll the status endpoint, or rely on the
        webhook callback, to learn when it has completed.

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationJobStatusResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.doc_digitization_job.start(
            job_id="job_id",
        )
        """
        return self._raw_client.start(job_id, request_options=request_options).data

    def get_status(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationJobStatusResponse:
        """
        Fetch the current status of a job, including page-level metrics.

        **Job states:**
        - `Accepted`: job created, awaiting file upload
        - `Pending`: file uploaded, waiting to start
        - `Running`: processing in progress
        - `Completed`: all pages processed successfully
        - `PartiallyCompleted`: some pages succeeded, some failed
        - `Failed`: all pages failed, or a job-level error occurred

        **Page metrics:**
        The response reports detailed progress: total pages, pages processed,
        pages succeeded, pages failed, and per-page errors.

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationJobStatusResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.doc_digitization_job.get_status(
            job_id="job_id",
        )
        """
        return self._raw_client.get_status(
            job_id, request_options=request_options
        ).data

    def get_download_links(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationDownloadFilesResponse:
        """
        Fetch presigned URLs for downloading a job's output files.

        **Prerequisites:**
        - The job is in the `Completed` or `PartiallyCompleted` state
        - Failed jobs have no output available

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationDownloadFilesResponse
            Successful Response

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )
        client.doc_digitization_job.get_download_links(
            job_id="job_id",
        )
        """
        return self._raw_client.get_download_links(
            job_id, request_options=request_options
        ).data

    def create_job(
        self,
        language: DocDigitizationSupportedLanguage = "hi",
        output_format: DocDigitizationOutputFormat = "html",
        callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationJob:
        """
        Create a Document Digitization job and return a handle to it.

        The job is created through ``initialise`` with job parameters built
        from ``language`` and ``output_format``; the returned job ID is then
        wrapped in a ``DocDigitizationJob`` bound to this client.

        Parameters
        ----------
        language : DocDigitizationSupportedLanguage, default="hi"
            ISO language code for the document.

        output_format : DocDigitizationOutputFormat, default="html"
            Output format: "html" for structured HTML or "md" for Markdown.

        callback : typing.Optional[DocDigitizationWebhookCallbackParams], default=OMIT
            Optional webhook configuration for job completion notification.

        request_options : typing.Optional[RequestOptions], default=None
            Request-specific configuration.

        Returns
        -------
        DocDigitizationJob
            A handle to the newly created Document Digitization job.

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")

        # Create the job and obtain its handle
        job = client.doc_digitization_job.create_job(language="hi", output_format="md")

        # Upload a PDF, start processing, wait, then download the result
        job.upload_file("/path/to/document.pdf")
        job.start()
        job.wait_until_complete()
        job.download_output("./output.md")
        """
        parameters = DocDigitizationJobParametersParams(
            language=language,
            output_format=output_format,
        )
        created = self.initialise(
            job_parameters=parameters,
            callback=callback,
            request_options=request_options,
        )
        return DocDigitizationJob(job_id=created.job_id, client=self)

    def get_job(self, job_id: str) -> DocDigitizationJob:
        """
        Build a handle for an existing Document Digitization job.

        The job ID is not checked here; it is passed directly to the
        ``DocDigitizationJob`` handle together with this client.

        Parameters
        ----------
        job_id : str
            The job ID of a previously created Document Digitization job.

        Returns
        -------
        DocDigitizationJob
            A job handle which can be used to check status, upload files, or download results.

        Examples
        --------
        from sarvamai import SarvamAI

        client = SarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")

        # Attach to an existing job
        job = client.doc_digitization_job.get_job(job_id="your-job-uuid")
        status = job.get_status()
        """
        return DocDigitizationJob(job_id=job_id, client=self)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class AsyncDocDigitizationJobClient:
    """
    Asynchronous client for the document digitization job endpoints.

    Each endpoint coroutine awaits the matching method on an
    ``AsyncRawDocDigitizationJobClient`` and returns the ``data`` attribute of
    the raw response. ``create_job`` and ``get_job`` additionally wrap a job
    ID in an ``AsyncDocDigitizationJob`` handle bound to this client.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        raw_client = AsyncRawDocDigitizationJobClient(client_wrapper=client_wrapper)
        self._raw_client = raw_client

    @property
    def with_raw_response(self) -> AsyncRawDocDigitizationJobClient:
        """
        Expose the underlying raw client, whose methods return raw responses
        rather than only the parsed ``data``.

        Returns
        -------
        AsyncRawDocDigitizationJobClient
        """
        return self._raw_client

    async def initialise(
        self,
        *,
        job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
        callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationCreateJobResponse:
        """
        Create a new document digitization job.

        **Supported language codes:**
        `hi` (Hindi), `en` (English), `bn` (Bengali), `gu` (Gujarati),
        `kn` (Kannada), `ml` (Malayalam), `mr` (Marathi), `or` (Odia),
        `pa` (Punjabi), `ta` (Tamil), `te` (Telugu), `ur` (Urdu),
        `as` (Assamese), `bodo` (Bodo), `doi` (Dogri), `ks` (Kashmiri),
        `kok` (Konkani), `mai` (Maithili), `mni` (Manipuri), `ne` (Nepali),
        `sa` (Sanskrit), `sat` (Santali), `sd` (Sindhi)

        **Supported output formats:**
        - `html`: structured HTML that preserves layout (the default)
        - `md`: Markdown

        **Webhook callback:**
        A callback URL may be supplied to be notified when processing
        completes.

        Parameters
        ----------
        job_parameters : typing.Optional[DocDigitizationJobParametersParams]
            Job configuration parameters. Omit the request body to use defaults.

        callback : typing.Optional[DocDigitizationWebhookCallbackParams]
            Optional webhook to notify on completion.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationCreateJobResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.doc_digitization_job.initialise()


        asyncio.run(main())
        """
        raw_response = await self._raw_client.initialise(
            job_parameters=job_parameters,
            callback=callback,
            request_options=request_options,
        )
        return raw_response.data

    async def get_upload_links(
        self,
        *,
        job_id: str,
        files: typing.Sequence[str],
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationUploadFilesResponse:
        """
        Fetch presigned URLs for uploading a job's input file.

        **File constraints:**
        - Exactly one file is required, either a PDF or a ZIP
        - A PDF must use the `.pdf` extension
        - A ZIP must use the `.zip` extension

        Parameters
        ----------
        job_id : str
            Job identifier returned from Create Job.

        files : typing.Sequence[str]
            Filenames to upload (exactly 1 file: PDF or ZIP).

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationUploadFilesResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.doc_digitization_job.get_upload_links(
                job_id="job_id",
                files=["document.pdf"],
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.get_upload_links(
            job_id=job_id,
            files=files,
            request_options=request_options,
        )
        return raw_response.data

    async def start(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationJobStatusResponse:
        """
        Validate the uploaded file and begin processing the job.

        **Validation checks:**
        - A file has been uploaded before the job is started
        - The file is no larger than 200 MB
        - A PDF can be parsed by the PDF parser
        - A ZIP contains only JPEG/PNG images
        - A ZIP is flat (no nested folders beyond one level)
        - A ZIP contains at least one valid image
        - The page/image count is no more than 500
        - The user has sufficient credits

        **Processing:**
        The job runs asynchronously. Poll the status endpoint, or rely on the
        webhook callback, to learn when it has completed.

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationJobStatusResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.doc_digitization_job.start(
                job_id="job_id",
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.start(
            job_id, request_options=request_options
        )
        return raw_response.data

    async def get_status(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationJobStatusResponse:
        """
        Fetch the current status of a job, including page-level metrics.

        **Job states:**
        - `Accepted`: job created, awaiting file upload
        - `Pending`: file uploaded, waiting to start
        - `Running`: processing in progress
        - `Completed`: all pages processed successfully
        - `PartiallyCompleted`: some pages succeeded, some failed
        - `Failed`: all pages failed, or a job-level error occurred

        **Page metrics:**
        The response reports detailed progress: total pages, pages processed,
        pages succeeded, pages failed, and per-page errors.

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationJobStatusResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.doc_digitization_job.get_status(
                job_id="job_id",
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.get_status(
            job_id, request_options=request_options
        )
        return raw_response.data

    async def get_download_links(
        self,
        job_id: str,
        *,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DocDigitizationDownloadFilesResponse:
        """
        Fetch presigned URLs for downloading a job's output files.

        **Prerequisites:**
        - The job is in the `Completed` or `PartiallyCompleted` state
        - Failed jobs have no output available

        Parameters
        ----------
        job_id : str
            The unique identifier of the job.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        DocDigitizationDownloadFilesResponse
            Successful Response

        Examples
        --------
        import asyncio

        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(
            api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
        )


        async def main() -> None:
            await client.doc_digitization_job.get_download_links(
                job_id="job_id",
            )


        asyncio.run(main())
        """
        raw_response = await self._raw_client.get_download_links(
            job_id, request_options=request_options
        )
        return raw_response.data

    async def create_job(
        self,
        language: DocDigitizationSupportedLanguage = "hi",
        output_format: DocDigitizationOutputFormat = "html",
        callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncDocDigitizationJob:
        """
        Create a Document Digitization job and return a handle to it.

        The job is created by awaiting ``initialise`` with job parameters
        built from ``language`` and ``output_format``; the returned job ID is
        then wrapped in an ``AsyncDocDigitizationJob`` bound to this client.

        Parameters
        ----------
        language : DocDigitizationSupportedLanguage, default="hi"
            ISO language code for the document.

        output_format : DocDigitizationOutputFormat, default="html"
            Output format: "html" for structured HTML or "md" for Markdown.

        callback : typing.Optional[DocDigitizationWebhookCallbackParams], default=OMIT
            Optional webhook configuration for job completion notification.

        request_options : typing.Optional[RequestOptions], default=None
            Request-specific configuration.

        Returns
        -------
        AsyncDocDigitizationJob
            A handle to the newly created Document Digitization job.

        Examples
        --------
        import asyncio
        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")

        async def main():
            job = await client.doc_digitization_job.create_job(language="hi", output_format="md")
            await job.upload_file("/path/to/document.pdf")
            await job.start()
            await job.wait_until_complete()
            await job.download_output("./output.md")

        asyncio.run(main())
        """
        parameters = DocDigitizationJobParametersParams(
            language=language,
            output_format=output_format,
        )
        created = await self.initialise(
            job_parameters=parameters,
            callback=callback,
            request_options=request_options,
        )
        return AsyncDocDigitizationJob(job_id=created.job_id, client=self)

    def get_job(self, job_id: str) -> AsyncDocDigitizationJob:
        """
        Build a handle for an existing Document Digitization job.

        This is a regular (non-async) method. The job ID is not checked here;
        it is passed directly to the ``AsyncDocDigitizationJob`` handle
        together with this client.

        Parameters
        ----------
        job_id : str
            The job ID of a previously created Document Digitization job.

        Returns
        -------
        AsyncDocDigitizationJob
            A job handle which can be used to check status, upload files, or download results.

        Examples
        --------
        import asyncio
        from sarvamai import AsyncSarvamAI

        client = AsyncSarvamAI(api_subscription_key="YOUR_API_SUBSCRIPTION_KEY")

        async def main():
            job = client.doc_digitization_job.get_job(job_id="your-job-uuid")
            status = await job.get_status()

        asyncio.run(main())
        """
        return AsyncDocDigitizationJob(job_id=job_id, client=self)
|