sarvamai 0.1.23a6__py3-none-any.whl → 0.1.23a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +52 -0
- sarvamai/client.py +3 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/doc_digitization_job/__init__.py +4 -0
- sarvamai/doc_digitization_job/client.py +570 -0
- sarvamai/doc_digitization_job/raw_client.py +1176 -0
- sarvamai/requests/__init__.py +20 -0
- sarvamai/requests/doc_digitization_create_job_response.py +25 -0
- sarvamai/requests/doc_digitization_download_files_response.py +37 -0
- sarvamai/requests/doc_digitization_error_details.py +21 -0
- sarvamai/requests/doc_digitization_error_message.py +11 -0
- sarvamai/requests/doc_digitization_job_detail.py +64 -0
- sarvamai/requests/doc_digitization_job_parameters.py +21 -0
- sarvamai/requests/doc_digitization_job_status_response.py +65 -0
- sarvamai/requests/doc_digitization_page_error.py +24 -0
- sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
- sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
- sarvamai/requests/speech_to_text_response.py +14 -6
- sarvamai/requests/speech_to_text_transcription_data.py +0 -14
- sarvamai/requests/speech_to_text_translate_response.py +9 -6
- sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -13
- sarvamai/speech_to_text_streaming/client.py +2 -30
- sarvamai/speech_to_text_streaming/raw_client.py +2 -30
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_language_code.py +1 -25
- sarvamai/types/__init__.py +30 -0
- sarvamai/types/doc_digitization_create_job_response.py +37 -0
- sarvamai/types/doc_digitization_download_files_response.py +47 -0
- sarvamai/types/doc_digitization_error_code.py +15 -0
- sarvamai/types/doc_digitization_error_details.py +33 -0
- sarvamai/types/doc_digitization_error_message.py +23 -0
- sarvamai/types/doc_digitization_job_detail.py +74 -0
- sarvamai/types/doc_digitization_job_detail_state.py +7 -0
- sarvamai/types/doc_digitization_job_parameters.py +33 -0
- sarvamai/types/doc_digitization_job_state.py +7 -0
- sarvamai/types/doc_digitization_job_status_response.py +75 -0
- sarvamai/types/doc_digitization_output_format.py +5 -0
- sarvamai/types/doc_digitization_page_error.py +36 -0
- sarvamai/types/doc_digitization_supported_language.py +32 -0
- sarvamai/types/doc_digitization_upload_files_response.py +44 -0
- sarvamai/types/doc_digitization_webhook_callback.py +31 -0
- sarvamai/types/speech_to_text_language.py +1 -24
- sarvamai/types/speech_to_text_response.py +14 -6
- sarvamai/types/speech_to_text_transcription_data.py +0 -14
- sarvamai/types/speech_to_text_translate_language.py +1 -25
- sarvamai/types/speech_to_text_translate_response.py +9 -6
- sarvamai/types/speech_to_text_translate_transcription_data.py +0 -13
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a7.dist-info}/METADATA +1 -1
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a7.dist-info}/RECORD +49 -21
- {sarvamai-0.1.23a6.dist-info → sarvamai-0.1.23a7.dist-info}/WHEEL +0 -0
sarvamai/__init__.py
CHANGED
|
@@ -33,6 +33,21 @@ from .types import (
|
|
|
33
33
|
CreateChatCompletionResponse,
|
|
34
34
|
DiarizedEntry,
|
|
35
35
|
DiarizedTranscript,
|
|
36
|
+
DocDigitizationCreateJobResponse,
|
|
37
|
+
DocDigitizationDownloadFilesResponse,
|
|
38
|
+
DocDigitizationErrorCode,
|
|
39
|
+
DocDigitizationErrorDetails,
|
|
40
|
+
DocDigitizationErrorMessage,
|
|
41
|
+
DocDigitizationJobDetail,
|
|
42
|
+
DocDigitizationJobDetailState,
|
|
43
|
+
DocDigitizationJobParameters,
|
|
44
|
+
DocDigitizationJobState,
|
|
45
|
+
DocDigitizationJobStatusResponse,
|
|
46
|
+
DocDigitizationOutputFormat,
|
|
47
|
+
DocDigitizationPageError,
|
|
48
|
+
DocDigitizationSupportedLanguage,
|
|
49
|
+
DocDigitizationUploadFilesResponse,
|
|
50
|
+
DocDigitizationWebhookCallback,
|
|
36
51
|
ErrorCode,
|
|
37
52
|
ErrorData,
|
|
38
53
|
ErrorDetails,
|
|
@@ -112,6 +127,7 @@ from .errors import (
|
|
|
112
127
|
)
|
|
113
128
|
from . import (
|
|
114
129
|
chat,
|
|
130
|
+
doc_digitization_job,
|
|
115
131
|
speech_to_text,
|
|
116
132
|
speech_to_text_job,
|
|
117
133
|
speech_to_text_streaming,
|
|
@@ -147,6 +163,16 @@ from .requests import (
|
|
|
147
163
|
CreateChatCompletionResponseParams,
|
|
148
164
|
DiarizedEntryParams,
|
|
149
165
|
DiarizedTranscriptParams,
|
|
166
|
+
DocDigitizationCreateJobResponseParams,
|
|
167
|
+
DocDigitizationDownloadFilesResponseParams,
|
|
168
|
+
DocDigitizationErrorDetailsParams,
|
|
169
|
+
DocDigitizationErrorMessageParams,
|
|
170
|
+
DocDigitizationJobDetailParams,
|
|
171
|
+
DocDigitizationJobParametersParams,
|
|
172
|
+
DocDigitizationJobStatusResponseParams,
|
|
173
|
+
DocDigitizationPageErrorParams,
|
|
174
|
+
DocDigitizationUploadFilesResponseParams,
|
|
175
|
+
DocDigitizationWebhookCallbackParams,
|
|
150
176
|
ErrorDataParams,
|
|
151
177
|
ErrorDetailsParams,
|
|
152
178
|
ErrorMessageParams,
|
|
@@ -259,6 +285,31 @@ __all__ = [
|
|
|
259
285
|
"DiarizedEntryParams",
|
|
260
286
|
"DiarizedTranscript",
|
|
261
287
|
"DiarizedTranscriptParams",
|
|
288
|
+
"DocDigitizationCreateJobResponse",
|
|
289
|
+
"DocDigitizationCreateJobResponseParams",
|
|
290
|
+
"DocDigitizationDownloadFilesResponse",
|
|
291
|
+
"DocDigitizationDownloadFilesResponseParams",
|
|
292
|
+
"DocDigitizationErrorCode",
|
|
293
|
+
"DocDigitizationErrorDetails",
|
|
294
|
+
"DocDigitizationErrorDetailsParams",
|
|
295
|
+
"DocDigitizationErrorMessage",
|
|
296
|
+
"DocDigitizationErrorMessageParams",
|
|
297
|
+
"DocDigitizationJobDetail",
|
|
298
|
+
"DocDigitizationJobDetailParams",
|
|
299
|
+
"DocDigitizationJobDetailState",
|
|
300
|
+
"DocDigitizationJobParameters",
|
|
301
|
+
"DocDigitizationJobParametersParams",
|
|
302
|
+
"DocDigitizationJobState",
|
|
303
|
+
"DocDigitizationJobStatusResponse",
|
|
304
|
+
"DocDigitizationJobStatusResponseParams",
|
|
305
|
+
"DocDigitizationOutputFormat",
|
|
306
|
+
"DocDigitizationPageError",
|
|
307
|
+
"DocDigitizationPageErrorParams",
|
|
308
|
+
"DocDigitizationSupportedLanguage",
|
|
309
|
+
"DocDigitizationUploadFilesResponse",
|
|
310
|
+
"DocDigitizationUploadFilesResponseParams",
|
|
311
|
+
"DocDigitizationWebhookCallback",
|
|
312
|
+
"DocDigitizationWebhookCallbackParams",
|
|
262
313
|
"ErrorCode",
|
|
263
314
|
"ErrorData",
|
|
264
315
|
"ErrorDataParams",
|
|
@@ -386,6 +437,7 @@ __all__ = [
|
|
|
386
437
|
"UnprocessableEntityError",
|
|
387
438
|
"__version__",
|
|
388
439
|
"chat",
|
|
440
|
+
"doc_digitization_job",
|
|
389
441
|
"speech_to_text",
|
|
390
442
|
"speech_to_text_job",
|
|
391
443
|
"speech_to_text_streaming",
|
sarvamai/client.py
CHANGED
|
@@ -7,6 +7,7 @@ import httpx
|
|
|
7
7
|
from .chat.client import AsyncChatClient, ChatClient
|
|
8
8
|
from .core.api_error import ApiError
|
|
9
9
|
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
10
|
+
from .doc_digitization_job.client import AsyncDocDigitizationJobClient, DocDigitizationJobClient
|
|
10
11
|
from .environment import SarvamAIEnvironment
|
|
11
12
|
from .speech_to_text.client import AsyncSpeechToTextClient, SpeechToTextClient
|
|
12
13
|
from .speech_to_text_job.client import AsyncSpeechToTextJobClient, SpeechToTextJobClient
|
|
@@ -92,6 +93,7 @@ class SarvamAI:
|
|
|
92
93
|
self.chat = ChatClient(client_wrapper=self._client_wrapper)
|
|
93
94
|
self.speech_to_text_job = SpeechToTextJobClient(client_wrapper=self._client_wrapper)
|
|
94
95
|
self.speech_to_text_translate_job = SpeechToTextTranslateJobClient(client_wrapper=self._client_wrapper)
|
|
96
|
+
self.doc_digitization_job = DocDigitizationJobClient(client_wrapper=self._client_wrapper)
|
|
95
97
|
self.speech_to_text_streaming = SpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
96
98
|
self.speech_to_text_translate_streaming = SpeechToTextTranslateStreamingClient(
|
|
97
99
|
client_wrapper=self._client_wrapper
|
|
@@ -170,6 +172,7 @@ class AsyncSarvamAI:
|
|
|
170
172
|
self.chat = AsyncChatClient(client_wrapper=self._client_wrapper)
|
|
171
173
|
self.speech_to_text_job = AsyncSpeechToTextJobClient(client_wrapper=self._client_wrapper)
|
|
172
174
|
self.speech_to_text_translate_job = AsyncSpeechToTextTranslateJobClient(client_wrapper=self._client_wrapper)
|
|
175
|
+
self.doc_digitization_job = AsyncDocDigitizationJobClient(client_wrapper=self._client_wrapper)
|
|
173
176
|
self.speech_to_text_streaming = AsyncSpeechToTextStreamingClient(client_wrapper=self._client_wrapper)
|
|
174
177
|
self.speech_to_text_translate_streaming = AsyncSpeechToTextTranslateStreamingClient(
|
|
175
178
|
client_wrapper=self._client_wrapper
|
sarvamai/core/client_wrapper.py
CHANGED
|
@@ -23,10 +23,10 @@ class BaseClientWrapper:
|
|
|
23
23
|
|
|
24
24
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
25
25
|
headers: typing.Dict[str, str] = {
|
|
26
|
-
"User-Agent": "sarvamai/0.1.23a6",
|
|
26
|
+
"User-Agent": "sarvamai/0.1.23a7",
|
|
27
27
|
"X-Fern-Language": "Python",
|
|
28
28
|
"X-Fern-SDK-Name": "sarvamai",
|
|
29
|
-
"X-Fern-SDK-Version": "0.1.23a6",
|
|
29
|
+
"X-Fern-SDK-Version": "0.1.23a7",
|
|
30
30
|
**(self.get_custom_headers() or {}),
|
|
31
31
|
}
|
|
32
32
|
headers["api-subscription-key"] = self.api_subscription_key
|
|
sarvamai/doc_digitization_job/client.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
6
|
+
from ..core.request_options import RequestOptions
|
|
7
|
+
from ..requests.doc_digitization_job_parameters import DocDigitizationJobParametersParams
|
|
8
|
+
from ..requests.doc_digitization_webhook_callback import DocDigitizationWebhookCallbackParams
|
|
9
|
+
from ..types.doc_digitization_create_job_response import DocDigitizationCreateJobResponse
|
|
10
|
+
from ..types.doc_digitization_download_files_response import DocDigitizationDownloadFilesResponse
|
|
11
|
+
from ..types.doc_digitization_job_status_response import DocDigitizationJobStatusResponse
|
|
12
|
+
from ..types.doc_digitization_upload_files_response import DocDigitizationUploadFilesResponse
|
|
13
|
+
from .raw_client import AsyncRawDocDigitizationJobClient, RawDocDigitizationJobClient
|
|
14
|
+
|
|
15
|
+
# this is used as the default value for optional parameters
|
|
16
|
+
OMIT = typing.cast(typing.Any, ...)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DocDigitizationJobClient:
|
|
20
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
21
|
+
self._raw_client = RawDocDigitizationJobClient(client_wrapper=client_wrapper)
|
|
22
|
+
|
|
23
|
+
@property
|
|
24
|
+
def with_raw_response(self) -> RawDocDigitizationJobClient:
|
|
25
|
+
"""
|
|
26
|
+
Retrieves a raw implementation of this client that returns raw responses.
|
|
27
|
+
|
|
28
|
+
Returns
|
|
29
|
+
-------
|
|
30
|
+
RawDocDigitizationJobClient
|
|
31
|
+
"""
|
|
32
|
+
return self._raw_client
|
|
33
|
+
|
|
34
|
+
def initialise(
|
|
35
|
+
self,
|
|
36
|
+
*,
|
|
37
|
+
job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
|
|
38
|
+
callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
|
|
39
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
40
|
+
) -> DocDigitizationCreateJobResponse:
|
|
41
|
+
"""
|
|
42
|
+
Creates a new document digitization job.
|
|
43
|
+
|
|
44
|
+
**Supported Languages:**
|
|
45
|
+
- `hi`: Hindi
|
|
46
|
+
- `en`: English
|
|
47
|
+
- `bn`: Bengali
|
|
48
|
+
- `gu`: Gujarati
|
|
49
|
+
- `kn`: Kannada
|
|
50
|
+
- `ml`: Malayalam
|
|
51
|
+
- `mr`: Marathi
|
|
52
|
+
- `or`: Odia
|
|
53
|
+
- `pa`: Punjabi
|
|
54
|
+
- `ta`: Tamil
|
|
55
|
+
- `te`: Telugu
|
|
56
|
+
- `ur`: Urdu
|
|
57
|
+
- `as`: Assamese
|
|
58
|
+
- `bodo`: Bodo
|
|
59
|
+
- `doi`: Dogri
|
|
60
|
+
- `ks`: Kashmiri
|
|
61
|
+
- `kok`: Konkani
|
|
62
|
+
- `mai`: Maithili
|
|
63
|
+
- `mni`: Manipuri
|
|
64
|
+
- `ne`: Nepali
|
|
65
|
+
- `sa`: Sanskrit
|
|
66
|
+
- `sat`: Santali
|
|
67
|
+
- `sd`: Sindhi
|
|
68
|
+
|
|
69
|
+
**Output Formats:**
|
|
70
|
+
- `html`: Structured HTML with layout preservation (default)
|
|
71
|
+
- `md`: Markdown format
|
|
72
|
+
|
|
73
|
+
**Webhook Callback:**
|
|
74
|
+
Optionally provide a callback URL to receive notification when processing completes.
|
|
75
|
+
|
|
76
|
+
Parameters
|
|
77
|
+
----------
|
|
78
|
+
job_parameters : typing.Optional[DocDigitizationJobParametersParams]
|
|
79
|
+
Job configuration parameters. Omit the request body to use defaults.
|
|
80
|
+
|
|
81
|
+
callback : typing.Optional[DocDigitizationWebhookCallbackParams]
|
|
82
|
+
Optional webhook for completion notification
|
|
83
|
+
|
|
84
|
+
request_options : typing.Optional[RequestOptions]
|
|
85
|
+
Request-specific configuration.
|
|
86
|
+
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
89
|
+
DocDigitizationCreateJobResponse
|
|
90
|
+
Successful Response
|
|
91
|
+
|
|
92
|
+
Examples
|
|
93
|
+
--------
|
|
94
|
+
from sarvamai import SarvamAI
|
|
95
|
+
|
|
96
|
+
client = SarvamAI(
|
|
97
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
98
|
+
)
|
|
99
|
+
client.doc_digitization_job.initialise()
|
|
100
|
+
"""
|
|
101
|
+
_response = self._raw_client.initialise(
|
|
102
|
+
job_parameters=job_parameters, callback=callback, request_options=request_options
|
|
103
|
+
)
|
|
104
|
+
return _response.data
|
|
105
|
+
|
|
106
|
+
def get_upload_links(
|
|
107
|
+
self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
|
|
108
|
+
) -> DocDigitizationUploadFilesResponse:
|
|
109
|
+
"""
|
|
110
|
+
Returns presigned URLs for uploading input files.
|
|
111
|
+
|
|
112
|
+
**File Constraints:**
|
|
113
|
+
- Exactly one file required (PDF or ZIP)
|
|
114
|
+
- PDF files: `.pdf` extension
|
|
115
|
+
- ZIP files: `.zip` extension
|
|
116
|
+
|
|
117
|
+
Parameters
|
|
118
|
+
----------
|
|
119
|
+
job_id : str
|
|
120
|
+
Job identifier returned from Create Job
|
|
121
|
+
|
|
122
|
+
files : typing.Sequence[str]
|
|
123
|
+
List of filenames to upload (exactly 1 file: PDF or ZIP)
|
|
124
|
+
|
|
125
|
+
request_options : typing.Optional[RequestOptions]
|
|
126
|
+
Request-specific configuration.
|
|
127
|
+
|
|
128
|
+
Returns
|
|
129
|
+
-------
|
|
130
|
+
DocDigitizationUploadFilesResponse
|
|
131
|
+
Successful Response
|
|
132
|
+
|
|
133
|
+
Examples
|
|
134
|
+
--------
|
|
135
|
+
from sarvamai import SarvamAI
|
|
136
|
+
|
|
137
|
+
client = SarvamAI(
|
|
138
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
139
|
+
)
|
|
140
|
+
client.doc_digitization_job.get_upload_links(
|
|
141
|
+
job_id="job_id",
|
|
142
|
+
files=["files"],
|
|
143
|
+
)
|
|
144
|
+
"""
|
|
145
|
+
_response = self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
|
|
146
|
+
return _response.data
|
|
147
|
+
|
|
148
|
+
def start(
|
|
149
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
150
|
+
) -> DocDigitizationJobStatusResponse:
|
|
151
|
+
"""
|
|
152
|
+
Validates the uploaded file and starts processing.
|
|
153
|
+
|
|
154
|
+
**Validation Checks:**
|
|
155
|
+
- File must be uploaded before starting
|
|
156
|
+
- File size must not exceed 200 MB
|
|
157
|
+
- PDF must be parseable by the PDF parser
|
|
158
|
+
- ZIP must contain only JPEG/PNG images
|
|
159
|
+
- ZIP must be flat (no nested folders beyond one level)
|
|
160
|
+
- ZIP must contain at least one valid image
|
|
161
|
+
- Page/image count must not exceed 500
|
|
162
|
+
- User must have sufficient credits
|
|
163
|
+
|
|
164
|
+
**Processing:**
|
|
165
|
+
Job runs asynchronously. Poll the status endpoint or use webhook callback for completion notification.
|
|
166
|
+
|
|
167
|
+
Parameters
|
|
168
|
+
----------
|
|
169
|
+
job_id : str
|
|
170
|
+
The unique identifier of the job
|
|
171
|
+
|
|
172
|
+
request_options : typing.Optional[RequestOptions]
|
|
173
|
+
Request-specific configuration.
|
|
174
|
+
|
|
175
|
+
Returns
|
|
176
|
+
-------
|
|
177
|
+
DocDigitizationJobStatusResponse
|
|
178
|
+
Successful Response
|
|
179
|
+
|
|
180
|
+
Examples
|
|
181
|
+
--------
|
|
182
|
+
from sarvamai import SarvamAI
|
|
183
|
+
|
|
184
|
+
client = SarvamAI(
|
|
185
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
186
|
+
)
|
|
187
|
+
client.doc_digitization_job.start(
|
|
188
|
+
job_id="job_id",
|
|
189
|
+
)
|
|
190
|
+
"""
|
|
191
|
+
_response = self._raw_client.start(job_id, request_options=request_options)
|
|
192
|
+
return _response.data
|
|
193
|
+
|
|
194
|
+
def get_status(
|
|
195
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
196
|
+
) -> DocDigitizationJobStatusResponse:
|
|
197
|
+
"""
|
|
198
|
+
Returns the current status of a job with page-level metrics.
|
|
199
|
+
|
|
200
|
+
**Job States:**
|
|
201
|
+
- `Accepted`: Job created, awaiting file upload
|
|
202
|
+
- `Pending`: File uploaded, waiting to start
|
|
203
|
+
- `Running`: Processing in progress
|
|
204
|
+
- `Completed`: All pages processed successfully
|
|
205
|
+
- `PartiallyCompleted`: Some pages succeeded, some failed
|
|
206
|
+
- `Failed`: All pages failed or job-level error
|
|
207
|
+
|
|
208
|
+
**Page Metrics:**
|
|
209
|
+
Response includes detailed progress: total pages, pages processed, succeeded, failed, and per-page errors.
|
|
210
|
+
|
|
211
|
+
Parameters
|
|
212
|
+
----------
|
|
213
|
+
job_id : str
|
|
214
|
+
The unique identifier of the job
|
|
215
|
+
|
|
216
|
+
request_options : typing.Optional[RequestOptions]
|
|
217
|
+
Request-specific configuration.
|
|
218
|
+
|
|
219
|
+
Returns
|
|
220
|
+
-------
|
|
221
|
+
DocDigitizationJobStatusResponse
|
|
222
|
+
Successful Response
|
|
223
|
+
|
|
224
|
+
Examples
|
|
225
|
+
--------
|
|
226
|
+
from sarvamai import SarvamAI
|
|
227
|
+
|
|
228
|
+
client = SarvamAI(
|
|
229
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
230
|
+
)
|
|
231
|
+
client.doc_digitization_job.get_status(
|
|
232
|
+
job_id="job_id",
|
|
233
|
+
)
|
|
234
|
+
"""
|
|
235
|
+
_response = self._raw_client.get_status(job_id, request_options=request_options)
|
|
236
|
+
return _response.data
|
|
237
|
+
|
|
238
|
+
def get_download_links(
|
|
239
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
240
|
+
) -> DocDigitizationDownloadFilesResponse:
|
|
241
|
+
"""
|
|
242
|
+
Returns presigned URLs for downloading output files.
|
|
243
|
+
|
|
244
|
+
**Prerequisites:**
|
|
245
|
+
- Job must be in `Completed` or `PartiallyCompleted` state
|
|
246
|
+
- Failed jobs have no output available
|
|
247
|
+
|
|
248
|
+
Parameters
|
|
249
|
+
----------
|
|
250
|
+
job_id : str
|
|
251
|
+
The unique identifier of the job
|
|
252
|
+
|
|
253
|
+
request_options : typing.Optional[RequestOptions]
|
|
254
|
+
Request-specific configuration.
|
|
255
|
+
|
|
256
|
+
Returns
|
|
257
|
+
-------
|
|
258
|
+
DocDigitizationDownloadFilesResponse
|
|
259
|
+
Successful Response
|
|
260
|
+
|
|
261
|
+
Examples
|
|
262
|
+
--------
|
|
263
|
+
from sarvamai import SarvamAI
|
|
264
|
+
|
|
265
|
+
client = SarvamAI(
|
|
266
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
267
|
+
)
|
|
268
|
+
client.doc_digitization_job.get_download_links(
|
|
269
|
+
job_id="job_id",
|
|
270
|
+
)
|
|
271
|
+
"""
|
|
272
|
+
_response = self._raw_client.get_download_links(job_id, request_options=request_options)
|
|
273
|
+
return _response.data
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class AsyncDocDigitizationJobClient:
|
|
277
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
278
|
+
self._raw_client = AsyncRawDocDigitizationJobClient(client_wrapper=client_wrapper)
|
|
279
|
+
|
|
280
|
+
@property
|
|
281
|
+
def with_raw_response(self) -> AsyncRawDocDigitizationJobClient:
|
|
282
|
+
"""
|
|
283
|
+
Retrieves a raw implementation of this client that returns raw responses.
|
|
284
|
+
|
|
285
|
+
Returns
|
|
286
|
+
-------
|
|
287
|
+
AsyncRawDocDigitizationJobClient
|
|
288
|
+
"""
|
|
289
|
+
return self._raw_client
|
|
290
|
+
|
|
291
|
+
async def initialise(
|
|
292
|
+
self,
|
|
293
|
+
*,
|
|
294
|
+
job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
|
|
295
|
+
callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
|
|
296
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
297
|
+
) -> DocDigitizationCreateJobResponse:
|
|
298
|
+
"""
|
|
299
|
+
Creates a new document digitization job.
|
|
300
|
+
|
|
301
|
+
**Supported Languages:**
|
|
302
|
+
- `hi`: Hindi
|
|
303
|
+
- `en`: English
|
|
304
|
+
- `bn`: Bengali
|
|
305
|
+
- `gu`: Gujarati
|
|
306
|
+
- `kn`: Kannada
|
|
307
|
+
- `ml`: Malayalam
|
|
308
|
+
- `mr`: Marathi
|
|
309
|
+
- `or`: Odia
|
|
310
|
+
- `pa`: Punjabi
|
|
311
|
+
- `ta`: Tamil
|
|
312
|
+
- `te`: Telugu
|
|
313
|
+
- `ur`: Urdu
|
|
314
|
+
- `as`: Assamese
|
|
315
|
+
- `bodo`: Bodo
|
|
316
|
+
- `doi`: Dogri
|
|
317
|
+
- `ks`: Kashmiri
|
|
318
|
+
- `kok`: Konkani
|
|
319
|
+
- `mai`: Maithili
|
|
320
|
+
- `mni`: Manipuri
|
|
321
|
+
- `ne`: Nepali
|
|
322
|
+
- `sa`: Sanskrit
|
|
323
|
+
- `sat`: Santali
|
|
324
|
+
- `sd`: Sindhi
|
|
325
|
+
|
|
326
|
+
**Output Formats:**
|
|
327
|
+
- `html`: Structured HTML with layout preservation (default)
|
|
328
|
+
- `md`: Markdown format
|
|
329
|
+
|
|
330
|
+
**Webhook Callback:**
|
|
331
|
+
Optionally provide a callback URL to receive notification when processing completes.
|
|
332
|
+
|
|
333
|
+
Parameters
|
|
334
|
+
----------
|
|
335
|
+
job_parameters : typing.Optional[DocDigitizationJobParametersParams]
|
|
336
|
+
Job configuration parameters. Omit the request body to use defaults.
|
|
337
|
+
|
|
338
|
+
callback : typing.Optional[DocDigitizationWebhookCallbackParams]
|
|
339
|
+
Optional webhook for completion notification
|
|
340
|
+
|
|
341
|
+
request_options : typing.Optional[RequestOptions]
|
|
342
|
+
Request-specific configuration.
|
|
343
|
+
|
|
344
|
+
Returns
|
|
345
|
+
-------
|
|
346
|
+
DocDigitizationCreateJobResponse
|
|
347
|
+
Successful Response
|
|
348
|
+
|
|
349
|
+
Examples
|
|
350
|
+
--------
|
|
351
|
+
import asyncio
|
|
352
|
+
|
|
353
|
+
from sarvamai import AsyncSarvamAI
|
|
354
|
+
|
|
355
|
+
client = AsyncSarvamAI(
|
|
356
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
async def main() -> None:
|
|
361
|
+
await client.doc_digitization_job.initialise()
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
asyncio.run(main())
|
|
365
|
+
"""
|
|
366
|
+
_response = await self._raw_client.initialise(
|
|
367
|
+
job_parameters=job_parameters, callback=callback, request_options=request_options
|
|
368
|
+
)
|
|
369
|
+
return _response.data
|
|
370
|
+
|
|
371
|
+
async def get_upload_links(
|
|
372
|
+
self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
|
|
373
|
+
) -> DocDigitizationUploadFilesResponse:
|
|
374
|
+
"""
|
|
375
|
+
Returns presigned URLs for uploading input files.
|
|
376
|
+
|
|
377
|
+
**File Constraints:**
|
|
378
|
+
- Exactly one file required (PDF or ZIP)
|
|
379
|
+
- PDF files: `.pdf` extension
|
|
380
|
+
- ZIP files: `.zip` extension
|
|
381
|
+
|
|
382
|
+
Parameters
|
|
383
|
+
----------
|
|
384
|
+
job_id : str
|
|
385
|
+
Job identifier returned from Create Job
|
|
386
|
+
|
|
387
|
+
files : typing.Sequence[str]
|
|
388
|
+
List of filenames to upload (exactly 1 file: PDF or ZIP)
|
|
389
|
+
|
|
390
|
+
request_options : typing.Optional[RequestOptions]
|
|
391
|
+
Request-specific configuration.
|
|
392
|
+
|
|
393
|
+
Returns
|
|
394
|
+
-------
|
|
395
|
+
DocDigitizationUploadFilesResponse
|
|
396
|
+
Successful Response
|
|
397
|
+
|
|
398
|
+
Examples
|
|
399
|
+
--------
|
|
400
|
+
import asyncio
|
|
401
|
+
|
|
402
|
+
from sarvamai import AsyncSarvamAI
|
|
403
|
+
|
|
404
|
+
client = AsyncSarvamAI(
|
|
405
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
async def main() -> None:
|
|
410
|
+
await client.doc_digitization_job.get_upload_links(
|
|
411
|
+
job_id="job_id",
|
|
412
|
+
files=["files"],
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
asyncio.run(main())
|
|
417
|
+
"""
|
|
418
|
+
_response = await self._raw_client.get_upload_links(job_id=job_id, files=files, request_options=request_options)
|
|
419
|
+
return _response.data
|
|
420
|
+
|
|
421
|
+
async def start(
|
|
422
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
423
|
+
) -> DocDigitizationJobStatusResponse:
|
|
424
|
+
"""
|
|
425
|
+
Validates the uploaded file and starts processing.
|
|
426
|
+
|
|
427
|
+
**Validation Checks:**
|
|
428
|
+
- File must be uploaded before starting
|
|
429
|
+
- File size must not exceed 200 MB
|
|
430
|
+
- PDF must be parseable by the PDF parser
|
|
431
|
+
- ZIP must contain only JPEG/PNG images
|
|
432
|
+
- ZIP must be flat (no nested folders beyond one level)
|
|
433
|
+
- ZIP must contain at least one valid image
|
|
434
|
+
- Page/image count must not exceed 500
|
|
435
|
+
- User must have sufficient credits
|
|
436
|
+
|
|
437
|
+
**Processing:**
|
|
438
|
+
Job runs asynchronously. Poll the status endpoint or use webhook callback for completion notification.
|
|
439
|
+
|
|
440
|
+
Parameters
|
|
441
|
+
----------
|
|
442
|
+
job_id : str
|
|
443
|
+
The unique identifier of the job
|
|
444
|
+
|
|
445
|
+
request_options : typing.Optional[RequestOptions]
|
|
446
|
+
Request-specific configuration.
|
|
447
|
+
|
|
448
|
+
Returns
|
|
449
|
+
-------
|
|
450
|
+
DocDigitizationJobStatusResponse
|
|
451
|
+
Successful Response
|
|
452
|
+
|
|
453
|
+
Examples
|
|
454
|
+
--------
|
|
455
|
+
import asyncio
|
|
456
|
+
|
|
457
|
+
from sarvamai import AsyncSarvamAI
|
|
458
|
+
|
|
459
|
+
client = AsyncSarvamAI(
|
|
460
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
async def main() -> None:
|
|
465
|
+
await client.doc_digitization_job.start(
|
|
466
|
+
job_id="job_id",
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
asyncio.run(main())
|
|
471
|
+
"""
|
|
472
|
+
_response = await self._raw_client.start(job_id, request_options=request_options)
|
|
473
|
+
return _response.data
|
|
474
|
+
|
|
475
|
+
async def get_status(
|
|
476
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
477
|
+
) -> DocDigitizationJobStatusResponse:
|
|
478
|
+
"""
|
|
479
|
+
Returns the current status of a job with page-level metrics.
|
|
480
|
+
|
|
481
|
+
**Job States:**
|
|
482
|
+
- `Accepted`: Job created, awaiting file upload
|
|
483
|
+
- `Pending`: File uploaded, waiting to start
|
|
484
|
+
- `Running`: Processing in progress
|
|
485
|
+
- `Completed`: All pages processed successfully
|
|
486
|
+
- `PartiallyCompleted`: Some pages succeeded, some failed
|
|
487
|
+
- `Failed`: All pages failed or job-level error
|
|
488
|
+
|
|
489
|
+
**Page Metrics:**
|
|
490
|
+
Response includes detailed progress: total pages, pages processed, succeeded, failed, and per-page errors.
|
|
491
|
+
|
|
492
|
+
Parameters
|
|
493
|
+
----------
|
|
494
|
+
job_id : str
|
|
495
|
+
The unique identifier of the job
|
|
496
|
+
|
|
497
|
+
request_options : typing.Optional[RequestOptions]
|
|
498
|
+
Request-specific configuration.
|
|
499
|
+
|
|
500
|
+
Returns
|
|
501
|
+
-------
|
|
502
|
+
DocDigitizationJobStatusResponse
|
|
503
|
+
Successful Response
|
|
504
|
+
|
|
505
|
+
Examples
|
|
506
|
+
--------
|
|
507
|
+
import asyncio
|
|
508
|
+
|
|
509
|
+
from sarvamai import AsyncSarvamAI
|
|
510
|
+
|
|
511
|
+
client = AsyncSarvamAI(
|
|
512
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
513
|
+
)
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
async def main() -> None:
|
|
517
|
+
await client.doc_digitization_job.get_status(
|
|
518
|
+
job_id="job_id",
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
asyncio.run(main())
|
|
523
|
+
"""
|
|
524
|
+
_response = await self._raw_client.get_status(job_id, request_options=request_options)
|
|
525
|
+
return _response.data
|
|
526
|
+
|
|
527
|
+
async def get_download_links(
|
|
528
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
529
|
+
) -> DocDigitizationDownloadFilesResponse:
|
|
530
|
+
"""
|
|
531
|
+
Returns presigned URLs for downloading output files.
|
|
532
|
+
|
|
533
|
+
**Prerequisites:**
|
|
534
|
+
- Job must be in `Completed` or `PartiallyCompleted` state
|
|
535
|
+
- Failed jobs have no output available
|
|
536
|
+
|
|
537
|
+
Parameters
|
|
538
|
+
----------
|
|
539
|
+
job_id : str
|
|
540
|
+
The unique identifier of the job
|
|
541
|
+
|
|
542
|
+
request_options : typing.Optional[RequestOptions]
|
|
543
|
+
Request-specific configuration.
|
|
544
|
+
|
|
545
|
+
Returns
|
|
546
|
+
-------
|
|
547
|
+
DocDigitizationDownloadFilesResponse
|
|
548
|
+
Successful Response
|
|
549
|
+
|
|
550
|
+
Examples
|
|
551
|
+
--------
|
|
552
|
+
import asyncio
|
|
553
|
+
|
|
554
|
+
from sarvamai import AsyncSarvamAI
|
|
555
|
+
|
|
556
|
+
client = AsyncSarvamAI(
|
|
557
|
+
api_subscription_key="YOUR_API_SUBSCRIPTION_KEY",
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
async def main() -> None:
|
|
562
|
+
await client.doc_digitization_job.get_download_links(
|
|
563
|
+
job_id="job_id",
|
|
564
|
+
)
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
asyncio.run(main())
|
|
568
|
+
"""
|
|
569
|
+
_response = await self._raw_client.get_download_links(job_id, request_options=request_options)
|
|
570
|
+
return _response.data
|