sarvamai 0.1.22a3__py3-none-any.whl → 0.1.22a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarvamai/__init__.py +62 -9
- sarvamai/client.py +3 -0
- sarvamai/core/client_wrapper.py +2 -2
- sarvamai/doc_digitization_job/__init__.py +4 -0
- sarvamai/doc_digitization_job/client.py +776 -0
- sarvamai/doc_digitization_job/job.py +496 -0
- sarvamai/doc_digitization_job/raw_client.py +1176 -0
- sarvamai/requests/__init__.py +20 -0
- sarvamai/requests/audio_data.py +0 -6
- sarvamai/requests/configure_connection.py +4 -0
- sarvamai/requests/configure_connection_data.py +40 -11
- sarvamai/requests/doc_digitization_create_job_response.py +25 -0
- sarvamai/requests/doc_digitization_download_files_response.py +37 -0
- sarvamai/requests/doc_digitization_error_details.py +21 -0
- sarvamai/requests/doc_digitization_error_message.py +11 -0
- sarvamai/requests/doc_digitization_job_detail.py +64 -0
- sarvamai/requests/doc_digitization_job_parameters.py +21 -0
- sarvamai/requests/doc_digitization_job_status_response.py +65 -0
- sarvamai/requests/doc_digitization_page_error.py +24 -0
- sarvamai/requests/doc_digitization_upload_files_response.py +34 -0
- sarvamai/requests/doc_digitization_webhook_callback.py +19 -0
- sarvamai/requests/speech_to_text_job_parameters.py +43 -2
- sarvamai/requests/speech_to_text_transcription_data.py +0 -6
- sarvamai/requests/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/requests/speech_to_text_translate_transcription_data.py +0 -6
- sarvamai/speech_to_text/client.py +95 -10
- sarvamai/speech_to_text/raw_client.py +95 -10
- sarvamai/speech_to_text_job/client.py +60 -15
- sarvamai/speech_to_text_job/job.py +100 -2
- sarvamai/speech_to_text_job/raw_client.py +14 -10
- sarvamai/speech_to_text_streaming/__init__.py +4 -2
- sarvamai/speech_to_text_streaming/client.py +100 -47
- sarvamai/speech_to_text_streaming/raw_client.py +100 -47
- sarvamai/speech_to_text_streaming/types/__init__.py +4 -2
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_input_audio_codec.py +1 -27
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_mode.py +7 -0
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_model.py +5 -0
- sarvamai/speech_to_text_translate_job/job.py +100 -2
- sarvamai/speech_to_text_translate_job/raw_client.py +14 -10
- sarvamai/speech_to_text_translate_streaming/__init__.py +0 -2
- sarvamai/speech_to_text_translate_streaming/client.py +18 -41
- sarvamai/speech_to_text_translate_streaming/raw_client.py +18 -41
- sarvamai/speech_to_text_translate_streaming/types/__init__.py +0 -4
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_input_audio_codec.py +1 -27
- sarvamai/text/client.py +0 -12
- sarvamai/text/raw_client.py +0 -12
- sarvamai/text_to_speech/client.py +116 -14
- sarvamai/text_to_speech/raw_client.py +116 -14
- sarvamai/text_to_speech_streaming/__init__.py +2 -2
- sarvamai/text_to_speech_streaming/client.py +19 -6
- sarvamai/text_to_speech_streaming/raw_client.py +19 -6
- sarvamai/text_to_speech_streaming/types/__init__.py +2 -1
- sarvamai/text_to_speech_streaming/types/text_to_speech_streaming_model.py +5 -0
- sarvamai/types/__init__.py +34 -4
- sarvamai/types/audio_data.py +0 -6
- sarvamai/types/completion_event_flag.py +3 -1
- sarvamai/types/configure_connection.py +4 -0
- sarvamai/types/configure_connection_data.py +40 -11
- sarvamai/types/configure_connection_data_model.py +5 -0
- sarvamai/types/configure_connection_data_speaker.py +35 -1
- sarvamai/types/doc_digitization_create_job_response.py +37 -0
- sarvamai/types/doc_digitization_download_files_response.py +47 -0
- sarvamai/types/doc_digitization_error_code.py +15 -0
- sarvamai/types/doc_digitization_error_details.py +33 -0
- sarvamai/types/doc_digitization_error_message.py +23 -0
- sarvamai/types/doc_digitization_job_detail.py +74 -0
- sarvamai/types/doc_digitization_job_detail_state.py +7 -0
- sarvamai/types/doc_digitization_job_parameters.py +33 -0
- sarvamai/types/doc_digitization_job_state.py +7 -0
- sarvamai/types/doc_digitization_job_status_response.py +75 -0
- sarvamai/types/doc_digitization_output_format.py +5 -0
- sarvamai/types/doc_digitization_page_error.py +36 -0
- sarvamai/types/doc_digitization_supported_language.py +32 -0
- sarvamai/types/doc_digitization_upload_files_response.py +44 -0
- sarvamai/types/doc_digitization_webhook_callback.py +31 -0
- sarvamai/types/mode.py +5 -0
- sarvamai/types/speech_to_text_job_parameters.py +43 -2
- sarvamai/types/speech_to_text_model.py +1 -1
- sarvamai/types/speech_to_text_transcription_data.py +0 -6
- sarvamai/types/speech_to_text_translate_job_parameters.py +4 -1
- sarvamai/types/speech_to_text_translate_transcription_data.py +0 -6
- sarvamai/types/text_to_speech_model.py +1 -1
- sarvamai/types/text_to_speech_speaker.py +35 -1
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/METADATA +1 -1
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/RECORD +86 -56
- sarvamai/speech_to_text_streaming/types/speech_to_text_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/speech_to_text_translate_streaming/types/speech_to_text_translate_streaming_stream_ongoing_speech_results.py +0 -5
- sarvamai/types/audio_data_input_audio_codec.py +0 -33
- sarvamai/types/response_speech_state.py +0 -7
- {sarvamai-0.1.22a3.dist-info → sarvamai-0.1.22a7.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,1176 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from json.decoder import JSONDecodeError
|
|
5
|
+
|
|
6
|
+
from ..core.api_error import ApiError
|
|
7
|
+
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
8
|
+
from ..core.http_response import AsyncHttpResponse, HttpResponse
|
|
9
|
+
from ..core.jsonable_encoder import jsonable_encoder
|
|
10
|
+
from ..core.pydantic_utilities import parse_obj_as
|
|
11
|
+
from ..core.request_options import RequestOptions
|
|
12
|
+
from ..core.serialization import convert_and_respect_annotation_metadata
|
|
13
|
+
from ..errors.bad_request_error import BadRequestError
|
|
14
|
+
from ..errors.forbidden_error import ForbiddenError
|
|
15
|
+
from ..errors.internal_server_error import InternalServerError
|
|
16
|
+
from ..errors.service_unavailable_error import ServiceUnavailableError
|
|
17
|
+
from ..errors.too_many_requests_error import TooManyRequestsError
|
|
18
|
+
from ..requests.doc_digitization_job_parameters import DocDigitizationJobParametersParams
|
|
19
|
+
from ..requests.doc_digitization_webhook_callback import DocDigitizationWebhookCallbackParams
|
|
20
|
+
from ..types.doc_digitization_create_job_response import DocDigitizationCreateJobResponse
|
|
21
|
+
from ..types.doc_digitization_download_files_response import DocDigitizationDownloadFilesResponse
|
|
22
|
+
from ..types.doc_digitization_job_status_response import DocDigitizationJobStatusResponse
|
|
23
|
+
from ..types.doc_digitization_upload_files_response import DocDigitizationUploadFilesResponse
|
|
24
|
+
|
|
25
|
+
# this is used as the default value for optional parameters
|
|
26
|
+
OMIT = typing.cast(typing.Any, ...)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RawDocDigitizationJobClient:
|
|
30
|
+
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
31
|
+
self._client_wrapper = client_wrapper
|
|
32
|
+
|
|
33
|
+
def initialise(
|
|
34
|
+
self,
|
|
35
|
+
*,
|
|
36
|
+
job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
|
|
37
|
+
callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
|
|
38
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
39
|
+
) -> HttpResponse[DocDigitizationCreateJobResponse]:
|
|
40
|
+
"""
|
|
41
|
+
Creates a new document digitization job.
|
|
42
|
+
|
|
43
|
+
**Supported Languages:**
|
|
44
|
+
- `hi`: Hindi
|
|
45
|
+
- `en`: English
|
|
46
|
+
- `bn`: Bengali
|
|
47
|
+
- `gu`: Gujarati
|
|
48
|
+
- `kn`: Kannada
|
|
49
|
+
- `ml`: Malayalam
|
|
50
|
+
- `mr`: Marathi
|
|
51
|
+
- `or`: Odia
|
|
52
|
+
- `pa`: Punjabi
|
|
53
|
+
- `ta`: Tamil
|
|
54
|
+
- `te`: Telugu
|
|
55
|
+
- `ur`: Urdu
|
|
56
|
+
- `as`: Assamese
|
|
57
|
+
- `bodo`: Bodo
|
|
58
|
+
- `doi`: Dogri
|
|
59
|
+
- `ks`: Kashmiri
|
|
60
|
+
- `kok`: Konkani
|
|
61
|
+
- `mai`: Maithili
|
|
62
|
+
- `mni`: Manipuri
|
|
63
|
+
- `ne`: Nepali
|
|
64
|
+
- `sa`: Sanskrit
|
|
65
|
+
- `sat`: Santali
|
|
66
|
+
- `sd`: Sindhi
|
|
67
|
+
|
|
68
|
+
**Output Formats:**
|
|
69
|
+
- `html`: Structured HTML with layout preservation (default)
|
|
70
|
+
- `md`: Markdown format
|
|
71
|
+
|
|
72
|
+
**Webhook Callback:**
|
|
73
|
+
Optionally provide a callback URL to receive notification when processing completes.
|
|
74
|
+
|
|
75
|
+
Parameters
|
|
76
|
+
----------
|
|
77
|
+
job_parameters : typing.Optional[DocDigitizationJobParametersParams]
|
|
78
|
+
Job configuration parameters. Omit the request body to use defaults.
|
|
79
|
+
|
|
80
|
+
callback : typing.Optional[DocDigitizationWebhookCallbackParams]
|
|
81
|
+
Optional webhook for completion notification
|
|
82
|
+
|
|
83
|
+
request_options : typing.Optional[RequestOptions]
|
|
84
|
+
Request-specific configuration.
|
|
85
|
+
|
|
86
|
+
Returns
|
|
87
|
+
-------
|
|
88
|
+
HttpResponse[DocDigitizationCreateJobResponse]
|
|
89
|
+
Successful Response
|
|
90
|
+
"""
|
|
91
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
92
|
+
"doc-digitization/job/v1",
|
|
93
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
94
|
+
method="POST",
|
|
95
|
+
json={
|
|
96
|
+
"job_parameters": convert_and_respect_annotation_metadata(
|
|
97
|
+
object_=job_parameters, annotation=DocDigitizationJobParametersParams, direction="write"
|
|
98
|
+
),
|
|
99
|
+
"callback": convert_and_respect_annotation_metadata(
|
|
100
|
+
object_=callback, annotation=DocDigitizationWebhookCallbackParams, direction="write"
|
|
101
|
+
),
|
|
102
|
+
},
|
|
103
|
+
headers={
|
|
104
|
+
"content-type": "application/json",
|
|
105
|
+
},
|
|
106
|
+
request_options=request_options,
|
|
107
|
+
omit=OMIT,
|
|
108
|
+
)
|
|
109
|
+
try:
|
|
110
|
+
if 200 <= _response.status_code < 300:
|
|
111
|
+
_data = typing.cast(
|
|
112
|
+
DocDigitizationCreateJobResponse,
|
|
113
|
+
parse_obj_as(
|
|
114
|
+
type_=DocDigitizationCreateJobResponse, # type: ignore
|
|
115
|
+
object_=_response.json(),
|
|
116
|
+
),
|
|
117
|
+
)
|
|
118
|
+
return HttpResponse(response=_response, data=_data)
|
|
119
|
+
if _response.status_code == 400:
|
|
120
|
+
raise BadRequestError(
|
|
121
|
+
headers=dict(_response.headers),
|
|
122
|
+
body=typing.cast(
|
|
123
|
+
typing.Optional[typing.Any],
|
|
124
|
+
parse_obj_as(
|
|
125
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
126
|
+
object_=_response.json(),
|
|
127
|
+
),
|
|
128
|
+
),
|
|
129
|
+
)
|
|
130
|
+
if _response.status_code == 403:
|
|
131
|
+
raise ForbiddenError(
|
|
132
|
+
headers=dict(_response.headers),
|
|
133
|
+
body=typing.cast(
|
|
134
|
+
typing.Optional[typing.Any],
|
|
135
|
+
parse_obj_as(
|
|
136
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
137
|
+
object_=_response.json(),
|
|
138
|
+
),
|
|
139
|
+
),
|
|
140
|
+
)
|
|
141
|
+
if _response.status_code == 429:
|
|
142
|
+
raise TooManyRequestsError(
|
|
143
|
+
headers=dict(_response.headers),
|
|
144
|
+
body=typing.cast(
|
|
145
|
+
typing.Optional[typing.Any],
|
|
146
|
+
parse_obj_as(
|
|
147
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
148
|
+
object_=_response.json(),
|
|
149
|
+
),
|
|
150
|
+
),
|
|
151
|
+
)
|
|
152
|
+
if _response.status_code == 500:
|
|
153
|
+
raise InternalServerError(
|
|
154
|
+
headers=dict(_response.headers),
|
|
155
|
+
body=typing.cast(
|
|
156
|
+
typing.Optional[typing.Any],
|
|
157
|
+
parse_obj_as(
|
|
158
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
159
|
+
object_=_response.json(),
|
|
160
|
+
),
|
|
161
|
+
),
|
|
162
|
+
)
|
|
163
|
+
if _response.status_code == 503:
|
|
164
|
+
raise ServiceUnavailableError(
|
|
165
|
+
headers=dict(_response.headers),
|
|
166
|
+
body=typing.cast(
|
|
167
|
+
typing.Optional[typing.Any],
|
|
168
|
+
parse_obj_as(
|
|
169
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
170
|
+
object_=_response.json(),
|
|
171
|
+
),
|
|
172
|
+
),
|
|
173
|
+
)
|
|
174
|
+
_response_json = _response.json()
|
|
175
|
+
except JSONDecodeError:
|
|
176
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
177
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
178
|
+
|
|
179
|
+
def get_upload_links(
|
|
180
|
+
self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
|
|
181
|
+
) -> HttpResponse[DocDigitizationUploadFilesResponse]:
|
|
182
|
+
"""
|
|
183
|
+
Returns presigned URLs for uploading input files.
|
|
184
|
+
|
|
185
|
+
**File Constraints:**
|
|
186
|
+
- Exactly one file required (PDF or ZIP)
|
|
187
|
+
- PDF files: `.pdf` extension
|
|
188
|
+
- ZIP files: `.zip` extension
|
|
189
|
+
|
|
190
|
+
Parameters
|
|
191
|
+
----------
|
|
192
|
+
job_id : str
|
|
193
|
+
Job identifier returned from Create Job
|
|
194
|
+
|
|
195
|
+
files : typing.Sequence[str]
|
|
196
|
+
List of filenames to upload (exactly 1 file: PDF or ZIP)
|
|
197
|
+
|
|
198
|
+
request_options : typing.Optional[RequestOptions]
|
|
199
|
+
Request-specific configuration.
|
|
200
|
+
|
|
201
|
+
Returns
|
|
202
|
+
-------
|
|
203
|
+
HttpResponse[DocDigitizationUploadFilesResponse]
|
|
204
|
+
Successful Response
|
|
205
|
+
"""
|
|
206
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
207
|
+
"doc-digitization/job/v1/upload-files",
|
|
208
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
209
|
+
method="POST",
|
|
210
|
+
json={
|
|
211
|
+
"job_id": job_id,
|
|
212
|
+
"files": files,
|
|
213
|
+
},
|
|
214
|
+
headers={
|
|
215
|
+
"content-type": "application/json",
|
|
216
|
+
},
|
|
217
|
+
request_options=request_options,
|
|
218
|
+
omit=OMIT,
|
|
219
|
+
)
|
|
220
|
+
try:
|
|
221
|
+
if 200 <= _response.status_code < 300:
|
|
222
|
+
_data = typing.cast(
|
|
223
|
+
DocDigitizationUploadFilesResponse,
|
|
224
|
+
parse_obj_as(
|
|
225
|
+
type_=DocDigitizationUploadFilesResponse, # type: ignore
|
|
226
|
+
object_=_response.json(),
|
|
227
|
+
),
|
|
228
|
+
)
|
|
229
|
+
return HttpResponse(response=_response, data=_data)
|
|
230
|
+
if _response.status_code == 400:
|
|
231
|
+
raise BadRequestError(
|
|
232
|
+
headers=dict(_response.headers),
|
|
233
|
+
body=typing.cast(
|
|
234
|
+
typing.Optional[typing.Any],
|
|
235
|
+
parse_obj_as(
|
|
236
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
237
|
+
object_=_response.json(),
|
|
238
|
+
),
|
|
239
|
+
),
|
|
240
|
+
)
|
|
241
|
+
if _response.status_code == 403:
|
|
242
|
+
raise ForbiddenError(
|
|
243
|
+
headers=dict(_response.headers),
|
|
244
|
+
body=typing.cast(
|
|
245
|
+
typing.Optional[typing.Any],
|
|
246
|
+
parse_obj_as(
|
|
247
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
248
|
+
object_=_response.json(),
|
|
249
|
+
),
|
|
250
|
+
),
|
|
251
|
+
)
|
|
252
|
+
if _response.status_code == 429:
|
|
253
|
+
raise TooManyRequestsError(
|
|
254
|
+
headers=dict(_response.headers),
|
|
255
|
+
body=typing.cast(
|
|
256
|
+
typing.Optional[typing.Any],
|
|
257
|
+
parse_obj_as(
|
|
258
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
259
|
+
object_=_response.json(),
|
|
260
|
+
),
|
|
261
|
+
),
|
|
262
|
+
)
|
|
263
|
+
if _response.status_code == 500:
|
|
264
|
+
raise InternalServerError(
|
|
265
|
+
headers=dict(_response.headers),
|
|
266
|
+
body=typing.cast(
|
|
267
|
+
typing.Optional[typing.Any],
|
|
268
|
+
parse_obj_as(
|
|
269
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
270
|
+
object_=_response.json(),
|
|
271
|
+
),
|
|
272
|
+
),
|
|
273
|
+
)
|
|
274
|
+
if _response.status_code == 503:
|
|
275
|
+
raise ServiceUnavailableError(
|
|
276
|
+
headers=dict(_response.headers),
|
|
277
|
+
body=typing.cast(
|
|
278
|
+
typing.Optional[typing.Any],
|
|
279
|
+
parse_obj_as(
|
|
280
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
281
|
+
object_=_response.json(),
|
|
282
|
+
),
|
|
283
|
+
),
|
|
284
|
+
)
|
|
285
|
+
_response_json = _response.json()
|
|
286
|
+
except JSONDecodeError:
|
|
287
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
288
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
289
|
+
|
|
290
|
+
def start(
|
|
291
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
292
|
+
) -> HttpResponse[DocDigitizationJobStatusResponse]:
|
|
293
|
+
"""
|
|
294
|
+
Validates the uploaded file and starts processing.
|
|
295
|
+
|
|
296
|
+
**Validation Checks:**
|
|
297
|
+
- File must be uploaded before starting
|
|
298
|
+
- File size must not exceed 200 MB
|
|
299
|
+
- PDF must be parseable by the PDF parser
|
|
300
|
+
- ZIP must contain only JPEG/PNG images
|
|
301
|
+
- ZIP must be flat (no nested folders beyond one level)
|
|
302
|
+
- ZIP must contain at least one valid image
|
|
303
|
+
- Page/image count must not exceed 500
|
|
304
|
+
- User must have sufficient credits
|
|
305
|
+
|
|
306
|
+
**Processing:**
|
|
307
|
+
Job runs asynchronously. Poll the status endpoint or use webhook callback for completion notification.
|
|
308
|
+
|
|
309
|
+
Parameters
|
|
310
|
+
----------
|
|
311
|
+
job_id : str
|
|
312
|
+
The unique identifier of the job
|
|
313
|
+
|
|
314
|
+
request_options : typing.Optional[RequestOptions]
|
|
315
|
+
Request-specific configuration.
|
|
316
|
+
|
|
317
|
+
Returns
|
|
318
|
+
-------
|
|
319
|
+
HttpResponse[DocDigitizationJobStatusResponse]
|
|
320
|
+
Successful Response
|
|
321
|
+
"""
|
|
322
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
323
|
+
f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/start",
|
|
324
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
325
|
+
method="POST",
|
|
326
|
+
request_options=request_options,
|
|
327
|
+
)
|
|
328
|
+
try:
|
|
329
|
+
if 200 <= _response.status_code < 300:
|
|
330
|
+
_data = typing.cast(
|
|
331
|
+
DocDigitizationJobStatusResponse,
|
|
332
|
+
parse_obj_as(
|
|
333
|
+
type_=DocDigitizationJobStatusResponse, # type: ignore
|
|
334
|
+
object_=_response.json(),
|
|
335
|
+
),
|
|
336
|
+
)
|
|
337
|
+
return HttpResponse(response=_response, data=_data)
|
|
338
|
+
if _response.status_code == 400:
|
|
339
|
+
raise BadRequestError(
|
|
340
|
+
headers=dict(_response.headers),
|
|
341
|
+
body=typing.cast(
|
|
342
|
+
typing.Optional[typing.Any],
|
|
343
|
+
parse_obj_as(
|
|
344
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
345
|
+
object_=_response.json(),
|
|
346
|
+
),
|
|
347
|
+
),
|
|
348
|
+
)
|
|
349
|
+
if _response.status_code == 403:
|
|
350
|
+
raise ForbiddenError(
|
|
351
|
+
headers=dict(_response.headers),
|
|
352
|
+
body=typing.cast(
|
|
353
|
+
typing.Optional[typing.Any],
|
|
354
|
+
parse_obj_as(
|
|
355
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
356
|
+
object_=_response.json(),
|
|
357
|
+
),
|
|
358
|
+
),
|
|
359
|
+
)
|
|
360
|
+
if _response.status_code == 429:
|
|
361
|
+
raise TooManyRequestsError(
|
|
362
|
+
headers=dict(_response.headers),
|
|
363
|
+
body=typing.cast(
|
|
364
|
+
typing.Optional[typing.Any],
|
|
365
|
+
parse_obj_as(
|
|
366
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
367
|
+
object_=_response.json(),
|
|
368
|
+
),
|
|
369
|
+
),
|
|
370
|
+
)
|
|
371
|
+
if _response.status_code == 500:
|
|
372
|
+
raise InternalServerError(
|
|
373
|
+
headers=dict(_response.headers),
|
|
374
|
+
body=typing.cast(
|
|
375
|
+
typing.Optional[typing.Any],
|
|
376
|
+
parse_obj_as(
|
|
377
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
378
|
+
object_=_response.json(),
|
|
379
|
+
),
|
|
380
|
+
),
|
|
381
|
+
)
|
|
382
|
+
if _response.status_code == 503:
|
|
383
|
+
raise ServiceUnavailableError(
|
|
384
|
+
headers=dict(_response.headers),
|
|
385
|
+
body=typing.cast(
|
|
386
|
+
typing.Optional[typing.Any],
|
|
387
|
+
parse_obj_as(
|
|
388
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
389
|
+
object_=_response.json(),
|
|
390
|
+
),
|
|
391
|
+
),
|
|
392
|
+
)
|
|
393
|
+
_response_json = _response.json()
|
|
394
|
+
except JSONDecodeError:
|
|
395
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
396
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
397
|
+
|
|
398
|
+
def get_status(
|
|
399
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
400
|
+
) -> HttpResponse[DocDigitizationJobStatusResponse]:
|
|
401
|
+
"""
|
|
402
|
+
Returns the current status of a job with page-level metrics.
|
|
403
|
+
|
|
404
|
+
**Job States:**
|
|
405
|
+
- `Accepted`: Job created, awaiting file upload
|
|
406
|
+
- `Pending`: File uploaded, waiting to start
|
|
407
|
+
- `Running`: Processing in progress
|
|
408
|
+
- `Completed`: All pages processed successfully
|
|
409
|
+
- `PartiallyCompleted`: Some pages succeeded, some failed
|
|
410
|
+
- `Failed`: All pages failed or job-level error
|
|
411
|
+
|
|
412
|
+
**Page Metrics:**
|
|
413
|
+
Response includes detailed progress: total pages, pages processed, succeeded, failed, and per-page errors.
|
|
414
|
+
|
|
415
|
+
Parameters
|
|
416
|
+
----------
|
|
417
|
+
job_id : str
|
|
418
|
+
The unique identifier of the job
|
|
419
|
+
|
|
420
|
+
request_options : typing.Optional[RequestOptions]
|
|
421
|
+
Request-specific configuration.
|
|
422
|
+
|
|
423
|
+
Returns
|
|
424
|
+
-------
|
|
425
|
+
HttpResponse[DocDigitizationJobStatusResponse]
|
|
426
|
+
Successful Response
|
|
427
|
+
"""
|
|
428
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
429
|
+
f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/status",
|
|
430
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
431
|
+
method="GET",
|
|
432
|
+
request_options=request_options,
|
|
433
|
+
)
|
|
434
|
+
try:
|
|
435
|
+
if 200 <= _response.status_code < 300:
|
|
436
|
+
_data = typing.cast(
|
|
437
|
+
DocDigitizationJobStatusResponse,
|
|
438
|
+
parse_obj_as(
|
|
439
|
+
type_=DocDigitizationJobStatusResponse, # type: ignore
|
|
440
|
+
object_=_response.json(),
|
|
441
|
+
),
|
|
442
|
+
)
|
|
443
|
+
return HttpResponse(response=_response, data=_data)
|
|
444
|
+
if _response.status_code == 400:
|
|
445
|
+
raise BadRequestError(
|
|
446
|
+
headers=dict(_response.headers),
|
|
447
|
+
body=typing.cast(
|
|
448
|
+
typing.Optional[typing.Any],
|
|
449
|
+
parse_obj_as(
|
|
450
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
451
|
+
object_=_response.json(),
|
|
452
|
+
),
|
|
453
|
+
),
|
|
454
|
+
)
|
|
455
|
+
if _response.status_code == 403:
|
|
456
|
+
raise ForbiddenError(
|
|
457
|
+
headers=dict(_response.headers),
|
|
458
|
+
body=typing.cast(
|
|
459
|
+
typing.Optional[typing.Any],
|
|
460
|
+
parse_obj_as(
|
|
461
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
462
|
+
object_=_response.json(),
|
|
463
|
+
),
|
|
464
|
+
),
|
|
465
|
+
)
|
|
466
|
+
if _response.status_code == 429:
|
|
467
|
+
raise TooManyRequestsError(
|
|
468
|
+
headers=dict(_response.headers),
|
|
469
|
+
body=typing.cast(
|
|
470
|
+
typing.Optional[typing.Any],
|
|
471
|
+
parse_obj_as(
|
|
472
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
473
|
+
object_=_response.json(),
|
|
474
|
+
),
|
|
475
|
+
),
|
|
476
|
+
)
|
|
477
|
+
if _response.status_code == 500:
|
|
478
|
+
raise InternalServerError(
|
|
479
|
+
headers=dict(_response.headers),
|
|
480
|
+
body=typing.cast(
|
|
481
|
+
typing.Optional[typing.Any],
|
|
482
|
+
parse_obj_as(
|
|
483
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
484
|
+
object_=_response.json(),
|
|
485
|
+
),
|
|
486
|
+
),
|
|
487
|
+
)
|
|
488
|
+
if _response.status_code == 503:
|
|
489
|
+
raise ServiceUnavailableError(
|
|
490
|
+
headers=dict(_response.headers),
|
|
491
|
+
body=typing.cast(
|
|
492
|
+
typing.Optional[typing.Any],
|
|
493
|
+
parse_obj_as(
|
|
494
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
495
|
+
object_=_response.json(),
|
|
496
|
+
),
|
|
497
|
+
),
|
|
498
|
+
)
|
|
499
|
+
_response_json = _response.json()
|
|
500
|
+
except JSONDecodeError:
|
|
501
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
502
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
503
|
+
|
|
504
|
+
def get_download_links(
|
|
505
|
+
self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
|
|
506
|
+
) -> HttpResponse[DocDigitizationDownloadFilesResponse]:
|
|
507
|
+
"""
|
|
508
|
+
Returns presigned URLs for downloading output files.
|
|
509
|
+
|
|
510
|
+
**Prerequisites:**
|
|
511
|
+
- Job must be in `Completed` or `PartiallyCompleted` state
|
|
512
|
+
- Failed jobs have no output available
|
|
513
|
+
|
|
514
|
+
Parameters
|
|
515
|
+
----------
|
|
516
|
+
job_id : str
|
|
517
|
+
The unique identifier of the job
|
|
518
|
+
|
|
519
|
+
request_options : typing.Optional[RequestOptions]
|
|
520
|
+
Request-specific configuration.
|
|
521
|
+
|
|
522
|
+
Returns
|
|
523
|
+
-------
|
|
524
|
+
HttpResponse[DocDigitizationDownloadFilesResponse]
|
|
525
|
+
Successful Response
|
|
526
|
+
"""
|
|
527
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
528
|
+
f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/download-files",
|
|
529
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
530
|
+
method="POST",
|
|
531
|
+
request_options=request_options,
|
|
532
|
+
)
|
|
533
|
+
try:
|
|
534
|
+
if 200 <= _response.status_code < 300:
|
|
535
|
+
_data = typing.cast(
|
|
536
|
+
DocDigitizationDownloadFilesResponse,
|
|
537
|
+
parse_obj_as(
|
|
538
|
+
type_=DocDigitizationDownloadFilesResponse, # type: ignore
|
|
539
|
+
object_=_response.json(),
|
|
540
|
+
),
|
|
541
|
+
)
|
|
542
|
+
return HttpResponse(response=_response, data=_data)
|
|
543
|
+
if _response.status_code == 400:
|
|
544
|
+
raise BadRequestError(
|
|
545
|
+
headers=dict(_response.headers),
|
|
546
|
+
body=typing.cast(
|
|
547
|
+
typing.Optional[typing.Any],
|
|
548
|
+
parse_obj_as(
|
|
549
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
550
|
+
object_=_response.json(),
|
|
551
|
+
),
|
|
552
|
+
),
|
|
553
|
+
)
|
|
554
|
+
if _response.status_code == 403:
|
|
555
|
+
raise ForbiddenError(
|
|
556
|
+
headers=dict(_response.headers),
|
|
557
|
+
body=typing.cast(
|
|
558
|
+
typing.Optional[typing.Any],
|
|
559
|
+
parse_obj_as(
|
|
560
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
561
|
+
object_=_response.json(),
|
|
562
|
+
),
|
|
563
|
+
),
|
|
564
|
+
)
|
|
565
|
+
if _response.status_code == 429:
|
|
566
|
+
raise TooManyRequestsError(
|
|
567
|
+
headers=dict(_response.headers),
|
|
568
|
+
body=typing.cast(
|
|
569
|
+
typing.Optional[typing.Any],
|
|
570
|
+
parse_obj_as(
|
|
571
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
572
|
+
object_=_response.json(),
|
|
573
|
+
),
|
|
574
|
+
),
|
|
575
|
+
)
|
|
576
|
+
if _response.status_code == 500:
|
|
577
|
+
raise InternalServerError(
|
|
578
|
+
headers=dict(_response.headers),
|
|
579
|
+
body=typing.cast(
|
|
580
|
+
typing.Optional[typing.Any],
|
|
581
|
+
parse_obj_as(
|
|
582
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
583
|
+
object_=_response.json(),
|
|
584
|
+
),
|
|
585
|
+
),
|
|
586
|
+
)
|
|
587
|
+
if _response.status_code == 503:
|
|
588
|
+
raise ServiceUnavailableError(
|
|
589
|
+
headers=dict(_response.headers),
|
|
590
|
+
body=typing.cast(
|
|
591
|
+
typing.Optional[typing.Any],
|
|
592
|
+
parse_obj_as(
|
|
593
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
594
|
+
object_=_response.json(),
|
|
595
|
+
),
|
|
596
|
+
),
|
|
597
|
+
)
|
|
598
|
+
_response_json = _response.json()
|
|
599
|
+
except JSONDecodeError:
|
|
600
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
601
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
class AsyncRawDocDigitizationJobClient:
|
|
605
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
606
|
+
self._client_wrapper = client_wrapper
|
|
607
|
+
|
|
608
|
+
async def initialise(
|
|
609
|
+
self,
|
|
610
|
+
*,
|
|
611
|
+
job_parameters: typing.Optional[DocDigitizationJobParametersParams] = OMIT,
|
|
612
|
+
callback: typing.Optional[DocDigitizationWebhookCallbackParams] = OMIT,
|
|
613
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
614
|
+
) -> AsyncHttpResponse[DocDigitizationCreateJobResponse]:
|
|
615
|
+
"""
|
|
616
|
+
Creates a new document digitization job.
|
|
617
|
+
|
|
618
|
+
**Supported Languages:**
|
|
619
|
+
- `hi`: Hindi
|
|
620
|
+
- `en`: English
|
|
621
|
+
- `bn`: Bengali
|
|
622
|
+
- `gu`: Gujarati
|
|
623
|
+
- `kn`: Kannada
|
|
624
|
+
- `ml`: Malayalam
|
|
625
|
+
- `mr`: Marathi
|
|
626
|
+
- `or`: Odia
|
|
627
|
+
- `pa`: Punjabi
|
|
628
|
+
- `ta`: Tamil
|
|
629
|
+
- `te`: Telugu
|
|
630
|
+
- `ur`: Urdu
|
|
631
|
+
- `as`: Assamese
|
|
632
|
+
- `bodo`: Bodo
|
|
633
|
+
- `doi`: Dogri
|
|
634
|
+
- `ks`: Kashmiri
|
|
635
|
+
- `kok`: Konkani
|
|
636
|
+
- `mai`: Maithili
|
|
637
|
+
- `mni`: Manipuri
|
|
638
|
+
- `ne`: Nepali
|
|
639
|
+
- `sa`: Sanskrit
|
|
640
|
+
- `sat`: Santali
|
|
641
|
+
- `sd`: Sindhi
|
|
642
|
+
|
|
643
|
+
**Output Formats:**
|
|
644
|
+
- `html`: Structured HTML with layout preservation (default)
|
|
645
|
+
- `md`: Markdown format
|
|
646
|
+
|
|
647
|
+
**Webhook Callback:**
|
|
648
|
+
Optionally provide a callback URL to receive notification when processing completes.
|
|
649
|
+
|
|
650
|
+
Parameters
|
|
651
|
+
----------
|
|
652
|
+
job_parameters : typing.Optional[DocDigitizationJobParametersParams]
|
|
653
|
+
Job configuration parameters. Omit the request body to use defaults.
|
|
654
|
+
|
|
655
|
+
callback : typing.Optional[DocDigitizationWebhookCallbackParams]
|
|
656
|
+
Optional webhook for completion notification
|
|
657
|
+
|
|
658
|
+
request_options : typing.Optional[RequestOptions]
|
|
659
|
+
Request-specific configuration.
|
|
660
|
+
|
|
661
|
+
Returns
|
|
662
|
+
-------
|
|
663
|
+
AsyncHttpResponse[DocDigitizationCreateJobResponse]
|
|
664
|
+
Successful Response
|
|
665
|
+
"""
|
|
666
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
667
|
+
"doc-digitization/job/v1",
|
|
668
|
+
base_url=self._client_wrapper.get_environment().base,
|
|
669
|
+
method="POST",
|
|
670
|
+
json={
|
|
671
|
+
"job_parameters": convert_and_respect_annotation_metadata(
|
|
672
|
+
object_=job_parameters, annotation=DocDigitizationJobParametersParams, direction="write"
|
|
673
|
+
),
|
|
674
|
+
"callback": convert_and_respect_annotation_metadata(
|
|
675
|
+
object_=callback, annotation=DocDigitizationWebhookCallbackParams, direction="write"
|
|
676
|
+
),
|
|
677
|
+
},
|
|
678
|
+
headers={
|
|
679
|
+
"content-type": "application/json",
|
|
680
|
+
},
|
|
681
|
+
request_options=request_options,
|
|
682
|
+
omit=OMIT,
|
|
683
|
+
)
|
|
684
|
+
try:
|
|
685
|
+
if 200 <= _response.status_code < 300:
|
|
686
|
+
_data = typing.cast(
|
|
687
|
+
DocDigitizationCreateJobResponse,
|
|
688
|
+
parse_obj_as(
|
|
689
|
+
type_=DocDigitizationCreateJobResponse, # type: ignore
|
|
690
|
+
object_=_response.json(),
|
|
691
|
+
),
|
|
692
|
+
)
|
|
693
|
+
return AsyncHttpResponse(response=_response, data=_data)
|
|
694
|
+
if _response.status_code == 400:
|
|
695
|
+
raise BadRequestError(
|
|
696
|
+
headers=dict(_response.headers),
|
|
697
|
+
body=typing.cast(
|
|
698
|
+
typing.Optional[typing.Any],
|
|
699
|
+
parse_obj_as(
|
|
700
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
701
|
+
object_=_response.json(),
|
|
702
|
+
),
|
|
703
|
+
),
|
|
704
|
+
)
|
|
705
|
+
if _response.status_code == 403:
|
|
706
|
+
raise ForbiddenError(
|
|
707
|
+
headers=dict(_response.headers),
|
|
708
|
+
body=typing.cast(
|
|
709
|
+
typing.Optional[typing.Any],
|
|
710
|
+
parse_obj_as(
|
|
711
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
712
|
+
object_=_response.json(),
|
|
713
|
+
),
|
|
714
|
+
),
|
|
715
|
+
)
|
|
716
|
+
if _response.status_code == 429:
|
|
717
|
+
raise TooManyRequestsError(
|
|
718
|
+
headers=dict(_response.headers),
|
|
719
|
+
body=typing.cast(
|
|
720
|
+
typing.Optional[typing.Any],
|
|
721
|
+
parse_obj_as(
|
|
722
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
723
|
+
object_=_response.json(),
|
|
724
|
+
),
|
|
725
|
+
),
|
|
726
|
+
)
|
|
727
|
+
if _response.status_code == 500:
|
|
728
|
+
raise InternalServerError(
|
|
729
|
+
headers=dict(_response.headers),
|
|
730
|
+
body=typing.cast(
|
|
731
|
+
typing.Optional[typing.Any],
|
|
732
|
+
parse_obj_as(
|
|
733
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
734
|
+
object_=_response.json(),
|
|
735
|
+
),
|
|
736
|
+
),
|
|
737
|
+
)
|
|
738
|
+
if _response.status_code == 503:
|
|
739
|
+
raise ServiceUnavailableError(
|
|
740
|
+
headers=dict(_response.headers),
|
|
741
|
+
body=typing.cast(
|
|
742
|
+
typing.Optional[typing.Any],
|
|
743
|
+
parse_obj_as(
|
|
744
|
+
type_=typing.Optional[typing.Any], # type: ignore
|
|
745
|
+
object_=_response.json(),
|
|
746
|
+
),
|
|
747
|
+
),
|
|
748
|
+
)
|
|
749
|
+
_response_json = _response.json()
|
|
750
|
+
except JSONDecodeError:
|
|
751
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
|
|
752
|
+
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
|
|
753
|
+
|
|
754
|
+
async def get_upload_links(
    self, *, job_id: str, files: typing.Sequence[str], request_options: typing.Optional[RequestOptions] = None
) -> AsyncHttpResponse[DocDigitizationUploadFilesResponse]:
    """
    Request presigned URLs for uploading the job's input files.

    The service accepts exactly one input file per job, either a PDF
    (`.pdf`) or a ZIP archive (`.zip`).

    Parameters
    ----------
    job_id : str
        Job identifier returned from Create Job

    files : typing.Sequence[str]
        List of filenames to upload (exactly 1 file: PDF or ZIP)

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[DocDigitizationUploadFilesResponse]
        Successful Response
    """
    _http_response = await self._client_wrapper.httpx_client.request(
        "doc-digitization/job/v1/upload-files",
        base_url=self._client_wrapper.get_environment().base,
        method="POST",
        json={
            "job_id": job_id,
            "files": files,
        },
        headers={
            "content-type": "application/json",
        },
        request_options=request_options,
        omit=OMIT,
    )
    # Map each documented error status onto its typed exception so the
    # dispatch below stays flat instead of a five-branch if-ladder.
    _error_types: typing.Dict[int, typing.Any] = {
        400: BadRequestError,
        403: ForbiddenError,
        429: TooManyRequestsError,
        500: InternalServerError,
        503: ServiceUnavailableError,
    }
    try:
        if 200 <= _http_response.status_code < 300:
            _parsed = typing.cast(
                DocDigitizationUploadFilesResponse,
                parse_obj_as(
                    type_=DocDigitizationUploadFilesResponse,  # type: ignore
                    object_=_http_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_http_response, data=_parsed)
        _error_cls = _error_types.get(_http_response.status_code)
        if _error_cls is not None:
            # JSON parsing happens inside the try so a malformed error body
            # still degrades to a plain ApiError with the raw text.
            raise _error_cls(
                headers=dict(_http_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_http_response.json(),
                    ),
                ),
            )
        _body = _http_response.json()
    except JSONDecodeError:
        raise ApiError(
            status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_http_response.text
        )
    raise ApiError(status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_body)
|
|
864
|
+
|
|
865
|
+
async def start(
    self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
) -> AsyncHttpResponse[DocDigitizationJobStatusResponse]:
    """
    Validate the uploaded file and begin asynchronous processing.

    **Validation Checks:**
    - File must be uploaded before starting
    - File size must not exceed 200 MB
    - PDF must be parseable by the PDF parser
    - ZIP must contain only JPEG/PNG images
    - ZIP must be flat (no nested folders beyond one level)
    - ZIP must contain at least one valid image
    - Page/image count must not exceed 500
    - User must have sufficient credits

    **Processing:**
    Job runs asynchronously. Poll the status endpoint or use webhook callback for completion notification.

    Parameters
    ----------
    job_id : str
        The unique identifier of the job

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[DocDigitizationJobStatusResponse]
        Successful Response
    """
    _http_response = await self._client_wrapper.httpx_client.request(
        f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/start",
        base_url=self._client_wrapper.get_environment().base,
        method="POST",
        request_options=request_options,
    )
    # Status-code -> typed exception dispatch table (replaces an if-ladder).
    _error_types: typing.Dict[int, typing.Any] = {
        400: BadRequestError,
        403: ForbiddenError,
        429: TooManyRequestsError,
        500: InternalServerError,
        503: ServiceUnavailableError,
    }
    try:
        if 200 <= _http_response.status_code < 300:
            _parsed = typing.cast(
                DocDigitizationJobStatusResponse,
                parse_obj_as(
                    type_=DocDigitizationJobStatusResponse,  # type: ignore
                    object_=_http_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_http_response, data=_parsed)
        _error_cls = _error_types.get(_http_response.status_code)
        if _error_cls is not None:
            # Parse inside the try: a non-JSON error body falls through to
            # the generic ApiError carrying the raw response text.
            raise _error_cls(
                headers=dict(_http_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_http_response.json(),
                    ),
                ),
            )
        _body = _http_response.json()
    except JSONDecodeError:
        raise ApiError(
            status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_http_response.text
        )
    raise ApiError(status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_body)
|
|
972
|
+
|
|
973
|
+
async def get_status(
    self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
) -> AsyncHttpResponse[DocDigitizationJobStatusResponse]:
    """
    Fetch the current state of a job together with page-level metrics.

    **Job States:**
    - `Accepted`: Job created, awaiting file upload
    - `Pending`: File uploaded, waiting to start
    - `Running`: Processing in progress
    - `Completed`: All pages processed successfully
    - `PartiallyCompleted`: Some pages succeeded, some failed
    - `Failed`: All pages failed or job-level error

    **Page Metrics:**
    Response includes detailed progress: total pages, pages processed, succeeded, failed, and per-page errors.

    Parameters
    ----------
    job_id : str
        The unique identifier of the job

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[DocDigitizationJobStatusResponse]
        Successful Response
    """
    _http_response = await self._client_wrapper.httpx_client.request(
        f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/status",
        base_url=self._client_wrapper.get_environment().base,
        method="GET",
        request_options=request_options,
    )
    # Status-code -> typed exception dispatch table (replaces an if-ladder).
    _error_types: typing.Dict[int, typing.Any] = {
        400: BadRequestError,
        403: ForbiddenError,
        429: TooManyRequestsError,
        500: InternalServerError,
        503: ServiceUnavailableError,
    }
    try:
        if 200 <= _http_response.status_code < 300:
            _parsed = typing.cast(
                DocDigitizationJobStatusResponse,
                parse_obj_as(
                    type_=DocDigitizationJobStatusResponse,  # type: ignore
                    object_=_http_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_http_response, data=_parsed)
        _error_cls = _error_types.get(_http_response.status_code)
        if _error_cls is not None:
            # Parse inside the try: a non-JSON error body falls through to
            # the generic ApiError carrying the raw response text.
            raise _error_cls(
                headers=dict(_http_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_http_response.json(),
                    ),
                ),
            )
        _body = _http_response.json()
    except JSONDecodeError:
        raise ApiError(
            status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_http_response.text
        )
    raise ApiError(status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_body)
|
|
1078
|
+
|
|
1079
|
+
async def get_download_links(
    self, job_id: str, *, request_options: typing.Optional[RequestOptions] = None
) -> AsyncHttpResponse[DocDigitizationDownloadFilesResponse]:
    """
    Request presigned URLs for downloading the job's output files.

    **Prerequisites:**
    - Job must be in `Completed` or `PartiallyCompleted` state
    - Failed jobs have no output available

    Parameters
    ----------
    job_id : str
        The unique identifier of the job

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[DocDigitizationDownloadFilesResponse]
        Successful Response
    """
    _http_response = await self._client_wrapper.httpx_client.request(
        f"doc-digitization/job/v1/{jsonable_encoder(job_id)}/download-files",
        base_url=self._client_wrapper.get_environment().base,
        method="POST",
        request_options=request_options,
    )
    # Status-code -> typed exception dispatch table (replaces an if-ladder).
    _error_types: typing.Dict[int, typing.Any] = {
        400: BadRequestError,
        403: ForbiddenError,
        429: TooManyRequestsError,
        500: InternalServerError,
        503: ServiceUnavailableError,
    }
    try:
        if 200 <= _http_response.status_code < 300:
            _parsed = typing.cast(
                DocDigitizationDownloadFilesResponse,
                parse_obj_as(
                    type_=DocDigitizationDownloadFilesResponse,  # type: ignore
                    object_=_http_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_http_response, data=_parsed)
        _error_cls = _error_types.get(_http_response.status_code)
        if _error_cls is not None:
            # Parse inside the try: a non-JSON error body falls through to
            # the generic ApiError carrying the raw response text.
            raise _error_cls(
                headers=dict(_http_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_http_response.json(),
                    ),
                ),
            )
        _body = _http_response.json()
    except JSONDecodeError:
        raise ApiError(
            status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_http_response.text
        )
    raise ApiError(status_code=_http_response.status_code, headers=dict(_http_response.headers), body=_body)
|