retab 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/_resource.py +5 -5
- retab/_utils/_model_cards/anthropic.yaml +59 -0
- retab/_utils/_model_cards/auto.yaml +43 -0
- retab/_utils/_model_cards/gemini.yaml +117 -0
- retab/_utils/_model_cards/openai.yaml +301 -0
- retab/_utils/_model_cards/xai.yaml +28 -0
- retab/_utils/ai_models.py +109 -71
- retab/_utils/chat.py +20 -20
- retab/_utils/responses.py +14 -14
- retab/_utils/usage/usage.py +5 -4
- retab/client.py +22 -22
- retab/resources/consensus/client.py +2 -2
- retab/resources/consensus/completions.py +26 -26
- retab/resources/consensus/completions_stream.py +27 -27
- retab/resources/consensus/responses.py +11 -11
- retab/resources/consensus/responses_stream.py +15 -15
- retab/resources/documents/client.py +297 -16
- retab/resources/documents/extractions.py +39 -39
- retab/resources/evaluations/documents.py +5 -5
- retab/resources/evaluations/iterations.py +7 -7
- retab/resources/jsonlUtils.py +7 -7
- retab/resources/processors/automations/endpoints.py +2 -2
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +2 -2
- retab/resources/processors/automations/outlook.py +2 -2
- retab/resources/processors/client.py +9 -9
- retab/resources/usage.py +4 -4
- retab/types/ai_models.py +41 -513
- retab/types/automations/mailboxes.py +1 -1
- retab/types/automations/webhooks.py +3 -3
- retab/types/chat.py +1 -1
- retab/types/completions.py +10 -10
- retab/types/documents/__init__.py +3 -0
- retab/types/documents/create_messages.py +2 -2
- retab/types/documents/extractions.py +19 -19
- retab/types/documents/parse.py +32 -0
- retab/types/extractions.py +4 -4
- retab/types/logs.py +2 -2
- retab/types/schemas/object.py +3 -3
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/METADATA +72 -72
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/RECORD +45 -39
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/WHEEL +0 -0
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,39 @@
|
|
1
1
|
from io import IOBase
|
2
2
|
from pathlib import Path
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any, Literal
|
4
4
|
|
5
5
|
import PIL.Image
|
6
6
|
from pydantic import HttpUrl
|
7
7
|
from pydantic_core import PydanticUndefined
|
8
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
8
9
|
|
9
10
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
10
|
-
from ..._utils.json_schema import load_json_schema
|
11
|
+
from ..._utils.json_schema import load_json_schema, filter_auxiliary_fields_json
|
11
12
|
from ..._utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
|
13
|
+
from ..._utils.ai_models import assert_valid_model_extraction
|
12
14
|
from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
|
15
|
+
from ...types.documents.extractions import DocumentExtractRequest, RetabParsedChatCompletion
|
16
|
+
from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
|
13
17
|
from ...types.browser_canvas import BrowserCanvas
|
14
18
|
from ...types.mime import MIMEData
|
15
19
|
from ...types.modalities import Modality
|
20
|
+
from ...types.schemas.object import Schema
|
16
21
|
from ...types.standards import PreparedRequest
|
17
22
|
from .extractions import AsyncExtractions, Extractions
|
18
23
|
|
19
24
|
|
25
|
+
def maybe_parse_to_pydantic(schema: Schema, response: RetabParsedChatCompletion, allow_partial: bool = False) -> RetabParsedChatCompletion:
    """Best-effort: attach a validated pydantic object to the first choice of ``response``.

    The first choice's message content is passed through
    ``filter_auxiliary_fields_json`` and validated against a model taken from
    ``schema``; on success the result is stored in ``message.parsed``.
    Parsing is opportunistic: a failure never propagates, so the caller always
    gets the completion back, with ``parsed`` left as it was.

    Args:
        schema: Object exposing ``pydantic_model`` and ``_partial_pydantic_model``.
        response: Completion whose first choice is updated in place.
        allow_partial: When True, validate against
            ``schema._partial_pydantic_model`` instead of ``schema.pydantic_model``.

    Returns:
        The same ``response`` object that was passed in.
    """
    import logging

    # A completion without choices has nothing to parse; indexing [0] would
    # raise IndexError and defeat the best-effort contract.
    if not response.choices:
        return response

    message = response.choices[0].message
    if not message.content:
        return response

    try:
        # The attribute lookup stays inside the try block so that a failure
        # while resolving the model is tolerated like a validation failure.
        model = schema._partial_pydantic_model if allow_partial else schema.pydantic_model
        message.parsed = model.model_validate(filter_auxiliary_fields_json(message.content))
    except Exception:
        # Deliberately broad: the raw completion is still useful to the caller.
        # Record the reason at debug level instead of discarding it silently.
        logging.getLogger(__name__).debug("Could not parse completion content into the schema's pydantic model", exc_info=True)
    return response
|
35
|
+
|
36
|
+
|
20
37
|
class BaseDocumentsMixin:
|
21
38
|
def _prepare_create_messages(
|
22
39
|
self,
|
@@ -69,17 +86,37 @@ class BaseDocumentsMixin:
|
|
69
86
|
data={"document": mime_document.model_dump()},
|
70
87
|
)
|
71
88
|
|
89
|
+
def _prepare_parse(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    fast_mode: bool = False,
    table_parsing_format: TableParsingFormat = "html",
    image_resolution_dpi: int = 72,
    browser_canvas: BrowserCanvas = "A4",
    idempotency_key: str | None = None,
) -> PreparedRequest:
    """Build the POST request for the ``/v1/documents/parse`` endpoint.

    Args:
        document: Input handed to ``prepare_mime_document`` for conversion.
        fast_mode: Forwarded unchanged to ``ParseRequest``.
        table_parsing_format: Forwarded unchanged to ``ParseRequest``.
        image_resolution_dpi: Forwarded unchanged to ``ParseRequest``.
        browser_canvas: Forwarded unchanged to ``ParseRequest``.
        idempotency_key: Attached to the prepared request, not to the payload.

    Returns:
        PreparedRequest: Carries the dumped ``ParseRequest`` as its data.
    """
    # Constructing ParseRequest first lets the request model reject bad
    # options before anything is sent.
    payload = ParseRequest(
        document=prepare_mime_document(document),
        fast_mode=fast_mode,
        table_parsing_format=table_parsing_format,
        image_resolution_dpi=image_resolution_dpi,
        browser_canvas=browser_canvas,
    ).model_dump()
    return PreparedRequest(method="POST", url="/v1/documents/parse", data=payload, idempotency_key=idempotency_key)
|
108
|
+
|
72
109
|
|
73
110
|
class Documents(SyncAPIResource, BaseDocumentsMixin):
|
74
111
|
"""Documents API wrapper"""
|
75
112
|
|
76
113
|
def __init__(self, client: Any) -> None:
|
77
114
|
super().__init__(client=client)
|
78
|
-
self.
|
115
|
+
# self.extractions_api = Extractions(client=client)
|
79
116
|
# self.batch = Batch(client=client)
|
80
117
|
|
81
118
|
def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
|
82
|
-
"""Corrects the orientation of an image using the
|
119
|
+
"""Corrects the orientation of an image using the Retab API.
|
83
120
|
|
84
121
|
This method takes an image in various formats and returns a PIL Image with corrected orientation.
|
85
122
|
Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.
|
@@ -96,7 +133,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
96
133
|
|
97
134
|
Raises:
|
98
135
|
ValueError: If the input is not a valid image
|
99
|
-
|
136
|
+
RetabAPIError: If the API request fails
|
100
137
|
"""
|
101
138
|
request = self._prepare_correct_image_orientation(document)
|
102
139
|
response = self._client._prepared_request(request)
|
@@ -112,7 +149,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
112
149
|
idempotency_key: str | None = None,
|
113
150
|
) -> DocumentMessage:
|
114
151
|
"""
|
115
|
-
Create document messages from a file using the
|
152
|
+
Create document messages from a file using the Retab API.
|
116
153
|
|
117
154
|
Args:
|
118
155
|
document: The document to process. Can be a file path (Path or str) or a file-like object.
|
@@ -124,7 +161,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
124
161
|
DocumentMessage: The processed document message containing extracted content.
|
125
162
|
|
126
163
|
Raises:
|
127
|
-
|
164
|
+
RetabAPIError: If the API request fails.
|
128
165
|
"""
|
129
166
|
request = self._prepare_create_messages(
|
130
167
|
document=document, modality=modality, image_resolution_dpi=image_resolution_dpi, browser_canvas=browser_canvas, idempotency_key=idempotency_key
|
@@ -142,7 +179,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
142
179
|
idempotency_key: str | None = None,
|
143
180
|
) -> DocumentMessage:
|
144
181
|
"""
|
145
|
-
Create document inputs (messages with schema) from a file using the
|
182
|
+
Create document inputs (messages with schema) from a file using the Retab API.
|
146
183
|
|
147
184
|
Args:
|
148
185
|
document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
@@ -155,7 +192,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
155
192
|
DocumentMessage: The processed document message containing extracted content with schema context.
|
156
193
|
|
157
194
|
Raises:
|
158
|
-
|
195
|
+
RetabAPIError: If the API request fails.
|
159
196
|
"""
|
160
197
|
request = self._prepare_create_inputs(
|
161
198
|
document=document,
|
@@ -168,13 +205,135 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
168
205
|
response = self._client._prepared_request(request)
|
169
206
|
return DocumentMessage.model_validate(response)
|
170
207
|
|
208
|
+
def extract(
    self,
    json_schema: dict[str, Any] | Path | str,
    model: str,
    document: Path | str | IOBase | HttpUrl | None = None,
    documents: list[Path | str | IOBase | HttpUrl] | None = None,
    image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
    browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
    temperature: float = PydanticUndefined,  # type: ignore[assignment]
    modality: Modality = PydanticUndefined,  # type: ignore[assignment]
    reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
    n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
    idempotency_key: str | None = None,
    store: bool = False,
) -> RetabParsedChatCompletion:
    """
    Process one or more documents using the Retab API for structured data extraction.

    This method provides a direct interface to document extraction functionality,
    intended to replace the current `.extractions.parse()` pattern.

    Args:
        json_schema: JSON schema defining the expected data structure
        model: The AI model to use for processing
        document: Single document to process (use either this or documents, not both)
        documents: List of documents to process (use either this or document, not both)
        image_resolution_dpi: Optional image resolution DPI
        browser_canvas: Optional browser canvas size
        temperature: Model temperature setting (0-1)
        modality: Modality of the document (e.g., native)
        reasoning_effort: The effort level for the model to reason about the input data
        n_consensus: Number of consensus extractions to perform
        idempotency_key: Idempotency key for request
        store: Whether to store the document in the Retab database

    Returns:
        RetabParsedChatCompletion: Parsed response from the API

    Raises:
        ValueError: If neither document nor documents is provided, or if both are provided
        Exception: Errors from model/request validation and from
            `self._client._prepared_request` propagate unchanged (sibling
            methods of this class document the latter as RetabAPIError)
    """
    assert_valid_model_extraction(model)

    # Resolve the schema once; the same loaded value feeds both the request
    # payload and the response parsing below.
    json_schema = load_json_schema(json_schema)

    # Exactly one of `document` / `documents` must be supplied.
    if document is not None and documents is not None:
        raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
    if document is None and documents is None:
        raise ValueError("Must provide either 'document' or 'documents' parameter.")

    # Normalise the single-document form to a list so the request has one shape.
    sources = [document] if document is not None else documents
    processed_documents = [prepare_mime_document(doc) for doc in sources]

    # Building DocumentExtractRequest validates the arguments (raises if invalid).
    # NOTE(review): options left at PydanticUndefined presumably fall back to
    # the request model's own defaults; confirm against DocumentExtractRequest.
    request = DocumentExtractRequest(
        json_schema=json_schema,
        documents=processed_documents,
        model=model,
        temperature=temperature,
        stream=False,
        modality=modality,
        store=store,
        reasoning_effort=reasoning_effort,
        n_consensus=n_consensus,
        image_resolution_dpi=image_resolution_dpi,
        browser_canvas=browser_canvas,
    )

    prepared_request = PreparedRequest(
        method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
    )

    response = self._client._prepared_request(prepared_request)

    # Best-effort: attach a typed `parsed` object when the content validates.
    schema = Schema(json_schema=json_schema)
    return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
289
|
+
|
290
|
+
def parse(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    fast_mode: bool = False,
    table_parsing_format: TableParsingFormat = "html",
    image_resolution_dpi: int = 72,
    browser_canvas: BrowserCanvas = "A4",
    idempotency_key: str | None = None,
) -> ParseResult:
    """
    Parse a document and return its content as a ``ParseResult``.

    The arguments are turned into a request by ``_prepare_parse``, sent through
    the client, and the raw response is validated into a ``ParseResult``.

    Args:
        document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
        table_parsing_format: Format for parsing tables. Defaults to "html".
        image_resolution_dpi: DPI for image processing. Defaults to 72.
        browser_canvas: Canvas size for document rendering. Defaults to "A4".
        idempotency_key: Optional idempotency key for the request.

    Returns:
        ParseResult: The validated response of the parse endpoint.

    Raises:
        Exception: Whatever `self._client._prepared_request` raises when the
            request fails propagates unchanged.
    """
    prepared = self._prepare_parse(
        document=document,
        fast_mode=fast_mode,
        table_parsing_format=table_parsing_format,
        image_resolution_dpi=image_resolution_dpi,
        browser_canvas=browser_canvas,
        idempotency_key=idempotency_key,
    )
    raw_result = self._client._prepared_request(prepared)
    return ParseResult.model_validate(raw_result)
|
329
|
+
|
171
330
|
|
172
331
|
class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
173
332
|
"""Documents API wrapper for asynchronous usage."""
|
174
333
|
|
175
334
|
def __init__(self, client: Any) -> None:
|
176
335
|
super().__init__(client=client)
|
177
|
-
self.
|
336
|
+
# self.extractions_api = AsyncExtractions(client=client)
|
178
337
|
|
179
338
|
async def create_messages(
|
180
339
|
self,
|
@@ -185,7 +344,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
185
344
|
idempotency_key: str | None = None,
|
186
345
|
) -> DocumentMessage:
|
187
346
|
"""
|
188
|
-
Create document messages from a file using the
|
347
|
+
Create document messages from a file using the Retab API asynchronously.
|
189
348
|
|
190
349
|
Args:
|
191
350
|
document: The document to process. Can be a file path (Path or str) or a file-like object.
|
@@ -195,7 +354,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
195
354
|
DocumentMessage: The processed document message containing extracted content.
|
196
355
|
|
197
356
|
Raises:
|
198
|
-
|
357
|
+
RetabAPIError: If the API request fails.
|
199
358
|
"""
|
200
359
|
request = self._prepare_create_messages(
|
201
360
|
document=document,
|
@@ -217,7 +376,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
217
376
|
idempotency_key: str | None = None,
|
218
377
|
) -> DocumentMessage:
|
219
378
|
"""
|
220
|
-
Create document inputs (messages with schema) from a file using the
|
379
|
+
Create document inputs (messages with schema) from a file using the Retab API asynchronously.
|
221
380
|
|
222
381
|
Args:
|
223
382
|
document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
@@ -230,7 +389,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
230
389
|
DocumentMessage: The processed document message containing extracted content with schema context.
|
231
390
|
|
232
391
|
Raises:
|
233
|
-
|
392
|
+
RetabAPIError: If the API request fails.
|
234
393
|
"""
|
235
394
|
request = self._prepare_create_inputs(
|
236
395
|
document=document,
|
@@ -244,7 +403,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
244
403
|
return DocumentMessage.model_validate(response)
|
245
404
|
|
246
405
|
async def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
|
247
|
-
"""Corrects the orientation of an image using the
|
406
|
+
"""Corrects the orientation of an image using the Retab API asynchronously.
|
248
407
|
|
249
408
|
This method takes an image in various formats and returns a PIL Image with corrected orientation.
|
250
409
|
Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.
|
@@ -261,9 +420,131 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
261
420
|
|
262
421
|
Raises:
|
263
422
|
ValueError: If the input is not a valid image
|
264
|
-
|
423
|
+
RetabAPIError: If the API request fails
|
265
424
|
"""
|
266
425
|
request = self._prepare_correct_image_orientation(document)
|
267
426
|
response = await self._client._prepared_request(request)
|
268
427
|
mime_response = MIMEData.model_validate(response["document"])
|
269
428
|
return convert_mime_data_to_pil_image(mime_response)
|
429
|
+
|
430
|
+
async def extract(
    self,
    json_schema: dict[str, Any] | Path | str,
    model: str,
    document: Path | str | IOBase | HttpUrl | None = None,
    documents: list[Path | str | IOBase | HttpUrl] | None = None,
    image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
    browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
    temperature: float = PydanticUndefined,  # type: ignore[assignment]
    modality: Modality = PydanticUndefined,  # type: ignore[assignment]
    reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
    n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
    idempotency_key: str | None = None,
    store: bool = False,
) -> RetabParsedChatCompletion:
    """
    Process one or more documents using the Retab API for structured data extraction asynchronously.

    This method provides a direct interface to document extraction functionality,
    intended to replace the current `.extractions.parse()` pattern.

    Args:
        json_schema: JSON schema defining the expected data structure
        model: The AI model to use for processing
        document: Single document to process (use either this or documents, not both)
        documents: List of documents to process (use either this or document, not both)
        image_resolution_dpi: Optional image resolution DPI
        browser_canvas: Optional browser canvas size
        temperature: Model temperature setting (0-1)
        modality: Modality of the document (e.g., native)
        reasoning_effort: The effort level for the model to reason about the input data
        n_consensus: Number of consensus extractions to perform
        idempotency_key: Idempotency key for request
        store: Whether to store the document in the Retab database

    Returns:
        RetabParsedChatCompletion: Parsed response from the API

    Raises:
        ValueError: If neither document nor documents is provided, or if both are provided
        Exception: Errors from model/request validation and from
            `self._client._prepared_request` propagate unchanged (sibling
            methods of this class document the latter as RetabAPIError)
    """
    assert_valid_model_extraction(model)

    # Resolve the schema once; the same loaded value feeds both the request
    # payload and the response parsing below.
    json_schema = load_json_schema(json_schema)

    # Exactly one of `document` / `documents` must be supplied.
    if document is not None and documents is not None:
        raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
    if document is None and documents is None:
        raise ValueError("Must provide either 'document' or 'documents' parameter.")

    # Normalise the single-document form to a list so the request has one shape.
    sources = [document] if document is not None else documents
    processed_documents = [prepare_mime_document(doc) for doc in sources]

    # Building DocumentExtractRequest validates the arguments (raises if invalid).
    # NOTE(review): options left at PydanticUndefined presumably fall back to
    # the request model's own defaults; confirm against DocumentExtractRequest.
    request = DocumentExtractRequest(
        json_schema=json_schema,
        documents=processed_documents,
        model=model,
        temperature=temperature,
        stream=False,
        modality=modality,
        store=store,
        reasoning_effort=reasoning_effort,
        n_consensus=n_consensus,
        image_resolution_dpi=image_resolution_dpi,
        browser_canvas=browser_canvas,
    )

    prepared_request = PreparedRequest(
        method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
    )

    response = await self._client._prepared_request(prepared_request)

    # Best-effort: attach a typed `parsed` object when the content validates.
    schema = Schema(json_schema=json_schema)
    return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
511
|
+
|
512
|
+
async def parse(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    fast_mode: bool = False,
    table_parsing_format: TableParsingFormat = "html",
    image_resolution_dpi: int = 72,
    browser_canvas: BrowserCanvas = "A4",
    idempotency_key: str | None = None,
) -> ParseResult:
    """
    Parse a document asynchronously and return its content as a ``ParseResult``.

    The arguments are turned into a request by ``_prepare_parse``, awaited
    through the client, and the raw response is validated into a ``ParseResult``.

    Args:
        document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
        table_parsing_format: Format for parsing tables. Defaults to "html".
        image_resolution_dpi: DPI for image processing. Defaults to 72.
        browser_canvas: Canvas size for document rendering. Defaults to "A4".
        idempotency_key: Optional idempotency key for the request.

    Returns:
        ParseResult: The validated response of the parse endpoint.

    Raises:
        Exception: Whatever `self._client._prepared_request` raises when the
            request fails propagates unchanged.
    """
    prepared = self._prepare_parse(
        document=document,
        fast_mode=fast_mode,
        table_parsing_format=table_parsing_format,
        image_resolution_dpi=image_resolution_dpi,
        browser_canvas=browser_canvas,
        idempotency_key=idempotency_key,
    )
    raw_result = await self._client._prepared_request(prepared)
    return ParseResult.model_validate(raw_result)
|