retab 0.0.36__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +4 -0
- {uiform → retab}/_resource.py +5 -5
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +29 -34
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +16 -10
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +31 -31
- {uiform → retab}/client.py +54 -53
- {uiform → retab}/resources/consensus/client.py +19 -38
- {uiform → retab}/resources/consensus/completions.py +36 -59
- {uiform → retab}/resources/consensus/completions_stream.py +35 -47
- {uiform → retab}/resources/consensus/responses.py +37 -86
- {uiform → retab}/resources/consensus/responses_stream.py +41 -89
- retab/resources/documents/client.py +455 -0
- {uiform → retab}/resources/documents/extractions.py +192 -101
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +225 -221
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +79 -120
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- retab/resources/processors/automations/mailboxes.py +397 -0
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +181 -166
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +41 -4
- {uiform → retab}/types/ai_models.py +17 -17
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +10 -10
- {uiform → retab}/types/automations/outlook.py +15 -11
- {uiform → retab}/types/automations/webhooks.py +1 -1
- retab/types/browser_canvas.py +3 -0
- retab/types/chat.py +8 -0
- {uiform → retab}/types/completions.py +12 -15
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +20 -22
- {uiform → retab}/types/documents/extractions.py +71 -26
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +36 -13
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +16 -17
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/METADATA +78 -77
- retab-0.0.38.dist-info/RECORD +107 -0
- retab-0.0.38.dist-info/top_level.txt +1 -0
- retab-0.0.36.dist-info/RECORD +0 -96
- retab-0.0.36.dist-info/top_level.txt +0 -1
- uiform/__init__.py +0 -4
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/documents/client.py +0 -255
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/mailboxes.py +0 -435
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/chat.py +0 -8
- uiform/types/consensus.py +0 -10
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform → retab}/types/documents/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform → retab}/types/jobs/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform → retab}/types/schemas/__init__.py +0 -0
- {uiform → retab}/types/secrets/__init__.py +0 -0
- {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
@@ -0,0 +1,455 @@
|
|
1
|
+
from io import IOBase
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
import PIL.Image
|
6
|
+
from pydantic import HttpUrl
|
7
|
+
from pydantic_core import PydanticUndefined
|
8
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
9
|
+
|
10
|
+
from ..._resource import AsyncAPIResource, SyncAPIResource
|
11
|
+
from ..._utils.json_schema import load_json_schema, filter_auxiliary_fields_json
|
12
|
+
from ..._utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
|
13
|
+
from ..._utils.ai_models import assert_valid_model_extraction
|
14
|
+
from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
|
15
|
+
from ...types.documents.extractions import DocumentExtractRequest, UiParsedChatCompletion
|
16
|
+
from ...types.browser_canvas import BrowserCanvas
|
17
|
+
from ...types.mime import MIMEData
|
18
|
+
from ...types.modalities import Modality
|
19
|
+
from ...types.schemas.object import Schema
|
20
|
+
from ...types.standards import PreparedRequest
|
21
|
+
from .extractions import AsyncExtractions, Extractions
|
22
|
+
|
23
|
+
|
24
|
+
def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, allow_partial: bool = False) -> UiParsedChatCompletion:
    """Best-effort: attach a validated pydantic object to the first choice of ``response``.

    The content of the first choice's message is passed through
    ``filter_auxiliary_fields_json`` and validated against a pydantic model
    taken from ``schema``; the result is stored on ``message.parsed``. Any
    failure along the way leaves ``parsed`` untouched and is never propagated.

    Args:
        schema: Object providing ``pydantic_model`` and ``_partial_pydantic_model``.
        response: Completion whose first choice is updated in place.
        allow_partial: When true, validate with ``schema._partial_pydantic_model``
            instead of ``schema.pydantic_model``.

    Returns:
        The same ``response`` object that was passed in.
    """
    import logging

    # Nothing to parse when the response carries no choices; indexing
    # ``choices[0]`` would otherwise raise IndexError out of a best-effort helper.
    if not response.choices:
        return response

    message = response.choices[0].message
    if not message.content:
        return response

    try:
        model = schema._partial_pydantic_model if allow_partial else schema.pydantic_model
        message.parsed = model.model_validate(filter_auxiliary_fields_json(message.content))
    except Exception:
        # Parsing is optional by design, so the failure is swallowed; it is
        # logged at debug level so it can still be diagnosed.
        logging.getLogger(__name__).debug("Could not parse completion content into the schema model", exc_info=True)
    return response
|
34
|
+
|
35
|
+
|
36
|
+
class BaseDocumentsMixin:
    """Request builders shared by the synchronous and asynchronous documents resources.

    Each ``_prepare_*`` method only assembles a ``PreparedRequest``; sending it
    is left to the caller.
    """

    def _prepare_create_messages(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/create_messages``.

        The document is converted with ``prepare_mime_document`` and wrapped in a
        ``DocumentCreateMessageRequest`` whose ``model_dump()`` becomes the body.
        """
        payload = DocumentCreateMessageRequest(
            document=prepare_mime_document(document),
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        ).model_dump()
        return PreparedRequest(
            method="POST",
            url="/v1/documents/create_messages",
            data=payload,
            idempotency_key=idempotency_key,
        )

    def _prepare_create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/create_inputs``.

        The document is converted with ``prepare_mime_document`` and the schema
        with ``load_json_schema``; both go into a ``DocumentCreateInputRequest``
        whose ``model_dump()`` becomes the body.
        """
        # Keyword arguments are evaluated left to right, so the document is
        # prepared before the schema is loaded.
        payload = DocumentCreateInputRequest(
            document=prepare_mime_document(document),
            modality=modality,
            json_schema=load_json_schema(json_schema),
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        ).model_dump()
        return PreparedRequest(
            method="POST",
            url="/v1/documents/create_inputs",
            data=payload,
            idempotency_key=idempotency_key,
        )

    def _prepare_correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/correct_image_orientation``.

        Raises:
            ValueError: If the prepared document's MIME type does not start with ``image/``.
        """
        prepared = prepare_mime_document(document)
        if prepared.mime_type.startswith("image/"):
            return PreparedRequest(
                method="POST",
                url="/v1/documents/correct_image_orientation",
                data={"document": prepared.model_dump()},
            )
        raise ValueError("Image is not a valid image")
|
87
|
+
|
88
|
+
|
89
|
+
class Documents(SyncAPIResource, BaseDocumentsMixin):
    """Synchronous wrapper around the documents endpoints."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
        """Send an image to the orientation-correction endpoint and return the result.

        Args:
            document: The image to correct, given as a file path (Path or str),
                a file-like object (IOBase), a MIMEData object or a PIL image.

        Returns:
            PIL.Image.Image: The image decoded from the ``"document"`` entry of the API response.

        Raises:
            ValueError: If the prepared document's MIME type is not an image type.

        Errors raised by the client while sending the request propagate unchanged.
        """
        api_response = self._client._prepared_request(self._prepare_correct_image_orientation(document))
        corrected = MIMEData.model_validate(api_response["document"])
        return convert_mime_data_to_pil_image(corrected)

    def create_messages(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """Create document messages from a document.

        Args:
            document: The document to process (file path, file-like object, MIMEData, PIL image or URL).
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request.

        Returns:
            DocumentMessage: The API response validated as a ``DocumentMessage``.

        Errors raised by the client while sending the request propagate unchanged.
        """
        prepared = self._prepare_create_messages(
            document=document,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        return DocumentMessage.model_validate(self._client._prepared_request(prepared))

    def create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """Create document inputs (messages with schema context) from a document.

        Args:
            document: The document to process (file path, file-like object, MIMEData, PIL image or URL).
            json_schema: The JSON schema to use, as a dict or a path/string accepted by ``load_json_schema``.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request.

        Returns:
            DocumentMessage: The API response validated as a ``DocumentMessage``.

        Errors raised by the client while sending the request propagate unchanged.
        """
        prepared = self._prepare_create_inputs(
            document=document,
            json_schema=json_schema,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        return DocumentMessage.model_validate(self._client._prepared_request(prepared))

    def extract(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None = None,
        documents: list[Path | str | IOBase | HttpUrl] | None = None,
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        temperature: float = PydanticUndefined,  # type: ignore[assignment]
        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """Run structured extraction on one or more documents via ``/v1/documents/extract``.

        Exactly one of ``document`` and ``documents`` must be supplied.

        Args:
            json_schema: JSON schema defining the expected data structure.
            model: The AI model to use; checked with ``assert_valid_model_extraction``.
            document: Single document to process.
            documents: List of documents to process.
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            temperature: Optional model temperature setting.
            modality: Optional modality of the document.
            reasoning_effort: Optional reasoning effort for the model.
            n_consensus: Optional number of consensus extractions to perform.
            idempotency_key: Optional idempotency key for the request.
            store: Value forwarded as the request's ``store`` field. Defaults to False.

        Returns:
            UiParsedChatCompletion: The validated API response, with the first
            choice's ``parsed`` field filled in when parsing succeeds.

        Raises:
            ValueError: If both or neither of ``document`` and ``documents`` are given.

        Errors raised by the client while sending the request propagate unchanged.
        """
        assert_valid_model_extraction(model)
        loaded_schema = load_json_schema(json_schema)

        # Normalise the two mutually exclusive inputs into a single list.
        sources: list[Path | str | IOBase | HttpUrl]
        if document is None:
            if documents is None:
                raise ValueError("Must provide either 'document' or 'documents' parameter.")
            sources = documents
        else:
            if documents is not None:
                raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
            sources = [document]
        mime_documents = [prepare_mime_document(source) for source in sources]

        # Constructing the request model validates the arguments.
        extract_request = DocumentExtractRequest(
            json_schema=loaded_schema,
            documents=mime_documents,
            model=model,
            temperature=temperature,
            stream=False,
            modality=modality,
            store=store,
            reasoning_effort=reasoning_effort,
            n_consensus=n_consensus,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )
        body = extract_request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        api_response = self._client._prepared_request(
            PreparedRequest(
                method="POST",
                url="/v1/documents/extract",
                data=body,
                idempotency_key=idempotency_key,
            )
        )

        completion = UiParsedChatCompletion.model_validate(api_response)
        return maybe_parse_to_pydantic(Schema(json_schema=load_json_schema(loaded_schema)), completion)
|
271
|
+
|
272
|
+
|
273
|
+
class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
    """Documents API wrapper for asynchronous usage."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    async def create_messages(
        self,
        # HttpUrl is accepted here as well: the shared _prepare_create_messages
        # helper and the synchronous counterpart already take it.
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document messages from a document asynchronously.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request.

        Returns:
            DocumentMessage: The API response validated as a ``DocumentMessage``.

        Errors raised by the client while sending the request propagate unchanged.
        """
        request = self._prepare_create_messages(
            document=document,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = await self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    async def create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document inputs (messages with schema context) from a document asynchronously.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            json_schema: The JSON schema to use for structuring the document content.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request.

        Returns:
            DocumentMessage: The API response validated as a ``DocumentMessage``.

        Errors raised by the client while sending the request propagate unchanged.
        """
        request = self._prepare_create_inputs(
            document=document,
            json_schema=json_schema,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = await self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    async def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
        """Send an image to the orientation-correction endpoint asynchronously.

        Args:
            document: The input image to correct. Can be:
                - A file path (Path or str)
                - A file-like object (IOBase)
                - A MIMEData object
                - A PIL Image object

        Returns:
            PIL.Image.Image: The image decoded from the ``"document"`` entry of the API response.

        Raises:
            ValueError: If the prepared document's MIME type is not an image type.

        Errors raised by the client while sending the request propagate unchanged.
        """
        request = self._prepare_correct_image_orientation(document)
        response = await self._client._prepared_request(request)
        mime_response = MIMEData.model_validate(response["document"])
        return convert_mime_data_to_pil_image(mime_response)

    async def extract(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None = None,
        documents: list[Path | str | IOBase | HttpUrl] | None = None,
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        temperature: float = PydanticUndefined,  # type: ignore[assignment]
        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """
        Run structured extraction on one or more documents via ``/v1/documents/extract`` asynchronously.

        Exactly one of ``document`` and ``documents`` must be supplied.

        Args:
            json_schema: JSON schema defining the expected data structure.
            model: The AI model to use; checked with ``assert_valid_model_extraction``.
            document: Single document to process (use either this or documents, not both).
            documents: List of documents to process (use either this or document, not both).
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            temperature: Optional model temperature setting.
            modality: Optional modality of the document.
            reasoning_effort: Optional reasoning effort for the model.
            n_consensus: Optional number of consensus extractions to perform.
            idempotency_key: Optional idempotency key for the request.
            store: Value forwarded as the request's ``store`` field. Defaults to False.

        Returns:
            UiParsedChatCompletion: The validated API response, with the first
            choice's ``parsed`` field filled in when parsing succeeds.

        Raises:
            ValueError: If neither document nor documents is provided, or if both are provided.

        Errors raised by the client while sending the request propagate unchanged.
        """
        assert_valid_model_extraction(model)

        json_schema = load_json_schema(json_schema)

        # The two document arguments are mutually exclusive.
        if document is not None and documents is not None:
            raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")

        # Normalise to a list so the request always carries ``documents``.
        if document is not None:
            processed_documents = [prepare_mime_document(document)]
        elif documents is not None:
            processed_documents = [prepare_mime_document(doc) for doc in documents]
        else:
            raise ValueError("Must provide either 'document' or 'documents' parameter.")

        # Constructing the request model validates the arguments (raises if invalid).
        request = DocumentExtractRequest(
            json_schema=json_schema,
            documents=processed_documents,
            model=model,
            temperature=temperature,
            stream=False,
            modality=modality,
            store=store,
            reasoning_effort=reasoning_effort,
            n_consensus=n_consensus,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )

        prepared_request = PreparedRequest(
            method="POST",
            url="/v1/documents/extract",
            data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
            idempotency_key=idempotency_key,
        )

        response = await self._client._prepared_request(prepared_request)

        # NOTE(review): json_schema was already passed through load_json_schema
        # above; this second call looks redundant - confirm before removing.
        schema = Schema(json_schema=load_json_schema(json_schema))
        return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))
|