retab 0.0.36__py3-none-any.whl → 0.0.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. retab/__init__.py +4 -0
  2. {uiform → retab}/_resource.py +5 -5
  3. {uiform → retab}/_utils/ai_models.py +2 -2
  4. {uiform → retab}/_utils/benchmarking.py +15 -16
  5. {uiform → retab}/_utils/chat.py +29 -34
  6. {uiform → retab}/_utils/display.py +0 -3
  7. {uiform → retab}/_utils/json_schema.py +9 -14
  8. {uiform → retab}/_utils/mime.py +11 -14
  9. {uiform → retab}/_utils/responses.py +16 -10
  10. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  11. {uiform → retab}/_utils/usage/usage.py +31 -31
  12. {uiform → retab}/client.py +54 -53
  13. {uiform → retab}/resources/consensus/client.py +19 -38
  14. {uiform → retab}/resources/consensus/completions.py +36 -59
  15. {uiform → retab}/resources/consensus/completions_stream.py +35 -47
  16. {uiform → retab}/resources/consensus/responses.py +37 -86
  17. {uiform → retab}/resources/consensus/responses_stream.py +41 -89
  18. retab/resources/documents/client.py +455 -0
  19. {uiform → retab}/resources/documents/extractions.py +192 -101
  20. {uiform → retab}/resources/evals.py +56 -43
  21. retab/resources/evaluations/__init__.py +3 -0
  22. retab/resources/evaluations/client.py +301 -0
  23. retab/resources/evaluations/documents.py +233 -0
  24. retab/resources/evaluations/iterations.py +452 -0
  25. {uiform → retab}/resources/files.py +2 -2
  26. {uiform → retab}/resources/jsonlUtils.py +225 -221
  27. retab/resources/models.py +73 -0
  28. retab/resources/processors/automations/client.py +244 -0
  29. {uiform → retab}/resources/processors/automations/endpoints.py +79 -120
  30. retab/resources/processors/automations/links.py +294 -0
  31. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  32. retab/resources/processors/automations/mailboxes.py +397 -0
  33. retab/resources/processors/automations/outlook.py +337 -0
  34. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  35. {uiform → retab}/resources/processors/client.py +181 -166
  36. {uiform → retab}/resources/schemas.py +78 -66
  37. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  38. retab/resources/secrets/webhook.py +64 -0
  39. {uiform → retab}/resources/usage.py +41 -4
  40. {uiform → retab}/types/ai_models.py +17 -17
  41. {uiform → retab}/types/automations/cron.py +19 -12
  42. {uiform → retab}/types/automations/endpoints.py +7 -4
  43. {uiform → retab}/types/automations/links.py +7 -3
  44. {uiform → retab}/types/automations/mailboxes.py +10 -10
  45. {uiform → retab}/types/automations/outlook.py +15 -11
  46. {uiform → retab}/types/automations/webhooks.py +1 -1
  47. retab/types/browser_canvas.py +3 -0
  48. retab/types/chat.py +8 -0
  49. {uiform → retab}/types/completions.py +12 -15
  50. retab/types/consensus.py +19 -0
  51. {uiform → retab}/types/db/annotations.py +3 -3
  52. {uiform → retab}/types/db/files.py +8 -6
  53. {uiform → retab}/types/documents/create_messages.py +20 -22
  54. {uiform → retab}/types/documents/extractions.py +71 -26
  55. {uiform → retab}/types/evals.py +5 -5
  56. retab/types/evaluations/__init__.py +31 -0
  57. retab/types/evaluations/documents.py +30 -0
  58. retab/types/evaluations/iterations.py +112 -0
  59. retab/types/evaluations/model.py +73 -0
  60. retab/types/events.py +79 -0
  61. {uiform → retab}/types/extractions.py +36 -13
  62. retab/types/inference_settings.py +15 -0
  63. retab/types/jobs/base.py +54 -0
  64. retab/types/jobs/batch_annotation.py +12 -0
  65. {uiform → retab}/types/jobs/evaluation.py +1 -2
  66. {uiform → retab}/types/logs.py +37 -34
  67. retab/types/metrics.py +32 -0
  68. {uiform → retab}/types/mime.py +22 -20
  69. {uiform → retab}/types/modalities.py +10 -10
  70. retab/types/predictions.py +19 -0
  71. {uiform → retab}/types/schemas/enhance.py +4 -2
  72. {uiform → retab}/types/schemas/evaluate.py +7 -4
  73. {uiform → retab}/types/schemas/generate.py +6 -3
  74. {uiform → retab}/types/schemas/layout.py +1 -1
  75. {uiform → retab}/types/schemas/object.py +16 -17
  76. {uiform → retab}/types/schemas/templates.py +1 -3
  77. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  78. {uiform → retab}/types/standards.py +18 -1
  79. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/METADATA +78 -77
  80. retab-0.0.38.dist-info/RECORD +107 -0
  81. retab-0.0.38.dist-info/top_level.txt +1 -0
  82. retab-0.0.36.dist-info/RECORD +0 -96
  83. retab-0.0.36.dist-info/top_level.txt +0 -1
  84. uiform/__init__.py +0 -4
  85. uiform/_utils/benchmarking copy.py +0 -588
  86. uiform/resources/documents/client.py +0 -255
  87. uiform/resources/models.py +0 -45
  88. uiform/resources/processors/automations/client.py +0 -78
  89. uiform/resources/processors/automations/links.py +0 -356
  90. uiform/resources/processors/automations/mailboxes.py +0 -435
  91. uiform/resources/processors/automations/outlook.py +0 -444
  92. uiform/resources/secrets/webhook.py +0 -62
  93. uiform/types/chat.py +0 -8
  94. uiform/types/consensus.py +0 -10
  95. uiform/types/events.py +0 -76
  96. uiform/types/jobs/base.py +0 -150
  97. uiform/types/jobs/batch_annotation.py +0 -22
  98. {uiform → retab}/_utils/__init__.py +0 -0
  99. {uiform → retab}/_utils/usage/__init__.py +0 -0
  100. {uiform → retab}/py.typed +0 -0
  101. {uiform → retab}/resources/__init__.py +0 -0
  102. {uiform → retab}/resources/consensus/__init__.py +0 -0
  103. {uiform → retab}/resources/documents/__init__.py +0 -0
  104. {uiform → retab}/resources/finetuning.py +0 -0
  105. {uiform → retab}/resources/openai_example.py +0 -0
  106. {uiform → retab}/resources/processors/__init__.py +0 -0
  107. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  108. {uiform → retab}/resources/prompt_optimization.py +0 -0
  109. {uiform → retab}/resources/secrets/__init__.py +0 -0
  110. {uiform → retab}/resources/secrets/client.py +0 -0
  111. {uiform → retab}/types/__init__.py +0 -0
  112. {uiform → retab}/types/automations/__init__.py +0 -0
  113. {uiform → retab}/types/db/__init__.py +0 -0
  114. {uiform → retab}/types/documents/__init__.py +0 -0
  115. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  116. {uiform → retab}/types/jobs/__init__.py +0 -0
  117. {uiform → retab}/types/jobs/finetune.py +0 -0
  118. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  119. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  120. {uiform → retab}/types/pagination.py +0 -0
  121. {uiform → retab}/types/schemas/__init__.py +0 -0
  122. {uiform → retab}/types/secrets/__init__.py +0 -0
  123. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
@@ -0,0 +1,455 @@
1
+ from io import IOBase
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ import PIL.Image
6
+ from pydantic import HttpUrl
7
+ from pydantic_core import PydanticUndefined
8
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
9
+
10
+ from ..._resource import AsyncAPIResource, SyncAPIResource
11
+ from ..._utils.json_schema import load_json_schema, filter_auxiliary_fields_json
12
+ from ..._utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
13
+ from ..._utils.ai_models import assert_valid_model_extraction
14
+ from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
15
+ from ...types.documents.extractions import DocumentExtractRequest, UiParsedChatCompletion
16
+ from ...types.browser_canvas import BrowserCanvas
17
+ from ...types.mime import MIMEData
18
+ from ...types.modalities import Modality
19
+ from ...types.schemas.object import Schema
20
+ from ...types.standards import PreparedRequest
21
+ from .extractions import AsyncExtractions, Extractions
22
+
23
+
24
def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, allow_partial: bool = False) -> UiParsedChatCompletion:
    """Best-effort: attach a validated pydantic object to the first choice of ``response``.

    The content of ``response.choices[0].message`` is passed through
    ``filter_auxiliary_fields_json`` and validated against the pydantic model
    exposed by ``schema``. On success the result is stored in
    ``message.parsed``. On any failure the response is returned untouched, so
    callers can always fall back to the raw ``content``.

    Args:
        schema: Schema object providing ``pydantic_model`` and
            ``_partial_pydantic_model``.
        response: Completion to enrich; it is mutated in place.
        allow_partial: When True, validate against the partial model instead
            of the strict one.

    Returns:
        The same ``response`` object that was passed in.
    """
    import logging

    # Nothing to parse: no choices at all, or the first choice has no content.
    if not response.choices:
        return response
    message = response.choices[0].message
    if not message.content:
        return response

    try:
        model = schema._partial_pydantic_model if allow_partial else schema.pydantic_model
        message.parsed = model.model_validate(filter_auxiliary_fields_json(message.content))
    except Exception:
        # Parsing is deliberately best-effort: keep the raw response usable,
        # but leave a trace so failures are diagnosable.
        logging.getLogger(__name__).debug("Could not parse completion content into the schema model", exc_info=True)
    return response
34
+
35
+
36
class BaseDocumentsMixin:
    """Request builders shared by the sync and async documents resources.

    Each ``_prepare_*`` method only assembles a ``PreparedRequest``; sending it
    is left to the resource class that mixes this in.
    """

    # NOTE(review): parameters defaulting to ``PydanticUndefined`` are forwarded
    # as-is to the request models, presumably so the model's own default
    # applies when the caller passes nothing -- confirm against the models.

    def _prepare_create_messages(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/create_messages``.

        Args:
            document: Input converted with ``prepare_mime_document``.
            modality: Processing modality forwarded to the request model.
            image_resolution_dpi: Forwarded to the request model.
            browser_canvas: Forwarded to the request model.
            idempotency_key: Attached to the prepared request.

        Returns:
            PreparedRequest whose ``data`` is the dumped
            ``DocumentCreateMessageRequest``.
        """
        mime_document = prepare_mime_document(document)

        loading_request = DocumentCreateMessageRequest(
            document=mime_document,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )
        return PreparedRequest(method="POST", url="/v1/documents/create_messages", data=loading_request.model_dump(), idempotency_key=idempotency_key)

    def _prepare_create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/create_inputs``.

        Args:
            document: Input converted with ``prepare_mime_document``.
            json_schema: Schema passed through ``load_json_schema`` before
                being placed in the request.
            modality: Processing modality forwarded to the request model.
            image_resolution_dpi: Forwarded to the request model.
            browser_canvas: Forwarded to the request model.
            idempotency_key: Attached to the prepared request.

        Returns:
            PreparedRequest whose ``data`` is the dumped
            ``DocumentCreateInputRequest``.
        """
        mime_document = prepare_mime_document(document)
        loaded_schema = load_json_schema(json_schema)

        loading_request = DocumentCreateInputRequest(
            document=mime_document,
            modality=modality,
            json_schema=loaded_schema,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )
        return PreparedRequest(method="POST", url="/v1/documents/create_inputs", data=loading_request.model_dump(), idempotency_key=idempotency_key)

    def _prepare_correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PreparedRequest:
        """Build the POST request for ``/v1/documents/correct_image_orientation``.

        Args:
            document: Input converted with ``prepare_mime_document``.

        Returns:
            PreparedRequest carrying the dumped MIME document under the
            ``"document"`` key.

        Raises:
            ValueError: If the converted document's MIME type does not start
                with ``image/``.
        """
        mime_document = prepare_mime_document(document)

        # Reject non-image payloads locally instead of spending an API call.
        if not mime_document.mime_type.startswith("image/"):
            raise ValueError("Image is not a valid image")

        return PreparedRequest(
            method="POST",
            url="/v1/documents/correct_image_orientation",
            data={"document": mime_document.model_dump()},
        )
87
+
88
+
89
class Documents(SyncAPIResource, BaseDocumentsMixin):
    """Synchronous Documents API wrapper.

    Requests are built by ``BaseDocumentsMixin`` (or inline for ``extract``)
    and sent through ``self._client._prepared_request``.
    """

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
        """Corrects the orientation of an image using the Retab API.

        This method takes an image in various formats and returns a PIL Image with corrected orientation.
        Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.

        Args:
            document: The input image to correct. Can be:
                - A file path (Path or str)
                - A file-like object (IOBase)
                - A MIMEData object
                - A PIL Image object

        Returns:
            PIL.Image.Image: The orientation-corrected image as a PIL Image object

        Raises:
            ValueError: If the input is not a valid image
            RetabAPIError: If the API request fails
        """
        request = self._prepare_correct_image_orientation(document)
        response = self._client._prepared_request(request)
        # The corrected image comes back as a MIME payload under "document".
        mime_response = MIMEData.model_validate(response["document"])
        return convert_mime_data_to_pil_image(mime_response)

    def create_messages(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document messages from a file using the Retab API.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request

        Returns:
            DocumentMessage: The processed document message containing extracted content.

        Raises:
            RetabAPIError: If the API request fails.
        """
        request = self._prepare_create_messages(
            document=document,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    def create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document inputs (messages with schema) from a file using the Retab API.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            json_schema: The JSON schema to use for structuring the document content.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Optional idempotency key for the request

        Returns:
            DocumentMessage: The processed document message containing extracted content with schema context.

        Raises:
            RetabAPIError: If the API request fails.
        """
        request = self._prepare_create_inputs(
            document=document,
            json_schema=json_schema,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    def extract(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None = None,
        documents: list[Path | str | IOBase | HttpUrl] | None = None,
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        temperature: float = PydanticUndefined,  # type: ignore[assignment]
        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """
        Process one or more documents using the Retab API for structured data extraction.

        This method provides a direct interface to document extraction functionality,
        intended to replace the current `.extractions.parse()` pattern.

        Args:
            json_schema: JSON schema defining the expected data structure
            model: The AI model to use for processing
            document: Single document to process (use either this or documents, not both)
            documents: List of documents to process (use either this or document, not both)
            image_resolution_dpi: Optional image resolution DPI
            browser_canvas: Optional browser canvas size
            temperature: Model temperature setting (0-1)
            modality: Modality of the document (e.g., native)
            reasoning_effort: The effort level for the model to reason about the input data
            n_consensus: Number of consensus extractions to perform
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the Retab database

        Returns:
            UiParsedChatCompletion: Parsed response from the API. ``parsed`` on the
            first choice is filled in when the content validates against the schema.

        Raises:
            ValueError: If neither document nor documents is provided, or if both are provided

        Errors raised by the underlying client request propagate unchanged.
        """
        assert_valid_model_extraction(model)

        loaded_schema = load_json_schema(json_schema)

        # Exactly one of 'document' / 'documents' must be supplied.
        if document is not None and documents is not None:
            raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")

        # Normalise to a list so the request model always receives 'documents'.
        if document is not None:
            processed_documents = [prepare_mime_document(document)]
        elif documents is not None:
            processed_documents = [prepare_mime_document(doc) for doc in documents]
        else:
            raise ValueError("Must provide either 'document' or 'documents' parameter.")

        # Building the request model validates the payload (raises if invalid).
        request = DocumentExtractRequest(
            json_schema=loaded_schema,
            documents=processed_documents,
            model=model,
            temperature=temperature,
            stream=False,
            modality=modality,
            store=store,
            reasoning_effort=reasoning_effort,
            n_consensus=n_consensus,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )

        prepared_request = PreparedRequest(
            method="POST",
            url="/v1/documents/extract",
            # Only explicitly-set, non-default fields are sent.
            data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
            idempotency_key=idempotency_key,
        )

        response = self._client._prepared_request(prepared_request)

        # The schema was already loaded above; reuse it rather than loading again.
        schema = Schema(json_schema=loaded_schema)
        return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))
271
+
272
+
273
class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
    """Documents API wrapper for asynchronous usage.

    Requests are built by ``BaseDocumentsMixin`` (or inline for ``extract``)
    and awaited through ``self._client._prepared_request``.
    """

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    async def create_messages(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document messages from a file using the Retab API asynchronously.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Idempotency key for request

        Returns:
            DocumentMessage: The processed document message containing extracted content.

        Raises:
            RetabAPIError: If the API request fails.
        """
        request = self._prepare_create_messages(
            document=document,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = await self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    async def create_inputs(
        self,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
        json_schema: dict[str, Any] | Path | str,
        modality: Modality = "native",
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
    ) -> DocumentMessage:
        """
        Create document inputs (messages with schema) from a file using the Retab API asynchronously.

        Args:
            document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
            json_schema: The JSON schema to use for structuring the document content.
            modality: The processing modality to use. Defaults to "native".
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            idempotency_key: Idempotency key for request

        Returns:
            DocumentMessage: The processed document message containing extracted content with schema context.

        Raises:
            RetabAPIError: If the API request fails.
        """
        request = self._prepare_create_inputs(
            document=document,
            json_schema=json_schema,
            modality=modality,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
            idempotency_key=idempotency_key,
        )
        response = await self._client._prepared_request(request)
        return DocumentMessage.model_validate(response)

    async def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
        """Corrects the orientation of an image using the Retab API asynchronously.

        This method takes an image in various formats and returns a PIL Image with corrected orientation.
        Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.

        Args:
            document: The input image to correct. Can be:
                - A file path (Path or str)
                - A file-like object (IOBase)
                - A MIMEData object
                - A PIL Image object

        Returns:
            PIL.Image.Image: The orientation-corrected image as a PIL Image object

        Raises:
            ValueError: If the input is not a valid image
            RetabAPIError: If the API request fails
        """
        request = self._prepare_correct_image_orientation(document)
        response = await self._client._prepared_request(request)
        # The corrected image comes back as a MIME payload under "document".
        mime_response = MIMEData.model_validate(response["document"])
        return convert_mime_data_to_pil_image(mime_response)

    async def extract(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None = None,
        documents: list[Path | str | IOBase | HttpUrl] | None = None,
        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
        temperature: float = PydanticUndefined,  # type: ignore[assignment]
        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """
        Process one or more documents using the Retab API for structured data extraction asynchronously.

        This method provides a direct interface to document extraction functionality,
        intended to replace the current `.extractions.parse()` pattern.

        Args:
            json_schema: JSON schema defining the expected data structure
            model: The AI model to use for processing
            document: Single document to process (use either this or documents, not both)
            documents: List of documents to process (use either this or document, not both)
            image_resolution_dpi: Optional image resolution DPI
            browser_canvas: Optional browser canvas size
            temperature: Model temperature setting (0-1)
            modality: Modality of the document (e.g., native)
            reasoning_effort: The effort level for the model to reason about the input data
            n_consensus: Number of consensus extractions to perform
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the Retab database

        Returns:
            UiParsedChatCompletion: Parsed response from the API. ``parsed`` on the
            first choice is filled in when the content validates against the schema.

        Raises:
            ValueError: If neither document nor documents is provided, or if both are provided

        Errors raised by the underlying client request propagate unchanged.
        """
        assert_valid_model_extraction(model)

        loaded_schema = load_json_schema(json_schema)

        # Exactly one of 'document' / 'documents' must be supplied.
        if document is not None and documents is not None:
            raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")

        # Normalise to a list so the request model always receives 'documents'.
        if document is not None:
            processed_documents = [prepare_mime_document(document)]
        elif documents is not None:
            processed_documents = [prepare_mime_document(doc) for doc in documents]
        else:
            raise ValueError("Must provide either 'document' or 'documents' parameter.")

        # Building the request model validates the payload (raises if invalid).
        request = DocumentExtractRequest(
            json_schema=loaded_schema,
            documents=processed_documents,
            model=model,
            temperature=temperature,
            stream=False,
            modality=modality,
            store=store,
            reasoning_effort=reasoning_effort,
            n_consensus=n_consensus,
            image_resolution_dpi=image_resolution_dpi,
            browser_canvas=browser_canvas,
        )

        prepared_request = PreparedRequest(
            method="POST",
            url="/v1/documents/extract",
            # Only explicitly-set, non-default fields are sent.
            data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
            idempotency_key=idempotency_key,
        )

        response = await self._client._prepared_request(prepared_request)

        # The schema was already loaded above; reuse it rather than loading again.
        schema = Schema(json_schema=loaded_schema)
        return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))