retab 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/_resource.py +5 -5
- retab/_utils/chat.py +20 -20
- retab/_utils/responses.py +7 -7
- retab/_utils/usage/usage.py +3 -3
- retab/client.py +22 -22
- retab/resources/consensus/client.py +2 -2
- retab/resources/consensus/completions.py +12 -12
- retab/resources/consensus/completions_stream.py +9 -9
- retab/resources/consensus/responses.py +6 -6
- retab/resources/consensus/responses_stream.py +10 -10
- retab/resources/documents/client.py +201 -15
- retab/resources/documents/extractions.py +17 -17
- retab/resources/jsonlUtils.py +5 -5
- retab/resources/processors/automations/endpoints.py +2 -2
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +2 -2
- retab/resources/processors/automations/outlook.py +2 -2
- retab/resources/processors/client.py +2 -2
- retab/resources/usage.py +4 -4
- retab/types/ai_models.py +4 -4
- retab/types/automations/mailboxes.py +1 -1
- retab/types/automations/webhooks.py +1 -1
- retab/types/chat.py +1 -1
- retab/types/completions.py +3 -3
- retab/types/documents/create_messages.py +2 -2
- retab/types/documents/extractions.py +2 -2
- retab/types/extractions.py +3 -3
- retab/types/schemas/object.py +3 -3
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/METADATA +72 -72
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/RECORD +34 -34
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/top_level.txt +0 -0
@@ -102,7 +102,7 @@ class BaseResponsesMixin:
|
|
102
102
|
|
103
103
|
|
104
104
|
class Responses(SyncAPIResource, BaseResponsesMixin):
|
105
|
-
"""
|
105
|
+
"""Retab Responses API compatible with OpenAI Responses API"""
|
106
106
|
|
107
107
|
@as_context_manager
|
108
108
|
def stream(
|
@@ -117,7 +117,7 @@ class Responses(SyncAPIResource, BaseResponsesMixin):
|
|
117
117
|
idempotency_key: Optional[str] = None,
|
118
118
|
) -> Generator[UiResponse, None, None]:
|
119
119
|
"""
|
120
|
-
Create a completion using the
|
120
|
+
Create a completion using the Retab API with streaming enabled.
|
121
121
|
|
122
122
|
Args:
|
123
123
|
model: The model to use
|
@@ -134,7 +134,7 @@ class Responses(SyncAPIResource, BaseResponsesMixin):
|
|
134
134
|
|
135
135
|
Usage:
|
136
136
|
```python
|
137
|
-
with
|
137
|
+
with retab.responses.stream(model, input, text, temperature, reasoning) as stream:
|
138
138
|
for response in stream:
|
139
139
|
print(response)
|
140
140
|
```
|
@@ -171,7 +171,7 @@ class Responses(SyncAPIResource, BaseResponsesMixin):
|
|
171
171
|
idempotency_key: Optional[str] = None,
|
172
172
|
) -> Generator[UiResponse, None, None]:
|
173
173
|
"""
|
174
|
-
Parse content using the
|
174
|
+
Parse content using the Retab API with streaming enabled.
|
175
175
|
|
176
176
|
Args:
|
177
177
|
model: The model to use
|
@@ -188,7 +188,7 @@ class Responses(SyncAPIResource, BaseResponsesMixin):
|
|
188
188
|
|
189
189
|
Usage:
|
190
190
|
```python
|
191
|
-
with
|
191
|
+
with retab.responses.stream_parse(model, input, MyModel, temperature, reasoning) as stream:
|
192
192
|
for response in stream:
|
193
193
|
print(response)
|
194
194
|
```
|
@@ -214,7 +214,7 @@ class Responses(SyncAPIResource, BaseResponsesMixin):
|
|
214
214
|
|
215
215
|
|
216
216
|
class AsyncResponses(AsyncAPIResource, BaseResponsesMixin):
|
217
|
-
"""
|
217
|
+
"""Retab Responses API compatible with OpenAI Responses API for async usage"""
|
218
218
|
|
219
219
|
@as_async_context_manager
|
220
220
|
async def stream(
|
@@ -229,7 +229,7 @@ class AsyncResponses(AsyncAPIResource, BaseResponsesMixin):
|
|
229
229
|
idempotency_key: Optional[str] = None,
|
230
230
|
) -> AsyncGenerator[UiResponse, None]:
|
231
231
|
"""
|
232
|
-
Create a completion using the
|
232
|
+
Create a completion using the Retab API asynchronously with streaming enabled.
|
233
233
|
|
234
234
|
Args:
|
235
235
|
model: The model to use
|
@@ -246,7 +246,7 @@ class AsyncResponses(AsyncAPIResource, BaseResponsesMixin):
|
|
246
246
|
|
247
247
|
Usage:
|
248
248
|
```python
|
249
|
-
async with
|
249
|
+
async with retab.responses.async_stream(model, input, text, temperature, reasoning) as stream:
|
250
250
|
async for response in stream:
|
251
251
|
print(response)
|
252
252
|
```
|
@@ -283,7 +283,7 @@ class AsyncResponses(AsyncAPIResource, BaseResponsesMixin):
|
|
283
283
|
idempotency_key: Optional[str] = None,
|
284
284
|
) -> AsyncGenerator[UiResponse, None]:
|
285
285
|
"""
|
286
|
-
Parse content using the
|
286
|
+
Parse content using the Retab API asynchronously with streaming enabled.
|
287
287
|
|
288
288
|
Args:
|
289
289
|
model: The model to use
|
@@ -300,7 +300,7 @@ class AsyncResponses(AsyncAPIResource, BaseResponsesMixin):
|
|
300
300
|
|
301
301
|
Usage:
|
302
302
|
```python
|
303
|
-
async with
|
303
|
+
async with retab.responses.async_stream_parse(model, input, MyModel, temperature, reasoning) as stream:
|
304
304
|
async for response in stream:
|
305
305
|
print(response)
|
306
306
|
```
|
@@ -5,18 +5,34 @@ from typing import Any
|
|
5
5
|
import PIL.Image
|
6
6
|
from pydantic import HttpUrl
|
7
7
|
from pydantic_core import PydanticUndefined
|
8
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
8
9
|
|
9
10
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
10
|
-
from ..._utils.json_schema import load_json_schema
|
11
|
+
from ..._utils.json_schema import load_json_schema, filter_auxiliary_fields_json
|
11
12
|
from ..._utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
|
13
|
+
from ..._utils.ai_models import assert_valid_model_extraction
|
12
14
|
from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
|
15
|
+
from ...types.documents.extractions import DocumentExtractRequest, UiParsedChatCompletion
|
13
16
|
from ...types.browser_canvas import BrowserCanvas
|
14
17
|
from ...types.mime import MIMEData
|
15
18
|
from ...types.modalities import Modality
|
19
|
+
from ...types.schemas.object import Schema
|
16
20
|
from ...types.standards import PreparedRequest
|
17
21
|
from .extractions import AsyncExtractions, Extractions
|
18
22
|
|
19
23
|
|
24
|
+
def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, allow_partial: bool = False) -> UiParsedChatCompletion:
|
25
|
+
if response.choices[0].message.content:
|
26
|
+
try:
|
27
|
+
if allow_partial:
|
28
|
+
response.choices[0].message.parsed = schema._partial_pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
|
29
|
+
else:
|
30
|
+
response.choices[0].message.parsed = schema.pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
|
31
|
+
except Exception:
|
32
|
+
pass
|
33
|
+
return response
|
34
|
+
|
35
|
+
|
20
36
|
class BaseDocumentsMixin:
|
21
37
|
def _prepare_create_messages(
|
22
38
|
self,
|
@@ -75,11 +91,11 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
75
91
|
|
76
92
|
def __init__(self, client: Any) -> None:
|
77
93
|
super().__init__(client=client)
|
78
|
-
self.
|
94
|
+
#self.extractions_api = Extractions(client=client)
|
79
95
|
# self.batch = Batch(client=client)
|
80
96
|
|
81
97
|
def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
|
82
|
-
"""Corrects the orientation of an image using the
|
98
|
+
"""Corrects the orientation of an image using the Retab API.
|
83
99
|
|
84
100
|
This method takes an image in various formats and returns a PIL Image with corrected orientation.
|
85
101
|
Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.
|
@@ -96,7 +112,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
96
112
|
|
97
113
|
Raises:
|
98
114
|
ValueError: If the input is not a valid image
|
99
|
-
|
115
|
+
RetabAPIError: If the API request fails
|
100
116
|
"""
|
101
117
|
request = self._prepare_correct_image_orientation(document)
|
102
118
|
response = self._client._prepared_request(request)
|
@@ -112,7 +128,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
112
128
|
idempotency_key: str | None = None,
|
113
129
|
) -> DocumentMessage:
|
114
130
|
"""
|
115
|
-
Create document messages from a file using the
|
131
|
+
Create document messages from a file using the Retab API.
|
116
132
|
|
117
133
|
Args:
|
118
134
|
document: The document to process. Can be a file path (Path or str) or a file-like object.
|
@@ -124,7 +140,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
124
140
|
DocumentMessage: The processed document message containing extracted content.
|
125
141
|
|
126
142
|
Raises:
|
127
|
-
|
143
|
+
RetabAPIError: If the API request fails.
|
128
144
|
"""
|
129
145
|
request = self._prepare_create_messages(
|
130
146
|
document=document, modality=modality, image_resolution_dpi=image_resolution_dpi, browser_canvas=browser_canvas, idempotency_key=idempotency_key
|
@@ -142,7 +158,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
142
158
|
idempotency_key: str | None = None,
|
143
159
|
) -> DocumentMessage:
|
144
160
|
"""
|
145
|
-
Create document inputs (messages with schema) from a file using the
|
161
|
+
Create document inputs (messages with schema) from a file using the Retab API.
|
146
162
|
|
147
163
|
Args:
|
148
164
|
document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
@@ -155,7 +171,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
155
171
|
DocumentMessage: The processed document message containing extracted content with schema context.
|
156
172
|
|
157
173
|
Raises:
|
158
|
-
|
174
|
+
RetabAPIError: If the API request fails.
|
159
175
|
"""
|
160
176
|
request = self._prepare_create_inputs(
|
161
177
|
document=document,
|
@@ -168,13 +184,98 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
168
184
|
response = self._client._prepared_request(request)
|
169
185
|
return DocumentMessage.model_validate(response)
|
170
186
|
|
187
|
+
def extract(
|
188
|
+
self,
|
189
|
+
json_schema: dict[str, Any] | Path | str,
|
190
|
+
model: str,
|
191
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
192
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
193
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
194
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
195
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
196
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
197
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
198
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
199
|
+
idempotency_key: str | None = None,
|
200
|
+
store: bool = False,
|
201
|
+
) -> UiParsedChatCompletion:
|
202
|
+
"""
|
203
|
+
Process one or more documents using the Retab API for structured data extraction.
|
204
|
+
|
205
|
+
This method provides a direct interface to document extraction functionality,
|
206
|
+
intended to replace the current `.extractions.parse()` pattern.
|
207
|
+
|
208
|
+
Args:
|
209
|
+
json_schema: JSON schema defining the expected data structure
|
210
|
+
model: The AI model to use for processing
|
211
|
+
document: Single document to process (use either this or documents, not both)
|
212
|
+
documents: List of documents to process (use either this or document, not both)
|
213
|
+
image_resolution_dpi: Optional image resolution DPI
|
214
|
+
browser_canvas: Optional browser canvas size
|
215
|
+
temperature: Model temperature setting (0-1)
|
216
|
+
modality: Modality of the document (e.g., native)
|
217
|
+
reasoning_effort: The effort level for the model to reason about the input data
|
218
|
+
n_consensus: Number of consensus extractions to perform
|
219
|
+
idempotency_key: Idempotency key for request
|
220
|
+
store: Whether to store the document in the Retab database
|
221
|
+
|
222
|
+
Returns:
|
223
|
+
UiParsedChatCompletion: Parsed response from the API
|
224
|
+
|
225
|
+
Raises:
|
226
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
227
|
+
HTTPException: If the request fails
|
228
|
+
"""
|
229
|
+
assert_valid_model_extraction(model)
|
230
|
+
|
231
|
+
json_schema = load_json_schema(json_schema)
|
232
|
+
|
233
|
+
# Handle both single document and multiple documents
|
234
|
+
if document is not None and documents is not None:
|
235
|
+
raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
|
236
|
+
|
237
|
+
# Convert single document to documents list for consistency
|
238
|
+
if document is not None:
|
239
|
+
processed_documents = [prepare_mime_document(document)]
|
240
|
+
elif documents is not None:
|
241
|
+
processed_documents = [prepare_mime_document(doc) for doc in documents]
|
242
|
+
else:
|
243
|
+
raise ValueError("Must provide either 'document' or 'documents' parameter.")
|
244
|
+
|
245
|
+
# Validate DocumentAPIRequest data (raises exception if invalid)
|
246
|
+
request = DocumentExtractRequest(
|
247
|
+
json_schema=json_schema,
|
248
|
+
documents=processed_documents,
|
249
|
+
model=model,
|
250
|
+
temperature=temperature,
|
251
|
+
stream=False,
|
252
|
+
modality=modality,
|
253
|
+
store=store,
|
254
|
+
reasoning_effort=reasoning_effort,
|
255
|
+
n_consensus=n_consensus,
|
256
|
+
image_resolution_dpi=image_resolution_dpi,
|
257
|
+
browser_canvas=browser_canvas,
|
258
|
+
)
|
259
|
+
|
260
|
+
prepared_request = PreparedRequest(
|
261
|
+
method="POST",
|
262
|
+
url="/v1/documents/extract",
|
263
|
+
data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
|
264
|
+
idempotency_key=idempotency_key
|
265
|
+
)
|
266
|
+
|
267
|
+
response = self._client._prepared_request(prepared_request)
|
268
|
+
|
269
|
+
schema = Schema(json_schema=load_json_schema(json_schema))
|
270
|
+
return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))
|
271
|
+
|
171
272
|
|
172
273
|
class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
173
274
|
"""Documents API wrapper for asynchronous usage."""
|
174
275
|
|
175
276
|
def __init__(self, client: Any) -> None:
|
176
277
|
super().__init__(client=client)
|
177
|
-
self.
|
278
|
+
#self.extractions_api = AsyncExtractions(client=client)
|
178
279
|
|
179
280
|
async def create_messages(
|
180
281
|
self,
|
@@ -185,7 +286,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
185
286
|
idempotency_key: str | None = None,
|
186
287
|
) -> DocumentMessage:
|
187
288
|
"""
|
188
|
-
Create document messages from a file using the
|
289
|
+
Create document messages from a file using the Retab API asynchronously.
|
189
290
|
|
190
291
|
Args:
|
191
292
|
document: The document to process. Can be a file path (Path or str) or a file-like object.
|
@@ -195,7 +296,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
195
296
|
DocumentMessage: The processed document message containing extracted content.
|
196
297
|
|
197
298
|
Raises:
|
198
|
-
|
299
|
+
RetabAPIError: If the API request fails.
|
199
300
|
"""
|
200
301
|
request = self._prepare_create_messages(
|
201
302
|
document=document,
|
@@ -217,7 +318,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
217
318
|
idempotency_key: str | None = None,
|
218
319
|
) -> DocumentMessage:
|
219
320
|
"""
|
220
|
-
Create document inputs (messages with schema) from a file using the
|
321
|
+
Create document inputs (messages with schema) from a file using the Retab API asynchronously.
|
221
322
|
|
222
323
|
Args:
|
223
324
|
document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
@@ -230,7 +331,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
230
331
|
DocumentMessage: The processed document message containing extracted content with schema context.
|
231
332
|
|
232
333
|
Raises:
|
233
|
-
|
334
|
+
RetabAPIError: If the API request fails.
|
234
335
|
"""
|
235
336
|
request = self._prepare_create_inputs(
|
236
337
|
document=document,
|
@@ -244,7 +345,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
244
345
|
return DocumentMessage.model_validate(response)
|
245
346
|
|
246
347
|
async def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
|
247
|
-
"""Corrects the orientation of an image using the
|
348
|
+
"""Corrects the orientation of an image using the Retab API asynchronously.
|
248
349
|
|
249
350
|
This method takes an image in various formats and returns a PIL Image with corrected orientation.
|
250
351
|
Useful for handling images from mobile devices or cameras that may have incorrect EXIF orientation.
|
@@ -261,9 +362,94 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
261
362
|
|
262
363
|
Raises:
|
263
364
|
ValueError: If the input is not a valid image
|
264
|
-
|
365
|
+
RetabAPIError: If the API request fails
|
265
366
|
"""
|
266
367
|
request = self._prepare_correct_image_orientation(document)
|
267
368
|
response = await self._client._prepared_request(request)
|
268
369
|
mime_response = MIMEData.model_validate(response["document"])
|
269
370
|
return convert_mime_data_to_pil_image(mime_response)
|
371
|
+
|
372
|
+
async def extract(
|
373
|
+
self,
|
374
|
+
json_schema: dict[str, Any] | Path | str,
|
375
|
+
model: str,
|
376
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
377
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
378
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
379
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
380
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
381
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
382
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
383
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
384
|
+
idempotency_key: str | None = None,
|
385
|
+
store: bool = False,
|
386
|
+
) -> UiParsedChatCompletion:
|
387
|
+
"""
|
388
|
+
Process one or more documents using the Retab API for structured data extraction asynchronously.
|
389
|
+
|
390
|
+
This method provides a direct interface to document extraction functionality,
|
391
|
+
intended to replace the current `.extractions.parse()` pattern.
|
392
|
+
|
393
|
+
Args:
|
394
|
+
json_schema: JSON schema defining the expected data structure
|
395
|
+
model: The AI model to use for processing
|
396
|
+
document: Single document to process (use either this or documents, not both)
|
397
|
+
documents: List of documents to process (use either this or document, not both)
|
398
|
+
image_resolution_dpi: Optional image resolution DPI
|
399
|
+
browser_canvas: Optional browser canvas size
|
400
|
+
temperature: Model temperature setting (0-1)
|
401
|
+
modality: Modality of the document (e.g., native)
|
402
|
+
reasoning_effort: The effort level for the model to reason about the input data
|
403
|
+
n_consensus: Number of consensus extractions to perform
|
404
|
+
idempotency_key: Idempotency key for request
|
405
|
+
store: Whether to store the document in the Retab database
|
406
|
+
|
407
|
+
Returns:
|
408
|
+
UiParsedChatCompletion: Parsed response from the API
|
409
|
+
|
410
|
+
Raises:
|
411
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
412
|
+
HTTPException: If the request fails
|
413
|
+
"""
|
414
|
+
assert_valid_model_extraction(model)
|
415
|
+
|
416
|
+
json_schema = load_json_schema(json_schema)
|
417
|
+
|
418
|
+
# Handle both single document and multiple documents
|
419
|
+
if document is not None and documents is not None:
|
420
|
+
raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
|
421
|
+
|
422
|
+
# Convert single document to documents list for consistency
|
423
|
+
if document is not None:
|
424
|
+
processed_documents = [prepare_mime_document(document)]
|
425
|
+
elif documents is not None:
|
426
|
+
processed_documents = [prepare_mime_document(doc) for doc in documents]
|
427
|
+
else:
|
428
|
+
raise ValueError("Must provide either 'document' or 'documents' parameter.")
|
429
|
+
|
430
|
+
# Validate DocumentAPIRequest data (raises exception if invalid)
|
431
|
+
request = DocumentExtractRequest(
|
432
|
+
json_schema=json_schema,
|
433
|
+
documents=processed_documents,
|
434
|
+
model=model,
|
435
|
+
temperature=temperature,
|
436
|
+
stream=False,
|
437
|
+
modality=modality,
|
438
|
+
store=store,
|
439
|
+
reasoning_effort=reasoning_effort,
|
440
|
+
n_consensus=n_consensus,
|
441
|
+
image_resolution_dpi=image_resolution_dpi,
|
442
|
+
browser_canvas=browser_canvas,
|
443
|
+
)
|
444
|
+
|
445
|
+
prepared_request = PreparedRequest(
|
446
|
+
method="POST",
|
447
|
+
url="/v1/documents/extract",
|
448
|
+
data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
|
449
|
+
idempotency_key=idempotency_key
|
450
|
+
)
|
451
|
+
|
452
|
+
response = await self._client._prepared_request(prepared_request)
|
453
|
+
|
454
|
+
schema = Schema(json_schema=load_json_schema(json_schema))
|
455
|
+
return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))
|
@@ -18,7 +18,7 @@ from ..._utils.ai_models import assert_valid_model_extraction
|
|
18
18
|
from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
|
19
19
|
from ..._utils.mime import MIMEData, prepare_mime_document
|
20
20
|
from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
|
21
|
-
from ...types.chat import
|
21
|
+
from ...types.chat import ChatCompletionRetabMessage
|
22
22
|
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
|
23
23
|
from ...types.browser_canvas import BrowserCanvas
|
24
24
|
from ...types.modalities import Modality
|
@@ -97,8 +97,8 @@ class BaseExtractionsMixin:
|
|
97
97
|
model: str,
|
98
98
|
temperature: float,
|
99
99
|
completion: Any | None = None,
|
100
|
-
# The messages can be provided in different formats, we will convert them to the
|
101
|
-
messages: list[
|
100
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
101
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
102
102
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
103
103
|
anthropic_messages: list[MessageParam] | None = None,
|
104
104
|
anthropic_system_prompt: str | None = None,
|
@@ -154,7 +154,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
154
154
|
store: bool = False,
|
155
155
|
) -> UiParsedChatCompletion:
|
156
156
|
"""
|
157
|
-
Process one or more documents using the
|
157
|
+
Process one or more documents using the Retab API.
|
158
158
|
|
159
159
|
Args:
|
160
160
|
json_schema: JSON schema defining the expected data structure
|
@@ -168,7 +168,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
168
168
|
reasoning_effort: The effort level for the model to reason about the input data.
|
169
169
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
170
170
|
idempotency_key: Idempotency key for request
|
171
|
-
store: Whether to store the document in the
|
171
|
+
store: Whether to store the document in the Retab database
|
172
172
|
Returns:
|
173
173
|
UiParsedChatCompletion: Parsed response from the API
|
174
174
|
Raises:
|
@@ -214,7 +214,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
214
214
|
store: bool = False,
|
215
215
|
) -> Generator[UiParsedChatCompletion, None, None]:
|
216
216
|
"""
|
217
|
-
Process one or more documents using the
|
217
|
+
Process one or more documents using the Retab API with streaming enabled.
|
218
218
|
|
219
219
|
Args:
|
220
220
|
json_schema: JSON schema defining the expected data structure
|
@@ -228,7 +228,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
228
228
|
reasoning_effort: The effort level for the model to reason about the input data.
|
229
229
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
230
230
|
idempotency_key: Idempotency key for request
|
231
|
-
store: Whether to store the document in the
|
231
|
+
store: Whether to store the document in the Retab database
|
232
232
|
|
233
233
|
Returns:
|
234
234
|
Generator[UiParsedChatCompletion]: Stream of parsed responses
|
@@ -238,12 +238,12 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
238
238
|
Usage:
|
239
239
|
```python
|
240
240
|
# Single document
|
241
|
-
with
|
241
|
+
with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
|
242
242
|
for response in stream:
|
243
243
|
print(response)
|
244
244
|
|
245
245
|
# Multiple documents
|
246
|
-
with
|
246
|
+
with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
247
247
|
for response in stream:
|
248
248
|
print(response)
|
249
249
|
```
|
@@ -311,8 +311,8 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
311
311
|
model: str,
|
312
312
|
temperature: float,
|
313
313
|
completion: Any | None = None,
|
314
|
-
# The messages can be provided in different formats, we will convert them to the
|
315
|
-
messages: list[
|
314
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
315
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
316
316
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
317
317
|
anthropic_messages: list[MessageParam] | None = None,
|
318
318
|
anthropic_system_prompt: str | None = None,
|
@@ -369,7 +369,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
369
369
|
reasoning_effort: The effort level for the model to reason about the input data.
|
370
370
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
371
371
|
idempotency_key: Idempotency key for request
|
372
|
-
store: Whether to store the document in the
|
372
|
+
store: Whether to store the document in the Retab database
|
373
373
|
Returns:
|
374
374
|
UiParsedChatCompletion: Parsed response from the API.
|
375
375
|
Raises:
|
@@ -425,7 +425,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
425
425
|
reasoning_effort: The effort level for the model to reason about the input data.
|
426
426
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
427
427
|
idempotency_key: Idempotency key for request
|
428
|
-
store: Whether to store the document in the
|
428
|
+
store: Whether to store the document in the Retab database
|
429
429
|
Returns:
|
430
430
|
AsyncGenerator[UiParsedChatCompletion, None]: Stream of parsed responses.
|
431
431
|
Raises:
|
@@ -434,12 +434,12 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
434
434
|
Usage:
|
435
435
|
```python
|
436
436
|
# Single document
|
437
|
-
async with
|
437
|
+
async with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
|
438
438
|
async for response in stream:
|
439
439
|
print(response)
|
440
440
|
|
441
441
|
# Multiple documents
|
442
|
-
async with
|
442
|
+
async with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
443
443
|
async for response in stream:
|
444
444
|
print(response)
|
445
445
|
```
|
@@ -507,8 +507,8 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
507
507
|
model: str,
|
508
508
|
temperature: float,
|
509
509
|
completion: Any | None = None,
|
510
|
-
# The messages can be provided in different formats, we will convert them to the
|
511
|
-
messages: list[
|
510
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
511
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
512
512
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
513
513
|
anthropic_messages: list[MessageParam] | None = None,
|
514
514
|
anthropic_system_prompt: str | None = None,
|
retab/resources/jsonlUtils.py
CHANGED
@@ -22,14 +22,14 @@ from .._utils.ai_models import assert_valid_model_extraction, find_provider_from
|
|
22
22
|
from .._utils.chat import convert_to_anthropic_format, convert_to_openai_format, separate_messages
|
23
23
|
from .._utils.display import Metrics, display_metrics, process_dataset_and_compute_metrics
|
24
24
|
from .._utils.json_schema import load_json_schema
|
25
|
-
from ..types.chat import
|
25
|
+
from ..types.chat import ChatCompletionRetabMessage
|
26
26
|
from ..types.modalities import Modality
|
27
27
|
from ..types.schemas.object import Schema
|
28
28
|
from ..types.browser_canvas import BrowserCanvas
|
29
29
|
|
30
30
|
|
31
31
|
class FinetuningJSON(BaseModel):
|
32
|
-
messages: list[
|
32
|
+
messages: list[ChatCompletionRetabMessage]
|
33
33
|
|
34
34
|
|
35
35
|
FinetuningJSONL = list[FinetuningJSON]
|
@@ -242,7 +242,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
242
242
|
training_set = []
|
243
243
|
|
244
244
|
for pair_paths in tqdm(pairs_paths):
|
245
|
-
document_messages: list[
|
245
|
+
document_messages: list[ChatCompletionRetabMessage] = []
|
246
246
|
|
247
247
|
if isinstance(pair_paths["document_fpath"], str) or isinstance(pair_paths["document_fpath"], Path):
|
248
248
|
document_message = self._client.documents.create_messages(document=pair_paths["document_fpath"], modality=modality)
|
@@ -299,7 +299,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
299
299
|
provider: str,
|
300
300
|
model: str,
|
301
301
|
temperature: float,
|
302
|
-
messages: list[
|
302
|
+
messages: list[ChatCompletionRetabMessage],
|
303
303
|
schema_obj: Schema,
|
304
304
|
) -> str:
|
305
305
|
"""Get completion from the appropriate model provider.
|
@@ -659,7 +659,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
659
659
|
system_message, user_messages, assistant_messages = separate_messages(messages)
|
660
660
|
system_and_user_messages = messages[:-1]
|
661
661
|
|
662
|
-
previous_annotation_message:
|
662
|
+
previous_annotation_message: ChatCompletionRetabMessage = {
|
663
663
|
"role": "user",
|
664
664
|
"content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + messages[-1]["content"],
|
665
665
|
}
|
@@ -120,7 +120,7 @@ class Endpoints(SyncAPIResource, EndpointsMixin):
|
|
120
120
|
need_validation=need_validation,
|
121
121
|
)
|
122
122
|
response = self._client._prepared_request(request)
|
123
|
-
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.
|
123
|
+
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
124
124
|
return Endpoint.model_validate(response)
|
125
125
|
|
126
126
|
def list(
|
@@ -229,7 +229,7 @@ class AsyncEndpoints(AsyncAPIResource, EndpointsMixin):
|
|
229
229
|
need_validation=need_validation,
|
230
230
|
)
|
231
231
|
response = await self._client._prepared_request(request)
|
232
|
-
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.
|
232
|
+
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
233
233
|
|
234
234
|
return Endpoint.model_validate(response)
|
235
235
|
|
@@ -128,7 +128,7 @@ class Links(SyncAPIResource, LinksMixin):
|
|
128
128
|
)
|
129
129
|
response = self._client._prepared_request(request)
|
130
130
|
|
131
|
-
print(f"Extraction Link Created. Link available at https://www.
|
131
|
+
print(f"Extraction Link Created. Link available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
132
132
|
return Link.model_validate(response)
|
133
133
|
|
134
134
|
def list(
|
@@ -248,7 +248,7 @@ class AsyncLinks(AsyncAPIResource, LinksMixin):
|
|
248
248
|
password=password,
|
249
249
|
)
|
250
250
|
response = await self._client._prepared_request(request)
|
251
|
-
print(f"Extraction Link Created. Link available at https://www.
|
251
|
+
print(f"Extraction Link Created. Link available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
252
252
|
return Link.model_validate(response)
|
253
253
|
|
254
254
|
async def list(
|
@@ -147,7 +147,7 @@ class Logs(SyncAPIResource, LogsMixin):
|
|
147
147
|
request = self.prepare_rerun(processor_id, log_id)
|
148
148
|
response = self._client._prepared_request(request)
|
149
149
|
|
150
|
-
print(f"Webhook call run successfully. Log available at https://www.
|
150
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.dev/dashboard/processors/{processor_id}/logs/{log_id}")
|
151
151
|
|
152
152
|
return ExternalRequestLog.model_validate(response)
|
153
153
|
|
@@ -217,6 +217,6 @@ class AsyncLogs(AsyncAPIResource, LogsMixin):
|
|
217
217
|
request = self.prepare_rerun(processor_id, log_id)
|
218
218
|
response = await self._client._prepared_request(request)
|
219
219
|
|
220
|
-
print(f"Webhook call run successfully. Log available at https://www.
|
220
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.dev/dashboard/processors/{processor_id}/logs/{log_id}")
|
221
221
|
|
222
222
|
return ExternalRequestLog.model_validate(response)
|
@@ -144,7 +144,7 @@ class Mailboxes(SyncAPIResource, MailBoxesMixin):
|
|
144
144
|
)
|
145
145
|
response = self._client._prepared_request(request)
|
146
146
|
|
147
|
-
print(f"Email automation created. Mailbox available at https://www.
|
147
|
+
print(f"Email automation created. Mailbox available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
148
148
|
|
149
149
|
return Mailbox.model_validate(response)
|
150
150
|
|
@@ -278,7 +278,7 @@ class AsyncMailboxes(AsyncAPIResource, MailBoxesMixin):
|
|
278
278
|
)
|
279
279
|
response = await self._client._prepared_request(request)
|
280
280
|
|
281
|
-
print(f"Email automation created. Mailbox available at https://www.
|
281
|
+
print(f"Email automation created. Mailbox available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
282
282
|
|
283
283
|
return Mailbox.model_validate(response)
|
284
284
|
|