retab 0.0.36__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +4 -0
- {uiform → retab}/_resource.py +5 -5
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +29 -34
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +16 -10
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +31 -31
- {uiform → retab}/client.py +54 -53
- {uiform → retab}/resources/consensus/client.py +19 -38
- {uiform → retab}/resources/consensus/completions.py +36 -59
- {uiform → retab}/resources/consensus/completions_stream.py +35 -47
- {uiform → retab}/resources/consensus/responses.py +37 -86
- {uiform → retab}/resources/consensus/responses_stream.py +41 -89
- retab/resources/documents/client.py +455 -0
- {uiform → retab}/resources/documents/extractions.py +192 -101
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +225 -221
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +79 -120
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- retab/resources/processors/automations/mailboxes.py +397 -0
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +181 -166
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +41 -4
- {uiform → retab}/types/ai_models.py +17 -17
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +10 -10
- {uiform → retab}/types/automations/outlook.py +15 -11
- {uiform → retab}/types/automations/webhooks.py +1 -1
- retab/types/browser_canvas.py +3 -0
- retab/types/chat.py +8 -0
- {uiform → retab}/types/completions.py +12 -15
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +20 -22
- {uiform → retab}/types/documents/extractions.py +71 -26
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +36 -13
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +16 -17
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/METADATA +78 -77
- retab-0.0.38.dist-info/RECORD +107 -0
- retab-0.0.38.dist-info/top_level.txt +1 -0
- retab-0.0.36.dist-info/RECORD +0 -96
- retab-0.0.36.dist-info/top_level.txt +0 -1
- uiform/__init__.py +0 -4
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/documents/client.py +0 -255
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/mailboxes.py +0 -435
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/chat.py +0 -8
- uiform/types/consensus.py +0 -10
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform → retab}/types/documents/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform → retab}/types/jobs/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform → retab}/types/schemas/__init__.py +0 -0
- {uiform → retab}/types/secrets/__init__.py +0 -0
- {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
{uiform → retab}/resources/documents/extractions.py
@@ -2,7 +2,7 @@ import base64
 import json
 from io import IOBase
 from pathlib import Path
-from typing import Any, AsyncGenerator, Generator
+from typing import Any, AsyncGenerator, Generator
 
 from anthropic.types.message_param import MessageParam
 from openai.types.chat import ChatCompletionMessageParam
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
 from openai.types.chat.parsed_chat_completion import ParsedChatCompletionMessage
 from openai.types.responses.response import Response
 from openai.types.responses.response_input_param import ResponseInputItemParam
+from pydantic_core import PydanticUndefined
 from pydantic import HttpUrl
 
 from ..._resource import AsyncAPIResource, SyncAPIResource
@@ -17,8 +18,9 @@ from ..._utils.ai_models import assert_valid_model_extraction
 from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
 from ..._utils.mime import MIMEData, prepare_mime_document
 from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
-from ...types.chat import
+from ...types.chat import ChatCompletionRetabMessage
 from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
+from ...types.browser_canvas import BrowserCanvas
 from ...types.modalities import Modality
 from ...types.schemas.object import Schema
 from ...types.standards import PreparedRequest
@@ -31,7 +33,7 @@ def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, al
             response.choices[0].message.parsed = schema._partial_pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
         else:
             response.choices[0].message.parsed = schema.pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
-    except Exception
+    except Exception:
         pass
     return response
 
@@ -40,15 +42,16 @@ class BaseExtractionsMixin:
     def prepare_extraction(
         self,
         json_schema: dict[str, Any] | Path | str,
-        document: Path | str | IOBase | HttpUrl | None,
-
-
-
-
-
-
-
-
+        document: Path | str | IOBase | HttpUrl | None = None,
+        documents: list[Path | str | IOBase | HttpUrl] | None = None,
+        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
+        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
+        model: str = PydanticUndefined,  # type: ignore[assignment]
+        temperature: float = PydanticUndefined,  # type: ignore[assignment]
+        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
+        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
+        stream: bool = False,
+        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
         store: bool = False,
         idempotency_key: str | None = None,
     ) -> PreparedRequest:
@@ -56,26 +59,36 @@ class BaseExtractionsMixin:
 
         json_schema = load_json_schema(json_schema)
 
-
-
-
-
-
-
-
-
-
-
-
-        if image_resolution_dpi:
-            data["image_resolution_dpi"] = image_resolution_dpi
-        if browser_canvas:
-            data["browser_canvas"] = browser_canvas
+        # Handle both single document and multiple documents
+        if document is not None and documents is not None:
+            raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
+
+        # Convert single document to documents list for consistency
+        if document is not None:
+            processed_documents = [prepare_mime_document(document)]
+        elif documents is not None:
+            processed_documents = [prepare_mime_document(doc) for doc in documents]
+        else:
+            raise ValueError("Must provide either 'document' or 'documents' parameter.")
 
         # Validate DocumentAPIRequest data (raises exception if invalid)
-
+        request = DocumentExtractRequest(
+            json_schema=json_schema,
+            documents=processed_documents,
+            model=model,
+            temperature=temperature,
+            stream=stream,
+            modality=modality,
+            store=store,
+            reasoning_effort=reasoning_effort,
+            n_consensus=n_consensus,
+            image_resolution_dpi=image_resolution_dpi,
+            browser_canvas=browser_canvas,
+        )
 
-        return PreparedRequest(
+        return PreparedRequest(
+            method="POST", url="/v1/documents/extractions", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
+        )
 
     def prepare_log_extraction(
         self,
@@ -84,8 +97,8 @@ class BaseExtractionsMixin:
         model: str,
         temperature: float,
         completion: Any | None = None,
-        # The messages can be provided in different formats, we will convert them to the
-        messages: list[
+        # The messages can be provided in different formats, we will convert them to the Retab-compatible format
+        messages: list[ChatCompletionRetabMessage] | None = None,
         openai_messages: list[ChatCompletionMessageParam] | None = None,
         anthropic_messages: list[MessageParam] | None = None,
         anthropic_system_prompt: str | None = None,
@@ -99,12 +112,14 @@ class BaseExtractionsMixin:
                 # url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
                 url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
             )
+        else:
+            mime_document = prepare_mime_document(document)
 
         return PreparedRequest(
             method="POST",
             url="/v1/documents/log_extraction",
             data=LogExtractionRequest(
-                document=
+                document=mime_document,
                 messages=messages,
                 openai_messages=openai_messages,
                 anthropic_messages=anthropic_messages,
@@ -115,7 +130,7 @@ class BaseExtractionsMixin:
                 json_schema=json_schema,
                 model=model,
                 temperature=temperature,
-            ).model_dump(mode="json"
+            ).model_dump(mode="json"),
             raise_for_status=True,
         )
 
@@ -127,40 +142,55 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
         self,
         json_schema: dict[str, Any] | Path | str,
         model: str,
-        document: Path | str | IOBase | HttpUrl | None,
-
-
-
-
-
-
+        document: Path | str | IOBase | HttpUrl | None = None,
+        documents: list[Path | str | IOBase | HttpUrl] | None = None,
+        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
+        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
+        temperature: float = PydanticUndefined,  # type: ignore[assignment]
+        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
+        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
+        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
         idempotency_key: str | None = None,
         store: bool = False,
     ) -> UiParsedChatCompletion:
         """
-        Process
+        Process one or more documents using the Retab API.
 
         Args:
             json_schema: JSON schema defining the expected data structure
-            document: Single document (as MIMEData) to process
             model: The AI model to use for processing
+            document: Single document to process (use either this or documents, not both)
+            documents: List of documents to process (use either this or document, not both)
+            image_resolution_dpi: Optional image resolution DPI
+            browser_canvas: Optional browser canvas size
             temperature: Model temperature setting (0-1)
             modality: Modality of the document (e.g., native)
             reasoning_effort: The effort level for the model to reason about the input data.
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
             idempotency_key: Idempotency key for request
-            store: Whether to store the document in the
+            store: Whether to store the document in the Retab database
         Returns:
-
+            UiParsedChatCompletion: Parsed response from the API
         Raises:
-
+            ValueError: If neither document nor documents is provided, or if both are provided
+            HTTPException: If the request fails
         """
 
-        assert document is not None, "Either document or messages must be provided"
-
         # Validate DocumentAPIRequest data (raises exception if invalid)
         request = self.prepare_extraction(
-            json_schema
+            json_schema=json_schema,
+            document=document,
+            documents=documents,
+            image_resolution_dpi=image_resolution_dpi,
+            browser_canvas=browser_canvas,
+            model=model,
+            temperature=temperature,
+            modality=modality,
+            reasoning_effort=reasoning_effort,
+            stream=False,
+            n_consensus=n_consensus,
+            store=store,
+            idempotency_key=idempotency_key,
         )
         response = self._client._prepared_request(request)
 
@@ -172,45 +202,66 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
         self,
         json_schema: dict[str, Any] | Path | str,
         model: str,
-        document: Path | str | IOBase | HttpUrl | None,
-
-
-
-
-
-
+        document: Path | str | IOBase | HttpUrl | None = None,
+        documents: list[Path | str | IOBase | HttpUrl] | None = None,
+        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
+        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
+        temperature: float = PydanticUndefined,  # type: ignore[assignment]
+        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
+        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
+        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
         idempotency_key: str | None = None,
         store: bool = False,
     ) -> Generator[UiParsedChatCompletion, None, None]:
         """
-        Process
+        Process one or more documents using the Retab API with streaming enabled.
 
         Args:
             json_schema: JSON schema defining the expected data structure
-
+            model: The AI model to use for processing
+            document: Single document to process (use either this or documents, not both)
+            documents: List of documents to process (use either this or document, not both)
             image_resolution_dpi: Optional image resolution DPI.
             browser_canvas: Optional browser canvas size.
-            model: The AI model to use for processing
             temperature: Model temperature setting (0-1)
             modality: Modality of the document (e.g., native)
             reasoning_effort: The effort level for the model to reason about the input data.
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
             idempotency_key: Idempotency key for request
-            store: Whether to store the document in the
+            store: Whether to store the document in the Retab database
 
         Returns:
-            Generator[
+            Generator[UiParsedChatCompletion]: Stream of parsed responses
         Raises:
-
+            ValueError: If neither document nor documents is provided, or if both are provided
+            HTTPException: If the request fails
         Usage:
         ```python
-
+        # Single document
+        with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
+            for response in stream:
+                print(response)
+
+        # Multiple documents
+        with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
             for response in stream:
                 print(response)
         ```
         """
         request = self.prepare_extraction(
-            json_schema
+            json_schema=json_schema,
+            document=document,
+            documents=documents,
+            image_resolution_dpi=image_resolution_dpi,
+            browser_canvas=browser_canvas,
+            model=model,
+            temperature=temperature,
+            modality=modality,
+            reasoning_effort=reasoning_effort,
+            stream=True,
+            n_consensus=n_consensus,
+            store=store,
+            idempotency_key=idempotency_key,
         )
         schema = Schema(json_schema=load_json_schema(json_schema))
 
@@ -260,8 +311,8 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
         model: str,
         temperature: float,
         completion: Any | None = None,
-        # The messages can be provided in different formats, we will convert them to the
-        messages: list[
+        # The messages can be provided in different formats, we will convert them to the Retab-compatible format
+        messages: list[ChatCompletionRetabMessage] | None = None,
         openai_messages: list[ChatCompletionMessageParam] | None = None,
         anthropic_messages: list[MessageParam] | None = None,
         anthropic_system_prompt: str | None = None,
@@ -270,10 +321,10 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
         openai_responses_output: Response | None = None,
     ) -> None:
         request = self.prepare_log_extraction(
-            document,
-            json_schema,
-            model,
-            temperature,
+            document=document,
+            json_schema=json_schema,
+            model=model,
+            temperature=temperature,
             completion=completion,
             messages=messages,
             openai_messages=openai_messages,
@@ -292,36 +343,52 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
         self,
         json_schema: dict[str, Any] | Path | str,
         model: str,
-        document: Path | str | IOBase | HttpUrl | None,
-
-
-
-
-
-
+        document: Path | str | IOBase | HttpUrl | None = None,
+        documents: list[Path | str | IOBase | HttpUrl] | None = None,
+        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
+        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
+        temperature: float = PydanticUndefined,  # type: ignore[assignment]
+        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
+        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
+        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
         idempotency_key: str | None = None,
         store: bool = False,
     ) -> UiParsedChatCompletion:
         """
-        Extract structured data from
+        Extract structured data from one or more documents asynchronously.
 
         Args:
             json_schema: JSON schema defining the expected data structure.
-
+            model: The AI model to use.
+            document: Single document to process (use either this or documents, not both)
+            documents: List of documents to process (use either this or document, not both)
             image_resolution_dpi: Optional image resolution DPI.
             browser_canvas: Optional browser canvas size.
-            model: The AI model to use.
             temperature: Model temperature setting (0-1).
             modality: Modality of the document (e.g., native).
             reasoning_effort: The effort level for the model to reason about the input data.
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
             idempotency_key: Idempotency key for request
-            store: Whether to store the document in the
+            store: Whether to store the document in the Retab database
         Returns:
-
+            UiParsedChatCompletion: Parsed response from the API.
+        Raises:
+            ValueError: If neither document nor documents is provided, or if both are provided
         """
         request = self.prepare_extraction(
-            json_schema
+            json_schema=json_schema,
+            document=document,
+            documents=documents,
+            image_resolution_dpi=image_resolution_dpi,
+            browser_canvas=browser_canvas,
+            model=model,
+            temperature=temperature,
+            modality=modality,
+            reasoning_effort=reasoning_effort,
+            stream=False,
+            n_consensus=n_consensus,
+            store=store,
+            idempotency_key=idempotency_key,
         )
         response = await self._client._prepared_request(request)
         schema = Schema(json_schema=load_json_schema(json_schema))
@@ -332,41 +399,65 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
         self,
         json_schema: dict[str, Any] | Path | str,
         model: str,
-        document: Path | str | IOBase | HttpUrl | None,
-
-
-
-
-
-
+        document: Path | str | IOBase | HttpUrl | None = None,
+        documents: list[Path | str | IOBase | HttpUrl] | None = None,
+        image_resolution_dpi: int = PydanticUndefined,  # type: ignore[assignment]
+        browser_canvas: BrowserCanvas = PydanticUndefined,  # type: ignore[assignment]
+        temperature: float = PydanticUndefined,  # type: ignore[assignment]
+        modality: Modality = PydanticUndefined,  # type: ignore[assignment]
+        reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined,  # type: ignore[assignment]
+        n_consensus: int = PydanticUndefined,  # type: ignore[assignment]
         idempotency_key: str | None = None,
         store: bool = False,
     ) -> AsyncGenerator[UiParsedChatCompletion, None]:
         """
-        Extract structured data from
+        Extract structured data from one or more documents asynchronously with streaming.
 
         Args:
             json_schema: JSON schema defining the expected data structure.
-            document: Path, string, or file-like object representing the document.
             model: The AI model to use.
+            document: Single document to process (use either this or documents, not both)
+            documents: List of documents to process (use either this or document, not both)
+            image_resolution_dpi: Optional image resolution DPI.
+            browser_canvas: Optional browser canvas size.
             temperature: Model temperature setting (0-1).
             modality: Modality of the document (e.g., native).
             reasoning_effort: The effort level for the model to reason about the input data.
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
            idempotency_key: Idempotency key for request
-            store: Whether to store the document in the
+            store: Whether to store the document in the Retab database
         Returns:
-            AsyncGenerator[
+            AsyncGenerator[UiParsedChatCompletion, None]: Stream of parsed responses.
+        Raises:
+            ValueError: If neither document nor documents is provided, or if both are provided
 
         Usage:
         ```python
-
+        # Single document
+        async with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
+            async for response in stream:
+                print(response)
+
+        # Multiple documents
+        async with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
             async for response in stream:
                 print(response)
         ```
         """
         request = self.prepare_extraction(
-            json_schema
+            json_schema=json_schema,
+            document=document,
+            documents=documents,
+            image_resolution_dpi=image_resolution_dpi,
+            browser_canvas=browser_canvas,
+            model=model,
+            temperature=temperature,
+            modality=modality,
+            reasoning_effort=reasoning_effort,
+            stream=True,
+            n_consensus=n_consensus,
+            store=store,
+            idempotency_key=idempotency_key,
         )
         schema = Schema(json_schema=load_json_schema(json_schema))
         ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
@@ -416,8 +507,8 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
         model: str,
         temperature: float,
         completion: Any | None = None,
-        # The messages can be provided in different formats, we will convert them to the
-        messages: list[
+        # The messages can be provided in different formats, we will convert them to the Retab-compatible format
+        messages: list[ChatCompletionRetabMessage] | None = None,
         openai_messages: list[ChatCompletionMessageParam] | None = None,
         anthropic_messages: list[MessageParam] | None = None,
         anthropic_system_prompt: str | None = None,
@@ -426,10 +517,10 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
         openai_responses_output: Response | None = None,
     ) -> None:
         request = self.prepare_log_extraction(
-            document,
-            json_schema,
-            model,
-            temperature,
+            document=document,
+            json_schema=json_schema,
+            model=model,
+            temperature=temperature,
             completion=completion,
             messages=messages,
             openai_messages=openai_messages,
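Summing up the extraction changes shown above: `document=` and `documents=` are now keyword arguments (mutually exclusive, one of them required), most tuning parameters default to `PydanticUndefined` so unset values are dropped from the request body via `exclude_unset`/`exclude_defaults`, and a single request can cover several documents. The sketch below is assembled from the docstring usage examples in this diff; the schema, model name, and file paths are placeholders, and the `retab` client object is assumed to already exist since its construction is not part of this file's diff.

# Placeholder schema and inputs; only the call shape is taken from the diff's docstrings.
json_schema = {"type": "object", "properties": {"total": {"type": "number"}}}

# Single document; passing both document= and documents= raises ValueError in 0.0.38.
with retab.documents.extractions.stream(json_schema, model="gpt-4.1-mini", document="invoice_1.pdf") as stream:
    for response in stream:
        print(response)

# Several documents in one request (new in 0.0.38).
with retab.documents.extractions.stream(json_schema, model="gpt-4.1-mini", documents=["invoice_1.pdf", "invoice_2.pdf"]) as stream:
    for response in stream:
        print(response)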