retab 0.0.35__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +9 -14
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +9 -3
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +28 -28
- {uiform → retab}/client.py +32 -31
- {uiform → retab}/resources/consensus/client.py +17 -36
- {uiform → retab}/resources/consensus/completions.py +24 -47
- {uiform → retab}/resources/consensus/completions_stream.py +26 -38
- {uiform → retab}/resources/consensus/responses.py +31 -80
- {uiform → retab}/resources/consensus/responses_stream.py +31 -79
- {uiform → retab}/resources/documents/client.py +59 -45
- {uiform → retab}/resources/documents/extractions.py +181 -90
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +220 -216
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +179 -164
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +39 -2
- {uiform → retab}/types/ai_models.py +13 -13
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +9 -9
- {uiform → retab}/types/automations/outlook.py +15 -11
- retab/types/browser_canvas.py +3 -0
- {uiform → retab}/types/chat.py +2 -2
- {uiform → retab}/types/completions.py +9 -12
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +18 -20
- {uiform → retab}/types/documents/extractions.py +69 -24
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +33 -10
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +13 -14
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
- retab-0.0.37.dist-info/RECORD +107 -0
- retab-0.0.37.dist-info/top_level.txt +1 -0
- retab-0.0.35.dist-info/RECORD +0 -111
- retab-0.0.35.dist-info/top_level.txt +0 -1
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/deployments/__init__.py +0 -9
- uiform/resources/deployments/client.py +0 -78
- uiform/resources/deployments/endpoints.py +0 -322
- uiform/resources/deployments/links.py +0 -452
- uiform/resources/deployments/logs.py +0 -211
- uiform/resources/deployments/mailboxes.py +0 -496
- uiform/resources/deployments/outlook.py +0 -531
- uiform/resources/deployments/tests.py +0 -158
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/consensus.py +0 -10
- uiform/types/deployments/cron.py +0 -59
- uiform/types/deployments/endpoints.py +0 -28
- uiform/types/deployments/links.py +0 -36
- uiform/types/deployments/mailboxes.py +0 -67
- uiform/types/deployments/outlook.py +0 -76
- uiform/types/deployments/webhooks.py +0 -21
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- uiform/types/secrets/__init__.py +0 -0
- {uiform → retab}/__init__.py +0 -0
- {uiform → retab}/_resource.py +0 -0
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/automations/webhooks.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform/types/deployments → retab/types/documents}/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform/types/documents → retab/types/jobs}/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform/types/jobs → retab/types/schemas}/__init__.py +0 -0
- {uiform/types/schemas → retab/types/secrets}/__init__.py +0 -0
- {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
@@ -2,7 +2,7 @@ import base64
|
|
2
2
|
import json
|
3
3
|
from io import IOBase
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import Any, AsyncGenerator, Generator
|
5
|
+
from typing import Any, AsyncGenerator, Generator
|
6
6
|
|
7
7
|
from anthropic.types.message_param import MessageParam
|
8
8
|
from openai.types.chat import ChatCompletionMessageParam
|
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
|
|
10
10
|
from openai.types.chat.parsed_chat_completion import ParsedChatCompletionMessage
|
11
11
|
from openai.types.responses.response import Response
|
12
12
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
13
|
+
from pydantic_core import PydanticUndefined
|
13
14
|
from pydantic import HttpUrl
|
14
15
|
|
15
16
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
@@ -19,6 +20,7 @@ from ..._utils.mime import MIMEData, prepare_mime_document
|
|
19
20
|
from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
|
20
21
|
from ...types.chat import ChatCompletionUiformMessage
|
21
22
|
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
|
23
|
+
from ...types.browser_canvas import BrowserCanvas
|
22
24
|
from ...types.modalities import Modality
|
23
25
|
from ...types.schemas.object import Schema
|
24
26
|
from ...types.standards import PreparedRequest
|
@@ -31,7 +33,7 @@ def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, al
|
|
31
33
|
response.choices[0].message.parsed = schema._partial_pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
|
32
34
|
else:
|
33
35
|
response.choices[0].message.parsed = schema.pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
|
34
|
-
except Exception
|
36
|
+
except Exception:
|
35
37
|
pass
|
36
38
|
return response
|
37
39
|
|
@@ -40,15 +42,16 @@ class BaseExtractionsMixin:
|
|
40
42
|
def prepare_extraction(
|
41
43
|
self,
|
42
44
|
json_schema: dict[str, Any] | Path | str,
|
43
|
-
document: Path | str | IOBase | HttpUrl | None,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
45
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
46
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
47
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
48
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
49
|
+
model: str = PydanticUndefined, # type: ignore[assignment]
|
50
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
51
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
52
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
53
|
+
stream: bool = False,
|
54
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
52
55
|
store: bool = False,
|
53
56
|
idempotency_key: str | None = None,
|
54
57
|
) -> PreparedRequest:
|
@@ -56,26 +59,36 @@ class BaseExtractionsMixin:
|
|
56
59
|
|
57
60
|
json_schema = load_json_schema(json_schema)
|
58
61
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
if image_resolution_dpi:
|
71
|
-
data["image_resolution_dpi"] = image_resolution_dpi
|
72
|
-
if browser_canvas:
|
73
|
-
data["browser_canvas"] = browser_canvas
|
62
|
+
# Handle both single document and multiple documents
|
63
|
+
if document is not None and documents is not None:
|
64
|
+
raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
|
65
|
+
|
66
|
+
# Convert single document to documents list for consistency
|
67
|
+
if document is not None:
|
68
|
+
processed_documents = [prepare_mime_document(document)]
|
69
|
+
elif documents is not None:
|
70
|
+
processed_documents = [prepare_mime_document(doc) for doc in documents]
|
71
|
+
else:
|
72
|
+
raise ValueError("Must provide either 'document' or 'documents' parameter.")
|
74
73
|
|
75
74
|
# Validate DocumentAPIRequest data (raises exception if invalid)
|
76
|
-
|
75
|
+
request = DocumentExtractRequest(
|
76
|
+
json_schema=json_schema,
|
77
|
+
documents=processed_documents,
|
78
|
+
model=model,
|
79
|
+
temperature=temperature,
|
80
|
+
stream=stream,
|
81
|
+
modality=modality,
|
82
|
+
store=store,
|
83
|
+
reasoning_effort=reasoning_effort,
|
84
|
+
n_consensus=n_consensus,
|
85
|
+
image_resolution_dpi=image_resolution_dpi,
|
86
|
+
browser_canvas=browser_canvas,
|
87
|
+
)
|
77
88
|
|
78
|
-
return PreparedRequest(
|
89
|
+
return PreparedRequest(
|
90
|
+
method="POST", url="/v1/documents/extractions", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
|
91
|
+
)
|
79
92
|
|
80
93
|
def prepare_log_extraction(
|
81
94
|
self,
|
@@ -99,12 +112,14 @@ class BaseExtractionsMixin:
|
|
99
112
|
# url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
|
100
113
|
url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
|
101
114
|
)
|
115
|
+
else:
|
116
|
+
mime_document = prepare_mime_document(document)
|
102
117
|
|
103
118
|
return PreparedRequest(
|
104
119
|
method="POST",
|
105
120
|
url="/v1/documents/log_extraction",
|
106
121
|
data=LogExtractionRequest(
|
107
|
-
document=
|
122
|
+
document=mime_document,
|
108
123
|
messages=messages,
|
109
124
|
openai_messages=openai_messages,
|
110
125
|
anthropic_messages=anthropic_messages,
|
@@ -115,7 +130,7 @@ class BaseExtractionsMixin:
|
|
115
130
|
json_schema=json_schema,
|
116
131
|
model=model,
|
117
132
|
temperature=temperature,
|
118
|
-
).model_dump(mode="json"
|
133
|
+
).model_dump(mode="json"),
|
119
134
|
raise_for_status=True,
|
120
135
|
)
|
121
136
|
|
@@ -127,23 +142,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
127
142
|
self,
|
128
143
|
json_schema: dict[str, Any] | Path | str,
|
129
144
|
model: str,
|
130
|
-
document: Path | str | IOBase | HttpUrl | None,
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
145
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
146
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
147
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
148
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
149
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
150
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
151
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
152
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
137
153
|
idempotency_key: str | None = None,
|
138
154
|
store: bool = False,
|
139
155
|
) -> UiParsedChatCompletion:
|
140
156
|
"""
|
141
|
-
Process
|
157
|
+
Process one or more documents using the UiForm API.
|
142
158
|
|
143
159
|
Args:
|
144
160
|
json_schema: JSON schema defining the expected data structure
|
145
|
-
document: Single document (as MIMEData) to process
|
146
161
|
model: The AI model to use for processing
|
162
|
+
document: Single document to process (use either this or documents, not both)
|
163
|
+
documents: List of documents to process (use either this or document, not both)
|
164
|
+
image_resolution_dpi: Optional image resolution DPI
|
165
|
+
browser_canvas: Optional browser canvas size
|
147
166
|
temperature: Model temperature setting (0-1)
|
148
167
|
modality: Modality of the document (e.g., native)
|
149
168
|
reasoning_effort: The effort level for the model to reason about the input data.
|
@@ -151,16 +170,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
151
170
|
idempotency_key: Idempotency key for request
|
152
171
|
store: Whether to store the document in the UiForm database
|
153
172
|
Returns:
|
154
|
-
|
173
|
+
UiParsedChatCompletion: Parsed response from the API
|
155
174
|
Raises:
|
156
|
-
|
175
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
176
|
+
HTTPException: If the request fails
|
157
177
|
"""
|
158
178
|
|
159
|
-
assert document is not None, "Either document or messages must be provided"
|
160
|
-
|
161
179
|
# Validate DocumentAPIRequest data (raises exception if invalid)
|
162
180
|
request = self.prepare_extraction(
|
163
|
-
json_schema
|
181
|
+
json_schema=json_schema,
|
182
|
+
document=document,
|
183
|
+
documents=documents,
|
184
|
+
image_resolution_dpi=image_resolution_dpi,
|
185
|
+
browser_canvas=browser_canvas,
|
186
|
+
model=model,
|
187
|
+
temperature=temperature,
|
188
|
+
modality=modality,
|
189
|
+
reasoning_effort=reasoning_effort,
|
190
|
+
stream=False,
|
191
|
+
n_consensus=n_consensus,
|
192
|
+
store=store,
|
193
|
+
idempotency_key=idempotency_key,
|
164
194
|
)
|
165
195
|
response = self._client._prepared_request(request)
|
166
196
|
|
@@ -172,25 +202,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
172
202
|
self,
|
173
203
|
json_schema: dict[str, Any] | Path | str,
|
174
204
|
model: str,
|
175
|
-
document: Path | str | IOBase | HttpUrl | None,
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
205
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
206
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
207
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
208
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
209
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
210
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
211
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
212
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
182
213
|
idempotency_key: str | None = None,
|
183
214
|
store: bool = False,
|
184
215
|
) -> Generator[UiParsedChatCompletion, None, None]:
|
185
216
|
"""
|
186
|
-
Process
|
217
|
+
Process one or more documents using the UiForm API with streaming enabled.
|
187
218
|
|
188
219
|
Args:
|
189
220
|
json_schema: JSON schema defining the expected data structure
|
190
|
-
|
221
|
+
model: The AI model to use for processing
|
222
|
+
document: Single document to process (use either this or documents, not both)
|
223
|
+
documents: List of documents to process (use either this or document, not both)
|
191
224
|
image_resolution_dpi: Optional image resolution DPI.
|
192
225
|
browser_canvas: Optional browser canvas size.
|
193
|
-
model: The AI model to use for processing
|
194
226
|
temperature: Model temperature setting (0-1)
|
195
227
|
modality: Modality of the document (e.g., native)
|
196
228
|
reasoning_effort: The effort level for the model to reason about the input data.
|
@@ -199,18 +231,37 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
199
231
|
store: Whether to store the document in the UiForm database
|
200
232
|
|
201
233
|
Returns:
|
202
|
-
Generator[
|
234
|
+
Generator[UiParsedChatCompletion]: Stream of parsed responses
|
203
235
|
Raises:
|
204
|
-
|
236
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
237
|
+
HTTPException: If the request fails
|
205
238
|
Usage:
|
206
239
|
```python
|
207
|
-
|
240
|
+
# Single document
|
241
|
+
with uiform.documents.extractions.stream(json_schema, model, document=document) as stream:
|
242
|
+
for response in stream:
|
243
|
+
print(response)
|
244
|
+
|
245
|
+
# Multiple documents
|
246
|
+
with uiform.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
208
247
|
for response in stream:
|
209
248
|
print(response)
|
210
249
|
```
|
211
250
|
"""
|
212
251
|
request = self.prepare_extraction(
|
213
|
-
json_schema
|
252
|
+
json_schema=json_schema,
|
253
|
+
document=document,
|
254
|
+
documents=documents,
|
255
|
+
image_resolution_dpi=image_resolution_dpi,
|
256
|
+
browser_canvas=browser_canvas,
|
257
|
+
model=model,
|
258
|
+
temperature=temperature,
|
259
|
+
modality=modality,
|
260
|
+
reasoning_effort=reasoning_effort,
|
261
|
+
stream=True,
|
262
|
+
n_consensus=n_consensus,
|
263
|
+
store=store,
|
264
|
+
idempotency_key=idempotency_key,
|
214
265
|
)
|
215
266
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
216
267
|
|
@@ -270,10 +321,10 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
270
321
|
openai_responses_output: Response | None = None,
|
271
322
|
) -> None:
|
272
323
|
request = self.prepare_log_extraction(
|
273
|
-
document,
|
274
|
-
json_schema,
|
275
|
-
model,
|
276
|
-
temperature,
|
324
|
+
document=document,
|
325
|
+
json_schema=json_schema,
|
326
|
+
model=model,
|
327
|
+
temperature=temperature,
|
277
328
|
completion=completion,
|
278
329
|
messages=messages,
|
279
330
|
openai_messages=openai_messages,
|
@@ -292,25 +343,27 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
292
343
|
self,
|
293
344
|
json_schema: dict[str, Any] | Path | str,
|
294
345
|
model: str,
|
295
|
-
document: Path | str | IOBase | HttpUrl | None,
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
346
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
347
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
348
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
349
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
350
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
351
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
352
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
353
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
302
354
|
idempotency_key: str | None = None,
|
303
355
|
store: bool = False,
|
304
356
|
) -> UiParsedChatCompletion:
|
305
357
|
"""
|
306
|
-
Extract structured data from
|
358
|
+
Extract structured data from one or more documents asynchronously.
|
307
359
|
|
308
360
|
Args:
|
309
361
|
json_schema: JSON schema defining the expected data structure.
|
310
|
-
|
362
|
+
model: The AI model to use.
|
363
|
+
document: Single document to process (use either this or documents, not both)
|
364
|
+
documents: List of documents to process (use either this or document, not both)
|
311
365
|
image_resolution_dpi: Optional image resolution DPI.
|
312
366
|
browser_canvas: Optional browser canvas size.
|
313
|
-
model: The AI model to use.
|
314
367
|
temperature: Model temperature setting (0-1).
|
315
368
|
modality: Modality of the document (e.g., native).
|
316
369
|
reasoning_effort: The effort level for the model to reason about the input data.
|
@@ -318,10 +371,24 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
318
371
|
idempotency_key: Idempotency key for request
|
319
372
|
store: Whether to store the document in the UiForm database
|
320
373
|
Returns:
|
321
|
-
|
374
|
+
UiParsedChatCompletion: Parsed response from the API.
|
375
|
+
Raises:
|
376
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
322
377
|
"""
|
323
378
|
request = self.prepare_extraction(
|
324
|
-
json_schema
|
379
|
+
json_schema=json_schema,
|
380
|
+
document=document,
|
381
|
+
documents=documents,
|
382
|
+
image_resolution_dpi=image_resolution_dpi,
|
383
|
+
browser_canvas=browser_canvas,
|
384
|
+
model=model,
|
385
|
+
temperature=temperature,
|
386
|
+
modality=modality,
|
387
|
+
reasoning_effort=reasoning_effort,
|
388
|
+
stream=False,
|
389
|
+
n_consensus=n_consensus,
|
390
|
+
store=store,
|
391
|
+
idempotency_key=idempotency_key,
|
325
392
|
)
|
326
393
|
response = await self._client._prepared_request(request)
|
327
394
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
@@ -332,23 +399,27 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
332
399
|
self,
|
333
400
|
json_schema: dict[str, Any] | Path | str,
|
334
401
|
model: str,
|
335
|
-
document: Path | str | IOBase | HttpUrl | None,
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
402
|
+
document: Path | str | IOBase | HttpUrl | None = None,
|
403
|
+
documents: list[Path | str | IOBase | HttpUrl] | None = None,
|
404
|
+
image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
|
405
|
+
browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
|
406
|
+
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
407
|
+
modality: Modality = PydanticUndefined, # type: ignore[assignment]
|
408
|
+
reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
|
409
|
+
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
342
410
|
idempotency_key: str | None = None,
|
343
411
|
store: bool = False,
|
344
412
|
) -> AsyncGenerator[UiParsedChatCompletion, None]:
|
345
413
|
"""
|
346
|
-
Extract structured data from
|
414
|
+
Extract structured data from one or more documents asynchronously with streaming.
|
347
415
|
|
348
416
|
Args:
|
349
417
|
json_schema: JSON schema defining the expected data structure.
|
350
|
-
document: Path, string, or file-like object representing the document.
|
351
418
|
model: The AI model to use.
|
419
|
+
document: Single document to process (use either this or documents, not both)
|
420
|
+
documents: List of documents to process (use either this or document, not both)
|
421
|
+
image_resolution_dpi: Optional image resolution DPI.
|
422
|
+
browser_canvas: Optional browser canvas size.
|
352
423
|
temperature: Model temperature setting (0-1).
|
353
424
|
modality: Modality of the document (e.g., native).
|
354
425
|
reasoning_effort: The effort level for the model to reason about the input data.
|
@@ -356,17 +427,37 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
356
427
|
idempotency_key: Idempotency key for request
|
357
428
|
store: Whether to store the document in the UiForm database
|
358
429
|
Returns:
|
359
|
-
AsyncGenerator[
|
430
|
+
AsyncGenerator[UiParsedChatCompletion, None]: Stream of parsed responses.
|
431
|
+
Raises:
|
432
|
+
ValueError: If neither document nor documents is provided, or if both are provided
|
360
433
|
|
361
434
|
Usage:
|
362
435
|
```python
|
363
|
-
|
436
|
+
# Single document
|
437
|
+
async with uiform.documents.extractions.stream(json_schema, model, document=document) as stream:
|
438
|
+
async for response in stream:
|
439
|
+
print(response)
|
440
|
+
|
441
|
+
# Multiple documents
|
442
|
+
async with uiform.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
364
443
|
async for response in stream:
|
365
444
|
print(response)
|
366
445
|
```
|
367
446
|
"""
|
368
447
|
request = self.prepare_extraction(
|
369
|
-
json_schema
|
448
|
+
json_schema=json_schema,
|
449
|
+
document=document,
|
450
|
+
documents=documents,
|
451
|
+
image_resolution_dpi=image_resolution_dpi,
|
452
|
+
browser_canvas=browser_canvas,
|
453
|
+
model=model,
|
454
|
+
temperature=temperature,
|
455
|
+
modality=modality,
|
456
|
+
reasoning_effort=reasoning_effort,
|
457
|
+
stream=True,
|
458
|
+
n_consensus=n_consensus,
|
459
|
+
store=store,
|
460
|
+
idempotency_key=idempotency_key,
|
370
461
|
)
|
371
462
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
372
463
|
ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
|
@@ -426,10 +517,10 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
426
517
|
openai_responses_output: Response | None = None,
|
427
518
|
) -> None:
|
428
519
|
request = self.prepare_log_extraction(
|
429
|
-
document,
|
430
|
-
json_schema,
|
431
|
-
model,
|
432
|
-
temperature,
|
520
|
+
document=document,
|
521
|
+
json_schema=json_schema,
|
522
|
+
model=model,
|
523
|
+
temperature=temperature,
|
433
524
|
completion=completion,
|
434
525
|
messages=messages,
|
435
526
|
openai_messages=openai_messages,
|
@@ -1,32 +1,28 @@
|
|
1
|
-
from typing import Any, Dict, List, Optional, TypedDict, Union, Literal
|
2
1
|
from io import IOBase
|
3
2
|
from pathlib import Path
|
3
|
+
from typing import Any, Dict, List, Optional, TypedDict, Union
|
4
4
|
|
5
5
|
import PIL.Image
|
6
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
6
7
|
from pydantic import HttpUrl
|
7
8
|
|
8
9
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
9
|
-
from ..
|
10
|
+
from .._utils.mime import prepare_mime_document
|
10
11
|
from ..types.evals import (
|
12
|
+
CreateIterationRequest,
|
13
|
+
DistancesResult,
|
14
|
+
DocumentItem,
|
11
15
|
Evaluation,
|
12
16
|
EvaluationDocument,
|
13
17
|
Iteration,
|
14
|
-
DistancesResult,
|
15
|
-
PredictionData,
|
16
|
-
AddIterationFromJsonlRequest,
|
17
|
-
DocumentItem,
|
18
18
|
UpdateEvaluationDocumentRequest,
|
19
|
-
|
20
|
-
CreateIterationRequest,
|
19
|
+
UpdateEvaluationRequest,
|
21
20
|
)
|
22
|
-
from ..types.
|
23
|
-
|
21
|
+
from ..types.inference_settings import InferenceSettings
|
24
22
|
from ..types.mime import MIMEData
|
25
|
-
from .._utils.mime import prepare_mime_document
|
26
23
|
from ..types.modalities import Modality
|
27
|
-
from
|
28
|
-
|
29
|
-
from tqdm import tqdm
|
24
|
+
from ..types.browser_canvas import BrowserCanvas
|
25
|
+
from ..types.standards import PreparedRequest
|
30
26
|
|
31
27
|
|
32
28
|
class DeleteResponse(TypedDict):
|
@@ -82,21 +78,17 @@ class EvalsMixin:
|
|
82
78
|
Only the provided fields will be updated. Fields set to None will be excluded from the update.
|
83
79
|
"""
|
84
80
|
# Build a dictionary with only the provided fields
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
if default_inference_settings is not None:
|
97
|
-
update_data["default_inference_settings"] = default_inference_settings.model_dump(exclude_none=True, mode="json")
|
98
|
-
|
99
|
-
return PreparedRequest(method="PATCH", url=f"/v1/evals/{evaluation_id}", data=update_data)
|
81
|
+
|
82
|
+
update_request = UpdateEvaluationRequest(
|
83
|
+
name=name,
|
84
|
+
project_id=project_id,
|
85
|
+
json_schema=json_schema,
|
86
|
+
documents=documents,
|
87
|
+
iterations=iterations,
|
88
|
+
default_inference_settings=default_inference_settings,
|
89
|
+
)
|
90
|
+
|
91
|
+
return PreparedRequest(method="PATCH", url=f"/v1/evals/{evaluation_id}", data=update_request.model_dump(exclude_none=True, mode="json"))
|
100
92
|
|
101
93
|
def prepare_list(self, project_id: Optional[str] = None) -> PreparedRequest:
|
102
94
|
params = {}
|
@@ -114,9 +106,7 @@ class DocumentsMixin:
|
|
114
106
|
|
115
107
|
def prepare_create(self, evaluation_id: str, document: MIMEData, annotation: Dict[str, Any]) -> PreparedRequest:
|
116
108
|
# Serialize the MIMEData
|
117
|
-
|
118
109
|
document_item = DocumentItem(mime_data=document, annotation=annotation, annotation_metadata=None)
|
119
|
-
|
120
110
|
return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/documents", data=document_item.model_dump(mode="json"))
|
121
111
|
|
122
112
|
def prepare_list(self, evaluation_id: str, filename: Optional[str] = None) -> PreparedRequest:
|
@@ -127,7 +117,6 @@ class DocumentsMixin:
|
|
127
117
|
|
128
118
|
def prepare_update(self, evaluation_id: str, document_id: str, annotation: Dict[str, Any]) -> PreparedRequest:
|
129
119
|
update_request = UpdateEvaluationDocumentRequest(annotation=annotation, annotation_metadata=None)
|
130
|
-
|
131
120
|
return PreparedRequest(method="PUT", url=f"/v1/evals/{evaluation_id}/documents/{document_id}", data=update_request.model_dump(mode="json", exclude_none=True))
|
132
121
|
|
133
122
|
def prepare_delete(self, evaluation_id: str, document_id: str) -> PreparedRequest:
|
@@ -153,10 +142,10 @@ class IterationsMixin:
|
|
153
142
|
modality: Modality = "native",
|
154
143
|
reasoning_effort: ChatCompletionReasoningEffort = "medium",
|
155
144
|
image_resolution_dpi: int = 96,
|
156
|
-
browser_canvas:
|
145
|
+
browser_canvas: BrowserCanvas = "A4",
|
157
146
|
n_consensus: int = 1,
|
158
147
|
) -> PreparedRequest:
|
159
|
-
|
148
|
+
inference_settings = InferenceSettings(
|
160
149
|
model=model,
|
161
150
|
temperature=temperature,
|
162
151
|
modality=modality,
|
@@ -166,18 +155,30 @@ class IterationsMixin:
|
|
166
155
|
n_consensus=n_consensus,
|
167
156
|
)
|
168
157
|
|
169
|
-
|
158
|
+
request = CreateIterationRequest(inference_settings=inference_settings, json_schema=json_schema)
|
170
159
|
|
171
|
-
return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/iterations/create", data=
|
160
|
+
return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/iterations/create", data=request.model_dump(exclude_none=True, mode="json"))
|
172
161
|
|
173
162
|
def prepare_update(
|
174
|
-
self,
|
163
|
+
self,
|
164
|
+
iteration_id: str,
|
165
|
+
json_schema: Dict[str, Any],
|
166
|
+
model: str,
|
167
|
+
temperature: float = 0.0,
|
168
|
+
modality: Modality = "native",
|
169
|
+
reasoning_effort: ChatCompletionReasoningEffort = "medium",
|
170
|
+
image_resolution_dpi: int = 96,
|
171
|
+
browser_canvas: BrowserCanvas = "A4",
|
172
|
+
n_consensus: int = 1,
|
175
173
|
) -> PreparedRequest:
|
176
174
|
inference_settings = InferenceSettings(
|
177
175
|
model=model,
|
178
176
|
temperature=temperature,
|
177
|
+
modality=modality,
|
178
|
+
reasoning_effort=reasoning_effort,
|
179
179
|
image_resolution_dpi=image_resolution_dpi,
|
180
180
|
browser_canvas=browser_canvas,
|
181
|
+
n_consensus=n_consensus,
|
181
182
|
)
|
182
183
|
|
183
184
|
iteration_data = Iteration(id=iteration_id, json_schema=json_schema, inference_settings=inference_settings, predictions=[])
|
@@ -261,7 +262,13 @@ class Evals(SyncAPIResource, EvalsMixin):
|
|
261
262
|
HTTPException if the request fails
|
262
263
|
"""
|
263
264
|
request = self.prepare_update(
|
264
|
-
evaluation_id=evaluation_id,
|
265
|
+
evaluation_id=evaluation_id,
|
266
|
+
name=name,
|
267
|
+
project_id=project_id,
|
268
|
+
json_schema=json_schema,
|
269
|
+
documents=documents,
|
270
|
+
iterations=iterations,
|
271
|
+
default_inference_settings=default_inference_settings,
|
265
272
|
)
|
266
273
|
response = self._client._prepared_request(request)
|
267
274
|
return Evaluation(**response)
|
@@ -429,7 +436,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
429
436
|
json_schema: Optional[Dict[str, Any]] = None,
|
430
437
|
reasoning_effort: ChatCompletionReasoningEffort = "medium",
|
431
438
|
image_resolution_dpi: int = 96,
|
432
|
-
browser_canvas:
|
439
|
+
browser_canvas: BrowserCanvas = "A4",
|
433
440
|
n_consensus: int = 1,
|
434
441
|
) -> Iteration:
|
435
442
|
"""
|
@@ -445,7 +452,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
445
452
|
image_resolution_dpi: The DPI of the image. Defaults to 96.
|
446
453
|
browser_canvas: The canvas size of the browser. Must be one of:
|
447
454
|
- "A3" (11.7in x 16.54in)
|
448
|
-
- "A4" (8.27in x 11.7in)
|
455
|
+
- "A4" (8.27in x 11.7in)
|
449
456
|
- "A5" (5.83in x 8.27in)
|
450
457
|
Defaults to "A4".
|
451
458
|
n_consensus: Number of consensus iterations to perform
|
@@ -572,7 +579,13 @@ class AsyncEvals(AsyncAPIResource, EvalsMixin):
|
|
572
579
|
HTTPException if the request fails
|
573
580
|
"""
|
574
581
|
request = self.prepare_update(
|
575
|
-
evaluation_id=evaluation_id,
|
582
|
+
evaluation_id=evaluation_id,
|
583
|
+
name=name,
|
584
|
+
project_id=project_id,
|
585
|
+
json_schema=json_schema,
|
586
|
+
documents=documents,
|
587
|
+
iterations=iterations,
|
588
|
+
default_inference_settings=default_inference_settings,
|
576
589
|
)
|
577
590
|
response = await self._client._prepared_request(request)
|
578
591
|
return Evaluation(**response)
|
@@ -739,7 +752,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
739
752
|
json_schema: Optional[Dict[str, Any]] = None,
|
740
753
|
reasoning_effort: ChatCompletionReasoningEffort = "medium",
|
741
754
|
image_resolution_dpi: int = 96,
|
742
|
-
browser_canvas:
|
755
|
+
browser_canvas: BrowserCanvas = "A4",
|
743
756
|
n_consensus: int = 1,
|
744
757
|
) -> Iteration:
|
745
758
|
"""
|
@@ -755,7 +768,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
755
768
|
image_resolution_dpi: The DPI of the image. Defaults to 96.
|
756
769
|
browser_canvas: The canvas size of the browser. Must be one of:
|
757
770
|
- "A3" (11.7in x 16.54in)
|
758
|
-
- "A4" (8.27in x 11.7in)
|
771
|
+
- "A4" (8.27in x 11.7in)
|
759
772
|
- "A5" (5.83in x 8.27in)
|
760
773
|
Defaults to "A4".
|
761
774
|
n_consensus: Number of consensus iterations to perform
|