retab 0.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab-0.0.35.dist-info/METADATA +417 -0
- retab-0.0.35.dist-info/RECORD +111 -0
- retab-0.0.35.dist-info/WHEEL +5 -0
- retab-0.0.35.dist-info/top_level.txt +1 -0
- uiform/__init__.py +4 -0
- uiform/_resource.py +28 -0
- uiform/_utils/__init__.py +0 -0
- uiform/_utils/ai_models.py +100 -0
- uiform/_utils/benchmarking copy.py +588 -0
- uiform/_utils/benchmarking.py +485 -0
- uiform/_utils/chat.py +332 -0
- uiform/_utils/display.py +443 -0
- uiform/_utils/json_schema.py +2161 -0
- uiform/_utils/mime.py +168 -0
- uiform/_utils/responses.py +163 -0
- uiform/_utils/stream_context_managers.py +52 -0
- uiform/_utils/usage/__init__.py +0 -0
- uiform/_utils/usage/usage.py +300 -0
- uiform/client.py +701 -0
- uiform/py.typed +0 -0
- uiform/resources/__init__.py +0 -0
- uiform/resources/consensus/__init__.py +3 -0
- uiform/resources/consensus/client.py +114 -0
- uiform/resources/consensus/completions.py +252 -0
- uiform/resources/consensus/completions_stream.py +278 -0
- uiform/resources/consensus/responses.py +325 -0
- uiform/resources/consensus/responses_stream.py +373 -0
- uiform/resources/deployments/__init__.py +9 -0
- uiform/resources/deployments/client.py +78 -0
- uiform/resources/deployments/endpoints.py +322 -0
- uiform/resources/deployments/links.py +452 -0
- uiform/resources/deployments/logs.py +211 -0
- uiform/resources/deployments/mailboxes.py +496 -0
- uiform/resources/deployments/outlook.py +531 -0
- uiform/resources/deployments/tests.py +158 -0
- uiform/resources/documents/__init__.py +3 -0
- uiform/resources/documents/client.py +255 -0
- uiform/resources/documents/extractions.py +441 -0
- uiform/resources/evals.py +812 -0
- uiform/resources/files.py +24 -0
- uiform/resources/finetuning.py +62 -0
- uiform/resources/jsonlUtils.py +1046 -0
- uiform/resources/models.py +45 -0
- uiform/resources/openai_example.py +22 -0
- uiform/resources/processors/__init__.py +3 -0
- uiform/resources/processors/automations/__init__.py +9 -0
- uiform/resources/processors/automations/client.py +78 -0
- uiform/resources/processors/automations/endpoints.py +317 -0
- uiform/resources/processors/automations/links.py +356 -0
- uiform/resources/processors/automations/logs.py +211 -0
- uiform/resources/processors/automations/mailboxes.py +435 -0
- uiform/resources/processors/automations/outlook.py +444 -0
- uiform/resources/processors/automations/tests.py +158 -0
- uiform/resources/processors/client.py +474 -0
- uiform/resources/prompt_optimization.py +76 -0
- uiform/resources/schemas.py +369 -0
- uiform/resources/secrets/__init__.py +9 -0
- uiform/resources/secrets/client.py +20 -0
- uiform/resources/secrets/external_api_keys.py +109 -0
- uiform/resources/secrets/webhook.py +62 -0
- uiform/resources/usage.py +271 -0
- uiform/types/__init__.py +0 -0
- uiform/types/ai_models.py +645 -0
- uiform/types/automations/__init__.py +0 -0
- uiform/types/automations/cron.py +58 -0
- uiform/types/automations/endpoints.py +21 -0
- uiform/types/automations/links.py +28 -0
- uiform/types/automations/mailboxes.py +60 -0
- uiform/types/automations/outlook.py +68 -0
- uiform/types/automations/webhooks.py +21 -0
- uiform/types/chat.py +8 -0
- uiform/types/completions.py +93 -0
- uiform/types/consensus.py +10 -0
- uiform/types/db/__init__.py +0 -0
- uiform/types/db/annotations.py +24 -0
- uiform/types/db/files.py +36 -0
- uiform/types/deployments/__init__.py +0 -0
- uiform/types/deployments/cron.py +59 -0
- uiform/types/deployments/endpoints.py +28 -0
- uiform/types/deployments/links.py +36 -0
- uiform/types/deployments/mailboxes.py +67 -0
- uiform/types/deployments/outlook.py +76 -0
- uiform/types/deployments/webhooks.py +21 -0
- uiform/types/documents/__init__.py +0 -0
- uiform/types/documents/correct_orientation.py +13 -0
- uiform/types/documents/create_messages.py +226 -0
- uiform/types/documents/extractions.py +297 -0
- uiform/types/evals.py +207 -0
- uiform/types/events.py +76 -0
- uiform/types/extractions.py +85 -0
- uiform/types/jobs/__init__.py +0 -0
- uiform/types/jobs/base.py +150 -0
- uiform/types/jobs/batch_annotation.py +22 -0
- uiform/types/jobs/evaluation.py +133 -0
- uiform/types/jobs/finetune.py +6 -0
- uiform/types/jobs/prompt_optimization.py +41 -0
- uiform/types/jobs/webcrawl.py +6 -0
- uiform/types/logs.py +231 -0
- uiform/types/mime.py +257 -0
- uiform/types/modalities.py +68 -0
- uiform/types/pagination.py +6 -0
- uiform/types/schemas/__init__.py +0 -0
- uiform/types/schemas/enhance.py +53 -0
- uiform/types/schemas/evaluate.py +55 -0
- uiform/types/schemas/generate.py +32 -0
- uiform/types/schemas/layout.py +58 -0
- uiform/types/schemas/object.py +631 -0
- uiform/types/schemas/templates.py +107 -0
- uiform/types/secrets/__init__.py +0 -0
- uiform/types/secrets/external_api_keys.py +22 -0
- uiform/types/standards.py +39 -0
@@ -0,0 +1,441 @@
|
|
1
|
+
import base64
|
2
|
+
import json
|
3
|
+
from io import IOBase
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any, AsyncGenerator, Generator, Literal, Optional
|
6
|
+
|
7
|
+
from anthropic.types.message_param import MessageParam
|
8
|
+
from openai.types.chat import ChatCompletionMessageParam
|
9
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
10
|
+
from openai.types.chat.parsed_chat_completion import ParsedChatCompletionMessage
|
11
|
+
from openai.types.responses.response import Response
|
12
|
+
from openai.types.responses.response_input_param import ResponseInputItemParam
|
13
|
+
from pydantic import HttpUrl
|
14
|
+
|
15
|
+
from ..._resource import AsyncAPIResource, SyncAPIResource
|
16
|
+
from ..._utils.ai_models import assert_valid_model_extraction
|
17
|
+
from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
|
18
|
+
from ..._utils.mime import MIMEData, prepare_mime_document
|
19
|
+
from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
|
20
|
+
from ...types.chat import ChatCompletionUiformMessage
|
21
|
+
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
|
22
|
+
from ...types.modalities import Modality
|
23
|
+
from ...types.schemas.object import Schema
|
24
|
+
from ...types.standards import PreparedRequest
|
25
|
+
|
26
|
+
|
27
|
+
def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, allow_partial: bool = False) -> UiParsedChatCompletion:
    """Best-effort enrichment of *response* with a parsed pydantic object.

    Validates the JSON content of the first choice against the schema's
    pydantic model (or its partial model when ``allow_partial`` is True, used
    while streaming incomplete JSON) and stores the result on
    ``choices[0].message.parsed``.

    Validation failures are swallowed on purpose ("maybe" parse): the raw
    completion is always returned, whether or not parsing succeeded.

    Args:
        schema: Schema whose pydantic model is used for validation.
        response: Completion whose first choice is enriched in place.
        allow_partial: Validate against the partial model (streaming case).

    Returns:
        The same ``response`` object, possibly with ``parsed`` populated.
    """
    content = response.choices[0].message.content
    if content:
        try:
            # Auxiliary fields (e.g. reasoning/likelihood keys) are stripped
            # from the JSON payload before model validation.
            model = schema._partial_pydantic_model if allow_partial else schema.pydantic_model
            response.choices[0].message.parsed = model.model_validate(filter_auxiliary_fields_json(content))
        except Exception:
            # Deliberate best-effort: an unparsable payload must not break the caller.
            pass
    return response
|
37
|
+
|
38
|
+
|
39
|
+
class BaseExtractionsMixin:
    """Shared request-building logic for the sync and async extraction resources."""

    def prepare_extraction(
        self,
        json_schema: dict[str, Any] | Path | str,
        document: Path | str | IOBase | HttpUrl | None,
        image_resolution_dpi: int | None,
        browser_canvas: Literal['A3', 'A4', 'A5'] | None,
        model: str,
        temperature: float,
        modality: Modality,
        reasoning_effort: ChatCompletionReasoningEffort,
        stream: bool,
        n_consensus: int = 1,
        store: bool = False,
        idempotency_key: str | None = None,
    ) -> PreparedRequest:
        """Build the ``POST /v1/documents/extractions`` request.

        Args:
            json_schema: Schema (dict, path or JSON string) describing the expected output.
            document: Document to process; ``None`` sends no document in the payload.
            image_resolution_dpi: Optional DPI override; omitted from the payload when ``None``.
            browser_canvas: Optional canvas size; omitted from the payload when ``None``.
            model: AI model identifier (validated before building the request).
            temperature: Model temperature setting.
            modality: Modality of the document (e.g. native).
            reasoning_effort: Effort level for the model to reason about the input data.
            stream: Whether the server should stream the response.
            n_consensus: Number of consensus extractions to perform.
            store: Whether to store the document in the UiForm database.
            idempotency_key: Optional idempotency key for the request.

        Returns:
            PreparedRequest for the extractions endpoint.
        """
        assert_valid_model_extraction(model)

        json_schema = load_json_schema(json_schema)

        data = {
            "json_schema": json_schema,
            "document": prepare_mime_document(document).model_dump() if document is not None else None,
            "model": model,
            "temperature": temperature,
            "stream": stream,
            "modality": modality,
            "store": store,
            "reasoning_effort": reasoning_effort,
            "n_consensus": n_consensus,
        }
        # Compare against None explicitly: truthiness would also drop an
        # explicitly-passed falsy value (e.g. image_resolution_dpi=0).
        if image_resolution_dpi is not None:
            data["image_resolution_dpi"] = image_resolution_dpi
        if browser_canvas is not None:
            data["browser_canvas"] = browser_canvas

        # Validate DocumentAPIRequest data (raises exception if invalid)
        document_extract_request = DocumentExtractRequest.model_validate(data)

        return PreparedRequest(method="POST", url="/v1/documents/extractions", data=document_extract_request.model_dump(), idempotency_key=idempotency_key)

    def prepare_log_extraction(
        self,
        document: Path | str | IOBase | HttpUrl | None,
        json_schema: dict[str, Any],
        model: str,
        temperature: float,
        completion: Any | None = None,
        # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
        messages: list[ChatCompletionUiformMessage] | None = None,
        openai_messages: list[ChatCompletionMessageParam] | None = None,
        anthropic_messages: list[MessageParam] | None = None,
        anthropic_system_prompt: str | None = None,
        # New fields for the Responses API
        openai_responses_input: list[ResponseInputItemParam] | None = None,
        openai_responses_output: Response | None = None,
    ) -> PreparedRequest:
        """Build the ``POST /v1/documents/log_extraction`` request.

        When no document is given, a placeholder text file is logged instead so
        the endpoint always receives a document payload.
        """
        # Single explicit branch on None: the original guarded the fallback with
        # truthiness, which raised NameError for falsy-but-not-None documents.
        if document is not None:
            mime_document = prepare_mime_document(document)
        else:
            mime_document = MIMEData(
                filename="dummy.txt",
                # url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
                url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
            )

        return PreparedRequest(
            method="POST",
            url="/v1/documents/log_extraction",
            data=LogExtractionRequest(
                document=mime_document,
                messages=messages,
                openai_messages=openai_messages,
                anthropic_messages=anthropic_messages,
                anthropic_system_prompt=anthropic_system_prompt,
                completion=completion,
                openai_responses_input=openai_responses_input,
                openai_responses_output=openai_responses_output,
                json_schema=json_schema,
                model=model,
                temperature=temperature,
            ).model_dump(mode="json", by_alias=True),  # by_alias is necessary to enable serialization/deserialization ('schema' was being converted to 'schema_')
            raise_for_status=True,
        )
|
121
|
+
|
122
|
+
|
123
|
+
class Extractions(SyncAPIResource, BaseExtractionsMixin):
    """Extraction API wrapper"""

    def parse(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None,
        image_resolution_dpi: int | None = None,
        browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
        temperature: float = 0,
        modality: Modality = "native",
        reasoning_effort: ChatCompletionReasoningEffort = "medium",
        n_consensus: int = 1,
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """
        Process a document using the UiForm API.

        Args:
            json_schema: JSON schema defining the expected data structure
            document: Single document (as MIMEData) to process
            model: The AI model to use for processing
            temperature: Model temperature setting (0-1)
            modality: Modality of the document (e.g., native)
            reasoning_effort: The effort level for the model to reason about the input data.
            n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the UiForm database
        Returns:
            DocumentAPIResponse
        Raises:
            HTTPException if the request fails
        """

        assert document is not None, "Either document or messages must be provided"

        # Validate DocumentAPIRequest data (raises exception if invalid)
        request = self.prepare_extraction(
            json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
        )
        response = self._client._prepared_request(request)

        # Re-load the schema locally so the raw JSON content can be validated
        # into a pydantic object on the way out.
        schema = Schema(json_schema=load_json_schema(json_schema))
        return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))

    @as_context_manager
    def stream(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None,
        image_resolution_dpi: int | None = None,
        browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
        temperature: float = 0,
        modality: Modality = "native",
        reasoning_effort: ChatCompletionReasoningEffort = "medium",
        n_consensus: int = 1,
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> Generator[UiParsedChatCompletion, None, None]:
        """
        Process a document using the UiForm API with streaming enabled.

        Args:
            json_schema: JSON schema defining the expected data structure
            document: Single document (as MIMEData) to process
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            model: The AI model to use for processing
            temperature: Model temperature setting (0-1)
            modality: Modality of the document (e.g., native)
            reasoning_effort: The effort level for the model to reason about the input data.
            n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the UiForm database

        Returns:
            Generator[DocumentExtractResponse]: Stream of parsed responses
        Raises:
            HTTPException if the request fails
        Usage:
        ```python
        with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
            for response in stream:
                print(response)
        ```
        """
        request = self.prepare_extraction(
            json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
        )
        schema = Schema(json_schema=load_json_schema(json_schema))

        # Request the stream and return a context manager
        # Running accumulator: each incoming chunk is folded into this object
        # via chunk_accumulator(), so it always holds the merged state so far.
        ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
        # Initialize the UiParsedChatCompletion object
        # (a single mutable completion that is updated and re-yielded per chunk)
        ui_parsed_completion: UiParsedChatCompletion = UiParsedChatCompletion(
            id="",
            created=0,
            model="",
            object="chat.completion",
            likelihoods={},
            choices=[
                UiParsedChoice(
                    index=0,
                    message=ParsedChatCompletionMessage(content="", role="assistant"),
                    finish_reason=None,
                    logprobs=None,
                )
            ],
        )
        for chunk_json in self._client._prepared_request_stream(request):
            if not chunk_json:
                continue
            ui_parsed_chat_completion_cum_chunk = UiParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
            # Basic stuff
            ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
            ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
            ui_parsed_completion.model = ui_parsed_chat_completion_cum_chunk.model
            # Update the ui_parsed_completion object
            # (delta fields arrive flattened; unflatten back into nested dicts)
            parsed = unflatten_dict(ui_parsed_chat_completion_cum_chunk.choices[0].delta.flat_parsed)
            likelihoods = unflatten_dict(ui_parsed_chat_completion_cum_chunk.choices[0].delta.flat_likelihoods)
            ui_parsed_completion.choices[0].message.content = json.dumps(parsed)
            ui_parsed_completion.choices[0].message.parsed = parsed
            ui_parsed_completion.likelihoods = likelihoods

            # allow_partial=True: the accumulated JSON may still be incomplete
            yield maybe_parse_to_pydantic(schema, ui_parsed_completion, allow_partial=True)

        # change the finish_reason to stop
        ui_parsed_completion.choices[0].finish_reason = "stop"
        yield maybe_parse_to_pydantic(schema, ui_parsed_completion)

    def log(
        self,
        document: Path | str | IOBase | HttpUrl | None,
        json_schema: dict[str, Any],
        model: str,
        temperature: float,
        completion: Any | None = None,
        # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
        messages: list[ChatCompletionUiformMessage] | None = None,
        openai_messages: list[ChatCompletionMessageParam] | None = None,
        anthropic_messages: list[MessageParam] | None = None,
        anthropic_system_prompt: str | None = None,
        # New fields for the Responses API
        openai_responses_input: list[ResponseInputItemParam] | None = None,
        openai_responses_output: Response | None = None,
    ) -> None:
        """Log an extraction (document + messages/completion) to the UiForm backend."""
        request = self.prepare_log_extraction(
            document,
            json_schema,
            model,
            temperature,
            completion=completion,
            messages=messages,
            openai_messages=openai_messages,
            anthropic_messages=anthropic_messages,
            anthropic_system_prompt=anthropic_system_prompt,
            openai_responses_input=openai_responses_input,
            openai_responses_output=openai_responses_output,
        )
        return self._client._prepared_request(request)
|
286
|
+
|
287
|
+
|
288
|
+
class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
    """Extraction API wrapper for asynchronous usage."""

    async def parse(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None,
        image_resolution_dpi: int | None = None,
        browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
        temperature: float = 0,
        modality: Modality = "native",
        reasoning_effort: ChatCompletionReasoningEffort = "medium",
        n_consensus: int = 1,
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> UiParsedChatCompletion:
        """
        Extract structured data from a document asynchronously.

        Args:
            json_schema: JSON schema defining the expected data structure.
            document: Path, string, or file-like object representing the document.
            image_resolution_dpi: Optional image resolution DPI.
            browser_canvas: Optional browser canvas size.
            model: The AI model to use.
            temperature: Model temperature setting (0-1).
            modality: Modality of the document (e.g., native).
            reasoning_effort: The effort level for the model to reason about the input data.
            n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the UiForm database
        Returns:
            DocumentExtractResponse: Parsed response from the API.
        """
        request = self.prepare_extraction(
            json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
        )
        response = await self._client._prepared_request(request)
        # Re-load the schema locally so the raw JSON content can be validated
        # into a pydantic object on the way out.
        schema = Schema(json_schema=load_json_schema(json_schema))
        return maybe_parse_to_pydantic(schema, UiParsedChatCompletion.model_validate(response))

    @as_async_context_manager
    async def stream(
        self,
        json_schema: dict[str, Any] | Path | str,
        model: str,
        document: Path | str | IOBase | HttpUrl | None,
        image_resolution_dpi: int | None = None,
        browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
        temperature: float = 0,
        modality: Modality = "native",
        reasoning_effort: ChatCompletionReasoningEffort = "medium",
        n_consensus: int = 1,
        idempotency_key: str | None = None,
        store: bool = False,
    ) -> AsyncGenerator[UiParsedChatCompletion, None]:
        """
        Extract structured data from a document asynchronously with streaming.

        Args:
            json_schema: JSON schema defining the expected data structure.
            document: Path, string, or file-like object representing the document.
            model: The AI model to use.
            temperature: Model temperature setting (0-1).
            modality: Modality of the document (e.g., native).
            reasoning_effort: The effort level for the model to reason about the input data.
            n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
            idempotency_key: Idempotency key for request
            store: Whether to store the document in the UiForm database
        Returns:
            AsyncGenerator[DocumentExtractResponse, None]: Stream of parsed responses.

        Usage:
        ```python
        async with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
            async for response in stream:
                print(response)
        ```
        """
        request = self.prepare_extraction(
            json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
        )
        schema = Schema(json_schema=load_json_schema(json_schema))
        # Running accumulator: each incoming chunk is folded into this object
        # via chunk_accumulator(), so it always holds the merged state so far.
        ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
        # Initialize the UiParsedChatCompletion object
        # (a single mutable completion that is updated and re-yielded per chunk)
        ui_parsed_completion: UiParsedChatCompletion = UiParsedChatCompletion(
            id="",
            created=0,
            model="",
            object="chat.completion",
            likelihoods={},
            choices=[
                UiParsedChoice(
                    index=0,
                    message=ParsedChatCompletionMessage(content="", role="assistant"),
                    finish_reason=None,
                    logprobs=None,
                )
            ],
        )

        async for chunk_json in self._client._prepared_request_stream(request):
            if not chunk_json:
                continue
            ui_parsed_chat_completion_cum_chunk = UiParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
            # Basic stuff
            ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
            ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
            ui_parsed_completion.model = ui_parsed_chat_completion_cum_chunk.model

            # Update the ui_parsed_completion object
            # (delta fields arrive flattened; unflatten back into nested dicts)
            parsed = unflatten_dict(ui_parsed_chat_completion_cum_chunk.choices[0].delta.flat_parsed)
            likelihoods = unflatten_dict(ui_parsed_chat_completion_cum_chunk.choices[0].delta.flat_likelihoods)
            ui_parsed_completion.choices[0].message.content = json.dumps(parsed)
            ui_parsed_completion.choices[0].message.parsed = parsed
            ui_parsed_completion.likelihoods = likelihoods

            # allow_partial=True: the accumulated JSON may still be incomplete
            yield maybe_parse_to_pydantic(schema, ui_parsed_completion, allow_partial=True)

        # change the finish_reason to stop
        ui_parsed_completion.choices[0].finish_reason = "stop"
        yield maybe_parse_to_pydantic(schema, ui_parsed_completion)

    async def log(
        self,
        document: Path | str | IOBase | HttpUrl | None,
        json_schema: dict[str, Any],
        model: str,
        temperature: float,
        completion: Any | None = None,
        # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
        messages: list[ChatCompletionUiformMessage] | None = None,
        openai_messages: list[ChatCompletionMessageParam] | None = None,
        anthropic_messages: list[MessageParam] | None = None,
        anthropic_system_prompt: str | None = None,
        # New fields for the Responses API
        openai_responses_input: list[ResponseInputItemParam] | None = None,
        openai_responses_output: Response | None = None,
    ) -> None:
        """Log an extraction (document + messages/completion) to the UiForm backend."""
        request = self.prepare_log_extraction(
            document,
            json_schema,
            model,
            temperature,
            completion=completion,
            messages=messages,
            openai_messages=openai_messages,
            anthropic_messages=anthropic_messages,
            anthropic_system_prompt=anthropic_system_prompt,
            openai_responses_input=openai_responses_input,
            openai_responses_output=openai_responses_output,
        )
        return await self._client._prepared_request(request)
|