retab 0.0.35__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. {uiform → retab}/_utils/ai_models.py +2 -2
  2. {uiform → retab}/_utils/benchmarking.py +15 -16
  3. {uiform → retab}/_utils/chat.py +9 -14
  4. {uiform → retab}/_utils/display.py +0 -3
  5. {uiform → retab}/_utils/json_schema.py +9 -14
  6. {uiform → retab}/_utils/mime.py +11 -14
  7. {uiform → retab}/_utils/responses.py +9 -3
  8. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  9. {uiform → retab}/_utils/usage/usage.py +28 -28
  10. {uiform → retab}/client.py +32 -31
  11. {uiform → retab}/resources/consensus/client.py +17 -36
  12. {uiform → retab}/resources/consensus/completions.py +24 -47
  13. {uiform → retab}/resources/consensus/completions_stream.py +26 -38
  14. {uiform → retab}/resources/consensus/responses.py +31 -80
  15. {uiform → retab}/resources/consensus/responses_stream.py +31 -79
  16. {uiform → retab}/resources/documents/client.py +59 -45
  17. {uiform → retab}/resources/documents/extractions.py +181 -90
  18. {uiform → retab}/resources/evals.py +56 -43
  19. retab/resources/evaluations/__init__.py +3 -0
  20. retab/resources/evaluations/client.py +301 -0
  21. retab/resources/evaluations/documents.py +233 -0
  22. retab/resources/evaluations/iterations.py +452 -0
  23. {uiform → retab}/resources/files.py +2 -2
  24. {uiform → retab}/resources/jsonlUtils.py +220 -216
  25. retab/resources/models.py +73 -0
  26. retab/resources/processors/automations/client.py +244 -0
  27. {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
  28. retab/resources/processors/automations/links.py +294 -0
  29. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  30. {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
  31. retab/resources/processors/automations/outlook.py +337 -0
  32. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  33. {uiform → retab}/resources/processors/client.py +179 -164
  34. {uiform → retab}/resources/schemas.py +78 -66
  35. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  36. retab/resources/secrets/webhook.py +64 -0
  37. {uiform → retab}/resources/usage.py +39 -2
  38. {uiform → retab}/types/ai_models.py +13 -13
  39. {uiform → retab}/types/automations/cron.py +19 -12
  40. {uiform → retab}/types/automations/endpoints.py +7 -4
  41. {uiform → retab}/types/automations/links.py +7 -3
  42. {uiform → retab}/types/automations/mailboxes.py +9 -9
  43. {uiform → retab}/types/automations/outlook.py +15 -11
  44. retab/types/browser_canvas.py +3 -0
  45. {uiform → retab}/types/chat.py +2 -2
  46. {uiform → retab}/types/completions.py +9 -12
  47. retab/types/consensus.py +19 -0
  48. {uiform → retab}/types/db/annotations.py +3 -3
  49. {uiform → retab}/types/db/files.py +8 -6
  50. {uiform → retab}/types/documents/create_messages.py +18 -20
  51. {uiform → retab}/types/documents/extractions.py +69 -24
  52. {uiform → retab}/types/evals.py +5 -5
  53. retab/types/evaluations/__init__.py +31 -0
  54. retab/types/evaluations/documents.py +30 -0
  55. retab/types/evaluations/iterations.py +112 -0
  56. retab/types/evaluations/model.py +73 -0
  57. retab/types/events.py +79 -0
  58. {uiform → retab}/types/extractions.py +33 -10
  59. retab/types/inference_settings.py +15 -0
  60. retab/types/jobs/base.py +54 -0
  61. retab/types/jobs/batch_annotation.py +12 -0
  62. {uiform → retab}/types/jobs/evaluation.py +1 -2
  63. {uiform → retab}/types/logs.py +37 -34
  64. retab/types/metrics.py +32 -0
  65. {uiform → retab}/types/mime.py +22 -20
  66. {uiform → retab}/types/modalities.py +10 -10
  67. retab/types/predictions.py +19 -0
  68. {uiform → retab}/types/schemas/enhance.py +4 -2
  69. {uiform → retab}/types/schemas/evaluate.py +7 -4
  70. {uiform → retab}/types/schemas/generate.py +6 -3
  71. {uiform → retab}/types/schemas/layout.py +1 -1
  72. {uiform → retab}/types/schemas/object.py +13 -14
  73. {uiform → retab}/types/schemas/templates.py +1 -3
  74. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  75. {uiform → retab}/types/standards.py +18 -1
  76. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
  77. retab-0.0.37.dist-info/RECORD +107 -0
  78. retab-0.0.37.dist-info/top_level.txt +1 -0
  79. retab-0.0.35.dist-info/RECORD +0 -111
  80. retab-0.0.35.dist-info/top_level.txt +0 -1
  81. uiform/_utils/benchmarking copy.py +0 -588
  82. uiform/resources/deployments/__init__.py +0 -9
  83. uiform/resources/deployments/client.py +0 -78
  84. uiform/resources/deployments/endpoints.py +0 -322
  85. uiform/resources/deployments/links.py +0 -452
  86. uiform/resources/deployments/logs.py +0 -211
  87. uiform/resources/deployments/mailboxes.py +0 -496
  88. uiform/resources/deployments/outlook.py +0 -531
  89. uiform/resources/deployments/tests.py +0 -158
  90. uiform/resources/models.py +0 -45
  91. uiform/resources/processors/automations/client.py +0 -78
  92. uiform/resources/processors/automations/links.py +0 -356
  93. uiform/resources/processors/automations/outlook.py +0 -444
  94. uiform/resources/secrets/webhook.py +0 -62
  95. uiform/types/consensus.py +0 -10
  96. uiform/types/deployments/cron.py +0 -59
  97. uiform/types/deployments/endpoints.py +0 -28
  98. uiform/types/deployments/links.py +0 -36
  99. uiform/types/deployments/mailboxes.py +0 -67
  100. uiform/types/deployments/outlook.py +0 -76
  101. uiform/types/deployments/webhooks.py +0 -21
  102. uiform/types/events.py +0 -76
  103. uiform/types/jobs/base.py +0 -150
  104. uiform/types/jobs/batch_annotation.py +0 -22
  105. uiform/types/secrets/__init__.py +0 -0
  106. {uiform → retab}/__init__.py +0 -0
  107. {uiform → retab}/_resource.py +0 -0
  108. {uiform → retab}/_utils/__init__.py +0 -0
  109. {uiform → retab}/_utils/usage/__init__.py +0 -0
  110. {uiform → retab}/py.typed +0 -0
  111. {uiform → retab}/resources/__init__.py +0 -0
  112. {uiform → retab}/resources/consensus/__init__.py +0 -0
  113. {uiform → retab}/resources/documents/__init__.py +0 -0
  114. {uiform → retab}/resources/finetuning.py +0 -0
  115. {uiform → retab}/resources/openai_example.py +0 -0
  116. {uiform → retab}/resources/processors/__init__.py +0 -0
  117. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  118. {uiform → retab}/resources/prompt_optimization.py +0 -0
  119. {uiform → retab}/resources/secrets/__init__.py +0 -0
  120. {uiform → retab}/resources/secrets/client.py +0 -0
  121. {uiform → retab}/types/__init__.py +0 -0
  122. {uiform → retab}/types/automations/__init__.py +0 -0
  123. {uiform → retab}/types/automations/webhooks.py +0 -0
  124. {uiform → retab}/types/db/__init__.py +0 -0
  125. {uiform/types/deployments → retab/types/documents}/__init__.py +0 -0
  126. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  127. {uiform/types/documents → retab/types/jobs}/__init__.py +0 -0
  128. {uiform → retab}/types/jobs/finetune.py +0 -0
  129. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  130. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  131. {uiform → retab}/types/pagination.py +0 -0
  132. {uiform/types/jobs → retab/types/schemas}/__init__.py +0 -0
  133. {uiform/types/schemas → retab/types/secrets}/__init__.py +0 -0
  134. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
@@ -2,7 +2,7 @@ import base64
2
2
  import json
3
3
  from io import IOBase
4
4
  from pathlib import Path
5
- from typing import Any, AsyncGenerator, Generator, Literal, Optional
5
+ from typing import Any, AsyncGenerator, Generator
6
6
 
7
7
  from anthropic.types.message_param import MessageParam
8
8
  from openai.types.chat import ChatCompletionMessageParam
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
10
10
  from openai.types.chat.parsed_chat_completion import ParsedChatCompletionMessage
11
11
  from openai.types.responses.response import Response
12
12
  from openai.types.responses.response_input_param import ResponseInputItemParam
13
+ from pydantic_core import PydanticUndefined
13
14
  from pydantic import HttpUrl
14
15
 
15
16
  from ..._resource import AsyncAPIResource, SyncAPIResource
@@ -19,6 +20,7 @@ from ..._utils.mime import MIMEData, prepare_mime_document
19
20
  from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
20
21
  from ...types.chat import ChatCompletionUiformMessage
21
22
  from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
23
+ from ...types.browser_canvas import BrowserCanvas
22
24
  from ...types.modalities import Modality
23
25
  from ...types.schemas.object import Schema
24
26
  from ...types.standards import PreparedRequest
@@ -31,7 +33,7 @@ def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, al
31
33
  response.choices[0].message.parsed = schema._partial_pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
32
34
  else:
33
35
  response.choices[0].message.parsed = schema.pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
34
- except Exception as e:
36
+ except Exception:
35
37
  pass
36
38
  return response
37
39
 
@@ -40,15 +42,16 @@ class BaseExtractionsMixin:
40
42
  def prepare_extraction(
41
43
  self,
42
44
  json_schema: dict[str, Any] | Path | str,
43
- document: Path | str | IOBase | HttpUrl | None,
44
- image_resolution_dpi: int | None,
45
- browser_canvas: Literal['A3', 'A4', 'A5'] | None,
46
- model: str,
47
- temperature: float,
48
- modality: Modality,
49
- reasoning_effort: ChatCompletionReasoningEffort,
50
- stream: bool,
51
- n_consensus: int = 1,
45
+ document: Path | str | IOBase | HttpUrl | None = None,
46
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
47
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
48
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
49
+ model: str = PydanticUndefined, # type: ignore[assignment]
50
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
51
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
52
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
53
+ stream: bool = False,
54
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
52
55
  store: bool = False,
53
56
  idempotency_key: str | None = None,
54
57
  ) -> PreparedRequest:
@@ -56,26 +59,36 @@ class BaseExtractionsMixin:
56
59
 
57
60
  json_schema = load_json_schema(json_schema)
58
61
 
59
- data = {
60
- "json_schema": json_schema,
61
- "document": prepare_mime_document(document).model_dump() if document is not None else None,
62
- "model": model,
63
- "temperature": temperature,
64
- "stream": stream,
65
- "modality": modality,
66
- "store": store,
67
- "reasoning_effort": reasoning_effort,
68
- "n_consensus": n_consensus,
69
- }
70
- if image_resolution_dpi:
71
- data["image_resolution_dpi"] = image_resolution_dpi
72
- if browser_canvas:
73
- data["browser_canvas"] = browser_canvas
62
+ # Handle both single document and multiple documents
63
+ if document is not None and documents is not None:
64
+ raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
65
+
66
+ # Convert single document to documents list for consistency
67
+ if document is not None:
68
+ processed_documents = [prepare_mime_document(document)]
69
+ elif documents is not None:
70
+ processed_documents = [prepare_mime_document(doc) for doc in documents]
71
+ else:
72
+ raise ValueError("Must provide either 'document' or 'documents' parameter.")
74
73
 
75
74
  # Validate DocumentAPIRequest data (raises exception if invalid)
76
- document_extract_request = DocumentExtractRequest.model_validate(data)
75
+ request = DocumentExtractRequest(
76
+ json_schema=json_schema,
77
+ documents=processed_documents,
78
+ model=model,
79
+ temperature=temperature,
80
+ stream=stream,
81
+ modality=modality,
82
+ store=store,
83
+ reasoning_effort=reasoning_effort,
84
+ n_consensus=n_consensus,
85
+ image_resolution_dpi=image_resolution_dpi,
86
+ browser_canvas=browser_canvas,
87
+ )
77
88
 
78
- return PreparedRequest(method="POST", url="/v1/documents/extractions", data=document_extract_request.model_dump(), idempotency_key=idempotency_key)
89
+ return PreparedRequest(
90
+ method="POST", url="/v1/documents/extractions", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
91
+ )
79
92
 
80
93
  def prepare_log_extraction(
81
94
  self,
@@ -99,12 +112,14 @@ class BaseExtractionsMixin:
99
112
  # url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
100
113
  url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
101
114
  )
115
+ else:
116
+ mime_document = prepare_mime_document(document)
102
117
 
103
118
  return PreparedRequest(
104
119
  method="POST",
105
120
  url="/v1/documents/log_extraction",
106
121
  data=LogExtractionRequest(
107
- document=prepare_mime_document(document) if document else mime_document,
122
+ document=mime_document,
108
123
  messages=messages,
109
124
  openai_messages=openai_messages,
110
125
  anthropic_messages=anthropic_messages,
@@ -115,7 +130,7 @@ class BaseExtractionsMixin:
115
130
  json_schema=json_schema,
116
131
  model=model,
117
132
  temperature=temperature,
118
- ).model_dump(mode="json", by_alias=True), # by_alias is necessary to enable serialization/deserialization ('schema' was being converted to 'schema_')
133
+ ).model_dump(mode="json"),
119
134
  raise_for_status=True,
120
135
  )
121
136
 
@@ -127,23 +142,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
127
142
  self,
128
143
  json_schema: dict[str, Any] | Path | str,
129
144
  model: str,
130
- document: Path | str | IOBase | HttpUrl | None,
131
- image_resolution_dpi: int | None = None,
132
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
133
- temperature: float = 0,
134
- modality: Modality = "native",
135
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
136
- n_consensus: int = 1,
145
+ document: Path | str | IOBase | HttpUrl | None = None,
146
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
147
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
148
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
149
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
150
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
151
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
152
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
137
153
  idempotency_key: str | None = None,
138
154
  store: bool = False,
139
155
  ) -> UiParsedChatCompletion:
140
156
  """
141
- Process a document using the UiForm API.
157
+ Process one or more documents using the UiForm API.
142
158
 
143
159
  Args:
144
160
  json_schema: JSON schema defining the expected data structure
145
- document: Single document (as MIMEData) to process
146
161
  model: The AI model to use for processing
162
+ document: Single document to process (use either this or documents, not both)
163
+ documents: List of documents to process (use either this or document, not both)
164
+ image_resolution_dpi: Optional image resolution DPI
165
+ browser_canvas: Optional browser canvas size
147
166
  temperature: Model temperature setting (0-1)
148
167
  modality: Modality of the document (e.g., native)
149
168
  reasoning_effort: The effort level for the model to reason about the input data.
@@ -151,16 +170,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
151
170
  idempotency_key: Idempotency key for request
152
171
  store: Whether to store the document in the UiForm database
153
172
  Returns:
154
- DocumentAPIResponse
173
+ UiParsedChatCompletion: Parsed response from the API
155
174
  Raises:
156
- HTTPException if the request fails
175
+ ValueError: If neither document nor documents is provided, or if both are provided
176
+ HTTPException: If the request fails
157
177
  """
158
178
 
159
- assert document is not None, "Either document or messages must be provided"
160
-
161
179
  # Validate DocumentAPIRequest data (raises exception if invalid)
162
180
  request = self.prepare_extraction(
163
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
181
+ json_schema=json_schema,
182
+ document=document,
183
+ documents=documents,
184
+ image_resolution_dpi=image_resolution_dpi,
185
+ browser_canvas=browser_canvas,
186
+ model=model,
187
+ temperature=temperature,
188
+ modality=modality,
189
+ reasoning_effort=reasoning_effort,
190
+ stream=False,
191
+ n_consensus=n_consensus,
192
+ store=store,
193
+ idempotency_key=idempotency_key,
164
194
  )
165
195
  response = self._client._prepared_request(request)
166
196
 
@@ -172,25 +202,27 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
172
202
  self,
173
203
  json_schema: dict[str, Any] | Path | str,
174
204
  model: str,
175
- document: Path | str | IOBase | HttpUrl | None,
176
- image_resolution_dpi: int | None = None,
177
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
178
- temperature: float = 0,
179
- modality: Modality = "native",
180
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
181
- n_consensus: int = 1,
205
+ document: Path | str | IOBase | HttpUrl | None = None,
206
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
207
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
208
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
209
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
210
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
211
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
212
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
182
213
  idempotency_key: str | None = None,
183
214
  store: bool = False,
184
215
  ) -> Generator[UiParsedChatCompletion, None, None]:
185
216
  """
186
- Process a document using the UiForm API with streaming enabled.
217
+ Process one or more documents using the UiForm API with streaming enabled.
187
218
 
188
219
  Args:
189
220
  json_schema: JSON schema defining the expected data structure
190
- document: Single document (as MIMEData) to process
221
+ model: The AI model to use for processing
222
+ document: Single document to process (use either this or documents, not both)
223
+ documents: List of documents to process (use either this or document, not both)
191
224
  image_resolution_dpi: Optional image resolution DPI.
192
225
  browser_canvas: Optional browser canvas size.
193
- model: The AI model to use for processing
194
226
  temperature: Model temperature setting (0-1)
195
227
  modality: Modality of the document (e.g., native)
196
228
  reasoning_effort: The effort level for the model to reason about the input data.
@@ -199,18 +231,37 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
199
231
  store: Whether to store the document in the UiForm database
200
232
 
201
233
  Returns:
202
- Generator[DocumentExtractResponse]: Stream of parsed responses
234
+ Generator[UiParsedChatCompletion]: Stream of parsed responses
203
235
  Raises:
204
- HTTPException if the request fails
236
+ ValueError: If neither document nor documents is provided, or if both are provided
237
+ HTTPException: If the request fails
205
238
  Usage:
206
239
  ```python
207
- with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
240
+ # Single document
241
+ with uiform.documents.extractions.stream(json_schema, model, document=document) as stream:
242
+ for response in stream:
243
+ print(response)
244
+
245
+ # Multiple documents
246
+ with uiform.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
208
247
  for response in stream:
209
248
  print(response)
210
249
  ```
211
250
  """
212
251
  request = self.prepare_extraction(
213
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
252
+ json_schema=json_schema,
253
+ document=document,
254
+ documents=documents,
255
+ image_resolution_dpi=image_resolution_dpi,
256
+ browser_canvas=browser_canvas,
257
+ model=model,
258
+ temperature=temperature,
259
+ modality=modality,
260
+ reasoning_effort=reasoning_effort,
261
+ stream=True,
262
+ n_consensus=n_consensus,
263
+ store=store,
264
+ idempotency_key=idempotency_key,
214
265
  )
215
266
  schema = Schema(json_schema=load_json_schema(json_schema))
216
267
 
@@ -270,10 +321,10 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
270
321
  openai_responses_output: Response | None = None,
271
322
  ) -> None:
272
323
  request = self.prepare_log_extraction(
273
- document,
274
- json_schema,
275
- model,
276
- temperature,
324
+ document=document,
325
+ json_schema=json_schema,
326
+ model=model,
327
+ temperature=temperature,
277
328
  completion=completion,
278
329
  messages=messages,
279
330
  openai_messages=openai_messages,
@@ -292,25 +343,27 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
292
343
  self,
293
344
  json_schema: dict[str, Any] | Path | str,
294
345
  model: str,
295
- document: Path | str | IOBase | HttpUrl | None,
296
- image_resolution_dpi: int | None = None,
297
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
298
- temperature: float = 0,
299
- modality: Modality = "native",
300
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
301
- n_consensus: int = 1,
346
+ document: Path | str | IOBase | HttpUrl | None = None,
347
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
348
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
349
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
350
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
351
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
352
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
353
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
302
354
  idempotency_key: str | None = None,
303
355
  store: bool = False,
304
356
  ) -> UiParsedChatCompletion:
305
357
  """
306
- Extract structured data from a document asynchronously.
358
+ Extract structured data from one or more documents asynchronously.
307
359
 
308
360
  Args:
309
361
  json_schema: JSON schema defining the expected data structure.
310
- document: Path, string, or file-like object representing the document.
362
+ model: The AI model to use.
363
+ document: Single document to process (use either this or documents, not both)
364
+ documents: List of documents to process (use either this or document, not both)
311
365
  image_resolution_dpi: Optional image resolution DPI.
312
366
  browser_canvas: Optional browser canvas size.
313
- model: The AI model to use.
314
367
  temperature: Model temperature setting (0-1).
315
368
  modality: Modality of the document (e.g., native).
316
369
  reasoning_effort: The effort level for the model to reason about the input data.
@@ -318,10 +371,24 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
318
371
  idempotency_key: Idempotency key for request
319
372
  store: Whether to store the document in the UiForm database
320
373
  Returns:
321
- DocumentExtractResponse: Parsed response from the API.
374
+ UiParsedChatCompletion: Parsed response from the API.
375
+ Raises:
376
+ ValueError: If neither document nor documents is provided, or if both are provided
322
377
  """
323
378
  request = self.prepare_extraction(
324
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
379
+ json_schema=json_schema,
380
+ document=document,
381
+ documents=documents,
382
+ image_resolution_dpi=image_resolution_dpi,
383
+ browser_canvas=browser_canvas,
384
+ model=model,
385
+ temperature=temperature,
386
+ modality=modality,
387
+ reasoning_effort=reasoning_effort,
388
+ stream=False,
389
+ n_consensus=n_consensus,
390
+ store=store,
391
+ idempotency_key=idempotency_key,
325
392
  )
326
393
  response = await self._client._prepared_request(request)
327
394
  schema = Schema(json_schema=load_json_schema(json_schema))
@@ -332,23 +399,27 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
332
399
  self,
333
400
  json_schema: dict[str, Any] | Path | str,
334
401
  model: str,
335
- document: Path | str | IOBase | HttpUrl | None,
336
- image_resolution_dpi: int | None = None,
337
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
338
- temperature: float = 0,
339
- modality: Modality = "native",
340
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
341
- n_consensus: int = 1,
402
+ document: Path | str | IOBase | HttpUrl | None = None,
403
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
404
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
405
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
406
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
407
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
408
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
409
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
342
410
  idempotency_key: str | None = None,
343
411
  store: bool = False,
344
412
  ) -> AsyncGenerator[UiParsedChatCompletion, None]:
345
413
  """
346
- Extract structured data from a document asynchronously with streaming.
414
+ Extract structured data from one or more documents asynchronously with streaming.
347
415
 
348
416
  Args:
349
417
  json_schema: JSON schema defining the expected data structure.
350
- document: Path, string, or file-like object representing the document.
351
418
  model: The AI model to use.
419
+ document: Single document to process (use either this or documents, not both)
420
+ documents: List of documents to process (use either this or document, not both)
421
+ image_resolution_dpi: Optional image resolution DPI.
422
+ browser_canvas: Optional browser canvas size.
352
423
  temperature: Model temperature setting (0-1).
353
424
  modality: Modality of the document (e.g., native).
354
425
  reasoning_effort: The effort level for the model to reason about the input data.
@@ -356,17 +427,37 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
356
427
  idempotency_key: Idempotency key for request
357
428
  store: Whether to store the document in the UiForm database
358
429
  Returns:
359
- AsyncGenerator[DocumentExtractResponse, None]: Stream of parsed responses.
430
+ AsyncGenerator[UiParsedChatCompletion, None]: Stream of parsed responses.
431
+ Raises:
432
+ ValueError: If neither document nor documents is provided, or if both are provided
360
433
 
361
434
  Usage:
362
435
  ```python
363
- async with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
436
+ # Single document
437
+ async with uiform.documents.extractions.stream(json_schema, model, document=document) as stream:
438
+ async for response in stream:
439
+ print(response)
440
+
441
+ # Multiple documents
442
+ async with uiform.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
364
443
  async for response in stream:
365
444
  print(response)
366
445
  ```
367
446
  """
368
447
  request = self.prepare_extraction(
369
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
448
+ json_schema=json_schema,
449
+ document=document,
450
+ documents=documents,
451
+ image_resolution_dpi=image_resolution_dpi,
452
+ browser_canvas=browser_canvas,
453
+ model=model,
454
+ temperature=temperature,
455
+ modality=modality,
456
+ reasoning_effort=reasoning_effort,
457
+ stream=True,
458
+ n_consensus=n_consensus,
459
+ store=store,
460
+ idempotency_key=idempotency_key,
370
461
  )
371
462
  schema = Schema(json_schema=load_json_schema(json_schema))
372
463
  ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
@@ -426,10 +517,10 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
426
517
  openai_responses_output: Response | None = None,
427
518
  ) -> None:
428
519
  request = self.prepare_log_extraction(
429
- document,
430
- json_schema,
431
- model,
432
- temperature,
520
+ document=document,
521
+ json_schema=json_schema,
522
+ model=model,
523
+ temperature=temperature,
433
524
  completion=completion,
434
525
  messages=messages,
435
526
  openai_messages=openai_messages,
@@ -1,32 +1,28 @@
1
- from typing import Any, Dict, List, Optional, TypedDict, Union, Literal
2
1
  from io import IOBase
3
2
  from pathlib import Path
3
+ from typing import Any, Dict, List, Optional, TypedDict, Union
4
4
 
5
5
  import PIL.Image
6
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
6
7
  from pydantic import HttpUrl
7
8
 
8
9
  from .._resource import AsyncAPIResource, SyncAPIResource
9
- from ..types.standards import PreparedRequest
10
+ from .._utils.mime import prepare_mime_document
10
11
  from ..types.evals import (
12
+ CreateIterationRequest,
13
+ DistancesResult,
14
+ DocumentItem,
11
15
  Evaluation,
12
16
  EvaluationDocument,
13
17
  Iteration,
14
- DistancesResult,
15
- PredictionData,
16
- AddIterationFromJsonlRequest,
17
- DocumentItem,
18
18
  UpdateEvaluationDocumentRequest,
19
- PredictionMetadata,
20
- CreateIterationRequest,
19
+ UpdateEvaluationRequest,
21
20
  )
22
- from ..types.jobs.base import InferenceSettings
23
-
21
+ from ..types.inference_settings import InferenceSettings
24
22
  from ..types.mime import MIMEData
25
- from .._utils.mime import prepare_mime_document
26
23
  from ..types.modalities import Modality
27
- from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
28
-
29
- from tqdm import tqdm
24
+ from ..types.browser_canvas import BrowserCanvas
25
+ from ..types.standards import PreparedRequest
30
26
 
31
27
 
32
28
  class DeleteResponse(TypedDict):
@@ -82,21 +78,17 @@ class EvalsMixin:
82
78
  Only the provided fields will be updated. Fields set to None will be excluded from the update.
83
79
  """
84
80
  # Build a dictionary with only the provided fields
85
- update_data = {}
86
- if name is not None:
87
- update_data["name"] = name
88
- if project_id is not None:
89
- update_data["project_id"] = project_id
90
- if json_schema is not None:
91
- update_data["json_schema"] = json_schema
92
- if documents is not None:
93
- update_data["documents"] = [doc.model_dump(exclude_none=True, mode="json") for doc in documents]
94
- if iterations is not None:
95
- update_data["iterations"] = [iter.model_dump(exclude_none=True, mode="json") for iter in iterations]
96
- if default_inference_settings is not None:
97
- update_data["default_inference_settings"] = default_inference_settings.model_dump(exclude_none=True, mode="json")
98
-
99
- return PreparedRequest(method="PATCH", url=f"/v1/evals/{evaluation_id}", data=update_data)
81
+
82
+ update_request = UpdateEvaluationRequest(
83
+ name=name,
84
+ project_id=project_id,
85
+ json_schema=json_schema,
86
+ documents=documents,
87
+ iterations=iterations,
88
+ default_inference_settings=default_inference_settings,
89
+ )
90
+
91
+ return PreparedRequest(method="PATCH", url=f"/v1/evals/{evaluation_id}", data=update_request.model_dump(exclude_none=True, mode="json"))
100
92
 
101
93
  def prepare_list(self, project_id: Optional[str] = None) -> PreparedRequest:
102
94
  params = {}
@@ -114,9 +106,7 @@ class DocumentsMixin:
114
106
 
115
107
  def prepare_create(self, evaluation_id: str, document: MIMEData, annotation: Dict[str, Any]) -> PreparedRequest:
116
108
  # Serialize the MIMEData
117
-
118
109
  document_item = DocumentItem(mime_data=document, annotation=annotation, annotation_metadata=None)
119
-
120
110
  return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/documents", data=document_item.model_dump(mode="json"))
121
111
 
122
112
  def prepare_list(self, evaluation_id: str, filename: Optional[str] = None) -> PreparedRequest:
@@ -127,7 +117,6 @@ class DocumentsMixin:
127
117
 
128
118
  def prepare_update(self, evaluation_id: str, document_id: str, annotation: Dict[str, Any]) -> PreparedRequest:
129
119
  update_request = UpdateEvaluationDocumentRequest(annotation=annotation, annotation_metadata=None)
130
-
131
120
  return PreparedRequest(method="PUT", url=f"/v1/evals/{evaluation_id}/documents/{document_id}", data=update_request.model_dump(mode="json", exclude_none=True))
132
121
 
133
122
  def prepare_delete(self, evaluation_id: str, document_id: str) -> PreparedRequest:
@@ -153,10 +142,10 @@ class IterationsMixin:
153
142
  modality: Modality = "native",
154
143
  reasoning_effort: ChatCompletionReasoningEffort = "medium",
155
144
  image_resolution_dpi: int = 96,
156
- browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4',
145
+ browser_canvas: BrowserCanvas = "A4",
157
146
  n_consensus: int = 1,
158
147
  ) -> PreparedRequest:
159
- props = InferenceSettings(
148
+ inference_settings = InferenceSettings(
160
149
  model=model,
161
150
  temperature=temperature,
162
151
  modality=modality,
@@ -166,18 +155,30 @@ class IterationsMixin:
166
155
  n_consensus=n_consensus,
167
156
  )
168
157
 
169
- perform_iteration_request = CreateIterationRequest(inference_settings=props, json_schema=json_schema)
158
+ request = CreateIterationRequest(inference_settings=inference_settings, json_schema=json_schema)
170
159
 
171
- return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/iterations/create", data=perform_iteration_request.model_dump(exclude_none=True, mode="json"))
160
+ return PreparedRequest(method="POST", url=f"/v1/evals/{evaluation_id}/iterations/create", data=request.model_dump(exclude_none=True, mode="json"))
172
161
 
173
162
  def prepare_update(
174
- self, iteration_id: str, json_schema: Dict[str, Any], model: str, temperature: float = 0.0, image_resolution_dpi: int = 96, browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4'
163
+ self,
164
+ iteration_id: str,
165
+ json_schema: Dict[str, Any],
166
+ model: str,
167
+ temperature: float = 0.0,
168
+ modality: Modality = "native",
169
+ reasoning_effort: ChatCompletionReasoningEffort = "medium",
170
+ image_resolution_dpi: int = 96,
171
+ browser_canvas: BrowserCanvas = "A4",
172
+ n_consensus: int = 1,
175
173
  ) -> PreparedRequest:
176
174
  inference_settings = InferenceSettings(
177
175
  model=model,
178
176
  temperature=temperature,
177
+ modality=modality,
178
+ reasoning_effort=reasoning_effort,
179
179
  image_resolution_dpi=image_resolution_dpi,
180
180
  browser_canvas=browser_canvas,
181
+ n_consensus=n_consensus,
181
182
  )
182
183
 
183
184
  iteration_data = Iteration(id=iteration_id, json_schema=json_schema, inference_settings=inference_settings, predictions=[])
@@ -261,7 +262,13 @@ class Evals(SyncAPIResource, EvalsMixin):
261
262
  HTTPException if the request fails
262
263
  """
263
264
  request = self.prepare_update(
264
- evaluation_id=evaluation_id, name=name, project_id=project_id, json_schema=json_schema, documents=documents, iterations=iterations, default_inference_settings=default_inference_settings
265
+ evaluation_id=evaluation_id,
266
+ name=name,
267
+ project_id=project_id,
268
+ json_schema=json_schema,
269
+ documents=documents,
270
+ iterations=iterations,
271
+ default_inference_settings=default_inference_settings,
265
272
  )
266
273
  response = self._client._prepared_request(request)
267
274
  return Evaluation(**response)
@@ -429,7 +436,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
429
436
  json_schema: Optional[Dict[str, Any]] = None,
430
437
  reasoning_effort: ChatCompletionReasoningEffort = "medium",
431
438
  image_resolution_dpi: int = 96,
432
- browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4',
439
+ browser_canvas: BrowserCanvas = "A4",
433
440
  n_consensus: int = 1,
434
441
  ) -> Iteration:
435
442
  """
@@ -445,7 +452,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
445
452
  image_resolution_dpi: The DPI of the image. Defaults to 96.
446
453
  browser_canvas: The canvas size of the browser. Must be one of:
447
454
  - "A3" (11.7in x 16.54in)
448
- - "A4" (8.27in x 11.7in)
455
+ - "A4" (8.27in x 11.7in)
449
456
  - "A5" (5.83in x 8.27in)
450
457
  Defaults to "A4".
451
458
  n_consensus: Number of consensus iterations to perform
@@ -572,7 +579,13 @@ class AsyncEvals(AsyncAPIResource, EvalsMixin):
572
579
  HTTPException if the request fails
573
580
  """
574
581
  request = self.prepare_update(
575
- evaluation_id=evaluation_id, name=name, project_id=project_id, json_schema=json_schema, documents=documents, iterations=iterations, default_inference_settings=default_inference_settings
582
+ evaluation_id=evaluation_id,
583
+ name=name,
584
+ project_id=project_id,
585
+ json_schema=json_schema,
586
+ documents=documents,
587
+ iterations=iterations,
588
+ default_inference_settings=default_inference_settings,
576
589
  )
577
590
  response = await self._client._prepared_request(request)
578
591
  return Evaluation(**response)
@@ -739,7 +752,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
739
752
  json_schema: Optional[Dict[str, Any]] = None,
740
753
  reasoning_effort: ChatCompletionReasoningEffort = "medium",
741
754
  image_resolution_dpi: int = 96,
742
- browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4',
755
+ browser_canvas: BrowserCanvas = "A4",
743
756
  n_consensus: int = 1,
744
757
  ) -> Iteration:
745
758
  """
@@ -755,7 +768,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
755
768
  image_resolution_dpi: The DPI of the image. Defaults to 96.
756
769
  browser_canvas: The canvas size of the browser. Must be one of:
757
770
  - "A3" (11.7in x 16.54in)
758
- - "A4" (8.27in x 11.7in)
771
+ - "A4" (8.27in x 11.7in)
759
772
  - "A5" (5.83in x 8.27in)
760
773
  Defaults to "A4".
761
774
  n_consensus: Number of consensus iterations to perform