retab 0.0.36__py3-none-any.whl → 0.0.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. retab/__init__.py +4 -0
  2. {uiform → retab}/_resource.py +5 -5
  3. {uiform → retab}/_utils/ai_models.py +2 -2
  4. {uiform → retab}/_utils/benchmarking.py +15 -16
  5. {uiform → retab}/_utils/chat.py +29 -34
  6. {uiform → retab}/_utils/display.py +0 -3
  7. {uiform → retab}/_utils/json_schema.py +9 -14
  8. {uiform → retab}/_utils/mime.py +11 -14
  9. {uiform → retab}/_utils/responses.py +16 -10
  10. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  11. {uiform → retab}/_utils/usage/usage.py +31 -31
  12. {uiform → retab}/client.py +54 -53
  13. {uiform → retab}/resources/consensus/client.py +19 -38
  14. {uiform → retab}/resources/consensus/completions.py +36 -59
  15. {uiform → retab}/resources/consensus/completions_stream.py +35 -47
  16. {uiform → retab}/resources/consensus/responses.py +37 -86
  17. {uiform → retab}/resources/consensus/responses_stream.py +41 -89
  18. retab/resources/documents/client.py +455 -0
  19. {uiform → retab}/resources/documents/extractions.py +192 -101
  20. {uiform → retab}/resources/evals.py +56 -43
  21. retab/resources/evaluations/__init__.py +3 -0
  22. retab/resources/evaluations/client.py +301 -0
  23. retab/resources/evaluations/documents.py +233 -0
  24. retab/resources/evaluations/iterations.py +452 -0
  25. {uiform → retab}/resources/files.py +2 -2
  26. {uiform → retab}/resources/jsonlUtils.py +225 -221
  27. retab/resources/models.py +73 -0
  28. retab/resources/processors/automations/client.py +244 -0
  29. {uiform → retab}/resources/processors/automations/endpoints.py +79 -120
  30. retab/resources/processors/automations/links.py +294 -0
  31. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  32. retab/resources/processors/automations/mailboxes.py +397 -0
  33. retab/resources/processors/automations/outlook.py +337 -0
  34. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  35. {uiform → retab}/resources/processors/client.py +181 -166
  36. {uiform → retab}/resources/schemas.py +78 -66
  37. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  38. retab/resources/secrets/webhook.py +64 -0
  39. {uiform → retab}/resources/usage.py +41 -4
  40. {uiform → retab}/types/ai_models.py +17 -17
  41. {uiform → retab}/types/automations/cron.py +19 -12
  42. {uiform → retab}/types/automations/endpoints.py +7 -4
  43. {uiform → retab}/types/automations/links.py +7 -3
  44. {uiform → retab}/types/automations/mailboxes.py +10 -10
  45. {uiform → retab}/types/automations/outlook.py +15 -11
  46. {uiform → retab}/types/automations/webhooks.py +1 -1
  47. retab/types/browser_canvas.py +3 -0
  48. retab/types/chat.py +8 -0
  49. {uiform → retab}/types/completions.py +12 -15
  50. retab/types/consensus.py +19 -0
  51. {uiform → retab}/types/db/annotations.py +3 -3
  52. {uiform → retab}/types/db/files.py +8 -6
  53. {uiform → retab}/types/documents/create_messages.py +20 -22
  54. {uiform → retab}/types/documents/extractions.py +71 -26
  55. {uiform → retab}/types/evals.py +5 -5
  56. retab/types/evaluations/__init__.py +31 -0
  57. retab/types/evaluations/documents.py +30 -0
  58. retab/types/evaluations/iterations.py +112 -0
  59. retab/types/evaluations/model.py +73 -0
  60. retab/types/events.py +79 -0
  61. {uiform → retab}/types/extractions.py +36 -13
  62. retab/types/inference_settings.py +15 -0
  63. retab/types/jobs/base.py +54 -0
  64. retab/types/jobs/batch_annotation.py +12 -0
  65. {uiform → retab}/types/jobs/evaluation.py +1 -2
  66. {uiform → retab}/types/logs.py +37 -34
  67. retab/types/metrics.py +32 -0
  68. {uiform → retab}/types/mime.py +22 -20
  69. {uiform → retab}/types/modalities.py +10 -10
  70. retab/types/predictions.py +19 -0
  71. {uiform → retab}/types/schemas/enhance.py +4 -2
  72. {uiform → retab}/types/schemas/evaluate.py +7 -4
  73. {uiform → retab}/types/schemas/generate.py +6 -3
  74. {uiform → retab}/types/schemas/layout.py +1 -1
  75. {uiform → retab}/types/schemas/object.py +16 -17
  76. {uiform → retab}/types/schemas/templates.py +1 -3
  77. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  78. {uiform → retab}/types/standards.py +18 -1
  79. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/METADATA +78 -77
  80. retab-0.0.38.dist-info/RECORD +107 -0
  81. retab-0.0.38.dist-info/top_level.txt +1 -0
  82. retab-0.0.36.dist-info/RECORD +0 -96
  83. retab-0.0.36.dist-info/top_level.txt +0 -1
  84. uiform/__init__.py +0 -4
  85. uiform/_utils/benchmarking copy.py +0 -588
  86. uiform/resources/documents/client.py +0 -255
  87. uiform/resources/models.py +0 -45
  88. uiform/resources/processors/automations/client.py +0 -78
  89. uiform/resources/processors/automations/links.py +0 -356
  90. uiform/resources/processors/automations/mailboxes.py +0 -435
  91. uiform/resources/processors/automations/outlook.py +0 -444
  92. uiform/resources/secrets/webhook.py +0 -62
  93. uiform/types/chat.py +0 -8
  94. uiform/types/consensus.py +0 -10
  95. uiform/types/events.py +0 -76
  96. uiform/types/jobs/base.py +0 -150
  97. uiform/types/jobs/batch_annotation.py +0 -22
  98. {uiform → retab}/_utils/__init__.py +0 -0
  99. {uiform → retab}/_utils/usage/__init__.py +0 -0
  100. {uiform → retab}/py.typed +0 -0
  101. {uiform → retab}/resources/__init__.py +0 -0
  102. {uiform → retab}/resources/consensus/__init__.py +0 -0
  103. {uiform → retab}/resources/documents/__init__.py +0 -0
  104. {uiform → retab}/resources/finetuning.py +0 -0
  105. {uiform → retab}/resources/openai_example.py +0 -0
  106. {uiform → retab}/resources/processors/__init__.py +0 -0
  107. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  108. {uiform → retab}/resources/prompt_optimization.py +0 -0
  109. {uiform → retab}/resources/secrets/__init__.py +0 -0
  110. {uiform → retab}/resources/secrets/client.py +0 -0
  111. {uiform → retab}/types/__init__.py +0 -0
  112. {uiform → retab}/types/automations/__init__.py +0 -0
  113. {uiform → retab}/types/db/__init__.py +0 -0
  114. {uiform → retab}/types/documents/__init__.py +0 -0
  115. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  116. {uiform → retab}/types/jobs/__init__.py +0 -0
  117. {uiform → retab}/types/jobs/finetune.py +0 -0
  118. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  119. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  120. {uiform → retab}/types/pagination.py +0 -0
  121. {uiform → retab}/types/schemas/__init__.py +0 -0
  122. {uiform → retab}/types/secrets/__init__.py +0 -0
  123. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
@@ -2,7 +2,7 @@ import base64
2
2
  import json
3
3
  from io import IOBase
4
4
  from pathlib import Path
5
- from typing import Any, AsyncGenerator, Generator, Literal, Optional
5
+ from typing import Any, AsyncGenerator, Generator
6
6
 
7
7
  from anthropic.types.message_param import MessageParam
8
8
  from openai.types.chat import ChatCompletionMessageParam
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
10
10
  from openai.types.chat.parsed_chat_completion import ParsedChatCompletionMessage
11
11
  from openai.types.responses.response import Response
12
12
  from openai.types.responses.response_input_param import ResponseInputItemParam
13
+ from pydantic_core import PydanticUndefined
13
14
  from pydantic import HttpUrl
14
15
 
15
16
  from ..._resource import AsyncAPIResource, SyncAPIResource
@@ -17,8 +18,9 @@ from ..._utils.ai_models import assert_valid_model_extraction
17
18
  from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
18
19
  from ..._utils.mime import MIMEData, prepare_mime_document
19
20
  from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
20
- from ...types.chat import ChatCompletionUiformMessage
21
+ from ...types.chat import ChatCompletionRetabMessage
21
22
  from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, UiParsedChatCompletion, UiParsedChatCompletionChunk, UiParsedChoice
23
+ from ...types.browser_canvas import BrowserCanvas
22
24
  from ...types.modalities import Modality
23
25
  from ...types.schemas.object import Schema
24
26
  from ...types.standards import PreparedRequest
@@ -31,7 +33,7 @@ def maybe_parse_to_pydantic(schema: Schema, response: UiParsedChatCompletion, al
31
33
  response.choices[0].message.parsed = schema._partial_pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
32
34
  else:
33
35
  response.choices[0].message.parsed = schema.pydantic_model.model_validate(filter_auxiliary_fields_json(response.choices[0].message.content))
34
- except Exception as e:
36
+ except Exception:
35
37
  pass
36
38
  return response
37
39
 
@@ -40,15 +42,16 @@ class BaseExtractionsMixin:
40
42
  def prepare_extraction(
41
43
  self,
42
44
  json_schema: dict[str, Any] | Path | str,
43
- document: Path | str | IOBase | HttpUrl | None,
44
- image_resolution_dpi: int | None,
45
- browser_canvas: Literal['A3', 'A4', 'A5'] | None,
46
- model: str,
47
- temperature: float,
48
- modality: Modality,
49
- reasoning_effort: ChatCompletionReasoningEffort,
50
- stream: bool,
51
- n_consensus: int = 1,
45
+ document: Path | str | IOBase | HttpUrl | None = None,
46
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
47
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
48
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
49
+ model: str = PydanticUndefined, # type: ignore[assignment]
50
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
51
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
52
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
53
+ stream: bool = False,
54
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
52
55
  store: bool = False,
53
56
  idempotency_key: str | None = None,
54
57
  ) -> PreparedRequest:
@@ -56,26 +59,36 @@ class BaseExtractionsMixin:
56
59
 
57
60
  json_schema = load_json_schema(json_schema)
58
61
 
59
- data = {
60
- "json_schema": json_schema,
61
- "document": prepare_mime_document(document).model_dump() if document is not None else None,
62
- "model": model,
63
- "temperature": temperature,
64
- "stream": stream,
65
- "modality": modality,
66
- "store": store,
67
- "reasoning_effort": reasoning_effort,
68
- "n_consensus": n_consensus,
69
- }
70
- if image_resolution_dpi:
71
- data["image_resolution_dpi"] = image_resolution_dpi
72
- if browser_canvas:
73
- data["browser_canvas"] = browser_canvas
62
+ # Handle both single document and multiple documents
63
+ if document is not None and documents is not None:
64
+ raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
65
+
66
+ # Convert single document to documents list for consistency
67
+ if document is not None:
68
+ processed_documents = [prepare_mime_document(document)]
69
+ elif documents is not None:
70
+ processed_documents = [prepare_mime_document(doc) for doc in documents]
71
+ else:
72
+ raise ValueError("Must provide either 'document' or 'documents' parameter.")
74
73
 
75
74
  # Validate DocumentAPIRequest data (raises exception if invalid)
76
- document_extract_request = DocumentExtractRequest.model_validate(data)
75
+ request = DocumentExtractRequest(
76
+ json_schema=json_schema,
77
+ documents=processed_documents,
78
+ model=model,
79
+ temperature=temperature,
80
+ stream=stream,
81
+ modality=modality,
82
+ store=store,
83
+ reasoning_effort=reasoning_effort,
84
+ n_consensus=n_consensus,
85
+ image_resolution_dpi=image_resolution_dpi,
86
+ browser_canvas=browser_canvas,
87
+ )
77
88
 
78
- return PreparedRequest(method="POST", url="/v1/documents/extractions", data=document_extract_request.model_dump(), idempotency_key=idempotency_key)
89
+ return PreparedRequest(
90
+ method="POST", url="/v1/documents/extractions", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
91
+ )
79
92
 
80
93
  def prepare_log_extraction(
81
94
  self,
@@ -84,8 +97,8 @@ class BaseExtractionsMixin:
84
97
  model: str,
85
98
  temperature: float,
86
99
  completion: Any | None = None,
87
- # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
88
- messages: list[ChatCompletionUiformMessage] | None = None,
100
+ # The messages can be provided in different formats, we will convert them to the Retab-compatible format
101
+ messages: list[ChatCompletionRetabMessage] | None = None,
89
102
  openai_messages: list[ChatCompletionMessageParam] | None = None,
90
103
  anthropic_messages: list[MessageParam] | None = None,
91
104
  anthropic_system_prompt: str | None = None,
@@ -99,12 +112,14 @@ class BaseExtractionsMixin:
99
112
  # url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
100
113
  url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
101
114
  )
115
+ else:
116
+ mime_document = prepare_mime_document(document)
102
117
 
103
118
  return PreparedRequest(
104
119
  method="POST",
105
120
  url="/v1/documents/log_extraction",
106
121
  data=LogExtractionRequest(
107
- document=prepare_mime_document(document) if document else mime_document,
122
+ document=mime_document,
108
123
  messages=messages,
109
124
  openai_messages=openai_messages,
110
125
  anthropic_messages=anthropic_messages,
@@ -115,7 +130,7 @@ class BaseExtractionsMixin:
115
130
  json_schema=json_schema,
116
131
  model=model,
117
132
  temperature=temperature,
118
- ).model_dump(mode="json", by_alias=True), # by_alias is necessary to enable serialization/deserialization ('schema' was being converted to 'schema_')
133
+ ).model_dump(mode="json"),
119
134
  raise_for_status=True,
120
135
  )
121
136
 
@@ -127,40 +142,55 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
127
142
  self,
128
143
  json_schema: dict[str, Any] | Path | str,
129
144
  model: str,
130
- document: Path | str | IOBase | HttpUrl | None,
131
- image_resolution_dpi: int | None = None,
132
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
133
- temperature: float = 0,
134
- modality: Modality = "native",
135
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
136
- n_consensus: int = 1,
145
+ document: Path | str | IOBase | HttpUrl | None = None,
146
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
147
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
148
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
149
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
150
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
151
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
152
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
137
153
  idempotency_key: str | None = None,
138
154
  store: bool = False,
139
155
  ) -> UiParsedChatCompletion:
140
156
  """
141
- Process a document using the UiForm API.
157
+ Process one or more documents using the Retab API.
142
158
 
143
159
  Args:
144
160
  json_schema: JSON schema defining the expected data structure
145
- document: Single document (as MIMEData) to process
146
161
  model: The AI model to use for processing
162
+ document: Single document to process (use either this or documents, not both)
163
+ documents: List of documents to process (use either this or document, not both)
164
+ image_resolution_dpi: Optional image resolution DPI
165
+ browser_canvas: Optional browser canvas size
147
166
  temperature: Model temperature setting (0-1)
148
167
  modality: Modality of the document (e.g., native)
149
168
  reasoning_effort: The effort level for the model to reason about the input data.
150
169
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
151
170
  idempotency_key: Idempotency key for request
152
- store: Whether to store the document in the UiForm database
171
+ store: Whether to store the document in the Retab database
153
172
  Returns:
154
- DocumentAPIResponse
173
+ UiParsedChatCompletion: Parsed response from the API
155
174
  Raises:
156
- HTTPException if the request fails
175
+ ValueError: If neither document nor documents is provided, or if both are provided
176
+ HTTPException: If the request fails
157
177
  """
158
178
 
159
- assert document is not None, "Either document or messages must be provided"
160
-
161
179
  # Validate DocumentAPIRequest data (raises exception if invalid)
162
180
  request = self.prepare_extraction(
163
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
181
+ json_schema=json_schema,
182
+ document=document,
183
+ documents=documents,
184
+ image_resolution_dpi=image_resolution_dpi,
185
+ browser_canvas=browser_canvas,
186
+ model=model,
187
+ temperature=temperature,
188
+ modality=modality,
189
+ reasoning_effort=reasoning_effort,
190
+ stream=False,
191
+ n_consensus=n_consensus,
192
+ store=store,
193
+ idempotency_key=idempotency_key,
164
194
  )
165
195
  response = self._client._prepared_request(request)
166
196
 
@@ -172,45 +202,66 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
172
202
  self,
173
203
  json_schema: dict[str, Any] | Path | str,
174
204
  model: str,
175
- document: Path | str | IOBase | HttpUrl | None,
176
- image_resolution_dpi: int | None = None,
177
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
178
- temperature: float = 0,
179
- modality: Modality = "native",
180
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
181
- n_consensus: int = 1,
205
+ document: Path | str | IOBase | HttpUrl | None = None,
206
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
207
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
208
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
209
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
210
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
211
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
212
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
182
213
  idempotency_key: str | None = None,
183
214
  store: bool = False,
184
215
  ) -> Generator[UiParsedChatCompletion, None, None]:
185
216
  """
186
- Process a document using the UiForm API with streaming enabled.
217
+ Process one or more documents using the Retab API with streaming enabled.
187
218
 
188
219
  Args:
189
220
  json_schema: JSON schema defining the expected data structure
190
- document: Single document (as MIMEData) to process
221
+ model: The AI model to use for processing
222
+ document: Single document to process (use either this or documents, not both)
223
+ documents: List of documents to process (use either this or document, not both)
191
224
  image_resolution_dpi: Optional image resolution DPI.
192
225
  browser_canvas: Optional browser canvas size.
193
- model: The AI model to use for processing
194
226
  temperature: Model temperature setting (0-1)
195
227
  modality: Modality of the document (e.g., native)
196
228
  reasoning_effort: The effort level for the model to reason about the input data.
197
229
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
198
230
  idempotency_key: Idempotency key for request
199
- store: Whether to store the document in the UiForm database
231
+ store: Whether to store the document in the Retab database
200
232
 
201
233
  Returns:
202
- Generator[DocumentExtractResponse]: Stream of parsed responses
234
+ Generator[UiParsedChatCompletion]: Stream of parsed responses
203
235
  Raises:
204
- HTTPException if the request fails
236
+ ValueError: If neither document nor documents is provided, or if both are provided
237
+ HTTPException: If the request fails
205
238
  Usage:
206
239
  ```python
207
- with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
240
+ # Single document
241
+ with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
242
+ for response in stream:
243
+ print(response)
244
+
245
+ # Multiple documents
246
+ with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
208
247
  for response in stream:
209
248
  print(response)
210
249
  ```
211
250
  """
212
251
  request = self.prepare_extraction(
213
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
252
+ json_schema=json_schema,
253
+ document=document,
254
+ documents=documents,
255
+ image_resolution_dpi=image_resolution_dpi,
256
+ browser_canvas=browser_canvas,
257
+ model=model,
258
+ temperature=temperature,
259
+ modality=modality,
260
+ reasoning_effort=reasoning_effort,
261
+ stream=True,
262
+ n_consensus=n_consensus,
263
+ store=store,
264
+ idempotency_key=idempotency_key,
214
265
  )
215
266
  schema = Schema(json_schema=load_json_schema(json_schema))
216
267
 
@@ -260,8 +311,8 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
260
311
  model: str,
261
312
  temperature: float,
262
313
  completion: Any | None = None,
263
- # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
264
- messages: list[ChatCompletionUiformMessage] | None = None,
314
+ # The messages can be provided in different formats, we will convert them to the Retab-compatible format
315
+ messages: list[ChatCompletionRetabMessage] | None = None,
265
316
  openai_messages: list[ChatCompletionMessageParam] | None = None,
266
317
  anthropic_messages: list[MessageParam] | None = None,
267
318
  anthropic_system_prompt: str | None = None,
@@ -270,10 +321,10 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
270
321
  openai_responses_output: Response | None = None,
271
322
  ) -> None:
272
323
  request = self.prepare_log_extraction(
273
- document,
274
- json_schema,
275
- model,
276
- temperature,
324
+ document=document,
325
+ json_schema=json_schema,
326
+ model=model,
327
+ temperature=temperature,
277
328
  completion=completion,
278
329
  messages=messages,
279
330
  openai_messages=openai_messages,
@@ -292,36 +343,52 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
292
343
  self,
293
344
  json_schema: dict[str, Any] | Path | str,
294
345
  model: str,
295
- document: Path | str | IOBase | HttpUrl | None,
296
- image_resolution_dpi: int | None = None,
297
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
298
- temperature: float = 0,
299
- modality: Modality = "native",
300
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
301
- n_consensus: int = 1,
346
+ document: Path | str | IOBase | HttpUrl | None = None,
347
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
348
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
349
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
350
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
351
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
352
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
353
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
302
354
  idempotency_key: str | None = None,
303
355
  store: bool = False,
304
356
  ) -> UiParsedChatCompletion:
305
357
  """
306
- Extract structured data from a document asynchronously.
358
+ Extract structured data from one or more documents asynchronously.
307
359
 
308
360
  Args:
309
361
  json_schema: JSON schema defining the expected data structure.
310
- document: Path, string, or file-like object representing the document.
362
+ model: The AI model to use.
363
+ document: Single document to process (use either this or documents, not both)
364
+ documents: List of documents to process (use either this or document, not both)
311
365
  image_resolution_dpi: Optional image resolution DPI.
312
366
  browser_canvas: Optional browser canvas size.
313
- model: The AI model to use.
314
367
  temperature: Model temperature setting (0-1).
315
368
  modality: Modality of the document (e.g., native).
316
369
  reasoning_effort: The effort level for the model to reason about the input data.
317
370
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
318
371
  idempotency_key: Idempotency key for request
319
- store: Whether to store the document in the UiForm database
372
+ store: Whether to store the document in the Retab database
320
373
  Returns:
321
- DocumentExtractResponse: Parsed response from the API.
374
+ UiParsedChatCompletion: Parsed response from the API.
375
+ Raises:
376
+ ValueError: If neither document nor documents is provided, or if both are provided
322
377
  """
323
378
  request = self.prepare_extraction(
324
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, False, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
379
+ json_schema=json_schema,
380
+ document=document,
381
+ documents=documents,
382
+ image_resolution_dpi=image_resolution_dpi,
383
+ browser_canvas=browser_canvas,
384
+ model=model,
385
+ temperature=temperature,
386
+ modality=modality,
387
+ reasoning_effort=reasoning_effort,
388
+ stream=False,
389
+ n_consensus=n_consensus,
390
+ store=store,
391
+ idempotency_key=idempotency_key,
325
392
  )
326
393
  response = await self._client._prepared_request(request)
327
394
  schema = Schema(json_schema=load_json_schema(json_schema))
@@ -332,41 +399,65 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
332
399
  self,
333
400
  json_schema: dict[str, Any] | Path | str,
334
401
  model: str,
335
- document: Path | str | IOBase | HttpUrl | None,
336
- image_resolution_dpi: int | None = None,
337
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
338
- temperature: float = 0,
339
- modality: Modality = "native",
340
- reasoning_effort: ChatCompletionReasoningEffort = "medium",
341
- n_consensus: int = 1,
402
+ document: Path | str | IOBase | HttpUrl | None = None,
403
+ documents: list[Path | str | IOBase | HttpUrl] | None = None,
404
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
405
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
406
+ temperature: float = PydanticUndefined, # type: ignore[assignment]
407
+ modality: Modality = PydanticUndefined, # type: ignore[assignment]
408
+ reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
409
+ n_consensus: int = PydanticUndefined, # type: ignore[assignment]
342
410
  idempotency_key: str | None = None,
343
411
  store: bool = False,
344
412
  ) -> AsyncGenerator[UiParsedChatCompletion, None]:
345
413
  """
346
- Extract structured data from a document asynchronously with streaming.
414
+ Extract structured data from one or more documents asynchronously with streaming.
347
415
 
348
416
  Args:
349
417
  json_schema: JSON schema defining the expected data structure.
350
- document: Path, string, or file-like object representing the document.
351
418
  model: The AI model to use.
419
+ document: Single document to process (use either this or documents, not both)
420
+ documents: List of documents to process (use either this or document, not both)
421
+ image_resolution_dpi: Optional image resolution DPI.
422
+ browser_canvas: Optional browser canvas size.
352
423
  temperature: Model temperature setting (0-1).
353
424
  modality: Modality of the document (e.g., native).
354
425
  reasoning_effort: The effort level for the model to reason about the input data.
355
426
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
356
427
  idempotency_key: Idempotency key for request
357
- store: Whether to store the document in the UiForm database
428
+ store: Whether to store the document in the Retab database
358
429
  Returns:
359
- AsyncGenerator[DocumentExtractResponse, None]: Stream of parsed responses.
430
+ AsyncGenerator[UiParsedChatCompletion, None]: Stream of parsed responses.
431
+ Raises:
432
+ ValueError: If neither document nor documents is provided, or if both are provided
360
433
 
361
434
  Usage:
362
435
  ```python
363
- async with uiform.documents.extractions.stream(json_schema, document, model, temperature, reasoning_effort, modality) as stream:
436
+ # Single document
437
+ async with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
438
+ async for response in stream:
439
+ print(response)
440
+
441
+ # Multiple documents
442
+ async with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
364
443
  async for response in stream:
365
444
  print(response)
366
445
  ```
367
446
  """
368
447
  request = self.prepare_extraction(
369
- json_schema, document, image_resolution_dpi, browser_canvas, model, temperature, modality, reasoning_effort, True, n_consensus=n_consensus, store=store, idempotency_key=idempotency_key
448
+ json_schema=json_schema,
449
+ document=document,
450
+ documents=documents,
451
+ image_resolution_dpi=image_resolution_dpi,
452
+ browser_canvas=browser_canvas,
453
+ model=model,
454
+ temperature=temperature,
455
+ modality=modality,
456
+ reasoning_effort=reasoning_effort,
457
+ stream=True,
458
+ n_consensus=n_consensus,
459
+ store=store,
460
+ idempotency_key=idempotency_key,
370
461
  )
371
462
  schema = Schema(json_schema=load_json_schema(json_schema))
372
463
  ui_parsed_chat_completion_cum_chunk: UiParsedChatCompletionChunk | None = None
@@ -416,8 +507,8 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
416
507
  model: str,
417
508
  temperature: float,
418
509
  completion: Any | None = None,
419
- # The messages can be provided in different formats, we will convert them to the UiForm-compatible format
420
- messages: list[ChatCompletionUiformMessage] | None = None,
510
+ # The messages can be provided in different formats, we will convert them to the Retab-compatible format
511
+ messages: list[ChatCompletionRetabMessage] | None = None,
421
512
  openai_messages: list[ChatCompletionMessageParam] | None = None,
422
513
  anthropic_messages: list[MessageParam] | None = None,
423
514
  anthropic_system_prompt: str | None = None,
@@ -426,10 +517,10 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
426
517
  openai_responses_output: Response | None = None,
427
518
  ) -> None:
428
519
  request = self.prepare_log_extraction(
429
- document,
430
- json_schema,
431
- model,
432
- temperature,
520
+ document=document,
521
+ json_schema=json_schema,
522
+ model=model,
523
+ temperature=temperature,
433
524
  completion=completion,
434
525
  messages=messages,
435
526
  openai_messages=openai_messages,