retab 0.0.41__py3-none-any.whl → 0.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. retab/__init__.py +2 -1
  2. retab/client.py +21 -50
  3. retab/resources/consensus/client.py +1 -1
  4. retab/resources/consensus/completions_stream.py +2 -2
  5. retab/resources/consensus/responses.py +1 -1
  6. retab/resources/documents/client.py +103 -76
  7. retab/resources/documents/extractions.py +55 -46
  8. retab/resources/evaluations/client.py +32 -19
  9. retab/resources/evaluations/documents.py +12 -11
  10. retab/resources/evaluations/iterations.py +48 -30
  11. retab/resources/jsonlUtils.py +3 -4
  12. retab/resources/processors/automations/endpoints.py +57 -43
  13. retab/resources/processors/automations/links.py +54 -45
  14. retab/resources/processors/automations/logs.py +2 -2
  15. retab/resources/processors/automations/mailboxes.py +116 -90
  16. retab/resources/processors/automations/outlook.py +126 -86
  17. retab/resources/processors/automations/tests.py +7 -1
  18. retab/resources/processors/client.py +37 -32
  19. retab/resources/usage.py +2 -0
  20. retab/types/ai_models.py +1 -1
  21. retab/types/automations/mailboxes.py +1 -1
  22. retab/types/deprecated_evals.py +195 -0
  23. retab/types/documents/extractions.py +2 -2
  24. retab/types/documents/parse.py +3 -1
  25. retab/types/evaluations/__init__.py +5 -2
  26. retab/types/evaluations/iterations.py +9 -43
  27. retab/types/evaluations/model.py +20 -22
  28. retab/types/extractions.py +35 -9
  29. retab/types/logs.py +5 -6
  30. retab/types/mime.py +1 -10
  31. retab/types/schemas/enhance.py +22 -5
  32. retab/types/schemas/evaluate.py +1 -1
  33. retab/types/schemas/object.py +26 -0
  34. retab/types/standards.py +2 -2
  35. retab/utils/__init__.py +3 -0
  36. retab/utils/ai_models.py +127 -12
  37. retab/utils/hashing.py +24 -0
  38. retab/utils/json_schema.py +1 -26
  39. retab/utils/mime.py +0 -17
  40. retab-0.0.43.dist-info/METADATA +117 -0
  41. {retab-0.0.41.dist-info → retab-0.0.43.dist-info}/RECORD +43 -57
  42. retab/_utils/__init__.py +0 -0
  43. retab/_utils/_model_cards/anthropic.yaml +0 -59
  44. retab/_utils/_model_cards/auto.yaml +0 -43
  45. retab/_utils/_model_cards/gemini.yaml +0 -117
  46. retab/_utils/_model_cards/openai.yaml +0 -301
  47. retab/_utils/_model_cards/xai.yaml +0 -28
  48. retab/_utils/ai_models.py +0 -138
  49. retab/_utils/benchmarking.py +0 -484
  50. retab/_utils/chat.py +0 -327
  51. retab/_utils/display.py +0 -440
  52. retab/_utils/json_schema.py +0 -2156
  53. retab/_utils/mime.py +0 -165
  54. retab/_utils/responses.py +0 -169
  55. retab/_utils/stream_context_managers.py +0 -52
  56. retab/_utils/usage/__init__.py +0 -0
  57. retab/_utils/usage/usage.py +0 -301
  58. retab-0.0.41.dist-info/METADATA +0 -418
  59. {retab-0.0.41.dist-info → retab-0.0.43.dist-info}/WHEEL +0 -0
  60. {retab-0.0.41.dist-info → retab-0.0.43.dist-info}/top_level.txt +0 -0
retab/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from .client import AsyncRetab, Retab
2
2
  from .types.schemas.object import Schema
3
+ from . import utils
3
4
 
4
- __all__ = ["Retab", "AsyncRetab", "Schema"]
5
+ __all__ = ["Retab", "AsyncRetab", "Schema", "utils"]
retab/client.py CHANGED
@@ -7,10 +7,9 @@ import backoff
7
7
  import backoff.types
8
8
  import httpx
9
9
  import truststore
10
- from pydantic_core import PydanticUndefined
11
10
 
12
- from .resources import consensus, documents, evals, files, finetuning, models, processors, schemas, secrets, usage, evaluations
13
- from .types.standards import PreparedRequest
11
+ from .resources import consensus, documents, files, finetuning, models, processors, schemas, secrets, usage, evaluations
12
+ from .types.standards import PreparedRequest, FieldUnset
14
13
 
15
14
 
16
15
  class MaxRetriesExceeded(Exception):
@@ -34,7 +33,7 @@ class BaseRetab:
34
33
 
35
34
  Args:
36
35
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
37
- base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.dev
36
+ base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
38
37
  timeout (float): Request timeout in seconds. Defaults to 240.0
39
38
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
40
39
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
@@ -43,32 +42,27 @@ class BaseRetab:
43
42
  ValueError: If no API key is provided through arguments or environment variables
44
43
  """
45
44
 
46
- # claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
47
- # xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
48
- # gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
49
-
50
45
  def __init__(
51
46
  self,
52
47
  api_key: Optional[str] = None,
53
48
  base_url: Optional[str] = None,
54
49
  timeout: float = 240.0,
55
50
  max_retries: int = 3,
56
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
57
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
58
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
59
- xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
51
+ openai_api_key: Optional[str] = FieldUnset,
52
+ gemini_api_key: Optional[str] = FieldUnset,
53
+ xai_api_key: Optional[str] = FieldUnset,
60
54
  ) -> None:
61
55
  if api_key is None:
62
56
  api_key = os.environ.get("RETAB_API_KEY")
63
57
 
64
58
  if api_key is None:
65
59
  raise ValueError(
66
- "No API key provided. You can create an API key at https://retab.dev\n"
60
+ "No API key provided. You can create an API key at https://retab.com\n"
67
61
  "Then either pass it to the client (api_key='your-key') or set the RETAB_API_KEY environment variable"
68
62
  )
69
63
 
70
64
  if base_url is None:
71
- base_url = os.environ.get("RETAB_API_BASE_URL", "https://api.retab.dev")
65
+ base_url = os.environ.get("RETAB_API_BASE_URL", "https://api.retab.com")
72
66
 
73
67
  truststore.inject_into_ssl()
74
68
  self.api_key = api_key
@@ -80,30 +74,21 @@ class BaseRetab:
80
74
  "Content-Type": "application/json",
81
75
  }
82
76
 
83
- # Only check environment variables if the value is PydanticUndefined
84
- if openai_api_key is PydanticUndefined:
77
+ # Only check environment variables if the value is FieldUnset
78
+ if openai_api_key is FieldUnset:
85
79
  openai_api_key = os.environ.get("OPENAI_API_KEY")
86
80
 
87
- # if claude_api_key is PydanticUndefined:
88
- # claude_api_key = os.environ.get("CLAUDE_API_KEY")
89
-
90
- # if xai_api_key is PydanticUndefined:
91
- # xai_api_key = os.environ.get("XAI_API_KEY")
92
-
93
- if gemini_api_key is PydanticUndefined:
81
+ if gemini_api_key is FieldUnset:
94
82
  gemini_api_key = os.environ.get("GEMINI_API_KEY")
95
83
 
96
- # Only add headers if the values are actual strings (not None or PydanticUndefined)
97
- if openai_api_key and openai_api_key is not PydanticUndefined:
84
+ # Only add headers if the values are actual strings (not None or FieldUnset)
85
+ if openai_api_key and openai_api_key is not FieldUnset:
98
86
  self.headers["OpenAI-Api-Key"] = openai_api_key
99
87
 
100
- # if claude_api_key and claude_api_key is not PydanticUndefined:
101
- # self.headers["Anthropic-Api-Key"] = claude_api_key
102
-
103
- if xai_api_key and xai_api_key is not PydanticUndefined:
88
+ if xai_api_key and xai_api_key is not FieldUnset:
104
89
  self.headers["XAI-Api-Key"] = xai_api_key
105
90
 
106
- if gemini_api_key and gemini_api_key is not PydanticUndefined:
91
+ if gemini_api_key and gemini_api_key is not FieldUnset:
107
92
  self.headers["Gemini-Api-Key"] = gemini_api_key
108
93
 
109
94
  def _prepare_url(self, endpoint: str) -> str:
@@ -154,12 +139,10 @@ class Retab(BaseRetab):
154
139
 
155
140
  Args:
156
141
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
157
- base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.dev
142
+ base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
158
143
  timeout (float): Request timeout in seconds. Defaults to 240.0
159
144
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
160
145
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
161
- claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
162
- xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
163
146
  gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
164
147
 
165
148
  Attributes:
@@ -179,10 +162,8 @@ class Retab(BaseRetab):
179
162
  base_url: Optional[str] = None,
180
163
  timeout: float = 240.0,
181
164
  max_retries: int = 3,
182
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
183
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
184
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
185
- # xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
165
+ openai_api_key: Optional[str] = FieldUnset,
166
+ gemini_api_key: Optional[str] = FieldUnset,
186
167
  ) -> None:
187
168
  super().__init__(
188
169
  api_key=api_key,
@@ -191,16 +172,12 @@ class Retab(BaseRetab):
191
172
  max_retries=max_retries,
192
173
  openai_api_key=openai_api_key,
193
174
  gemini_api_key=gemini_api_key,
194
- # claude_api_key=claude_api_key,
195
- # xai_api_key=xai_api_key,
196
175
  )
197
176
 
198
177
  self.client = httpx.Client(timeout=self.timeout)
199
- self.evals = evals.Evals(client=self)
200
178
  self.evaluations = evaluations.Evaluations(client=self)
201
179
  self.files = files.Files(client=self)
202
180
  self.fine_tuning = finetuning.FineTuning(client=self)
203
- # self.prompt_optimization = prompt_optimization.PromptOptimization(client=self)
204
181
  self.documents = documents.Documents(client=self)
205
182
  self.models = models.Models(client=self)
206
183
  self.schemas = schemas.Schemas(client=self)
@@ -422,7 +399,7 @@ class AsyncRetab(BaseRetab):
422
399
 
423
400
  Args:
424
401
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
425
- base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.dev
402
+ base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
426
403
  timeout (float): Request timeout in seconds. Defaults to 240.0
427
404
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
428
405
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
@@ -447,10 +424,8 @@ class AsyncRetab(BaseRetab):
447
424
  base_url: Optional[str] = None,
448
425
  timeout: float = 240.0,
449
426
  max_retries: int = 3,
450
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
451
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
452
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
453
- # xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
427
+ openai_api_key: Optional[str] = FieldUnset,
428
+ gemini_api_key: Optional[str] = FieldUnset,
454
429
  ) -> None:
455
430
  super().__init__(
456
431
  api_key=api_key,
@@ -459,17 +434,13 @@ class AsyncRetab(BaseRetab):
459
434
  max_retries=max_retries,
460
435
  openai_api_key=openai_api_key,
461
436
  gemini_api_key=gemini_api_key,
462
- # claude_api_key=claude_api_key,
463
- # xai_api_key=xai_api_key,
464
437
  )
465
438
 
466
439
  self.client = httpx.AsyncClient(timeout=self.timeout)
467
440
 
468
- self.evals = evals.AsyncEvals(client=self)
469
441
  self.evaluations = evaluations.AsyncEvaluations(client=self)
470
442
  self.files = files.AsyncFiles(client=self)
471
443
  self.fine_tuning = finetuning.AsyncFineTuning(client=self)
472
- # self.prompt_optimization = prompt_optimization.AsyncPromptOptimization(client=self)
473
444
  self.documents = documents.AsyncDocuments(client=self)
474
445
  self.models = models.AsyncModels(client=self)
475
446
  self.schemas = schemas.AsyncSchemas(client=self)
@@ -21,7 +21,7 @@ class BaseConsensusMixin:
21
21
  mode=mode,
22
22
  )
23
23
 
24
- return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(), idempotency_key=idempotency_key)
24
+ return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
25
25
 
26
26
 
27
27
  class Consensus(SyncAPIResource, BaseConsensusMixin):
@@ -123,7 +123,7 @@ class Completions(SyncAPIResource, BaseCompletionsMixin):
123
123
 
124
124
  Usage:
125
125
  ```python
126
- with retab.devpletions.stream(json_schema, messages, model, temperature, reasoning_effort) as stream:
126
+ with retab.completions.stream(json_schema, messages, model, temperature, reasoning_effort) as stream:
127
127
  for response in stream:
128
128
  print(response)
129
129
  ```
@@ -210,7 +210,7 @@ class AsyncCompletions(AsyncAPIResource, BaseCompletionsMixin):
210
210
 
211
211
  Usage:
212
212
  ```python
213
- async with retab.devpletions.stream(json_schema, messages, model, temperature, reasoning_effort, n_consensus) as stream:
213
+ async with retab.completions.stream(json_schema, messages, model, temperature, reasoning_effort, n_consensus) as stream:
214
214
  async for response in stream:
215
215
  print(response)
216
216
  ```
@@ -55,7 +55,7 @@ class BaseResponsesMixin:
55
55
  instructions=instructions,
56
56
  )
57
57
 
58
- return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(), idempotency_key=idempotency_key)
58
+ return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
59
59
 
60
60
  def prepare_parse(
61
61
  self,
@@ -4,7 +4,6 @@ from typing import Any, Literal
4
4
 
5
5
  import PIL.Image
6
6
  from pydantic import HttpUrl
7
- from pydantic_core import PydanticUndefined
8
7
  from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
9
8
 
10
9
  from ..._resource import AsyncAPIResource, SyncAPIResource
@@ -17,9 +16,9 @@ from ...types.documents.parse import ParseRequest, ParseResult, TableParsingForm
17
16
  from ...types.browser_canvas import BrowserCanvas
18
17
  from ...types.mime import MIMEData
19
18
  from ...types.modalities import Modality
19
+ from ...types.ai_models import LLMModel
20
20
  from ...types.schemas.object import Schema
21
- from ...types.standards import PreparedRequest
22
- from .extractions import AsyncExtractions, Extractions
21
+ from ...types.standards import PreparedRequest, FieldUnset
23
22
 
24
23
 
25
24
  def maybe_parse_to_pydantic(schema: Schema, response: RetabParsedChatCompletion, allow_partial: bool = False) -> RetabParsedChatCompletion:
@@ -39,40 +38,50 @@ class BaseDocumentsMixin:
39
38
  self,
40
39
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
41
40
  modality: Modality = "native",
42
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
43
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
41
+ image_resolution_dpi: int = FieldUnset,
42
+ browser_canvas: BrowserCanvas = FieldUnset,
44
43
  idempotency_key: str | None = None,
45
44
  ) -> PreparedRequest:
46
45
  mime_document = prepare_mime_document(document)
47
46
 
48
- loading_request = DocumentCreateMessageRequest(
49
- document=mime_document,
50
- modality=modality,
51
- image_resolution_dpi=image_resolution_dpi,
52
- browser_canvas=browser_canvas,
47
+ loading_request_dict = {
48
+ "document": mime_document,
49
+ "modality": modality,
50
+ }
51
+ if image_resolution_dpi is not FieldUnset:
52
+ loading_request_dict["image_resolution_dpi"] = image_resolution_dpi
53
+ if browser_canvas is not FieldUnset:
54
+ loading_request_dict["browser_canvas"] = browser_canvas
55
+
56
+ loading_request = DocumentCreateMessageRequest(**loading_request_dict)
57
+ return PreparedRequest(
58
+ method="POST", url="/v1/documents/create_messages", data=loading_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key
53
59
  )
54
- return PreparedRequest(method="POST", url="/v1/documents/create_messages", data=loading_request.model_dump(), idempotency_key=idempotency_key)
55
60
 
56
61
  def _prepare_create_inputs(
57
62
  self,
58
63
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
59
64
  json_schema: dict[str, Any] | Path | str,
60
65
  modality: Modality = "native",
61
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment],
62
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment],
66
+ image_resolution_dpi: int = FieldUnset,
67
+ browser_canvas: BrowserCanvas = FieldUnset,
63
68
  idempotency_key: str | None = None,
64
69
  ) -> PreparedRequest:
65
70
  mime_document = prepare_mime_document(document)
66
71
  loaded_schema = load_json_schema(json_schema)
67
72
 
68
- loading_request = DocumentCreateInputRequest(
69
- document=mime_document,
70
- modality=modality,
71
- json_schema=loaded_schema,
72
- image_resolution_dpi=image_resolution_dpi,
73
- browser_canvas=browser_canvas,
74
- )
75
- return PreparedRequest(method="POST", url="/v1/documents/create_inputs", data=loading_request.model_dump(), idempotency_key=idempotency_key)
73
+ loading_request_dict = {
74
+ "document": mime_document,
75
+ "modality": modality,
76
+ "json_schema": loaded_schema,
77
+ }
78
+ if image_resolution_dpi is not FieldUnset:
79
+ loading_request_dict["image_resolution_dpi"] = image_resolution_dpi
80
+ if browser_canvas is not FieldUnset:
81
+ loading_request_dict["browser_canvas"] = browser_canvas
82
+
83
+ loading_request = DocumentCreateInputRequest(**loading_request_dict)
84
+ return PreparedRequest(method="POST", url="/v1/documents/create_inputs", data=loading_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
76
85
 
77
86
  def _prepare_correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PreparedRequest:
78
87
  mime_document = prepare_mime_document(document)
@@ -89,7 +98,7 @@ class BaseDocumentsMixin:
89
98
  def _prepare_parse(
90
99
  self,
91
100
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
92
- fast_mode: bool = False,
101
+ model: LLMModel,
93
102
  table_parsing_format: TableParsingFormat = "html",
94
103
  image_resolution_dpi: int = 72,
95
104
  browser_canvas: BrowserCanvas = "A4",
@@ -99,12 +108,12 @@ class BaseDocumentsMixin:
99
108
 
100
109
  parse_request = ParseRequest(
101
110
  document=mime_document,
102
- fast_mode=fast_mode,
111
+ model=model,
103
112
  table_parsing_format=table_parsing_format,
104
113
  image_resolution_dpi=image_resolution_dpi,
105
114
  browser_canvas=browser_canvas,
106
115
  )
107
- return PreparedRequest(method="POST", url="/v1/documents/parse", data=parse_request.model_dump(), idempotency_key=idempotency_key)
116
+ return PreparedRequest(method="POST", url="/v1/documents/parse", data=parse_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
108
117
 
109
118
 
110
119
  class Documents(SyncAPIResource, BaseDocumentsMixin):
@@ -144,8 +153,8 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
144
153
  self,
145
154
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
146
155
  modality: Modality = "native",
147
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
148
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
156
+ image_resolution_dpi: int = FieldUnset,
157
+ browser_canvas: BrowserCanvas = FieldUnset,
149
158
  idempotency_key: str | None = None,
150
159
  ) -> DocumentMessage:
151
160
  """
@@ -174,8 +183,8 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
174
183
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
175
184
  json_schema: dict[str, Any] | Path | str,
176
185
  modality: Modality = "native",
177
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
178
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
186
+ image_resolution_dpi: int = FieldUnset,
187
+ browser_canvas: BrowserCanvas = FieldUnset,
179
188
  idempotency_key: str | None = None,
180
189
  ) -> DocumentMessage:
181
190
  """
@@ -211,12 +220,12 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
211
220
  model: str,
212
221
  document: Path | str | IOBase | HttpUrl | None = None,
213
222
  documents: list[Path | str | IOBase | HttpUrl] | None = None,
214
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
215
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
216
- temperature: float = PydanticUndefined, # type: ignore[assignment]
217
- modality: Modality = PydanticUndefined, # type: ignore[assignment]
218
- reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
219
- n_consensus: int = PydanticUndefined, # type: ignore[assignment]
223
+ image_resolution_dpi: int = FieldUnset,
224
+ browser_canvas: BrowserCanvas = FieldUnset,
225
+ temperature: float = FieldUnset,
226
+ modality: Modality = FieldUnset,
227
+ reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
228
+ n_consensus: int = FieldUnset,
220
229
  idempotency_key: str | None = None,
221
230
  store: bool = False,
222
231
  ) -> RetabParsedChatCompletion:
@@ -263,20 +272,29 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
263
272
  else:
264
273
  raise ValueError("Must provide either 'document' or 'documents' parameter.")
265
274
 
275
+ # Build request dictionary with only provided fields
276
+ request_dict = {
277
+ "json_schema": json_schema,
278
+ "documents": processed_documents,
279
+ "model": model,
280
+ "stream": False,
281
+ "store": store,
282
+ }
283
+ if temperature is not FieldUnset:
284
+ request_dict["temperature"] = temperature
285
+ if modality is not FieldUnset:
286
+ request_dict["modality"] = modality
287
+ if reasoning_effort is not FieldUnset:
288
+ request_dict["reasoning_effort"] = reasoning_effort
289
+ if n_consensus is not FieldUnset:
290
+ request_dict["n_consensus"] = n_consensus
291
+ if image_resolution_dpi is not FieldUnset:
292
+ request_dict["image_resolution_dpi"] = image_resolution_dpi
293
+ if browser_canvas is not FieldUnset:
294
+ request_dict["browser_canvas"] = browser_canvas
295
+
266
296
  # Validate DocumentAPIRequest data (raises exception if invalid)
267
- request = DocumentExtractRequest(
268
- json_schema=json_schema,
269
- documents=processed_documents,
270
- model=model,
271
- temperature=temperature,
272
- stream=False,
273
- modality=modality,
274
- store=store,
275
- reasoning_effort=reasoning_effort,
276
- n_consensus=n_consensus,
277
- image_resolution_dpi=image_resolution_dpi,
278
- browser_canvas=browser_canvas,
279
- )
297
+ request = DocumentExtractRequest(**request_dict)
280
298
 
281
299
  prepared_request = PreparedRequest(
282
300
  method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
@@ -290,7 +308,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
290
308
  def parse(
291
309
  self,
292
310
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
293
- fast_mode: bool = False,
311
+ model: LLMModel,
294
312
  table_parsing_format: TableParsingFormat = "html",
295
313
  image_resolution_dpi: int = 72,
296
314
  browser_canvas: BrowserCanvas = "A4",
@@ -304,7 +322,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
304
322
 
305
323
  Args:
306
324
  document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
307
- fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
325
+ model: The AI model to use for document parsing.
308
326
  table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
309
327
  image_resolution_dpi: DPI for image processing. Defaults to 72.
310
328
  browser_canvas: Canvas size for document rendering. Defaults to "A4".
@@ -318,7 +336,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
318
336
  """
319
337
  request = self._prepare_parse(
320
338
  document=document,
321
- fast_mode=fast_mode,
339
+ model=model,
322
340
  table_parsing_format=table_parsing_format,
323
341
  image_resolution_dpi=image_resolution_dpi,
324
342
  browser_canvas=browser_canvas,
@@ -339,8 +357,8 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
339
357
  self,
340
358
  document: Path | str | IOBase | MIMEData | PIL.Image.Image,
341
359
  modality: Modality = "native",
342
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
343
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
360
+ image_resolution_dpi: int = FieldUnset,
361
+ browser_canvas: BrowserCanvas = FieldUnset,
344
362
  idempotency_key: str | None = None,
345
363
  ) -> DocumentMessage:
346
364
  """
@@ -371,8 +389,8 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
371
389
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
372
390
  json_schema: dict[str, Any] | Path | str,
373
391
  modality: Modality = "native",
374
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
375
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
392
+ image_resolution_dpi: int = FieldUnset,
393
+ browser_canvas: BrowserCanvas = FieldUnset,
376
394
  idempotency_key: str | None = None,
377
395
  ) -> DocumentMessage:
378
396
  """
@@ -433,12 +451,12 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
433
451
  model: str,
434
452
  document: Path | str | IOBase | HttpUrl | None = None,
435
453
  documents: list[Path | str | IOBase | HttpUrl] | None = None,
436
- image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
437
- browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
438
- temperature: float = PydanticUndefined, # type: ignore[assignment]
439
- modality: Modality = PydanticUndefined, # type: ignore[assignment]
440
- reasoning_effort: ChatCompletionReasoningEffort = PydanticUndefined, # type: ignore[assignment]
441
- n_consensus: int = PydanticUndefined, # type: ignore[assignment]
454
+ image_resolution_dpi: int = FieldUnset,
455
+ browser_canvas: BrowserCanvas = FieldUnset,
456
+ temperature: float = FieldUnset,
457
+ modality: Modality = FieldUnset,
458
+ reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
459
+ n_consensus: int = FieldUnset,
442
460
  idempotency_key: str | None = None,
443
461
  store: bool = False,
444
462
  ) -> RetabParsedChatCompletion:
@@ -485,20 +503,29 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
485
503
  else:
486
504
  raise ValueError("Must provide either 'document' or 'documents' parameter.")
487
505
 
506
+ # Build request dictionary with only provided fields
507
+ request_dict = {
508
+ "json_schema": json_schema,
509
+ "documents": processed_documents,
510
+ "model": model,
511
+ "stream": False,
512
+ "store": store,
513
+ }
514
+ if temperature is not FieldUnset:
515
+ request_dict["temperature"] = temperature
516
+ if modality is not FieldUnset:
517
+ request_dict["modality"] = modality
518
+ if reasoning_effort is not FieldUnset:
519
+ request_dict["reasoning_effort"] = reasoning_effort
520
+ if n_consensus is not FieldUnset:
521
+ request_dict["n_consensus"] = n_consensus
522
+ if image_resolution_dpi is not FieldUnset:
523
+ request_dict["image_resolution_dpi"] = image_resolution_dpi
524
+ if browser_canvas is not FieldUnset:
525
+ request_dict["browser_canvas"] = browser_canvas
526
+
488
527
  # Validate DocumentAPIRequest data (raises exception if invalid)
489
- request = DocumentExtractRequest(
490
- json_schema=json_schema,
491
- documents=processed_documents,
492
- model=model,
493
- temperature=temperature,
494
- stream=False,
495
- modality=modality,
496
- store=store,
497
- reasoning_effort=reasoning_effort,
498
- n_consensus=n_consensus,
499
- image_resolution_dpi=image_resolution_dpi,
500
- browser_canvas=browser_canvas,
501
- )
528
+ request = DocumentExtractRequest(**request_dict)
502
529
 
503
530
  prepared_request = PreparedRequest(
504
531
  method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
@@ -512,7 +539,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
512
539
  async def parse(
513
540
  self,
514
541
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
515
- fast_mode: bool = False,
542
+ model: LLMModel,
516
543
  table_parsing_format: TableParsingFormat = "html",
517
544
  image_resolution_dpi: int = 72,
518
545
  browser_canvas: BrowserCanvas = "A4",
@@ -526,7 +553,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
526
553
 
527
554
  Args:
528
555
  document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
529
- fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
556
+ model: The AI model to use for document parsing.
530
557
  table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
531
558
  image_resolution_dpi: DPI for image processing. Defaults to 72.
532
559
  browser_canvas: Canvas size for document rendering. Defaults to "A4".
@@ -540,7 +567,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
540
567
  """
541
568
  request = self._prepare_parse(
542
569
  document=document,
543
- fast_mode=fast_mode,
570
+ model=model,
544
571
  table_parsing_format=table_parsing_format,
545
572
  image_resolution_dpi=image_resolution_dpi,
546
573
  browser_canvas=browser_canvas,