retab 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/client.py +5 -5
- retab/resources/consensus/completions.py +1 -1
- retab/resources/consensus/completions_stream.py +5 -5
- retab/resources/consensus/responses.py +1 -1
- retab/resources/consensus/responses_stream.py +2 -2
- retab/resources/documents/client.py +12 -11
- retab/resources/documents/extractions.py +4 -4
- retab/resources/evals.py +1 -1
- retab/resources/evaluations/documents.py +1 -1
- retab/resources/jsonlUtils.py +4 -4
- retab/resources/processors/automations/endpoints.py +9 -5
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +43 -32
- retab/resources/processors/automations/outlook.py +25 -7
- retab/resources/processors/automations/tests.py +8 -2
- retab/resources/processors/client.py +25 -16
- retab/resources/prompt_optimization.py +1 -1
- retab/resources/schemas.py +3 -3
- retab/types/automations/mailboxes.py +1 -1
- retab/types/completions.py +1 -1
- retab/types/documents/create_messages.py +4 -4
- retab/types/documents/extractions.py +3 -3
- retab/types/documents/parse.py +3 -1
- retab/types/evals.py +2 -2
- retab/types/evaluations/iterations.py +2 -2
- retab/types/evaluations/model.py +2 -2
- retab/types/extractions.py +34 -9
- retab/types/jobs/prompt_optimization.py +1 -1
- retab/types/logs.py +3 -3
- retab/types/schemas/object.py +4 -4
- retab/types/schemas/templates.py +1 -1
- retab/utils/__init__.py +0 -0
- retab/utils/_model_cards/anthropic.yaml +59 -0
- retab/utils/_model_cards/auto.yaml +43 -0
- retab/utils/_model_cards/gemini.yaml +117 -0
- retab/utils/_model_cards/openai.yaml +301 -0
- retab/utils/_model_cards/xai.yaml +28 -0
- retab/utils/ai_models.py +138 -0
- retab/utils/benchmarking.py +484 -0
- retab/utils/chat.py +327 -0
- retab/utils/display.py +440 -0
- retab/utils/json_schema.py +2156 -0
- retab/utils/mime.py +165 -0
- retab/utils/responses.py +169 -0
- retab/utils/stream_context_managers.py +52 -0
- retab/utils/usage/__init__.py +0 -0
- retab/utils/usage/usage.py +301 -0
- retab-0.0.42.dist-info/METADATA +119 -0
- {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/RECORD +52 -36
- retab-0.0.40.dist-info/METADATA +0 -418
- {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/WHEEL +0 -0
- {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,13 @@ from typing import Any, Literal, List
|
|
3
3
|
from pydantic_core import PydanticUndefined
|
4
4
|
|
5
5
|
from ...._resource import AsyncAPIResource, SyncAPIResource
|
6
|
-
from ....types.automations.outlook import
|
6
|
+
from ....types.automations.outlook import (
|
7
|
+
FetchParams,
|
8
|
+
ListOutlooks,
|
9
|
+
MatchParams,
|
10
|
+
Outlook,
|
11
|
+
UpdateOutlookRequest,
|
12
|
+
)
|
7
13
|
from ....types.standards import PreparedRequest
|
8
14
|
|
9
15
|
|
@@ -38,10 +44,15 @@ class OutlooksMixin:
|
|
38
44
|
match_params=match_params,
|
39
45
|
fetch_params=fetch_params,
|
40
46
|
)
|
41
|
-
return PreparedRequest(
|
47
|
+
return PreparedRequest(
|
48
|
+
method="POST",
|
49
|
+
url=self.outlooks_base_url,
|
50
|
+
data=outlook_data.model_dump(mode="json"),
|
51
|
+
)
|
42
52
|
|
43
53
|
def prepare_list(
|
44
54
|
self,
|
55
|
+
processor_id: str,
|
45
56
|
before: str | None = None,
|
46
57
|
after: str | None = None,
|
47
58
|
limit: int = 10,
|
@@ -50,6 +61,7 @@ class OutlooksMixin:
|
|
50
61
|
webhook_url: str | None = None,
|
51
62
|
) -> PreparedRequest:
|
52
63
|
params = {
|
64
|
+
"processor_id": processor_id,
|
53
65
|
"before": before,
|
54
66
|
"after": after,
|
55
67
|
"limit": limit,
|
@@ -92,7 +104,11 @@ class OutlooksMixin:
|
|
92
104
|
fetch_params=fetch_params,
|
93
105
|
)
|
94
106
|
|
95
|
-
return PreparedRequest(
|
107
|
+
return PreparedRequest(
|
108
|
+
method="PUT",
|
109
|
+
url=f"{self.outlooks_base_url}/{outlook_id}",
|
110
|
+
data=update_outlook_request.model_dump(mode="json"),
|
111
|
+
)
|
96
112
|
|
97
113
|
def prepare_delete(self, outlook_id: str) -> PreparedRequest:
|
98
114
|
return PreparedRequest(method="DELETE", url=f"{self.outlooks_base_url}/{outlook_id}")
|
@@ -149,12 +165,13 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
|
|
149
165
|
)
|
150
166
|
response = self._client._prepared_request(request)
|
151
167
|
|
152
|
-
print(f"Outlook plugin created. Url: https://www.retab.
|
168
|
+
print(f"Outlook plugin created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
153
169
|
|
154
170
|
return Outlook.model_validate(response)
|
155
171
|
|
156
172
|
def list(
|
157
173
|
self,
|
174
|
+
processor_id: str,
|
158
175
|
before: str | None = None,
|
159
176
|
after: str | None = None,
|
160
177
|
limit: int = 10,
|
@@ -174,7 +191,7 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
|
|
174
191
|
Returns:
|
175
192
|
List[Outlook]: List of outlook plugin configurations
|
176
193
|
"""
|
177
|
-
request = self.prepare_list(before, after, limit, order, name, webhook_url)
|
194
|
+
request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
|
178
195
|
response = self._client._prepared_request(request)
|
179
196
|
return ListOutlooks.model_validate(response)
|
180
197
|
|
@@ -280,11 +297,12 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
|
|
280
297
|
fetch_params=fetch_params,
|
281
298
|
)
|
282
299
|
response = await self._client._prepared_request(request)
|
283
|
-
print(f"Outlook plugin created. Url: https://www.retab.
|
300
|
+
print(f"Outlook plugin created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
284
301
|
return Outlook.model_validate(response)
|
285
302
|
|
286
303
|
async def list(
|
287
304
|
self,
|
305
|
+
processor_id: str,
|
288
306
|
before: str | None = None,
|
289
307
|
after: str | None = None,
|
290
308
|
limit: int = 10,
|
@@ -292,7 +310,7 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
|
|
292
310
|
name: str | None = None,
|
293
311
|
webhook_url: str | None = None,
|
294
312
|
) -> ListOutlooks:
|
295
|
-
request = self.prepare_list(before, after, limit, order, name, webhook_url)
|
313
|
+
request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
|
296
314
|
response = await self._client._prepared_request(request)
|
297
315
|
return ListOutlooks.model_validate(response)
|
298
316
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import json
|
2
|
+
import base64
|
2
3
|
from io import IOBase
|
3
4
|
from pathlib import Path
|
4
5
|
|
@@ -6,7 +7,7 @@ from PIL.Image import Image
|
|
6
7
|
from pydantic import HttpUrl
|
7
8
|
|
8
9
|
from ...._resource import AsyncAPIResource, SyncAPIResource
|
9
|
-
from ....
|
10
|
+
from ....utils.mime import prepare_mime_document
|
10
11
|
from ....types.logs import AutomationLog
|
11
12
|
from ....types.mime import MIMEData
|
12
13
|
from ....types.standards import PreparedRequest
|
@@ -15,7 +16,12 @@ from ....types.standards import PreparedRequest
|
|
15
16
|
class TestsMixin:
|
16
17
|
def prepare_upload(self, automation_id: str, document: Path | str | IOBase | HttpUrl | Image | MIMEData) -> PreparedRequest:
|
17
18
|
mime_document = prepare_mime_document(document)
|
18
|
-
|
19
|
+
|
20
|
+
# Convert MIME document to file upload format (similar to processors client)
|
21
|
+
files = {"file": (mime_document.filename, base64.b64decode(mime_document.content), mime_document.mime_type)}
|
22
|
+
|
23
|
+
# Send as multipart form data with file upload
|
24
|
+
return PreparedRequest(method="POST", url=f"/v1/processors/automations/tests/upload/{automation_id}", files=files)
|
19
25
|
|
20
26
|
def prepare_webhook(self, automation_id: str) -> PreparedRequest:
|
21
27
|
return PreparedRequest(method="POST", url=f"/v1/processors/automations/tests/webhook/{automation_id}", data=None)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import base64
|
2
2
|
from io import IOBase
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Any,
|
4
|
+
from typing import Any, List, Literal
|
5
5
|
|
6
6
|
import PIL.Image
|
7
7
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
@@ -9,8 +9,9 @@ from pydantic import BaseModel, HttpUrl
|
|
9
9
|
from pydantic_core import PydanticUndefined
|
10
10
|
|
11
11
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
12
|
-
from ...
|
13
|
-
from ...
|
12
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
13
|
+
from ...utils.json_schema import load_json_schema
|
14
|
+
from ...utils.mime import MIMEData, prepare_mime_document
|
14
15
|
from ...types.browser_canvas import BrowserCanvas
|
15
16
|
from ...types.documents.extractions import RetabParsedChatCompletion
|
16
17
|
from ...types.logs import ProcessorConfig, UpdateProcessorRequest
|
@@ -31,7 +32,7 @@ class ProcessorsMixin:
|
|
31
32
|
def prepare_create(
|
32
33
|
self,
|
33
34
|
name: str,
|
34
|
-
json_schema: dict[str, Any],
|
35
|
+
json_schema: dict[str, Any] | Path | str,
|
35
36
|
modality: Modality = "native",
|
36
37
|
model: str = "gpt-4o-mini",
|
37
38
|
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
@@ -42,9 +43,12 @@ class ProcessorsMixin:
|
|
42
43
|
) -> PreparedRequest:
|
43
44
|
assert_valid_model_extraction(model)
|
44
45
|
|
46
|
+
# Load the JSON schema from file path, string, or dict
|
47
|
+
loaded_schema = load_json_schema(json_schema)
|
48
|
+
|
45
49
|
processor_config = ProcessorConfig(
|
46
50
|
name=name,
|
47
|
-
json_schema=
|
51
|
+
json_schema=loaded_schema,
|
48
52
|
modality=modality,
|
49
53
|
model=model,
|
50
54
|
temperature=temperature,
|
@@ -104,7 +108,7 @@ class ProcessorsMixin:
|
|
104
108
|
image_resolution_dpi: int | None = None,
|
105
109
|
browser_canvas: BrowserCanvas | None = None,
|
106
110
|
model: str | None = None,
|
107
|
-
json_schema: dict[str, Any] | None = None,
|
111
|
+
json_schema: dict[str, Any] | Path | str | None = None,
|
108
112
|
temperature: float | None = None,
|
109
113
|
reasoning_effort: ChatCompletionReasoningEffort | None = None,
|
110
114
|
n_consensus: int | None = None,
|
@@ -112,13 +116,18 @@ class ProcessorsMixin:
|
|
112
116
|
if model is not None:
|
113
117
|
assert_valid_model_extraction(model)
|
114
118
|
|
119
|
+
# Load the JSON schema from file path, string, or dict if provided
|
120
|
+
loaded_schema = None
|
121
|
+
if json_schema is not None:
|
122
|
+
loaded_schema = load_json_schema(json_schema)
|
123
|
+
|
115
124
|
update_request = UpdateProcessorRequest(
|
116
125
|
name=name,
|
117
126
|
modality=modality,
|
118
127
|
image_resolution_dpi=image_resolution_dpi,
|
119
128
|
browser_canvas=browser_canvas,
|
120
129
|
model=model,
|
121
|
-
json_schema=
|
130
|
+
json_schema=loaded_schema,
|
122
131
|
temperature=temperature,
|
123
132
|
reasoning_effort=reasoning_effort,
|
124
133
|
n_consensus=n_consensus,
|
@@ -203,7 +212,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
203
212
|
def create(
|
204
213
|
self,
|
205
214
|
name: str,
|
206
|
-
json_schema:
|
215
|
+
json_schema: dict[str, Any] | Path | str,
|
207
216
|
modality: Modality = "native",
|
208
217
|
model: str = "gpt-4o-mini",
|
209
218
|
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
@@ -216,7 +225,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
216
225
|
|
217
226
|
Args:
|
218
227
|
name: Name of the processor
|
219
|
-
json_schema: JSON schema for the processor
|
228
|
+
json_schema: JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
|
220
229
|
image_resolution_dpi: Optional image resolution DPI
|
221
230
|
browser_canvas: Optional browser canvas size
|
222
231
|
modality: Processing modality (currently only "native" supported)
|
@@ -239,7 +248,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
239
248
|
n_consensus=n_consensus,
|
240
249
|
)
|
241
250
|
response = self._client._prepared_request(request)
|
242
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.
|
251
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.com/dashboard/processors/{response['id']}")
|
243
252
|
return ProcessorConfig.model_validate(response)
|
244
253
|
|
245
254
|
def list(
|
@@ -295,7 +304,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
295
304
|
image_resolution_dpi: int | None = None,
|
296
305
|
browser_canvas: BrowserCanvas | None = None,
|
297
306
|
model: str | None = None,
|
298
|
-
json_schema: dict[str, Any] | None = None,
|
307
|
+
json_schema: dict[str, Any] | Path | str | None = None,
|
299
308
|
temperature: float | None = None,
|
300
309
|
reasoning_effort: ChatCompletionReasoningEffort | None = None,
|
301
310
|
n_consensus: int | None = None,
|
@@ -309,7 +318,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
309
318
|
image_resolution_dpi: New image resolution DPI
|
310
319
|
browser_canvas: New browser canvas size
|
311
320
|
model: New AI model
|
312
|
-
json_schema: New JSON schema for the processor
|
321
|
+
json_schema: New JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
|
313
322
|
temperature: New temperature setting
|
314
323
|
reasoning_effort: The effort level for the model to reason about the input data.
|
315
324
|
n_consensus: New number of consensus required
|
@@ -378,7 +387,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
378
387
|
async def create(
|
379
388
|
self,
|
380
389
|
name: str,
|
381
|
-
json_schema:
|
390
|
+
json_schema: dict[str, Any] | Path | str,
|
382
391
|
modality: Modality = "native",
|
383
392
|
model: str = "gpt-4o-mini",
|
384
393
|
temperature: float = PydanticUndefined, # type: ignore[assignment]
|
@@ -399,7 +408,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
399
408
|
n_consensus=n_consensus,
|
400
409
|
)
|
401
410
|
response = await self._client._prepared_request(request)
|
402
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.
|
411
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.com/dashboard/processors/{response['id']}")
|
403
412
|
|
404
413
|
return ProcessorConfig.model_validate(response)
|
405
414
|
|
@@ -432,7 +441,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
432
441
|
image_resolution_dpi: int | None = None,
|
433
442
|
browser_canvas: BrowserCanvas | None = None,
|
434
443
|
model: str | None = None,
|
435
|
-
json_schema: dict[str, Any] | None = None,
|
444
|
+
json_schema: dict[str, Any] | Path | str | None = None,
|
436
445
|
temperature: float | None = None,
|
437
446
|
reasoning_effort: ChatCompletionReasoningEffort | None = None,
|
438
447
|
n_consensus: int | None = None,
|
@@ -446,7 +455,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
446
455
|
image_resolution_dpi: New image resolution DPI
|
447
456
|
browser_canvas: New browser canvas size
|
448
457
|
model: New AI model
|
449
|
-
json_schema: New JSON schema for the processor
|
458
|
+
json_schema: New JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
|
450
459
|
temperature: New temperature setting
|
451
460
|
reasoning_effort: The effort level for the model to reason about the input data.
|
452
461
|
n_consensus: New number of consensus required
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# import json
|
4
4
|
|
5
5
|
# from .._resource import SyncAPIResource, AsyncAPIResource
|
6
|
-
# from ..
|
6
|
+
# from ..utils.json_schema import load_json_schema
|
7
7
|
# from ..types.jobs import JobResponse
|
8
8
|
# from ..types.jobs.prompt_optimization import PromptOptimizationObject, PromptOptimizationProps, PromptOptimizationJobInputData, PromptOptimizationJob
|
9
9
|
|
retab/resources/schemas.py
CHANGED
@@ -7,9 +7,9 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
|
|
7
7
|
from pydantic import BaseModel
|
8
8
|
|
9
9
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
10
|
-
from ..
|
11
|
-
from ..
|
12
|
-
from ..
|
10
|
+
from ..utils.ai_models import assert_valid_model_schema_generation
|
11
|
+
from ..utils.json_schema import load_json_schema
|
12
|
+
from ..utils.mime import prepare_mime_document_list
|
13
13
|
from ..types.mime import MIMEData
|
14
14
|
from ..types.modalities import Modality
|
15
15
|
from ..types.schemas.enhance import EnhanceSchemaConfig, EnhanceSchemaConfigDict, EnhanceSchemaRequest
|
@@ -17,7 +17,7 @@ class Mailbox(AutomationConfig):
|
|
17
17
|
def object(self) -> str:
|
18
18
|
return "automation.mailbox"
|
19
19
|
|
20
|
-
EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.retab.
|
20
|
+
EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.retab.com')}$"
|
21
21
|
id: str = Field(default_factory=lambda: "mb_" + nanoid.generate(), description="Unique identifier for the mailbox")
|
22
22
|
|
23
23
|
# Email Specific config
|
retab/types/completions.py
CHANGED
@@ -7,7 +7,7 @@ from openai.types.shared_params.reasoning import Reasoning
|
|
7
7
|
from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
|
8
8
|
from pydantic import BaseModel, ConfigDict, Field
|
9
9
|
|
10
|
-
from ..
|
10
|
+
from ..utils.ai_models import get_provider_for_model
|
11
11
|
from .ai_models import AIProvider
|
12
12
|
from .chat import ChatCompletionRetabMessage
|
13
13
|
|
@@ -10,10 +10,10 @@ from openai.types.chat.chat_completion_message_param import ChatCompletionMessag
|
|
10
10
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
11
11
|
from pydantic import BaseModel, Field, computed_field
|
12
12
|
|
13
|
-
from ...
|
14
|
-
from ...
|
15
|
-
from ...
|
16
|
-
from ...
|
13
|
+
from ...utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
|
14
|
+
from ...utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
|
15
|
+
from ...utils.display import count_image_tokens, count_text_tokens
|
16
|
+
from ...utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
|
17
17
|
from ..chat import ChatCompletionRetabMessage
|
18
18
|
from ..mime import MIMEData
|
19
19
|
from ..modalities import Modality
|
@@ -15,7 +15,7 @@ from openai.types.responses.response import Response
|
|
15
15
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
16
16
|
from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator
|
17
17
|
|
18
|
-
from ...
|
18
|
+
from ...utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
|
19
19
|
from ..ai_models import Amount
|
20
20
|
from ..chat import ChatCompletionRetabMessage
|
21
21
|
from ..mime import MIMEData
|
@@ -91,7 +91,7 @@ class FieldLocation(BaseModel):
|
|
91
91
|
quote: str = Field(..., description="The quote of the field (verbatim from the document)")
|
92
92
|
file_id: str | None = Field(default=None, description="The ID of the file")
|
93
93
|
page: int | None = Field(default=None, description="The page number of the field (1-indexed)")
|
94
|
-
|
94
|
+
bbox_normalized: tuple[float, float, float, float] | None = Field(default=None, description="The normalized bounding box of the field")
|
95
95
|
score: float | None = Field(default=None, description="The score of the field")
|
96
96
|
match_level: Literal["token", "line", "block"] | None = Field(default=None, description="The level of the match (token, line, block)")
|
97
97
|
|
@@ -99,7 +99,7 @@ class FieldLocation(BaseModel):
|
|
99
99
|
class RetabParsedChoice(ParsedChoice):
|
100
100
|
# Adaptable ParsedChoice that allows None for the finish_reason
|
101
101
|
finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] | None = None # type: ignore
|
102
|
-
field_locations: dict[str,
|
102
|
+
field_locations: dict[str, FieldLocation] | None = Field(default=None, description="The locations of the fields in the document, if available")
|
103
103
|
key_mapping: dict[str, Optional[str]] | None = Field(default=None, description="Mapping of consensus keys to original model keys")
|
104
104
|
|
105
105
|
|
retab/types/documents/parse.py
CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel, Field
|
|
3
3
|
|
4
4
|
from ..mime import MIMEData, BaseMIMEData
|
5
5
|
from ..browser_canvas import BrowserCanvas
|
6
|
+
from ..ai_models import LLMModel
|
6
7
|
|
7
8
|
TableParsingFormat = Literal["markdown", "yaml", "html", "json"]
|
8
9
|
|
@@ -18,7 +19,7 @@ class ParseRequest(BaseModel):
|
|
18
19
|
"""Request model for document parsing."""
|
19
20
|
|
20
21
|
document: MIMEData = Field(..., description="Document to parse")
|
21
|
-
|
22
|
+
model: LLMModel = Field(default="gemini-2.5-flash", description="Model to use for parsing")
|
22
23
|
table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
|
23
24
|
image_resolution_dpi: int = Field(default=72, description="DPI for image processing")
|
24
25
|
browser_canvas: BrowserCanvas = Field(default="A4", description="Canvas size for document rendering")
|
@@ -30,3 +31,4 @@ class ParseResult(BaseModel):
|
|
30
31
|
document: BaseMIMEData = Field(..., description="Processed document metadata")
|
31
32
|
usage: RetabUsage = Field(..., description="Processing usage information")
|
32
33
|
pages: list[str] = Field(..., description="Text content of each page")
|
34
|
+
text: str = Field(..., description="Text content of the document")
|
retab/types/evals.py
CHANGED
@@ -6,8 +6,8 @@ from typing import Any, List, Literal, Optional
|
|
6
6
|
import nanoid # type: ignore
|
7
7
|
from pydantic import BaseModel, Field, computed_field
|
8
8
|
|
9
|
-
from ..
|
10
|
-
from ..
|
9
|
+
from ..utils.json_schema import clean_schema, compute_schema_data_id
|
10
|
+
from ..utils.mime import generate_blake2b_hash_from_string
|
11
11
|
from .ai_models import Amount
|
12
12
|
from .inference_settings import InferenceSettings
|
13
13
|
from .mime import MIMEData
|
@@ -6,8 +6,8 @@ from typing import Any, Optional, Self
|
|
6
6
|
import nanoid # type: ignore
|
7
7
|
from pydantic import BaseModel, Field, computed_field, model_validator
|
8
8
|
|
9
|
-
from ...
|
10
|
-
from ...
|
9
|
+
from ...utils.json_schema import clean_schema
|
10
|
+
from ...utils.mime import generate_blake2b_hash_from_string
|
11
11
|
from ..inference_settings import InferenceSettings
|
12
12
|
from ..metrics import MetricResult
|
13
13
|
from ..predictions import PredictionData
|
retab/types/evaluations/model.py
CHANGED
@@ -5,8 +5,8 @@ from typing import Any, Optional
|
|
5
5
|
import nanoid # type: ignore
|
6
6
|
from pydantic import BaseModel, Field, computed_field
|
7
7
|
|
8
|
-
from ...
|
9
|
-
from ...
|
8
|
+
from ...utils.json_schema import compute_schema_data_id
|
9
|
+
from ...utils.mime import generate_blake2b_hash_from_string
|
10
10
|
from ..inference_settings import InferenceSettings
|
11
11
|
from .documents import EvaluationDocument
|
12
12
|
from .iterations import Iteration
|
retab/types/extractions.py
CHANGED
@@ -3,13 +3,19 @@ from typing import Any, Literal, Optional
|
|
3
3
|
|
4
4
|
import nanoid # type: ignore
|
5
5
|
from openai.types.chat import ChatCompletion
|
6
|
-
from openai.types.chat.chat_completion_reasoning_effort import
|
6
|
+
from openai.types.chat.chat_completion_reasoning_effort import (
|
7
|
+
ChatCompletionReasoningEffort,
|
8
|
+
)
|
7
9
|
from pydantic import BaseModel, Field, computed_field, model_validator
|
8
10
|
|
9
11
|
from retab.types.chat import ChatCompletionRetabMessage
|
10
12
|
from retab.types.documents.extractions import RetabParsedChatCompletion
|
11
13
|
|
12
|
-
from ..
|
14
|
+
from ..utils.usage.usage import (
|
15
|
+
CostBreakdown,
|
16
|
+
compute_cost_from_model,
|
17
|
+
compute_cost_from_model_with_breakdown,
|
18
|
+
)
|
13
19
|
from .ai_models import Amount
|
14
20
|
from .modalities import Modality
|
15
21
|
|
@@ -17,9 +23,18 @@ ValidationsState = Literal["pending", "validated", "invalid"]
|
|
17
23
|
|
18
24
|
|
19
25
|
class ExtractionSource(BaseModel):
|
20
|
-
type: Literal[
|
21
|
-
|
22
|
-
|
26
|
+
type: Literal[
|
27
|
+
"api",
|
28
|
+
"annotation",
|
29
|
+
"processor",
|
30
|
+
"automation",
|
31
|
+
"automation.link",
|
32
|
+
"automation.mailbox",
|
33
|
+
"automation.cron",
|
34
|
+
"automation.outlook",
|
35
|
+
"automation.endpoint",
|
36
|
+
"schema.extract",
|
37
|
+
] = Field(description="Type of extraction")
|
23
38
|
id: str | None = Field(default=None, description="ID the trigger of the extraction")
|
24
39
|
|
25
40
|
|
@@ -34,7 +49,10 @@ class ExtractionTimingStep(BaseModel):
|
|
34
49
|
|
35
50
|
|
36
51
|
class Extraction(BaseModel):
|
37
|
-
id: str = Field(
|
52
|
+
id: str = Field(
|
53
|
+
default_factory=lambda: "extr_" + nanoid.generate(),
|
54
|
+
description="Unique identifier of the analysis",
|
55
|
+
)
|
38
56
|
messages: list[ChatCompletionRetabMessage] = Field(default_factory=list)
|
39
57
|
messages_gcs: str = Field(..., description="GCS path to the messages")
|
40
58
|
file_gcs_paths: list[str] = Field(..., description="GCS paths to the files")
|
@@ -51,16 +69,23 @@ class Extraction(BaseModel):
|
|
51
69
|
source: ExtractionSource = Field(..., description="Source of the extraction")
|
52
70
|
image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
|
53
71
|
browser_canvas: BrowserCanvas = Field(
|
54
|
-
default="A4",
|
72
|
+
default="A4",
|
73
|
+
description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.",
|
55
74
|
)
|
56
75
|
modality: Modality = Field(default="native", description="Modality of the extraction")
|
57
|
-
reasoning_effort: Optional[ChatCompletionReasoningEffort] = Field(
|
76
|
+
reasoning_effort: Optional[ChatCompletionReasoningEffort] = Field(
|
77
|
+
default=None,
|
78
|
+
description="The effort level for the model to reason about the input data.",
|
79
|
+
)
|
58
80
|
timings: list[ExtractionTimingStep] = Field(default_factory=list, description="Timings of the extraction")
|
59
81
|
|
60
82
|
# Infered from the schema
|
61
83
|
schema_id: str = Field(..., description="Version of the schema used for the analysis")
|
62
84
|
schema_data_id: str = Field(..., description="Version of the schema data used for the analysis")
|
63
|
-
created_at: datetime.datetime = Field(
|
85
|
+
created_at: datetime.datetime = Field(
|
86
|
+
default_factory=lambda: datetime.datetime.now(datetime.timezone.utc),
|
87
|
+
description="Timestamp of the creation of the extraction object",
|
88
|
+
)
|
64
89
|
request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the extraction request if provided.")
|
65
90
|
organization_id: str = Field(..., description="Organization ID of the user or application")
|
66
91
|
validation_state: Optional[ValidationsState] = Field(default=None, description="Validation state of the extraction")
|
retab/types/logs.py
CHANGED
@@ -7,9 +7,9 @@ from openai.types.chat.chat_completion import ChatCompletion
|
|
7
7
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
8
8
|
from pydantic import BaseModel, EmailStr, Field, HttpUrl, computed_field, field_validator
|
9
9
|
|
10
|
-
from ..
|
11
|
-
from ..
|
12
|
-
from ..
|
10
|
+
from ..utils.json_schema import compute_schema_data_id
|
11
|
+
from ..utils.mime import generate_blake2b_hash_from_string
|
12
|
+
from ..utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
|
13
13
|
from .ai_models import Amount
|
14
14
|
from .documents.extractions import RetabParsedChatCompletion
|
15
15
|
from .mime import BaseMIMEData
|
retab/types/schemas/object.py
CHANGED
@@ -10,9 +10,9 @@ from openai.types.chat.chat_completion_message_param import ChatCompletionMessag
|
|
10
10
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
11
11
|
from pydantic import BaseModel, Field, PrivateAttr, computed_field, model_validator
|
12
12
|
|
13
|
-
from ...
|
14
|
-
from ...
|
15
|
-
from ...
|
13
|
+
from ...utils.chat import convert_to_anthropic_format, convert_to_google_genai_format
|
14
|
+
from ...utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
|
15
|
+
from ...utils.json_schema import (
|
16
16
|
convert_basemodel_to_partial_basemodel,
|
17
17
|
convert_json_schema_to_basemodel,
|
18
18
|
create_reasoning_schema,
|
@@ -25,7 +25,7 @@ from ..._utils.json_schema import (
|
|
25
25
|
load_json_schema,
|
26
26
|
schema_to_ts_type,
|
27
27
|
)
|
28
|
-
from ...
|
28
|
+
from ...utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
|
29
29
|
from ...types.standards import StreamingBaseModel
|
30
30
|
from ..chat import ChatCompletionRetabMessage
|
31
31
|
|
retab/types/schemas/templates.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Any, Literal, Optional
|
|
4
4
|
import nanoid # type: ignore
|
5
5
|
from pydantic import BaseModel, Field, PrivateAttr, computed_field
|
6
6
|
|
7
|
-
from ...
|
7
|
+
from ...utils.json_schema import generate_schema_data_id, generate_schema_id
|
8
8
|
from ...types.mime import MIMEData
|
9
9
|
|
10
10
|
|
retab/utils/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,59 @@
|
|
1
|
+
- model: "claude-3-5-sonnet-latest"
|
2
|
+
pricing:
|
3
|
+
text:
|
4
|
+
prompt: 3.00
|
5
|
+
cached_discount: 0.5
|
6
|
+
completion: 15.00
|
7
|
+
audio: null
|
8
|
+
capabilities:
|
9
|
+
modalities: ["text", "image"]
|
10
|
+
endpoints: ["chat_completions"]
|
11
|
+
features: ["streaming", "function_calling"]
|
12
|
+
permissions:
|
13
|
+
show_in_free_picker: true
|
14
|
+
show_in_paid_picker: true
|
15
|
+
|
16
|
+
- model: "claude-3-5-sonnet-20241022"
|
17
|
+
inherits: "claude-3-5-sonnet-latest"
|
18
|
+
permissions:
|
19
|
+
show_in_free_picker: false
|
20
|
+
show_in_paid_picker: false
|
21
|
+
|
22
|
+
- model: "claude-3-opus-20240229"
|
23
|
+
pricing:
|
24
|
+
text:
|
25
|
+
prompt: 15.00
|
26
|
+
cached_discount: 0.5
|
27
|
+
completion: 75.00
|
28
|
+
audio: null
|
29
|
+
capabilities:
|
30
|
+
modalities: ["text", "image"]
|
31
|
+
endpoints: ["chat_completions"]
|
32
|
+
features: ["streaming", "function_calling"]
|
33
|
+
permissions:
|
34
|
+
show_in_free_picker: true
|
35
|
+
show_in_paid_picker: true
|
36
|
+
|
37
|
+
- model: "claude-3-sonnet-20240229"
|
38
|
+
pricing:
|
39
|
+
text:
|
40
|
+
prompt: 3.00
|
41
|
+
cached_discount: 0.5
|
42
|
+
completion: 15.00
|
43
|
+
audio: null
|
44
|
+
capabilities:
|
45
|
+
modalities: ["text", "image"]
|
46
|
+
endpoints: ["chat_completions"]
|
47
|
+
features: ["streaming", "function_calling"]
|
48
|
+
|
49
|
+
- model: "claude-3-haiku-20240307"
|
50
|
+
pricing:
|
51
|
+
text:
|
52
|
+
prompt: 0.25
|
53
|
+
cached_discount: 0.5
|
54
|
+
completion: 1.25
|
55
|
+
audio: null
|
56
|
+
capabilities:
|
57
|
+
modalities: ["text", "image"]
|
58
|
+
endpoints: ["chat_completions"]
|
59
|
+
features: ["streaming", "function_calling"]
|