retab 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +9 -14
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +9 -3
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +28 -28
- {uiform → retab}/client.py +32 -31
- {uiform → retab}/resources/consensus/client.py +17 -36
- {uiform → retab}/resources/consensus/completions.py +24 -47
- {uiform → retab}/resources/consensus/completions_stream.py +26 -38
- {uiform → retab}/resources/consensus/responses.py +31 -80
- {uiform → retab}/resources/consensus/responses_stream.py +31 -79
- {uiform → retab}/resources/documents/client.py +59 -45
- {uiform → retab}/resources/documents/extractions.py +181 -90
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +220 -216
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +179 -164
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +39 -2
- {uiform → retab}/types/ai_models.py +13 -13
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +9 -9
- {uiform → retab}/types/automations/outlook.py +15 -11
- retab/types/browser_canvas.py +3 -0
- {uiform → retab}/types/chat.py +2 -2
- {uiform → retab}/types/completions.py +9 -12
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +18 -20
- {uiform → retab}/types/documents/extractions.py +69 -24
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +33 -10
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +13 -14
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.36.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
- retab-0.0.37.dist-info/RECORD +107 -0
- retab-0.0.37.dist-info/top_level.txt +1 -0
- retab-0.0.36.dist-info/RECORD +0 -96
- retab-0.0.36.dist-info/top_level.txt +0 -1
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/consensus.py +0 -10
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- {uiform → retab}/__init__.py +0 -0
- {uiform → retab}/_resource.py +0 -0
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/automations/webhooks.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform → retab}/types/documents/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform → retab}/types/jobs/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform → retab}/types/schemas/__init__.py +0 -0
- {uiform → retab}/types/secrets/__init__.py +0 -0
- {retab-0.0.36.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
{uiform → retab}/types/mime.py
RENAMED
@@ -58,7 +58,7 @@ class TextBox(BaseModel):
|
|
58
58
|
vertices: tuple[Point, Point, Point, Point] = Field(description="(top-left, top-right, bottom-right, bottom-left)")
|
59
59
|
text: str
|
60
60
|
|
61
|
-
@field_validator(
|
61
|
+
@field_validator("width", "height")
|
62
62
|
@classmethod
|
63
63
|
def check_positive_dimensions(cls, v: int) -> int:
|
64
64
|
if not isinstance(v, int) or v <= 0:
|
@@ -76,7 +76,7 @@ class Page(BaseModel):
|
|
76
76
|
tokens: list[TextBox]
|
77
77
|
transforms: list[Matrix] = Field(default=[], description="Transformation matrices applied to the original document image")
|
78
78
|
|
79
|
-
@field_validator(
|
79
|
+
@field_validator("width", "height")
|
80
80
|
@classmethod
|
81
81
|
def check_positive_dimensions(cls, v: int) -> int:
|
82
82
|
if not isinstance(v, int) or v <= 0:
|
@@ -98,21 +98,21 @@ class MIMEData(BaseModel):
|
|
98
98
|
|
99
99
|
@property
|
100
100
|
def extension(self) -> str:
|
101
|
-
return self.filename.split(
|
101
|
+
return self.filename.split(".")[-1].lower()
|
102
102
|
|
103
103
|
@property
|
104
104
|
def content(self) -> str:
|
105
|
-
if self.url.startswith(
|
105
|
+
if self.url.startswith("data:"):
|
106
106
|
# Extract base64 content from data URL
|
107
|
-
base64_content = self.url.split(
|
107
|
+
base64_content = self.url.split(",")[1]
|
108
108
|
return base64_content
|
109
109
|
else:
|
110
110
|
raise ValueError("Content is not available for this file")
|
111
111
|
|
112
112
|
@property
|
113
113
|
def mime_type(self) -> str:
|
114
|
-
if self.url.startswith(
|
115
|
-
return self.url.split(
|
114
|
+
if self.url.startswith("data:"):
|
115
|
+
return self.url.split(";")[0].split(":")[1]
|
116
116
|
else:
|
117
117
|
return mimetypes.guess_type(self.filename)[0] or "application/octet-stream"
|
118
118
|
|
@@ -126,7 +126,7 @@ class MIMEData(BaseModel):
|
|
126
126
|
return len(base64.b64decode(self.content))
|
127
127
|
|
128
128
|
def __str__(self) -> str:
|
129
|
-
truncated_url = self.url[:50] +
|
129
|
+
truncated_url = self.url[:50] + "..." if len(self.url) > 50 else self.url
|
130
130
|
# truncated_content = self.content[:50] + '...' if len(self.content) > 50 else self.content
|
131
131
|
return f"MIMEData(filename='{self.filename}', url='{truncated_url}', mime_type='{self.mime_type}', size='{self.size}', extension='{self.extension}')"
|
132
132
|
|
@@ -136,34 +136,36 @@ class MIMEData(BaseModel):
|
|
136
136
|
|
137
137
|
class BaseMIMEData(MIMEData):
|
138
138
|
@classmethod
|
139
|
-
def model_validate(
|
139
|
+
def model_validate(
|
140
|
+
cls, obj: Any, *, strict: bool | None = None, from_attributes: bool | None = None, context: Any | None = None, by_alias: bool | None = None, by_name: bool | None = None
|
141
|
+
) -> Self:
|
140
142
|
if isinstance(obj, MIMEData):
|
141
143
|
# Convert MIMEData instance to dict
|
142
144
|
obj = obj.model_dump()
|
143
|
-
if isinstance(obj, dict) and
|
145
|
+
if isinstance(obj, dict) and "url" in obj:
|
144
146
|
# Truncate URL to 1000 chars or less, ensuring it's a valid base64 string
|
145
|
-
if len(obj[
|
147
|
+
if len(obj["url"]) > 1000:
|
146
148
|
# Find the position of the base64 data
|
147
|
-
if
|
148
|
-
prefix, base64_data = obj[
|
149
|
+
if "," in obj["url"]:
|
150
|
+
prefix, base64_data = obj["url"].split(",", 1)
|
149
151
|
# Calculate how many characters we can keep (must be a multiple of 4)
|
150
152
|
max_base64_len = 1000 - len(prefix) - 1 # -1 for the comma
|
151
153
|
# Ensure the length is a multiple of 4
|
152
154
|
max_base64_len = max_base64_len - (max_base64_len % 4)
|
153
155
|
# Truncate and reassemble
|
154
|
-
obj[
|
156
|
+
obj["url"] = prefix + "," + base64_data[:max_base64_len]
|
155
157
|
else:
|
156
158
|
# If there's no comma (unexpected format), truncate to 996 chars (multiple of 4)
|
157
|
-
obj[
|
158
|
-
return super().model_validate(obj, strict=strict, from_attributes=from_attributes, context=context)
|
159
|
+
obj["url"] = obj["url"][:996]
|
160
|
+
return super().model_validate(obj, strict=strict, from_attributes=from_attributes, context=context, by_alias=by_alias, by_name=by_name)
|
159
161
|
|
160
162
|
@property
|
161
163
|
def id(self) -> str:
|
162
164
|
raise NotImplementedError("id is not implemented for BaseMIMEData - id is the hash of the content, so it's not possible to generate it from the base class")
|
163
165
|
|
164
166
|
def __str__(self) -> str:
|
165
|
-
truncated_url = self.url[:50] +
|
166
|
-
truncated_content = self.content[:50] +
|
167
|
+
truncated_url = self.url[:50] + "..." if len(self.url) > 50 else self.url
|
168
|
+
truncated_content = self.content[:50] + "..." if len(self.content) > 50 else self.content
|
167
169
|
return f"BaseMIMEData(filename='{self.filename}', url='{truncated_url}', content='{truncated_content}', mime_type='{self.mime_type}', extension='{self.extension}')"
|
168
170
|
|
169
171
|
def __repr__(self) -> str:
|
@@ -227,7 +229,7 @@ class BaseEmailData(BaseModel):
|
|
227
229
|
|
228
230
|
@property
|
229
231
|
def unique_filename(self) -> str:
|
230
|
-
cleaned_id = re.sub(r
|
232
|
+
cleaned_id = re.sub(r"[\s<>]", "", self.id)
|
231
233
|
return f"{cleaned_id}.eml"
|
232
234
|
|
233
235
|
def __repr__(self) -> str:
|
@@ -235,7 +237,7 @@ class BaseEmailData(BaseModel):
|
|
235
237
|
attachment_count = len(self.attachments)
|
236
238
|
|
237
239
|
subject_preview = self.subject
|
238
|
-
body_preview = self.body_plain[:5000] +
|
240
|
+
body_preview = self.body_plain[:5000] + "..." if self.body_plain and len(self.body_plain) > 5000 else self.body_plain
|
239
241
|
|
240
242
|
return (
|
241
243
|
f"BaseEmailData("
|
@@ -4,16 +4,16 @@ BaseModality = Literal["text", "image"] # "video" , "audio"
|
|
4
4
|
Modality = Literal[BaseModality, "native", "image+text"]
|
5
5
|
TYPE_FAMILIES = Literal["excel", "word", "powerpoint", "pdf", "image", "text", "email", "audio", "html", "web"]
|
6
6
|
NativeModalities: dict[TYPE_FAMILIES, Modality] = {
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
7
|
+
"excel": "image",
|
8
|
+
"word": "image",
|
9
|
+
"html": "text",
|
10
|
+
"powerpoint": "image",
|
11
|
+
"pdf": "image",
|
12
|
+
"image": "image",
|
13
|
+
"web": "image",
|
14
|
+
"text": "text",
|
15
|
+
"email": "native",
|
16
|
+
"audio": "text",
|
17
17
|
}
|
18
18
|
|
19
19
|
EXCEL_TYPES = Literal[".xls", ".xlsx", ".ods"]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
import datetime
|
2
|
+
from typing import Any, Optional
|
3
|
+
from pydantic import BaseModel, Field
|
4
|
+
from .ai_models import Amount
|
5
|
+
|
6
|
+
|
7
|
+
class PredictionMetadata(BaseModel):
|
8
|
+
extraction_id: Optional[str] = Field(default=None, description="The ID of the extraction")
|
9
|
+
likelihoods: Optional[dict[str, Any]] = Field(default=None, description="The likelihoods of the extraction")
|
10
|
+
field_locations: Optional[dict[str, Any]] = Field(default=None, description="The field locations of the extraction")
|
11
|
+
agentic_field_locations: Optional[dict[str, Any]] = Field(default=None, description="The field locations of the extraction extracted by an llm")
|
12
|
+
consensus_details: Optional[list[dict[str, Any]]] = Field(default=None, description="The consensus details of the extraction")
|
13
|
+
api_cost: Optional[Amount] = Field(default=None, description="The cost of the API call for this document (if any -- ground truth for example)")
|
14
|
+
|
15
|
+
|
16
|
+
class PredictionData(BaseModel):
|
17
|
+
prediction: dict[str, Any] = Field(default={}, description="The result of the extraction or manual annotation")
|
18
|
+
metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the prediction")
|
19
|
+
updated_at: Optional[datetime.datetime] = Field(default=None, description="The creation date of the prediction")
|
@@ -1,9 +1,11 @@
|
|
1
|
-
from typing import Any, Self, TypedDict
|
1
|
+
from typing import Any, Self, TypedDict
|
2
|
+
|
2
3
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
3
4
|
from pydantic import BaseModel, Field, model_validator
|
4
5
|
|
5
6
|
from ..mime import MIMEData
|
6
7
|
from ..modalities import Modality
|
8
|
+
from ..browser_canvas import BrowserCanvas
|
7
9
|
|
8
10
|
|
9
11
|
class EnhanceSchemaConfig(BaseModel):
|
@@ -40,7 +42,7 @@ class EnhanceSchemaRequest(BaseModel):
|
|
40
42
|
"""The modality of the document to load."""
|
41
43
|
|
42
44
|
image_resolution_dpi: int = 96
|
43
|
-
browser_canvas:
|
45
|
+
browser_canvas: BrowserCanvas = "A4"
|
44
46
|
"""The image operations to apply to the document."""
|
45
47
|
|
46
48
|
stream: bool = False
|
@@ -1,9 +1,12 @@
|
|
1
|
+
from typing import Any, Self
|
2
|
+
|
3
|
+
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
1
4
|
from pydantic import BaseModel, Field, model_validator
|
2
|
-
|
5
|
+
|
6
|
+
from ..evals import ItemMetric
|
3
7
|
from ..mime import MIMEData
|
4
8
|
from ..modalities import Modality
|
5
|
-
from
|
6
|
-
from ..evals import ItemMetric
|
9
|
+
from ..browser_canvas import BrowserCanvas
|
7
10
|
|
8
11
|
|
9
12
|
class EvaluateSchemaRequest(BaseModel):
|
@@ -21,7 +24,7 @@ class EvaluateSchemaRequest(BaseModel):
|
|
21
24
|
reasoning_effort: ChatCompletionReasoningEffort = "medium"
|
22
25
|
modality: Modality
|
23
26
|
image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
|
24
|
-
browser_canvas:
|
27
|
+
browser_canvas: BrowserCanvas = Field(
|
25
28
|
default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
|
26
29
|
)
|
27
30
|
n_consensus: int = 1
|
@@ -1,9 +1,11 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any
|
2
|
+
|
2
3
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
3
4
|
from pydantic import BaseModel, Field
|
4
5
|
|
5
6
|
from ..mime import MIMEData
|
6
7
|
from ..modalities import Modality
|
8
|
+
from ..browser_canvas import BrowserCanvas
|
7
9
|
|
8
10
|
|
9
11
|
class GenerateSchemaRequest(BaseModel):
|
@@ -15,7 +17,9 @@ class GenerateSchemaRequest(BaseModel):
|
|
15
17
|
instructions: str | None = None
|
16
18
|
"""The modality of the document to load."""
|
17
19
|
image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
|
18
|
-
browser_canvas:
|
20
|
+
browser_canvas: BrowserCanvas = Field(
|
21
|
+
default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
|
22
|
+
)
|
19
23
|
|
20
24
|
"""The image operations to apply to the document."""
|
21
25
|
|
@@ -29,4 +33,3 @@ class GenerateSystemPromptRequest(GenerateSchemaRequest):
|
|
29
33
|
"""
|
30
34
|
|
31
35
|
json_schema: dict[str, Any]
|
32
|
-
|
@@ -2,10 +2,9 @@ import copy
|
|
2
2
|
import datetime
|
3
3
|
import json
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import Any,
|
5
|
+
from typing import Any, Literal, Self
|
6
6
|
|
7
7
|
from anthropic.types.message_param import MessageParam
|
8
|
-
|
9
8
|
from google.genai.types import ContentUnionDict # type: ignore
|
10
9
|
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
|
11
10
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
@@ -220,7 +219,7 @@ class Schema(PartialSchema):
|
|
220
219
|
|
221
220
|
@property
|
222
221
|
def developer_system_prompt(self) -> str:
|
223
|
-
return
|
222
|
+
return """
|
224
223
|
# General Instructions
|
225
224
|
|
226
225
|
You are an expert in data extraction and structured data outputs.
|
@@ -379,7 +378,7 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
379
378
|
|
380
379
|
# User Defined System Prompt
|
381
380
|
|
382
|
-
|
381
|
+
"""
|
383
382
|
|
384
383
|
@property
|
385
384
|
def user_system_prompt(self) -> str:
|
@@ -463,7 +462,7 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
463
462
|
rec_remove_required(_validation_object_schema_)
|
464
463
|
return _validation_object_schema_
|
465
464
|
|
466
|
-
def _get_pattern_attribute(self, pattern: str, attribute: Literal[
|
465
|
+
def _get_pattern_attribute(self, pattern: str, attribute: Literal["X-FieldPrompt", "X-ReasoningPrompt", "type"]) -> str | None:
|
467
466
|
"""
|
468
467
|
Given a JSON Schema and a pattern (like "my_object.my_array.*.my_property"),
|
469
468
|
navigate the schema and return the specified attribute of the identified node.
|
@@ -506,7 +505,7 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
506
505
|
return schema_to_ts_type(current_schema, {}, {}, 0, 0, add_field_description=False)
|
507
506
|
return current_schema.get(attribute)
|
508
507
|
|
509
|
-
def _set_pattern_attribute(self, pattern: str, attribute: Literal[
|
508
|
+
def _set_pattern_attribute(self, pattern: str, attribute: Literal["X-FieldPrompt", "X-ReasoningPrompt", "X-SystemPrompt", "description"], value: str) -> None:
|
510
509
|
"""Sets an attribute value at a specific path in the schema.
|
511
510
|
|
512
511
|
Args:
|
@@ -551,7 +550,7 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
551
550
|
assert ref_name in definitions, "Validation Error: The $ref is not a definition reference"
|
552
551
|
|
553
552
|
# Count how many times this ref is used in the entire schema
|
554
|
-
ref_count = json.dumps(self.json_schema).count(f"
|
553
|
+
ref_count = json.dumps(self.json_schema).count(f'"{ref}"')
|
555
554
|
|
556
555
|
if ref_count > 1:
|
557
556
|
# Create a unique copy name by appending a number
|
@@ -589,8 +588,8 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
589
588
|
def validate_schema_and_model(cls, data: Any) -> Any:
|
590
589
|
"""Validate schema and model logic."""
|
591
590
|
# Extract from data
|
592
|
-
json_schema: dict[str, Any] | None = data.get(
|
593
|
-
pydantic_model: type[BaseModel] | None = data.get(
|
591
|
+
json_schema: dict[str, Any] | None = data.get("json_schema", None)
|
592
|
+
pydantic_model: type[BaseModel] | None = data.get("pydantic_model", None)
|
594
593
|
|
595
594
|
# Check if either json_schema or pydantic_model is provided
|
596
595
|
if json_schema and pydantic_model:
|
@@ -601,11 +600,11 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
601
600
|
|
602
601
|
if json_schema:
|
603
602
|
json_schema = load_json_schema(json_schema)
|
604
|
-
data[
|
605
|
-
data[
|
603
|
+
data["pydantic_model"] = convert_json_schema_to_basemodel(json_schema)
|
604
|
+
data["json_schema"] = json_schema
|
606
605
|
if pydantic_model:
|
607
|
-
data[
|
608
|
-
data[
|
606
|
+
data["pydantic_model"] = pydantic_model
|
607
|
+
data["json_schema"] = pydantic_model.model_json_schema()
|
609
608
|
|
610
609
|
return data
|
611
610
|
|
@@ -627,5 +626,5 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
627
626
|
json_schema: The JSON schema to save, can be a dict, Path, or string
|
628
627
|
schema_path: Output path for the schema file
|
629
628
|
"""
|
630
|
-
with open(path,
|
629
|
+
with open(path, "w", encoding="utf-8") as f:
|
631
630
|
json.dump(self.json_schema, f, ensure_ascii=False, indent=2)
|
@@ -5,6 +5,7 @@ import nanoid # type: ignore
|
|
5
5
|
from pydantic import BaseModel, Field, PrivateAttr, computed_field
|
6
6
|
|
7
7
|
from ..._utils.json_schema import generate_schema_data_id, generate_schema_id
|
8
|
+
from ...types.mime import MIMEData
|
8
9
|
|
9
10
|
|
10
11
|
class TemplateSchema(BaseModel):
|
@@ -58,9 +59,6 @@ class TemplateSchema(BaseModel):
|
|
58
59
|
"""The Pydantic model to use for loading."""
|
59
60
|
|
60
61
|
|
61
|
-
from ...types.mime import MIMEData
|
62
|
-
|
63
|
-
|
64
62
|
class UpdateTemplateRequest(BaseModel):
|
65
63
|
"""Request model for updating a template."""
|
66
64
|
|
@@ -1,12 +1,15 @@
|
|
1
|
-
from typing import Any,
|
1
|
+
from typing import Any, List, Literal, Optional, Tuple, TypeVar, TypedDict
|
2
2
|
|
3
3
|
from pydantic import BaseModel, Field
|
4
|
+
from pydantic.fields import _Unset
|
4
5
|
|
5
6
|
# API Standards
|
6
7
|
|
7
8
|
# Define a type variable to represent the content type
|
8
9
|
T = TypeVar("T")
|
9
10
|
|
11
|
+
FieldUnset = _Unset
|
12
|
+
|
10
13
|
|
11
14
|
# Define the ErrorDetail model
|
12
15
|
class ErrorDetail(BaseModel):
|
@@ -37,3 +40,17 @@ class PreparedRequest(BaseModel):
|
|
37
40
|
files: dict | List[Tuple[str, Tuple[str, bytes, str]]] | None = None
|
38
41
|
idempotency_key: str | None = None
|
39
42
|
raise_for_status: bool = False
|
43
|
+
|
44
|
+
|
45
|
+
class DeleteResponse(TypedDict):
|
46
|
+
"""Response from a delete operation"""
|
47
|
+
|
48
|
+
success: bool
|
49
|
+
id: str
|
50
|
+
|
51
|
+
|
52
|
+
class ExportResponse(TypedDict):
|
53
|
+
"""Response from an export operation"""
|
54
|
+
|
55
|
+
success: bool
|
56
|
+
path: str
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: retab
|
3
|
-
Version: 0.0.
|
4
|
-
Summary:
|
5
|
-
Home-page: https://github.com/
|
6
|
-
Author:
|
7
|
-
Author-email: contact@
|
8
|
-
Project-URL: Team website, https://
|
3
|
+
Version: 0.0.37
|
4
|
+
Summary: Retab official python library
|
5
|
+
Home-page: https://github.com/Retab-dev/retab
|
6
|
+
Author: Retab
|
7
|
+
Author-email: contact@retab.com
|
8
|
+
Project-URL: Team website, https://retab.com
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
11
11
|
Classifier: Operating System :: POSIX :: Linux
|
@@ -39,6 +39,7 @@ Requires-Dist: google-generativeai
|
|
39
39
|
Requires-Dist: anthropic
|
40
40
|
Requires-Dist: tiktoken
|
41
41
|
Requires-Dist: truststore
|
42
|
+
Requires-Dist: ruff
|
42
43
|
|
43
44
|
# UiForm
|
44
45
|
|
@@ -0,0 +1,107 @@
|
|
1
|
+
retab/__init__.py,sha256=ojlcab4e684LddcAO8k-i2thE_3jtgZIeGvGZg_pxSE,128
|
2
|
+
retab/_resource.py,sha256=qixWTeG8JEFU7ZyQdntZq9Z88L5clTMZOL3-fIAxk3o,583
|
3
|
+
retab/client.py,sha256=8rSxjC3A-tji_nc7TAGEIhZg5y1WCjKJC1RSNeR6lzc,29796
|
4
|
+
retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
retab/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
retab/_utils/ai_models.py,sha256=mnnUWMkiBD8Xu-K72gkU621OVFCJTdaTc2_vDzuPpNo,3835
|
7
|
+
retab/_utils/benchmarking.py,sha256=ZSuVcRkYr4gD90yezAv6TuKaFdj2ulc5d5x3lXLbQss,17849
|
8
|
+
retab/_utils/chat.py,sha256=KABmxMC2r6zuWMaWaYvPFAF0Rmu7YEg3bavRdQh6EBU,14375
|
9
|
+
retab/_utils/display.py,sha256=ZFPbiBnwEWGR-suS8e9Xilz9OqyYRDwsKYWfbFSJPJM,18868
|
10
|
+
retab/_utils/json_schema.py,sha256=vbIg4NqREBq_eNbYdBTGw5ykhUmKkSiVRAahAOrWEZg,82237
|
11
|
+
retab/_utils/mime.py,sha256=S6pH_CmDc7fnb14PIoK3XALwb_Quha34a112joyUNmY,5723
|
12
|
+
retab/_utils/responses.py,sha256=Xiod3Oj_WvGgYjH5TMOlzpDZiLzqVOEbfHW1KN1EeDM,7006
|
13
|
+
retab/_utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
|
14
|
+
retab/_utils/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
retab/_utils/usage/usage.py,sha256=2m_Yw620Bup75CcEK9RAWle8zaQObQ0AiuH-Q-zGfuQ,12921
|
16
|
+
retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
retab/resources/evals.py,sha256=jRgHsFDsoZoTVpFNAtJm9rW1zQGdceLqZ522Mx1-bfo,29928
|
18
|
+
retab/resources/files.py,sha256=-_GejfQjKEk10HHb5npmLOA9cUnVHic2ZQntE8tw4TQ,954
|
19
|
+
retab/resources/finetuning.py,sha256=Rx8XcqB00UIjEDZcCGp5fM2yz5TW1j36aBicrTa0uAY,2587
|
20
|
+
retab/resources/jsonlUtils.py,sha256=-PmBt--8alWFsJ8IUNtdWNdxEbjGJyGrbudkVq6c_1Y,45162
|
21
|
+
retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
|
22
|
+
retab/resources/openai_example.py,sha256=yz34KvfCvHTZ-AJ9GV-N7ljTTSWBmQEQNMHY2FUbEi4,522
|
23
|
+
retab/resources/prompt_optimization.py,sha256=BJkE7L1w0Z88sR9c1evD8ti4Zqr_xHCFSK4OJmV_M0k,3489
|
24
|
+
retab/resources/schemas.py,sha256=YakiA6aqiiusO8BIe_PF2eKC0lTf7Z3Zh1hxeoN8AtI,16057
|
25
|
+
retab/resources/usage.py,sha256=9_F526ZuBvazG_qBinoLZUdR5fz3jHl2f18jzzaHLBk,13603
|
26
|
+
retab/resources/consensus/__init__.py,sha256=0b3MSOFiYPwkNTrs_dBPRhwSl3kuk8BtG5QXofIUb9M,89
|
27
|
+
retab/resources/consensus/client.py,sha256=vDEm9ycUV7Wx2Wlypr9c60gH2IkNJqlADnn1dHW-MxE,3707
|
28
|
+
retab/resources/consensus/completions.py,sha256=O-C4bwtQv7NhQ0_Q4PdST06T3d1dGyHrUo4M3WpbAiM,8343
|
29
|
+
retab/resources/consensus/completions_stream.py,sha256=0kpPCdULqmZpXy5aSW_GFVxHVn2ouFCp2R6JIQRJUQQ,10839
|
30
|
+
retab/resources/consensus/responses.py,sha256=mWVc5J6xFmWSmnNfjDq6Jxn0a8tX7mzvireziQIeXFM,9927
|
31
|
+
retab/resources/consensus/responses_stream.py,sha256=73QG4Fd2V-a96ExPnfeSek99xFw9HyQ1Zb6C4WWPpIs,11668
|
32
|
+
retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
|
33
|
+
retab/resources/documents/client.py,sha256=Z6lvedC51pW_XUXSkyQ_dU0a-9lR4WtligsR3bNeGB4,11935
|
34
|
+
retab/resources/documents/extractions.py,sha256=8kwcQbmfPKTFV65Qex2C0ZMmSEOY9BPOTgDoIta5Uzo,25563
|
35
|
+
retab/resources/evaluations/__init__.py,sha256=3npbUDbxYn3ihnUKV7PRYNBYqL7MZ9AwhQHr7LaIESg,97
|
36
|
+
retab/resources/evaluations/client.py,sha256=SdI-m_8V0BApparlHO1mYFwvjAGWsHBKD_-Z3ZLcdq0,10658
|
37
|
+
retab/resources/evaluations/documents.py,sha256=w28pC-sv67HBVGGT2wyU2eJS1oKnYSA-WHp5vffwK60,9475
|
38
|
+
retab/resources/evaluations/iterations.py,sha256=mlkkECATlju6w2SarJuhuLIIJ9L-Prv3499xTdl7Y5g,17643
|
39
|
+
retab/resources/processors/__init__.py,sha256=w1HrMdSi3xlrcEDFMQ9BA7rbUhOFWSTkTKkkR2PfFHQ,93
|
40
|
+
retab/resources/processors/client.py,sha256=3uDR0R2DpPYOQFM5vRjbcE6lWZuvT-Ahh6KMzzRQa5E,20061
|
41
|
+
retab/resources/processors/automations/__init__.py,sha256=Iej-_yIxc8xAuhYmR0e2VI7j_EXVsNk1_L98OJSD82E,121
|
42
|
+
retab/resources/processors/automations/client.py,sha256=3w54F0JfC2GYDosLux8LVEjDd_RXqQ29-SyNXGa28U8,10500
|
43
|
+
retab/resources/processors/automations/endpoints.py,sha256=kRVxk_C0qs6GGuWUe0yjZkFIBCukPoqXuw3rhq421qQ,10934
|
44
|
+
retab/resources/processors/automations/links.py,sha256=bEIbRR9QKlunuUDChJxQ0kUos3E2woZUAWDt2TAjkT8,11475
|
45
|
+
retab/resources/processors/automations/logs.py,sha256=hIhLurnrgi7J-VkPmUaBffyRGzx6zX3586m7aH2g6s4,8829
|
46
|
+
retab/resources/processors/automations/mailboxes.py,sha256=c85p-COdP-otthygKQjlYHsUD7RNTC6yNVF-L_aY4kQ,15896
|
47
|
+
retab/resources/processors/automations/outlook.py,sha256=CildHugv1VHG_HIkE2EESDhbycHGYX101DLfY3Bhk3A,14937
|
48
|
+
retab/resources/processors/automations/tests.py,sha256=Ni02vAdUqK5xt-DtWkVRJRCG3KdykvP_k4ez7vXCzsw,5992
|
49
|
+
retab/resources/secrets/__init__.py,sha256=SwofMyk96k0YSyj1d_GRxhpVx4wb4TA97TISsTjB0Kc,105
|
50
|
+
retab/resources/secrets/client.py,sha256=nXt1cgvkWqhA99WTnC1PWbWJq-EbwvoDuCQOa0GJOOU,599
|
51
|
+
retab/resources/secrets/external_api_keys.py,sha256=3TuJxjk65EPUT2XC3wBcYWaVwqzc6QGv9BoHufzxTLU,3759
|
52
|
+
retab/resources/secrets/webhook.py,sha256=2mFDNblQYBGOwgqOG5gfRJkusQX7Hjy28XZ7O7ffkl8,1805
|
53
|
+
retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
|
+
retab/types/ai_models.py,sha256=t-QtTS0hthkF8S8Ntzta8fOCJM5_mS4fv9xfqnUNtQU,28247
|
55
|
+
retab/types/browser_canvas.py,sha256=U3yLqJcSwfturcIsNFSblRtFtnG3p-UL1YYoM9KZfdE,70
|
56
|
+
retab/types/chat.py,sha256=YX6yfre8-gueIV8dtTvz1ROpM2TUqwn_O6Ya_RElQxQ,425
|
57
|
+
retab/types/completions.py,sha256=JHKXtXy7U9iqGXVzLpJVNhDMp2NC6eloBKf_4H0MZ1Q,5384
|
58
|
+
retab/types/consensus.py,sha256=EsFCsyZK8NhkQ1BizFpnGN54D24hRFKc0xwt9VpH11c,1161
|
59
|
+
retab/types/evals.py,sha256=1yx1rMG92ZZI7f5nWc6C-Z1WwUsBrWg4eLS-LakvC4w,9955
|
60
|
+
retab/types/events.py,sha256=NrisdzJAaJ_kkfgdsqoiDB-Upm0LnbIGZikU_e9XXWw,2195
|
61
|
+
retab/types/extractions.py,sha256=3ER6WuR_PdBRhgcHv7vqed4jtB_MoCG_Pu3Vt-gDJMs,5848
|
62
|
+
retab/types/inference_settings.py,sha256=F_mBPFVY1yAwsHD11Z2ljMf3zkvviOey_JBnu8yEF84,572
|
63
|
+
retab/types/logs.py,sha256=m-CJK1sWrNQphaVzNfdpx6_2NNiHnKnmJ9wpyWJanlQ,9257
|
64
|
+
retab/types/metrics.py,sha256=0KEWUWW13s_tWjh7oUs33ip9TPwI7LZUNGE7k5qNoOo,1947
|
65
|
+
retab/types/mime.py,sha256=Fhq04yQoHhyx5wBjx7GNyJqQQJtcM_yEZt7uQxq6Br4,11038
|
66
|
+
retab/types/modalities.py,sha256=_2iGC_EEZT0Y-7PV_nHar5vqeEdsK7oy7ZJPV681nkg,1581
|
67
|
+
retab/types/pagination.py,sha256=-XrKILKX_5isTHTfShLiK3Kwp21Y6Wqy0Jci8lIFQig,109
|
68
|
+
retab/types/predictions.py,sha256=r7XM4rIMkUU10VsUkQ1wYdSkIApSGCXWkWxz7sMdBOk,1292
|
69
|
+
retab/types/standards.py,sha256=BT8U2x3wBiC-741Bxer9ys0CPb7kWhxt1Wvl2y2aoM0,1452
|
70
|
+
retab/types/automations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
|
+
retab/types/automations/cron.py,sha256=jDx0VzciboQw2_whvBXzgX5ZS0z2DksTgmyhXCSSGMk,3174
|
72
|
+
retab/types/automations/endpoints.py,sha256=IbylkBUBllcrtr9tifug0ptVq2vFKixuQ6e2JfW5Xfw,666
|
73
|
+
retab/types/automations/links.py,sha256=1ipBFWasY3cqds0U5AUz9Ez6T5kcgzX5r0gzZvFw8tU,901
|
74
|
+
retab/types/automations/mailboxes.py,sha256=5dIhfMXWfC5i_sJl6HlWfFIE-XDdOHYbMHAMxAGXchs,2333
|
75
|
+
retab/types/automations/outlook.py,sha256=4rJ-_1Py88n44ASdWJHpb0_V5VKafP7pj67ovi6iYwU,3040
|
76
|
+
retab/types/automations/webhooks.py,sha256=I_zaART2V_XrC3bzHO7wB0UlxNGLiblTltfue6BCgb0,563
|
77
|
+
retab/types/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
+
retab/types/db/annotations.py,sha256=PM-H4zXyRs48s7AAv_sl8kOQo5lThsVvBM0aKJkCpmU,1026
|
79
|
+
retab/types/db/files.py,sha256=udJKGplw6a8cF4XUTLN_QAU9-pyEWs4THHX1zyvbx0U,1261
|
80
|
+
retab/types/documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
81
|
+
retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
|
82
|
+
retab/types/documents/create_messages.py,sha256=jCsQK3Hv8gx-eAuNOq0iYdI8IBuVb7_2hB1GYHCQMHo,10291
|
83
|
+
retab/types/documents/extractions.py,sha256=S6RHX3NJ0jGkdhEPiFLVSan0WJub2Tggl5SXTpKxxcs,19058
|
84
|
+
retab/types/evaluations/__init__.py,sha256=fRQlK6y3x3SHqaukVYd9_zH8HrUk9TpoG9dlOTuIkcY,920
|
85
|
+
retab/types/evaluations/documents.py,sha256=oy0nqTrv0Pe__5ligeNWn5MbqVDAFRSrXYbCVoLxyXw,1268
|
86
|
+
retab/types/evaluations/iterations.py,sha256=66Svg5w2FqM14Zkn_opeUXV9mRZauGE8XLr1hJNMtKE,4637
|
87
|
+
retab/types/evaluations/model.py,sha256=V4W-so4ocXlNHP8Pe5Hp2jmB_dU_cUhku1jESe1a4qc,3133
|
88
|
+
retab/types/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
|
+
retab/types/jobs/base.py,sha256=R4UXcvgDmkgm4FB0ke5kQrSDWC95TweBLlc08ptfqt8,1695
|
90
|
+
retab/types/jobs/batch_annotation.py,sha256=Rftuu4Q6YzB4c39kWsqPGJ1QbPJrJWjWhupaKGO9kGE,281
|
91
|
+
retab/types/jobs/evaluation.py,sha256=R3Itl721bybXXnBCqU16C0gl5EAwTdcf_HZySkI_wsA,4524
|
92
|
+
retab/types/jobs/finetune.py,sha256=6O9NUy-ap_aqZ73tYx-NRRdFgKOIvk8WcItGhEUvrSQ,187
|
93
|
+
retab/types/jobs/prompt_optimization.py,sha256=dCWPwqnxZX5QgMYvHSlIEWLUvSxbs6qlbqsLHNCTRdM,1307
|
94
|
+
retab/types/jobs/webcrawl.py,sha256=C3_7mW2mmOXs6ypktDIHdjMnify90pFo70wmhrs_TP8,183
|
95
|
+
retab/types/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
96
|
+
retab/types/schemas/enhance.py,sha256=bkfkC_JCDvEolOdrp27rHbO1njEyrLJAM7qgwHGUYQk,2162
|
97
|
+
retab/types/schemas/evaluate.py,sha256=M9ZMv2FCcnSRGYBR3CUbCA88pW66EZEZC1YwoJUOei0,2206
|
98
|
+
retab/types/schemas/generate.py,sha256=pb6e6yJ2KPswmNHNkFcRhata7B698yBLnzlFVspJ9mE,1194
|
99
|
+
retab/types/schemas/layout.py,sha256=JLPwQGIWfPBoe1Y5r-MhiNDJigzZ-yKZnVGgox0uqMk,1487
|
100
|
+
retab/types/schemas/object.py,sha256=q6LXWfxLEvMcUee8Ydo1R6s7WjW45WyrzvXo4X7wj6k,25477
|
101
|
+
retab/types/schemas/templates.py,sha256=YRfgzy3hPuAfguu-qrYr9KhnAiBjLOoorqdoxyRYsXE,3466
|
102
|
+
retab/types/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
103
|
+
retab/types/secrets/external_api_keys.py,sha256=-yaaOfNLxKpll3oD-0htQlW8S03lyWs9Mmk9HOdyQ3g,437
|
104
|
+
retab-0.0.37.dist-info/METADATA,sha256=GsyfM3mVlq5E9DVofbLmqpirgqczMrnUbA_vYOmErvA,14201
|
105
|
+
retab-0.0.37.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
106
|
+
retab-0.0.37.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
|
107
|
+
retab-0.0.37.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
retab
|