retab 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ from typing import Literal
2
+
3
+ type HumanReviewStatus = Literal["success", "review_required", "reviewed"]
@@ -1,17 +1,19 @@
1
1
  from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
2
2
  from pydantic import BaseModel, Field, ConfigDict
3
3
  from .modality import Modality
4
- from .browser_canvas import BrowserCanvas
4
+ from typing import Any
5
5
 
6
6
  class InferenceSettings(BaseModel):
7
7
  model: str = "gpt-5-mini"
8
8
  temperature: float = 0.0
9
9
  reasoning_effort: ChatCompletionReasoningEffort = "minimal"
10
10
  image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
11
- browser_canvas: BrowserCanvas = "A4"
12
- n_consensus: int = Field(default=1, description="Number of consensus rounds to perform")
11
+ n_consensus: int = Field(default=1, ge=1, le=8, description="Number of consensus rounds to perform")
13
12
  modality: Modality = "native"
14
13
  parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
15
-
16
14
  model_config = ConfigDict(extra="ignore")
17
15
 
16
+
17
+ class ExtractionSettings(InferenceSettings):
18
+ json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
19
+
retab/types/mime.py CHANGED
@@ -171,44 +171,10 @@ class MIMEData(BaseModel):
171
171
  return self.__str__()
172
172
 
173
173
 
174
-
175
- class BaseMIMEData(MIMEData):
176
- @classmethod
177
- def model_validate(
178
- cls, obj: Any, *, strict: bool | None = None, extra: Any | None = None, from_attributes: bool | None = None, context: Any | None = None, by_alias: bool | None = None, by_name: bool | None = None
179
- ) -> Self:
180
- if isinstance(obj, MIMEData):
181
- # Convert MIMEData instance to dict
182
- obj = obj.model_dump()
183
- if isinstance(obj, dict) and "url" in obj:
184
- # Truncate URL to 1000 chars or less, ensuring it's a valid base64 string
185
- if len(obj["url"]) > 1000:
186
- # Find the position of the base64 data
187
- if "," in obj["url"]:
188
- prefix, base64_data = obj["url"].split(",", 1)
189
- # Calculate how many characters we can keep (must be a multiple of 4)
190
- max_base64_len = 1000 - len(prefix) - 1 # -1 for the comma
191
- # Ensure the length is a multiple of 4
192
- max_base64_len = max_base64_len - (max_base64_len % 4)
193
- # Truncate and reassemble
194
- obj["url"] = prefix + "," + base64_data[:max_base64_len]
195
- else:
196
- # If there's no comma (unexpected format), truncate to 996 chars (multiple of 4)
197
- obj["url"] = obj["url"][:996]
198
- return super().model_validate(obj, strict=strict, extra=extra, from_attributes=from_attributes, context=context, by_alias=by_alias, by_name=by_name)
199
-
200
- @property
201
- def id(self) -> str:
202
- raise NotImplementedError("id is not implemented for BaseMIMEData - id is the hash of the content, so it's not possible to generate it from the base class")
203
-
204
- def __str__(self) -> str:
205
- truncated_url = self.url[:50] + "..." if len(self.url) > 50 else self.url
206
- truncated_content = self.content[:50] + "..." if len(self.content) > 50 else self.content
207
- return f"BaseMIMEData(filename='{self.filename}', url='{truncated_url}', content='{truncated_content}', mime_type='{self.mime_type}', extension='{self.extension}')"
208
-
209
- def __repr__(self) -> str:
210
- return self.__str__()
211
-
174
+ class BaseMIMEData(BaseModel):
175
+ id: str = Field(..., description="ID of the file")
176
+ filename: str = Field(..., description="Filename of the file")
177
+ mime_type: str = Field(..., description="MIME type of the file")
212
178
 
213
179
  # **** MIME DATACLASSES ****
214
180
  class AttachmentMetadata(BaseModel):
retab/types/pagination.py CHANGED
@@ -1,6 +1,14 @@
1
+ from typing import Any, List, Literal
1
2
  from pydantic import BaseModel
2
3
 
3
4
 
4
5
  class ListMetadata(BaseModel):
5
6
  before: str | None
6
7
  after: str | None
8
+
9
+
10
+ class PaginatedList(BaseModel):
11
+ data: List[Any]
12
+ list_metadata: ListMetadata
13
+
14
+ type PaginationOrder = Literal["asc", "desc"]
@@ -4,7 +4,9 @@ from typing import Any, Optional
4
4
  import nanoid # type: ignore
5
5
  from pydantic import BaseModel, Field, ConfigDict
6
6
 
7
+ from ..mime import BaseMIMEData, MIMEData
7
8
  from ..inference_settings import InferenceSettings
9
+ from .predictions import PredictionData
8
10
 
9
11
  default_inference_settings = InferenceSettings(
10
12
  model="auto-small",
@@ -12,45 +14,29 @@ default_inference_settings = InferenceSettings(
12
14
  reasoning_effort="minimal",
13
15
  modality="native",
14
16
  image_resolution_dpi=192,
15
- browser_canvas="A4",
16
17
  n_consensus=1,
17
18
  )
18
- class Function(BaseModel):
19
- model_config = ConfigDict(extra="ignore")
20
- id: str = Field(default_factory=lambda: "function_" + nanoid.generate())
21
- path: str
22
- code: Optional[str] = Field(default=None, description="The code of the function")
23
- function_registry_id: Optional[str] = Field(default=None, description="The function registry id of the function")
24
-
25
- # @model_validator(mode="before")
26
- # @classmethod
27
- # def validate_function(cls, data: Any):
28
- # if isinstance(data, dict):
29
- # code = data.get("code")
30
- # function_registry_id = data.get("function_registry_id")
31
- # if code is None and function_registry_id is None:
32
- # raise ValueError("Either code or function_registry_id must be provided")
33
- # return data
34
-
35
- class FunctionHilCriterion(BaseModel):
19
+
20
+ class Computation(BaseModel):
21
+ expression: str = Field(description="The expression to use for the computation")
22
+
23
+ class ComputationSpec(BaseModel):
24
+ computations: dict[str, Computation] = Field(default_factory=dict, description="The computations to use for the project")
25
+
26
+ class HilCriterion(BaseModel):
36
27
  path: str
37
28
  agentic_fix: bool = Field(default=False, description="Whether to use agentic fix for the criterion")
38
29
 
39
- class HumanInTheLoopParams(BaseModel):
40
- enabled: bool = Field(default=False)
41
- url: str = Field(default="", description="The URL of the human in the loop endpoint")
42
- headers: dict[str, str] = Field(default_factory=dict, description="The headers to send to the human in the loop endpoint")
43
- criteria: list[FunctionHilCriterion] = Field(default_factory=list, description="The criteria to use for the human in the loop")
44
-
45
- class PublishedConfig(BaseModel):
46
- inference_settings: InferenceSettings = default_inference_settings
47
- json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
48
- human_in_the_loop_params: HumanInTheLoopParams = Field(default_factory=HumanInTheLoopParams)
49
- origin: str = Field(default="manual", description="The origin of the published config. Either 'Manual' or the iteration id that was used to generate the config")
50
30
  class DraftConfig(BaseModel):
51
- inference_settings: InferenceSettings = default_inference_settings
31
+ inference_settings: InferenceSettings = Field(default=default_inference_settings, description="The inference settings of the project")
52
32
  json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the builder config")
53
- human_in_the_loop_criteria: list[FunctionHilCriterion] = Field(default_factory=list)
33
+ human_in_the_loop_criteria: list[HilCriterion] = Field(default_factory=list)
34
+ computation_spec: ComputationSpec = Field(default_factory=ComputationSpec, description="The computation spec of the project")
35
+
36
+ class PublishedConfig(DraftConfig):
37
+ origin: str = Field(default="manual", description="The origin of the published config. Either 'Manual' or the iteration id that was used to generate the config")
38
+
39
+
54
40
  class Project(BaseModel):
55
41
  model_config = ConfigDict(extra="ignore")
56
42
  id: str = Field(default_factory=lambda: "project_" + nanoid.generate())
@@ -59,8 +45,7 @@ class Project(BaseModel):
59
45
  published_config: PublishedConfig
60
46
  draft_config: DraftConfig
61
47
  is_published: bool = False
62
- #computation_spec: ComputationSpec = Field(default_factory=ComputationSpec, description="The computation spec of the project")
63
- functions: list[Function] = Field(default_factory=list, description="The functions of the project")
48
+ is_schema_generated: bool = Field(default=True, description="Whether the schema has been generated for the project")
64
49
 
65
50
  class StoredProject(Project):
66
51
  """Project model with organization_id for database storage"""
@@ -78,5 +63,33 @@ class PatchProjectRequest(BaseModel):
78
63
  published_config: Optional[PublishedConfig] = Field(default=None, description="The published config of the project")
79
64
  draft_config: Optional[DraftConfig] = Field(default=None, description="The draft config of the project")
80
65
  is_published: Optional[bool] = Field(default=None, description="The published status of the project")
81
- #computation_spec: Optional[ComputationSpec] = Field(default=None, description="The computation spec of the project")
82
- functions: Optional[list[Function]] = Field(default=None, description="The functions of the project")
66
+ computation_spec: Optional[ComputationSpec] = Field(default=None, description="The computation spec of the project")
67
+ # ----------------------------
68
+ # ----------------------------
69
+ # ----------------------------
70
+
71
+ class BuilderDocument(BaseModel):
72
+ model_config = ConfigDict(extra="ignore")
73
+ id: str = Field(default_factory=lambda: "builder_doc_" + nanoid.generate())
74
+ updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
75
+ project_id: str
76
+ mime_data: BaseMIMEData = Field(description="The mime data of the document")
77
+
78
+ prediction_data: PredictionData = Field(default=PredictionData(), description="The prediction data of the document")
79
+ extraction_id: str | None = Field(default=None, description="The extraction id of the document")
80
+
81
+ class StoredBuilderDocument(BuilderDocument):
82
+ """Builder document model with organization_id and project_id for database storage"""
83
+ organization_id: str
84
+
85
+ class PatchBuilderDocumentRequest(BaseModel):
86
+ model_config = ConfigDict(extra="ignore")
87
+ extraction_id: Optional[str] = Field(default=None, description="The extraction id of the builder document")
88
+ prediction_data: Optional[PredictionData] = Field(default=None, description="The prediction data of the document")
89
+
90
+
91
+ class AddBuilderDocumentRequest(BaseModel):
92
+ model_config = ConfigDict(extra="ignore")
93
+ mime_data: MIMEData
94
+ prediction_data: PredictionData = Field(default=PredictionData(), description="The prediction data of the document")
95
+ project_id: str
@@ -2,7 +2,6 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
2
2
  from pydantic import BaseModel, ConfigDict, Field
3
3
 
4
4
  from ..mime import MIMEData
5
- from ..browser_canvas import BrowserCanvas
6
5
 
7
6
 
8
7
  class GenerateSchemaRequest(BaseModel):
@@ -14,9 +13,6 @@ class GenerateSchemaRequest(BaseModel):
14
13
  instructions: str | None = None
15
14
  """The modality of the document to load."""
16
15
  image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
17
- browser_canvas: BrowserCanvas = Field(
18
- default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
19
- )
20
16
 
21
17
  """The image operations to apply to the document."""
22
18
  stream: bool = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.68
3
+ Version: 0.0.70
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -1,36 +1,38 @@
1
1
  retab/__init__.py,sha256=s4GawWTRBYz4VY-CyAV5-ZdFtdw8V5oopGIYm9GgdSo,188
2
2
  retab/_resource.py,sha256=JfAU4UTa05ugWfbrpO7fsVr_pFewht99NkoIfK6kBQM,577
3
- retab/client copy.py,sha256=RqKYBvLB8Bp6QpS3DodOXffWQFdr36bobQRSU09vES0,27883
4
- retab/client.py,sha256=wOiTAzGw6Hi7FVbrs-TwSuPR6tmgOLMYjY1eBgrgBBk,29929
3
+ retab/client.py,sha256=fP9dI9YLFRNCqhODB85Gzp_sxhBJ5KF9R3U0qogP0dc,30075
5
4
  retab/generate_types.py,sha256=cUu1IX65uU__MHivmEb_PZtzAi8DYsvppZvcY30hj90,8425
6
5
  retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
6
  retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
7
  retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
9
- retab/resources/schemas.py,sha256=qWkOAZrimtbTHzhDof3z4PG72F7MIPrb3A495f6mrHE,5020
8
+ retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
10
9
  retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
11
- retab/resources/documents/client.py,sha256=7-BaveXUlwZqtMTvM3dNiltZS83jJVXDRuyfFMg_eGw,37446
10
+ retab/resources/documents/client.py,sha256=H-ylTrFcaC36iVj5XSWbwJB489n3GEWwbqsPgQQ50G4,31980
11
+ retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
12
+ retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
12
13
  retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
13
- retab/resources/projects/client.py,sha256=-T5iSRp-jODNVg-cgQiQ3S6D53cwCTwNQsI0Rr9vqsU,14654
14
+ retab/resources/projects/client.py,sha256=nvqsDiVyeRrXsoYddwyXNmpZxaBLYWAGO2e0n6qkCzY,14988
14
15
  retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- retab/types/browser_canvas.py,sha256=LdW1SnaWb4WdFgFBCo8p9k78K2fmzV5YtloQtIR7hk0,82
16
16
  retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
17
- retab/types/inference_settings.py,sha256=co0C5ynGm7z7Bvi7gP3CFwhkRJUd-bMNBVbavFu7Iuo,939
18
- retab/types/mime.py,sha256=1k3REXEVJ4dlFErs27Xx05sIWHWr3KjLzuKSDXEMNQQ,11973
17
+ retab/types/inference_settings.py,sha256=e4NDZHFdtrqi30Za1z0JhRU5PBO94yKxUGoK7S2kA3M,1053
18
+ retab/types/mime.py,sha256=3Zk7vIbV8o4uJQiclVH-ncKKhs_ZeVi-UQV68TTu7s0,10039
19
19
  retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
20
- retab/types/pagination.py,sha256=-XrKILKX_5isTHTfShLiK3Kwp21Y6Wqy0Jci8lIFQig,109
20
+ retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
21
21
  retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
22
22
  retab/types/documents/__init__.py,sha256=RaD6PnvRJw7QEVTh_PYNX6gckpLcxUJH7FKaopRKJzY,114
23
23
  retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
24
- retab/types/documents/create_messages.py,sha256=fibThBlJg1iKQfVkvIVL1QqZxrraIl9qeO0lJHLo_nQ,7509
25
- retab/types/documents/extract.py,sha256=KeEDnbvXTtgamrGvpBINsg5MJAue-Xf_lNi1a71M7oE,20067
26
- retab/types/documents/parse.py,sha256=ekXnjnFEx7adZ8dJKivo5QRtYnKBEUFzJiIM9WY-LPY,1454
24
+ retab/types/documents/create_messages.py,sha256=Cox0QgIyLhTXIvw1Nzd2BCnB9-5KAYgw_gads5eTaDw,7272
25
+ retab/types/documents/extract.py,sha256=Z-Qsj5-lmWb9dkgyTTt3Z7Lpd4_7SZdRGErtV_aw8oE,16492
26
+ retab/types/documents/parse.py,sha256=Jd6i-1UXhAtgntRBZItEHGHeevyLdLmbTQa1-HNrico,1305
27
+ retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
27
29
  retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
28
30
  retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
29
- retab/types/projects/model.py,sha256=0He-lMiT4jX1SwukNowq41LFPeozWDnTVsbjlmCNWJw,4374
31
+ retab/types/projects/model.py,sha256=f5NSIvwQQXhax4gHu57CE0pUkU_5S8t3aSfzAAqFFlI,4615
30
32
  retab/types/projects/predictions.py,sha256=GeQX-nCmbzKL50aaSFlSyn0sgnbzKserPK6DSQT8ACk,1114
31
33
  retab/types/schemas/__init__.py,sha256=9ODWiC_4pUVKxoIKglYZjvRjRyd1ZCVxG8GBdQgHNbU,57
32
34
  retab/types/schemas/chat.py,sha256=ppTidxsNslTKE5aBva04i9IxeARMqYpXYLjtR7V6pBc,21219
33
- retab/types/schemas/generate.py,sha256=DN4pMJihoADyAZgA2RW7BddFzhbNOoaEnfEjjjUyMKM,1007
35
+ retab/types/schemas/generate.py,sha256=kB69Sc8cSw4oqaw-Rfzc-vz3ModfKuX86KOqp47y2-8,744
34
36
  retab/types/schemas/layout.py,sha256=JLPwQGIWfPBoe1Y5r-MhiNDJigzZ-yKZnVGgox0uqMk,1487
35
37
  retab/types/schemas/model.py,sha256=MwiSrck_5NCDvxjDSqgiDtmenGowXnfoOZ7KTINKW34,69588
36
38
  retab/types/schemas/templates.py,sha256=XihWTHi6t_6QjxN07n_1dee5KdhHiuoHAYfmKwI7gQg,1708
@@ -42,7 +44,7 @@ retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
42
44
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
43
45
  retab/utils/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
46
  retab/utils/usage/json_schema.py,sha256=kRbL5E5OStlhlNlBXlxHNwaVHKd6MVhyqIb0y4aj8JA,84322
45
- retab-0.0.68.dist-info/METADATA,sha256=ZqKChL45ZWbRxN1B7QMPk-VWdz0qKezYOQOgPRhDFc0,4532
46
- retab-0.0.68.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- retab-0.0.68.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
48
- retab-0.0.68.dist-info/RECORD,,
47
+ retab-0.0.70.dist-info/METADATA,sha256=fmovHmnRSdICMGaxGo7aOrYiOXbqyMmXBxUiG7gtdEQ,4532
48
+ retab-0.0.70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
+ retab-0.0.70.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
50
+ retab-0.0.70.dist-info/RECORD,,