PyPI - retab - Versions diffs - 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl - Mend

retab 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

retab/client.py +3 -1
retab/resources/documents/client.py +44 -138
retab/resources/extractions/__init__.py +3 -0
retab/resources/extractions/client.py +288 -0
retab/resources/projects/client.py +7 -1
retab/resources/schemas.py +0 -8
retab/types/documents/create_messages.py +10 -12
retab/types/documents/extract.py +16 -81
retab/types/documents/parse.py +0 -2
retab/types/extractions/__init__.py +0 -0
retab/types/extractions/types.py +3 -0
retab/types/inference_settings.py +6 -4
retab/types/mime.py +4 -38
retab/types/pagination.py +8 -0
retab/types/projects/model.py +49 -36
retab/types/schemas/generate.py +0 -4
{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/METADATA +1 -1
{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/RECORD +20 -18
retab/client copy.py +0 -693
retab/types/browser_canvas.py +0 -3
{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/WHEEL +0 -0
{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/top_level.txt +0 -0

retab/types/extractions/types.py ADDED Viewed

@@ -0,0 +1,3 @@
+from typing import Literal
+type HumanReviewStatus = Literal["success", "review_required", "reviewed"]

retab/types/inference_settings.py CHANGED Viewed

@@ -1,17 +1,19 @@
 from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
 from pydantic import BaseModel, Field, ConfigDict
 from .modality import Modality
-from .browser_canvas import BrowserCanvas
+from typing import Any
 class InferenceSettings(BaseModel):
     model: str = "gpt-5-mini"
     temperature: float = 0.0
     reasoning_effort: ChatCompletionReasoningEffort = "minimal"
     image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
-    browser_canvas: BrowserCanvas = "A4"
-    n_consensus: int = Field(default=1, description="Number of consensus rounds to perform")
+    n_consensus: int = Field(default=1, ge=1, le=8, description="Number of consensus rounds to perform")
     modality: Modality = "native"
     parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     model_config = ConfigDict(extra="ignore")
+class ExtractionSettings(InferenceSettings):
+    json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")

retab/types/mime.py CHANGED Viewed

@@ -171,44 +171,10 @@ class MIMEData(BaseModel):
         return self.__str__()
-class BaseMIMEData(MIMEData):
-    @classmethod
-    def model_validate(
-        cls, obj: Any, *, strict: bool | None = None, extra: Any | None = None, from_attributes: bool | None = None, context: Any | None = None, by_alias: bool | None = None, by_name: bool | None = None
-    ) -> Self:
-        if isinstance(obj, MIMEData):
-            # Convert MIMEData instance to dict
-            obj = obj.model_dump()
-        if isinstance(obj, dict) and "url" in obj:
-            # Truncate URL to 1000 chars or less, ensuring it's a valid base64 string
-            if len(obj["url"]) > 1000:
-                # Find the position of the base64 data
-                if "," in obj["url"]:
-                    prefix, base64_data = obj["url"].split(",", 1)
-                    # Calculate how many characters we can keep (must be a multiple of 4)
-                    max_base64_len = 1000 - len(prefix) - 1  # -1 for the comma
-                    # Ensure the length is a multiple of 4
-                    max_base64_len = max_base64_len - (max_base64_len % 4)
-                    # Truncate and reassemble
-                    obj["url"] = prefix + "," + base64_data[:max_base64_len]
-                else:
-                    # If there's no comma (unexpected format), truncate to 996 chars (multiple of 4)
-                    obj["url"] = obj["url"][:996]
-        return super().model_validate(obj, strict=strict, extra=extra, from_attributes=from_attributes, context=context, by_alias=by_alias, by_name=by_name)
-    @property
-    def id(self) -> str:
-        raise NotImplementedError("id is not implemented for BaseMIMEData - id is the hash of the content, so it's not possible to generate it from the base class")
-    def __str__(self) -> str:
-        truncated_url = self.url[:50] + "..." if len(self.url) > 50 else self.url
-        truncated_content = self.content[:50] + "..." if len(self.content) > 50 else self.content
-        return f"BaseMIMEData(filename='{self.filename}', url='{truncated_url}', content='{truncated_content}', mime_type='{self.mime_type}', extension='{self.extension}')"
-    def __repr__(self) -> str:
-        return self.__str__()
+class BaseMIMEData(BaseModel):
+    id: str = Field(..., description="ID of the file")
+    filename: str = Field(..., description="Filename of the file")
+    mime_type: str = Field(..., description="MIME type of the file")
 # **** MIME DATACLASSES ****
 class AttachmentMetadata(BaseModel):

retab/types/pagination.py CHANGED Viewed

@@ -1,6 +1,14 @@
+from typing import Any, List, Literal
 from pydantic import BaseModel
 class ListMetadata(BaseModel):
     before: str | None
     after: str | None
+class PaginatedList(BaseModel):
+    data: List[Any]
+    list_metadata: ListMetadata
+type PaginationOrder = Literal["asc", "desc"]

retab/types/projects/model.py CHANGED Viewed

@@ -4,7 +4,9 @@ from typing import Any, Optional
 import nanoid  # type: ignore
 from pydantic import BaseModel, Field, ConfigDict
+from ..mime import BaseMIMEData, MIMEData
 from ..inference_settings import InferenceSettings
+from .predictions import PredictionData
 default_inference_settings = InferenceSettings(
     model="auto-small",
@@ -12,45 +14,29 @@ default_inference_settings = InferenceSettings(
     reasoning_effort="minimal",
     modality="native",
     image_resolution_dpi=192,
-    browser_canvas="A4",
     n_consensus=1,
 )
-class Function(BaseModel):
-    model_config = ConfigDict(extra="ignore")
-    id: str = Field(default_factory=lambda: "function_" + nanoid.generate())
-    path: str
-    code: Optional[str] = Field(default=None, description="The code of the function")
-    function_registry_id: Optional[str] = Field(default=None, description="The function registry id of the function")
-    # @model_validator(mode="before")
-    # @classmethod
-    # def validate_function(cls, data: Any):
-    #     if isinstance(data, dict):
-    #         code = data.get("code")
-    #         function_registry_id = data.get("function_registry_id")
-    #         if code is None and function_registry_id is None:
-    #             raise ValueError("Either code or function_registry_id must be provided")
-    #     return data
-class FunctionHilCriterion(BaseModel):
+class Computation(BaseModel):
+    expression: str = Field(description="The expression to use for the computation")
+class ComputationSpec(BaseModel):
+    computations: dict[str, Computation] = Field(default_factory=dict, description="The computations to use for the project")
+class HilCriterion(BaseModel):
     path: str
     agentic_fix: bool = Field(default=False, description="Whether to use agentic fix for the criterion")
-class HumanInTheLoopParams(BaseModel):
-    enabled: bool = Field(default=False)
-    url: str = Field(default="", description="The URL of the human in the loop endpoint")
-    headers: dict[str, str] = Field(default_factory=dict, description="The headers to send to the human in the loop endpoint")
-    criteria: list[FunctionHilCriterion] = Field(default_factory=list, description="The criteria to use for the human in the loop")
-class PublishedConfig(BaseModel):
-    inference_settings: InferenceSettings = default_inference_settings
-    json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
-    human_in_the_loop_params: HumanInTheLoopParams = Field(default_factory=HumanInTheLoopParams)
-    origin: str = Field(default="manual", description="The origin of the published config. Either 'Manual' or the iteration id that was used to generate the config")
 class DraftConfig(BaseModel):
-    inference_settings: InferenceSettings = default_inference_settings
+    inference_settings: InferenceSettings = Field(default=default_inference_settings, description="The inference settings of the project")
     json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the builder config")
-    human_in_the_loop_criteria: list[FunctionHilCriterion] = Field(default_factory=list)
+    human_in_the_loop_criteria: list[HilCriterion] = Field(default_factory=list)
+    computation_spec: ComputationSpec = Field(default_factory=ComputationSpec, description="The computation spec of the project")
+class PublishedConfig(DraftConfig):
+    origin: str = Field(default="manual", description="The origin of the published config. Either 'Manual' or the iteration id that was used to generate the config")
 class Project(BaseModel):
     model_config = ConfigDict(extra="ignore")
     id: str = Field(default_factory=lambda: "project_" + nanoid.generate())
@@ -59,8 +45,7 @@ class Project(BaseModel):
     published_config: PublishedConfig
     draft_config: DraftConfig
     is_published: bool = False
-    #computation_spec: ComputationSpec = Field(default_factory=ComputationSpec, description="The computation spec of the project")
-    functions: list[Function] = Field(default_factory=list, description="The functions of the project")
+    is_schema_generated: bool = Field(default=True, description="Whether the schema has been generated for the project")
 class StoredProject(Project):
     """Project model with organization_id for database storage"""
@@ -78,5 +63,33 @@ class PatchProjectRequest(BaseModel):
     published_config: Optional[PublishedConfig] = Field(default=None, description="The published config of the project")
     draft_config: Optional[DraftConfig] = Field(default=None, description="The draft config of the project")
     is_published: Optional[bool] = Field(default=None, description="The published status of the project")
-    #computation_spec: Optional[ComputationSpec] = Field(default=None, description="The computation spec of the project")
-    functions: Optional[list[Function]] = Field(default=None, description="The functions of the project")
+    computation_spec: Optional[ComputationSpec] = Field(default=None, description="The computation spec of the project")
+# ----------------------------
+# ----------------------------
+# ----------------------------
+class BuilderDocument(BaseModel):
+    model_config = ConfigDict(extra="ignore")
+    id: str = Field(default_factory=lambda: "builder_doc_" + nanoid.generate())
+    updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
+    project_id: str
+    mime_data: BaseMIMEData = Field(description="The mime data of the document")
+    prediction_data: PredictionData = Field(default=PredictionData(), description="The prediction data of the document")
+    extraction_id: str | None = Field(default=None, description="The extraction id of the document")
+class StoredBuilderDocument(BuilderDocument):
+    """Builder document model with organization_id and project_id for database storage"""
+    organization_id: str
+class PatchBuilderDocumentRequest(BaseModel):
+    model_config = ConfigDict(extra="ignore")
+    extraction_id: Optional[str] = Field(default=None, description="The extraction id of the builder document")
+    prediction_data: Optional[PredictionData] = Field(default=None, description="The prediction data of the document")
+class AddBuilderDocumentRequest(BaseModel):
+    model_config = ConfigDict(extra="ignore")
+    mime_data: MIMEData
+    prediction_data: PredictionData = Field(default=PredictionData(), description="The prediction data of the document")
+    project_id: str

retab/types/schemas/generate.py CHANGED Viewed

@@ -2,7 +2,6 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
 from pydantic import BaseModel, ConfigDict, Field
 from ..mime import MIMEData
-from ..browser_canvas import BrowserCanvas
 class GenerateSchemaRequest(BaseModel):
@@ -14,9 +13,6 @@ class GenerateSchemaRequest(BaseModel):
     instructions: str | None = None
     """The modality of the document to load."""
     image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
-    browser_canvas: BrowserCanvas = Field(
-        default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
-    )
     """The image operations to apply to the document."""
     stream: bool = False

{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.68
+Version: 0.0.70
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.68.dist-info → retab-0.0.70.dist-info}/RECORD RENAMED Viewed

@@ -1,36 +1,38 @@
 retab/__init__.py,sha256=s4GawWTRBYz4VY-CyAV5-ZdFtdw8V5oopGIYm9GgdSo,188
 retab/_resource.py,sha256=JfAU4UTa05ugWfbrpO7fsVr_pFewht99NkoIfK6kBQM,577
-retab/client copy.py,sha256=RqKYBvLB8Bp6QpS3DodOXffWQFdr36bobQRSU09vES0,27883
-retab/client.py,sha256=wOiTAzGw6Hi7FVbrs-TwSuPR6tmgOLMYjY1eBgrgBBk,29929
+retab/client.py,sha256=fP9dI9YLFRNCqhODB85Gzp_sxhBJ5KF9R3U0qogP0dc,30075
 retab/generate_types.py,sha256=cUu1IX65uU__MHivmEb_PZtzAi8DYsvppZvcY30hj90,8425
 retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
-retab/resources/schemas.py,sha256=qWkOAZrimtbTHzhDof3z4PG72F7MIPrb3A495f6mrHE,5020
+retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
 retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
-retab/resources/documents/client.py,sha256=7-BaveXUlwZqtMTvM3dNiltZS83jJVXDRuyfFMg_eGw,37446
+retab/resources/documents/client.py,sha256=H-ylTrFcaC36iVj5XSWbwJB489n3GEWwbqsPgQQ50G4,31980
+retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
+retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
 retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
-retab/resources/projects/client.py,sha256=-T5iSRp-jODNVg-cgQiQ3S6D53cwCTwNQsI0Rr9vqsU,14654
+retab/resources/projects/client.py,sha256=nvqsDiVyeRrXsoYddwyXNmpZxaBLYWAGO2e0n6qkCzY,14988
 retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-retab/types/browser_canvas.py,sha256=LdW1SnaWb4WdFgFBCo8p9k78K2fmzV5YtloQtIR7hk0,82
 retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
-retab/types/inference_settings.py,sha256=co0C5ynGm7z7Bvi7gP3CFwhkRJUd-bMNBVbavFu7Iuo,939
-retab/types/mime.py,sha256=1k3REXEVJ4dlFErs27Xx05sIWHWr3KjLzuKSDXEMNQQ,11973
+retab/types/inference_settings.py,sha256=e4NDZHFdtrqi30Za1z0JhRU5PBO94yKxUGoK7S2kA3M,1053
+retab/types/mime.py,sha256=3Zk7vIbV8o4uJQiclVH-ncKKhs_ZeVi-UQV68TTu7s0,10039
 retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
-retab/types/pagination.py,sha256=-XrKILKX_5isTHTfShLiK3Kwp21Y6Wqy0Jci8lIFQig,109
+retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
 retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
 retab/types/documents/__init__.py,sha256=RaD6PnvRJw7QEVTh_PYNX6gckpLcxUJH7FKaopRKJzY,114
 retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
-retab/types/documents/create_messages.py,sha256=fibThBlJg1iKQfVkvIVL1QqZxrraIl9qeO0lJHLo_nQ,7509
-retab/types/documents/extract.py,sha256=KeEDnbvXTtgamrGvpBINsg5MJAue-Xf_lNi1a71M7oE,20067
-retab/types/documents/parse.py,sha256=ekXnjnFEx7adZ8dJKivo5QRtYnKBEUFzJiIM9WY-LPY,1454
+retab/types/documents/create_messages.py,sha256=Cox0QgIyLhTXIvw1Nzd2BCnB9-5KAYgw_gads5eTaDw,7272
+retab/types/documents/extract.py,sha256=Z-Qsj5-lmWb9dkgyTTt3Z7Lpd4_7SZdRGErtV_aw8oE,16492
+retab/types/documents/parse.py,sha256=Jd6i-1UXhAtgntRBZItEHGHeevyLdLmbTQa1-HNrico,1305
+retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
 retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
 retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
-retab/types/projects/model.py,sha256=0He-lMiT4jX1SwukNowq41LFPeozWDnTVsbjlmCNWJw,4374
+retab/types/projects/model.py,sha256=f5NSIvwQQXhax4gHu57CE0pUkU_5S8t3aSfzAAqFFlI,4615
 retab/types/projects/predictions.py,sha256=GeQX-nCmbzKL50aaSFlSyn0sgnbzKserPK6DSQT8ACk,1114
 retab/types/schemas/__init__.py,sha256=9ODWiC_4pUVKxoIKglYZjvRjRyd1ZCVxG8GBdQgHNbU,57
 retab/types/schemas/chat.py,sha256=ppTidxsNslTKE5aBva04i9IxeARMqYpXYLjtR7V6pBc,21219
-retab/types/schemas/generate.py,sha256=DN4pMJihoADyAZgA2RW7BddFzhbNOoaEnfEjjjUyMKM,1007
+retab/types/schemas/generate.py,sha256=kB69Sc8cSw4oqaw-Rfzc-vz3ModfKuX86KOqp47y2-8,744
 retab/types/schemas/layout.py,sha256=JLPwQGIWfPBoe1Y5r-MhiNDJigzZ-yKZnVGgox0uqMk,1487
 retab/types/schemas/model.py,sha256=MwiSrck_5NCDvxjDSqgiDtmenGowXnfoOZ7KTINKW34,69588
 retab/types/schemas/templates.py,sha256=XihWTHi6t_6QjxN07n_1dee5KdhHiuoHAYfmKwI7gQg,1708
@@ -42,7 +44,7 @@ retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
 retab/utils/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/utils/usage/json_schema.py,sha256=kRbL5E5OStlhlNlBXlxHNwaVHKd6MVhyqIb0y4aj8JA,84322
-retab-0.0.68.dist-info/METADATA,sha256=ZqKChL45ZWbRxN1B7QMPk-VWdz0qKezYOQOgPRhDFc0,4532
-retab-0.0.68.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-retab-0.0.68.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
-retab-0.0.68.dist-info/RECORD,,
+retab-0.0.70.dist-info/METADATA,sha256=fmovHmnRSdICMGaxGo7aOrYiOXbqyMmXBxUiG7gtdEQ,4532
+retab-0.0.70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.70.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.70.dist-info/RECORD,,

retab 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl

retab 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl