PyPI - retab - Versions diffs - 0.0.83__py3-none-any.whl → 0.0.85__py3-none-any.whl - Mend

retab 0.0.83__py3-none-any.whl → 0.0.85__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

retab/resources/edit/agent/__init__.py +4 -0
retab/resources/edit/agent/client.py +157 -0
retab/resources/edit/client.py +21 -156
retab/types/documents/edit.py +2 -3
{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/METADATA +1 -1
{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/RECORD +8 -6
{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/WHEEL +0 -0
{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/top_level.txt +0 -0

retab/resources/edit/agent/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .client import Agent, AsyncAgent
+__all__ = ["Agent", "AsyncAgent"]

retab/resources/edit/agent/client.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""
+Agent Edit SDK client - Wrapper for agent-based document editing functionality.
+"""
+from io import IOBase
+from pathlib import Path
+from typing import Any
+import PIL.Image
+from pydantic import HttpUrl
+from ...._resource import AsyncAPIResource, SyncAPIResource
+from ....utils.mime import prepare_mime_document
+from ....types.documents.edit import (
+    EditRequest,
+    EditResponse,
+)
+from ....types.mime import MIMEData
+from ....types.standards import PreparedRequest, FieldUnset
+class BaseAgentMixin:
+    """Shared methods for preparing agent edit API requests."""
+    def _prepare_fill(
+        self,
+        instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        **extra_body: Any,
+    ) -> PreparedRequest:
+        request_dict: dict[str, Any] = {
+            "instructions": instructions,
+        }
+        if document is not None:
+            mime_document = prepare_mime_document(document)
+            request_dict["document"] = mime_document
+        if model is not FieldUnset:
+            request_dict["model"] = model
+        # Merge any extra fields provided by the caller
+        if extra_body:
+            request_dict.update(extra_body)
+        edit_request = EditRequest(**request_dict)
+        return PreparedRequest(
+            method="POST",
+            url="/v1/edit/agent/fill",
+            data=edit_request.model_dump(mode="json", exclude_unset=True),
+        )
+class Agent(SyncAPIResource, BaseAgentMixin):
+    """Agent Edit API wrapper for synchronous usage."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+    def fill(
+        self,
+        instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        **extra_body: Any,
+    ) -> EditResponse:
+        """
+        Edit a document by inferring form fields and filling them with provided instructions.
+        This method performs:
+        1. Detection to identify form field bounding boxes
+        2. LLM inference to name and describe detected fields
+        3. LLM-based form filling using the provided instructions
+        4. Returns the filled document with form field values populated
+        Args:
+            instructions: Instructions describing how to fill the form fields.
+            document: The document to edit. Can be a file path (Path or str), file-like object,
+                MIMEData, PIL Image, or URL.
+            model: The LLM model to use for inference. Defaults to "retab-small".
+        Returns:
+            EditResponse: Response containing:
+                - form_data: List of form fields with filled values
+                - filled_document: Document with filled form values (MIMEData)
+        Raises:
+            HTTPException: If the request fails.
+        Supported document formats:
+            - PDF: Native form field detection and filling
+            - DOCX/DOC: Native editing to preserve styles and formatting
+            - PPTX/PPT: Native editing for presentations
+            - XLSX/XLS: Native editing for spreadsheets
+        """
+        request = self._prepare_fill(
+            instructions=instructions,
+            document=document,
+            model=model,
+            **extra_body,
+        )
+        response = self._client._prepared_request(request)
+        return EditResponse.model_validate(response)
+class AsyncAgent(AsyncAPIResource, BaseAgentMixin):
+    """Agent Edit API wrapper for asynchronous usage."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+    async def fill(
+        self,
+        instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        **extra_body: Any,
+    ) -> EditResponse:
+        """
+        Edit a document by inferring form fields and filling them with provided instructions asynchronously.
+        This method performs:
+        1. Detection to identify form field bounding boxes
+        2. LLM inference to name and describe detected fields
+        3. LLM-based form filling using the provided instructions
+        4. Returns the filled document with form field values populated
+        Args:
+            instructions: Instructions describing how to fill the form fields.
+            document: The document to edit. Can be a file path (Path or str), file-like object,
+                MIMEData, PIL Image, or URL.
+            model: The LLM model to use for inference. Defaults to "retab-small".
+        Returns:
+            EditResponse: Response containing:
+                - form_data: List of form fields with filled values
+                - filled_document: Document with filled form values (MIMEData)
+        Raises:
+            HTTPException: If the request fails.
+        Supported document formats:
+            - PDF: Native form field detection and filling
+            - DOCX/DOC: Native editing to preserve styles and formatting
+            - PPTX/PPT: Native editing for presentations
+            - XLSX/XLS: Native editing for spreadsheets
+        """
+        request = self._prepare_fill(
+            instructions=instructions,
+            document=document,
+            model=model,
+            **extra_body,
+        )
+        response = await self._client._prepared_request(request)
+        return EditResponse.model_validate(response)

retab/resources/edit/client.py CHANGED Viewed

@@ -1,176 +1,41 @@
 """
 Edit SDK client - Wrapper for document editing functionality.
+Provides access to:
+- edit.agent.fill() - Agent-based document editing (PDF, DOCX, PPTX, XLSX)
+- edit.templates.* - Template-based PDF form filling
 """
-from io import IOBase
-from pathlib import Path
 from typing import Any
-import PIL.Image
-from pydantic import HttpUrl
 from ..._resource import AsyncAPIResource, SyncAPIResource
-from ...utils.mime import prepare_mime_document
-from ...types.documents.edit import (
-    EditRequest,
-    EditResponse,
-)
-from ...types.mime import MIMEData
-from ...types.standards import PreparedRequest, FieldUnset
 from .templates import Templates, AsyncTemplates
+from .agent import Agent, AsyncAgent
-class BaseEditMixin:
-    """Shared methods for preparing edit API requests."""
-    def _prepare_fill_document(
-        self,
-        instructions: str,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
-        model: str = FieldUnset,
-        template_id: str | None = FieldUnset,
-        **extra_body: Any,
-    ) -> PreparedRequest:
-        request_dict: dict[str, Any] = {
-            "instructions": instructions,
-        }
-        if document is not None:
-            mime_document = prepare_mime_document(document)
-            request_dict["document"] = mime_document
-        if model is not FieldUnset:
-            request_dict["model"] = model
-        if template_id is not FieldUnset:
-            request_dict["template_id"] = template_id
-        # Merge any extra fields provided by the caller
-        if extra_body:
-            request_dict.update(extra_body)
-        edit_request = EditRequest(**request_dict)
-        return PreparedRequest(
-            method="POST",
-            url="/v1/edit/fill-document",
-            data=edit_request.model_dump(mode="json", exclude_unset=True),
-        )
-class Edit(SyncAPIResource, BaseEditMixin):
-    """Edit API wrapper for synchronous usage."""
+class Edit(SyncAPIResource):
+    """Edit API wrapper for synchronous usage.
+    Sub-clients:
+        agent: Agent-based document editing (fill any document with AI)
+        templates: Template-based PDF form filling (for batch processing)
+    """
     def __init__(self, client: Any) -> None:
         super().__init__(client=client)
+        self.agent = Agent(client=client)
         self.templates = Templates(client=client)
-    def fill_document(
-        self,
-        instructions: str,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
-        model: str = FieldUnset,
-        template_id: str | None = FieldUnset,
-        **extra_body: Any,
-    ) -> EditResponse:
-        """
-        Edit a document by inferring form fields and filling them with provided instructions.
-        This method performs:
-        1. Detection to identify form field bounding boxes
-        2. LLM inference to name and describe detected fields
-        3. LLM-based form filling using the provided instructions
-        4. Returns the filled document with form field values populated
-        Either `document` OR `template_id` must be provided, but not both.
-        Args:
-            instructions: Instructions describing how to fill the form fields.
-            document: The document to edit. Can be a file path (Path or str), file-like object,
-                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
-            model: The LLM model to use for inference. Defaults to "retab-small".
-            template_id: Template ID to use for filling. When provided, uses the template's
-                pre-defined form fields and empty PDF. Only works for PDF documents.
-                Mutually exclusive with document.
-        Returns:
-            EditResponse: Response containing:
-                - form_data: List of form fields with filled values
-                - filled_document: Document with filled form values (MIMEData)
-        Raises:
-            HTTPException: If the request fails.
-        Supported document formats:
-            - PDF: Native form field detection and filling
-            - DOCX/DOC: Native editing to preserve styles and formatting
-            - PPTX/PPT: Native editing for presentations
-            - XLSX/XLS: Native editing for spreadsheets
-        """
-        request = self._prepare_fill_document(
-            instructions=instructions,
-            document=document,
-            model=model,
-            template_id=template_id,
-            **extra_body,
-        )
-        response = self._client._prepared_request(request)
-        return EditResponse.model_validate(response)
-class AsyncEdit(AsyncAPIResource, BaseEditMixin):
-    """Edit API wrapper for asynchronous usage."""
+class AsyncEdit(AsyncAPIResource):
+    """Edit API wrapper for asynchronous usage.
+    Sub-clients:
+        agent: Agent-based document editing (fill any document with AI)
+        templates: Template-based PDF form filling (for batch processing)
+    """
     def __init__(self, client: Any) -> None:
         super().__init__(client=client)
+        self.agent = AsyncAgent(client=client)
         self.templates = AsyncTemplates(client=client)
-    async def fill_document(
-        self,
-        instructions: str,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
-        model: str = FieldUnset,
-        template_id: str | None = FieldUnset,
-        **extra_body: Any,
-    ) -> EditResponse:
-        """
-        Edit a document by inferring form fields and filling them with provided instructions asynchronously.
-        This method performs:
-        1. Detection to identify form field bounding boxes
-        2. LLM inference to name and describe detected fields
-        3. LLM-based form filling using the provided instructions
-        4. Returns the filled document with form field values populated
-        Either `document` OR `template_id` must be provided, but not both.
-        Args:
-            instructions: Instructions describing how to fill the form fields.
-            document: The document to edit. Can be a file path (Path or str), file-like object,
-                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
-            model: The LLM model to use for inference. Defaults to "retab-small".
-            template_id: Template ID to use for filling. When provided, uses the template's
-                pre-defined form fields and empty PDF. Only works for PDF documents.
-                Mutually exclusive with document.
-        Returns:
-            EditResponse: Response containing:
-                - form_data: List of form fields with filled values
-                - filled_document: Document with filled form values (MIMEData)
-        Raises:
-            HTTPException: If the request fails.
-        Supported document formats:
-            - PDF: Native form field detection and filling
-            - DOCX/DOC: Native editing to preserve styles and formatting
-            - PPTX/PPT: Native editing for presentations
-            - XLSX/XLS: Native editing for spreadsheets
-        """
-        request = self._prepare_fill_document(
-            instructions=instructions,
-            document=document,
-            model=model,
-            template_id=template_id,
-            **extra_body,
-        )
-        response = await self._client._prepared_request(request)
-        return EditResponse.model_validate(response)

retab/types/documents/edit.py CHANGED Viewed

@@ -107,9 +107,8 @@ class OCRResult(BaseModel):
 class InferFormSchemaRequest(BaseModel):
     """Request to infer form schema from a PDF or DOCX document."""
-    document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
+    document: MIMEData = Field(..., description="Input document (PDF, DOCX, XLSX or PPTX).")
     model: str = Field(default="retab-small", description="LLM model to use for inference")
-    instructions: Optional[str] = Field(default=None, description="Optional instructions to guide form field detection (e.g., which fields to focus on, specific areas to look for)")
 class EditRequest(BaseModel):
@@ -119,7 +118,7 @@ class EditRequest(BaseModel):
     - When `document` is provided: OCR + LLM inference to detect and fill form fields
     - When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
     """
-    document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
+    document: Optional[MIMEData] = Field(default=None, description="Input document (PDF, DOCX, XLSX or PPTX). Mutually exclusive with template_id.")
     model: str = Field(default="retab-small", description="LLM model to use for inference")
     instructions: str = Field(..., description="Instructions to fill the form")
     template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")

{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.83
+Version: 0.0.85
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/RECORD RENAMED Viewed

@@ -9,7 +9,9 @@ retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,46
 retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
 retab/resources/documents/client.py,sha256=0ZOJojT4M9QZ53nheS_vuNZWcnmwTnKx3YqYyJ7_sGY,48912
 retab/resources/edit/__init__.py,sha256=yycIstpTSKsz2qXbrY3Buzd35UDcPWvb5hw6Eb2rLow,69
-retab/resources/edit/client.py,sha256=osWvuKj2SNH6-nQKsWcTYcm3jVENGlwGTvDnT45nDBY,6649
+retab/resources/edit/client.py,sha256=DJKlwh8xui7IDRjwPmiGKTC1_HshXLYXX-xr93FhSbo,1270
+retab/resources/edit/agent/__init__.py,sha256=i5IdOMhwOOQmnhPFeBbh7-ChqwQh5q7oLow1zJ0ZAwM,74
+retab/resources/edit/agent/client.py,sha256=BjVKjooWz-ZGRXwi0rcV7D_XW9iSPK0PzjzRt2gYTzI,5506
 retab/resources/edit/templates/__init__.py,sha256=n-zA_HXo7iGgeIclSwcsxmSueXJIRMo0iZjk_sax85I,90
 retab/resources/edit/templates/client.py,sha256=Eevzy5JaQmG5-hEshugQvrhgIBAjgZ8ZYZkpBSKEdBQ,19729
 retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
@@ -29,7 +31,7 @@ retab/types/documents/__init__.py,sha256=t1jXdpYqi-zQMC_9uM0m7eA1hRU0MCROwUx89cc
 retab/types/documents/classify.py,sha256=Tb6d_7kuTlWLr7bPn782dHrjtUVBCvXV3o9zm7j2lmE,1128
 retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
 retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
-retab/types/documents/edit.py,sha256=4VK9ed1CF179r8sU4pZXcJhKftorGhul9q-5BlM1Ik4,5606
+retab/types/documents/edit.py,sha256=QogPSQF7jDbDmwiPJeRAYTy6HxgKp-7hMMFtAqIHnY0,5374
 retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
 retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
 retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
@@ -55,7 +57,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
 retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
 retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
-retab-0.0.83.dist-info/METADATA,sha256=2fC7uK_AP2G2o6m0-PDITV3A12TB-UMETQ-V51WwxB0,4532
-retab-0.0.83.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-retab-0.0.83.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
-retab-0.0.83.dist-info/RECORD,,
+retab-0.0.85.dist-info/METADATA,sha256=0IXHFvCerJlHt1VPw6YNMhO3YU-1w-YP56i4OclgwgA,4532
+retab-0.0.85.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.85.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.85.dist-info/RECORD,,

{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/WHEEL RENAMED Viewed

File without changes

{retab-0.0.83.dist-info → retab-0.0.85.dist-info}/top_level.txt RENAMED Viewed

File without changes

retab 0.0.83__py3-none-any.whl → 0.0.85__py3-none-any.whl

retab 0.0.83__py3-none-any.whl → 0.0.85__py3-none-any.whl