PyPI - retab - Versions diffs - 0.0.80__tar.gz → 0.0.82__tar.gz - Mend

retab 0.0.80__tar.gz → 0.0.82__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{retab-0.0.80 → retab-0.0.82}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.80
+Version: 0.0.82
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.80 → retab-0.0.82}/retab/client.py RENAMED Viewed

@@ -10,7 +10,7 @@ import backoff.types
 import httpx
 import truststore
-from .resources import documents, models, schemas, projects, extractions
+from .resources import documents, models, schemas, projects, extractions, edit
 from .types.standards import PreparedRequest, FieldUnset
@@ -188,6 +188,7 @@ class Retab(BaseRetab):
         self.documents = documents.Documents(client=self)
         self.models = models.Models(client=self)
         self.schemas = schemas.Schemas(client=self)
+        self.edit = edit.Edit(client=self)
     def _request(
         self,
@@ -485,6 +486,7 @@ class AsyncRetab(BaseRetab):
         self.documents = documents.AsyncDocuments(client=self)
         self.models = models.AsyncModels(client=self)
         self.schemas = schemas.AsyncSchemas(client=self)
+        self.edit = edit.AsyncEdit(client=self)
     def _parse_response(self, response: httpx.Response) -> Any:
         """Parse response based on content-type.

{retab-0.0.80 → retab-0.0.82}/retab/resources/documents/client.py RENAMED Viewed

@@ -17,6 +17,7 @@ from ...types.documents.edit import EditRequest, EditResponse
 from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
 from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
 from ...types.documents.split import Category, SplitRequest, SplitResponse
+from ...types.documents.classify import ClassifyRequest, ClassifyResponse
 from ...types.mime import MIMEData
 from ...types.standards import PreparedRequest, FieldUnset
 from ...utils.json_schema import load_json_schema, unflatten_dict
@@ -172,6 +173,34 @@ class BaseDocumentsMixin:
         split_request = SplitRequest(**request_dict)
         return PreparedRequest(method="POST", url="/v1/documents/split", data=split_request.model_dump(mode="json", exclude_unset=True))
+    def _prepare_classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> PreparedRequest:
+        mime_document = prepare_mime_document(document)
+        # Convert dict categories to Category objects if needed
+        category_objects = [
+            Category(**cat) if isinstance(cat, dict) else cat
+            for cat in categories
+        ]
+        request_dict: dict[str, Any] = {
+            "document": mime_document,
+            "categories": category_objects,
+            "model": model,
+        }
+        # Merge any extra fields provided by the caller
+        if extra_body:
+            request_dict.update(extra_body)
+        classify_request = ClassifyRequest(**request_dict)
+        return PreparedRequest(method="POST", url="/v1/documents/classify", data=classify_request.model_dump(mode="json", exclude_unset=True))
     def _prepare_extract(
         self,
         json_schema: dict[str, Any] | Path | str,
@@ -662,6 +691,57 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         response = self._client._prepared_request(request)
         return SplitResponse.model_validate(response)
+    def classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> ClassifyResponse:
+        """
+        Classify a document into one of the provided categories.
+        This method analyzes a document and classifies it into exactly one
+        of the user-defined categories, returning the classification with
+        chain-of-thought reasoning explaining the decision.
+        Args:
+            document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+            categories: List of categories to classify the document into. Each category should have a 'name' and 'description'.
+                Can be Category objects or dicts with 'name' and 'description' keys.
+            model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
+        Returns:
+            ClassifyResponse: Response containing:
+                - result: ClassifyResult with reasoning and classification.
+        Raises:
+            HTTPException: If the request fails.
+        Example:
+            ```python
+            response = retab.documents.classify(
+                document="invoice.pdf",
+                model="gemini-2.5-flash",
+                categories=[
+                    {"name": "invoice", "description": "Invoice documents with billing information"},
+                    {"name": "receipt", "description": "Receipt documents for payments"},
+                    {"name": "contract", "description": "Legal contract documents"},
+                ]
+            )
+            print(f"Classification: {response.result.classification}")
+            print(f"Reasoning: {response.result.reasoning}")
+            ```
+        """
+        request = self._prepare_classify(
+            document=document,
+            categories=categories,
+            model=model,
+            **extra_body,
+        )
+        response = self._client._prepared_request(request)
+        return ClassifyResponse.model_validate(response)
 class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
     """Documents API wrapper for asynchronous usage."""
@@ -1005,3 +1085,54 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         )
         response = await self._client._prepared_request(request)
         return SplitResponse.model_validate(response)
+    async def classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> ClassifyResponse:
+        """
+        Classify a document into one of the provided categories asynchronously.
+        This method analyzes a document and classifies it into exactly one
+        of the user-defined categories, returning the classification with
+        chain-of-thought reasoning explaining the decision.
+        Args:
+            document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+            categories: List of categories to classify the document into. Each category should have a 'name' and 'description'.
+                Can be Category objects or dicts with 'name' and 'description' keys.
+            model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
+        Returns:
+            ClassifyResponse: Response containing:
+                - result: ClassifyResult with reasoning and classification.
+        Raises:
+            HTTPException: If the request fails.
+        Example:
+            ```python
+            response = await retab.documents.classify(
+                document="invoice.pdf",
+                model="gemini-2.5-flash",
+                categories=[
+                    {"name": "invoice", "description": "Invoice documents with billing information"},
+                    {"name": "receipt", "description": "Receipt documents for payments"},
+                    {"name": "contract", "description": "Legal contract documents"},
+                ]
+            )
+            print(f"Classification: {response.result.classification}")
+            print(f"Reasoning: {response.result.reasoning}")
+            ```
+        """
+        request = self._prepare_classify(
+            document=document,
+            categories=categories,
+            model=model,
+            **extra_body,
+        )
+        response = await self._client._prepared_request(request)
+        return ClassifyResponse.model_validate(response)

retab-0.0.82/retab/resources/edit/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .client import Edit, AsyncEdit
+__all__ = ["Edit", "AsyncEdit"]

retab-0.0.82/retab/resources/edit/client.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""
+Edit SDK client - Wrapper for document editing functionality.
+"""
+from io import IOBase
+from pathlib import Path
+from typing import Any
+import PIL.Image
+from pydantic import HttpUrl
+from ..._resource import AsyncAPIResource, SyncAPIResource
+from ...utils.mime import prepare_mime_document
+from ...types.documents.edit import (
+    EditRequest,
+    EditResponse,
+)
+from ...types.mime import MIMEData
+from ...types.standards import PreparedRequest, FieldUnset
+from .templates import Templates, AsyncTemplates
+class BaseEditMixin:
+    """Shared methods for preparing edit API requests."""
+    def _prepare_fill_document(
+        self,
+        filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
+        **extra_body: Any,
+    ) -> PreparedRequest:
+        request_dict: dict[str, Any] = {
+            "filling_instructions": filling_instructions,
+        }
+        if document is not None:
+            mime_document = prepare_mime_document(document)
+            request_dict["document"] = mime_document
+        if model is not FieldUnset:
+            request_dict["model"] = model
+        if template_id is not FieldUnset:
+            request_dict["template_id"] = template_id
+        # Merge any extra fields provided by the caller
+        if extra_body:
+            request_dict.update(extra_body)
+        edit_request = EditRequest(**request_dict)
+        return PreparedRequest(
+            method="POST",
+            url="/v1/edit/fill-document",
+            data=edit_request.model_dump(mode="json", exclude_unset=True),
+        )
+class Edit(SyncAPIResource, BaseEditMixin):
+    """Edit API wrapper for synchronous usage."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+        self.templates = Templates(client=client)
+    def fill_document(
+        self,
+        filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
+        **extra_body: Any,
+    ) -> EditResponse:
+        """
+        Edit a document by inferring form fields and filling them with provided instructions.
+        This method performs:
+        1. Detection to identify form field bounding boxes
+        2. LLM inference to name and describe detected fields
+        3. LLM-based form filling using the provided instructions
+        4. Returns the filled document with form field values populated
+        Either `document` OR `template_id` must be provided, but not both.
+        Args:
+            filling_instructions: Instructions describing how to fill the form fields.
+            document: The document to edit. Can be a file path (Path or str), file-like object,
+                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
+            model: The LLM model to use for inference. Defaults to "retab-small".
+            template_id: Template ID to use for filling. When provided, uses the template's
+                pre-defined form fields and empty PDF. Only works for PDF documents.
+                Mutually exclusive with document.
+        Returns:
+            EditResponse: Response containing:
+                - form_data: List of form fields with filled values
+                - filled_document: Document with filled form values (MIMEData)
+        Raises:
+            HTTPException: If the request fails.
+        Supported document formats:
+            - PDF: Native form field detection and filling
+            - DOCX/DOC: Native editing to preserve styles and formatting
+            - PPTX/PPT: Native editing for presentations
+            - XLSX/XLS: Native editing for spreadsheets
+        """
+        request = self._prepare_fill_document(
+            filling_instructions=filling_instructions,
+            document=document,
+            model=model,
+            template_id=template_id,
+            **extra_body,
+        )
+        response = self._client._prepared_request(request)
+        return EditResponse.model_validate(response)
+class AsyncEdit(AsyncAPIResource, BaseEditMixin):
+    """Edit API wrapper for asynchronous usage."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+        self.templates = AsyncTemplates(client=client)
+    async def fill_document(
+        self,
+        filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
+        **extra_body: Any,
+    ) -> EditResponse:
+        """
+        Edit a document by inferring form fields and filling them with provided instructions asynchronously.
+        This method performs:
+        1. Detection to identify form field bounding boxes
+        2. LLM inference to name and describe detected fields
+        3. LLM-based form filling using the provided instructions
+        4. Returns the filled document with form field values populated
+        Either `document` OR `template_id` must be provided, but not both.
+        Args:
+            filling_instructions: Instructions describing how to fill the form fields.
+            document: The document to edit. Can be a file path (Path or str), file-like object,
+                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
+            model: The LLM model to use for inference. Defaults to "retab-small".
+            template_id: Template ID to use for filling. When provided, uses the template's
+                pre-defined form fields and empty PDF. Only works for PDF documents.
+                Mutually exclusive with document.
+        Returns:
+            EditResponse: Response containing:
+                - form_data: List of form fields with filled values
+                - filled_document: Document with filled form values (MIMEData)
+        Raises:
+            HTTPException: If the request fails.
+        Supported document formats:
+            - PDF: Native form field detection and filling
+            - DOCX/DOC: Native editing to preserve styles and formatting
+            - PPTX/PPT: Native editing for presentations
+            - XLSX/XLS: Native editing for spreadsheets
+        """
+        request = self._prepare_fill_document(
+            filling_instructions=filling_instructions,
+            document=document,
+            model=model,
+            template_id=template_id,
+            **extra_body,
+        )
+        response = await self._client._prepared_request(request)
+        return EditResponse.model_validate(response)

retab-0.0.82/retab/resources/edit/templates/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .client import Templates, AsyncTemplates
+__all__ = ["Templates", "AsyncTemplates"]

retab 0.0.80__tar.gz → 0.0.82__tar.gz

retab 0.0.80__tar.gz → 0.0.82__tar.gz