PyPI - retab - Versions diffs - 0.0.80__py3-none-any.whl → 0.0.81__py3-none-any.whl - Mend

retab 0.0.80__py3-none-any.whl → 0.0.81__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

retab/resources/documents/client.py CHANGED Viewed

@@ -17,6 +17,7 @@ from ...types.documents.edit import EditRequest, EditResponse
 from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
 from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
 from ...types.documents.split import Category, SplitRequest, SplitResponse
+from ...types.documents.classify import ClassifyRequest, ClassifyResponse
 from ...types.mime import MIMEData
 from ...types.standards import PreparedRequest, FieldUnset
 from ...utils.json_schema import load_json_schema, unflatten_dict
@@ -172,6 +173,34 @@ class BaseDocumentsMixin:
         split_request = SplitRequest(**request_dict)
         return PreparedRequest(method="POST", url="/v1/documents/split", data=split_request.model_dump(mode="json", exclude_unset=True))
+    def _prepare_classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> PreparedRequest:
+        mime_document = prepare_mime_document(document)
+        # Convert dict categories to Category objects if needed
+        category_objects = [
+            Category(**cat) if isinstance(cat, dict) else cat
+            for cat in categories
+        ]
+        request_dict: dict[str, Any] = {
+            "document": mime_document,
+            "categories": category_objects,
+            "model": model,
+        }
+        # Merge any extra fields provided by the caller
+        if extra_body:
+            request_dict.update(extra_body)
+        classify_request = ClassifyRequest(**request_dict)
+        return PreparedRequest(method="POST", url="/v1/documents/classify", data=classify_request.model_dump(mode="json", exclude_unset=True))
     def _prepare_extract(
         self,
         json_schema: dict[str, Any] | Path | str,
@@ -662,6 +691,57 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         response = self._client._prepared_request(request)
         return SplitResponse.model_validate(response)
+    def classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> ClassifyResponse:
+        """
+        Classify a document into one of the provided categories.
+        This method analyzes a document and classifies it into exactly one
+        of the user-defined categories, returning the classification with
+        chain-of-thought reasoning explaining the decision.
+        Args:
+            document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+            categories: List of categories to classify the document into. Each category should have a 'name' and 'description'.
+                Can be Category objects or dicts with 'name' and 'description' keys.
+            model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
+        Returns:
+            ClassifyResponse: Response containing:
+                - result: ClassifyResult with reasoning and classification.
+        Raises:
+            HTTPException: If the request fails.
+        Example:
+            ```python
+            response = retab.documents.classify(
+                document="invoice.pdf",
+                model="gemini-2.5-flash",
+                categories=[
+                    {"name": "invoice", "description": "Invoice documents with billing information"},
+                    {"name": "receipt", "description": "Receipt documents for payments"},
+                    {"name": "contract", "description": "Legal contract documents"},
+                ]
+            )
+            print(f"Classification: {response.result.classification}")
+            print(f"Reasoning: {response.result.reasoning}")
+            ```
+        """
+        request = self._prepare_classify(
+            document=document,
+            categories=categories,
+            model=model,
+            **extra_body,
+        )
+        response = self._client._prepared_request(request)
+        return ClassifyResponse.model_validate(response)
 class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
     """Documents API wrapper for asynchronous usage."""
@@ -1005,3 +1085,54 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         )
         response = await self._client._prepared_request(request)
         return SplitResponse.model_validate(response)
+    async def classify(
+        self,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
+        categories: list[Category] | list[dict[str, str]],
+        model: str,
+        **extra_body: Any,
+    ) -> ClassifyResponse:
+        """
+        Classify a document into one of the provided categories asynchronously.
+        This method analyzes a document and classifies it into exactly one
+        of the user-defined categories, returning the classification with
+        chain-of-thought reasoning explaining the decision.
+        Args:
+            document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+            categories: List of categories to classify the document into. Each category should have a 'name' and 'description'.
+                Can be Category objects or dicts with 'name' and 'description' keys.
+            model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
+        Returns:
+            ClassifyResponse: Response containing:
+                - result: ClassifyResult with reasoning and classification.
+        Raises:
+            HTTPException: If the request fails.
+        Example:
+            ```python
+            response = await retab.documents.classify(
+                document="invoice.pdf",
+                model="gemini-2.5-flash",
+                categories=[
+                    {"name": "invoice", "description": "Invoice documents with billing information"},
+                    {"name": "receipt", "description": "Receipt documents for payments"},
+                    {"name": "contract", "description": "Legal contract documents"},
+                ]
+            )
+            print(f"Classification: {response.result.classification}")
+            print(f"Reasoning: {response.result.reasoning}")
+            ```
+        """
+        request = self._prepare_classify(
+            document=document,
+            categories=categories,
+            model=model,
+            **extra_body,
+        )
+        response = await self._client._prepared_request(request)
+        return ClassifyResponse.model_validate(response)

{retab-0.0.80.dist-info → retab-0.0.81.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.80
+Version: 0.0.81
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.80.dist-info → retab-0.0.81.dist-info}/RECORD RENAMED Viewed

@@ -7,7 +7,7 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
 retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
 retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
-retab/resources/documents/client.py,sha256=XxWo9FlktrpuskAPyKWTx9UIA2VA81g0SbHjHYnigMM,43583
+retab/resources/documents/client.py,sha256=E8v0aBF4-9ATYo5hkQ629OP5mm2AtodTzznlj2xRWtQ,49000
 retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
 retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
 retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
@@ -49,7 +49,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
 retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
 retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
-retab-0.0.80.dist-info/METADATA,sha256=yzLJuUr355iV7ihl3yNw5o2-7ha2m201Ht4t3889n7c,4532
-retab-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-retab-0.0.80.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
-retab-0.0.80.dist-info/RECORD,,
+retab-0.0.81.dist-info/METADATA,sha256=1dsE31zFzslvv3Up5BOM62auWgQNbLCie0hZ2NfwP5Y,4532
+retab-0.0.81.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.81.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.81.dist-info/RECORD,,

{retab-0.0.80.dist-info → retab-0.0.81.dist-info}/WHEEL RENAMED Viewed

File without changes

{retab-0.0.80.dist-info → retab-0.0.81.dist-info}/top_level.txt RENAMED Viewed

File without changes

retab 0.0.80__py3-none-any.whl → 0.0.81__py3-none-any.whl

retab 0.0.80__py3-none-any.whl → 0.0.81__py3-none-any.whl