PyPI - retab - Versions diffs - 0.0.90__tar.gz → 0.0.92__tar.gz - Mend

retab 0.0.90.tar.gz → 0.0.92.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

{retab-0.0.90 → retab-0.0.92}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.90
+Version: 0.0.92
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.90 → retab-0.0.92}/retab/resources/documents/client.py RENAMED Viewed

@@ -16,7 +16,8 @@ from ...types.chat import ChatCompletionRetabMessage
 from ...types.documents.edit import EditRequest, EditResponse
 from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
 from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
-from ...types.documents.split import Category, SplitRequest, SplitResponse
+from ...types.documents.split import Subdocument, SplitRequest, SplitResponse
+from ...types.documents.classify import Category
 from ...types.documents.classify import ClassifyRequest, ClassifyResponse
 from ...types.mime import MIMEData
 from ...types.standards import PreparedRequest, FieldUnset
@@ -148,21 +149,21 @@ class BaseDocumentsMixin:
     def _prepare_split(
         self,
         document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
-        categories: list[Category] | list[dict[str, str]],
+        subdocuments: list[Subdocument] | list[dict[str, str]],
         model: str,
         **extra_body: Any,
     ) -> PreparedRequest:
         mime_document = prepare_mime_document(document)
-        # Convert dict categories to Category objects if needed
-        category_objects = [
-            Category(**cat) if isinstance(cat, dict) else cat
-            for cat in categories
+        # Convert dict subdocuments to Subdocument objects if needed
+        subdocument_objects = [
+            Subdocument(**subdoc) if isinstance(subdoc, dict) else subdoc
+            for subdoc in subdocuments
         ]
         request_dict: dict[str, Any] = {
             "document": mime_document,
-            "categories": category_objects,
+            "subdocuments": subdocument_objects,
             "model": model,
         }
@@ -644,20 +645,20 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
     def split(
         self,
         document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
-        categories: list[Category] | list[dict[str, str]],
+        subdocuments: list[Subdocument] | list[dict[str, str]],
         model: str,
         **extra_body: Any,
     ) -> SplitResponse:
         """
-        Split a document into sections based on provided categories.
+        Split a document into sections based on provided subdocuments.
         This method analyzes a multi-page document and classifies pages into
-        user-defined categories, returning the page ranges for each section.
+        user-defined subdocuments, returning the page ranges for each section.
         Args:
             document: The document to split. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
-            categories: List of categories to split the document into. Each category should have a 'name' and 'description'.
-                Can be Category objects or dicts with 'name' and 'description' keys.
+            subdocuments: List of subdocuments to split the document into. Each subdocument should have a 'name' and 'description'.
+                Can be Subdocument objects or dicts with 'name' and 'description' keys.
             model: The AI model to use for document splitting (e.g., "gemini-2.5-flash").
         Returns:
@@ -672,7 +673,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             response = retab.documents.split(
                 document="invoice_batch.pdf",
                 model="gemini-2.5-flash",
-                categories=[
+                subdocuments=[
                     {"name": "invoice", "description": "Invoice documents with billing information"},
                     {"name": "receipt", "description": "Receipt documents for payments"},
                     {"name": "contract", "description": "Legal contract documents"},
@@ -684,7 +685,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         """
         request = self._prepare_split(
             document=document,
-            categories=categories,
+            subdocuments=subdocuments,
             model=model,
             **extra_body,
         )
@@ -1039,20 +1040,20 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
     async def split(
         self,
         document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
-        categories: list[Category] | list[dict[str, str]],
+        subdocuments: list[Subdocument] | list[dict[str, str]],
         model: str,
         **extra_body: Any,
     ) -> SplitResponse:
         """
-        Split a document into sections based on provided categories asynchronously.
+        Split a document into sections based on provided subdocuments asynchronously.
         This method analyzes a multi-page document and classifies pages into
-        user-defined categories, returning the page ranges for each section.
+        user-defined subdocuments, returning the page ranges for each section.
         Args:
             document: The document to split. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
-            categories: List of categories to split the document into. Each category should have a 'name' and 'description'.
-                Can be Category objects or dicts with 'name' and 'description' keys.
+            subdocuments: List of subdocuments to split the document into. Each subdocument should have a 'name' and 'description'.
+                Can be Subdocument objects or dicts with 'name' and 'description' keys.
             model: The AI model to use for document splitting (e.g., "gemini-2.5-flash").
         Returns:
@@ -1067,7 +1068,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             response = await retab.documents.split(
                 document="invoice_batch.pdf",
                 model="gemini-2.5-flash",
-                categories=[
+                subdocuments=[
                     {"name": "invoice", "description": "Invoice documents with billing information"},
                     {"name": "receipt", "description": "Receipt documents for payments"},
                     {"name": "contract", "description": "Legal contract documents"},
@@ -1079,7 +1080,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         """
         request = self._prepare_split(
             document=document,
-            categories=categories,
+            subdocuments=subdocuments,
             model=model,
             **extra_body,
         )

{retab-0.0.90 → retab-0.0.92}/retab/types/documents/__init__.py RENAMED Viewed

@@ -1,12 +1,14 @@
 from .parse import ParseRequest, ParseResult, RetabUsage
-from .split import Category, SplitRequest, SplitResult, SplitResponse
-from .classify import ClassifyRequest, ClassifyResult, ClassifyResponse
+from .split import Subdocument, SplitRequest, SplitResult, SplitResponse
+from .classify import ClassifyRequest, ClassifyResult, ClassifyResponse, Category
 __all__ = [
     "ParseRequest",
     "ParseResult",
     "RetabUsage",
     "Category",
+    "Subdocument",
     "SplitRequest",
     "SplitResult",
     "SplitResponse",

{retab-0.0.90 → retab-0.0.92}/retab/types/documents/classify.py RENAMED Viewed

@@ -1,13 +1,16 @@
 from pydantic import BaseModel, Field
 from ..mime import MIMEData
-from .split import Category
+class Category(BaseModel):
+    name: str = Field(..., description="The name of the category")
+    description: str = Field(..., description="The description of the category")
 class ClassifyRequest(BaseModel):
     document: MIMEData = Field(..., description="The document to classify")
     categories: list[Category] = Field(..., description="The categories to classify the document into")
     model: str = Field(default="retab-small", description="The model to use for classification")
     first_n_pages: int | None = Field(default=None, description="Only use the first N pages of the document for classification. Useful for large documents where classification can be determined from early pages.")
+    context: str | None = Field(default=None, description="Additional context for classification (e.g., iteration context from a loop)")
 class ClassifyResult(BaseModel):

{retab-0.0.90 → retab-0.0.92}/retab/types/documents/split.py RENAMED Viewed

@@ -1,17 +1,17 @@
 from pydantic import BaseModel, Field
 from ..mime import MIMEData
-class Category(BaseModel):
-    name: str = Field(..., description="The name of the category")
-    description: str = Field(..., description="The description of the category")
-    partition_key: str | None = Field(default=None, description="The key to partition the category")
+class Subdocument(BaseModel):
+    name: str = Field(..., description="The name of the subdocument")
+    description: str = Field(..., description="The description of the subdocument")
+    partition_key: str | None = Field(default=None, description="The key to partition the subdocument")
 class SplitRequest(BaseModel):
     document: MIMEData = Field(..., description="The document to split")
-    categories: list[Category] = Field(..., description="The categories to split the document into")
+    subdocuments: list[Subdocument] = Field(..., description="The subdocuments to split the document into")
     model: str = Field(default="retab-small", description="The model to use to split the document")
+    context: str | None = Field(default=None, description="Additional context for the split operation (e.g., iteration context from a loop)")
 class Partition(BaseModel):
@@ -21,9 +21,9 @@ class Partition(BaseModel):
     last_page_y_end: float = Field(default=1.0, description="The y coordinate of the last page of the partition")
 class SplitResult(BaseModel):
-    name: str = Field(..., description="The name of the category")
-    pages: list[int] = Field(..., description="The pages of the category (1-indexed)")
-    partitions: list[Partition] = Field(default_factory=list, description="The partitions of the category")
+    name: str = Field(..., description="The name of the subdocument")
+    pages: list[int] = Field(..., description="The pages of the subdocument (1-indexed)")
+    partitions: list[Partition] = Field(default_factory=list, description="The partitions of the subdocument")
 class SplitResponse(BaseModel):
@@ -32,14 +32,14 @@ class SplitResponse(BaseModel):
 class SplitOutputItem(BaseModel):
     """Internal schema item for LLM structured output validation."""
-    name: str = Field(..., description="The name of the category")
-    start_page: int = Field(..., description="The start page of the category (1-indexed)")
-    end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
+    name: str = Field(..., description="The name of the subdocument")
+    start_page: int = Field(..., description="The start page of the subdocument (1-indexed)")
+    end_page: int = Field(..., description="The end page of the subdocument (1-indexed, inclusive)")
 class SplitOutputSchema(BaseModel):
     """Schema for LLM structured output."""
     splits: list[SplitOutputItem] = Field(
         ...,
-        description="List of document sections, each classified into one of the provided categories with their page ranges"
+        description="List of document sections, each classified into one of the provided subdocuments with their page ranges"
     )

{retab-0.0.90 → retab-0.0.92}/retab.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.90
+Version: 0.0.92
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.90 → retab-0.0.92}/setup.py RENAMED Viewed

@@ -6,7 +6,7 @@ with open("requirements.txt") as f:
 setup(
     name="retab",
-    version="0.0.90",
+    version="0.0.92",
     author="Retab",
     author_email="contact@retab.com",
     description="Retab official python library",