PyPI - retab - Versions diffs - 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl - Mend

retab 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

retab/resources/documents/client.py +30 -11
retab/types/documents/create_messages.py +1 -1
retab/types/documents/edit.py +17 -5
retab/types/documents/extract.py +1 -1
retab/types/documents/parse.py +1 -1
retab/types/documents/split.py +1 -1
retab/types/inference_settings.py +1 -1
retab/types/projects/model.py +1 -1
{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/METADATA +1 -1
{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/RECORD +12 -12
{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/WHEEL +0 -0
{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/top_level.txt +0 -0

retab/resources/documents/client.py CHANGED Viewed

@@ -118,19 +118,24 @@ class BaseDocumentsMixin:
     def _prepare_edit(
         self,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
         filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
         model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
         **extra_body: Any,
     ) -> PreparedRequest:
-        mime_document = prepare_mime_document(document)
         request_dict: dict[str, Any] = {
-            "document": mime_document,
             "filling_instructions": filling_instructions,
         }
+        if document is not None:
+            mime_document = prepare_mime_document(document)
+            request_dict["document"] = mime_document
         if model is not FieldUnset:
             request_dict["model"] = model
+        if template_id is not FieldUnset:
+            request_dict["template_id"] = template_id
         # Merge any extra fields provided by the caller
         if extra_body:
@@ -564,9 +569,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
     def edit(
         self,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
         filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
         model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
         **extra_body: Any,
     ) -> EditResponse:
         """
@@ -578,10 +584,15 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         3. LLM-based form filling using the provided instructions
         4. Returns the filled PDF with form field values populated
+        Either `document` OR `template_id` must be provided, but not both.
         Args:
-            document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
             filling_instructions: Instructions describing how to fill the form fields.
-            model: The LLM model to use for inference. Defaults to "gemini-2.5-pro".
+            document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+                Mutually exclusive with template_id.
+            model: The LLM model to use for inference. Defaults to "retab-small".
+            template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
+                and empty PDF. Only works for PDF documents. Mutually exclusive with document.
         Returns:
             EditResponse: Response containing:
@@ -592,9 +603,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             HTTPException: If the request fails.
         """
         request = self._prepare_edit(
-            document=document,
             filling_instructions=filling_instructions,
+            document=document,
             model=model,
+            template_id=template_id,
             **extra_body,
         )
         response = self._client._prepared_request(request)
@@ -901,9 +913,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
     async def edit(
         self,
-        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
         filling_instructions: str,
+        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
         model: str = FieldUnset,
+        template_id: str | None = FieldUnset,
         **extra_body: Any,
     ) -> EditResponse:
         """
@@ -915,10 +928,15 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         3. LLM-based form filling using the provided instructions
         4. Returns the filled PDF with form field values populated
+        Either `document` OR `template_id` must be provided, but not both.
         Args:
-            document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
             filling_instructions: Instructions describing how to fill the form fields.
+            document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
+                Mutually exclusive with template_id.
             model: The LLM model to use for inference. Defaults to "gemini-2.5-pro".
+            template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
+                and empty PDF. Only works for PDF documents. Mutually exclusive with document.
         Returns:
             EditResponse: Response containing:
@@ -929,9 +947,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             HTTPException: If the request fails.
         """
         request = self._prepare_edit(
-            document=document,
             filling_instructions=filling_instructions,
+            document=document,
             model=model,
+            template_id=template_id,
             **extra_body,
         )
         response = await self._client._prepared_request(request)

retab/types/documents/create_messages.py CHANGED Viewed

@@ -22,7 +22,7 @@ class DocumentCreateMessageRequest(BaseModel):
     model_config = ConfigDict(extra="ignore")
     document: MIMEData = Field(description="The document to load.")
     image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM")
-    model: str = Field(default="gemini-2.5-flash", description="The model to use for the document.")
+    model: str = Field(default="retab-small", description="The model to use for the document.")
 class DocumentCreateInputRequest(DocumentCreateMessageRequest):
     json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")

retab/types/documents/edit.py CHANGED Viewed

@@ -60,6 +60,10 @@ class BaseFormField(BaseModel):
         ...,
         description="Type of field. Currently supported values: 'text' and 'checkbox'.",
     )
+    key: str = Field(
+        ...,
+        description="Key of the field. This is used to identify the field in the form data.",
+    )
 class FormField(BaseFormField):
@@ -113,10 +117,11 @@ class OCRResult(BaseModel):
 class InferFormSchemaRequest(BaseModel):
-    """Request to infer form schema from a PDF."""
+    """Request to infer form schema from a PDF or DOCX document."""
-    document: MIMEData = Field(..., description="Input document (PDF)")
-    model: str = Field(default="gemini-2.5-pro", description="LLM model to use for inference")
+    document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
+    model: str = Field(default="retab-small", description="LLM model to use for inference")
+    instructions: Optional[str] = Field(default=None, description="Optional instructions to guide form field detection (e.g., which fields to focus on, specific areas to look for)")
 class InferFormSchemaResponse(BaseModel):
@@ -127,10 +132,17 @@ class InferFormSchemaResponse(BaseModel):
     form_fields_pdf: MIMEData = Field(..., description="PDF with form field bounding boxes")
-class EditRequest(InferFormSchemaRequest):
-    """Request for the infer_and_fill_schema endpoint."""
+class EditRequest(BaseModel):
+    """Request for the infer_and_fill_schema endpoint.
+    Either `document` OR `template_id` must be provided, but not both.
+    - When `document` is provided: OCR + LLM inference to detect and fill form fields
+    - When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
+    """
+    document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
+    model: str = Field(default="retab-small", description="LLM model to use for inference")
     filling_instructions: str = Field(..., description="Instructions to fill the form")
+    template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
 class EditResponse(BaseModel):
     """Response from the fill_form endpoint.

retab/types/documents/extract.py CHANGED Viewed

@@ -34,7 +34,7 @@ class DocumentExtractRequest(BaseModel):
     stream: bool = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
     seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
     store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
-    parallel_ocr_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
+    chunking_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
     metadata: dict[str, str] = Field(default_factory=dict, description="User-defined metadata to associate with this extraction")
     extraction_id: Optional[str] = Field(default=None, description="Extraction ID to use for this extraction. If not provided, a new ID will be generated.")

retab/types/documents/parse.py CHANGED Viewed

@@ -18,7 +18,7 @@ class ParseRequest(BaseModel):
     model_config = ConfigDict(extra="ignore")
     document: MIMEData = Field(..., description="Document to parse")
-    model: str = Field(default="gemini-2.5-flash", description="Model to use for parsing")
+    model: str = Field(default="retab-small", description="Model to use for parsing")
     table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
     image_resolution_dpi: int = Field(default=192, description="DPI for image processing", ge=96, le=300)

retab/types/documents/split.py CHANGED Viewed

@@ -10,7 +10,7 @@ class Category(BaseModel):
 class SplitRequest(BaseModel):
     document: MIMEData = Field(..., description="The document to split")
     categories: list[Category] = Field(..., description="The categories to split the document into")
-    model: str = Field(..., description="The model to use to split the document")
+    model: str = Field(default="retab-small", description="The model to use to split the document")
 class SplitResult(BaseModel):

retab/types/inference_settings.py CHANGED Viewed

@@ -8,7 +8,7 @@ class InferenceSettings(BaseModel):
     reasoning_effort: ChatCompletionReasoningEffort = "minimal"
     image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
     n_consensus: int = Field(default=1, ge=1, le=8, description="Number of consensus rounds to perform")
-    parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
+    chunking_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
     model_config = ConfigDict(extra="ignore")

retab/types/projects/model.py CHANGED Viewed

@@ -9,7 +9,7 @@ from ..inference_settings import InferenceSettings
 from .predictions import PredictionData
 default_inference_settings = InferenceSettings(
-    model="auto-small",
+    model="retab-small",
     temperature=0.5,
     reasoning_effort="minimal",
     image_resolution_dpi=192,

{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.77
+Version: 0.0.79
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/RECORD RENAMED Viewed

@@ -7,30 +7,30 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
 retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
 retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
-retab/resources/documents/client.py,sha256=xiHZPvaxETqZGXanOzp1zFQBcSB7WlgiXGtiv6Ys1dQ,42496
+retab/resources/documents/client.py,sha256=XxWo9FlktrpuskAPyKWTx9UIA2VA81g0SbHjHYnigMM,43583
 retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
 retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
 retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
 retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQzIXho,15593
 retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
-retab/types/inference_settings.py,sha256=8oeah8r9W4qj8ojn6aXCWsNkGloc2XgQ095W40Z_YUk,1135
+retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
 retab/types/mime.py,sha256=ZLNCD3pvgn5cbGfJwzrdkjgB9dMHCbN67YEV9bx47zE,10063
 retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
 retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
 retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
 retab/types/documents/__init__.py,sha256=YDsvsmwkS5lfGXk5aBqSqmFh6LKX3dM6q_cUo5oIydU,277
 retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
-retab/types/documents/create_messages.py,sha256=Cox0QgIyLhTXIvw1Nzd2BCnB9-5KAYgw_gads5eTaDw,7272
-retab/types/documents/edit.py,sha256=HjDjhHlj08Kks7ABVohTrAJ9QngDgwVj32AxXitjrv0,4804
-retab/types/documents/extract.py,sha256=DhS9jm0lUgXVLObKm2CnSJQ2eqMmsBfttO0K9TndfIw,16728
-retab/types/documents/parse.py,sha256=Jd6i-1UXhAtgntRBZItEHGHeevyLdLmbTQa1-HNrico,1305
-retab/types/documents/split.py,sha256=Sjp2u7Ob6nBRQL23RlgiabgyUmoyf8aEyr7zdvUdU-M,1228
+retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
+retab/types/documents/edit.py,sha256=ZY-a_Q9Y76e4oojeJJsisoCZbNSU6gqwAgb9fq9S76w,5930
+retab/types/documents/extract.py,sha256=eMaVl76K_1CeuLmdttfrf4yoQqs27f10w9rNBePb0DY,16724
+retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
+retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
 retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
 retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
 retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
-retab/types/projects/model.py,sha256=ACvqLQ850t6-mxl_s8fU87Oto6pLCW4ABN4zLe6Bog8,4592
+retab/types/projects/model.py,sha256=keuCzlqUR_WPHW5Grl68fG4VxjTFmNDXQk0MlZ-vtXs,4593
 retab/types/projects/predictions.py,sha256=GeQX-nCmbzKL50aaSFlSyn0sgnbzKserPK6DSQT8ACk,1114
 retab/types/schemas/__init__.py,sha256=9ODWiC_4pUVKxoIKglYZjvRjRyd1ZCVxG8GBdQgHNbU,57
 retab/types/schemas/chat.py,sha256=ppTidxsNslTKE5aBva04i9IxeARMqYpXYLjtR7V6pBc,21219
@@ -44,7 +44,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
 retab/utils/json_schema.py,sha256=F3MLNGskpfPh1IkXHPLp60ceOEFD79GyL8mVvr0OiVM,19583
 retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
-retab-0.0.77.dist-info/METADATA,sha256=F2-lc5_Am2m8rqSaVLrlsp0Uwdhe1pLZmiwcplBM9KA,4532
-retab-0.0.77.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-retab-0.0.77.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
-retab-0.0.77.dist-info/RECORD,,
+retab-0.0.79.dist-info/METADATA,sha256=GAgtfkDV8Zu0Bc4dBl7vL87xLutKpGUqpwCY3RxGFP0,4532
+retab-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.79.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.79.dist-info/RECORD,,

{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/WHEEL RENAMED Viewed

File without changes

{retab-0.0.77.dist-info → retab-0.0.79.dist-info}/top_level.txt RENAMED Viewed

File without changes

retab 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl

retab 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl