PyPI - retab - Versions diffs - 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl - Mend

retab 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

retab/client.py CHANGED Viewed

@@ -10,7 +10,7 @@ import backoff.types
 import httpx
 import truststore
-from .resources import documents, models, schemas, projects, extractions, edit
+from .resources import documents, models, schemas, projects, extractions, edit, workflows
 from .types.standards import PreparedRequest, FieldUnset
@@ -189,7 +189,7 @@ class Retab(BaseRetab):
         self.models = models.Models(client=self)
         self.schemas = schemas.Schemas(client=self)
         self.edit = edit.Edit(client=self)
+        self.workflows = workflows.Workflows(client=self)
     def _request(
         self,
         method: str,
@@ -487,7 +487,8 @@ class AsyncRetab(BaseRetab):
         self.models = models.AsyncModels(client=self)
         self.schemas = schemas.AsyncSchemas(client=self)
         self.edit = edit.AsyncEdit(client=self)
+        self.workflows = workflows.AsyncWorkflows(client=self)
     def _parse_response(self, response: httpx.Response) -> Any:
         """Parse response based on content-type.

retab/resources/workflows/runs/client.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from io import IOBase
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 import PIL.Image
 from pydantic import HttpUrl
@@ -21,15 +21,19 @@ class WorkflowRunsMixin:
     def prepare_create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> PreparedRequest:
-        """Prepare a request to run a workflow with input documents.
+        """Prepare a request to run a workflow with input documents, JSON data, and/or text data.
         Args:
             workflow_id: The ID of the workflow to run
             documents: Mapping of start node IDs to their input documents.
                        Each document can be a file path, bytes, file-like object,
                        MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
         Returns:
             PreparedRequest: The prepared request
@@ -40,20 +44,37 @@ class WorkflowRunsMixin:
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
             ...         "start-node-2": Path("receipt.pdf"),
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
         """
+        data: Dict[str, Any] = {}
         # Convert each document to MIMEData and then to the format expected by the backend
-        documents_payload: Dict[str, Dict[str, Any]] = {}
-        for node_id, document in documents.items():
-            mime_data = prepare_mime_document(document)
-            documents_payload[node_id] = {
-                "filename": mime_data.filename,
-                "content": mime_data.content,
-                "mime_type": mime_data.mime_type,
-            }
-        data = {"documents": documents_payload}
+        if documents:
+            documents_payload: Dict[str, Dict[str, Any]] = {}
+            for node_id, document in documents.items():
+                mime_data = prepare_mime_document(document)
+                documents_payload[node_id] = {
+                    "filename": mime_data.filename,
+                    "content": mime_data.content,
+                    "mime_type": mime_data.mime_type,
+                }
+            data["documents"] = documents_payload
+        # Add JSON inputs directly
+        if json_inputs:
+            data["json_inputs"] = json_inputs
+        # Add text inputs directly
+        if text_inputs:
+            data["text_inputs"] = text_inputs
         return PreparedRequest(method="POST", url=f"/v1/workflows/{workflow_id}/run", data=data)
     def prepare_get(self, run_id: str) -> PreparedRequest:
@@ -77,9 +98,11 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
     def create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> WorkflowRun:
-        """Run a workflow with the provided input documents.
+        """Run a workflow with the provided inputs.
         This creates a workflow run and starts execution in the background.
         The returned WorkflowRun will have status "running" - use get()
@@ -90,25 +113,37 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
             documents: Mapping of start node IDs to their input documents.
                        Each document can be a file path, bytes, file-like object,
                        MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
         Returns:
             WorkflowRun: The created workflow run with status "running"
         Raises:
             HTTPException: If the request fails (e.g., workflow not found,
-                          missing input documents for start nodes)
+                          missing inputs for start nodes)
         Example:
             >>> run = client.workflows.runs.create(
             ...     workflow_id="wf_abc123",
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
-            ...         "start-node-2": Path("receipt.pdf"),
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
             >>> print(f"Run started: {run.id}, status: {run.status}")
         """
-        request = self.prepare_create(workflow_id=workflow_id, documents=documents)
+        request = self.prepare_create(
+            workflow_id=workflow_id,
+            documents=documents,
+            json_inputs=json_inputs,
+            text_inputs=text_inputs,
+        )
         response = self._client._prepared_request(request)
         return WorkflowRun.model_validate(response)
@@ -138,9 +173,11 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
     async def create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> WorkflowRun:
-        """Run a workflow with the provided input documents.
+        """Run a workflow with the provided inputs.
         This creates a workflow run and starts execution in the background.
         The returned WorkflowRun will have status "running" - use get()
@@ -151,25 +188,37 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
             documents: Mapping of start node IDs to their input documents.
                        Each document can be a file path, bytes, file-like object,
                        MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
         Returns:
             WorkflowRun: The created workflow run with status "running"
         Raises:
             HTTPException: If the request fails (e.g., workflow not found,
-                          missing input documents for start nodes)
+                          missing inputs for start nodes)
         Example:
             >>> run = await client.workflows.runs.create(
             ...     workflow_id="wf_abc123",
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
-            ...         "start-node-2": Path("receipt.pdf"),
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
             >>> print(f"Run started: {run.id}, status: {run.status}")
         """
-        request = self.prepare_create(workflow_id=workflow_id, documents=documents)
+        request = self.prepare_create(
+            workflow_id=workflow_id,
+            documents=documents,
+            json_inputs=json_inputs,
+            text_inputs=text_inputs,
+        )
         response = await self._client._prepared_request(request)
         return WorkflowRun.model_validate(response)

retab/types/documents/split.py CHANGED Viewed

@@ -5,6 +5,7 @@ from ..mime import MIMEData
 class Category(BaseModel):
     name: str = Field(..., description="The name of the category")
     description: str = Field(..., description="The description of the category")
+    partition_key: str | None = Field(default=None, description="The key to partition the category")
 class SplitRequest(BaseModel):
@@ -13,20 +14,32 @@ class SplitRequest(BaseModel):
     model: str = Field(default="retab-small", description="The model to use to split the document")
+class Partition(BaseModel):
+    key: str = Field(..., description="The partition key value (e.g., property ID, invoice number)")
+    pages: list[int] = Field(..., description="The pages of the partition (1-indexed)")
+    first_page_y_start: float = Field(default=0.0, description="The y coordinate of the first page of the partition")
+    last_page_y_end: float = Field(default=1.0, description="The y coordinate of the last page of the partition")
 class SplitResult(BaseModel):
     name: str = Field(..., description="The name of the category")
-    start_page: int = Field(..., description="The start page of the category (1-indexed)")
-    end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
+    pages: list[int] = Field(..., description="The pages of the category (1-indexed)")
+    partitions: list[Partition] = Field(default_factory=list, description="The partitions of the category")
 class SplitResponse(BaseModel):
     splits: list[SplitResult] = Field(..., description="The list of document splits with their page ranges")
+class SplitOutputItem(BaseModel):
+    """Internal schema item for LLM structured output validation."""
+    name: str = Field(..., description="The name of the category")
+    start_page: int = Field(..., description="The start page of the category (1-indexed)")
+    end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
 class SplitOutputSchema(BaseModel):
     """Schema for LLM structured output."""
-    splits: list[SplitResult] = Field(
-        ...,
+    splits: list[SplitOutputItem] = Field(
+        ...,
         description="List of document sections, each classified into one of the provided categories with their page ranges"
     )

{retab-0.0.87.dist-info → retab-0.0.89.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: retab
-Version: 0.0.87
+Version: 0.0.89
 Summary: Retab official python library
 Home-page: https://github.com/retab-dev/retab
 Author: Retab

{retab-0.0.87.dist-info → retab-0.0.89.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 retab/__init__.py,sha256=s4GawWTRBYz4VY-CyAV5-ZdFtdw8V5oopGIYm9GgdSo,188
 retab/_resource.py,sha256=JfAU4UTa05ugWfbrpO7fsVr_pFewht99NkoIfK6kBQM,577
-retab/client.py,sha256=VrOzEtZQPR4uydO8QJJYkMOoAiC1TfPbkXmTnatSQ0w,30172
+retab/client.py,sha256=ExQLR-xwFKIwqA1DoH3JxI1BU2RB7kWAiMbwR073w1c,30311
 retab/generate_types.py,sha256=cUu1IX65uU__MHivmEb_PZtzAi8DYsvppZvcY30hj90,8425
 retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,7 +21,7 @@ retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQ
 retab/resources/workflows/__init__.py,sha256=-I0QNX7XKEr8ZJTV4-awMyKxZqGlSkKMdibiHiB7cZ0,89
 retab/resources/workflows/client.py,sha256=G1dYV66Wsas_QWQ9O2N7s1VUt72TP1W1ZG-_cEWEURM,755
 retab/resources/workflows/runs/__init__.py,sha256=5hPZ-70StN0U8bOlhm9H_ZXFljBjy8VoWQRu1_cGAVM,101
-retab/resources/workflows/runs/client.py,sha256=8l87Sf5RNNLIJNyhCwCprqA9ffq3J9zSlwoQHdyrEN4,6771
+retab/resources/workflows/runs/client.py,sha256=GopedV363XnGl0mL3bZHWaOay12uAeTqq4iIEJSadMA,8739
 retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
 retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
@@ -36,7 +36,7 @@ retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_ig
 retab/types/documents/edit.py,sha256=b6UcYLOJkClpMu4QyYmdp-X4WtN8U_3oiMBc1KLklVY,5663
 retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
 retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
-retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
+retab/types/documents/split.py,sha256=Bjk5iJdS3v7I3rCvqpFUPlzgO4HINqh3uMPQJg-MqPc,2166
 retab/types/edit/__init__.py,sha256=M8hF97h7fX8RP9IsB6qpkw0eyvO0DFQvP6FmWL8caCQ,331
 retab/types/edit/templates.py,sha256=RLRIMdXzU-5_3XPf0iMSozjRTAP5Tliq0nrjlZn0l8E,2412
 retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,7 +59,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
 retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
 retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
-retab-0.0.87.dist-info/METADATA,sha256=Rz6B3ctJWOHF0hcaFxc2hEyBgpeBRgvScGxFNGjALMg,4532
-retab-0.0.87.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-retab-0.0.87.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
-retab-0.0.87.dist-info/RECORD,,
+retab-0.0.89.dist-info/METADATA,sha256=1ppp_sgtdC53grfu4xxD91N_-BDa7FBdofWz_Vd1WTw,4532
+retab-0.0.89.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.89.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.89.dist-info/RECORD,,

{retab-0.0.87.dist-info → retab-0.0.89.dist-info}/WHEEL RENAMED Viewed

File without changes

{retab-0.0.87.dist-info → retab-0.0.89.dist-info}/top_level.txt RENAMED Viewed

File without changes

retab 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl

retab 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl