retab 0.0.88__tar.gz → 0.0.90__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {retab-0.0.88 → retab-0.0.90}/PKG-INFO +1 -1
- {retab-0.0.88 → retab-0.0.90}/retab/client.py +6 -3
- retab-0.0.90/retab/resources/jobs/__init__.py +3 -0
- retab-0.0.90/retab/resources/jobs/client.py +252 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/workflows/runs/client.py +72 -23
- retab-0.0.90/retab/types/jobs.py +90 -0
- {retab-0.0.88 → retab-0.0.90}/retab.egg-info/PKG-INFO +1 -1
- {retab-0.0.88 → retab-0.0.90}/retab.egg-info/SOURCES.txt +3 -0
- {retab-0.0.88 → retab-0.0.90}/setup.py +1 -1
- {retab-0.0.88 → retab-0.0.90}/README.md +0 -0
- {retab-0.0.88 → retab-0.0.90}/pyproject.toml +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/_resource.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/generate_types.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/py.typed +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/documents/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/documents/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/agent/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/agent/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/templates/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/edit/templates/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/extractions/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/extractions/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/models.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/projects/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/projects/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/schemas.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/workflows/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/workflows/client.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/resources/workflows/runs/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/chat.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/classify.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/correct_orientation.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/create_messages.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/edit.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/extract.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/parse.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/documents/split.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/edit/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/edit/templates.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/extractions/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/extractions/types.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/inference_settings.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/mime.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/modality.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/pagination.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/projects/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/projects/metrics.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/projects/model.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/projects/predictions.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/chat.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/generate.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/layout.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/model.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/schemas/templates.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/standards.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/workflows/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/types/workflows/model.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/__init__.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/display.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/hashing.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/json_schema.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/mime.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab/utils/stream_context_managers.py +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab.egg-info/dependency_links.txt +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab.egg-info/requires.txt +0 -0
- {retab-0.0.88 → retab-0.0.90}/retab.egg-info/top_level.txt +0 -0
- {retab-0.0.88 → retab-0.0.90}/setup.cfg +0 -0
- {retab-0.0.88 → retab-0.0.90}/tests/test_projects.py +0 -0

{retab-0.0.88 → retab-0.0.90}/retab/client.py

@@ -10,7 +10,7 @@ import backoff.types
 import httpx
 import truststore
 
-from .resources import documents, models, schemas, projects, extractions, edit
+from .resources import documents, models, schemas, projects, extractions, edit, workflows, jobs
 from .types.standards import PreparedRequest, FieldUnset
 
 
@@ -189,7 +189,8 @@ class Retab(BaseRetab):
         self.models = models.Models(client=self)
         self.schemas = schemas.Schemas(client=self)
         self.edit = edit.Edit(client=self)
-
+        self.workflows = workflows.Workflows(client=self)
+        self.jobs = jobs.Jobs(client=self)
     def _request(
         self,
         method: str,
@@ -487,7 +488,9 @@ class AsyncRetab(BaseRetab):
         self.models = models.AsyncModels(client=self)
         self.schemas = schemas.AsyncSchemas(client=self)
         self.edit = edit.AsyncEdit(client=self)
-
+        self.workflows = workflows.AsyncWorkflows(client=self)
+        self.jobs = jobs.AsyncJobs(client=self)
+
     def _parse_response(self, response: httpx.Response) -> Any:
         """Parse response based on content-type.
 
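Taken together with the import change above, the new resources become attributes on the existing client objects. A minimal sketch of what that exposes (the attribute and class names come from this diff; the api_key value is a placeholder, and constructing the client is assumed not to require a network call):

    from retab import Retab

    client = Retab(api_key="your-api-key")  # placeholder key

    # New in 0.0.90: jobs and workflows are wired up alongside the existing resources.
    jobs_client = client.jobs            # retab.resources.jobs.client.Jobs
    runs_client = client.workflows.runs  # used by the workflow-run changes further down
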
retab-0.0.90/retab/resources/jobs/client.py (new file)

@@ -0,0 +1,252 @@
+"""
+Jobs API Resource
+
+Provides synchronous and asynchronous clients for the Jobs API.
+"""
+
+from typing import Any
+
+from ..._resource import AsyncAPIResource, SyncAPIResource
+from ...types.jobs import Job, JobListResponse, JobStatus, SupportedEndpoint
+from ...types.standards import PreparedRequest
+
+
+class BaseJobsMixin:
+    """Shared methods for preparing Jobs API requests."""
+
+    def _prepare_create(
+        self,
+        endpoint: SupportedEndpoint,
+        request: dict[str, Any],
+        metadata: dict[str, str] | None = None,
+    ) -> PreparedRequest:
+        data = {
+            "endpoint": endpoint,
+            "request": request,
+        }
+        if metadata is not None:
+            data["metadata"] = metadata
+        return PreparedRequest(method="POST", url="/v1/jobs", data=data)
+
+    def _prepare_retrieve(self, job_id: str) -> PreparedRequest:
+        return PreparedRequest(method="GET", url=f"/v1/jobs/{job_id}")
+
+    def _prepare_cancel(self, job_id: str) -> PreparedRequest:
+        return PreparedRequest(method="POST", url=f"/v1/jobs/{job_id}/cancel")
+
+    def _prepare_list(
+        self,
+        after: str | None = None,
+        limit: int = 20,
+        status: JobStatus | None = None,
+    ) -> PreparedRequest:
+        params: dict[str, Any] = {"limit": limit}
+        if after is not None:
+            params["after"] = after
+        if status is not None:
+            params["status"] = status
+        return PreparedRequest(method="GET", url="/v1/jobs", params=params)
+
+
+class Jobs(SyncAPIResource, BaseJobsMixin):
+    """
+    Synchronous Jobs API client.
+
+    The Jobs API allows you to submit long-running extract or parse operations
+    asynchronously and poll for their results.
+
+    Example:
+        >>> from retab import Retab
+        >>> client = Retab(api_key="your-api-key")
+        >>>
+        >>> # Create an async extraction job
+        >>> job = client.jobs.create(
+        ...     endpoint="/v1/documents/extract",
+        ...     request={
+        ...         "document": {"content": "...", "mime_type": "application/pdf"},
+        ...         "json_schema": {"type": "object", ...},
+        ...         "model": "gpt-4o",
+        ...     }
+        ... )
+        >>>
+        >>> # Poll for completion
+        >>> while job.status not in ("completed", "failed", "cancelled"):
+        ...     import time
+        ...     time.sleep(5)
+        ...     job = client.jobs.retrieve(job.id)
+        >>>
+        >>> if job.status == "completed":
+        ...     print(job.response.body)
+    """
+
+    def create(
+        self,
+        endpoint: SupportedEndpoint,
+        request: dict[str, Any],
+        metadata: dict[str, str] | None = None,
+    ) -> Job:
+        """
+        Create a new asynchronous job.
+
+        Args:
+            endpoint: The API endpoint to call ("/v1/documents/extract" or "/v1/documents/parse")
+            request: The full request body for the target endpoint
+            metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
+
+        Returns:
+            Job: The created job with status "queued"
+        """
+        prepared = self._prepare_create(endpoint, request, metadata)
+        response = self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    def retrieve(self, job_id: str) -> Job:
+        """
+        Retrieve a job by ID.
+
+        Args:
+            job_id: The job ID to retrieve
+
+        Returns:
+            Job: The job with current status and result (if completed)
+        """
+        prepared = self._prepare_retrieve(job_id)
+        response = self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    def cancel(self, job_id: str) -> Job:
+        """
+        Cancel a queued or in-progress job.
+
+        Args:
+            job_id: The job ID to cancel
+
+        Returns:
+            Job: The updated job with status "cancelled"
+        """
+        prepared = self._prepare_cancel(job_id)
+        response = self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    def list(
+        self,
+        after: str | None = None,
+        limit: int = 20,
+        status: JobStatus | None = None,
+    ) -> JobListResponse:
+        """
+        List jobs with pagination and optional status filtering.
+
+        Args:
+            after: Pagination cursor (last ID from previous page)
+            limit: Number of jobs to return (1-100, default 20)
+            status: Filter by job status
+
+        Returns:
+            JobListResponse: List of jobs with pagination info
+        """
+        prepared = self._prepare_list(after, limit, status)
+        response = self._client._prepared_request(prepared)
+        return JobListResponse.model_validate(response)
+
+
+class AsyncJobs(AsyncAPIResource, BaseJobsMixin):
+    """
+    Asynchronous Jobs API client.
+
+    The Jobs API allows you to submit long-running extract or parse operations
+    asynchronously and poll for their results.
+
+    Example:
+        >>> from retab import AsyncRetab
+        >>> client = AsyncRetab(api_key="your-api-key")
+        >>>
+        >>> # Create an async extraction job
+        >>> job = await client.jobs.create(
+        ...     endpoint="/v1/documents/extract",
+        ...     request={
+        ...         "document": {"content": "...", "mime_type": "application/pdf"},
+        ...         "json_schema": {"type": "object", ...},
+        ...         "model": "gpt-4o",
+        ...     }
+        ... )
+        >>>
+        >>> # Poll for completion
+        >>> while job.status not in ("completed", "failed", "cancelled"):
+        ...     import asyncio
+        ...     await asyncio.sleep(5)
+        ...     job = await client.jobs.retrieve(job.id)
+        >>>
+        >>> if job.status == "completed":
+        ...     print(job.response.body)
+    """
+
+    async def create(
+        self,
+        endpoint: SupportedEndpoint,
+        request: dict[str, Any],
+        metadata: dict[str, str] | None = None,
+    ) -> Job:
+        """
+        Create a new asynchronous job.
+
+        Args:
+            endpoint: The API endpoint to call ("/v1/documents/extract" or "/v1/documents/parse")
+            request: The full request body for the target endpoint
+            metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
+
+        Returns:
+            Job: The created job with status "queued"
+        """
+        prepared = self._prepare_create(endpoint, request, metadata)
+        response = await self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    async def retrieve(self, job_id: str) -> Job:
+        """
+        Retrieve a job by ID.
+
+        Args:
+            job_id: The job ID to retrieve
+
+        Returns:
+            Job: The job with current status and result (if completed)
+        """
+        prepared = self._prepare_retrieve(job_id)
+        response = await self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    async def cancel(self, job_id: str) -> Job:
+        """
+        Cancel a queued or in-progress job.
+
+        Args:
+            job_id: The job ID to cancel
+
+        Returns:
+            Job: The updated job with status "cancelled"
+        """
+        prepared = self._prepare_cancel(job_id)
+        response = await self._client._prepared_request(prepared)
+        return Job.model_validate(response)
+
+    async def list(
+        self,
+        after: str | None = None,
+        limit: int = 20,
+        status: JobStatus | None = None,
+    ) -> JobListResponse:
+        """
+        List jobs with pagination and optional status filtering.
+
+        Args:
+            after: Pagination cursor (last ID from previous page)
+            limit: Number of jobs to return (1-100, default 20)
+            status: Filter by job status
+
+        Returns:
+            JobListResponse: List of jobs with pagination info
+        """
+        prepared = self._prepare_list(after, limit, status)
+        response = await self._client._prepared_request(prepared)
+        return JobListResponse.model_validate(response)
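The docstring examples above poll in an open-ended while loop; a small helper with a timeout is sketched below. It only uses calls shown in this diff (jobs.retrieve and the Job model); wait_for_job itself and the timeout handling are illustrative, not part of the SDK.

    import time

    from retab import Retab
    from retab.types.jobs import Job

    # Terminal statuses taken from the JobStatus literal added later in this diff.
    TERMINAL_STATUSES = {"completed", "failed", "cancelled", "expired"}

    def wait_for_job(client: Retab, job_id: str, interval: float = 5.0, timeout: float = 600.0) -> Job:
        """Poll the Jobs API until the job reaches a terminal status or the timeout expires."""
        deadline = time.monotonic() + timeout
        while True:
            job = client.jobs.retrieve(job_id)
            if job.status in TERMINAL_STATUSES:
                return job
            if time.monotonic() >= deadline:
                raise TimeoutError(f"Job {job_id} still {job.status!r} after {timeout:.0f}s")
            time.sleep(interval)

A cancel() call on timeout, or exponential backoff between polls, would be natural extensions.
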
{retab-0.0.88 → retab-0.0.90}/retab/resources/workflows/runs/client.py

@@ -1,6 +1,6 @@
 from io import IOBase
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 import PIL.Image
 from pydantic import HttpUrl
@@ -21,15 +21,19 @@ class WorkflowRunsMixin:
     def prepare_create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> PreparedRequest:
-        """Prepare a request to run a workflow with input documents.
+        """Prepare a request to run a workflow with input documents, JSON data, and/or text data.
 
         Args:
             workflow_id: The ID of the workflow to run
             documents: Mapping of start node IDs to their input documents.
                 Each document can be a file path, bytes, file-like object,
                 MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
 
         Returns:
             PreparedRequest: The prepared request
@@ -40,20 +44,37 @@ class WorkflowRunsMixin:
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
             ...         "start-node-2": Path("receipt.pdf"),
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
         """
+        data: Dict[str, Any] = {}
+
         # Convert each document to MIMEData and then to the format expected by the backend
-
-
-
-
-
-
-
-
-
+        if documents:
+            documents_payload: Dict[str, Dict[str, Any]] = {}
+            for node_id, document in documents.items():
+                mime_data = prepare_mime_document(document)
+                documents_payload[node_id] = {
+                    "filename": mime_data.filename,
+                    "content": mime_data.content,
+                    "mime_type": mime_data.mime_type,
+                }
+            data["documents"] = documents_payload
+
+        # Add JSON inputs directly
+        if json_inputs:
+            data["json_inputs"] = json_inputs
+
+        # Add text inputs directly
+        if text_inputs:
+            data["text_inputs"] = text_inputs
+
         return PreparedRequest(method="POST", url=f"/v1/workflows/{workflow_id}/run", data=data)
 
     def prepare_get(self, run_id: str) -> PreparedRequest:
@@ -77,9 +98,11 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
     def create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> WorkflowRun:
-        """Run a workflow with the provided
+        """Run a workflow with the provided inputs.
 
         This creates a workflow run and starts execution in the background.
         The returned WorkflowRun will have status "running" - use get()
@@ -90,25 +113,37 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
             documents: Mapping of start node IDs to their input documents.
                 Each document can be a file path, bytes, file-like object,
                 MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
 
         Returns:
             WorkflowRun: The created workflow run with status "running"
 
         Raises:
             HTTPException: If the request fails (e.g., workflow not found,
-                missing
+                missing inputs for start nodes)
 
         Example:
             >>> run = client.workflows.runs.create(
             ...     workflow_id="wf_abc123",
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
-            ...
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
             >>> print(f"Run started: {run.id}, status: {run.status}")
         """
-        request = self.prepare_create(
+        request = self.prepare_create(
+            workflow_id=workflow_id,
+            documents=documents,
+            json_inputs=json_inputs,
+            text_inputs=text_inputs,
+        )
         response = self._client._prepared_request(request)
         return WorkflowRun.model_validate(response)
 
@@ -138,9 +173,11 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
     async def create(
         self,
         workflow_id: str,
-        documents: Dict[str, DocumentInput],
+        documents: Optional[Dict[str, DocumentInput]] = None,
+        json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
+        text_inputs: Optional[Dict[str, str]] = None,
     ) -> WorkflowRun:
-        """Run a workflow with the provided
+        """Run a workflow with the provided inputs.
 
         This creates a workflow run and starts execution in the background.
         The returned WorkflowRun will have status "running" - use get()
@@ -151,25 +188,37 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
             documents: Mapping of start node IDs to their input documents.
                 Each document can be a file path, bytes, file-like object,
                 MIMEData, PIL Image, or HttpUrl.
+            json_inputs: Mapping of start_json node IDs to their input JSON data.
+            text_inputs: Mapping of start_text node IDs to their input text.
 
         Returns:
             WorkflowRun: The created workflow run with status "running"
 
         Raises:
             HTTPException: If the request fails (e.g., workflow not found,
-                missing
+                missing inputs for start nodes)
 
         Example:
             >>> run = await client.workflows.runs.create(
             ...     workflow_id="wf_abc123",
             ...     documents={
             ...         "start-node-1": Path("invoice.pdf"),
-            ...
+            ...     },
+            ...     json_inputs={
+            ...         "json-node-1": {"key": "value"},
+            ...     },
+            ...     text_inputs={
+            ...         "text-node-1": "Hello, world!",
             ...     }
             ... )
             >>> print(f"Run started: {run.id}, status: {run.status}")
         """
-        request = self.prepare_create(
+        request = self.prepare_create(
+            workflow_id=workflow_id,
+            documents=documents,
+            json_inputs=json_inputs,
+            text_inputs=text_inputs,
+        )
         response = await self._client._prepared_request(request)
         return WorkflowRun.model_validate(response)
 
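Putting the updated signature and docstring together, a run that mixes all three input kinds might look like the sketch below; the workflow and node IDs are the placeholder values from the docstring example, not real identifiers.

    from pathlib import Path

    from retab import Retab

    client = Retab(api_key="your-api-key")  # placeholder key

    run = client.workflows.runs.create(
        workflow_id="wf_abc123",                            # placeholder ID
        documents={"start-node-1": Path("invoice.pdf")},    # start nodes, as before
        json_inputs={"json-node-1": {"key": "value"}},      # new: start_json nodes
        text_inputs={"text-node-1": "Hello, world!"},       # new: start_text nodes
    )
    print(f"Run started: {run.id}, status: {run.status}")
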
retab-0.0.90/retab/types/jobs.py (new file)

@@ -0,0 +1,90 @@
+"""
+Jobs API Types
+
+Pydantic models for the asynchronous Jobs API.
+"""
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+JobStatus = Literal[
+    "validating",
+    "queued",
+    "in_progress",
+    "completed",
+    "failed",
+    "cancelled",
+    "expired",
+]
+
+SupportedEndpoint = Literal[
+    "/v1/documents/extract",
+    "/v1/documents/parse",
+    "/v1/documents/split",
+    "/v1/documents/classify",
+    "/v1/schemas/generate",
+    "/v1/edit/agent/fill",
+    "/v1/edit/templates/fill",
+    "/v1/edit/templates/generate",
+    "/v1/projects/extract",  # Requires "project_id" in request body
+]
+
+
+class JobResponse(BaseModel):
+    """Response stored when job completes successfully."""
+    status_code: int
+    body: dict[str, Any]
+
+
+class JobError(BaseModel):
+    """Error details when job fails."""
+    code: str
+    message: str
+    details: dict[str, Any] | None = None
+
+
+class Job(BaseModel):
+    """
+    Job object representing an asynchronous operation.
+
+    Use this to track the status of long-running operations like extract, parse,
+    split, classify, schema generation, and template operations.
+    """
+    id: str
+    object: Literal["job"] = "job"
+    status: JobStatus
+    endpoint: SupportedEndpoint
+    request: dict[str, Any]
+    response: JobResponse | None = None
+    error: JobError | None = None
+
+    # Timestamps (Unix timestamps)
+    created_at: int
+    started_at: int | None = None
+    completed_at: int | None = None
+    expires_at: int
+
+    # User context
+    organization_id: str
+    metadata: dict[str, str] | None = None
+
+
+class CreateJobRequest(BaseModel):
+    """Request body for creating a new job."""
+    endpoint: SupportedEndpoint
+    request: dict[str, Any]
+    metadata: dict[str, str] | None = Field(
+        default=None,
+        description="Max 16 pairs; keys ≤64 chars, values ≤512 chars"
+    )
+
+
+class JobListResponse(BaseModel):
+    """Response for listing jobs."""
+    object: Literal["list"] = "list"
+    data: list[Job]
+    first_id: str | None = None
+    last_id: str | None = None
+    has_more: bool = False
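Because these are ordinary pydantic models, they can be exercised locally without touching the API. The payload below is invented, but it only uses the fields and literal values declared above:

    from retab.types.jobs import Job

    # Hypothetical payload shaped like the Job model above (all values made up).
    payload = {
        "id": "job_123",
        "status": "completed",
        "endpoint": "/v1/documents/parse",
        "request": {"document": {"mime_type": "application/pdf"}},
        "response": {"status_code": 200, "body": {"pages": 3}},
        "created_at": 1700000000,
        "expires_at": 1700086400,
        "organization_id": "org_456",
    }

    job = Job.model_validate(payload)
    assert job.object == "job" and job.response is not None
    print(job.response.body)  # {'pages': 3}
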
{retab-0.0.88 → retab-0.0.90}/retab.egg-info/SOURCES.txt

@@ -24,6 +24,8 @@ retab/resources/edit/templates/__init__.py
 retab/resources/edit/templates/client.py
 retab/resources/extractions/__init__.py
 retab/resources/extractions/client.py
+retab/resources/jobs/__init__.py
+retab/resources/jobs/client.py
 retab/resources/projects/__init__.py
 retab/resources/projects/client.py
 retab/resources/workflows/__init__.py
@@ -33,6 +35,7 @@ retab/resources/workflows/runs/client.py
 retab/types/__init__.py
 retab/types/chat.py
 retab/types/inference_settings.py
+retab/types/jobs.py
 retab/types/mime.py
 retab/types/modality.py
 retab/types/pagination.py

All remaining files listed above are unchanged between 0.0.88 and 0.0.90.