PyPI - retab - Versions diffs - 0.0.43__tar.gz → 0.0.45__tar.gz - Mend

retab 0.0.43__tar.gz → 0.0.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

{retab-0.0.43 → retab-0.0.45}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: retab
-Version: 0.0.43
+Version: 0.0.45
 Summary: Retab official python library
 Home-page: https://github.com/Retab-dev/retab
 Author: Retab
@@ -67,7 +67,7 @@ Retab solves all the major challenges in document processing with Large Language
 2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
 3. **Processors**: Publish a live, stable, shareable document processor.
 4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
-5. **Evaluations**: Evaluate the performance of models against annotated datasets
+5. **Projects**: Evaluate the performance of models against annotated datasets
 6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
 We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.

{retab-0.0.43 → retab-0.0.45}/README.md RENAMED Viewed

@@ -24,7 +24,7 @@ Retab solves all the major challenges in document processing with Large Language
 2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
 3. **Processors**: Publish a live, stable, shareable document processor.
 4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
-5. **Evaluations**: Evaluate the performance of models against annotated datasets
+5. **Projects**: Evaluate the performance of models against annotated datasets
 6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
 We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.

{retab-0.0.43 → retab-0.0.45}/retab/client.py RENAMED Viewed

@@ -8,7 +8,7 @@ import backoff.types
 import httpx
 import truststore
-from .resources import consensus, documents, files, finetuning, models, processors, schemas, secrets, usage, evaluations
+from .resources import consensus, deployments, documents, files, finetuning, models, processors, schemas, secrets, usage, projects
 from .types.standards import PreparedRequest, FieldUnset
@@ -135,7 +135,7 @@ class Retab(BaseRetab):
     """Synchronous client for interacting with the Retab API.
     This client provides synchronous access to all Retab API resources including files, fine-tuning,
-    prompt optimization, documents, models, datasets, and schemas.
+    prompt optimization, documents, models, processors, deployments, and schemas.
     Args:
         api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
@@ -151,7 +151,8 @@ class Retab(BaseRetab):
         prompt_optimization: Access to prompt optimization operations
         documents: Access to document operations
         models: Access to model operations
-        datasets: Access to dataset operations
+        processors: Access to processor operations
+        deployments: Access to deployment operations
         schemas: Access to schema operations
         responses: Access to responses API (OpenAI Responses API compatible interface)
     """
@@ -175,13 +176,14 @@ class Retab(BaseRetab):
         )
         self.client = httpx.Client(timeout=self.timeout)
-        self.evaluations = evaluations.Evaluations(client=self)
+        self.projects = projects.Projects(client=self)
         self.files = files.Files(client=self)
         self.fine_tuning = finetuning.FineTuning(client=self)
         self.documents = documents.Documents(client=self)
         self.models = models.Models(client=self)
         self.schemas = schemas.Schemas(client=self)
         self.processors = processors.Processors(client=self)
+        self.deployments = deployments.Deployments(client=self)
         self.secrets = secrets.Secrets(client=self)
         self.usage = usage.Usage(client=self)
         self.consensus = consensus.Consensus(client=self)
@@ -395,7 +397,7 @@ class AsyncRetab(BaseRetab):
     """Asynchronous client for interacting with the Retab API.
     This client provides asynchronous access to all Retab API resources including files, fine-tuning,
-    prompt optimization, documents, models, datasets, and schemas.
+    prompt optimization, documents, models, processors, deployments, and schemas.
     Args:
         api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
@@ -413,7 +415,8 @@ class AsyncRetab(BaseRetab):
         prompt_optimization: Access to asynchronous prompt optimization operations
         documents: Access to asynchronous document operations
         models: Access to asynchronous model operations
-        datasets: Access to asynchronous dataset operations
+        processors: Access to asynchronous processor operations
+        deployments: Access to asynchronous deployment operations
         schemas: Access to asynchronous schema operations
         responses: Access to responses API (OpenAI Responses API compatible interface)
     """
@@ -438,13 +441,14 @@ class AsyncRetab(BaseRetab):
         self.client = httpx.AsyncClient(timeout=self.timeout)
-        self.evaluations = evaluations.AsyncEvaluations(client=self)
+        self.projects = projects.AsyncProjects(client=self)
         self.files = files.AsyncFiles(client=self)
         self.fine_tuning = finetuning.AsyncFineTuning(client=self)
         self.documents = documents.AsyncDocuments(client=self)
         self.models = models.AsyncModels(client=self)
         self.schemas = schemas.AsyncSchemas(client=self)
         self.processors = processors.AsyncProcessors(client=self)
+        self.deployments = deployments.AsyncDeployments(client=self)
         self.secrets = secrets.AsyncSecrets(client=self)
         self.usage = usage.AsyncUsage(client=self)
         self.consensus = consensus.AsyncConsensus(client=self)

retab-0.0.45/retab/generate_types.py ADDED Viewed

@@ -0,0 +1,180 @@
+import collections.abc
+import json
+import os
+import types
+import typing
+import enum
+import sys
+import inspect
+from datetime import datetime, date
+from typing import Any, Type, get_args, get_origin, Union, Literal, is_typeddict
+from typing_extensions import is_typeddict as is_typeddict_ext
+import typing_extensions
+from pydantic_core import PydanticUndefined
+from pydantic import BaseModel, EmailStr
+import PIL.Image
+# Work queue of (name, type, necessary) entries waiting to be converted.
+# When `necessary` is true a conversion failure is re-raised by the script
+# below instead of being reported and skipped.
+to_compile: list[tuple[str, Type, bool]] = []
+def is_base_model(field_type: Type) -> bool:
+    """Tell whether ``field_type`` is named like one of the framework root classes.
+    Only the ``__name__`` attribute is compared; objects without one yield False.
+    """
+    type_name = getattr(field_type, "__name__", None)
+    return type_name in ("BaseModel", "GenericModel", "ConfigDict", "Generic")
+def type_to_zod(field_type: Any, put_names: bool = True, ts: bool = False) -> str:
+    """Render a Python type annotation as Zod schema source or as a TypeScript type.
+    Args:
+        field_type: The annotation to convert (plain class, typing construct,
+            pydantic model or TypedDict).
+        put_names: When true, models and TypedDicts are emitted as a reference
+            ("Z<Name>" / "<Name>") and queued in ``to_compile``; when false
+            their fields are expanded inline.
+        ts: Return the TypeScript type text instead of the Zod expression.
+    Returns:
+        The generated source fragment. A union containing ``None`` gets
+        ".nullable().optional()" (Zod) or " | null | undefined" (TypeScript).
+    Raises:
+        ValueError: If no conversion rule matches ``field_type``.
+    """
+    origin = get_origin(field_type) or field_type
+    optional = False
+    def make_union(args):
+        return args[0] if len(args) <= 1 else "z.union([" + ", ".join(args) + "])"
+    def make_ts_union(args):
+        return args[0] if len(args) <= 1 else " | ".join(args)
+    if isinstance(field_type, typing.ForwardRef):
+        # NOTE: relies on the private typing._eval_type helper to resolve the reference.
+        return type_to_zod(typing._eval_type(field_type, globals(), locals(), []), ts=ts)
+    elif origin is typing.Annotated or origin is typing.Required or origin is typing_extensions.Required:
+        # Wrappers carry no shape of their own: convert the wrapped type.
+        return type_to_zod(get_args(field_type)[0], put_names, ts=ts)
+    if origin is Union or origin is types.UnionType:
+        args = [x for x in get_args(field_type)]
+        if types.NoneType in args:
+            args.remove(types.NoneType)
+            optional = True
+        typename = make_union([type_to_zod(x) for x in args])
+        ts_typename = make_ts_union([type_to_zod(x, ts=True) for x in args])
+    # issubclass() raises TypeError for non-class origins such as typing.Literal
+    # or a TypeVar instance, so only call it once origin is known to be a class.
+    elif (isinstance(origin, type) and issubclass(origin, BaseModel)) or is_typeddict(origin) or is_typeddict_ext(origin):
+        if put_names:
+            typename = "Z" + origin.__name__
+            ts_typename = origin.__name__
+            to_compile.append((origin.__name__, field_type, True))
+        else:
+            # Inspect the direct bases before emitting fields, so that fields
+            # provided by a merged base schema are not repeated inline.
+            excluded_fields = set()
+            merge_suffix = ""
+            ts_merge_suffix = ""
+            for base in origin.__bases__:
+                if is_base_model(base) or base is dict:
+                    break
+                if issubclass(base, BaseModel):
+                    excluded_fields.update(base.model_fields.keys())
+                merge_suffix += ".merge(Z" + base.__name__ + ".schema)"
+                ts_merge_suffix += " & " + base.__name__
+            typename = "z.object({\n"
+            ts_typename = "{\n"
+            if issubclass(origin, BaseModel):
+                props = [(n, f.annotation, f.default) for n, f in origin.model_fields.items()]
+            else:
+                # TypedDict: annotations only, no defaults.
+                props = [(n, f, PydanticUndefined) for n, f in origin.__annotations__.items()]
+            for field_name, field, default in props:
+                if field_name in excluded_fields:
+                    continue
+                ts_compiled = type_to_zod(field, ts=True)
+                default_str = ""
+                if default is not PydanticUndefined and default is not None:
+                    if isinstance(default, BaseModel):
+                        default_json = json.dumps(default.model_dump(mode="json", exclude_unset=True))
+                    else:
+                        default_json = json.dumps(default)
+                    default_str = ".default(" + default_json + ")"
+                # A TypeScript property is optional when its type allows undefined or it has a default.
+                is_optional_prop = ts_compiled.endswith(" | undefined") or default is not PydanticUndefined
+                typename += f"    {field_name}: {type_to_zod(field)}{default_str},\n"
+                ts_typename += f"    {field_name}{'?' if is_optional_prop else ''}: {ts_compiled},\n"
+            typename += "})" + merge_suffix
+            ts_typename += "}" + ts_merge_suffix
+    elif origin is list or origin is typing.List or origin is collections.abc.Sequence or origin is collections.abc.Iterable:
+        typename = "z.array(" + type_to_zod(get_args(field_type)[0]) + ")"
+        ts_typename = "Array<" + type_to_zod(get_args(field_type)[0], ts=True) + ">"
+    elif origin is tuple:
+        args = get_args(field_type)
+        typename = "z.tuple([" + ", ".join([type_to_zod(x) for x in args]) + "])"
+        ts_typename = "[" + ", ".join([type_to_zod(x, ts=True) for x in args]) + "]"
+    elif origin is dict:
+        if len(get_args(field_type)) == 2:
+            typename = "z.record(" + type_to_zod(get_args(field_type)[0]) + ", " + type_to_zod(get_args(field_type)[1]) + ")"
+            ts_typename = "{[key: " + type_to_zod(get_args(field_type)[0], ts=True) + "]: " + type_to_zod(get_args(field_type)[1], ts=True) + "}"
+        else:
+            # Unparameterised dict: any value under string keys.
+            typename = "z.record(z.any())"
+            ts_typename = "{[key: string]: any}"
+    elif origin is Literal:
+        typename = make_union(["z.literal(" + json.dumps(x) + ")" for x in get_args(field_type)])
+        ts_typename = make_ts_union([json.dumps(x) for x in get_args(field_type)])
+    elif isinstance(field_type, typing.TypeVar):
+        typename = "z.any()"
+        ts_typename = "any"
+    elif isinstance(field_type, type) and issubclass(field_type, enum.Enum):
+        typename = "z.any()"
+        ts_typename = "any"
+    elif field_type is str or field_type is date or field_type is datetime:
+        typename = "z.string()"
+        ts_typename = "string"
+    elif field_type is int or field_type is float:
+        typename = "z.number()"
+        ts_typename = "number"
+    elif field_type is bool:
+        typename = "z.boolean()"
+        ts_typename = "boolean"
+    elif field_type is typing.Any:
+        typename = "z.any()"
+        ts_typename = "any"
+    elif field_type is bytes or field_type is PIL.Image.Image or field_type is typing.BinaryIO or origin is typing.IO or origin is typing_extensions.IO:
+        typename = "z.instanceof(Uint8Array)"
+        ts_typename = "Uint8Array"
+    elif field_type is EmailStr:
+        typename = "z.string().email()"
+        ts_typename = "string"
+    elif field_type is os.PathLike:
+        typename = "z.string()"
+        ts_typename = "string"
+    elif field_type is object:
+        typename = "z.object({}).passthrough()"
+        ts_typename = "object"
+    else:
+        raise ValueError(f"Unsupported type: {field_type} ({origin})")
+    if ts:
+        return ts_typename if not optional else ts_typename + " | null | undefined"
+    else:
+        return typename if not optional else typename + ".nullable().optional()"
+# Capitalised member names that the module scan below must not queue for
+# compilation: typing constructs and pydantic helper names, not user types.
+builtin_types = {
+    "Any",
+    "BaseModel",
+    "NoneType",
+    "Literal",
+    "Union",
+    "List",
+    "Sequence",
+    "ConfigDict",
+    "Optional",
+}
+if __name__ == "__main__":
+    # Import every module found under retab/types so its members can be scanned.
+    modules = []
+    for root, dirs, files in os.walk("retab/types"):
+        for module in files:
+            if module[-3:] != '.py':
+                continue
+            full_name = os.path.join(root, module[:-3]).replace(os.path.sep, '.')
+            # __import__ expects globals before locals.
+            __import__(full_name, globals(), locals())
+            modules.append(full_name)
+    # Queue every member whose name starts with an upper-case letter, except
+    # the names listed in builtin_types; failures for these are tolerated.
+    for module_name in modules:
+        for name, obj in inspect.getmembers(sys.modules[module_name]):
+            if name[0] != name[0].lower() and name not in builtin_types:
+                to_compile.append((name, obj, False))
+    print("import * as z from 'zod';\n")
+    # type_to_zod may append further entries to to_compile while we drain it,
+    # so track the names already emitted to avoid duplicates.
+    defined = set()
+    while len(to_compile) > 0:
+        name, model, necessary = to_compile.pop(0)
+        if name in defined:
+            continue
+        defined.add(name)
+        try:
+            compiled = type_to_zod(model, False)
+        except Exception as e:
+            if not necessary:
+                print(f"Skipping {name} {model} due to error: {e}", file=sys.stderr)
+                continue
+            print(f"Error compiling {name} {model}", file=sys.stderr)
+            raise
+        print("export const Z" + name + " = z.lazy(() => " + compiled + ");")
+        print("export type " + name + " = z.infer<typeof Z" + name + ">;\n")

retab-0.0.45/retab/resources/deployments/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .client import AsyncDeployments, Deployments
+__all__ = ["Deployments", "AsyncDeployments"]

retab-0.0.45/retab/resources/deployments/client.py ADDED Viewed

@@ -0,0 +1,148 @@
+import base64
+from io import IOBase
+from pathlib import Path
+from typing import Any, List
+import PIL.Image
+from pydantic import HttpUrl
+from ..._resource import AsyncAPIResource, SyncAPIResource
+from ...utils.mime import MIMEData, prepare_mime_document
+from ...types.documents.extractions import RetabParsedChatCompletion
+from ...types.standards import PreparedRequest
+class DeploymentsMixin:
+    """Request-building logic shared by the sync and async deployment resources."""
+    def prepare_extract(
+        self,
+        project_id: str,
+        iteration_id: str,
+        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        documents: list[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
+        temperature: float | None = None,
+        seed: int | None = None,
+        store: bool = True,
+    ) -> PreparedRequest:
+        """Build the multipart POST request for a deployment extraction.
+        Args:
+            project_id: ID of the project
+            iteration_id: ID of the iteration
+            document: A single document to process (cannot be combined with documents)
+            documents: Several documents to process (cannot be combined with document)
+            temperature: Optional temperature override
+            seed: Optional seed for reproducibility
+            store: Whether to store the results
+        Returns:
+            PreparedRequest: The prepared request
+        Raises:
+            ValueError: If neither or both of document / documents are given
+        """
+        # Exactly one of the two document arguments has to be supplied.
+        if not (document or documents):
+            raise ValueError("Either 'document' or 'documents' must be provided")
+        if document and documents:
+            raise ValueError("Provide either 'document' (single) or 'documents' (multiple), not both")
+        # Only send the form fields that were actually set.
+        candidates = (("temperature", temperature), ("seed", seed), ("store", store))
+        form_data = {key: value for key, value in candidates if value is not None}
+        files = {}
+        if document:
+            # Single upload under the "document" field.
+            single = prepare_mime_document(document)
+            files["document"] = (single.filename, base64.b64decode(single.content), single.mime_type)
+        elif documents:
+            # Several uploads: a list of tuples repeating the "documents" field name.
+            files = [
+                ("documents", (part.filename, base64.b64decode(part.content), part.mime_type))
+                for part in map(prepare_mime_document, documents)
+            ]
+        return PreparedRequest(
+            method="POST",
+            url=f"/v1/deployments/extract/{project_id}/{iteration_id}",
+            form_data=form_data,
+            files=files,
+        )
+class Deployments(SyncAPIResource, DeploymentsMixin):
+    """Synchronous resource for running extractions against a deployment."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+    def extract(
+        self,
+        project_id: str,
+        iteration_id: str,
+        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
+        temperature: float | None = None,
+        seed: int | None = None,
+        store: bool = True,
+    ) -> RetabParsedChatCompletion:
+        """Send one or several documents to a deployment and return the parsed result.
+        Args:
+            project_id: ID of the project
+            iteration_id: ID of the iteration
+            document: A single document to process (cannot be combined with documents)
+            documents: Several documents to process (cannot be combined with document)
+            temperature: Optional temperature override
+            seed: Optional seed for reproducibility
+            store: Whether to store the results
+        Returns:
+            RetabParsedChatCompletion: The processing result
+        """
+        prepared = self.prepare_extract(
+            project_id=project_id,
+            iteration_id=iteration_id,
+            document=document,
+            documents=documents,
+            temperature=temperature,
+            seed=seed,
+            store=store,
+        )
+        payload = self._client._prepared_request(prepared)
+        return RetabParsedChatCompletion.model_validate(payload)
+class AsyncDeployments(AsyncAPIResource, DeploymentsMixin):
+    """Asynchronous resource for running extractions against a deployment."""
+    def __init__(self, client: Any) -> None:
+        super().__init__(client=client)
+    async def extract(
+        self,
+        project_id: str,
+        iteration_id: str,
+        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
+        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
+        temperature: float | None = None,
+        seed: int | None = None,
+        store: bool = True,
+    ) -> RetabParsedChatCompletion:
+        """Send one or several documents to a deployment and await the parsed result.
+        Args:
+            project_id: ID of the project
+            iteration_id: ID of the iteration
+            document: A single document to process (cannot be combined with documents)
+            documents: Several documents to process (cannot be combined with document)
+            temperature: Optional temperature override
+            seed: Optional seed for reproducibility
+            store: Whether to store the results
+        Returns:
+            RetabParsedChatCompletion: The processing result
+        """
+        prepared = self.prepare_extract(
+            project_id=project_id,
+            iteration_id=iteration_id,
+            document=document,
+            documents=documents,
+            temperature=temperature,
+            seed=seed,
+            store=store,
+        )
+        payload = await self._client._prepared_request(prepared)
+        return RetabParsedChatCompletion.model_validate(payload)

retab-0.0.45/retab/resources/projects/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .client import AsyncProjects, Projects
+__all__ = ["Projects", "AsyncProjects"]

retab 0.0.43__tar.gz → 0.0.45__tar.gz

retab 0.0.43__tar.gz → 0.0.45__tar.gz