retab 0.0.42-py3-none-any.whl → 0.0.44-py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- retab/__init__.py +2 -1
- retab/client.py +26 -51
- retab/generate_types.py +180 -0
- retab/resources/consensus/client.py +1 -1
- retab/resources/consensus/responses.py +1 -1
- retab/resources/deployments/__init__.py +3 -0
- retab/resources/deployments/automations/__init__.py +9 -0
- retab/resources/deployments/automations/client.py +244 -0
- retab/resources/deployments/automations/endpoints.py +290 -0
- retab/resources/deployments/automations/links.py +303 -0
- retab/resources/deployments/automations/logs.py +222 -0
- retab/resources/deployments/automations/mailboxes.py +423 -0
- retab/resources/deployments/automations/outlook.py +377 -0
- retab/resources/deployments/automations/tests.py +161 -0
- retab/resources/deployments/client.py +148 -0
- retab/resources/documents/client.py +94 -68
- retab/resources/documents/extractions.py +55 -46
- retab/resources/evaluations/__init__.py +2 -2
- retab/resources/evaluations/client.py +61 -77
- retab/resources/evaluations/documents.py +48 -37
- retab/resources/evaluations/iterations.py +58 -40
- retab/resources/jsonlUtils.py +3 -4
- retab/resources/processors/automations/endpoints.py +49 -39
- retab/resources/processors/automations/links.py +52 -43
- retab/resources/processors/automations/mailboxes.py +74 -59
- retab/resources/processors/automations/outlook.py +104 -82
- retab/resources/processors/client.py +35 -30
- retab/resources/projects/__init__.py +3 -0
- retab/resources/projects/client.py +285 -0
- retab/resources/projects/documents.py +244 -0
- retab/resources/projects/iterations.py +470 -0
- retab/resources/usage.py +2 -0
- retab/types/ai_models.py +2 -1
- retab/types/deprecated_evals.py +195 -0
- retab/types/evaluations/__init__.py +5 -2
- retab/types/evaluations/iterations.py +9 -43
- retab/types/evaluations/model.py +19 -24
- retab/types/extractions.py +1 -0
- retab/types/jobs/base.py +1 -1
- retab/types/jobs/evaluation.py +1 -1
- retab/types/logs.py +5 -6
- retab/types/mime.py +1 -10
- retab/types/projects/__init__.py +34 -0
- retab/types/projects/documents.py +30 -0
- retab/types/projects/iterations.py +78 -0
- retab/types/projects/model.py +68 -0
- retab/types/schemas/enhance.py +22 -5
- retab/types/schemas/evaluate.py +2 -2
- retab/types/schemas/object.py +27 -25
- retab/types/standards.py +2 -2
- retab/utils/__init__.py +3 -0
- retab/utils/ai_models.py +127 -12
- retab/utils/hashing.py +24 -0
- retab/utils/json_schema.py +1 -26
- retab/utils/mime.py +0 -17
- retab/utils/usage/usage.py +0 -1
- {retab-0.0.42.dist-info → retab-0.0.44.dist-info}/METADATA +4 -6
- {retab-0.0.42.dist-info → retab-0.0.44.dist-info}/RECORD +60 -55
- retab/_utils/__init__.py +0 -0
- retab/_utils/_model_cards/anthropic.yaml +0 -59
- retab/_utils/_model_cards/auto.yaml +0 -43
- retab/_utils/_model_cards/gemini.yaml +0 -117
- retab/_utils/_model_cards/openai.yaml +0 -301
- retab/_utils/_model_cards/xai.yaml +0 -28
- retab/_utils/ai_models.py +0 -138
- retab/_utils/benchmarking.py +0 -484
- retab/_utils/chat.py +0 -327
- retab/_utils/display.py +0 -440
- retab/_utils/json_schema.py +0 -2156
- retab/_utils/mime.py +0 -165
- retab/_utils/responses.py +0 -169
- retab/_utils/stream_context_managers.py +0 -52
- retab/_utils/usage/__init__.py +0 -0
- retab/_utils/usage/usage.py +0 -301
- {retab-0.0.42.dist-info → retab-0.0.44.dist-info}/WHEEL +0 -0
- {retab-0.0.42.dist-info → retab-0.0.44.dist-info}/top_level.txt +0 -0
retab/types/projects/model.py
ADDED
@@ -0,0 +1,68 @@
+import datetime
+from typing import Any, Optional
+
+import nanoid  # type: ignore
+from pydantic import BaseModel, Field, computed_field
+
+from ...utils.json_schema import generate_schema_data_id, generate_schema_id
+from ..inference_settings import InferenceSettings
+from .documents import ProjectDocument
+from .iterations import Iteration
+
+
+class BaseProject(BaseModel):
+    id: str = Field(default_factory=lambda: "proj_" + nanoid.generate())
+    name: str = Field(default="", description="The name of the project")
+    json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
+    default_inference_settings: InferenceSettings = Field(default=InferenceSettings(), description="The default inference properties for the project.")
+    updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
+
+
+# Actual Object stored in DB
+class Project(BaseProject):
+    documents: list[ProjectDocument] = Field(default_factory=list)
+    iterations: list[Iteration] = Field(default_factory=list)
+
+    @computed_field  # type: ignore
+    @property
+    def schema_data_id(self) -> str:
+        """Returns the SHA1 hash of the schema data, ignoring all prompt/description/default fields.
+
+        Returns:
+            str: A SHA1 hash string representing the schema data version.
+        """
+        return generate_schema_data_id(self.json_schema)
+
+    # This is a computed field, it is exposed when serializing the object
+    @computed_field  # type: ignore
+    @property
+    def schema_id(self) -> str:
+        """Returns the SHA1 hash of the complete schema.
+
+        Returns:
+            str: A SHA1 hash string representing the complete schema version.
+        """
+        return generate_schema_id(self.json_schema)
+
+
+class ListProjectParams(BaseModel):
+    schema_id: Optional[str] = Field(default=None, description="The ID of the schema")
+    schema_data_id: Optional[str] = Field(default=None, description="The ID of the schema data")
+
+
+class CreateProjectRequest(BaseModel):
+    name: str
+    json_schema: dict[str, Any]
+    default_inference_settings: InferenceSettings
+
+
+# This is basically the same as BaseProject, but everything is optional.
+# Could be achieved by convert_basemodel_to_partial_basemodel(BaseProject) but we prefer explicitness
+class PatchProjectRequest(BaseModel):
+    name: Optional[str] = Field(default=None, description="The name of the document")
+    json_schema: Optional[dict[str, Any]] = Field(default=None, description="The json schema of the project")
+    default_inference_settings: Optional[InferenceSettings] = Field(default=None, description="The default inference properties for the project (mostly used in the frontend)")
+
+
+class AddIterationFromJsonlRequest(BaseModel):
+    jsonl_gcs_path: str
retab/types/schemas/enhance.py
CHANGED
@@ -1,6 +1,8 @@
 from typing import Any, Self, TypedDict

-from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
+from openai.types.chat.chat_completion_reasoning_effort import (
+    ChatCompletionReasoningEffort,
+)
 from pydantic import BaseModel, Field, model_validator

 from ..mime import MIMEData
@@ -9,23 +11,35 @@ from ..browser_canvas import BrowserCanvas


 class EnhanceSchemaConfig(BaseModel):
+    allow_reasoning_fields_added: bool = True  # Whether to allow the llm to add reasoning fields
     allow_field_description_update: bool = False  # Whether to allow the llm to update the description of existing fields
     allow_system_prompt_update: bool = True  # Whether to allow the llm to update the system prompt
-
+    allow_field_simple_type_change: bool = False  # Whether to allow the llm to make simple type changes (optional, string to date, etc.)
+    allow_field_data_structure_breakdown: bool = False  # Whether to allow the llm to make complex data-structure changes (raw diff)

     # Model validator
     @model_validator(mode="after")
     def check_at_least_one_tool_allowed(self) -> Self:
-        if not any(
+        if not any(
+            [
+                self.allow_reasoning_fields_added,
+                self.allow_field_description_update,
+                self.allow_system_prompt_update,
+                self.allow_field_simple_type_change,
+                self.allow_field_data_structure_breakdown,
+            ]
+        ):
             raise ValueError("At least one tool must be allowed")
         return self


 # Define a typed Dict for EnhanceSchemaConfig (for now it is kind static, but we will add more flexibility in the future)
 class EnhanceSchemaConfigDict(TypedDict, total=False):
+    allow_reasoning_fields_added: bool
     allow_field_description_update: bool
     allow_system_prompt_update: bool
-
+    allow_field_simple_type_change: bool
+    allow_field_data_structure_breakdown: bool


 class EnhanceSchemaRequest(BaseModel):
@@ -48,7 +62,10 @@ class EnhanceSchemaRequest(BaseModel):
     stream: bool = False
     """Whether to stream the response."""

-    tools_config: EnhanceSchemaConfig = Field(
+    tools_config: EnhanceSchemaConfig = Field(
+        default_factory=EnhanceSchemaConfig,
+        description="The configuration for the tools to use",
+    )

     json_schema: dict[str, Any]
     instructions: str | None = None
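As a quick illustration of the enlarged `EnhanceSchemaConfig`, the sketch below constructs the config and shows the validator rejecting an all-disabled configuration. It assumes retab 0.0.44 and the import path `retab.types.schemas.enhance`.

```python
# Illustrative sketch; the import path is inferred from the file shown above.
from retab.types.schemas.enhance import EnhanceSchemaConfig

# Defaults keep reasoning-field addition and system-prompt updates enabled.
config = EnhanceSchemaConfig(allow_field_simple_type_change=True)
print(config.allow_reasoning_fields_added)  # True

# Disabling every tool trips the model validator.
try:
    EnhanceSchemaConfig(
        allow_reasoning_fields_added=False,
        allow_field_description_update=False,
        allow_system_prompt_update=False,
        allow_field_simple_type_change=False,
        allow_field_data_structure_breakdown=False,
    )
except ValueError as exc:  # pydantic's ValidationError subclasses ValueError
    print(exc)  # message includes "At least one tool must be allowed"
```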
retab/types/schemas/evaluate.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Any, Self
 from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
 from pydantic import BaseModel, Field, model_validator

-from ..
+from ..metrics import ItemMetric
 from ..mime import MIMEData
 from ..modalities import Modality
 from ..browser_canvas import BrowserCanvas
@@ -45,7 +45,7 @@ class EvaluateSchemaRequest(BaseModel):
         if len(self.documents) != len(self.ground_truths):
             raise ValueError("Distance mode requires equal number of documents and ground_truths")
         if len(self.documents) == 0:
-            raise ValueError("
+            raise ValueError("Project mode requires at least one document")

         return self

retab/types/schemas/object.py
CHANGED
@@ -233,6 +233,29 @@ When provided with a **JSON schema** and a **document**, you must:

 ---

+## Date and Time Formatting
+
+When extracting date, time, or datetime values:
+
+- **Always use ISO format** for dates and times (e.g., "2023-12-25", "14:30:00", "2023-12-25T14:30:00")
+
+**Examples:**
+
+```json
+// Correct ISO formats:
+{"date": "2023-12-25"}
+{"time": "14:30:00"}
+{"datetime": "2023-12-25T14:30:00Z"}
+{"datetime_with_tz": "2023-12-25T14:30:00+02:00"}
+
+// Incorrect formats:
+{"date": "12/25/2023"}
+{"time": "2:30 PM"}
+{"datetime": "Dec 25, 2023 at 2:30 PM"}
+```
+
+---
+
 ## Handling Missing and Nullable Fields

 ### Nullable Leaf Attributes
@@ -357,32 +380,11 @@ When performing extraction, explicitly follow these core principles:
 - **Structure Preservation**: Always maintain explicitly the full schema structure, even when entire nested objects lack data (leaf attributes as null).


-
-
-Some leaf fields require you to explicitly provide the source of the data (verbatim from the document).
-The idea is to simply provide a verbatim quote from the document, without any additional formatting or commentary, keeping it as close as possible to the original text.
-Make sure to reasonably include some surrounding text to provide context about the quote.
-
-You can easily identify the fields that require a source by the `quote___[attributename]` naming pattern.
-
-**Example:**
-
-```json
-{
-"quote___name": "NAME:\nJohn Doe",
-"name": "John Doe"
-}
-```
-
----
-
-# User Defined System Prompt
-
-"""
+---"""

     @property
-    def user_system_prompt(self) -> str:
-        return self.json_schema.get("X-SystemPrompt",
+    def user_system_prompt(self) -> str | None:
+        return self.json_schema.get("X-SystemPrompt", None)

     @property
     def schema_system_prompt(self) -> str:
@@ -397,7 +399,7 @@ You can easily identify the fields that require a source by the `quote___[attrib
         Returns:
             str: The combined system prompt string.
         """
-        return self.developer_system_prompt + "\n\n" + self.user_system_prompt + "\n\n" + self.schema_system_prompt
+        return self.developer_system_prompt + "\n\n" + (self.user_system_prompt + "\n\n" if self.user_system_prompt else "") + self.schema_system_prompt

     @property
     def title(self) -> str:
retab/types/standards.py
CHANGED
@@ -1,14 +1,14 @@
 from typing import Any, List, Literal, Optional, Tuple, TypeVar, TypedDict

 from pydantic import BaseModel, Field
-from
+from pydantic_core import PydanticUndefined

 # API Standards

 # Define a type variable to represent the content type
 T = TypeVar("T")

-FieldUnset =
+FieldUnset: Any = PydanticUndefined


 # Define the ErrorDetail model
retab/utils/__init__.py
CHANGED
retab/utils/ai_models.py
CHANGED
@@ -6,6 +6,7 @@ from ..types.ai_models import AIProvider, GeminiModel, OpenAIModel, xAI_Model, R

 MODEL_CARDS_DIR = os.path.join(os.path.dirname(__file__), "_model_cards")

+
 def merge_model_cards(base: dict, override: dict) -> dict:
     result = base.copy()
     for key, value in override.items():
@@ -17,6 +18,7 @@ def merge_model_cards(base: dict, override: dict) -> dict:
             result[key] = value
     return result

+
 def load_model_cards(yaml_file: str) -> list[ModelCard]:
     raw_cards = yaml.safe_load(open(yaml_file))
     name_to_card = {c["model"]: c for c in raw_cards if "inherits" not in c}
@@ -31,14 +33,18 @@ def load_model_cards(yaml_file: str) -> list[ModelCard]:
         final_cards.append(ModelCard(**card))
     return final_cards

+
 # Load all model cards
-model_cards = sum(
-
-
-
-
-
-
+model_cards = sum(
+    [
+        load_model_cards(os.path.join(MODEL_CARDS_DIR, "openai.yaml")),
+        load_model_cards(os.path.join(MODEL_CARDS_DIR, "anthropic.yaml")),
+        load_model_cards(os.path.join(MODEL_CARDS_DIR, "xai.yaml")),
+        load_model_cards(os.path.join(MODEL_CARDS_DIR, "gemini.yaml")),
+        load_model_cards(os.path.join(MODEL_CARDS_DIR, "auto.yaml")),
+    ],
+    [],
+)
 model_cards_dict = {card.model: card for card in model_cards}


@@ -108,7 +114,7 @@ def get_provider_for_model(model_id: str) -> AIProvider:


 def assert_valid_model_extraction(model: str) -> None:
-    try:
+    try:
         get_provider_for_model(model)
     except ValueError:
         raise ValueError(
@@ -132,7 +138,116 @@ def assert_valid_model_schema_generation(model: str) -> None:
         return
     else:
         raise ValueError(
-
-
-
-
+            f"Invalid model format: {model}. Must be either:\n"
+            f"1. A standard model: {get_args(OpenAIModel)}\n"
+            f"2. A fine-tuned model in format 'base_model:id' where base_model is one of the standard openai models"
+        ) from None
+
+
+def get_model_credits(model: str) -> float:
+    """
+    Get the credit cost for a given model based on its capabilities and size.
+
+    Credit tiers:
+    - 0.1 credits: Micro/nano models (fastest, cheapest)
+    - 0.5 credits: Small/mini models (balanced performance)
+    - 2.0 credits: Large/advanced models (highest capability)
+
+    Args:
+        model: The model name to look up
+
+    Returns:
+        The credit cost for the model
+
+    Raises:
+        ValueError: If no model card is found for the specified model
+    """
+    try:
+        model_card = get_model_card(model)
+        model_name = get_model_from_model_id(model)
+    except ValueError:
+        # Unknown model, return 0 credits (no billing)
+        return 0.0
+
+    # Define credit mapping based on model capabilities and naming patterns
+    model_credits = {
+        # 0.1 credit models - Micro/Nano tier (fastest, most efficient)
+        "auto-micro": 0.1,
+        "gemini-flash-lite": 0.1,
+        "gpt-4o-mini": 0.1,
+        "gpt-3.5-turbo": 0.1,
+        "gpt-4.1-nano": 0.1,  # Future model
+        # 0.5 credit models - Small/Mini tier (balanced performance)
+        "auto-small": 0.5,
+        "gemini-flash": 0.5,
+        "gpt-4o": 0.5,
+        "gpt-4-turbo": 0.5,
+        "gpt-4.1-mini": 0.5,  # Future model
+        "claude-3-haiku": 0.5,
+        "claude-3.5-haiku": 0.5,
+        # 2.0 credit models - Large/Advanced tier (highest capability)
+        "auto-large": 2.0,
+        "gemini-pro": 2.0,
+        "gpt-4": 2.0,
+        "gpt-4.1": 2.0,  # Future model
+        "o1-mini": 2.0,
+        "o1-preview": 2.0,
+        "o3": 5.0,  # Future model
+        "claude-3-sonnet": 2.0,
+        "claude-3-opus": 2.0,
+        "claude-3.5-sonnet": 2.0,
+        "grok-beta": 2.0,
+        "grok-2": 2.0,
+        # Special reasoning models - Higher tier
+        "o1": 3.0,
+        "o3-max": 3.0,  # Future model, highest tier
+    }
+
+    # Return the credits for the specific model
+    if model_name in model_credits:
+        return model_credits[model_name]
+
+    # Fallback logic based on model patterns and capabilities
+    model_lower = model_name.lower()
+
+    # Auto-model fallback logic
+    if model_lower.startswith("auto-"):
+        if "micro" in model_lower or "nano" in model_lower:
+            return 0.1
+        elif "small" in model_lower or "mini" in model_lower:
+            return 0.5
+        elif "large" in model_lower or "pro" in model_lower:
+            return 2.0
+
+    # Gemini model fallback logic
+    if "gemini" in model_lower:
+        if "lite" in model_lower or "nano" in model_lower:
+            return 0.1
+        elif "flash" in model_lower:
+            return 0.5
+        elif "pro" in model_lower or "ultra" in model_lower:
+            return 2.0
+
+    # GPT model fallback logic
+    if "gpt" in model_lower:
+        if "mini" in model_lower or "3.5" in model_lower:
+            return 0.1
+        elif "4o" in model_lower and "mini" not in model_lower:
+            return 0.5
+        elif "4" in model_lower or "o1" in model_lower:
+            return 2.0
+
+    # Claude model fallback logic
+    if "claude" in model_lower:
+        if "haiku" in model_lower:
+            return 0.5
+        elif "sonnet" in model_lower or "opus" in model_lower:
+            return 2.0
+
+    # Default for unknown models - use model card info if available
+    try:
+        # Try to determine based on model card properties
+        # This could be enhanced based on the actual ModelCard structure
+        return 1.0  # Default middle tier
+    except:
+        return 0.0  # No billing for completely unknown models
retab/utils/hashing.py
ADDED
@@ -0,0 +1,24 @@
+import base64
+import hashlib
+import json
+
+from fastapi.encoders import jsonable_encoder
+
+# ************* Generalistic utils *************
+
+
+def generate_blake2b_hash_from_bytes(bytes_: bytes) -> str:
+    return hashlib.blake2b(bytes_, digest_size=8).hexdigest()
+
+
+def generate_blake2b_hash_from_base64(base64_string: str) -> str:
+    return generate_blake2b_hash_from_bytes(base64.b64decode(base64_string))
+
+
+def generate_blake2b_hash_from_string(input_string: str) -> str:
+    return generate_blake2b_hash_from_bytes(input_string.encode("utf-8"))
+
+
+def generate_blake2b_hash_from_dict(input_dict: dict) -> str:
+    jsonable_dict = jsonable_encoder(input_dict)
+    return generate_blake2b_hash_from_string(json.dumps(jsonable_dict, sort_keys=True).strip())
retab/utils/json_schema.py
CHANGED
@@ -15,7 +15,7 @@ from pydantic import BaseModel, BeforeValidator, Field, create_model
 from pydantic.config import ConfigDict

 from ..types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
-from .
+from .hashing import generate_blake2b_hash_from_string

 # **** Validation Functions ****

@@ -2091,31 +2091,6 @@ def sanitize(instance: Any, schema: dict[str, Any]) -> Any:
     return __sanitize_instance(instance, expanded_schema)


-def compute_schema_data_id(json_schema: dict[str, Any]) -> str:
-    """Returns the schema_data_id for a given JSON schema.
-
-    The schema_data_id is a hash of the schema data, ignoring all prompt/description/default fields
-    and other non-structural metadata.
-
-    Args:
-        json_schema: The JSON schema to compute the ID for
-
-    Returns:
-        str: A hash string representing the schema data version with "sch_data_id_" prefix
-    """
-
-    return "sch_data_id_" + generate_blake2b_hash_from_string(
-        json.dumps(
-            clean_schema(
-                copy.deepcopy(json_schema),
-                remove_custom_fields=True,
-                fields_to_remove=["description", "default", "title", "required", "examples", "deprecated", "readOnly", "writeOnly"],
-            ),
-            sort_keys=True,
-        ).strip()
-    )
-
-
 def validate_json_against_schema(
     data: Any,
     schema: dict[str, Any],
retab/utils/mime.py
CHANGED
@@ -16,23 +16,6 @@ from ..types.modalities import SUPPORTED_TYPES

 T = TypeVar("T")

-
-def generate_blake2b_hash_from_bytes(bytes_: bytes) -> str:
-    return hashlib.blake2b(bytes_, digest_size=8).hexdigest()
-
-
-def generate_blake2b_hash_from_base64(base64_string: str) -> str:
-    return generate_blake2b_hash_from_bytes(base64.b64decode(base64_string))
-
-
-def generate_blake2b_hash_from_string(input_string: str) -> str:
-    return generate_blake2b_hash_from_bytes(input_string.encode("utf-8"))
-
-
-def generate_blake2b_hash_from_dict(input_dict: dict) -> str:
-    return generate_blake2b_hash_from_string(json.dumps(input_dict, sort_keys=True).strip())
-
-
 def convert_pil_image_to_mime_data(image: PIL.Image.Image) -> MIMEData:
     """Convert a PIL Image object to a MIMEData object.

retab/utils/usage/usage.py
CHANGED
@@ -117,7 +117,6 @@ class CompletionsUsage(BaseModel):
     input_audio_tokens: int = Field(description="The aggregated number of audio input tokens used, including cached tokens.")
     output_audio_tokens: int = Field(description="The aggregated number of audio output tokens used.")
     num_model_requests: int = Field(description="The count of requests made to the model.")
-    project_id: Optional[str] = Field(default=None, description="When group_by=project_id, this field provides the project ID of the grouped usage result.")
     user_id: Optional[str] = Field(default=None, description="When group_by=user_id, this field provides the user ID of the grouped usage result.")
     api_key_id: Optional[str] = Field(default=None, description="When group_by=api_key_id, this field provides the API key ID of the grouped usage result.")
     model: Optional[str] = Field(default=None, description="When group_by=model, this field provides the model name of the grouped usage result.")
{retab-0.0.42.dist-info → retab-0.0.44.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: retab
-Version: 0.0.42
+Version: 0.0.44
 Summary: Retab official python library
 Home-page: https://github.com/Retab-dev/retab
 Author: Retab
@@ -61,13 +61,13 @@ Made with love by the team at [Retab](https://retab.com) 🤍.

 ### What is Retab?

-Retab solves all the major challenges in document processing with
+Retab solves all the major challenges in document processing with Large Language Models:

 1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
 2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
 3. **Processors**: Publish a live, stable, shareable document processor.
 4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
-5. **
+5. **Projects**: Evaluate the performance of models against annotated datasets
 6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance

 We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
@@ -90,7 +90,7 @@ Many people haven't yet realized how powerful LLMs have become at document proce

 ## Code examples

-
+You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).

 ## Community

@@ -112,8 +112,6 @@ We share our roadmap publicly on [Github](https://github.com/Retab-dev/retab)
 Among the features we're working on:

 * [ ] Node.js SDK
-* [ ] Low-level speed optimizations for Evals Frontend
 * [ ] Schema optimization autopilot
 * [ ] Sources API
-* [ ] Parse API for RAG
