PyPI - retab - Versions diffs - 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl - Mend

retab 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

{uiform → retab}/_utils/ai_models.py +2 -2
{uiform → retab}/_utils/benchmarking.py +15 -16
{uiform → retab}/_utils/chat.py +9 -14
{uiform → retab}/_utils/display.py +0 -3
{uiform → retab}/_utils/json_schema.py +9 -14
{uiform → retab}/_utils/mime.py +11 -14
{uiform → retab}/_utils/responses.py +9 -3
{uiform → retab}/_utils/stream_context_managers.py +1 -1
{uiform → retab}/_utils/usage/usage.py +28 -28
{uiform → retab}/client.py +32 -31
{uiform → retab}/resources/consensus/client.py +17 -36
{uiform → retab}/resources/consensus/completions.py +24 -47
{uiform → retab}/resources/consensus/completions_stream.py +26 -38
{uiform → retab}/resources/consensus/responses.py +31 -80
{uiform → retab}/resources/consensus/responses_stream.py +31 -79
{uiform → retab}/resources/documents/client.py +59 -45
{uiform → retab}/resources/documents/extractions.py +181 -90
{uiform → retab}/resources/evals.py +56 -43
retab/resources/evaluations/__init__.py +3 -0
retab/resources/evaluations/client.py +301 -0
retab/resources/evaluations/documents.py +233 -0
retab/resources/evaluations/iterations.py +452 -0
{uiform → retab}/resources/files.py +2 -2
{uiform → retab}/resources/jsonlUtils.py +220 -216
retab/resources/models.py +73 -0
retab/resources/processors/automations/client.py +244 -0
{uiform → retab}/resources/processors/automations/endpoints.py +77 -118
retab/resources/processors/automations/links.py +294 -0
{uiform → retab}/resources/processors/automations/logs.py +30 -19
{uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
retab/resources/processors/automations/outlook.py +337 -0
{uiform → retab}/resources/processors/automations/tests.py +22 -25
{uiform → retab}/resources/processors/client.py +179 -164
{uiform → retab}/resources/schemas.py +78 -66
{uiform → retab}/resources/secrets/external_api_keys.py +1 -5
retab/resources/secrets/webhook.py +64 -0
{uiform → retab}/resources/usage.py +39 -2
{uiform → retab}/types/ai_models.py +13 -13
{uiform → retab}/types/automations/cron.py +19 -12
{uiform → retab}/types/automations/endpoints.py +7 -4
{uiform → retab}/types/automations/links.py +7 -3
{uiform → retab}/types/automations/mailboxes.py +9 -9
{uiform → retab}/types/automations/outlook.py +15 -11
retab/types/browser_canvas.py +3 -0
{uiform → retab}/types/chat.py +2 -2
{uiform → retab}/types/completions.py +9 -12
retab/types/consensus.py +19 -0
{uiform → retab}/types/db/annotations.py +3 -3
{uiform → retab}/types/db/files.py +8 -6
{uiform → retab}/types/documents/create_messages.py +18 -20
{uiform → retab}/types/documents/extractions.py +69 -24
{uiform → retab}/types/evals.py +5 -5
retab/types/evaluations/__init__.py +31 -0
retab/types/evaluations/documents.py +30 -0
retab/types/evaluations/iterations.py +112 -0
retab/types/evaluations/model.py +73 -0
retab/types/events.py +79 -0
{uiform → retab}/types/extractions.py +33 -10
retab/types/inference_settings.py +15 -0
retab/types/jobs/base.py +54 -0
retab/types/jobs/batch_annotation.py +12 -0
{uiform → retab}/types/jobs/evaluation.py +1 -2
{uiform → retab}/types/logs.py +37 -34
retab/types/metrics.py +32 -0
{uiform → retab}/types/mime.py +22 -20
{uiform → retab}/types/modalities.py +10 -10
retab/types/predictions.py +19 -0
{uiform → retab}/types/schemas/enhance.py +4 -2
{uiform → retab}/types/schemas/evaluate.py +7 -4
{uiform → retab}/types/schemas/generate.py +6 -3
{uiform → retab}/types/schemas/layout.py +1 -1
{uiform → retab}/types/schemas/object.py +13 -14
{uiform → retab}/types/schemas/templates.py +1 -3
{uiform → retab}/types/secrets/external_api_keys.py +0 -1
{uiform → retab}/types/standards.py +18 -1
{retab-0.0.36.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
retab-0.0.37.dist-info/RECORD +107 -0
retab-0.0.37.dist-info/top_level.txt +1 -0
retab-0.0.36.dist-info/RECORD +0 -96
retab-0.0.36.dist-info/top_level.txt +0 -1
uiform/_utils/benchmarking copy.py +0 -588
uiform/resources/models.py +0 -45
uiform/resources/processors/automations/client.py +0 -78
uiform/resources/processors/automations/links.py +0 -356
uiform/resources/processors/automations/outlook.py +0 -444
uiform/resources/secrets/webhook.py +0 -62
uiform/types/consensus.py +0 -10
uiform/types/events.py +0 -76
uiform/types/jobs/base.py +0 -150
uiform/types/jobs/batch_annotation.py +0 -22
{uiform → retab}/__init__.py +0 -0
{uiform → retab}/_resource.py +0 -0
{uiform → retab}/_utils/__init__.py +0 -0
{uiform → retab}/_utils/usage/__init__.py +0 -0
{uiform → retab}/py.typed +0 -0
{uiform → retab}/resources/__init__.py +0 -0
{uiform → retab}/resources/consensus/__init__.py +0 -0
{uiform → retab}/resources/documents/__init__.py +0 -0
{uiform → retab}/resources/finetuning.py +0 -0
{uiform → retab}/resources/openai_example.py +0 -0
{uiform → retab}/resources/processors/__init__.py +0 -0
{uiform → retab}/resources/processors/automations/__init__.py +0 -0
{uiform → retab}/resources/prompt_optimization.py +0 -0
{uiform → retab}/resources/secrets/__init__.py +0 -0
{uiform → retab}/resources/secrets/client.py +0 -0
{uiform → retab}/types/__init__.py +0 -0
{uiform → retab}/types/automations/__init__.py +0 -0
{uiform → retab}/types/automations/webhooks.py +0 -0
{uiform → retab}/types/db/__init__.py +0 -0
{uiform → retab}/types/documents/__init__.py +0 -0
{uiform → retab}/types/documents/correct_orientation.py +0 -0
{uiform → retab}/types/jobs/__init__.py +0 -0
{uiform → retab}/types/jobs/finetune.py +0 -0
{uiform → retab}/types/jobs/prompt_optimization.py +0 -0
{uiform → retab}/types/jobs/webcrawl.py +0 -0
{uiform → retab}/types/pagination.py +0 -0
{uiform → retab}/types/schemas/__init__.py +0 -0
{uiform → retab}/types/secrets/__init__.py +0 -0
{retab-0.0.36.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0

{uiform → retab}/types/automations/outlook.py RENAMED Viewed

@@ -1,10 +1,9 @@
 import re
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Dict, List, Optional
 import nanoid  # type: ignore
-from pydantic import BaseModel, EmailStr, Field, field_validator, model_validator
+from pydantic import BaseModel, EmailStr, Field, computed_field, field_validator
-from ..._utils.json_schema import convert_schema_to_layout
 from ..logs import AutomationConfig, UpdateAutomationRequest
 from ..pagination import ListMetadata
@@ -29,7 +28,11 @@ class FetchParams(BaseModel):
 class Outlook(AutomationConfig):
-    object: Literal['automation.outlook'] = "automation.outlook"
+    @computed_field
+    @property
+    def object(self) -> str:
+        return "automation.outlook"
     id: str = Field(default_factory=lambda: "outlook_" + nanoid.generate(), description="Unique identifier for the outlook")
     authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
@@ -41,12 +44,13 @@ class Outlook(AutomationConfig):
     match_params: List[MatchParams] = Field(default_factory=list, description="List of match parameters for the outlook automation")
     fetch_params: List[FetchParams] = Field(default_factory=list, description="List of fetch parameters for the outlook automation")
-    @model_validator(mode='before')
-    @classmethod
-    def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
-        if values.get('layout_schema') is None:
-            values['layout_schema'] = convert_schema_to_layout(values['json_schema'])
-        return values
+    # @model_validator(mode="before")
+    # @classmethod
+    # def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
+    #     if values.get("layout_schema") is None:
+    #         values["layout_schema"] = convert_schema_to_layout(values["json_schema"])
+    #     return values
 class ListOutlooks(BaseModel):
     data: list[Outlook]
@@ -65,4 +69,4 @@ class UpdateOutlookRequest(UpdateAutomationRequest):
     @field_validator("authorized_emails", mode="before")
     def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
-        return [email.strip().lower() for email in emails] if emails else None
+        return [email.strip().lower() for email in emails] if emails else None

retab/types/browser_canvas.py ADDED Viewed

@@ -0,0 +1,3 @@
+from typing import Literal
+BrowserCanvas = Literal["A3", "A4", "A5"]

{uiform → retab}/types/chat.py RENAMED Viewed

@@ -1,8 +1,8 @@
-from typing import Iterable, Literal, TypedDict, Union
+from typing import Literal, TypedDict, Union
 from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
 class ChatCompletionUiformMessage(TypedDict):  # homemade replacement for ChatCompletionMessageParam because iterable messes the serialization with pydantic
-    role: Literal['user', 'system', 'assistant', 'developer']
+    role: Literal["user", "system", "assistant", "developer"]
     content: Union[str, list[ChatCompletionContentPartParam]]

{uiform → retab}/types/completions.py RENAMED Viewed

@@ -1,6 +1,10 @@
-from typing import Any
+from typing import Any, Optional, Union
 from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
+from openai.types.responses.response_input_param import ResponseInputParam
+from openai.types.responses.response_text_config_param import ResponseTextConfigParam
+from openai.types.shared_params.reasoning import Reasoning
+from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
 from pydantic import BaseModel, ConfigDict, Field
 from .._utils.ai_models import find_provider_from_model
@@ -8,8 +12,6 @@ from .ai_models import AIProvider
 from .chat import ChatCompletionUiformMessage
-from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
 class UiChatCompletionsRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
@@ -35,8 +37,6 @@ class UiChatCompletionsRequest(BaseModel):
         return find_provider_from_model(self.model)
 class UiChatCompletionsParseRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
@@ -61,10 +61,6 @@ class UiChatCompletionsParseRequest(BaseModel):
         """
         return find_provider_from_model(self.model)
-from typing import Optional, Union
-from openai.types.shared_params.reasoning import Reasoning
-from openai.types.responses.response_input_param import ResponseInputParam
-from openai.types.responses.response_text_config_param import ResponseTextConfigParam
 class UiChatResponseCreateRequest(BaseModel):
     input: Union[str, ResponseInputParam] = Field(..., description="Input to be parsed")
@@ -73,8 +69,10 @@ class UiChatResponseCreateRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
     temperature: Optional[float] = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
-    reasoning: Optional[Reasoning] = Field(default=None, description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.")
+    reasoning: Optional[Reasoning] = Field(
+        default=None, description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used."
+    )
     stream: Optional[bool] = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
     seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
     text: ResponseTextConfigParam = Field(default={"format": {"type": "text"}}, description="Format of the response")
@@ -90,4 +88,3 @@ class UiChatResponseCreateRequest(BaseModel):
             AIProvider: The AI provider corresponding to the given model.
         """
         return find_provider_from_model(self.model)

retab/types/consensus.py ADDED Viewed

@@ -0,0 +1,19 @@
+from typing import Any, Literal, Optional
+from pydantic import BaseModel, Field
+class ReconciliationRequest(BaseModel):
+    list_dicts: list[dict] = Field(description="List of dictionaries that will be reconciled into a single consensus dictionary.")
+    reference_schema: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Optional schema defining the structure and types of the dictionary fields to validate the list of dictionaries against. Raise an error if one of the dictionaries does not match the schema.",
+    )
+    mode: Literal["direct", "aligned"] = Field(
+        default="direct",
+        description="The mode to use for the consensus. If 'direct', the consensus is computed directly from the list of dictionaries. If 'aligned', the consensus is computed from the aligned dictionaries.",
+    )
+class ReconciliationResponse(BaseModel):
+    consensus_dict: dict = Field(description="The consensus dictionary containing the reconciled values from the input dictionaries.")
+    likelihoods: dict = Field(description="A dictionary containing the likelihood/confidence scores for each field in the consensus dictionary.")

{uiform → retab}/types/db/annotations.py RENAMED Viewed

@@ -1,17 +1,17 @@
 import datetime
-from typing import Any, Dict, Literal
+from typing import Any, Dict
-import nanoid  # type: ignore
 from pydantic import BaseModel, Field
 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas
 class AnnotationParameters(BaseModel):
     model: str
     modality: Modality | None = "native"
     image_resolution_dpi: int = 96
-    browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4'
+    browser_canvas: BrowserCanvas = "A4"
     temperature: float = 0.0

{uiform → retab}/types/db/files.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import mimetypes
-from typing import BinaryIO, Literal, Tuple
+from typing import Any, BinaryIO, Literal, Tuple
-from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_serializer
+from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
 class DBFile(BaseModel):
@@ -27,10 +27,12 @@ FileTuple = Tuple[str, FileData]
 class FileLink(BaseModel):
-    download_url: HttpUrl = Field(description="The signed URL to download the file")
+    download_url: str = Field(description="The signed URL to download the file")
     expires_in: str = Field(description="The expiration time of the signed URL")
     filename: str = Field(description="The name of the file")
-    @field_serializer('download_url')
-    def url2str(self, val: HttpUrl) -> str:
-        return str(val)
+    @field_validator("download_url", mode="after")
+    def validate_httpurl(cls, val: Any) -> Any:
+        if isinstance(val, str):
+            HttpUrl(val)
+        return val

{uiform → retab}/types/documents/create_messages.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import base64
 from io import BytesIO
-from typing import List, Literal, Dict, Union
+from typing import Any, List, Literal
 import PIL.Image
 import requests
@@ -12,11 +12,12 @@ from pydantic import BaseModel, Field, computed_field
 from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
 from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
+from ..._utils.display import count_image_tokens, count_text_tokens
 from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
-from ..._utils.display import count_text_tokens, count_image_tokens
 from ..chat import ChatCompletionUiformMessage
 from ..mime import MIMEData
 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas
 MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
@@ -26,19 +27,20 @@ class TokenCount(BaseModel):
     developer_tokens: int = 0
     user_tokens: int = 0
 class DocumentCreateMessageRequest(BaseModel):
     document: MIMEData = Field(description="The document to load.")
     modality: Modality = Field(description="The modality of the document to load.")
     image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
-    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.")
+    browser_canvas: BrowserCanvas = Field(
+        default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
+    )
-from typing import Any
 class DocumentCreateInputRequest(DocumentCreateMessageRequest):
     json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")
 class DocumentMessage(BaseModel):
     id: str = Field(description="A unique identifier for the document loading.")
     object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
@@ -49,21 +51,21 @@ class DocumentMessage(BaseModel):
     @computed_field
     def token_count(self) -> TokenCount:
         """Returns the token count for the document message.
         This property calculates token usage based on both text and image content
         in the messages using the token counting utilities.
         Returns:
             TokenCount: A Pydantic model with total, user, and developer token counts.
         """
         total_tokens = 0
         user_tokens = 0
         developer_tokens = 0
         for msg in self.messages:
             role = msg.get("role", "user")
             msg_tokens = 0
             if isinstance(msg["content"], str):
                 msg_tokens = count_text_tokens(msg["content"])
             elif isinstance(msg["content"], list):
@@ -72,30 +74,26 @@ class DocumentMessage(BaseModel):
                         msg_tokens += count_text_tokens(content_item)
                     elif isinstance(content_item, dict):
                         item_type = content_item.get("type")
                         if item_type == "text" and "text" in content_item:
                             msg_tokens += count_text_tokens(content_item["text"])
                         elif item_type == "image_url" and "image_url" in content_item:
                             image_url = content_item["image_url"]["url"]
                             detail = content_item["image_url"].get("detail", "high")
                             msg_tokens += count_image_tokens(image_url, detail)
             # Update total tokens
             total_tokens += msg_tokens
             # Update role-specific counts
             assert role in ["user", "developer"], f"Invalid role: {role}"
             if role == "user":
                 user_tokens += msg_tokens
             elif role == "developer":
                 developer_tokens += msg_tokens
-        return TokenCount(
-            total_tokens=total_tokens,
-            user_tokens=user_tokens,
-            developer_tokens=developer_tokens
-        )
+        return TokenCount(total_tokens=total_tokens, user_tokens=user_tokens, developer_tokens=developer_tokens)
     @property
     def items(self) -> list[str | PIL.Image.Image]:

{uiform → retab}/types/documents/extractions.py RENAMED Viewed

@@ -15,24 +15,23 @@ from openai.types.responses.response import Response
 from openai.types.responses.response_input_param import ResponseInputItemParam
 from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator
-from ..._utils.usage.usage import compute_cost_from_model, compute_cost_from_model_with_breakdown, CostBreakdown
-from ..._utils.ai_models import find_provider_from_model
-from ..ai_models import AIProvider, Amount, get_model_card
+from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
+from ..ai_models import Amount
 from ..chat import ChatCompletionUiformMessage
 from ..mime import MIMEData
 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas
 from ..standards import ErrorDetail, StreamingBaseModel
 class DocumentExtractRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    document: MIMEData = Field(..., description="Document to be analyzed")
+    document: MIMEData = Field(default=None, description="Document to be analyzed", deprecated=True)  # type: ignore
+    documents: list[MIMEData] = Field(..., description="Documents to be analyzed (preferred over document)")
     modality: Modality
     image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
-    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(
-        default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
+    browser_canvas: BrowserCanvas = Field(
+        default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
     )
     model: str = Field(..., description="Model used for chat completion")
     json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
@@ -54,6 +53,28 @@ class DocumentExtractRequest(BaseModel):
             raise ValueError("n_consensus greater than 1 but temperature is 0")
         return v
+    @model_validator(mode="before")
+    def validate_document_or_documents(cls, data: Any) -> Any:
+        # Handle both dict and model instance cases
+        if isinstance(data, dict):
+            if data.get("documents"):  # If documents is set, it has higher priority than document
+                data["document"] = data["documents"][0]
+            elif data.get("document"):
+                data["documents"] = [data["document"]]
+            else:
+                raise ValueError("document or documents must be provided")
+        else:
+            # Handle model instance case
+            document = getattr(data, "document", None)
+            documents = getattr(data, "documents", None)
+            if documents:
+                setattr(data, "document", documents[0])
+            elif document:
+                setattr(data, "documents", [document])
+            else:
+                raise ValueError("document or documents must be provided")
+        return data
 class ConsensusModel(BaseModel):
     model: str = Field(description="Model name")
@@ -87,7 +108,7 @@ LikelihoodsSource = Literal["consensus", "log_probs"]
 class UiParsedChatCompletion(ParsedChatCompletion):
     extraction_id: str | None = None
-    choices: list[UiParsedChoice]
+    choices: list[UiParsedChoice]  # type: ignore
     # Additional metadata fields (UIForm)
     likelihoods: Optional[dict[str, Any]] = Field(
         default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
@@ -147,19 +168,43 @@ class LogExtractionRequest(BaseModel):
     # Validate that at least one of the messages, openai_messages, anthropic_messages is provided using model_validator
     @model_validator(mode="before")
     def validation(cls, data: Any) -> Any:
-        messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
-        messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
-        if len(messages_candidates) != 1:
-            raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
-        # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
-        if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
-            raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
-        completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
-        completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
-        if len(completion_candidates) != 1:
-            raise ValueError("Exactly one of completion, openai_responses_output must be provided")
+        # Handle both dict and model instance cases
+        if isinstance(data, dict):
+            messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
+            messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
+            if len(messages_candidates) != 1:
+                raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
+            # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
+            if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
+                raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
+            completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
+            completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
+            if len(completion_candidates) != 1:
+                raise ValueError("Exactly one of completion, openai_responses_output must be provided")
+        else:
+            # Handle model instance case
+            messages_candidates = [
+                getattr(data, "messages", None),
+                getattr(data, "openai_messages", None),
+                getattr(data, "anthropic_messages", None),
+                getattr(data, "openai_responses_input", None),
+            ]
+            messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
+            if len(messages_candidates) != 1:
+                raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
+            # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
+            anthropic_messages = getattr(data, "anthropic_messages", None)
+            anthropic_system_prompt = getattr(data, "anthropic_system_prompt", None)
+            if anthropic_messages is not None and anthropic_system_prompt is None:
+                raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
+            completion_candidates = [getattr(data, "completion", None), getattr(data, "openai_responses_output", None)]
+            completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
+            if len(completion_candidates) != 1:
+                raise ValueError("Exactly one of completion, openai_responses_output must be provided")
         return data
@@ -192,12 +237,12 @@ class UiParsedChoiceDeltaChunk(ChoiceDeltaChunk):
 class UiParsedChoiceChunk(ChoiceChunk):
-    delta: UiParsedChoiceDeltaChunk
+    delta: UiParsedChoiceDeltaChunk  # type: ignore
 class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
     extraction_id: str | None = None
-    choices: list[UiParsedChoiceChunk]
+    choices: list[UiParsedChoiceChunk]  # type: ignore
     schema_validation_error: ErrorDetail | None = None
     # Timestamps
     request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")

{uiform → retab}/types/evals.py RENAMED Viewed

@@ -1,19 +1,17 @@
 import copy
 import datetime
 import json
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional
 import nanoid  # type: ignore
 from pydantic import BaseModel, Field, computed_field
 from .._utils.json_schema import clean_schema, compute_schema_data_id
 from .._utils.mime import generate_blake2b_hash_from_string
-from .ai_models import Amount, LLMModel
-from .jobs.base import InferenceSettings
+from .ai_models import Amount
+from .inference_settings import InferenceSettings
 from .mime import MIMEData
 # Define the type alias for MetricType
 MetricType = Literal["levenshtein", "jaccard", "hamming"]
@@ -132,6 +130,7 @@ class UpdateEvaluationRequest(BaseModel):
     json_schema: Optional[dict[str, Any]] = Field(default=None, description="The json schema of the evaluation")
     project_id: Optional[str] = Field(default=None, description="The ID of the project")
+    default_inference_settings: Optional[InferenceSettings] = Field(default=None, description="The default inference properties for the evaluation (mostly used in the frontend)")
     @computed_field  # type: ignore
     @property
@@ -165,6 +164,7 @@ class Evaluation(BaseModel):
     updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
     name: str
+    old_documents: list[EvaluationDocument] | None = None
     documents: list[EvaluationDocument]
     iterations: list[Iteration]
     json_schema: dict[str, Any]

retab/types/evaluations/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+from .model import Evaluation, CreateEvaluation, PatchEvaluationRequest, ListEvaluationParams
+from .documents import AnnotatedDocument, DocumentItem, EvaluationDocument, CreateEvaluationDocumentRequest, PatchEvaluationDocumentRequest
+from .iterations import (
+    Iteration,
+    CreateIterationRequest,
+    PatchIterationRequest,
+    ProcessIterationRequest,
+    DocumentStatus,
+    IterationDocumentStatusResponse,
+    AddIterationFromJsonlRequest,
+)
+__all__ = [
+    "Evaluation",
+    "CreateEvaluation",
+    "PatchEvaluationRequest",
+    "ListEvaluationParams",
+    "AnnotatedDocument",
+    "DocumentItem",
+    "EvaluationDocument",
+    "CreateEvaluationDocumentRequest",
+    "PatchEvaluationDocumentRequest",
+    "Iteration",
+    "CreateIterationRequest",
+    "PatchIterationRequest",
+    "ProcessIterationRequest",
+    "DocumentStatus",
+    "IterationDocumentStatusResponse",
+    "AddIterationFromJsonlRequest",
+]

retab/types/evaluations/documents.py ADDED Viewed

@@ -0,0 +1,30 @@
+from typing import Any, Optional
+from pydantic import BaseModel, Field
+from ..mime import MIMEData
+from ..predictions import PredictionMetadata
+class AnnotatedDocument(BaseModel):
+    mime_data: MIMEData = Field(
+        description="The mime data of the document. Can also be a BaseMIMEData, which is why we have this id field (to be able to identify the file, but id is equal to mime_data.id)"
+    )
+    annotation: dict[str, Any] = Field(default={}, description="The ground truth of the document")
+class DocumentItem(AnnotatedDocument):
+    annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")
+class EvaluationDocument(DocumentItem):
+    id: str = Field(description="The ID of the document. Equal to mime_data.id but robust to the case where mime_data is a BaseMIMEData")
+class CreateEvaluationDocumentRequest(DocumentItem):
+    pass
+class PatchEvaluationDocumentRequest(BaseModel):
+    annotation: Optional[dict[str, Any]] = Field(default=None, description="The ground truth of the document")
+    annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")

retab 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

retab 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl