PyPI - retab - Versions diffs - 0.0.35__py3-none-any.whl - Mend

retab 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

retab-0.0.35.dist-info/METADATA +417 -0
retab-0.0.35.dist-info/RECORD +111 -0
retab-0.0.35.dist-info/WHEEL +5 -0
retab-0.0.35.dist-info/top_level.txt +1 -0
uiform/__init__.py +4 -0
uiform/_resource.py +28 -0
uiform/_utils/__init__.py +0 -0
uiform/_utils/ai_models.py +100 -0
uiform/_utils/benchmarking copy.py +588 -0
uiform/_utils/benchmarking.py +485 -0
uiform/_utils/chat.py +332 -0
uiform/_utils/display.py +443 -0
uiform/_utils/json_schema.py +2161 -0
uiform/_utils/mime.py +168 -0
uiform/_utils/responses.py +163 -0
uiform/_utils/stream_context_managers.py +52 -0
uiform/_utils/usage/__init__.py +0 -0
uiform/_utils/usage/usage.py +300 -0
uiform/client.py +701 -0
uiform/py.typed +0 -0
uiform/resources/__init__.py +0 -0
uiform/resources/consensus/__init__.py +3 -0
uiform/resources/consensus/client.py +114 -0
uiform/resources/consensus/completions.py +252 -0
uiform/resources/consensus/completions_stream.py +278 -0
uiform/resources/consensus/responses.py +325 -0
uiform/resources/consensus/responses_stream.py +373 -0
uiform/resources/deployments/__init__.py +9 -0
uiform/resources/deployments/client.py +78 -0
uiform/resources/deployments/endpoints.py +322 -0
uiform/resources/deployments/links.py +452 -0
uiform/resources/deployments/logs.py +211 -0
uiform/resources/deployments/mailboxes.py +496 -0
uiform/resources/deployments/outlook.py +531 -0
uiform/resources/deployments/tests.py +158 -0
uiform/resources/documents/__init__.py +3 -0
uiform/resources/documents/client.py +255 -0
uiform/resources/documents/extractions.py +441 -0
uiform/resources/evals.py +812 -0
uiform/resources/files.py +24 -0
uiform/resources/finetuning.py +62 -0
uiform/resources/jsonlUtils.py +1046 -0
uiform/resources/models.py +45 -0
uiform/resources/openai_example.py +22 -0
uiform/resources/processors/__init__.py +3 -0
uiform/resources/processors/automations/__init__.py +9 -0
uiform/resources/processors/automations/client.py +78 -0
uiform/resources/processors/automations/endpoints.py +317 -0
uiform/resources/processors/automations/links.py +356 -0
uiform/resources/processors/automations/logs.py +211 -0
uiform/resources/processors/automations/mailboxes.py +435 -0
uiform/resources/processors/automations/outlook.py +444 -0
uiform/resources/processors/automations/tests.py +158 -0
uiform/resources/processors/client.py +474 -0
uiform/resources/prompt_optimization.py +76 -0
uiform/resources/schemas.py +369 -0
uiform/resources/secrets/__init__.py +9 -0
uiform/resources/secrets/client.py +20 -0
uiform/resources/secrets/external_api_keys.py +109 -0
uiform/resources/secrets/webhook.py +62 -0
uiform/resources/usage.py +271 -0
uiform/types/__init__.py +0 -0
uiform/types/ai_models.py +645 -0
uiform/types/automations/__init__.py +0 -0
uiform/types/automations/cron.py +58 -0
uiform/types/automations/endpoints.py +21 -0
uiform/types/automations/links.py +28 -0
uiform/types/automations/mailboxes.py +60 -0
uiform/types/automations/outlook.py +68 -0
uiform/types/automations/webhooks.py +21 -0
uiform/types/chat.py +8 -0
uiform/types/completions.py +93 -0
uiform/types/consensus.py +10 -0
uiform/types/db/__init__.py +0 -0
uiform/types/db/annotations.py +24 -0
uiform/types/db/files.py +36 -0
uiform/types/deployments/__init__.py +0 -0
uiform/types/deployments/cron.py +59 -0
uiform/types/deployments/endpoints.py +28 -0
uiform/types/deployments/links.py +36 -0
uiform/types/deployments/mailboxes.py +67 -0
uiform/types/deployments/outlook.py +76 -0
uiform/types/deployments/webhooks.py +21 -0
uiform/types/documents/__init__.py +0 -0
uiform/types/documents/correct_orientation.py +13 -0
uiform/types/documents/create_messages.py +226 -0
uiform/types/documents/extractions.py +297 -0
uiform/types/evals.py +207 -0
uiform/types/events.py +76 -0
uiform/types/extractions.py +85 -0
uiform/types/jobs/__init__.py +0 -0
uiform/types/jobs/base.py +150 -0
uiform/types/jobs/batch_annotation.py +22 -0
uiform/types/jobs/evaluation.py +133 -0
uiform/types/jobs/finetune.py +6 -0
uiform/types/jobs/prompt_optimization.py +41 -0
uiform/types/jobs/webcrawl.py +6 -0
uiform/types/logs.py +231 -0
uiform/types/mime.py +257 -0
uiform/types/modalities.py +68 -0
uiform/types/pagination.py +6 -0
uiform/types/schemas/__init__.py +0 -0
uiform/types/schemas/enhance.py +53 -0
uiform/types/schemas/evaluate.py +55 -0
uiform/types/schemas/generate.py +32 -0
uiform/types/schemas/layout.py +58 -0
uiform/types/schemas/object.py +631 -0
uiform/types/schemas/templates.py +107 -0
uiform/types/secrets/__init__.py +0 -0
uiform/types/secrets/external_api_keys.py +22 -0
uiform/types/standards.py +39 -0

uiform/types/deployments/outlook.py ADDED Viewed

@@ -0,0 +1,76 @@
+import copy
+import datetime
+import json
+import os
+import re
+from typing import Any, ClassVar, Dict, List, Literal, Optional
+import nanoid  # type: ignore
+from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
+from pydantic import BaseModel, EmailStr, Field, HttpUrl, computed_field, field_serializer, field_validator, model_validator
+from pydantic_core import Url
+from ..._utils.json_schema import clean_schema, convert_schema_to_layout
+from ..._utils.mime import generate_blake2b_hash_from_string
+from ..logs import AutomationConfig, UpdateAutomationRequest
+from ..modalities import Modality
+from ..pagination import ListMetadata
+# Regex for a dotted domain name: one or more dot-terminated labels made of
+# letters, digits or hyphens, followed by an alphabetic suffix of 2+ characters.
+domain_pattern = re.compile(r"^(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}$")
+# Pair of thresholds (distance and score) for an automation; both default to 0.9.
+class AutomationLevel(BaseModel):
+    distance_threshold: float = Field(default=0.9, description="Distance threshold for the automation")
+    score_threshold: float = Field(default=0.9, description="Score threshold for the automation")
+# One "match" integration entry: an endpoint, the request headers and a path.
+# All three fields are required (declared with `...`).
+class MatchParams(BaseModel):
+    endpoint: str = Field(..., description="Endpoint for matching parameters")
+    headers: Dict[str, str] = Field(..., description="Headers for the request")
+    path: str = Field(..., description="Path for matching parameters")
+# One "fetch" integration entry: an endpoint, the request headers and a name.
+# All three fields are required (declared with `...`).
+class FetchParams(BaseModel):
+    endpoint: str = Field(..., description="Endpoint for fetching parameters")
+    headers: Dict[str, str] = Field(..., description="Headers for the request")
+    name: str = Field(..., description="Name of the fetch parameter")
+class Outlook(AutomationConfig):
+    object: Literal['deployment.outlook'] = "deployment.outlook"
+    id: str = Field(default_factory=lambda: "outlook_" + nanoid.generate(), description="Unique identifier for the outlook")
+    authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
+    authorized_emails: List[EmailStr] = Field(default_factory=list, description="List of emails to access the link")
+    layout_schema: Optional[dict[str, Any]] = Field(default=None, description="Layout schema format used to display the data")
+    # Optional Fields for data integration
+    match_params: List[MatchParams] = Field(default_factory=list, description="List of match parameters for the outlook automation")
+    fetch_params: List[FetchParams] = Field(default_factory=list, description="List of fetch parameters for the outlook automation")
+    @model_validator(mode='before')
+    @classmethod
+    def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
+        if values.get('layout_schema') is None:
+            values['layout_schema'] = convert_schema_to_layout(values['json_schema'])
+        return values
+# List response: a list of Outlook objects plus the accompanying ListMetadata.
+class ListOutlooks(BaseModel):
+    data: list[Outlook]
+    list_metadata: ListMetadata
+# Inherits from the methods of UpdateAutomationRequest
+class UpdateOutlookRequest(UpdateAutomationRequest):
+    authorized_domains: Optional[list[str]] = None
+    authorized_emails: Optional[List[EmailStr]] = None
+    match_params: Optional[List[MatchParams]] = None
+    fetch_params: Optional[List[FetchParams]] = None
+    layout_schema: Optional[dict[str, Any]] = None
+    @field_validator("authorized_emails", mode="before")
+    def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
+        return [email.strip().lower() for email in emails] if emails else None

uiform/types/deployments/webhooks.py ADDED Viewed

@@ -0,0 +1,21 @@
+from typing import Any, Optional
+from pydantic import BaseModel, EmailStr
+from uiform.types.documents.extractions import UiParsedChatCompletion
+from ..mime import BaseMIMEData, MIMEData
+# Webhook payload: a parsed completion, an optional user email, the file as
+# MIMEData and optional free-form metadata.
+class WebhookRequest(BaseModel):
+    completion: UiParsedChatCompletion
+    user: Optional[EmailStr] = None
+    file_payload: MIMEData
+    metadata: Optional[dict[str, Any]] = None
+# Same fields as WebhookRequest, except that file_payload is typed as BaseMIMEData.
+class BaseWebhookRequest(BaseModel):
+    completion: UiParsedChatCompletion
+    user: Optional[EmailStr] = None
+    file_payload: BaseMIMEData
+    metadata: Optional[dict[str, Any]] = None

uiform/types/documents/__init__.py ADDED Viewed

File without changes

uiform/types/documents/correct_orientation.py ADDED Viewed

@@ -0,0 +1,13 @@
+from pydantic import BaseModel
+from ..mime import MIMEData
+# Request body for a document transform: carries a single MIMEData document.
+class DocumentTransformRequest(BaseModel):
+    document: MIMEData
+    """The document to load."""
+# Response body for a document transform: carries a single MIMEData document.
+# NOTE(review): the attribute string below repeats the request's wording;
+# presumably this field holds the transformed document — confirm.
+class DocumentTransformResponse(BaseModel):
+    document: MIMEData
+    """The document to load."""

uiform/types/documents/create_messages.py ADDED Viewed

@@ -0,0 +1,226 @@
+import base64
+from io import BytesIO
+from typing import List, Literal, Dict, Union
+import PIL.Image
+import requests
+from anthropic.types.message_param import MessageParam
+from google.genai.types import ContentUnionDict  # type: ignore
+from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from openai.types.responses.response_input_param import ResponseInputItemParam
+from pydantic import BaseModel, Field, computed_field
+from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
+from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
+from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
+from ..._utils.display import count_text_tokens, count_image_tokens
+from ..chat import ChatCompletionUiformMessage
+from ..mime import MIMEData
+from ..modalities import Modality
+# Closed set of image MIME types (JPEG, PNG, GIF, WebP).
+MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
+# Token totals: an overall count plus per-role counts for "developer" and
+# "user"; every counter defaults to 0.
+class TokenCount(BaseModel):
+    total_tokens: int = 0
+    developer_tokens: int = 0
+    user_tokens: int = 0
+# Request for turning a document into messages: the document and its modality
+# are required; image DPI defaults to 96 and the browser canvas to 'A4'.
+class DocumentCreateMessageRequest(BaseModel):
+    document: MIMEData = Field(description="The document to load.")
+    modality: Modality = Field(description="The modality of the document to load.")
+    image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
+    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.")
+from typing import Any
+# DocumentCreateMessageRequest plus a required JSON schema.
+class DocumentCreateInputRequest(DocumentCreateMessageRequest):
+    json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")
+class DocumentMessage(BaseModel):
+    id: str = Field(description="A unique identifier for the document loading.")
+    object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
+    messages: List[ChatCompletionUiformMessage] = Field(description="A list of messages containing the document content and metadata.")
+    created: int = Field(description="The Unix timestamp (in seconds) of when the document was loaded.")
+    modality: Modality = Field(description="The modality of the document to load.")
+    @computed_field
+    def token_count(self) -> TokenCount:
+        """Returns the token count for the document message.
+        This property calculates token usage based on both text and image content
+        in the messages using the token counting utilities.
+        Returns:
+            TokenCount: A Pydantic model with total, user, and developer token counts.
+        """
+        total_tokens = 0
+        user_tokens = 0
+        developer_tokens = 0
+        for msg in self.messages:
+            role = msg.get("role", "user")
+            msg_tokens = 0
+            if isinstance(msg["content"], str):
+                msg_tokens = count_text_tokens(msg["content"])
+            elif isinstance(msg["content"], list):
+                for content_item in msg["content"]:
+                    if isinstance(content_item, str):
+                        msg_tokens += count_text_tokens(content_item)
+                    elif isinstance(content_item, dict):
+                        item_type = content_item.get("type")
+                        if item_type == "text" and "text" in content_item:
+                            msg_tokens += count_text_tokens(content_item["text"])
+                        elif item_type == "image_url" and "image_url" in content_item:
+                            image_url = content_item["image_url"]["url"]
+                            detail = content_item["image_url"].get("detail", "high")
+                            msg_tokens += count_image_tokens(image_url, detail)
+            # Update total tokens
+            total_tokens += msg_tokens
+            # Update role-specific counts
+            assert role in ["user", "developer"], f"Invalid role: {role}"
+            if role == "user":
+                user_tokens += msg_tokens
+            elif role == "developer":
+                developer_tokens += msg_tokens
+        return TokenCount(
+            total_tokens=total_tokens,
+            user_tokens=user_tokens,
+            developer_tokens=developer_tokens
+        )
+    @property
+    def items(self) -> list[str | PIL.Image.Image]:
+        """Returns the document contents as a list of strings and images.
+        This property processes the message content and converts it into a list of either
+        text strings or PIL Image objects. It handles various content types including:
+        - Plain text
+        - Base64 encoded images
+        - Remote image URLs
+        - Audio data (represented as truncated string)
+        Returns:
+            list[str | PIL.Image.Image]: A list containing either strings for text content
+                or PIL.Image.Image objects for image content. Failed image loads will
+                return their URLs as strings instead.
+        """
+        results: list[str | PIL.Image.Image] = []
+        for msg in self.messages:
+            if isinstance(msg["content"], str):
+                results.append(msg["content"])
+                continue
+            assert isinstance(msg["content"], list), "content must be a list or a string"
+            for content_item in msg["content"]:
+                if isinstance(content_item, str):
+                    results.append(content_item)
+                else:
+                    item_type = content_item.get("type")
+                    # If item is an image
+                    if item_type == "image_url":
+                        assert "image_url" in content_item, "image_url is required in ChatCompletionContentPartImageParam"
+                        image_data_url = content_item["image_url"]["url"]  # type: ignore
+                        # 1) Base64 inline data
+                        if image_data_url.startswith("data:image/"):
+                            try:
+                                prefix, base64_part = image_data_url.split(",", 1)
+                                img_bytes = base64.b64decode(base64_part)
+                                img = PIL.Image.open(BytesIO(img_bytes))
+                                results.append(img)
+                            except Exception as e:
+                                print(f"Error decoding base64 data:\n  {e}")
+                                results.append(image_data_url)
+                        # 2) Otherwise, assume it's a remote URL
+                        else:
+                            try:
+                                response = requests.get(image_data_url)
+                                response.raise_for_status()  # raises HTTPError if not 200
+                                img = PIL.Image.open(BytesIO(response.content))
+                                results.append(img)
+                            except Exception as e:
+                                # Here, log or print the actual error
+                                print(f"Could not download image from {image_data_url}:\n  {e}")
+                                results.append(image_data_url)
+                    # If item is text (or other types)
+                    elif item_type == "text":
+                        text_value = content_item.get("text", "")
+                        assert isinstance(text_value, str), "text is required in ChatCompletionContentPartTextParam"
+                        results.append(text_value)
+                    elif item_type == "input_audio":
+                        # Handle audio input content
+                        if "input_audio" in content_item:
+                            audio_data = content_item["input_audio"]["data"]  # type: ignore
+                            results.append(f"Audio data: {audio_data[:100]}...")  # Truncate long audio data
+                    else:
+                        # Fallback for unrecognized item types
+                        results.append(f"Unrecognized type: {item_type}")
+        return results
+    @property
+    def openai_messages(self) -> list[ChatCompletionMessageParam]:
+        """Returns the messages formatted for OpenAI's API.
+        Converts the internal message format to OpenAI's expected format for
+        chat completions.
+        Returns:
+            list[ChatCompletionMessageParam]: Messages formatted for OpenAI's chat completion API.
+        """
+        return convert_to_openai_completions_api_format(self.messages)
+    @property
+    def openai_responses_input(self) -> list[ResponseInputItemParam]:
+        """Returns the messages formatted for OpenAI's Responses API.
+        Converts the internal message format to OpenAI's expected format for
+        responses.
+        Returns:
+            list[ResponseInputItemParam]: Messages formatted for OpenAI's responses API.
+        """
+        return convert_to_openai_responses_api_format(self.messages)
+    @property
+    def anthropic_messages(self) -> list[MessageParam]:
+        """Returns the messages formatted for Anthropic's Claude API.
+        Converts the internal message format to Claude's expected format,
+        handling text, images, and other content types appropriately.
+        Returns:
+            list[MessageParam]: Messages formatted for Claude's API.
+        """
+        return convert_to_anthropic_format(self.messages)[1]
+    @property
+    def gemini_messages(self) -> list[ContentUnionDict]:
+        """Returns the messages formatted for Google's Gemini API.
+        Converts the internal message format to Gemini's expected format,
+        handling various content types including text and images.
+        Returns:
+            list[PartDict]: Messages formatted for Gemini's API.
+        """
+        return convert_to_google_genai_format(self.messages)[1]
+    def __str__(self) -> str:
+        return f"DocumentMessage(id={self.id}, object={self.object}, created={self.created}, messages={str_messages(self.messages)}, modality={self.modality})"
+    def __repr__(self) -> str:
+        return f"DocumentMessage(id={self.id}, object={self.object}, created={self.created}, messages={str_messages(self.messages)}, modality={self.modality})"

uiform/types/documents/extractions.py ADDED Viewed

@@ -0,0 +1,297 @@
+import base64
+import datetime
+from typing import Any, Literal, Optional
+from anthropic.types.message import Message
+from anthropic.types.message_param import MessageParam
+from openai.types.chat import ChatCompletionMessageParam
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice as ChoiceChunk
+from openai.types.chat.chat_completion_chunk import ChoiceDelta as ChoiceDeltaChunk
+from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
+from openai.types.chat.parsed_chat_completion import ParsedChatCompletion, ParsedChoice
+from openai.types.responses.response import Response
+from openai.types.responses.response_input_param import ResponseInputItemParam
+from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator
+from ..._utils.usage.usage import compute_cost_from_model, compute_cost_from_model_with_breakdown, CostBreakdown
+from ..._utils.ai_models import find_provider_from_model
+from ..ai_models import AIProvider, Amount, get_model_card
+from ..chat import ChatCompletionUiformMessage
+from ..mime import MIMEData
+from ..modalities import Modality
+from ..standards import ErrorDetail, StreamingBaseModel
+# Request for extracting structured data from a document: the document, modality,
+# model and JSON schema are required; sampling, consensus, streaming and storage
+# options all have defaults.
+class DocumentExtractRequest(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    document: MIMEData = Field(..., description="Document to be analyzed")
+    modality: Modality
+    image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
+    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(
+        default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
+    )
+    model: str = Field(..., description="Model used for chat completion")
+    json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
+    temperature: float = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
+    reasoning_effort: ChatCompletionReasoningEffort = Field(
+        default="medium", description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used."
+    )
+    n_consensus: int = Field(default=1, description="Number of consensus models to use for extraction. If greater than 1 the temperature cannot be 0.")
+    # Regular fields
+    stream: bool = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
+    seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
+    store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
+    need_validation: bool = Field(default=False, description="If true, the extraction will be validated against the schema")
+    # Field validator on n_consensus that rejects n_consensus > 1 if temperature is 0.
+    # It reads temperature from info.data, so it relies on temperature being declared
+    # before n_consensus in this class.
+    @field_validator("n_consensus")
+    def check_n_consensus(cls, v: int, info: ValidationInfo) -> int:
+        if v > 1 and info.data.get("temperature") == 0:
+            raise ValueError("n_consensus greater than 1 but temperature is 0")
+        return v
+# Settings for one consensus model: a required model name, a temperature
+# (default 0.0) and a reasoning effort (default "medium").
+class ConsensusModel(BaseModel):
+    model: str = Field(description="Model name")
+    temperature: float = Field(default=0.0, description="Temperature for consensus")
+    reasoning_effort: ChatCompletionReasoningEffort = Field(
+        default="medium", description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used."
+    )
+# For location of fields in the document (OCR)
+# label, value and quote are required; the positional details (file, page,
+# bounding boxes, score, match level) are optional and default to None.
+class FieldLocation(BaseModel):
+    label: str = Field(..., description="The label of the field")
+    value: str = Field(..., description="The extracted value of the field")
+    quote: str = Field(..., description="The quote of the field (verbatim from the document)")
+    file_id: str | None = Field(default=None, description="The ID of the file")
+    page: int | None = Field(default=None, description="The page number of the field (1-indexed)")
+    # Each bounding box is a 4-tuple of floats. NOTE(review): the coordinate order
+    # is not shown in this file — confirm before relying on it.
+    bboxes_normalized: list[tuple[float, float, float, float]] | None = Field(default=None, description="The normalized bounding boxes of the field")
+    score: float | None = Field(default=None, description="The score of the field")
+    match_level: Literal["token", "line", "block"] | None = Field(default=None, description="The level of the match (token, line, block)")
+# ParsedChoice variant with an optional finish_reason plus two optional
+# extras: per-field locations and a consensus key mapping.
+class UiParsedChoice(ParsedChoice):
+    # Adaptable ParsedChoice that allows None for the finish_reason
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] | None = None  # type: ignore
+    field_locations: dict[str, list[FieldLocation]] | None = Field(default=None, description="The locations of the fields in the document, if available")
+    key_mapping: dict[str, Optional[str]] | None = Field(default=None, description="Mapping of consensus keys to original model keys")
+# Names the two possible origins of likelihood values: "consensus" or "log_probs".
+LikelihoodsSource = Literal["consensus", "log_probs"]
+class UiParsedChatCompletion(ParsedChatCompletion):
+    extraction_id: str | None = None
+    choices: list[UiParsedChoice]
+    # Additional metadata fields (UIForm)
+    likelihoods: Optional[dict[str, Any]] = Field(
+        default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
+    )
+    schema_validation_error: ErrorDetail | None = None
+    # Timestamps
+    request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
+    first_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the first token of the document. If non-streaming, set to last_token_at")
+    last_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the last token of the document")
+    @computed_field
+    @property
+    def api_cost(self) -> Optional[Amount]:
+        if self.usage:
+            try:
+                cost = compute_cost_from_model(self.model, self.usage)
+                return cost
+            except Exception as e:
+                print(f"Error computing cost: {e}")
+                return None
+        return None
+# OpenAI Response extended with an extraction id, likelihoods, schema
+# validation error and request/token timestamps; all extras default to None.
+class UiResponse(Response):
+    extraction_id: str | None = None
+    # Additional metadata fields (UIForm)
+    likelihoods: Optional[dict[str, Any]] = Field(
+        default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
+    )
+    schema_validation_error: ErrorDetail | None = None
+    # Timestamps
+    request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
+    first_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the first token of the document. If non-streaming, set to last_token_at")
+    last_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the last token of the document")
+class LogExtractionRequest(BaseModel):
+    messages: list[ChatCompletionUiformMessage] | None = None  # TODO: compatibility with Anthropic
+    openai_messages: list[ChatCompletionMessageParam] | None = None
+    openai_responses_input: list[ResponseInputItemParam] | None = None
+    anthropic_messages: list[MessageParam] | None = None
+    anthropic_system_prompt: str | None = None
+    document: MIMEData = Field(
+        default=MIMEData(
+            filename="dummy.txt",
+            # url is a base64 encoded string with the mime type and the content. For the dummy one we will send a .txt file with the text "No document provided"
+            url="data:text/plain;base64," + base64.b64encode(b"No document provided").decode("utf-8"),
+        ),
+        description="Document analyzed, if not provided a dummy one will be created with the text 'No document provided'",
+    )
+    completion: dict | UiParsedChatCompletion | Message | ParsedChatCompletion | ChatCompletion | None = None
+    openai_responses_output: Response | None = None
+    json_schema: dict[str, Any]
+    model: str
+    temperature: float
+    # Validate that at least one of the messages, openai_messages, anthropic_messages is provided using model_validator
+    @model_validator(mode="before")
+    def validation(cls, data: Any) -> Any:
+        messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
+        messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
+        if len(messages_candidates) != 1:
+            raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
+        # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
+        if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
+            raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
+        completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
+        completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
+        if len(completion_candidates) != 1:
+            raise ValueError("Exactly one of completion, openai_responses_output must be provided")
+        return data
+# Result of logging an extraction: a required status ("success" or "error"),
+# an optional extraction id and an optional error message.
+class LogExtractionResponse(BaseModel):
+    extraction_id: str | None = None  # None only in case of error
+    status: Literal["success", "error"]
+    error_message: str | None = None
+# DocumentExtractResponse = UiParsedChatCompletion
+###### I'll place here for now -- New Streaming API
+# We build from the openai.types.chat.chat_completion_chunk.ChatCompletionChunk adding just three additional fields:
+# - is_valid_json: list[bool]               #  Whether the total accumulated content is a valid JSON
+# - likelihoods: dict[str, float]     #  The delta of the flattened likelihoods (to be merged with the cumulated likelihoods)
+# - schema_validation_error: ErrorDetail | None = None #  The error in the schema validation of the total accumulated content
+# Streaming delta extending the OpenAI ChoiceDelta with flattened parse state
+# (parsed values, likelihoods, deleted keys), a JSON-validity flag, optional
+# field locations and an optional consensus key mapping.
+class UiParsedChoiceDeltaChunk(ChoiceDeltaChunk):
+    flat_likelihoods: dict[str, float] = {}
+    flat_parsed: dict[str, Any] = {}
+    flat_deleted_keys: list[str] = []
+    field_locations: dict[str, list[FieldLocation]] | None = Field(default=None, description="The locations of the fields in the document, if available")
+    is_valid_json: bool = False
+    key_mapping: dict[str, Optional[str]] | None = Field(default=None, description="Mapping of consensus keys to original model keys")
+# Streaming choice whose delta is narrowed to UiParsedChoiceDeltaChunk.
+class UiParsedChoiceChunk(ChoiceChunk):
+    delta: UiParsedChoiceDeltaChunk
+class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
+    extraction_id: str | None = None
+    choices: list[UiParsedChoiceChunk]
+    schema_validation_error: ErrorDetail | None = None
+    # Timestamps
+    request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
+    first_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the first token of the document. If non-streaming, set to last_token_at")
+    last_token_at: datetime.datetime | None = Field(default=None, description="Timestamp of the last token of the document")
+    @computed_field
+    @property
+    def api_cost(self) -> Optional[Amount]:
+        if self.usage:
+            try:
+                cost = compute_cost_from_model(self.model, self.usage)
+                return cost
+            except Exception as e:
+                print(f"Error computing cost: {e}")
+                return None
+        return None
+    @computed_field  # type: ignore
+    @property
+    def cost_breakdown(self) -> Optional[CostBreakdown]:
+        if self.usage:
+            try:
+                cost = compute_cost_from_model_with_breakdown(self.model, self.usage)
+                return cost
+            except Exception as e:
+                print(f"Error computing cost: {e}")
+                return None
+        return None
+    def chunk_accumulator(self, previous_cumulated_chunk: "UiParsedChatCompletionChunk | None" = None) -> "UiParsedChatCompletionChunk":
+        """
+        Accumulate the chunk into the state, returning a new UiParsedChatCompletionChunk with the accumulated content that could be yielded alone to generate the same state.
+        """
+        def safe_get_delta(chnk: "UiParsedChatCompletionChunk | None", index: int) -> UiParsedChoiceDeltaChunk:
+            if chnk is not None and index < len(chnk.choices):
+                return chnk.choices[index].delta
+            else:
+                return UiParsedChoiceDeltaChunk(
+                    content="",
+                    flat_parsed={},
+                    flat_likelihoods={},
+                    is_valid_json=False,
+                )
+        max_choices = max(len(self.choices), len(previous_cumulated_chunk.choices)) if previous_cumulated_chunk is not None else len(self.choices)
+        # Get the current chunk missing content, flat_deleted_keys and is_valid_json
+        acc_flat_deleted_keys = [safe_get_delta(self, i).flat_deleted_keys for i in range(max_choices)]
+        acc_is_valid_json = [safe_get_delta(self, i).is_valid_json for i in range(max_choices)]
+        acc_field_locations = [safe_get_delta(self, i).field_locations for i in range(max_choices)]  # This is only present in the last chunk.
+        # Delete from previous_cumulated_chunk.choices[i].delta.flat_parsed the keys that are in safe_get_delta(self, i).flat_deleted_keys
+        for i in range(max_choices):
+            previous_delta = safe_get_delta(previous_cumulated_chunk, i)
+            current_delta = safe_get_delta(self, i)
+            for deleted_key in current_delta.flat_deleted_keys:
+                previous_delta.flat_parsed.pop(deleted_key, None)
+                previous_delta.flat_likelihoods.pop(deleted_key, None)
+        # Accumulate the flat_parsed and flat_likelihoods
+        acc_flat_parsed = [safe_get_delta(previous_cumulated_chunk, i).flat_parsed | safe_get_delta(self, i).flat_parsed for i in range(max_choices)]
+        acc_flat_likelihoods = [safe_get_delta(previous_cumulated_chunk, i).flat_likelihoods | safe_get_delta(self, i).flat_likelihoods for i in range(max_choices)]
+        acc_key_mapping = [safe_get_delta(previous_cumulated_chunk, i).key_mapping or safe_get_delta(self, i).key_mapping for i in range(max_choices)]
+        acc_content = [(safe_get_delta(previous_cumulated_chunk, i).content or "") + (safe_get_delta(self, i).content or "") for i in range(max_choices)]
+        usage = self.usage
+        first_token_at = self.first_token_at
+        last_token_at = self.last_token_at
+        request_at = self.request_at
+        return UiParsedChatCompletionChunk(
+            extraction_id=self.extraction_id,
+            id=self.id,
+            created=self.created,
+            model=self.model,
+            object=self.object,
+            usage=usage,
+            choices=[
+                UiParsedChoiceChunk(
+                    delta=UiParsedChoiceDeltaChunk(
+                        content=acc_content[i],
+                        flat_parsed=acc_flat_parsed[i],
+                        flat_likelihoods=acc_flat_likelihoods[i],
+                        flat_deleted_keys=acc_flat_deleted_keys[i],
+                        field_locations=acc_field_locations[i],
+                        is_valid_json=acc_is_valid_json[i],
+                        key_mapping=acc_key_mapping[i],
+                    ),
+                    index=i,
+                )
+                for i in range(max_choices)
+            ],
+            schema_validation_error=self.schema_validation_error,
+            request_at=request_at,
+            first_token_at=first_token_at,
+            last_token_at=last_token_at,
+        )