retab 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +9 -14
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +9 -3
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +28 -28
- {uiform → retab}/client.py +32 -31
- {uiform → retab}/resources/consensus/client.py +17 -36
- {uiform → retab}/resources/consensus/completions.py +24 -47
- {uiform → retab}/resources/consensus/completions_stream.py +26 -38
- {uiform → retab}/resources/consensus/responses.py +31 -80
- {uiform → retab}/resources/consensus/responses_stream.py +31 -79
- {uiform → retab}/resources/documents/client.py +59 -45
- {uiform → retab}/resources/documents/extractions.py +181 -90
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +220 -216
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +179 -164
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +39 -2
- {uiform → retab}/types/ai_models.py +13 -13
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +9 -9
- {uiform → retab}/types/automations/outlook.py +15 -11
- retab/types/browser_canvas.py +3 -0
- {uiform → retab}/types/chat.py +2 -2
- {uiform → retab}/types/completions.py +9 -12
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +18 -20
- {uiform → retab}/types/documents/extractions.py +69 -24
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +33 -10
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +13 -14
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.36.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
- retab-0.0.37.dist-info/RECORD +107 -0
- retab-0.0.37.dist-info/top_level.txt +1 -0
- retab-0.0.36.dist-info/RECORD +0 -96
- retab-0.0.36.dist-info/top_level.txt +0 -1
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/consensus.py +0 -10
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- {uiform → retab}/__init__.py +0 -0
- {uiform → retab}/_resource.py +0 -0
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/automations/webhooks.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform → retab}/types/documents/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform → retab}/types/jobs/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform → retab}/types/schemas/__init__.py +0 -0
- {uiform → retab}/types/secrets/__init__.py +0 -0
- {retab-0.0.36.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
@@ -1,10 +1,9 @@
|
|
1
1
|
import re
|
2
|
-
from typing import Any, Dict, List,
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
3
|
|
4
4
|
import nanoid # type: ignore
|
5
|
-
from pydantic import BaseModel, EmailStr, Field,
|
5
|
+
from pydantic import BaseModel, EmailStr, Field, computed_field, field_validator
|
6
6
|
|
7
|
-
from ..._utils.json_schema import convert_schema_to_layout
|
8
7
|
from ..logs import AutomationConfig, UpdateAutomationRequest
|
9
8
|
from ..pagination import ListMetadata
|
10
9
|
|
@@ -29,7 +28,11 @@ class FetchParams(BaseModel):
|
|
29
28
|
|
30
29
|
|
31
30
|
class Outlook(AutomationConfig):
|
32
|
-
|
31
|
+
@computed_field
|
32
|
+
@property
|
33
|
+
def object(self) -> str:
|
34
|
+
return "automation.outlook"
|
35
|
+
|
33
36
|
id: str = Field(default_factory=lambda: "outlook_" + nanoid.generate(), description="Unique identifier for the outlook")
|
34
37
|
|
35
38
|
authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
|
@@ -41,12 +44,13 @@ class Outlook(AutomationConfig):
|
|
41
44
|
match_params: List[MatchParams] = Field(default_factory=list, description="List of match parameters for the outlook automation")
|
42
45
|
fetch_params: List[FetchParams] = Field(default_factory=list, description="List of fetch parameters for the outlook automation")
|
43
46
|
|
44
|
-
@model_validator(mode=
|
45
|
-
@classmethod
|
46
|
-
def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
# @model_validator(mode="before")
|
48
|
+
# @classmethod
|
49
|
+
# def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
|
50
|
+
# if values.get("layout_schema") is None:
|
51
|
+
# values["layout_schema"] = convert_schema_to_layout(values["json_schema"])
|
52
|
+
# return values
|
53
|
+
|
50
54
|
|
51
55
|
class ListOutlooks(BaseModel):
|
52
56
|
data: list[Outlook]
|
@@ -65,4 +69,4 @@ class UpdateOutlookRequest(UpdateAutomationRequest):
|
|
65
69
|
|
66
70
|
@field_validator("authorized_emails", mode="before")
|
67
71
|
def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
|
68
|
-
return [email.strip().lower() for email in emails] if emails else None
|
72
|
+
return [email.strip().lower() for email in emails] if emails else None
|
{uiform → retab}/types/chat.py
RENAMED
@@ -1,8 +1,8 @@
|
|
1
|
-
from typing import
|
1
|
+
from typing import Literal, TypedDict, Union
|
2
2
|
|
3
3
|
from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
|
4
4
|
|
5
5
|
|
6
6
|
class ChatCompletionUiformMessage(TypedDict): # homemade replacement for ChatCompletionMessageParam because iterable messes the serialization with pydantic
|
7
|
-
role: Literal[
|
7
|
+
role: Literal["user", "system", "assistant", "developer"]
|
8
8
|
content: Union[str, list[ChatCompletionContentPartParam]]
|
@@ -1,6 +1,10 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any, Optional, Union
|
2
2
|
|
3
3
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
4
|
+
from openai.types.responses.response_input_param import ResponseInputParam
|
5
|
+
from openai.types.responses.response_text_config_param import ResponseTextConfigParam
|
6
|
+
from openai.types.shared_params.reasoning import Reasoning
|
7
|
+
from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
|
4
8
|
from pydantic import BaseModel, ConfigDict, Field
|
5
9
|
|
6
10
|
from .._utils.ai_models import find_provider_from_model
|
@@ -8,8 +12,6 @@ from .ai_models import AIProvider
|
|
8
12
|
from .chat import ChatCompletionUiformMessage
|
9
13
|
|
10
14
|
|
11
|
-
from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
|
12
|
-
|
13
15
|
class UiChatCompletionsRequest(BaseModel):
|
14
16
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
15
17
|
model: str = Field(..., description="Model used for chat completion")
|
@@ -35,8 +37,6 @@ class UiChatCompletionsRequest(BaseModel):
|
|
35
37
|
return find_provider_from_model(self.model)
|
36
38
|
|
37
39
|
|
38
|
-
|
39
|
-
|
40
40
|
class UiChatCompletionsParseRequest(BaseModel):
|
41
41
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
42
42
|
model: str = Field(..., description="Model used for chat completion")
|
@@ -61,10 +61,6 @@ class UiChatCompletionsParseRequest(BaseModel):
|
|
61
61
|
"""
|
62
62
|
return find_provider_from_model(self.model)
|
63
63
|
|
64
|
-
from typing import Optional, Union
|
65
|
-
from openai.types.shared_params.reasoning import Reasoning
|
66
|
-
from openai.types.responses.response_input_param import ResponseInputParam
|
67
|
-
from openai.types.responses.response_text_config_param import ResponseTextConfigParam
|
68
64
|
|
69
65
|
class UiChatResponseCreateRequest(BaseModel):
|
70
66
|
input: Union[str, ResponseInputParam] = Field(..., description="Input to be parsed")
|
@@ -73,8 +69,10 @@ class UiChatResponseCreateRequest(BaseModel):
|
|
73
69
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
74
70
|
model: str = Field(..., description="Model used for chat completion")
|
75
71
|
temperature: Optional[float] = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
|
76
|
-
reasoning: Optional[Reasoning] = Field(
|
77
|
-
|
72
|
+
reasoning: Optional[Reasoning] = Field(
|
73
|
+
default=None, description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used."
|
74
|
+
)
|
75
|
+
|
78
76
|
stream: Optional[bool] = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
|
79
77
|
seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
|
80
78
|
text: ResponseTextConfigParam = Field(default={"format": {"type": "text"}}, description="Format of the response")
|
@@ -90,4 +88,3 @@ class UiChatResponseCreateRequest(BaseModel):
|
|
90
88
|
AIProvider: The AI provider corresponding to the given model.
|
91
89
|
"""
|
92
90
|
return find_provider_from_model(self.model)
|
93
|
-
|
retab/types/consensus.py
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
from typing import Any, Literal, Optional
|
2
|
+
from pydantic import BaseModel, Field
|
3
|
+
|
4
|
+
|
5
|
+
class ReconciliationRequest(BaseModel):
|
6
|
+
list_dicts: list[dict] = Field(description="List of dictionaries that will be reconciled into a single consensus dictionary.")
|
7
|
+
reference_schema: Optional[dict[str, Any]] = Field(
|
8
|
+
default=None,
|
9
|
+
description="Optional schema defining the structure and types of the dictionary fields to validate the list of dictionaries against. Raise an error if one of the dictionaries does not match the schema.",
|
10
|
+
)
|
11
|
+
mode: Literal["direct", "aligned"] = Field(
|
12
|
+
default="direct",
|
13
|
+
description="The mode to use for the consensus. If 'direct', the consensus is computed directly from the list of dictionaries. If 'aligned', the consensus is computed from the aligned dictionaries.",
|
14
|
+
)
|
15
|
+
|
16
|
+
|
17
|
+
class ReconciliationResponse(BaseModel):
|
18
|
+
consensus_dict: dict = Field(description="The consensus dictionary containing the reconciled values from the input dictionaries.")
|
19
|
+
likelihoods: dict = Field(description="A dictionary containing the likelihood/confidence scores for each field in the consensus dictionary.")
|
@@ -1,17 +1,17 @@
|
|
1
1
|
import datetime
|
2
|
-
from typing import Any, Dict
|
2
|
+
from typing import Any, Dict
|
3
3
|
|
4
|
-
import nanoid # type: ignore
|
5
4
|
from pydantic import BaseModel, Field
|
6
5
|
|
7
6
|
from ..modalities import Modality
|
7
|
+
from ..browser_canvas import BrowserCanvas
|
8
8
|
|
9
9
|
|
10
10
|
class AnnotationParameters(BaseModel):
|
11
11
|
model: str
|
12
12
|
modality: Modality | None = "native"
|
13
13
|
image_resolution_dpi: int = 96
|
14
|
-
browser_canvas:
|
14
|
+
browser_canvas: BrowserCanvas = "A4"
|
15
15
|
temperature: float = 0.0
|
16
16
|
|
17
17
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import mimetypes
|
2
|
-
from typing import BinaryIO, Literal, Tuple
|
2
|
+
from typing import Any, BinaryIO, Literal, Tuple
|
3
3
|
|
4
|
-
from pydantic import BaseModel, ConfigDict, Field, HttpUrl,
|
4
|
+
from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator
|
5
5
|
|
6
6
|
|
7
7
|
class DBFile(BaseModel):
|
@@ -27,10 +27,12 @@ FileTuple = Tuple[str, FileData]
|
|
27
27
|
|
28
28
|
|
29
29
|
class FileLink(BaseModel):
|
30
|
-
download_url:
|
30
|
+
download_url: str = Field(description="The signed URL to download the file")
|
31
31
|
expires_in: str = Field(description="The expiration time of the signed URL")
|
32
32
|
filename: str = Field(description="The name of the file")
|
33
33
|
|
34
|
-
@
|
35
|
-
def
|
36
|
-
|
34
|
+
@field_validator("download_url", mode="after")
|
35
|
+
def validate_httpurl(cls, val: Any) -> Any:
|
36
|
+
if isinstance(val, str):
|
37
|
+
HttpUrl(val)
|
38
|
+
return val
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import base64
|
2
2
|
from io import BytesIO
|
3
|
-
from typing import List, Literal
|
3
|
+
from typing import Any, List, Literal
|
4
4
|
|
5
5
|
import PIL.Image
|
6
6
|
import requests
|
@@ -12,11 +12,12 @@ from pydantic import BaseModel, Field, computed_field
|
|
12
12
|
|
13
13
|
from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
|
14
14
|
from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
|
15
|
+
from ..._utils.display import count_image_tokens, count_text_tokens
|
15
16
|
from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
|
16
|
-
from ..._utils.display import count_text_tokens, count_image_tokens
|
17
17
|
from ..chat import ChatCompletionUiformMessage
|
18
18
|
from ..mime import MIMEData
|
19
19
|
from ..modalities import Modality
|
20
|
+
from ..browser_canvas import BrowserCanvas
|
20
21
|
|
21
22
|
MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
|
22
23
|
|
@@ -26,19 +27,20 @@ class TokenCount(BaseModel):
|
|
26
27
|
developer_tokens: int = 0
|
27
28
|
user_tokens: int = 0
|
28
29
|
|
30
|
+
|
29
31
|
class DocumentCreateMessageRequest(BaseModel):
|
30
32
|
document: MIMEData = Field(description="The document to load.")
|
31
33
|
modality: Modality = Field(description="The modality of the document to load.")
|
32
34
|
image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
|
33
|
-
browser_canvas:
|
35
|
+
browser_canvas: BrowserCanvas = Field(
|
36
|
+
default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
|
37
|
+
)
|
38
|
+
|
34
39
|
|
35
|
-
from typing import Any
|
36
40
|
class DocumentCreateInputRequest(DocumentCreateMessageRequest):
|
37
41
|
json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")
|
38
42
|
|
39
43
|
|
40
|
-
|
41
|
-
|
42
44
|
class DocumentMessage(BaseModel):
|
43
45
|
id: str = Field(description="A unique identifier for the document loading.")
|
44
46
|
object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
|
@@ -49,21 +51,21 @@ class DocumentMessage(BaseModel):
|
|
49
51
|
@computed_field
|
50
52
|
def token_count(self) -> TokenCount:
|
51
53
|
"""Returns the token count for the document message.
|
52
|
-
|
54
|
+
|
53
55
|
This property calculates token usage based on both text and image content
|
54
56
|
in the messages using the token counting utilities.
|
55
|
-
|
57
|
+
|
56
58
|
Returns:
|
57
59
|
TokenCount: A Pydantic model with total, user, and developer token counts.
|
58
60
|
"""
|
59
61
|
total_tokens = 0
|
60
62
|
user_tokens = 0
|
61
63
|
developer_tokens = 0
|
62
|
-
|
64
|
+
|
63
65
|
for msg in self.messages:
|
64
66
|
role = msg.get("role", "user")
|
65
67
|
msg_tokens = 0
|
66
|
-
|
68
|
+
|
67
69
|
if isinstance(msg["content"], str):
|
68
70
|
msg_tokens = count_text_tokens(msg["content"])
|
69
71
|
elif isinstance(msg["content"], list):
|
@@ -72,30 +74,26 @@ class DocumentMessage(BaseModel):
|
|
72
74
|
msg_tokens += count_text_tokens(content_item)
|
73
75
|
elif isinstance(content_item, dict):
|
74
76
|
item_type = content_item.get("type")
|
75
|
-
|
77
|
+
|
76
78
|
if item_type == "text" and "text" in content_item:
|
77
79
|
msg_tokens += count_text_tokens(content_item["text"])
|
78
|
-
|
80
|
+
|
79
81
|
elif item_type == "image_url" and "image_url" in content_item:
|
80
82
|
image_url = content_item["image_url"]["url"]
|
81
83
|
detail = content_item["image_url"].get("detail", "high")
|
82
84
|
msg_tokens += count_image_tokens(image_url, detail)
|
83
|
-
|
85
|
+
|
84
86
|
# Update total tokens
|
85
87
|
total_tokens += msg_tokens
|
86
|
-
|
88
|
+
|
87
89
|
# Update role-specific counts
|
88
90
|
assert role in ["user", "developer"], f"Invalid role: {role}"
|
89
91
|
if role == "user":
|
90
92
|
user_tokens += msg_tokens
|
91
93
|
elif role == "developer":
|
92
94
|
developer_tokens += msg_tokens
|
93
|
-
|
94
|
-
return TokenCount(
|
95
|
-
total_tokens=total_tokens,
|
96
|
-
user_tokens=user_tokens,
|
97
|
-
developer_tokens=developer_tokens
|
98
|
-
)
|
95
|
+
|
96
|
+
return TokenCount(total_tokens=total_tokens, user_tokens=user_tokens, developer_tokens=developer_tokens)
|
99
97
|
|
100
98
|
@property
|
101
99
|
def items(self) -> list[str | PIL.Image.Image]:
|
@@ -15,24 +15,23 @@ from openai.types.responses.response import Response
|
|
15
15
|
from openai.types.responses.response_input_param import ResponseInputItemParam
|
16
16
|
from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator
|
17
17
|
|
18
|
-
from ..._utils.usage.usage import compute_cost_from_model, compute_cost_from_model_with_breakdown
|
19
|
-
|
20
|
-
from ..._utils.ai_models import find_provider_from_model
|
21
|
-
from ..ai_models import AIProvider, Amount, get_model_card
|
18
|
+
from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
|
19
|
+
from ..ai_models import Amount
|
22
20
|
from ..chat import ChatCompletionUiformMessage
|
23
21
|
from ..mime import MIMEData
|
24
22
|
from ..modalities import Modality
|
23
|
+
from ..browser_canvas import BrowserCanvas
|
25
24
|
from ..standards import ErrorDetail, StreamingBaseModel
|
26
25
|
|
27
26
|
|
28
27
|
class DocumentExtractRequest(BaseModel):
|
29
28
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
30
|
-
|
31
|
-
|
29
|
+
document: MIMEData = Field(default=None, description="Document to be analyzed", deprecated=True) # type: ignore
|
30
|
+
documents: list[MIMEData] = Field(..., description="Documents to be analyzed (preferred over document)")
|
32
31
|
modality: Modality
|
33
32
|
image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
|
34
|
-
browser_canvas:
|
35
|
-
default=
|
33
|
+
browser_canvas: BrowserCanvas = Field(
|
34
|
+
default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
|
36
35
|
)
|
37
36
|
model: str = Field(..., description="Model used for chat completion")
|
38
37
|
json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
|
@@ -54,6 +53,28 @@ class DocumentExtractRequest(BaseModel):
|
|
54
53
|
raise ValueError("n_consensus greater than 1 but temperature is 0")
|
55
54
|
return v
|
56
55
|
|
56
|
+
@model_validator(mode="before")
|
57
|
+
def validate_document_or_documents(cls, data: Any) -> Any:
|
58
|
+
# Handle both dict and model instance cases
|
59
|
+
if isinstance(data, dict):
|
60
|
+
if data.get("documents"): # If documents is set, it has higher priority than document
|
61
|
+
data["document"] = data["documents"][0]
|
62
|
+
elif data.get("document"):
|
63
|
+
data["documents"] = [data["document"]]
|
64
|
+
else:
|
65
|
+
raise ValueError("document or documents must be provided")
|
66
|
+
else:
|
67
|
+
# Handle model instance case
|
68
|
+
document = getattr(data, "document", None)
|
69
|
+
documents = getattr(data, "documents", None)
|
70
|
+
if documents:
|
71
|
+
setattr(data, "document", documents[0])
|
72
|
+
elif document:
|
73
|
+
setattr(data, "documents", [document])
|
74
|
+
else:
|
75
|
+
raise ValueError("document or documents must be provided")
|
76
|
+
return data
|
77
|
+
|
57
78
|
|
58
79
|
class ConsensusModel(BaseModel):
|
59
80
|
model: str = Field(description="Model name")
|
@@ -87,7 +108,7 @@ LikelihoodsSource = Literal["consensus", "log_probs"]
|
|
87
108
|
|
88
109
|
class UiParsedChatCompletion(ParsedChatCompletion):
|
89
110
|
extraction_id: str | None = None
|
90
|
-
choices: list[UiParsedChoice]
|
111
|
+
choices: list[UiParsedChoice] # type: ignore
|
91
112
|
# Additional metadata fields (UIForm)
|
92
113
|
likelihoods: Optional[dict[str, Any]] = Field(
|
93
114
|
default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
|
@@ -147,19 +168,43 @@ class LogExtractionRequest(BaseModel):
|
|
147
168
|
# Validate that at least one of the messages, openai_messages, anthropic_messages is provided using model_validator
|
148
169
|
@model_validator(mode="before")
|
149
170
|
def validation(cls, data: Any) -> Any:
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
171
|
+
# Handle both dict and model instance cases
|
172
|
+
if isinstance(data, dict):
|
173
|
+
messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
|
174
|
+
messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
|
175
|
+
if len(messages_candidates) != 1:
|
176
|
+
raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
|
177
|
+
|
178
|
+
# Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
|
179
|
+
if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
|
180
|
+
raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
|
181
|
+
|
182
|
+
completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
|
183
|
+
completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
|
184
|
+
if len(completion_candidates) != 1:
|
185
|
+
raise ValueError("Exactly one of completion, openai_responses_output must be provided")
|
186
|
+
else:
|
187
|
+
# Handle model instance case
|
188
|
+
messages_candidates = [
|
189
|
+
getattr(data, "messages", None),
|
190
|
+
getattr(data, "openai_messages", None),
|
191
|
+
getattr(data, "anthropic_messages", None),
|
192
|
+
getattr(data, "openai_responses_input", None),
|
193
|
+
]
|
194
|
+
messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
|
195
|
+
if len(messages_candidates) != 1:
|
196
|
+
raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
|
197
|
+
|
198
|
+
# Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
|
199
|
+
anthropic_messages = getattr(data, "anthropic_messages", None)
|
200
|
+
anthropic_system_prompt = getattr(data, "anthropic_system_prompt", None)
|
201
|
+
if anthropic_messages is not None and anthropic_system_prompt is None:
|
202
|
+
raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
|
203
|
+
|
204
|
+
completion_candidates = [getattr(data, "completion", None), getattr(data, "openai_responses_output", None)]
|
205
|
+
completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
|
206
|
+
if len(completion_candidates) != 1:
|
207
|
+
raise ValueError("Exactly one of completion, openai_responses_output must be provided")
|
163
208
|
|
164
209
|
return data
|
165
210
|
|
@@ -192,12 +237,12 @@ class UiParsedChoiceDeltaChunk(ChoiceDeltaChunk):
|
|
192
237
|
|
193
238
|
|
194
239
|
class UiParsedChoiceChunk(ChoiceChunk):
|
195
|
-
delta: UiParsedChoiceDeltaChunk
|
240
|
+
delta: UiParsedChoiceDeltaChunk # type: ignore
|
196
241
|
|
197
242
|
|
198
243
|
class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
|
199
244
|
extraction_id: str | None = None
|
200
|
-
choices: list[UiParsedChoiceChunk]
|
245
|
+
choices: list[UiParsedChoiceChunk] # type: ignore
|
201
246
|
schema_validation_error: ErrorDetail | None = None
|
202
247
|
# Timestamps
|
203
248
|
request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
|
{uiform → retab}/types/evals.py
RENAMED
@@ -1,19 +1,17 @@
|
|
1
1
|
import copy
|
2
2
|
import datetime
|
3
3
|
import json
|
4
|
-
from typing import Any, List, Literal, Optional
|
4
|
+
from typing import Any, List, Literal, Optional
|
5
5
|
|
6
6
|
import nanoid # type: ignore
|
7
7
|
from pydantic import BaseModel, Field, computed_field
|
8
8
|
|
9
|
-
|
10
9
|
from .._utils.json_schema import clean_schema, compute_schema_data_id
|
11
10
|
from .._utils.mime import generate_blake2b_hash_from_string
|
12
|
-
from .ai_models import Amount
|
13
|
-
from .
|
11
|
+
from .ai_models import Amount
|
12
|
+
from .inference_settings import InferenceSettings
|
14
13
|
from .mime import MIMEData
|
15
14
|
|
16
|
-
|
17
15
|
# Define the type alias for MetricType
|
18
16
|
MetricType = Literal["levenshtein", "jaccard", "hamming"]
|
19
17
|
|
@@ -132,6 +130,7 @@ class UpdateEvaluationRequest(BaseModel):
|
|
132
130
|
json_schema: Optional[dict[str, Any]] = Field(default=None, description="The json schema of the evaluation")
|
133
131
|
|
134
132
|
project_id: Optional[str] = Field(default=None, description="The ID of the project")
|
133
|
+
default_inference_settings: Optional[InferenceSettings] = Field(default=None, description="The default inference properties for the evaluation (mostly used in the frontend)")
|
135
134
|
|
136
135
|
@computed_field # type: ignore
|
137
136
|
@property
|
@@ -165,6 +164,7 @@ class Evaluation(BaseModel):
|
|
165
164
|
updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
|
166
165
|
|
167
166
|
name: str
|
167
|
+
old_documents: list[EvaluationDocument] | None = None
|
168
168
|
documents: list[EvaluationDocument]
|
169
169
|
iterations: list[Iteration]
|
170
170
|
json_schema: dict[str, Any]
|
@@ -0,0 +1,31 @@
|
|
1
|
+
from .model import Evaluation, CreateEvaluation, PatchEvaluationRequest, ListEvaluationParams
|
2
|
+
from .documents import AnnotatedDocument, DocumentItem, EvaluationDocument, CreateEvaluationDocumentRequest, PatchEvaluationDocumentRequest
|
3
|
+
from .iterations import (
|
4
|
+
Iteration,
|
5
|
+
CreateIterationRequest,
|
6
|
+
PatchIterationRequest,
|
7
|
+
ProcessIterationRequest,
|
8
|
+
DocumentStatus,
|
9
|
+
IterationDocumentStatusResponse,
|
10
|
+
AddIterationFromJsonlRequest,
|
11
|
+
)
|
12
|
+
|
13
|
+
|
14
|
+
__all__ = [
|
15
|
+
"Evaluation",
|
16
|
+
"CreateEvaluation",
|
17
|
+
"PatchEvaluationRequest",
|
18
|
+
"ListEvaluationParams",
|
19
|
+
"AnnotatedDocument",
|
20
|
+
"DocumentItem",
|
21
|
+
"EvaluationDocument",
|
22
|
+
"CreateEvaluationDocumentRequest",
|
23
|
+
"PatchEvaluationDocumentRequest",
|
24
|
+
"Iteration",
|
25
|
+
"CreateIterationRequest",
|
26
|
+
"PatchIterationRequest",
|
27
|
+
"ProcessIterationRequest",
|
28
|
+
"DocumentStatus",
|
29
|
+
"IterationDocumentStatusResponse",
|
30
|
+
"AddIterationFromJsonlRequest",
|
31
|
+
]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from typing import Any, Optional
|
2
|
+
|
3
|
+
from pydantic import BaseModel, Field
|
4
|
+
|
5
|
+
from ..mime import MIMEData
|
6
|
+
from ..predictions import PredictionMetadata
|
7
|
+
|
8
|
+
|
9
|
+
class AnnotatedDocument(BaseModel):
|
10
|
+
mime_data: MIMEData = Field(
|
11
|
+
description="The mime data of the document. Can also be a BaseMIMEData, which is why we have this id field (to be able to identify the file, but id is equal to mime_data.id)"
|
12
|
+
)
|
13
|
+
annotation: dict[str, Any] = Field(default={}, description="The ground truth of the document")
|
14
|
+
|
15
|
+
|
16
|
+
class DocumentItem(AnnotatedDocument):
|
17
|
+
annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")
|
18
|
+
|
19
|
+
|
20
|
+
class EvaluationDocument(DocumentItem):
|
21
|
+
id: str = Field(description="The ID of the document. Equal to mime_data.id but robust to the case where mime_data is a BaseMIMEData")
|
22
|
+
|
23
|
+
|
24
|
+
class CreateEvaluationDocumentRequest(DocumentItem):
|
25
|
+
pass
|
26
|
+
|
27
|
+
|
28
|
+
class PatchEvaluationDocumentRequest(BaseModel):
|
29
|
+
annotation: Optional[dict[str, Any]] = Field(default=None, description="The ground truth of the document")
|
30
|
+
annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")
|