retab 0.0.35-py3-none-any.whl → 0.0.37-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (134)
  1. {uiform → retab}/_utils/ai_models.py +2 -2
  2. {uiform → retab}/_utils/benchmarking.py +15 -16
  3. {uiform → retab}/_utils/chat.py +9 -14
  4. {uiform → retab}/_utils/display.py +0 -3
  5. {uiform → retab}/_utils/json_schema.py +9 -14
  6. {uiform → retab}/_utils/mime.py +11 -14
  7. {uiform → retab}/_utils/responses.py +9 -3
  8. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  9. {uiform → retab}/_utils/usage/usage.py +28 -28
  10. {uiform → retab}/client.py +32 -31
  11. {uiform → retab}/resources/consensus/client.py +17 -36
  12. {uiform → retab}/resources/consensus/completions.py +24 -47
  13. {uiform → retab}/resources/consensus/completions_stream.py +26 -38
  14. {uiform → retab}/resources/consensus/responses.py +31 -80
  15. {uiform → retab}/resources/consensus/responses_stream.py +31 -79
  16. {uiform → retab}/resources/documents/client.py +59 -45
  17. {uiform → retab}/resources/documents/extractions.py +181 -90
  18. {uiform → retab}/resources/evals.py +56 -43
  19. retab/resources/evaluations/__init__.py +3 -0
  20. retab/resources/evaluations/client.py +301 -0
  21. retab/resources/evaluations/documents.py +233 -0
  22. retab/resources/evaluations/iterations.py +452 -0
  23. {uiform → retab}/resources/files.py +2 -2
  24. {uiform → retab}/resources/jsonlUtils.py +220 -216
  25. retab/resources/models.py +73 -0
  26. retab/resources/processors/automations/client.py +244 -0
  27. {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
  28. retab/resources/processors/automations/links.py +294 -0
  29. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  30. {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
  31. retab/resources/processors/automations/outlook.py +337 -0
  32. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  33. {uiform → retab}/resources/processors/client.py +179 -164
  34. {uiform → retab}/resources/schemas.py +78 -66
  35. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  36. retab/resources/secrets/webhook.py +64 -0
  37. {uiform → retab}/resources/usage.py +39 -2
  38. {uiform → retab}/types/ai_models.py +13 -13
  39. {uiform → retab}/types/automations/cron.py +19 -12
  40. {uiform → retab}/types/automations/endpoints.py +7 -4
  41. {uiform → retab}/types/automations/links.py +7 -3
  42. {uiform → retab}/types/automations/mailboxes.py +9 -9
  43. {uiform → retab}/types/automations/outlook.py +15 -11
  44. retab/types/browser_canvas.py +3 -0
  45. {uiform → retab}/types/chat.py +2 -2
  46. {uiform → retab}/types/completions.py +9 -12
  47. retab/types/consensus.py +19 -0
  48. {uiform → retab}/types/db/annotations.py +3 -3
  49. {uiform → retab}/types/db/files.py +8 -6
  50. {uiform → retab}/types/documents/create_messages.py +18 -20
  51. {uiform → retab}/types/documents/extractions.py +69 -24
  52. {uiform → retab}/types/evals.py +5 -5
  53. retab/types/evaluations/__init__.py +31 -0
  54. retab/types/evaluations/documents.py +30 -0
  55. retab/types/evaluations/iterations.py +112 -0
  56. retab/types/evaluations/model.py +73 -0
  57. retab/types/events.py +79 -0
  58. {uiform → retab}/types/extractions.py +33 -10
  59. retab/types/inference_settings.py +15 -0
  60. retab/types/jobs/base.py +54 -0
  61. retab/types/jobs/batch_annotation.py +12 -0
  62. {uiform → retab}/types/jobs/evaluation.py +1 -2
  63. {uiform → retab}/types/logs.py +37 -34
  64. retab/types/metrics.py +32 -0
  65. {uiform → retab}/types/mime.py +22 -20
  66. {uiform → retab}/types/modalities.py +10 -10
  67. retab/types/predictions.py +19 -0
  68. {uiform → retab}/types/schemas/enhance.py +4 -2
  69. {uiform → retab}/types/schemas/evaluate.py +7 -4
  70. {uiform → retab}/types/schemas/generate.py +6 -3
  71. {uiform → retab}/types/schemas/layout.py +1 -1
  72. {uiform → retab}/types/schemas/object.py +13 -14
  73. {uiform → retab}/types/schemas/templates.py +1 -3
  74. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  75. {uiform → retab}/types/standards.py +18 -1
  76. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
  77. retab-0.0.37.dist-info/RECORD +107 -0
  78. retab-0.0.37.dist-info/top_level.txt +1 -0
  79. retab-0.0.35.dist-info/RECORD +0 -111
  80. retab-0.0.35.dist-info/top_level.txt +0 -1
  81. uiform/_utils/benchmarking copy.py +0 -588
  82. uiform/resources/deployments/__init__.py +0 -9
  83. uiform/resources/deployments/client.py +0 -78
  84. uiform/resources/deployments/endpoints.py +0 -322
  85. uiform/resources/deployments/links.py +0 -452
  86. uiform/resources/deployments/logs.py +0 -211
  87. uiform/resources/deployments/mailboxes.py +0 -496
  88. uiform/resources/deployments/outlook.py +0 -531
  89. uiform/resources/deployments/tests.py +0 -158
  90. uiform/resources/models.py +0 -45
  91. uiform/resources/processors/automations/client.py +0 -78
  92. uiform/resources/processors/automations/links.py +0 -356
  93. uiform/resources/processors/automations/outlook.py +0 -444
  94. uiform/resources/secrets/webhook.py +0 -62
  95. uiform/types/consensus.py +0 -10
  96. uiform/types/deployments/cron.py +0 -59
  97. uiform/types/deployments/endpoints.py +0 -28
  98. uiform/types/deployments/links.py +0 -36
  99. uiform/types/deployments/mailboxes.py +0 -67
  100. uiform/types/deployments/outlook.py +0 -76
  101. uiform/types/deployments/webhooks.py +0 -21
  102. uiform/types/events.py +0 -76
  103. uiform/types/jobs/base.py +0 -150
  104. uiform/types/jobs/batch_annotation.py +0 -22
  105. uiform/types/secrets/__init__.py +0 -0
  106. {uiform → retab}/__init__.py +0 -0
  107. {uiform → retab}/_resource.py +0 -0
  108. {uiform → retab}/_utils/__init__.py +0 -0
  109. {uiform → retab}/_utils/usage/__init__.py +0 -0
  110. {uiform → retab}/py.typed +0 -0
  111. {uiform → retab}/resources/__init__.py +0 -0
  112. {uiform → retab}/resources/consensus/__init__.py +0 -0
  113. {uiform → retab}/resources/documents/__init__.py +0 -0
  114. {uiform → retab}/resources/finetuning.py +0 -0
  115. {uiform → retab}/resources/openai_example.py +0 -0
  116. {uiform → retab}/resources/processors/__init__.py +0 -0
  117. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  118. {uiform → retab}/resources/prompt_optimization.py +0 -0
  119. {uiform → retab}/resources/secrets/__init__.py +0 -0
  120. {uiform → retab}/resources/secrets/client.py +0 -0
  121. {uiform → retab}/types/__init__.py +0 -0
  122. {uiform → retab}/types/automations/__init__.py +0 -0
  123. {uiform → retab}/types/automations/webhooks.py +0 -0
  124. {uiform → retab}/types/db/__init__.py +0 -0
  125. {uiform/types/deployments → retab/types/documents}/__init__.py +0 -0
  126. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  127. {uiform/types/documents → retab/types/jobs}/__init__.py +0 -0
  128. {uiform → retab}/types/jobs/finetune.py +0 -0
  129. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  130. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  131. {uiform → retab}/types/pagination.py +0 -0
  132. {uiform/types/jobs → retab/types/schemas}/__init__.py +0 -0
  133. {uiform/types/schemas → retab/types/secrets}/__init__.py +0 -0
  134. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
{uiform → retab}/types/automations/outlook.py

@@ -1,10 +1,9 @@
 import re
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Dict, List, Optional

 import nanoid  # type: ignore
-from pydantic import BaseModel, EmailStr, Field, field_validator, model_validator
+from pydantic import BaseModel, EmailStr, Field, computed_field, field_validator

-from ..._utils.json_schema import convert_schema_to_layout
 from ..logs import AutomationConfig, UpdateAutomationRequest
 from ..pagination import ListMetadata

@@ -29,7 +28,11 @@ class FetchParams(BaseModel):


 class Outlook(AutomationConfig):
-    object: Literal['automation.outlook'] = "automation.outlook"
+    @computed_field
+    @property
+    def object(self) -> str:
+        return "automation.outlook"
+
     id: str = Field(default_factory=lambda: "outlook_" + nanoid.generate(), description="Unique identifier for the outlook")

     authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
@@ -41,12 +44,13 @@ class Outlook(AutomationConfig):
     match_params: List[MatchParams] = Field(default_factory=list, description="List of match parameters for the outlook automation")
     fetch_params: List[FetchParams] = Field(default_factory=list, description="List of fetch parameters for the outlook automation")

-    @model_validator(mode='before')
-    @classmethod
-    def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
-        if values.get('layout_schema') is None:
-            values['layout_schema'] = convert_schema_to_layout(values['json_schema'])
-        return values
+    # @model_validator(mode="before")
+    # @classmethod
+    # def compute_layout_schema(cls, values: dict[str, Any]) -> dict[str, Any]:
+    #     if values.get("layout_schema") is None:
+    #         values["layout_schema"] = convert_schema_to_layout(values["json_schema"])
+    #     return values
+


 class ListOutlooks(BaseModel):
     data: list[Outlook]
@@ -65,4 +69,4 @@ class UpdateOutlookRequest(UpdateAutomationRequest):

     @field_validator("authorized_emails", mode="before")
     def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
-        return [email.strip().lower() for email in emails] if emails else None
+        return [email.strip().lower() for email in emails] if emails else None
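Note: moving `object` from a Literal field to a computed_field keeps it in serialized output while making it read-only. A minimal standalone sketch of that Pydantic v2 pattern (the class below is illustrative, not from the package):

from pydantic import BaseModel, computed_field

class AutomationSketch(BaseModel):
    id: str

    @computed_field  # included in model_dump()/JSON even though it is not a settable field
    @property
    def object(self) -> str:
        return "automation.outlook"

print(AutomationSketch(id="outlook_123").model_dump())
# {'id': 'outlook_123', 'object': 'automation.outlook'}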
retab/types/browser_canvas.py (new file)

@@ -0,0 +1,3 @@
+from typing import Literal
+
+BrowserCanvas = Literal["A3", "A4", "A5"]
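The new BrowserCanvas alias centralizes the canvas sizes that were previously repeated as inline Literal annotations. A quick usage sketch (RenderSettings is hypothetical, not a package model):

from typing import Literal
from pydantic import BaseModel

BrowserCanvas = Literal["A3", "A4", "A5"]

class RenderSettings(BaseModel):
    # hypothetical model using the shared alias, as the diffs below do
    browser_canvas: BrowserCanvas = "A4"

RenderSettings(browser_canvas="A5")    # accepted
# RenderSettings(browser_canvas="A2") would raise pydantic.ValidationError,
# and a static type checker flags it before runtime.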
{uiform → retab}/types/chat.py

@@ -1,8 +1,8 @@
-from typing import Iterable, Literal, TypedDict, Union
+from typing import Literal, TypedDict, Union

 from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam


 class ChatCompletionUiformMessage(TypedDict):  # homemade replacement for ChatCompletionMessageParam because iterable messes the serialization with pydantic
-    role: Literal['user', 'system', 'assistant', 'developer']
+    role: Literal["user", "system", "assistant", "developer"]
     content: Union[str, list[ChatCompletionContentPartParam]]
{uiform → retab}/types/completions.py

@@ -1,6 +1,10 @@
-from typing import Any
+from typing import Any, Optional, Union

 from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
+from openai.types.responses.response_input_param import ResponseInputParam
+from openai.types.responses.response_text_config_param import ResponseTextConfigParam
+from openai.types.shared_params.reasoning import Reasoning
+from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
 from pydantic import BaseModel, ConfigDict, Field

 from .._utils.ai_models import find_provider_from_model
@@ -8,8 +12,6 @@ from .ai_models import AIProvider
 from .chat import ChatCompletionUiformMessage


-from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
-
 class UiChatCompletionsRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
@@ -35,8 +37,6 @@ class UiChatCompletionsRequest(BaseModel):
         return find_provider_from_model(self.model)


-
-
 class UiChatCompletionsParseRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
@@ -61,10 +61,6 @@ class UiChatCompletionsParseRequest(BaseModel):
         """
         return find_provider_from_model(self.model)

-from typing import Optional, Union
-from openai.types.shared_params.reasoning import Reasoning
-from openai.types.responses.response_input_param import ResponseInputParam
-from openai.types.responses.response_text_config_param import ResponseTextConfigParam

 class UiChatResponseCreateRequest(BaseModel):
     input: Union[str, ResponseInputParam] = Field(..., description="Input to be parsed")
@@ -73,8 +69,10 @@ class UiChatResponseCreateRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
     temperature: Optional[float] = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
-    reasoning: Optional[Reasoning] = Field(default=None, description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.")
-
+    reasoning: Optional[Reasoning] = Field(
+        default=None, description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used."
+    )
+
     stream: Optional[bool] = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
     seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
     text: ResponseTextConfigParam = Field(default={"format": {"type": "text"}}, description="Format of the response")
@@ -90,4 +88,3 @@ class UiChatResponseCreateRequest(BaseModel):
         AIProvider: The AI provider corresponding to the given model.
         """
         return find_provider_from_model(self.model)
-
retab/types/consensus.py (new file)

@@ -0,0 +1,19 @@
+from typing import Any, Literal, Optional
+from pydantic import BaseModel, Field
+
+
+class ReconciliationRequest(BaseModel):
+    list_dicts: list[dict] = Field(description="List of dictionaries that will be reconciled into a single consensus dictionary.")
+    reference_schema: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Optional schema defining the structure and types of the dictionary fields to validate the list of dictionaries against. Raise an error if one of the dictionaries does not match the schema.",
+    )
+    mode: Literal["direct", "aligned"] = Field(
+        default="direct",
+        description="The mode to use for the consensus. If 'direct', the consensus is computed directly from the list of dictionaries. If 'aligned', the consensus is computed from the aligned dictionaries.",
+    )
+
+
+class ReconciliationResponse(BaseModel):
+    consensus_dict: dict = Field(description="The consensus dictionary containing the reconciled values from the input dictionaries.")
+    likelihoods: dict = Field(description="A dictionary containing the likelihood/confidence scores for each field in the consensus dictionary.")
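Assuming the 0.0.37 wheel is installed, the new consensus types can be exercised like this (sample values are illustrative only):

from retab.types.consensus import ReconciliationRequest, ReconciliationResponse

req = ReconciliationRequest(
    list_dicts=[
        {"total": 120.0, "currency": "EUR"},
        {"total": 120.0, "currency": "EUR"},
        {"total": 125.0, "currency": "EUR"},
    ],
    mode="direct",  # "aligned" would reconcile after aligning the dictionaries
)

# a response carries the consensus values plus per-field confidence scores
resp = ReconciliationResponse(
    consensus_dict={"total": 120.0, "currency": "EUR"},
    likelihoods={"total": 0.67, "currency": 1.0},
)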
{uiform → retab}/types/db/annotations.py

@@ -1,17 +1,17 @@
 import datetime
-from typing import Any, Dict, Literal
+from typing import Any, Dict

-import nanoid  # type: ignore
 from pydantic import BaseModel, Field

 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas


 class AnnotationParameters(BaseModel):
     model: str
     modality: Modality | None = "native"
     image_resolution_dpi: int = 96
-    browser_canvas: Literal['A3', 'A4', 'A5'] = 'A4'
+    browser_canvas: BrowserCanvas = "A4"
     temperature: float = 0.0

{uiform → retab}/types/db/files.py

@@ -1,7 +1,7 @@
 import mimetypes
-from typing import BinaryIO, Literal, Tuple
+from typing import Any, BinaryIO, Literal, Tuple

-from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_serializer
+from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator


 class DBFile(BaseModel):
@@ -27,10 +27,12 @@ FileTuple = Tuple[str, FileData]


 class FileLink(BaseModel):
-    download_url: HttpUrl = Field(description="The signed URL to download the file")
+    download_url: str = Field(description="The signed URL to download the file")
     expires_in: str = Field(description="The expiration time of the signed URL")
     filename: str = Field(description="The name of the file")

-    @field_serializer('download_url')
-    def url2str(self, val: HttpUrl) -> str:
-        return str(val)
+    @field_validator("download_url", mode="after")
+    def validate_httpurl(cls, val: Any) -> Any:
+        if isinstance(val, str):
+            HttpUrl(val)
+        return val
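The FileLink change trades serialization glue for validation: the field is now a plain str, checked against HttpUrl at construction time. A self-contained sketch of the same pattern (FileLinkSketch is illustrative, not the package class):

from typing import Any
from pydantic import BaseModel, HttpUrl, ValidationError, field_validator

class FileLinkSketch(BaseModel):
    download_url: str  # stored and serialized as a plain string

    @field_validator("download_url", mode="after")
    def validate_httpurl(cls, val: Any) -> Any:
        if isinstance(val, str):
            HttpUrl(val)  # raises if the string does not parse as an HTTP(S) URL
        return val

print(FileLinkSketch(download_url="https://example.com/f.pdf").model_dump())
# {'download_url': 'https://example.com/f.pdf'}, no field_serializer needed

try:
    FileLinkSketch(download_url="not a url")
except ValidationError:
    print("invalid URL rejected at validation time")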
{uiform → retab}/types/documents/create_messages.py

@@ -1,6 +1,6 @@
 import base64
 from io import BytesIO
-from typing import List, Literal, Dict, Union
+from typing import Any, List, Literal

 import PIL.Image
 import requests
@@ -12,11 +12,12 @@ from pydantic import BaseModel, Field, computed_field
 from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
 from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
+from ..._utils.display import count_image_tokens, count_text_tokens
 from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
-from ..._utils.display import count_text_tokens, count_image_tokens
 from ..chat import ChatCompletionUiformMessage
 from ..mime import MIMEData
 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas

 MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]

@@ -26,19 +27,20 @@ class TokenCount(BaseModel):
     developer_tokens: int = 0
     user_tokens: int = 0

+
 class DocumentCreateMessageRequest(BaseModel):
     document: MIMEData = Field(description="The document to load.")
     modality: Modality = Field(description="The modality of the document to load.")
     image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
-    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.")
+    browser_canvas: BrowserCanvas = Field(
+        default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
+    )
+

-from typing import Any
 class DocumentCreateInputRequest(DocumentCreateMessageRequest):
     json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")


-
-
 class DocumentMessage(BaseModel):
     id: str = Field(description="A unique identifier for the document loading.")
     object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
@@ -49,21 +51,21 @@ class DocumentMessage(BaseModel):
     @computed_field
     def token_count(self) -> TokenCount:
         """Returns the token count for the document message.
-
+
         This property calculates token usage based on both text and image content
         in the messages using the token counting utilities.
-
+
         Returns:
             TokenCount: A Pydantic model with total, user, and developer token counts.
         """
         total_tokens = 0
         user_tokens = 0
         developer_tokens = 0
-
+
         for msg in self.messages:
             role = msg.get("role", "user")
             msg_tokens = 0
-
+
             if isinstance(msg["content"], str):
                 msg_tokens = count_text_tokens(msg["content"])
             elif isinstance(msg["content"], list):
@@ -72,30 +74,26 @@ class DocumentMessage(BaseModel):
                     msg_tokens += count_text_tokens(content_item)
                 elif isinstance(content_item, dict):
                     item_type = content_item.get("type")
-
+
                     if item_type == "text" and "text" in content_item:
                         msg_tokens += count_text_tokens(content_item["text"])
-
+
                     elif item_type == "image_url" and "image_url" in content_item:
                         image_url = content_item["image_url"]["url"]
                         detail = content_item["image_url"].get("detail", "high")
                         msg_tokens += count_image_tokens(image_url, detail)
-
+
             # Update total tokens
             total_tokens += msg_tokens
-
+
             # Update role-specific counts
             assert role in ["user", "developer"], f"Invalid role: {role}"
             if role == "user":
                 user_tokens += msg_tokens
             elif role == "developer":
                 developer_tokens += msg_tokens
-
-        return TokenCount(
-            total_tokens=total_tokens,
-            user_tokens=user_tokens,
-            developer_tokens=developer_tokens
-        )
+
+        return TokenCount(total_tokens=total_tokens, user_tokens=user_tokens, developer_tokens=developer_tokens)

     @property
     def items(self) -> list[str | PIL.Image.Image]:
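For context on the token_count field above: it tallies text and image tokens per message role. A rough standalone sketch of the text-only part, where count_text_tokens is a stand-in built on tiktoken (the real helper lives in retab's _utils.display and may differ):

import tiktoken

def count_text_tokens(text: str, model: str = "gpt-4o") -> int:
    # stand-in for retab's _utils.display.count_text_tokens
    return len(tiktoken.encoding_for_model(model).encode(text))

messages = [
    {"role": "developer", "content": "Extract the invoice fields."},
    {"role": "user", "content": "Invoice #42, total 120 EUR"},
]

totals = {"user": 0, "developer": 0}
for msg in messages:
    totals[msg["role"]] += count_text_tokens(msg["content"])
print(totals, "total:", sum(totals.values()))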
{uiform → retab}/types/documents/extractions.py

@@ -15,24 +15,23 @@ from openai.types.responses.response import Response
 from openai.types.responses.response_input_param import ResponseInputItemParam
 from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator

-from ..._utils.usage.usage import compute_cost_from_model, compute_cost_from_model_with_breakdown, CostBreakdown
-
-from ..._utils.ai_models import find_provider_from_model
-from ..ai_models import AIProvider, Amount, get_model_card
+from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
+from ..ai_models import Amount
 from ..chat import ChatCompletionUiformMessage
 from ..mime import MIMEData
 from ..modalities import Modality
+from ..browser_canvas import BrowserCanvas
 from ..standards import ErrorDetail, StreamingBaseModel


 class DocumentExtractRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    document: MIMEData = Field(..., description="Document to be analyzed")
+    document: MIMEData = Field(default=None, description="Document to be analyzed", deprecated=True)  # type: ignore
+    documents: list[MIMEData] = Field(..., description="Documents to be analyzed (preferred over document)")
     modality: Modality
     image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
-    browser_canvas: Literal['A3', 'A4', 'A5'] = Field(
-        default='A4', description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
+    browser_canvas: BrowserCanvas = Field(
+        default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
     )
     model: str = Field(..., description="Model used for chat completion")
     json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
@@ -54,6 +53,28 @@ class DocumentExtractRequest(BaseModel):
             raise ValueError("n_consensus greater than 1 but temperature is 0")
         return v

+    @model_validator(mode="before")
+    def validate_document_or_documents(cls, data: Any) -> Any:
+        # Handle both dict and model instance cases
+        if isinstance(data, dict):
+            if data.get("documents"):  # If documents is set, it has higher priority than document
+                data["document"] = data["documents"][0]
+            elif data.get("document"):
+                data["documents"] = [data["document"]]
+            else:
+                raise ValueError("document or documents must be provided")
+        else:
+            # Handle model instance case
+            document = getattr(data, "document", None)
+            documents = getattr(data, "documents", None)
+            if documents:
+                setattr(data, "document", documents[0])
+            elif document:
+                setattr(data, "documents", [document])
+            else:
+                raise ValueError("document or documents must be provided")
+        return data
+

 class ConsensusModel(BaseModel):
     model: str = Field(description="Model name")
@@ -87,7 +108,7 @@ LikelihoodsSource = Literal["consensus", "log_probs"]

 class UiParsedChatCompletion(ParsedChatCompletion):
     extraction_id: str | None = None
-    choices: list[UiParsedChoice]
+    choices: list[UiParsedChoice]  # type: ignore
     # Additional metadata fields (UIForm)
     likelihoods: Optional[dict[str, Any]] = Field(
         default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
@@ -147,19 +168,43 @@ class LogExtractionRequest(BaseModel):
     # Validate that at least one of the messages, openai_messages, anthropic_messages is provided using model_validator
     @model_validator(mode="before")
     def validation(cls, data: Any) -> Any:
-        messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
-        messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
-        if len(messages_candidates) != 1:
-            raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
-
-        # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
-        if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
-            raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
-
-        completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
-        completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
-        if len(completion_candidates) != 1:
-            raise ValueError("Exactly one of completion, openai_responses_output must be provided")
+        # Handle both dict and model instance cases
+        if isinstance(data, dict):
+            messages_candidates = [data.get("messages"), data.get("openai_messages"), data.get("anthropic_messages"), data.get("openai_responses_input")]
+            messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
+            if len(messages_candidates) != 1:
+                raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
+
+            # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
+            if data.get("anthropic_messages") is not None and data.get("anthropic_system_prompt") is None:
+                raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
+
+            completion_candidates = [data.get("completion"), data.get("openai_responses_output")]
+            completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
+            if len(completion_candidates) != 1:
+                raise ValueError("Exactly one of completion, openai_responses_output must be provided")
+        else:
+            # Handle model instance case
+            messages_candidates = [
+                getattr(data, "messages", None),
+                getattr(data, "openai_messages", None),
+                getattr(data, "anthropic_messages", None),
+                getattr(data, "openai_responses_input", None),
+            ]
+            messages_candidates = [candidate for candidate in messages_candidates if candidate is not None]
+            if len(messages_candidates) != 1:
+                raise ValueError("Exactly one of the messages, openai_messages, anthropic_messages, openai_responses_input must be provided")
+
+            # Validate that if anthropic_messages is provided, anthropic_system_prompt is also provided
+            anthropic_messages = getattr(data, "anthropic_messages", None)
+            anthropic_system_prompt = getattr(data, "anthropic_system_prompt", None)
+            if anthropic_messages is not None and anthropic_system_prompt is None:
+                raise ValueError("anthropic_system_prompt must be provided if anthropic_messages is provided")
+
+            completion_candidates = [getattr(data, "completion", None), getattr(data, "openai_responses_output", None)]
+            completion_candidates = [candidate for candidate in completion_candidates if candidate is not None]
+            if len(completion_candidates) != 1:
+                raise ValueError("Exactly one of completion, openai_responses_output must be provided")

         return data

@@ -192,12 +237,12 @@ class UiParsedChoiceDeltaChunk(ChoiceDeltaChunk):


 class UiParsedChoiceChunk(ChoiceChunk):
-    delta: UiParsedChoiceDeltaChunk
+    delta: UiParsedChoiceDeltaChunk  # type: ignore


 class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
     extraction_id: str | None = None
-    choices: list[UiParsedChoiceChunk]
+    choices: list[UiParsedChoiceChunk]  # type: ignore
     schema_validation_error: ErrorDetail | None = None
     # Timestamps
     request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
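The document → documents migration above is designed to keep legacy payloads working. A minimal runnable sketch of the validator's behavior (ExtractRequestSketch stands in for DocumentExtractRequest, with str instead of MIMEData):

from typing import Any, Optional
from pydantic import BaseModel, Field, model_validator

class ExtractRequestSketch(BaseModel):
    document: Optional[str] = Field(default=None, deprecated=True)
    documents: list[str] = Field(default_factory=list)

    @model_validator(mode="before")
    def validate_document_or_documents(cls, data: Any) -> Any:
        if isinstance(data, dict):
            if data.get("documents"):       # documents takes priority
                data["document"] = data["documents"][0]
            elif data.get("document"):      # legacy single-document payload
                data["documents"] = [data["document"]]
            else:
                raise ValueError("document or documents must be provided")
        return data

old = ExtractRequestSketch(document="invoice.pdf")
assert old.documents == ["invoice.pdf"]    # legacy payloads keep working
new = ExtractRequestSketch(documents=["a.pdf", "b.pdf"])
assert new.document == "a.pdf"             # first entry mirrors the deprecated field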
{uiform → retab}/types/evals.py

@@ -1,19 +1,17 @@
 import copy
 import datetime
 import json
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, List, Literal, Optional

 import nanoid  # type: ignore
 from pydantic import BaseModel, Field, computed_field

-
 from .._utils.json_schema import clean_schema, compute_schema_data_id
 from .._utils.mime import generate_blake2b_hash_from_string
-from .ai_models import Amount, LLMModel
-from .jobs.base import InferenceSettings
+from .ai_models import Amount
+from .inference_settings import InferenceSettings
 from .mime import MIMEData

-
 # Define the type alias for MetricType
 MetricType = Literal["levenshtein", "jaccard", "hamming"]

@@ -132,6 +130,7 @@ class UpdateEvaluationRequest(BaseModel):
     json_schema: Optional[dict[str, Any]] = Field(default=None, description="The json schema of the evaluation")

     project_id: Optional[str] = Field(default=None, description="The ID of the project")
+    default_inference_settings: Optional[InferenceSettings] = Field(default=None, description="The default inference properties for the evaluation (mostly used in the frontend)")

     @computed_field  # type: ignore
     @property
@@ -165,6 +164,7 @@ class Evaluation(BaseModel):
     updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))

     name: str
+    old_documents: list[EvaluationDocument] | None = None
     documents: list[EvaluationDocument]
     iterations: list[Iteration]
     json_schema: dict[str, Any]
retab/types/evaluations/__init__.py (new file)

@@ -0,0 +1,31 @@
+from .model import Evaluation, CreateEvaluation, PatchEvaluationRequest, ListEvaluationParams
+from .documents import AnnotatedDocument, DocumentItem, EvaluationDocument, CreateEvaluationDocumentRequest, PatchEvaluationDocumentRequest
+from .iterations import (
+    Iteration,
+    CreateIterationRequest,
+    PatchIterationRequest,
+    ProcessIterationRequest,
+    DocumentStatus,
+    IterationDocumentStatusResponse,
+    AddIterationFromJsonlRequest,
+)
+
+
+__all__ = [
+    "Evaluation",
+    "CreateEvaluation",
+    "PatchEvaluationRequest",
+    "ListEvaluationParams",
+    "AnnotatedDocument",
+    "DocumentItem",
+    "EvaluationDocument",
+    "CreateEvaluationDocumentRequest",
+    "PatchEvaluationDocumentRequest",
+    "Iteration",
+    "CreateIterationRequest",
+    "PatchIterationRequest",
+    "ProcessIterationRequest",
+    "DocumentStatus",
+    "IterationDocumentStatusResponse",
+    "AddIterationFromJsonlRequest",
+]
retab/types/evaluations/documents.py (new file)

@@ -0,0 +1,30 @@
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field
+
+from ..mime import MIMEData
+from ..predictions import PredictionMetadata
+
+
+class AnnotatedDocument(BaseModel):
+    mime_data: MIMEData = Field(
+        description="The mime data of the document. Can also be a BaseMIMEData, which is why we have this id field (to be able to identify the file, but id is equal to mime_data.id)"
+    )
+    annotation: dict[str, Any] = Field(default={}, description="The ground truth of the document")
+
+
+class DocumentItem(AnnotatedDocument):
+    annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")
+
+
+class EvaluationDocument(DocumentItem):
+    id: str = Field(description="The ID of the document. Equal to mime_data.id but robust to the case where mime_data is a BaseMIMEData")
+
+
+class CreateEvaluationDocumentRequest(DocumentItem):
+    pass
+
+
+class PatchEvaluationDocumentRequest(BaseModel):
+    annotation: Optional[dict[str, Any]] = Field(default=None, description="The ground truth of the document")
+    annotation_metadata: Optional[PredictionMetadata] = Field(default=None, description="The metadata of the annotation when the annotation is a prediction")