retab 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. retab-0.0.35.dist-info/METADATA +417 -0
  2. retab-0.0.35.dist-info/RECORD +111 -0
  3. retab-0.0.35.dist-info/WHEEL +5 -0
  4. retab-0.0.35.dist-info/top_level.txt +1 -0
  5. uiform/__init__.py +4 -0
  6. uiform/_resource.py +28 -0
  7. uiform/_utils/__init__.py +0 -0
  8. uiform/_utils/ai_models.py +100 -0
  9. uiform/_utils/benchmarking copy.py +588 -0
  10. uiform/_utils/benchmarking.py +485 -0
  11. uiform/_utils/chat.py +332 -0
  12. uiform/_utils/display.py +443 -0
  13. uiform/_utils/json_schema.py +2161 -0
  14. uiform/_utils/mime.py +168 -0
  15. uiform/_utils/responses.py +163 -0
  16. uiform/_utils/stream_context_managers.py +52 -0
  17. uiform/_utils/usage/__init__.py +0 -0
  18. uiform/_utils/usage/usage.py +300 -0
  19. uiform/client.py +701 -0
  20. uiform/py.typed +0 -0
  21. uiform/resources/__init__.py +0 -0
  22. uiform/resources/consensus/__init__.py +3 -0
  23. uiform/resources/consensus/client.py +114 -0
  24. uiform/resources/consensus/completions.py +252 -0
  25. uiform/resources/consensus/completions_stream.py +278 -0
  26. uiform/resources/consensus/responses.py +325 -0
  27. uiform/resources/consensus/responses_stream.py +373 -0
  28. uiform/resources/deployments/__init__.py +9 -0
  29. uiform/resources/deployments/client.py +78 -0
  30. uiform/resources/deployments/endpoints.py +322 -0
  31. uiform/resources/deployments/links.py +452 -0
  32. uiform/resources/deployments/logs.py +211 -0
  33. uiform/resources/deployments/mailboxes.py +496 -0
  34. uiform/resources/deployments/outlook.py +531 -0
  35. uiform/resources/deployments/tests.py +158 -0
  36. uiform/resources/documents/__init__.py +3 -0
  37. uiform/resources/documents/client.py +255 -0
  38. uiform/resources/documents/extractions.py +441 -0
  39. uiform/resources/evals.py +812 -0
  40. uiform/resources/files.py +24 -0
  41. uiform/resources/finetuning.py +62 -0
  42. uiform/resources/jsonlUtils.py +1046 -0
  43. uiform/resources/models.py +45 -0
  44. uiform/resources/openai_example.py +22 -0
  45. uiform/resources/processors/__init__.py +3 -0
  46. uiform/resources/processors/automations/__init__.py +9 -0
  47. uiform/resources/processors/automations/client.py +78 -0
  48. uiform/resources/processors/automations/endpoints.py +317 -0
  49. uiform/resources/processors/automations/links.py +356 -0
  50. uiform/resources/processors/automations/logs.py +211 -0
  51. uiform/resources/processors/automations/mailboxes.py +435 -0
  52. uiform/resources/processors/automations/outlook.py +444 -0
  53. uiform/resources/processors/automations/tests.py +158 -0
  54. uiform/resources/processors/client.py +474 -0
  55. uiform/resources/prompt_optimization.py +76 -0
  56. uiform/resources/schemas.py +369 -0
  57. uiform/resources/secrets/__init__.py +9 -0
  58. uiform/resources/secrets/client.py +20 -0
  59. uiform/resources/secrets/external_api_keys.py +109 -0
  60. uiform/resources/secrets/webhook.py +62 -0
  61. uiform/resources/usage.py +271 -0
  62. uiform/types/__init__.py +0 -0
  63. uiform/types/ai_models.py +645 -0
  64. uiform/types/automations/__init__.py +0 -0
  65. uiform/types/automations/cron.py +58 -0
  66. uiform/types/automations/endpoints.py +21 -0
  67. uiform/types/automations/links.py +28 -0
  68. uiform/types/automations/mailboxes.py +60 -0
  69. uiform/types/automations/outlook.py +68 -0
  70. uiform/types/automations/webhooks.py +21 -0
  71. uiform/types/chat.py +8 -0
  72. uiform/types/completions.py +93 -0
  73. uiform/types/consensus.py +10 -0
  74. uiform/types/db/__init__.py +0 -0
  75. uiform/types/db/annotations.py +24 -0
  76. uiform/types/db/files.py +36 -0
  77. uiform/types/deployments/__init__.py +0 -0
  78. uiform/types/deployments/cron.py +59 -0
  79. uiform/types/deployments/endpoints.py +28 -0
  80. uiform/types/deployments/links.py +36 -0
  81. uiform/types/deployments/mailboxes.py +67 -0
  82. uiform/types/deployments/outlook.py +76 -0
  83. uiform/types/deployments/webhooks.py +21 -0
  84. uiform/types/documents/__init__.py +0 -0
  85. uiform/types/documents/correct_orientation.py +13 -0
  86. uiform/types/documents/create_messages.py +226 -0
  87. uiform/types/documents/extractions.py +297 -0
  88. uiform/types/evals.py +207 -0
  89. uiform/types/events.py +76 -0
  90. uiform/types/extractions.py +85 -0
  91. uiform/types/jobs/__init__.py +0 -0
  92. uiform/types/jobs/base.py +150 -0
  93. uiform/types/jobs/batch_annotation.py +22 -0
  94. uiform/types/jobs/evaluation.py +133 -0
  95. uiform/types/jobs/finetune.py +6 -0
  96. uiform/types/jobs/prompt_optimization.py +41 -0
  97. uiform/types/jobs/webcrawl.py +6 -0
  98. uiform/types/logs.py +231 -0
  99. uiform/types/mime.py +257 -0
  100. uiform/types/modalities.py +68 -0
  101. uiform/types/pagination.py +6 -0
  102. uiform/types/schemas/__init__.py +0 -0
  103. uiform/types/schemas/enhance.py +53 -0
  104. uiform/types/schemas/evaluate.py +55 -0
  105. uiform/types/schemas/generate.py +32 -0
  106. uiform/types/schemas/layout.py +58 -0
  107. uiform/types/schemas/object.py +631 -0
  108. uiform/types/schemas/templates.py +107 -0
  109. uiform/types/secrets/__init__.py +0 -0
  110. uiform/types/secrets/external_api_keys.py +22 -0
  111. uiform/types/standards.py +39 -0
@@ -0,0 +1,58 @@
1
+ import datetime
2
+ from typing import Any, Literal, Optional
3
+
4
+ import nanoid # type: ignore
5
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
6
+ from pydantic import BaseModel, Field, HttpUrl, field_serializer
7
+
8
+ from ..modalities import Modality
9
+
10
+
11
def scrapping_action(link: HttpUrl) -> dict[str, Any]:
    """Placeholder hook for scraping ``link``.

    Args:
        link: URL that would be scraped.

    Raises:
        NotImplementedError: Always; no implementation is provided here.
    """
    message = "Scrapping action not implemented"
    raise NotImplementedError(message)
13
+
14
+
15
class CronSchedule(BaseModel):
    # Schedule rendered as a six-field cron expression:
    # second, minute, hour, day of month, month, day of week.
    second: Optional[int] = Field(0, ge=0, le=59, description="Second (0-59), defaults to 0")
    minute: int = Field(..., ge=0, le=59, description="Minute (0-59)")
    hour: int = Field(..., ge=0, le=23, description="Hour (0-23)")
    day_of_month: Optional[int] = Field(None, ge=1, le=31, description="Day of the month (1-31), None means any day")
    month: Optional[int] = Field(None, ge=1, le=12, description="Month (1-12), None means every month")
    day_of_week: Optional[int] = Field(None, ge=0, le=6, description="Day of the week (0-6, Sunday = 0), None means any day")

    def to_cron_string(self) -> str:
        """Render the schedule as a space-separated six-field cron string.

        A field set to ``None`` is rendered as ``*``. The test is an explicit
        ``is None`` check so that valid zero values (second 0, Sunday = 0) are
        emitted as ``0`` instead of being widened to ``*``.

        Returns:
            ``"second minute hour day_of_month month day_of_week"``.
        """
        fields = (self.second, self.minute, self.hour, self.day_of_month, self.month, self.day_of_week)
        return " ".join("*" if value is None else str(value) for value in fields)
25
+
26
+
27
+ from ..logs import AutomationConfig
28
+
29
+
30
class ScrappingConfig(AutomationConfig):
    # Automation config for a scheduled scraping job.
    object: Literal["automation.scrapping_cron"] = "automation.scrapping_cron"
    id: str = Field(
        default_factory=lambda: f"scrapping_{nanoid.generate()}",
        description="Unique identifier for the scrapping job",
    )

    # What to scrape and when
    link: HttpUrl = Field(description="Link to be scrapped")
    schedule: CronSchedule

    # Defaults to the current UTC time when the instance is created
    updated_at: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc),
    )

    # Webhook delivery settings
    webhook_url: HttpUrl = Field(description="Url of the webhook to send the data to")
    webhook_headers: dict[str, str] = Field(
        default_factory=dict,
        description="Headers to send with the request",
    )

    # Document rendering settings
    modality: Modality
    image_resolution_dpi: int = Field(
        default=96,
        description="Resolution of the image sent to the LLM",
    )
    browser_canvas: Literal["A3", "A4", "A5"] = Field(
        default="A4",
        description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.",
    )

    # Model / extraction settings
    model: str = Field(description="Model used for chat completion")
    json_schema: dict[str, Any] = Field(description="JSON schema format used to validate the output data.")
    temperature: float = Field(
        default=0.0,
        description="Temperature for sampling. If not provided, the default temperature for the model will be used.",
        examples=[0.0],
    )
    reasoning_effort: ChatCompletionReasoningEffort = Field(
        default="medium",
        description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.",
    )

    @field_serializer("webhook_url", "link")
    def url2str(self, val: HttpUrl) -> str:
        """Serialize the URL fields as plain strings."""
        as_text = str(val)
        return as_text
@@ -0,0 +1,21 @@
1
+ from typing import Literal
2
+
3
+ import nanoid # type: ignore
4
+ from pydantic import BaseModel, Field
5
+
6
+ from ..logs import AutomationConfig, UpdateAutomationRequest
7
+ from ..pagination import ListMetadata
8
+
9
+
10
class Endpoint(AutomationConfig):
    # Automation config tagged as an extraction endpoint.
    object: Literal["automation.endpoint"] = "automation.endpoint"
    id: str = Field(
        default_factory=lambda: f"endp_{nanoid.generate()}",
        description="Unique identifier for the extraction endpoint",
    )
13
+
14
class ListEndpoints(BaseModel):
    # List response: the endpoints plus the accompanying ListMetadata.
    data: list[Endpoint]
    list_metadata: ListMetadata
17
+
18
+
19
+ # Inherits from the methods of UpdateAutomationRequest
20
class UpdateEndpointRequest(UpdateAutomationRequest):
    # No endpoint-specific fields; everything is inherited from UpdateAutomationRequest.
    ...
@@ -0,0 +1,28 @@
1
+ from typing import Literal, Optional
2
+
3
+ import nanoid # type: ignore
4
+ from pydantic import BaseModel, Field
5
+
6
+ from ..logs import AutomationConfig, UpdateAutomationRequest
7
+ from ..pagination import ListMetadata
8
+
9
+
10
class Link(AutomationConfig):
    # Automation config tagged as an extraction link.
    object: Literal["automation.link"] = "automation.link"
    id: str = Field(
        default_factory=lambda: f"lnk_{nanoid.generate()}",
        description="Unique identifier for the extraction link",
    )

    # Optional password; None when no password is set
    password: Optional[str] = Field(default=None, description="Password to access the link")
16
+
17
+
18
class ListLinks(BaseModel):
    # List response: the links plus the accompanying ListMetadata.
    data: list[Link]
    list_metadata: ListMetadata
21
+
22
+
23
+ # Inherits from the methods of UpdateAutomationRequest
24
class UpdateLinkRequest(UpdateAutomationRequest):
    # Adds the link-specific field on top of UpdateAutomationRequest.
    # Defaults to None when the caller does not supply a password.
    password: Optional[str] = None
@@ -0,0 +1,60 @@
1
+ import os
2
+ import re
3
+ from typing import ClassVar, List, Literal, Optional
4
+
5
+ import nanoid # type: ignore
6
+ from pydantic import BaseModel, EmailStr, Field, field_validator
7
+
8
+ from ..pagination import ListMetadata
9
+
10
# Domain syntax check used by the mailbox validators.
domain_pattern = re.compile(
    r"^(?:[a-zA-Z0-9-]+\.)+"  # one or more dot-terminated labels
    r"[a-zA-Z]{2,}$"  # final label: at least two ASCII letters
)
11
+
12
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
13
+
14
+ from ..logs import AutomationConfig, UpdateAutomationRequest
15
+
16
+
17
class Mailbox(AutomationConfig):
    # Regex the mailbox address must match: anything followed by "@<EMAIL_DOMAIN>".
    # The domain goes through re.escape so that its dots match literal dots only;
    # unescaped, "mailbox.uiform.com" would also accept e.g. "mailboxXuiformXcom".
    EMAIL_PATTERN: ClassVar[str] = f".*@{re.escape(os.getenv('EMAIL_DOMAIN', 'mailbox.uiform.com'))}$"
    object: Literal['automation.mailbox'] = "automation.mailbox"
    id: str = Field(default_factory=lambda: "mb_" + nanoid.generate(), description="Unique identifier for the mailbox")

    # Email Specific config
    email: str = Field(..., pattern=EMAIL_PATTERN)
    authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
    authorized_emails: List[EmailStr] = Field(default_factory=list, description="List of emails to access the link")

    # The validators below run in "before" mode, i.e. on raw input. Non-string
    # values are passed through untouched (or rejected with ValueError) so that
    # pydantic reports a validation error rather than an AttributeError/TypeError
    # leaking out of the validator.
    @field_validator("email", mode="before")
    @classmethod
    def normalize_email(cls, value: str) -> str:
        """Strip surrounding whitespace and lower-case the mailbox address."""
        if not isinstance(value, str):
            return value
        return value.strip().lower()

    @field_validator("authorized_emails", mode="before")
    @classmethod
    def normalize_authorized_emails(cls, emails: List[str]) -> List[str]:
        """Strip and lower-case every authorized email address."""
        return [email.strip().lower() if isinstance(email, str) else email for email in emails]

    @field_validator('authorized_domains', mode='before')
    @classmethod
    def validate_domain(cls, list_domains: list[str]) -> list[str]:
        """Reject the list if any entry does not match ``domain_pattern``.

        Raises:
            ValueError: If an entry is not a string or is not a valid domain.
        """
        for domain in list_domains:
            if not isinstance(domain, str) or not domain_pattern.match(domain):
                raise ValueError(f"Invalid domain: {domain}")
        return list_domains
42
+
43
+
44
class ListMailboxes(BaseModel):
    # List response: the mailboxes plus the accompanying ListMetadata.
    data: list[Mailbox]
    list_metadata: ListMetadata
47
+
48
+
49
+ # Inherits from the methods of UpdateAutomationRequest
50
class UpdateMailboxRequest(UpdateAutomationRequest):
    # Mailbox-specific fields; both default to None when not supplied.
    authorized_domains: Optional[list[str]] = None
    authorized_emails: Optional[List[EmailStr]] = None

    @field_validator("authorized_emails", mode="before")
    @classmethod
    def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
        """Strip and lower-case every address, keeping ``None`` and ``[]`` distinct.

        Only ``None`` is passed through as ``None``. An explicit empty list is
        returned as an empty list; a truthiness test would collapse it to
        ``None`` and make "clear the list" indistinguishable from "not provided".
        """
        if emails is None:
            return None
        return [email.strip().lower() for email in emails]
@@ -0,0 +1,68 @@
1
+ import re
2
+ from typing import Any, Dict, List, Literal, Optional
3
+
4
+ import nanoid # type: ignore
5
+ from pydantic import BaseModel, EmailStr, Field, field_validator, model_validator
6
+
7
+ from ..._utils.json_schema import convert_schema_to_layout
8
+ from ..logs import AutomationConfig, UpdateAutomationRequest
9
+ from ..pagination import ListMetadata
10
+
11
# Domain syntax check: dot-separated labels followed by an alphabetic final label.
domain_pattern = re.compile(
    r"^(?:[a-zA-Z0-9-]+\.)+"  # one or more dot-terminated labels
    r"[a-zA-Z]{2,}$"  # final label: at least two ASCII letters
)
12
+
13
+
14
class AutomationLevel(BaseModel):
    # Pair of thresholds, both defaulting to 0.9.
    distance_threshold: float = Field(
        default=0.9,
        description="Distance threshold for the automation",
    )
    score_threshold: float = Field(
        default=0.9,
        description="Score threshold for the automation",
    )
17
+
18
+
19
class MatchParams(BaseModel):
    # Match parameters; all three fields are required.
    endpoint: str = Field(description="Endpoint for matching parameters")
    headers: Dict[str, str] = Field(description="Headers for the request")
    path: str = Field(description="Path for matching parameters")
23
+
24
+
25
class FetchParams(BaseModel):
    # Fetch parameters; all three fields are required.
    endpoint: str = Field(description="Endpoint for fetching parameters")
    headers: Dict[str, str] = Field(description="Headers for the request")
    name: str = Field(description="Name of the fetch parameter")
29
+
30
+
31
class Outlook(AutomationConfig):
    # Automation config tagged as an Outlook automation.
    object: Literal['automation.outlook'] = "automation.outlook"
    id: str = Field(default_factory=lambda: "outlook_" + nanoid.generate(), description="Unique identifier for the outlook")

    authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
    authorized_emails: List[EmailStr] = Field(default_factory=list, description="List of emails to access the link")

    layout_schema: Optional[dict[str, Any]] = Field(default=None, description="Layout schema format used to display the data")

    # Optional Fields for data integration
    match_params: List[MatchParams] = Field(default_factory=list, description="List of match parameters for the outlook automation")
    fetch_params: List[FetchParams] = Field(default_factory=list, description="List of fetch parameters for the outlook automation")

    @model_validator(mode='before')
    @classmethod
    def compute_layout_schema(cls, values: Any) -> Any:
        """Derive ``layout_schema`` from ``json_schema`` when it was not supplied.

        Runs before field validation, on the raw input.

        Args:
            values: Raw input handed to the model; normally a dict.

        Returns:
            The input unchanged, or a shallow copy with ``layout_schema`` filled
            in from ``convert_schema_to_layout(values["json_schema"])``.
        """
        # Before-validators can receive non-dict input; leave it to pydantic.
        if not isinstance(values, dict):
            return values
        # Only derive the layout when json_schema is present. If it is missing,
        # field validation reports it, instead of a raw KeyError escaping here.
        if values.get('layout_schema') is None and 'json_schema' in values:
            # Work on a copy so the caller's dict is not mutated.
            values = {**values, 'layout_schema': convert_schema_to_layout(values['json_schema'])}
        return values
50
+
51
class ListOutlooks(BaseModel):
    # List response: the Outlook automations plus the accompanying ListMetadata.
    data: list[Outlook]
    list_metadata: ListMetadata
54
+
55
+
56
+ # Inherits from the methods of UpdateAutomationRequest
57
class UpdateOutlookRequest(UpdateAutomationRequest):
    # Outlook-specific fields; every field defaults to None when not supplied.
    authorized_domains: Optional[list[str]] = None
    authorized_emails: Optional[List[EmailStr]] = None

    match_params: Optional[List[MatchParams]] = None
    fetch_params: Optional[List[FetchParams]] = None

    layout_schema: Optional[dict[str, Any]] = None

    @field_validator("authorized_emails", mode="before")
    @classmethod
    def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
        """Strip and lower-case every address, keeping ``None`` and ``[]`` distinct.

        Only ``None`` is passed through as ``None``. An explicit empty list is
        returned as an empty list; a truthiness test would collapse it to
        ``None`` and make "clear the list" indistinguishable from "not provided".
        """
        if emails is None:
            return None
        return [email.strip().lower() for email in emails]
@@ -0,0 +1,21 @@
1
+ from typing import Any, Optional
2
+
3
+ from pydantic import BaseModel, EmailStr
4
+
5
+ from uiform.types.documents.extractions import UiParsedChatCompletion
6
+
7
+ from ..mime import BaseMIMEData, MIMEData
8
+
9
+
10
class WebhookRequest(BaseModel):
    # Webhook payload whose file is carried as MIMEData.
    completion: UiParsedChatCompletion
    user: Optional[EmailStr] = None  # optional
    file_payload: MIMEData
    metadata: Optional[dict[str, Any]] = None  # optional
15
+
16
+
17
class BaseWebhookRequest(BaseModel):
    # Same fields as WebhookRequest, but the file is carried as BaseMIMEData.
    completion: UiParsedChatCompletion
    user: Optional[EmailStr] = None  # optional
    file_payload: BaseMIMEData
    metadata: Optional[dict[str, Any]] = None  # optional
uiform/types/chat.py ADDED
@@ -0,0 +1,8 @@
1
+ from typing import Iterable, Literal, TypedDict, Union
2
+
3
+ from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
4
+
5
+
6
# Homemade replacement for ChatCompletionMessageParam: the Iterable used there
# messes up serialization with pydantic, so content is a concrete list instead.
class ChatCompletionUiformMessage(TypedDict):
    role: Literal["user", "system", "assistant", "developer"]
    content: Union[str, list[ChatCompletionContentPartParam]]
@@ -0,0 +1,93 @@
1
+ from typing import Any
2
+
3
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
4
+ from pydantic import BaseModel, ConfigDict, Field
5
+
6
+ from .._utils.ai_models import find_provider_from_model
7
+ from .ai_models import AIProvider
8
+ from .chat import ChatCompletionUiformMessage
9
+
10
+
11
+ from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
12
+
13
class UiChatCompletionsRequest(BaseModel):
    # Chat-completions request carrying an OpenAI-style response_format.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    model: str = Field(description="Model used for chat completion")
    messages: list[ChatCompletionUiformMessage] = Field(description="Messages to be parsed")
    response_format: ResponseFormatJSONSchema = Field(
        description="response format used to validate the output data.",
    )
    temperature: float = Field(
        default=0.0,
        description="Temperature for sampling. If not provided, the default temperature for the model will be used.",
        examples=[0.0],
    )
    reasoning_effort: ChatCompletionReasoningEffort = Field(
        default="medium",
        description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.",
    )

    # Streaming, seeding and consensus options
    stream: bool = Field(
        default=False,
        description="If true, the extraction will be streamed to the user using the active WebSocket connection",
    )
    seed: int | None = Field(
        default=None,
        description="Seed for the random number generator. If not provided, a random seed will be generated.",
        examples=[None],
    )
    n_consensus: int = Field(
        default=1,
        description="Number of consensus models to use for extraction. If greater than 1 the temperature cannot be 0.",
    )

    @property
    def provider(self) -> AIProvider:
        """Look up the AI provider for ``self.model``.

        Returns:
            AIProvider: Result of ``find_provider_from_model`` for the configured model.
        """
        model_name = self.model
        return find_provider_from_model(model_name)
36
+
37
+
38
+
39
+
40
class UiChatCompletionsParseRequest(BaseModel):
    # Chat-completions request carrying a raw JSON schema for the output.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    model: str = Field(description="Model used for chat completion")
    messages: list[ChatCompletionUiformMessage] = Field(description="Messages to be parsed")
    json_schema: dict[str, Any] = Field(
        description="JSON schema format used to validate the output data.",
    )
    temperature: float = Field(
        default=0.0,
        description="Temperature for sampling. If not provided, the default temperature for the model will be used.",
        examples=[0.0],
    )
    reasoning_effort: ChatCompletionReasoningEffort = Field(
        default="medium",
        description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.",
    )

    # Streaming, seeding and consensus options
    stream: bool = Field(
        default=False,
        description="If true, the extraction will be streamed to the user using the active WebSocket connection",
    )
    seed: int | None = Field(
        default=None,
        description="Seed for the random number generator. If not provided, a random seed will be generated.",
        examples=[None],
    )
    n_consensus: int = Field(
        default=1,
        description="Number of consensus models to use for extraction. If greater than 1 the temperature cannot be 0.",
    )

    @property
    def provider(self) -> AIProvider:
        """Look up the AI provider for ``self.model``.

        Returns:
            AIProvider: Result of ``find_provider_from_model`` for the configured model.
        """
        model_name = self.model
        return find_provider_from_model(model_name)
63
+
64
+ from typing import Optional, Union
65
+ from openai.types.shared_params.reasoning import Reasoning
66
+ from openai.types.responses.response_input_param import ResponseInputParam
67
+ from openai.types.responses.response_text_config_param import ResponseTextConfigParam
68
+
69
class UiChatResponseCreateRequest(BaseModel):
    # Request model built on the OpenAI Responses parameter types
    # (ResponseInputParam, Reasoning, ResponseTextConfigParam).
    model_config = ConfigDict(arbitrary_types_allowed=True)

    input: Union[str, ResponseInputParam] = Field(description="Input to be parsed")
    instructions: Optional[str] = None

    model: str = Field(description="Model used for chat completion")
    temperature: Optional[float] = Field(
        default=0.0,
        description="Temperature for sampling. If not provided, the default temperature for the model will be used.",
        examples=[0.0],
    )
    reasoning: Optional[Reasoning] = Field(
        default=None,
        description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.",
    )

    stream: Optional[bool] = Field(
        default=False,
        description="If true, the extraction will be streamed to the user using the active WebSocket connection",
    )
    seed: int | None = Field(
        default=None,
        description="Seed for the random number generator. If not provided, a random seed will be generated.",
        examples=[None],
    )
    # Defaults to plain-text output
    text: ResponseTextConfigParam = Field(
        default={"format": {"type": "text"}},
        description="Format of the response",
    )

    n_consensus: int = Field(
        default=1,
        description="Number of consensus models to use for extraction. If greater than 1 the temperature cannot be 0.",
    )

    @property
    def provider(self) -> AIProvider:
        """Look up the AI provider for ``self.model``.

        Returns:
            AIProvider: Result of ``find_provider_from_model`` for the configured model.
        """
        model_name = self.model
        return find_provider_from_model(model_name)
93
+
@@ -0,0 +1,10 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
class ReconciliationResponse(BaseModel):
    # Both fields are required, untyped dicts.
    consensus_dict: dict = Field(description="The consensus dictionary containing the reconciled values from the input dictionaries.")
    likelihoods: dict = Field(description="A dictionary containing the likelihood/confidence scores for each field in the consensus dictionary.")
File without changes
@@ -0,0 +1,24 @@
1
+ import datetime
2
+ from typing import Any, Dict, Literal
3
+
4
+ import nanoid # type: ignore
5
+ from pydantic import BaseModel, Field
6
+
7
+ from ..modalities import Modality
8
+
9
+
10
class AnnotationParameters(BaseModel):
    # Only `model` is required; the remaining fields have defaults.
    model: str
    modality: Modality | None = "native"
    image_resolution_dpi: int = 96
    browser_canvas: Literal["A3", "A4", "A5"] = "A4"
    temperature: float = 0.0
16
+
17
+
18
class Annotation(BaseModel):
    # Annotation record linking a file, a schema and an organization.
    file_id: str = Field(description="ID of the file that the annotation belongs to")
    parameters: AnnotationParameters = Field(description="Parameters used for the annotation")
    data: Dict[str, Any] = Field(
        default_factory=dict,
        description="Data of the annotation",
    )
    schema_id: str = Field(description="ID of the schema used for the annotation")
    organization_id: str = Field(description="ID of the organization that owns the annotation")
    # Defaults to the current UTC time when the instance is created
    updated_at: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc),
        description="Timestamp for when the annotation was last updated",
    )
@@ -0,0 +1,36 @@
1
+ import mimetypes
2
+ from typing import BinaryIO, Literal, Tuple
3
+
4
+ from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_serializer
5
+
6
+
7
class DBFile(BaseModel):
    """Represents the core file object in your new spec."""

    object: Literal["file"] = "file"
    id: str = Field(..., description="The unique identifier of the file. It is generated from the content 'file_{sha256(content)}'")
    filename: str = Field(..., description="The name of the file")

    @property
    def mime_type(self) -> str:
        """MIME type guessed from the filename, or ``application/octet-stream`` if unknown."""
        return mimetypes.guess_type(self.filename)[0] or "application/octet-stream"

    @property
    def extension(self) -> str:
        """Lower-cased text after the last dot of the filename.

        Returns an empty string when the filename has no dot. Splitting on "."
        and taking the last piece would return the whole filename in that case.
        """
        _, dot, suffix = self.filename.rpartition(".")
        return suffix.lower() if dot else ""

    model_config = ConfigDict(arbitrary_types_allowed=True)
23
+
24
+
25
# (str, binary stream, str) triple.
# NOTE(review): presumably (filename, stream, content type) — confirm against callers.
FileData = Tuple[str, BinaryIO, str]

# A string paired with a FileData triple.
FileTuple = Tuple[str, FileData]
27
+
28
+
29
class FileLink(BaseModel):
    # Signed download link for a file; all three fields are required.
    download_url: HttpUrl = Field(
        description="The signed URL to download the file",
    )
    expires_in: str = Field(
        description="The expiration time of the signed URL",
    )
    filename: str = Field(
        description="The name of the file",
    )

    @field_serializer("download_url")
    def url2str(self, val: HttpUrl) -> str:
        """Serialize the download URL as a plain string."""
        as_text = str(val)
        return as_text
File without changes
@@ -0,0 +1,59 @@
1
+ import datetime
2
+ from typing import Any, Literal, Optional
3
+
4
+ import nanoid # type: ignore
5
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
6
+ from pydantic import BaseModel, Field, HttpUrl, field_serializer
7
+ from pydantic_core import Url
8
+
9
+ from ..modalities import Modality
10
+
11
+
12
def scrapping_action(link: HttpUrl) -> dict[str, Any]:
    """Placeholder hook for scraping ``link``.

    Args:
        link: URL that would be scraped.

    Raises:
        NotImplementedError: Always; no implementation is provided here.
    """
    message = "Scrapping action not implemented"
    raise NotImplementedError(message)
14
+
15
+
16
class CronSchedule(BaseModel):
    # Schedule rendered as a six-field cron expression:
    # second, minute, hour, day of month, month, day of week.
    second: Optional[int] = Field(0, ge=0, le=59, description="Second (0-59), defaults to 0")
    minute: int = Field(..., ge=0, le=59, description="Minute (0-59)")
    hour: int = Field(..., ge=0, le=23, description="Hour (0-23)")
    day_of_month: Optional[int] = Field(None, ge=1, le=31, description="Day of the month (1-31), None means any day")
    month: Optional[int] = Field(None, ge=1, le=12, description="Month (1-12), None means every month")
    day_of_week: Optional[int] = Field(None, ge=0, le=6, description="Day of the week (0-6, Sunday = 0), None means any day")

    def to_cron_string(self) -> str:
        """Render the schedule as a space-separated six-field cron string.

        A field set to ``None`` is rendered as ``*``. The test is an explicit
        ``is None`` check so that valid zero values (second 0, Sunday = 0) are
        emitted as ``0`` instead of being widened to ``*``.

        Returns:
            ``"second minute hour day_of_month month day_of_week"``.
        """
        fields = (self.second, self.minute, self.hour, self.day_of_month, self.month, self.day_of_week)
        return " ".join("*" if value is None else str(value) for value in fields)
26
+
27
+
28
+ from ..logs import AutomationConfig
29
+
30
+
31
class ScrappingConfig(AutomationConfig):
    # Deployment config for a scheduled scraping job.
    object: Literal["deployment.scrapping_cron"] = "deployment.scrapping_cron"
    id: str = Field(
        default_factory=lambda: f"scrapping_{nanoid.generate()}",
        description="Unique identifier for the scrapping job",
    )

    # What to scrape and when
    link: HttpUrl = Field(description="Link to be scrapped")
    schedule: CronSchedule

    # Defaults to the current UTC time when the instance is created
    updated_at: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc),
    )

    # Webhook delivery settings
    webhook_url: HttpUrl = Field(description="Url of the webhook to send the data to")
    webhook_headers: dict[str, str] = Field(
        default_factory=dict,
        description="Headers to send with the request",
    )

    # Document rendering settings
    modality: Modality
    image_resolution_dpi: int = Field(
        default=96,
        description="Resolution of the image sent to the LLM",
    )
    browser_canvas: Literal["A3", "A4", "A5"] = Field(
        default="A4",
        description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.",
    )

    # Model / extraction settings
    model: str = Field(description="Model used for chat completion")
    json_schema: dict[str, Any] = Field(description="JSON schema format used to validate the output data.")
    temperature: float = Field(
        default=0.0,
        description="Temperature for sampling. If not provided, the default temperature for the model will be used.",
        examples=[0.0],
    )
    reasoning_effort: ChatCompletionReasoningEffort = Field(
        default="medium",
        description="The effort level for the model to reason about the input data. If not provided, the default reasoning effort for the model will be used.",
    )

    @field_serializer("webhook_url", "link")
    def url2str(self, val: HttpUrl) -> str:
        """Serialize the URL fields as plain strings."""
        as_text = str(val)
        return as_text
@@ -0,0 +1,28 @@
1
+ import copy
2
+ import datetime
3
+ import json
4
+ from typing import Any, Literal, Optional
5
+
6
+ import nanoid # type: ignore
7
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
8
+ from pydantic import BaseModel, Field, HttpUrl, computed_field, field_serializer
9
+
10
+ from ..._utils.json_schema import clean_schema
11
+ from ..._utils.mime import generate_blake2b_hash_from_string
12
+ from ..logs import AutomationConfig, UpdateAutomationRequest
13
+ from ..modalities import Modality
14
+ from ..pagination import ListMetadata
15
+
16
+
17
class Endpoint(AutomationConfig):
    # Deployment config tagged as an extraction endpoint.
    object: Literal["deployment.endpoint"] = "deployment.endpoint"
    id: str = Field(
        default_factory=lambda: f"endp_{nanoid.generate()}",
        description="Unique identifier for the extraction endpoint",
    )
20
+
21
class ListEndpoints(BaseModel):
    # List response: the endpoints plus the accompanying ListMetadata.
    data: list[Endpoint]
    list_metadata: ListMetadata
24
+
25
+
26
+ # Inherits from the methods of UpdateAutomationRequest
27
class UpdateEndpointRequest(UpdateAutomationRequest):
    # No endpoint-specific fields; everything is inherited from UpdateAutomationRequest.
    ...
@@ -0,0 +1,36 @@
1
+ import copy
2
+ import datetime
3
+ import json
4
+ from typing import Any, Dict, Literal, Optional
5
+
6
+ import nanoid # type: ignore
7
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
8
+ from pydantic import BaseModel, Field, HttpUrl, computed_field, field_serializer
9
+ from pydantic_core import Url
10
+
11
+ from ..._utils.json_schema import clean_schema
12
+ from ..._utils.mime import generate_blake2b_hash_from_string
13
+ from ..logs import AutomationConfig, UpdateAutomationRequest
14
+ from ..modalities import Modality
15
+ from ..pagination import ListMetadata
16
+
17
+
18
class Link(AutomationConfig):
    # Deployment config tagged as an extraction link.
    object: Literal["deployment.link"] = "deployment.link"
    id: str = Field(
        default_factory=lambda: f"lnk_{nanoid.generate()}",
        description="Unique identifier for the extraction link",
    )

    # Optional password; None when no password is set
    password: Optional[str] = Field(default=None, description="Password to access the link")
24
+
25
+
26
class ListLinks(BaseModel):
    # List response: the links plus the accompanying ListMetadata.
    data: list[Link]
    list_metadata: ListMetadata
29
+
30
+
31
+ # Inherits from the methods of UpdateAutomationRequest
32
class UpdateLinkRequest(UpdateAutomationRequest):
    # Adds the link-specific field on top of UpdateAutomationRequest.
    # Defaults to None when the caller does not supply a password.
    password: Optional[str] = None
@@ -0,0 +1,67 @@
1
+ import copy
2
+ import datetime
3
+ import json
4
+ import os
5
+ import re
6
+ from typing import Any, ClassVar, Dict, List, Literal, Optional
7
+
8
+ import nanoid # type: ignore
9
+ from pydantic import BaseModel, EmailStr, Field, HttpUrl, computed_field, field_serializer, field_validator
10
+ from pydantic_core import Url
11
+
12
+ from ..._utils.json_schema import clean_schema
13
+ from ..._utils.mime import generate_blake2b_hash_from_string
14
+ from ..modalities import Modality
15
+ from ..pagination import ListMetadata
16
+
17
# Domain syntax check used by the mailbox validators.
domain_pattern = re.compile(
    r"^(?:[a-zA-Z0-9-]+\.)+"  # one or more dot-terminated labels
    r"[a-zA-Z]{2,}$"  # final label: at least two ASCII letters
)
18
+
19
+ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
20
+
21
+ from ..logs import AutomationConfig, UpdateAutomationRequest
22
+
23
+
24
class Mailbox(AutomationConfig):
    # Regex the mailbox address must match: anything followed by "@<EMAIL_DOMAIN>".
    # The domain goes through re.escape so that its dots match literal dots only;
    # unescaped, "mailbox.uiform.com" would also accept e.g. "mailboxXuiformXcom".
    EMAIL_PATTERN: ClassVar[str] = f".*@{re.escape(os.getenv('EMAIL_DOMAIN', 'mailbox.uiform.com'))}$"
    object: Literal['deployment.mailbox'] = "deployment.mailbox"
    id: str = Field(default_factory=lambda: "mb_" + nanoid.generate(), description="Unique identifier for the mailbox")

    # Email Specific config
    email: str = Field(..., pattern=EMAIL_PATTERN)
    authorized_domains: list[str] = Field(default_factory=list, description="List of authorized domains to receive the emails from")
    authorized_emails: List[EmailStr] = Field(default_factory=list, description="List of emails to access the link")

    # The validators below run in "before" mode, i.e. on raw input. Non-string
    # values are passed through untouched (or rejected with ValueError) so that
    # pydantic reports a validation error rather than an AttributeError/TypeError
    # leaking out of the validator.
    @field_validator("email", mode="before")
    @classmethod
    def normalize_email(cls, value: str) -> str:
        """Strip surrounding whitespace and lower-case the mailbox address."""
        if not isinstance(value, str):
            return value
        return value.strip().lower()

    @field_validator("authorized_emails", mode="before")
    @classmethod
    def normalize_authorized_emails(cls, emails: List[str]) -> List[str]:
        """Strip and lower-case every authorized email address."""
        return [email.strip().lower() if isinstance(email, str) else email for email in emails]

    @field_validator('authorized_domains', mode='before')
    @classmethod
    def validate_domain(cls, list_domains: list[str]) -> list[str]:
        """Reject the list if any entry does not match ``domain_pattern``.

        Raises:
            ValueError: If an entry is not a string or is not a valid domain.
        """
        for domain in list_domains:
            if not isinstance(domain, str) or not domain_pattern.match(domain):
                raise ValueError(f"Invalid domain: {domain}")
        return list_domains
49
+
50
+
51
class ListMailboxes(BaseModel):
    # List response: the mailboxes plus the accompanying ListMetadata.
    data: list[Mailbox]
    list_metadata: ListMetadata
54
+
55
+
56
+ # Inherits from the methods of UpdateAutomationRequest
57
class UpdateMailboxRequest(UpdateAutomationRequest):
    # Mailbox-specific fields; both default to None when not supplied.
    authorized_domains: Optional[list[str]] = None
    authorized_emails: Optional[List[EmailStr]] = None

    @field_validator("authorized_emails", mode="before")
    @classmethod
    def normalize_authorized_emails(cls, emails: Optional[List[str]]) -> Optional[List[str]]:
        """Strip and lower-case every address, keeping ``None`` and ``[]`` distinct.

        Only ``None`` is passed through as ``None``. An explicit empty list is
        returned as an empty list; a truthiness test would collapse it to
        ``None`` and make "clear the list" indistinguishable from "not provided".
        """
        if emails is None:
            return None
        return [email.strip().lower() for email in emails]