retab 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/client copy.py +693 -0
- retab/types/documents/extract.py +7 -4
- retab/types/inference_settings.py +1 -0
- retab/types/projects/Untitled-2.py +16671 -0
- retab/types/projects/model.py +13 -1
- retab/types/projects/v2.py +137 -0
- retab/types/schemas/__init__.py +5 -0
- retab/types/schemas/chat.py +491 -0
- retab/types/schemas/model.py +1747 -0
- {retab-0.0.64.dist-info → retab-0.0.66.dist-info}/METADATA +1 -1
- {retab-0.0.64.dist-info → retab-0.0.66.dist-info}/RECORD +14 -9
- {retab-0.0.64.dist-info → retab-0.0.66.dist-info}/WHEEL +0 -0
- {retab-0.0.64.dist-info → retab-0.0.66.dist-info}/top_level.txt +0 -0
retab/types/documents/extract.py
CHANGED
|
@@ -39,8 +39,7 @@ class DocumentExtractRequest(BaseModel):
|
|
|
39
39
|
store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
|
|
40
40
|
need_validation: bool = Field(default=False, description="If true, the extraction will be validated against the schema")
|
|
41
41
|
modality: Modality = Field(default="native", description="The modality of the document to be analyzed")
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
parallel_ocr_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
|
|
44
43
|
|
|
45
44
|
# Add a model validator that rejects n_consensus > 1 if temperature is 0
|
|
46
45
|
@field_validator("n_consensus")
|
|
@@ -103,12 +102,16 @@ LikelihoodsSource = Literal["consensus", "log_probs"]
|
|
|
103
102
|
|
|
104
103
|
|
|
105
104
|
class RetabParsedChatCompletion(ParsedChatCompletion):
|
|
105
|
+
model_config = ConfigDict(arbitrary_types_allowed=True, extra="ignore")
|
|
106
|
+
|
|
106
107
|
extraction_id: str | None = None
|
|
107
108
|
choices: list[RetabParsedChoice] # type: ignore
|
|
108
|
-
# Additional metadata fields
|
|
109
|
+
# Additional metadata fields
|
|
109
110
|
likelihoods: Optional[dict[str, Any]] = Field(
|
|
110
111
|
default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
|
|
111
112
|
)
|
|
113
|
+
|
|
114
|
+
requires_human_review: bool = Field(default=False, description="If true, the extraction requires human review")
|
|
112
115
|
schema_validation_error: ErrorDetail | None = None
|
|
113
116
|
# Timestamps
|
|
114
117
|
request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
|
|
@@ -330,7 +333,7 @@ class RetabParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
|
|
|
330
333
|
for idx in range(len(self.choices))
|
|
331
334
|
],
|
|
332
335
|
likelihoods=final_likelihoods,
|
|
333
|
-
usage=
|
|
336
|
+
usage=self.usage,
|
|
334
337
|
request_at=self.request_at,
|
|
335
338
|
first_token_at=self.first_token_at,
|
|
336
339
|
last_token_at=self.last_token_at,
|
|
retab/types/inference_settings.py
CHANGED
|
@@ -11,6 +11,7 @@ class InferenceSettings(BaseModel):
|
|
|
11
11
|
browser_canvas: BrowserCanvas = "A4"
|
|
12
12
|
n_consensus: int = Field(default=1, description="Number of consensus rounds to perform")
|
|
13
13
|
modality: Modality = "native"
|
|
14
|
+
parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
|
|
14
15
|
|
|
15
16
|
model_config = ConfigDict(extra="ignore")
|
|
16
17
|
|