PyPI - retab - Versions diffs - 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl - Mend

retab 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

retab/__init__.py +2 -2
retab/client copy.py +693 -0
retab/types/documents/extract.py +7 -4
retab/types/inference_settings.py +1 -0
retab/types/projects/Untitled-2.py +16671 -0
retab/types/projects/model.py +13 -1
retab/types/projects/v2.py +137 -0
retab/types/schemas/__init__.py +5 -0
retab/types/schemas/chat.py +491 -0
retab/types/schemas/model.py +1747 -0
{retab-0.0.64.dist-info → retab-0.0.66.dist-info}/METADATA +1 -1
{retab-0.0.64.dist-info → retab-0.0.66.dist-info}/RECORD +14 -9
{retab-0.0.64.dist-info → retab-0.0.66.dist-info}/WHEEL +0 -0
{retab-0.0.64.dist-info → retab-0.0.66.dist-info}/top_level.txt +0 -0

retab/types/documents/extract.py CHANGED Viewed

@@ -39,8 +39,7 @@ class DocumentExtractRequest(BaseModel):
     store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
     need_validation: bool = Field(default=False, description="If true, the extraction will be validated against the schema")
     modality: Modality = Field(default="native", description="The modality of the document to be analyzed")
-    hardcoded_keys: Optional[list[str]] = Field(default=None, description="hardcoded keys to be used for the extraction of long lists of data")
+    parallel_ocr_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     # Add a model validator that rejects n_consensus > 1 if temperature is 0
     @field_validator("n_consensus")
@@ -103,12 +102,16 @@ LikelihoodsSource = Literal["consensus", "log_probs"]
 class RetabParsedChatCompletion(ParsedChatCompletion):
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="ignore")
     extraction_id: str | None = None
     choices: list[RetabParsedChoice]  # type: ignore
-    # Additional metadata fields (UIForm)
+    # Additional metadata fields
     likelihoods: Optional[dict[str, Any]] = Field(
         default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
     )
+    requires_human_review: bool = Field(default=False, description="If true, the extraction requires human review")
     schema_validation_error: ErrorDetail | None = None
     # Timestamps
     request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
@@ -330,7 +333,7 @@ class RetabParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
                 for idx in range(len(self.choices))
             ],
             likelihoods=final_likelihoods,
-            usage=None,
+            usage=self.usage,
             request_at=self.request_at,
             first_token_at=self.first_token_at,
             last_token_at=self.last_token_at,

retab/types/inference_settings.py CHANGED Viewed

@@ -11,6 +11,7 @@ class InferenceSettings(BaseModel):
     browser_canvas: BrowserCanvas = "A4"
     n_consensus: int = Field(default=1, description="Number of consensus rounds to perform")
     modality: Modality = "native"
+    parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     model_config = ConfigDict(extra="ignore")

retab 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl

retab 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl