retab 0.0.87__py3-none-any.whl → 0.0.88__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ from ..mime import MIMEData
5
5
  class Category(BaseModel):
6
6
  name: str = Field(..., description="The name of the category")
7
7
  description: str = Field(..., description="The description of the category")
8
+ partition_key: str | None = Field(default=None, description="The key to partition the category")
8
9
 
9
10
 
10
11
  class SplitRequest(BaseModel):
@@ -13,20 +14,32 @@ class SplitRequest(BaseModel):
13
14
  model: str = Field(default="retab-small", description="The model to use to split the document")
14
15
 
15
16
 
17
+ class Partition(BaseModel):
18
+ key: str = Field(..., description="The partition key value (e.g., property ID, invoice number)")
19
+ pages: list[int] = Field(..., description="The pages of the partition (1-indexed)")
20
+ first_page_y_start: float = Field(default=0.0, description="The y coordinate of the first page of the partition")
21
+ last_page_y_end: float = Field(default=1.0, description="The y coordinate of the last page of the partition")
22
+
16
23
  class SplitResult(BaseModel):
17
24
  name: str = Field(..., description="The name of the category")
18
- start_page: int = Field(..., description="The start page of the category (1-indexed)")
19
- end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
25
+ pages: list[int] = Field(..., description="The pages of the category (1-indexed)")
26
+ partitions: list[Partition] = Field(default_factory=list, description="The partitions of the category")
20
27
 
21
28
 
22
29
  class SplitResponse(BaseModel):
23
30
  splits: list[SplitResult] = Field(..., description="The list of document splits with their page ranges")
24
31
 
25
32
 
33
+ class SplitOutputItem(BaseModel):
34
+ """Internal schema item for LLM structured output validation."""
35
+ name: str = Field(..., description="The name of the category")
36
+ start_page: int = Field(..., description="The start page of the category (1-indexed)")
37
+ end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
38
+
26
39
 
27
40
  class SplitOutputSchema(BaseModel):
28
41
  """Schema for LLM structured output."""
29
- splits: list[SplitResult] = Field(
30
- ...,
42
+ splits: list[SplitOutputItem] = Field(
43
+ ...,
31
44
  description="List of document sections, each classified into one of the provided categories with their page ranges"
32
45
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.87
3
+ Version: 0.0.88
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -36,7 +36,7 @@ retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_ig
36
36
  retab/types/documents/edit.py,sha256=b6UcYLOJkClpMu4QyYmdp-X4WtN8U_3oiMBc1KLklVY,5663
37
37
  retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
38
38
  retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
39
- retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
39
+ retab/types/documents/split.py,sha256=Bjk5iJdS3v7I3rCvqpFUPlzgO4HINqh3uMPQJg-MqPc,2166
40
40
  retab/types/edit/__init__.py,sha256=M8hF97h7fX8RP9IsB6qpkw0eyvO0DFQvP6FmWL8caCQ,331
41
41
  retab/types/edit/templates.py,sha256=RLRIMdXzU-5_3XPf0iMSozjRTAP5Tliq0nrjlZn0l8E,2412
42
42
  retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,7 +59,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
59
59
  retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
60
60
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
61
61
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
62
- retab-0.0.87.dist-info/METADATA,sha256=Rz6B3ctJWOHF0hcaFxc2hEyBgpeBRgvScGxFNGjALMg,4532
63
- retab-0.0.87.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- retab-0.0.87.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
65
- retab-0.0.87.dist-info/RECORD,,
62
+ retab-0.0.88.dist-info/METADATA,sha256=6Tzn_jnoHNc0-UkawkeLW5LQL-rGrfU-6A_mHEbROac,4532
63
+ retab-0.0.88.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ retab-0.0.88.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
65
+ retab-0.0.88.dist-info/RECORD,,
File without changes