retab 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/resources/documents/client.py +30 -11
- retab/types/documents/create_messages.py +1 -1
- retab/types/documents/edit.py +17 -5
- retab/types/documents/extract.py +1 -1
- retab/types/documents/parse.py +1 -1
- retab/types/documents/split.py +1 -1
- retab/types/inference_settings.py +1 -1
- retab/types/projects/model.py +1 -1
- {retab-0.0.77.dist-info → retab-0.0.79.dist-info}/METADATA +1 -1
- {retab-0.0.77.dist-info → retab-0.0.79.dist-info}/RECORD +12 -12
- {retab-0.0.77.dist-info → retab-0.0.79.dist-info}/WHEEL +0 -0
- {retab-0.0.77.dist-info → retab-0.0.79.dist-info}/top_level.txt +0 -0
|
@@ -118,19 +118,24 @@ class BaseDocumentsMixin:
|
|
|
118
118
|
|
|
119
119
|
def _prepare_edit(
|
|
120
120
|
self,
|
|
121
|
-
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
|
122
121
|
filling_instructions: str,
|
|
122
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
|
|
123
123
|
model: str = FieldUnset,
|
|
124
|
+
template_id: str | None = FieldUnset,
|
|
124
125
|
**extra_body: Any,
|
|
125
126
|
) -> PreparedRequest:
|
|
126
|
-
mime_document = prepare_mime_document(document)
|
|
127
|
-
|
|
128
127
|
request_dict: dict[str, Any] = {
|
|
129
|
-
"document": mime_document,
|
|
130
128
|
"filling_instructions": filling_instructions,
|
|
131
129
|
}
|
|
130
|
+
|
|
131
|
+
if document is not None:
|
|
132
|
+
mime_document = prepare_mime_document(document)
|
|
133
|
+
request_dict["document"] = mime_document
|
|
134
|
+
|
|
132
135
|
if model is not FieldUnset:
|
|
133
136
|
request_dict["model"] = model
|
|
137
|
+
if template_id is not FieldUnset:
|
|
138
|
+
request_dict["template_id"] = template_id
|
|
134
139
|
|
|
135
140
|
# Merge any extra fields provided by the caller
|
|
136
141
|
if extra_body:
|
|
@@ -564,9 +569,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
|
564
569
|
|
|
565
570
|
def edit(
|
|
566
571
|
self,
|
|
567
|
-
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
|
568
572
|
filling_instructions: str,
|
|
573
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
|
|
569
574
|
model: str = FieldUnset,
|
|
575
|
+
template_id: str | None = FieldUnset,
|
|
570
576
|
**extra_body: Any,
|
|
571
577
|
) -> EditResponse:
|
|
572
578
|
"""
|
|
@@ -578,10 +584,15 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
|
578
584
|
3. LLM-based form filling using the provided instructions
|
|
579
585
|
4. Returns the filled PDF with form field values populated
|
|
580
586
|
|
|
587
|
+
Either `document` OR `template_id` must be provided, but not both.
|
|
588
|
+
|
|
581
589
|
Args:
|
|
582
|
-
document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
|
583
590
|
filling_instructions: Instructions describing how to fill the form fields.
|
|
584
|
-
|
|
591
|
+
document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
|
592
|
+
Mutually exclusive with template_id.
|
|
593
|
+
model: The LLM model to use for inference. Defaults to "retab-small".
|
|
594
|
+
template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
|
|
595
|
+
and empty PDF. Only works for PDF documents. Mutually exclusive with document.
|
|
585
596
|
|
|
586
597
|
Returns:
|
|
587
598
|
EditResponse: Response containing:
|
|
@@ -592,9 +603,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
|
592
603
|
HTTPException: If the request fails.
|
|
593
604
|
"""
|
|
594
605
|
request = self._prepare_edit(
|
|
595
|
-
document=document,
|
|
596
606
|
filling_instructions=filling_instructions,
|
|
607
|
+
document=document,
|
|
597
608
|
model=model,
|
|
609
|
+
template_id=template_id,
|
|
598
610
|
**extra_body,
|
|
599
611
|
)
|
|
600
612
|
response = self._client._prepared_request(request)
|
|
@@ -901,9 +913,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
|
901
913
|
|
|
902
914
|
async def edit(
|
|
903
915
|
self,
|
|
904
|
-
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
|
905
916
|
filling_instructions: str,
|
|
917
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
|
|
906
918
|
model: str = FieldUnset,
|
|
919
|
+
template_id: str | None = FieldUnset,
|
|
907
920
|
**extra_body: Any,
|
|
908
921
|
) -> EditResponse:
|
|
909
922
|
"""
|
|
@@ -915,10 +928,15 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
|
915
928
|
3. LLM-based form filling using the provided instructions
|
|
916
929
|
4. Returns the filled PDF with form field values populated
|
|
917
930
|
|
|
931
|
+
Either `document` OR `template_id` must be provided, but not both.
|
|
932
|
+
|
|
918
933
|
Args:
|
|
919
|
-
document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
|
920
934
|
filling_instructions: Instructions describing how to fill the form fields.
|
|
935
|
+
document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
|
936
|
+
Mutually exclusive with template_id.
|
|
921
937
|
model: The LLM model to use for inference. Defaults to "retab-small".
|
|
938
|
+
template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
|
|
939
|
+
and empty PDF. Only works for PDF documents. Mutually exclusive with document.
|
|
922
940
|
|
|
923
941
|
Returns:
|
|
924
942
|
EditResponse: Response containing:
|
|
@@ -929,9 +947,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
|
929
947
|
HTTPException: If the request fails.
|
|
930
948
|
"""
|
|
931
949
|
request = self._prepare_edit(
|
|
932
|
-
document=document,
|
|
933
950
|
filling_instructions=filling_instructions,
|
|
951
|
+
document=document,
|
|
934
952
|
model=model,
|
|
953
|
+
template_id=template_id,
|
|
935
954
|
**extra_body,
|
|
936
955
|
)
|
|
937
956
|
response = await self._client._prepared_request(request)
|
|
@@ -22,7 +22,7 @@ class DocumentCreateMessageRequest(BaseModel):
|
|
|
22
22
|
model_config = ConfigDict(extra="ignore")
|
|
23
23
|
document: MIMEData = Field(description="The document to load.")
|
|
24
24
|
image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM")
|
|
25
|
-
model: str = Field(default="
|
|
25
|
+
model: str = Field(default="retab-small", description="The model to use for the document.")
|
|
26
26
|
|
|
27
27
|
class DocumentCreateInputRequest(DocumentCreateMessageRequest):
|
|
28
28
|
json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")
|
retab/types/documents/edit.py
CHANGED
|
@@ -60,6 +60,10 @@ class BaseFormField(BaseModel):
|
|
|
60
60
|
...,
|
|
61
61
|
description="Type of field. Currently supported values: 'text' and 'checkbox'.",
|
|
62
62
|
)
|
|
63
|
+
key: str = Field(
|
|
64
|
+
...,
|
|
65
|
+
description="Key of the field. This is used to identify the field in the form data.",
|
|
66
|
+
)
|
|
63
67
|
|
|
64
68
|
|
|
65
69
|
class FormField(BaseFormField):
|
|
@@ -113,10 +117,11 @@ class OCRResult(BaseModel):
|
|
|
113
117
|
|
|
114
118
|
|
|
115
119
|
class InferFormSchemaRequest(BaseModel):
|
|
116
|
-
"""Request to infer form schema from a PDF."""
|
|
120
|
+
"""Request to infer form schema from a PDF or DOCX document."""
|
|
117
121
|
|
|
118
|
-
document: MIMEData = Field(..., description="Input document (PDF)")
|
|
119
|
-
model: str = Field(default="
|
|
122
|
+
document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
|
|
123
|
+
model: str = Field(default="retab-small", description="LLM model to use for inference")
|
|
124
|
+
instructions: Optional[str] = Field(default=None, description="Optional instructions to guide form field detection (e.g., which fields to focus on, specific areas to look for)")
|
|
120
125
|
|
|
121
126
|
|
|
122
127
|
class InferFormSchemaResponse(BaseModel):
|
|
@@ -127,10 +132,17 @@ class InferFormSchemaResponse(BaseModel):
|
|
|
127
132
|
form_fields_pdf: MIMEData = Field(..., description="PDF with form field bounding boxes")
|
|
128
133
|
|
|
129
134
|
|
|
130
|
-
class EditRequest(
|
|
131
|
-
"""Request for the infer_and_fill_schema endpoint.
|
|
135
|
+
class EditRequest(BaseModel):
|
|
136
|
+
"""Request for the infer_and_fill_schema endpoint.
|
|
132
137
|
|
|
138
|
+
Either `document` OR `template_id` must be provided, but not both.
|
|
139
|
+
- When `document` is provided: OCR + LLM inference to detect and fill form fields
|
|
140
|
+
- When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
|
|
141
|
+
"""
|
|
142
|
+
document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
|
|
143
|
+
model: str = Field(default="retab-small", description="LLM model to use for inference")
|
|
133
144
|
filling_instructions: str = Field(..., description="Instructions to fill the form")
|
|
145
|
+
template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
|
|
134
146
|
|
|
135
147
|
class EditResponse(BaseModel):
|
|
136
148
|
"""Response from the fill_form endpoint.
|
retab/types/documents/extract.py
CHANGED
|
@@ -34,7 +34,7 @@ class DocumentExtractRequest(BaseModel):
|
|
|
34
34
|
stream: bool = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
|
|
35
35
|
seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
|
|
36
36
|
store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
|
|
37
|
-
|
|
37
|
+
chunking_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
|
|
38
38
|
web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
|
|
39
39
|
metadata: dict[str, str] = Field(default_factory=dict, description="User-defined metadata to associate with this extraction")
|
|
40
40
|
extraction_id: Optional[str] = Field(default=None, description="Extraction ID to use for this extraction. If not provided, a new ID will be generated.")
|
retab/types/documents/parse.py
CHANGED
|
@@ -18,7 +18,7 @@ class ParseRequest(BaseModel):
|
|
|
18
18
|
model_config = ConfigDict(extra="ignore")
|
|
19
19
|
|
|
20
20
|
document: MIMEData = Field(..., description="Document to parse")
|
|
21
|
-
model: str = Field(default="
|
|
21
|
+
model: str = Field(default="retab-small", description="Model to use for parsing")
|
|
22
22
|
table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
|
|
23
23
|
image_resolution_dpi: int = Field(default=192, description="DPI for image processing", ge=96, le=300)
|
|
24
24
|
|
retab/types/documents/split.py
CHANGED
|
@@ -10,7 +10,7 @@ class Category(BaseModel):
|
|
|
10
10
|
class SplitRequest(BaseModel):
|
|
11
11
|
document: MIMEData = Field(..., description="The document to split")
|
|
12
12
|
categories: list[Category] = Field(..., description="The categories to split the document into")
|
|
13
|
-
model: str = Field(
|
|
13
|
+
model: str = Field(default="retab-small", description="The model to use to split the document")
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class SplitResult(BaseModel):
|
|
@@ -8,7 +8,7 @@ class InferenceSettings(BaseModel):
|
|
|
8
8
|
reasoning_effort: ChatCompletionReasoningEffort = "minimal"
|
|
9
9
|
image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
|
|
10
10
|
n_consensus: int = Field(default=1, ge=1, le=8, description="Number of consensus rounds to perform")
|
|
11
|
-
|
|
11
|
+
chunking_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
|
|
12
12
|
web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
|
|
13
13
|
model_config = ConfigDict(extra="ignore")
|
|
14
14
|
|
retab/types/projects/model.py
CHANGED
|
@@ -9,7 +9,7 @@ from ..inference_settings import InferenceSettings
|
|
|
9
9
|
from .predictions import PredictionData
|
|
10
10
|
|
|
11
11
|
default_inference_settings = InferenceSettings(
|
|
12
|
-
model="
|
|
12
|
+
model="retab-small",
|
|
13
13
|
temperature=0.5,
|
|
14
14
|
reasoning_effort="minimal",
|
|
15
15
|
image_resolution_dpi=192,
|
|
@@ -7,30 +7,30 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
7
7
|
retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
|
|
8
8
|
retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
|
|
9
9
|
retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
|
|
10
|
-
retab/resources/documents/client.py,sha256=
|
|
10
|
+
retab/resources/documents/client.py,sha256=XxWo9FlktrpuskAPyKWTx9UIA2VA81g0SbHjHYnigMM,43583
|
|
11
11
|
retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
|
|
12
12
|
retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
|
|
13
13
|
retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
|
|
14
14
|
retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQzIXho,15593
|
|
15
15
|
retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
|
|
17
|
-
retab/types/inference_settings.py,sha256=
|
|
17
|
+
retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
|
|
18
18
|
retab/types/mime.py,sha256=ZLNCD3pvgn5cbGfJwzrdkjgB9dMHCbN67YEV9bx47zE,10063
|
|
19
19
|
retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
|
|
20
20
|
retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
|
|
21
21
|
retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
|
|
22
22
|
retab/types/documents/__init__.py,sha256=YDsvsmwkS5lfGXk5aBqSqmFh6LKX3dM6q_cUo5oIydU,277
|
|
23
23
|
retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
|
|
24
|
-
retab/types/documents/create_messages.py,sha256=
|
|
25
|
-
retab/types/documents/edit.py,sha256=
|
|
26
|
-
retab/types/documents/extract.py,sha256=
|
|
27
|
-
retab/types/documents/parse.py,sha256=
|
|
28
|
-
retab/types/documents/split.py,sha256=
|
|
24
|
+
retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
|
|
25
|
+
retab/types/documents/edit.py,sha256=ZY-a_Q9Y76e4oojeJJsisoCZbNSU6gqwAgb9fq9S76w,5930
|
|
26
|
+
retab/types/documents/extract.py,sha256=eMaVl76K_1CeuLmdttfrf4yoQqs27f10w9rNBePb0DY,16724
|
|
27
|
+
retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
|
|
28
|
+
retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
|
|
29
29
|
retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
|
|
31
31
|
retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
|
|
32
32
|
retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
|
|
33
|
-
retab/types/projects/model.py,sha256=
|
|
33
|
+
retab/types/projects/model.py,sha256=keuCzlqUR_WPHW5Grl68fG4VxjTFmNDXQk0MlZ-vtXs,4593
|
|
34
34
|
retab/types/projects/predictions.py,sha256=GeQX-nCmbzKL50aaSFlSyn0sgnbzKserPK6DSQT8ACk,1114
|
|
35
35
|
retab/types/schemas/__init__.py,sha256=9ODWiC_4pUVKxoIKglYZjvRjRyd1ZCVxG8GBdQgHNbU,57
|
|
36
36
|
retab/types/schemas/chat.py,sha256=ppTidxsNslTKE5aBva04i9IxeARMqYpXYLjtR7V6pBc,21219
|
|
@@ -44,7 +44,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
|
|
|
44
44
|
retab/utils/json_schema.py,sha256=F3MLNGskpfPh1IkXHPLp60ceOEFD79GyL8mVvr0OiVM,19583
|
|
45
45
|
retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
|
|
46
46
|
retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
|
|
47
|
-
retab-0.0.
|
|
48
|
-
retab-0.0.
|
|
49
|
-
retab-0.0.
|
|
50
|
-
retab-0.0.
|
|
47
|
+
retab-0.0.79.dist-info/METADATA,sha256=GAgtfkDV8Zu0Bc4dBl7vL87xLutKpGUqpwCY3RxGFP0,4532
|
|
48
|
+
retab-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
49
|
+
retab-0.0.79.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
|
|
50
|
+
retab-0.0.79.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|