retab 0.0.77__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,19 +118,24 @@ class BaseDocumentsMixin:
118
118
 
119
119
  def _prepare_edit(
120
120
  self,
121
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
122
121
  filling_instructions: str,
122
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
123
123
  model: str = FieldUnset,
124
+ template_id: str | None = FieldUnset,
124
125
  **extra_body: Any,
125
126
  ) -> PreparedRequest:
126
- mime_document = prepare_mime_document(document)
127
-
128
127
  request_dict: dict[str, Any] = {
129
- "document": mime_document,
130
128
  "filling_instructions": filling_instructions,
131
129
  }
130
+
131
+ if document is not None:
132
+ mime_document = prepare_mime_document(document)
133
+ request_dict["document"] = mime_document
134
+
132
135
  if model is not FieldUnset:
133
136
  request_dict["model"] = model
137
+ if template_id is not FieldUnset:
138
+ request_dict["template_id"] = template_id
134
139
 
135
140
  # Merge any extra fields provided by the caller
136
141
  if extra_body:
@@ -564,9 +569,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
564
569
 
565
570
  def edit(
566
571
  self,
567
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
568
572
  filling_instructions: str,
573
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
569
574
  model: str = FieldUnset,
575
+ template_id: str | None = FieldUnset,
570
576
  **extra_body: Any,
571
577
  ) -> EditResponse:
572
578
  """
@@ -578,10 +584,15 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
578
584
  3. LLM-based form filling using the provided instructions
579
585
  4. Returns the filled PDF with form field values populated
580
586
 
587
+ Either `document` OR `template_id` must be provided, but not both.
588
+
581
589
  Args:
582
- document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
583
590
  filling_instructions: Instructions describing how to fill the form fields.
584
- model: The LLM model to use for inference. Defaults to "gemini-2.5-pro".
591
+ document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
592
+ Mutually exclusive with template_id.
593
+ model: The LLM model to use for inference. Defaults to "retab-small".
594
+ template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
595
+ and empty PDF. Only works for PDF documents. Mutually exclusive with document.
585
596
 
586
597
  Returns:
587
598
  EditResponse: Response containing:
@@ -592,9 +603,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
592
603
  HTTPException: If the request fails.
593
604
  """
594
605
  request = self._prepare_edit(
595
- document=document,
596
606
  filling_instructions=filling_instructions,
607
+ document=document,
597
608
  model=model,
609
+ template_id=template_id,
598
610
  **extra_body,
599
611
  )
600
612
  response = self._client._prepared_request(request)
@@ -901,9 +913,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
901
913
 
902
914
  async def edit(
903
915
  self,
904
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
905
916
  filling_instructions: str,
917
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
906
918
  model: str = FieldUnset,
919
+ template_id: str | None = FieldUnset,
907
920
  **extra_body: Any,
908
921
  ) -> EditResponse:
909
922
  """
@@ -915,10 +928,15 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
915
928
  3. LLM-based form filling using the provided instructions
916
929
  4. Returns the filled PDF with form field values populated
917
930
 
931
+ Either `document` OR `template_id` must be provided, but not both.
932
+
918
933
  Args:
919
- document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
920
934
  filling_instructions: Instructions describing how to fill the form fields.
935
+ document: The document to edit. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
936
+ Mutually exclusive with template_id.
921
937
  model: The LLM model to use for inference. Defaults to "retab-small".
938
+ template_id: Template ID to use for filling. When provided, uses the template's pre-defined form fields
939
+ and empty PDF. Only works for PDF documents. Mutually exclusive with document.
922
940
 
923
941
  Returns:
924
942
  EditResponse: Response containing:
@@ -929,9 +947,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
929
947
  HTTPException: If the request fails.
930
948
  """
931
949
  request = self._prepare_edit(
932
- document=document,
933
950
  filling_instructions=filling_instructions,
951
+ document=document,
934
952
  model=model,
953
+ template_id=template_id,
935
954
  **extra_body,
936
955
  )
937
956
  response = await self._client._prepared_request(request)
@@ -22,7 +22,7 @@ class DocumentCreateMessageRequest(BaseModel):
22
22
  model_config = ConfigDict(extra="ignore")
23
23
  document: MIMEData = Field(description="The document to load.")
24
24
  image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM")
25
- model: str = Field(default="gemini-2.5-flash", description="The model to use for the document.")
25
+ model: str = Field(default="retab-small", description="The model to use for the document.")
26
26
 
27
27
  class DocumentCreateInputRequest(DocumentCreateMessageRequest):
28
28
  json_schema: dict[str, Any] = Field(description="The json schema to use for the document.")
@@ -60,6 +60,10 @@ class BaseFormField(BaseModel):
60
60
  ...,
61
61
  description="Type of field. Currently supported values: 'text' and 'checkbox'.",
62
62
  )
63
+ key: str = Field(
64
+ ...,
65
+ description="Key of the field. This is used to identify the field in the form data.",
66
+ )
63
67
 
64
68
 
65
69
  class FormField(BaseFormField):
@@ -113,10 +117,11 @@ class OCRResult(BaseModel):
113
117
 
114
118
 
115
119
  class InferFormSchemaRequest(BaseModel):
116
- """Request to infer form schema from a PDF."""
120
+ """Request to infer form schema from a PDF or DOCX document."""
117
121
 
118
- document: MIMEData = Field(..., description="Input document (PDF)")
119
- model: str = Field(default="gemini-2.5-pro", description="LLM model to use for inference")
122
+ document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
123
+ model: str = Field(default="retab-small", description="LLM model to use for inference")
124
+ instructions: Optional[str] = Field(default=None, description="Optional instructions to guide form field detection (e.g., which fields to focus on, specific areas to look for)")
120
125
 
121
126
 
122
127
  class InferFormSchemaResponse(BaseModel):
@@ -127,10 +132,17 @@ class InferFormSchemaResponse(BaseModel):
127
132
  form_fields_pdf: MIMEData = Field(..., description="PDF with form field bounding boxes")
128
133
 
129
134
 
130
- class EditRequest(InferFormSchemaRequest):
131
- """Request for the infer_and_fill_schema endpoint."""
135
+ class EditRequest(BaseModel):
136
+ """Request for the infer_and_fill_schema endpoint.
132
137
 
138
+ Either `document` OR `template_id` must be provided, but not both.
139
+ - When `document` is provided: OCR + LLM inference to detect and fill form fields
140
+ - When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
141
+ """
142
+ document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
143
+ model: str = Field(default="retab-small", description="LLM model to use for inference")
133
144
  filling_instructions: str = Field(..., description="Instructions to fill the form")
145
+ template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
134
146
 
135
147
  class EditResponse(BaseModel):
136
148
  """Response from the fill_form endpoint.
@@ -34,7 +34,7 @@ class DocumentExtractRequest(BaseModel):
34
34
  stream: bool = Field(default=False, description="If true, the extraction will be streamed to the user using the active WebSocket connection")
35
35
  seed: int | None = Field(default=None, description="Seed for the random number generator. If not provided, a random seed will be generated.", examples=[None])
36
36
  store: bool = Field(default=True, description="If true, the extraction will be stored in the database")
37
- parallel_ocr_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
37
+ chunking_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
38
38
  web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
39
39
  metadata: dict[str, str] = Field(default_factory=dict, description="User-defined metadata to associate with this extraction")
40
40
  extraction_id: Optional[str] = Field(default=None, description="Extraction ID to use for this extraction. If not provided, a new ID will be generated.")
@@ -18,7 +18,7 @@ class ParseRequest(BaseModel):
18
18
  model_config = ConfigDict(extra="ignore")
19
19
 
20
20
  document: MIMEData = Field(..., description="Document to parse")
21
- model: str = Field(default="gemini-2.5-flash", description="Model to use for parsing")
21
+ model: str = Field(default="retab-small", description="Model to use for parsing")
22
22
  table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
23
23
  image_resolution_dpi: int = Field(default=192, description="DPI for image processing", ge=96, le=300)
24
24
 
@@ -10,7 +10,7 @@ class Category(BaseModel):
10
10
  class SplitRequest(BaseModel):
11
11
  document: MIMEData = Field(..., description="The document to split")
12
12
  categories: list[Category] = Field(..., description="The categories to split the document into")
13
- model: str = Field(..., description="The model to use to split the document")
13
+ model: str = Field(default="retab-small", description="The model to use to split the document")
14
14
 
15
15
 
16
16
  class SplitResult(BaseModel):
@@ -8,7 +8,7 @@ class InferenceSettings(BaseModel):
8
8
  reasoning_effort: ChatCompletionReasoningEffort = "minimal"
9
9
  image_resolution_dpi: int = Field(default=192, description="Resolution of the image sent to the LLM", ge=96, le=300)
10
10
  n_consensus: int = Field(default=1, ge=1, le=8, description="Number of consensus rounds to perform")
11
- parallel_ocr_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
11
+ chunking_keys: dict[str, str] | None = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
12
12
  web_search: bool = Field(default=False, description="Enable web search enrichment with Parallel AI to add external context during extraction")
13
13
  model_config = ConfigDict(extra="ignore")
14
14
 
@@ -9,7 +9,7 @@ from ..inference_settings import InferenceSettings
9
9
  from .predictions import PredictionData
10
10
 
11
11
  default_inference_settings = InferenceSettings(
12
- model="auto-small",
12
+ model="retab-small",
13
13
  temperature=0.5,
14
14
  reasoning_effort="minimal",
15
15
  image_resolution_dpi=192,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.77
3
+ Version: 0.0.79
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -7,30 +7,30 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
8
8
  retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
9
9
  retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
10
- retab/resources/documents/client.py,sha256=xiHZPvaxETqZGXanOzp1zFQBcSB7WlgiXGtiv6Ys1dQ,42496
10
+ retab/resources/documents/client.py,sha256=XxWo9FlktrpuskAPyKWTx9UIA2VA81g0SbHjHYnigMM,43583
11
11
  retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
12
12
  retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
13
13
  retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
14
14
  retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQzIXho,15593
15
15
  retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
17
- retab/types/inference_settings.py,sha256=8oeah8r9W4qj8ojn6aXCWsNkGloc2XgQ095W40Z_YUk,1135
17
+ retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
18
18
  retab/types/mime.py,sha256=ZLNCD3pvgn5cbGfJwzrdkjgB9dMHCbN67YEV9bx47zE,10063
19
19
  retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
20
20
  retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
21
21
  retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
22
22
  retab/types/documents/__init__.py,sha256=YDsvsmwkS5lfGXk5aBqSqmFh6LKX3dM6q_cUo5oIydU,277
23
23
  retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
24
- retab/types/documents/create_messages.py,sha256=Cox0QgIyLhTXIvw1Nzd2BCnB9-5KAYgw_gads5eTaDw,7272
25
- retab/types/documents/edit.py,sha256=HjDjhHlj08Kks7ABVohTrAJ9QngDgwVj32AxXitjrv0,4804
26
- retab/types/documents/extract.py,sha256=DhS9jm0lUgXVLObKm2CnSJQ2eqMmsBfttO0K9TndfIw,16728
27
- retab/types/documents/parse.py,sha256=Jd6i-1UXhAtgntRBZItEHGHeevyLdLmbTQa1-HNrico,1305
28
- retab/types/documents/split.py,sha256=Sjp2u7Ob6nBRQL23RlgiabgyUmoyf8aEyr7zdvUdU-M,1228
24
+ retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
25
+ retab/types/documents/edit.py,sha256=ZY-a_Q9Y76e4oojeJJsisoCZbNSU6gqwAgb9fq9S76w,5930
26
+ retab/types/documents/extract.py,sha256=eMaVl76K_1CeuLmdttfrf4yoQqs27f10w9rNBePb0DY,16724
27
+ retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
28
+ retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
29
29
  retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
30
  retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
31
31
  retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
32
32
  retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
33
- retab/types/projects/model.py,sha256=ACvqLQ850t6-mxl_s8fU87Oto6pLCW4ABN4zLe6Bog8,4592
33
+ retab/types/projects/model.py,sha256=keuCzlqUR_WPHW5Grl68fG4VxjTFmNDXQk0MlZ-vtXs,4593
34
34
  retab/types/projects/predictions.py,sha256=GeQX-nCmbzKL50aaSFlSyn0sgnbzKserPK6DSQT8ACk,1114
35
35
  retab/types/schemas/__init__.py,sha256=9ODWiC_4pUVKxoIKglYZjvRjRyd1ZCVxG8GBdQgHNbU,57
36
36
  retab/types/schemas/chat.py,sha256=ppTidxsNslTKE5aBva04i9IxeARMqYpXYLjtR7V6pBc,21219
@@ -44,7 +44,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
44
44
  retab/utils/json_schema.py,sha256=F3MLNGskpfPh1IkXHPLp60ceOEFD79GyL8mVvr0OiVM,19583
45
45
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
46
46
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
47
- retab-0.0.77.dist-info/METADATA,sha256=F2-lc5_Am2m8rqSaVLrlsp0Uwdhe1pLZmiwcplBM9KA,4532
48
- retab-0.0.77.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
- retab-0.0.77.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
50
- retab-0.0.77.dist-info/RECORD,,
47
+ retab-0.0.79.dist-info/METADATA,sha256=GAgtfkDV8Zu0Bc4dBl7vL87xLutKpGUqpwCY3RxGFP0,4532
48
+ retab-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
+ retab-0.0.79.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
50
+ retab-0.0.79.dist-info/RECORD,,
File without changes