retab 0.0.83__py3-none-any.whl → 0.0.85__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .client import Agent, AsyncAgent
2
+
3
+ __all__ = ["Agent", "AsyncAgent"]
4
+
@@ -0,0 +1,157 @@
1
+ """
2
+ Agent Edit SDK client - Wrapper for agent-based document editing functionality.
3
+ """
4
+
5
+ from io import IOBase
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import PIL.Image
10
+ from pydantic import HttpUrl
11
+
12
+ from ...._resource import AsyncAPIResource, SyncAPIResource
13
+ from ....utils.mime import prepare_mime_document
14
+ from ....types.documents.edit import (
15
+ EditRequest,
16
+ EditResponse,
17
+ )
18
+ from ....types.mime import MIMEData
19
+ from ....types.standards import PreparedRequest, FieldUnset
20
+
21
+
22
class BaseAgentMixin:
    """Request-building helpers shared by the sync and async agent clients."""

    def _prepare_fill(
        self,
        instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        **extra_body: Any,
    ) -> PreparedRequest:
        """Build the POST request for the agent fill endpoint.

        Args:
            instructions: Instructions describing how to fill the form fields.
            document: Optional document to edit; when given it is converted
                with ``prepare_mime_document`` before being added to the payload.
            model: Model name. Left out of the payload while it is still
                the ``FieldUnset`` sentinel.
            **extra_body: Additional request fields. They are merged last, so a
                key that collides with one set above replaces it.

        Returns:
            PreparedRequest: A ``POST`` to ``/v1/edit/agent/fill`` whose data is
            the JSON-mode dump of the validated ``EditRequest``.
        """
        payload: dict[str, Any] = {"instructions": instructions}

        if document is not None:
            payload["document"] = prepare_mime_document(document)

        if model is not FieldUnset:
            payload["model"] = model

        # Caller-supplied extras go in last; updating with an empty mapping
        # is a no-op, so no emptiness guard is needed.
        payload.update(extra_body)

        # exclude_unset=True is passed so that fields never assigned on the
        # request model are not serialized into the body.
        body = EditRequest(**payload).model_dump(mode="json", exclude_unset=True)
        return PreparedRequest(method="POST", url="/v1/edit/agent/fill", data=body)
53
+
54
+
55
class Agent(SyncAPIResource, BaseAgentMixin):
    """Synchronous client for the agent-based document fill endpoint."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    def fill(
        self,
        instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        **extra_body: Any,
    ) -> EditResponse:
        """
        Infer a document's form fields and fill them according to the instructions.

        Processing steps:
        1. Detection to identify form field bounding boxes
        2. LLM inference to name and describe detected fields
        3. LLM-based form filling using the provided instructions
        4. The filled document is returned with form field values populated

        Args:
            instructions: Instructions describing how to fill the form fields.
            document: The document to edit. Can be a file path (Path or str), file-like object,
                MIMEData, PIL Image, or URL.
            model: The LLM model to use for inference. Defaults to "retab-small".
            **extra_body: Additional request fields, forwarded to the request
                builder and merged into the request payload.

        Returns:
            EditResponse: Response containing:
                - form_data: List of form fields with filled values
                - filled_document: Document with filled form values (MIMEData)

        Raises:
            HTTPException: If the request fails.

        Supported document formats:
            - PDF: Native form field detection and filling
            - DOCX/DOC: Native editing to preserve styles and formatting
            - PPTX/PPT: Native editing for presentations
            - XLSX/XLS: Native editing for spreadsheets
        """
        # Build the request via the shared mixin, send it through the
        # underlying client, then validate the raw payload into the model.
        raw_response = self._client._prepared_request(
            self._prepare_fill(
                instructions=instructions,
                document=document,
                model=model,
                **extra_body,
            )
        )
        return EditResponse.model_validate(raw_response)
105
+
106
+
107
class AsyncAgent(AsyncAPIResource, BaseAgentMixin):
    """Asynchronous client for the agent-based document fill endpoint."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    async def fill(
        self,
        instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        **extra_body: Any,
    ) -> EditResponse:
        """
        Asynchronously infer a document's form fields and fill them according to the instructions.

        Processing steps:
        1. Detection to identify form field bounding boxes
        2. LLM inference to name and describe detected fields
        3. LLM-based form filling using the provided instructions
        4. The filled document is returned with form field values populated

        Args:
            instructions: Instructions describing how to fill the form fields.
            document: The document to edit. Can be a file path (Path or str), file-like object,
                MIMEData, PIL Image, or URL.
            model: The LLM model to use for inference. Defaults to "retab-small".
            **extra_body: Additional request fields, forwarded to the request
                builder and merged into the request payload.

        Returns:
            EditResponse: Response containing:
                - form_data: List of form fields with filled values
                - filled_document: Document with filled form values (MIMEData)

        Raises:
            HTTPException: If the request fails.

        Supported document formats:
            - PDF: Native form field detection and filling
            - DOCX/DOC: Native editing to preserve styles and formatting
            - PPTX/PPT: Native editing for presentations
            - XLSX/XLS: Native editing for spreadsheets
        """
        # The request is built synchronously by the shared mixin; only the
        # network round-trip through the underlying client is awaited.
        raw_response = await self._client._prepared_request(
            self._prepare_fill(
                instructions=instructions,
                document=document,
                model=model,
                **extra_body,
            )
        )
        return EditResponse.model_validate(raw_response)
157
+
@@ -1,176 +1,41 @@
1
1
  """
2
2
  Edit SDK client - Wrapper for document editing functionality.
3
+
4
+ Provides access to:
5
+ - edit.agent.fill() - Agent-based document editing (PDF, DOCX, PPTX, XLSX)
6
+ - edit.templates.* - Template-based PDF form filling
3
7
  """
4
8
 
5
- from io import IOBase
6
- from pathlib import Path
7
9
  from typing import Any
8
10
 
9
- import PIL.Image
10
- from pydantic import HttpUrl
11
-
12
11
  from ..._resource import AsyncAPIResource, SyncAPIResource
13
- from ...utils.mime import prepare_mime_document
14
- from ...types.documents.edit import (
15
- EditRequest,
16
- EditResponse,
17
- )
18
- from ...types.mime import MIMEData
19
- from ...types.standards import PreparedRequest, FieldUnset
20
12
  from .templates import Templates, AsyncTemplates
13
+ from .agent import Agent, AsyncAgent
21
14
 
22
15
 
23
- class BaseEditMixin:
24
- """Shared methods for preparing edit API requests."""
25
-
26
- def _prepare_fill_document(
27
- self,
28
- instructions: str,
29
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
30
- model: str = FieldUnset,
31
- template_id: str | None = FieldUnset,
32
- **extra_body: Any,
33
- ) -> PreparedRequest:
34
- request_dict: dict[str, Any] = {
35
- "instructions": instructions,
36
- }
37
-
38
- if document is not None:
39
- mime_document = prepare_mime_document(document)
40
- request_dict["document"] = mime_document
41
-
42
- if model is not FieldUnset:
43
- request_dict["model"] = model
44
- if template_id is not FieldUnset:
45
- request_dict["template_id"] = template_id
46
-
47
- # Merge any extra fields provided by the caller
48
- if extra_body:
49
- request_dict.update(extra_body)
50
-
51
- edit_request = EditRequest(**request_dict)
52
- return PreparedRequest(
53
- method="POST",
54
- url="/v1/edit/fill-document",
55
- data=edit_request.model_dump(mode="json", exclude_unset=True),
56
- )
57
-
58
-
59
- class Edit(SyncAPIResource, BaseEditMixin):
60
- """Edit API wrapper for synchronous usage."""
16
class Edit(SyncAPIResource):
    """Synchronous entry point for the Edit API.

    This class only wires up its sub-clients:
        agent: Agent-based document editing (fill any document with AI)
        templates: Template-based PDF form filling (for batch processing)
    """

    def __init__(self, client: Any) -> None:
        """Create the ``agent`` and ``templates`` sub-clients on top of ``client``."""
        super().__init__(client=client)
        # Both sub-clients are constructed from the same underlying client.
        self.agent: Agent = Agent(client=client)
        self.templates: Templates = Templates(client=client)
65
28
 
66
- def fill_document(
67
- self,
68
- instructions: str,
69
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
70
- model: str = FieldUnset,
71
- template_id: str | None = FieldUnset,
72
- **extra_body: Any,
73
- ) -> EditResponse:
74
- """
75
- Edit a document by inferring form fields and filling them with provided instructions.
76
-
77
- This method performs:
78
- 1. Detection to identify form field bounding boxes
79
- 2. LLM inference to name and describe detected fields
80
- 3. LLM-based form filling using the provided instructions
81
- 4. Returns the filled document with form field values populated
82
-
83
- Either `document` OR `template_id` must be provided, but not both.
84
-
85
- Args:
86
- instructions: Instructions describing how to fill the form fields.
87
- document: The document to edit. Can be a file path (Path or str), file-like object,
88
- MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
89
- model: The LLM model to use for inference. Defaults to "retab-small".
90
- template_id: Template ID to use for filling. When provided, uses the template's
91
- pre-defined form fields and empty PDF. Only works for PDF documents.
92
- Mutually exclusive with document.
93
29
 
94
- Returns:
95
- EditResponse: Response containing:
96
- - form_data: List of form fields with filled values
97
- - filled_document: Document with filled form values (MIMEData)
98
-
99
- Raises:
100
- HTTPException: If the request fails.
101
-
102
- Supported document formats:
103
- - PDF: Native form field detection and filling
104
- - DOCX/DOC: Native editing to preserve styles and formatting
105
- - PPTX/PPT: Native editing for presentations
106
- - XLSX/XLS: Native editing for spreadsheets
107
- """
108
- request = self._prepare_fill_document(
109
- instructions=instructions,
110
- document=document,
111
- model=model,
112
- template_id=template_id,
113
- **extra_body,
114
- )
115
- response = self._client._prepared_request(request)
116
- return EditResponse.model_validate(response)
117
-
118
-
119
- class AsyncEdit(AsyncAPIResource, BaseEditMixin):
120
- """Edit API wrapper for asynchronous usage."""
30
class AsyncEdit(AsyncAPIResource):
    """Asynchronous entry point for the Edit API.

    This class only wires up its sub-clients:
        agent: Agent-based document editing (fill any document with AI)
        templates: Template-based PDF form filling (for batch processing)
    """

    def __init__(self, client: Any) -> None:
        """Create the async ``agent`` and ``templates`` sub-clients on top of ``client``."""
        super().__init__(client=client)
        # Both sub-clients are constructed from the same underlying client.
        self.agent: AsyncAgent = AsyncAgent(client=client)
        self.templates: AsyncTemplates = AsyncTemplates(client=client)
125
-
126
- async def fill_document(
127
- self,
128
- instructions: str,
129
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
130
- model: str = FieldUnset,
131
- template_id: str | None = FieldUnset,
132
- **extra_body: Any,
133
- ) -> EditResponse:
134
- """
135
- Edit a document by inferring form fields and filling them with provided instructions asynchronously.
136
-
137
- This method performs:
138
- 1. Detection to identify form field bounding boxes
139
- 2. LLM inference to name and describe detected fields
140
- 3. LLM-based form filling using the provided instructions
141
- 4. Returns the filled document with form field values populated
142
-
143
- Either `document` OR `template_id` must be provided, but not both.
144
-
145
- Args:
146
- instructions: Instructions describing how to fill the form fields.
147
- document: The document to edit. Can be a file path (Path or str), file-like object,
148
- MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
149
- model: The LLM model to use for inference. Defaults to "retab-small".
150
- template_id: Template ID to use for filling. When provided, uses the template's
151
- pre-defined form fields and empty PDF. Only works for PDF documents.
152
- Mutually exclusive with document.
153
-
154
- Returns:
155
- EditResponse: Response containing:
156
- - form_data: List of form fields with filled values
157
- - filled_document: Document with filled form values (MIMEData)
158
-
159
- Raises:
160
- HTTPException: If the request fails.
161
-
162
- Supported document formats:
163
- - PDF: Native form field detection and filling
164
- - DOCX/DOC: Native editing to preserve styles and formatting
165
- - PPTX/PPT: Native editing for presentations
166
- - XLSX/XLS: Native editing for spreadsheets
167
- """
168
- request = self._prepare_fill_document(
169
- instructions=instructions,
170
- document=document,
171
- model=model,
172
- template_id=template_id,
173
- **extra_body,
174
- )
175
- response = await self._client._prepared_request(request)
176
- return EditResponse.model_validate(response)
@@ -107,9 +107,8 @@ class OCRResult(BaseModel):
107
107
  class InferFormSchemaRequest(BaseModel):
108
108
  """Request to infer form schema from a PDF or DOCX document."""
109
109
 
110
- document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
110
+ document: MIMEData = Field(..., description="Input document (PDF, DOCX, XLSX or PPTX).")
111
111
  model: str = Field(default="retab-small", description="LLM model to use for inference")
112
- instructions: Optional[str] = Field(default=None, description="Optional instructions to guide form field detection (e.g., which fields to focus on, specific areas to look for)")
113
112
 
114
113
 
115
114
  class EditRequest(BaseModel):
@@ -119,7 +118,7 @@ class EditRequest(BaseModel):
119
118
  - When `document` is provided: OCR + LLM inference to detect and fill form fields
120
119
  - When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
121
120
  """
122
- document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
121
+ document: Optional[MIMEData] = Field(default=None, description="Input document (PDF, DOCX, XLSX or PPTX). Mutually exclusive with template_id.")
123
122
  model: str = Field(default="retab-small", description="LLM model to use for inference")
124
123
  instructions: str = Field(..., description="Instructions to fill the form")
125
124
  template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.83
3
+ Version: 0.0.85
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -9,7 +9,9 @@ retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,46
9
9
  retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
10
10
  retab/resources/documents/client.py,sha256=0ZOJojT4M9QZ53nheS_vuNZWcnmwTnKx3YqYyJ7_sGY,48912
11
11
  retab/resources/edit/__init__.py,sha256=yycIstpTSKsz2qXbrY3Buzd35UDcPWvb5hw6Eb2rLow,69
12
- retab/resources/edit/client.py,sha256=osWvuKj2SNH6-nQKsWcTYcm3jVENGlwGTvDnT45nDBY,6649
12
+ retab/resources/edit/client.py,sha256=DJKlwh8xui7IDRjwPmiGKTC1_HshXLYXX-xr93FhSbo,1270
13
+ retab/resources/edit/agent/__init__.py,sha256=i5IdOMhwOOQmnhPFeBbh7-ChqwQh5q7oLow1zJ0ZAwM,74
14
+ retab/resources/edit/agent/client.py,sha256=BjVKjooWz-ZGRXwi0rcV7D_XW9iSPK0PzjzRt2gYTzI,5506
13
15
  retab/resources/edit/templates/__init__.py,sha256=n-zA_HXo7iGgeIclSwcsxmSueXJIRMo0iZjk_sax85I,90
14
16
  retab/resources/edit/templates/client.py,sha256=Eevzy5JaQmG5-hEshugQvrhgIBAjgZ8ZYZkpBSKEdBQ,19729
15
17
  retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
@@ -29,7 +31,7 @@ retab/types/documents/__init__.py,sha256=t1jXdpYqi-zQMC_9uM0m7eA1hRU0MCROwUx89cc
29
31
  retab/types/documents/classify.py,sha256=Tb6d_7kuTlWLr7bPn782dHrjtUVBCvXV3o9zm7j2lmE,1128
30
32
  retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
31
33
  retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
32
- retab/types/documents/edit.py,sha256=4VK9ed1CF179r8sU4pZXcJhKftorGhul9q-5BlM1Ik4,5606
34
+ retab/types/documents/edit.py,sha256=QogPSQF7jDbDmwiPJeRAYTy6HxgKp-7hMMFtAqIHnY0,5374
33
35
  retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
34
36
  retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
35
37
  retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
@@ -55,7 +57,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
55
57
  retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
56
58
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
57
59
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
58
- retab-0.0.83.dist-info/METADATA,sha256=2fC7uK_AP2G2o6m0-PDITV3A12TB-UMETQ-V51WwxB0,4532
59
- retab-0.0.83.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- retab-0.0.83.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
61
- retab-0.0.83.dist-info/RECORD,,
60
+ retab-0.0.85.dist-info/METADATA,sha256=0IXHFvCerJlHt1VPw6YNMhO3YU-1w-YP56i4OclgwgA,4532
61
+ retab-0.0.85.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
+ retab-0.0.85.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
63
+ retab-0.0.85.dist-info/RECORD,,
File without changes