retab 0.0.84__py3-none-any.whl → 0.0.86__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .client import Agent, AsyncAgent
2
+
3
+ __all__ = ["Agent", "AsyncAgent"]
4
+
@@ -0,0 +1,168 @@
1
+ """
2
+ Agent Edit SDK client - Wrapper for agent-based document editing functionality.
3
+ """
4
+
5
+ from io import IOBase
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import PIL.Image
10
+ from pydantic import HttpUrl
11
+
12
+ from ...._resource import AsyncAPIResource, SyncAPIResource
13
+ from ....utils.mime import prepare_mime_document
14
+ from ....types.documents.edit import (
15
+ EditConfig,
16
+ EditRequest,
17
+ EditResponse,
18
+ )
19
+ from ....types.mime import MIMEData
20
+ from ....types.standards import PreparedRequest, FieldUnset
21
+
22
+
23
+ class BaseAgentMixin:
24
+ """Shared methods for preparing agent edit API requests."""
25
+
26
+ def _prepare_fill(
27
+ self,
28
+ instructions: str,
29
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
30
+ model: str = FieldUnset,
31
+ color: str = FieldUnset,
32
+ **extra_body: Any,
33
+ ) -> PreparedRequest:
34
+ request_dict: dict[str, Any] = {
35
+ "instructions": instructions,
36
+ }
37
+
38
+ if document is not None:
39
+ mime_document = prepare_mime_document(document)
40
+ request_dict["document"] = mime_document
41
+
42
+ if model is not FieldUnset:
43
+ request_dict["model"] = model
44
+
45
+ if color is not FieldUnset:
46
+ request_dict["config"] = EditConfig(color=color)
47
+
48
+ # Merge any extra fields provided by the caller
49
+ if extra_body:
50
+ request_dict.update(extra_body)
51
+
52
+ edit_request = EditRequest(**request_dict)
53
+ return PreparedRequest(
54
+ method="POST",
55
+ url="/v1/edit/agent/fill",
56
+ data=edit_request.model_dump(mode="json", exclude_unset=True),
57
+ )
58
+
59
+
60
+ class Agent(SyncAPIResource, BaseAgentMixin):
61
+ """Agent Edit API wrapper for synchronous usage."""
62
+
63
+ def __init__(self, client: Any) -> None:
64
+ super().__init__(client=client)
65
+
66
+ def fill(
67
+ self,
68
+ instructions: str,
69
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
70
+ model: str = FieldUnset,
71
+ color: str = FieldUnset,
72
+ **extra_body: Any,
73
+ ) -> EditResponse:
74
+ """
75
+ Edit a document by inferring form fields and filling them with provided instructions.
76
+
77
+ This method performs:
78
+ 1. Detection to identify form field bounding boxes
79
+ 2. LLM inference to name and describe detected fields
80
+ 3. LLM-based form filling using the provided instructions
81
+ 4. Returns the filled document with form field values populated
82
+
83
+ Args:
84
+ instructions: Instructions describing how to fill the form fields.
85
+ document: The document to edit. Can be a file path (Path or str), file-like object,
86
+ MIMEData, PIL Image, or URL.
87
+ model: The LLM model to use for inference. Defaults to "retab-small".
88
+ color: Hex color code for filled text (e.g. "#000080"). Defaults to dark blue.
89
+
90
+ Returns:
91
+ EditResponse: Response containing:
92
+ - form_data: List of form fields with filled values
93
+ - filled_document: Document with filled form values (MIMEData)
94
+
95
+ Raises:
96
+ HTTPException: If the request fails.
97
+
98
+ Supported document formats:
99
+ - PDF: Native form field detection and filling
100
+ - DOCX/DOC: Native editing to preserve styles and formatting
101
+ - PPTX/PPT: Native editing for presentations
102
+ - XLSX/XLS: Native editing for spreadsheets
103
+ """
104
+ request = self._prepare_fill(
105
+ instructions=instructions,
106
+ document=document,
107
+ model=model,
108
+ color=color,
109
+ **extra_body,
110
+ )
111
+ response = self._client._prepared_request(request)
112
+ return EditResponse.model_validate(response)
113
+
114
+
115
+ class AsyncAgent(AsyncAPIResource, BaseAgentMixin):
116
+ """Agent Edit API wrapper for asynchronous usage."""
117
+
118
+ def __init__(self, client: Any) -> None:
119
+ super().__init__(client=client)
120
+
121
+ async def fill(
122
+ self,
123
+ instructions: str,
124
+ document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
125
+ model: str = FieldUnset,
126
+ color: str = FieldUnset,
127
+ **extra_body: Any,
128
+ ) -> EditResponse:
129
+ """
130
+ Edit a document by inferring form fields and filling them with provided instructions asynchronously.
131
+
132
+ This method performs:
133
+ 1. Detection to identify form field bounding boxes
134
+ 2. LLM inference to name and describe detected fields
135
+ 3. LLM-based form filling using the provided instructions
136
+ 4. Returns the filled document with form field values populated
137
+
138
+ Args:
139
+ instructions: Instructions describing how to fill the form fields.
140
+ document: The document to edit. Can be a file path (Path or str), file-like object,
141
+ MIMEData, PIL Image, or URL.
142
+ model: The LLM model to use for inference. Defaults to "retab-small".
143
+ color: Hex color code for filled text (e.g. "#000080"). Defaults to dark blue.
144
+
145
+ Returns:
146
+ EditResponse: Response containing:
147
+ - form_data: List of form fields with filled values
148
+ - filled_document: Document with filled form values (MIMEData)
149
+
150
+ Raises:
151
+ HTTPException: If the request fails.
152
+
153
+ Supported document formats:
154
+ - PDF: Native form field detection and filling
155
+ - DOCX/DOC: Native editing to preserve styles and formatting
156
+ - PPTX/PPT: Native editing for presentations
157
+ - XLSX/XLS: Native editing for spreadsheets
158
+ """
159
+ request = self._prepare_fill(
160
+ instructions=instructions,
161
+ document=document,
162
+ model=model,
163
+ color=color,
164
+ **extra_body,
165
+ )
166
+ response = await self._client._prepared_request(request)
167
+ return EditResponse.model_validate(response)
168
+
@@ -1,176 +1,41 @@
1
1
  """
2
2
  Edit SDK client - Wrapper for document editing functionality.
3
+
4
+ Provides access to:
5
+ - edit.agent.fill() - Agent-based document editing (PDF, DOCX, PPTX, XLSX)
6
+ - edit.templates.* - Template-based PDF form filling
3
7
  """
4
8
 
5
- from io import IOBase
6
- from pathlib import Path
7
9
  from typing import Any
8
10
 
9
- import PIL.Image
10
- from pydantic import HttpUrl
11
-
12
11
  from ..._resource import AsyncAPIResource, SyncAPIResource
13
- from ...utils.mime import prepare_mime_document
14
- from ...types.documents.edit import (
15
- EditRequest,
16
- EditResponse,
17
- )
18
- from ...types.mime import MIMEData
19
- from ...types.standards import PreparedRequest, FieldUnset
20
12
  from .templates import Templates, AsyncTemplates
13
+ from .agent import Agent, AsyncAgent
21
14
 
22
15
 
23
- class BaseEditMixin:
24
- """Shared methods for preparing edit API requests."""
25
-
26
- def _prepare_fill_document(
27
- self,
28
- instructions: str,
29
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
30
- model: str = FieldUnset,
31
- template_id: str | None = FieldUnset,
32
- **extra_body: Any,
33
- ) -> PreparedRequest:
34
- request_dict: dict[str, Any] = {
35
- "instructions": instructions,
36
- }
37
-
38
- if document is not None:
39
- mime_document = prepare_mime_document(document)
40
- request_dict["document"] = mime_document
41
-
42
- if model is not FieldUnset:
43
- request_dict["model"] = model
44
- if template_id is not FieldUnset:
45
- request_dict["template_id"] = template_id
46
-
47
- # Merge any extra fields provided by the caller
48
- if extra_body:
49
- request_dict.update(extra_body)
50
-
51
- edit_request = EditRequest(**request_dict)
52
- return PreparedRequest(
53
- method="POST",
54
- url="/v1/edit/fill-document",
55
- data=edit_request.model_dump(mode="json", exclude_unset=True),
56
- )
57
-
58
-
59
- class Edit(SyncAPIResource, BaseEditMixin):
60
- """Edit API wrapper for synchronous usage."""
16
+ class Edit(SyncAPIResource):
17
+ """Edit API wrapper for synchronous usage.
18
+
19
+ Sub-clients:
20
+ agent: Agent-based document editing (fill any document with AI)
21
+ templates: Template-based PDF form filling (for batch processing)
22
+ """
61
23
 
62
24
  def __init__(self, client: Any) -> None:
63
25
  super().__init__(client=client)
26
+ self.agent = Agent(client=client)
64
27
  self.templates = Templates(client=client)
65
28
 
66
- def fill_document(
67
- self,
68
- instructions: str,
69
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
70
- model: str = FieldUnset,
71
- template_id: str | None = FieldUnset,
72
- **extra_body: Any,
73
- ) -> EditResponse:
74
- """
75
- Edit a document by inferring form fields and filling them with provided instructions.
76
-
77
- This method performs:
78
- 1. Detection to identify form field bounding boxes
79
- 2. LLM inference to name and describe detected fields
80
- 3. LLM-based form filling using the provided instructions
81
- 4. Returns the filled document with form field values populated
82
-
83
- Either `document` OR `template_id` must be provided, but not both.
84
-
85
- Args:
86
- instructions: Instructions describing how to fill the form fields.
87
- document: The document to edit. Can be a file path (Path or str), file-like object,
88
- MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
89
- model: The LLM model to use for inference. Defaults to "retab-small".
90
- template_id: Template ID to use for filling. When provided, uses the template's
91
- pre-defined form fields and empty PDF. Only works for PDF documents.
92
- Mutually exclusive with document.
93
29
 
94
- Returns:
95
- EditResponse: Response containing:
96
- - form_data: List of form fields with filled values
97
- - filled_document: Document with filled form values (MIMEData)
98
-
99
- Raises:
100
- HTTPException: If the request fails.
101
-
102
- Supported document formats:
103
- - PDF: Native form field detection and filling
104
- - DOCX/DOC: Native editing to preserve styles and formatting
105
- - PPTX/PPT: Native editing for presentations
106
- - XLSX/XLS: Native editing for spreadsheets
107
- """
108
- request = self._prepare_fill_document(
109
- instructions=instructions,
110
- document=document,
111
- model=model,
112
- template_id=template_id,
113
- **extra_body,
114
- )
115
- response = self._client._prepared_request(request)
116
- return EditResponse.model_validate(response)
117
-
118
-
119
- class AsyncEdit(AsyncAPIResource, BaseEditMixin):
120
- """Edit API wrapper for asynchronous usage."""
30
+ class AsyncEdit(AsyncAPIResource):
31
+ """Edit API wrapper for asynchronous usage.
32
+
33
+ Sub-clients:
34
+ agent: Agent-based document editing (fill any document with AI)
35
+ templates: Template-based PDF form filling (for batch processing)
36
+ """
121
37
 
122
38
  def __init__(self, client: Any) -> None:
123
39
  super().__init__(client=client)
40
+ self.agent = AsyncAgent(client=client)
124
41
  self.templates = AsyncTemplates(client=client)
125
-
126
- async def fill_document(
127
- self,
128
- instructions: str,
129
- document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
130
- model: str = FieldUnset,
131
- template_id: str | None = FieldUnset,
132
- **extra_body: Any,
133
- ) -> EditResponse:
134
- """
135
- Edit a document by inferring form fields and filling them with provided instructions asynchronously.
136
-
137
- This method performs:
138
- 1. Detection to identify form field bounding boxes
139
- 2. LLM inference to name and describe detected fields
140
- 3. LLM-based form filling using the provided instructions
141
- 4. Returns the filled document with form field values populated
142
-
143
- Either `document` OR `template_id` must be provided, but not both.
144
-
145
- Args:
146
- instructions: Instructions describing how to fill the form fields.
147
- document: The document to edit. Can be a file path (Path or str), file-like object,
148
- MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
149
- model: The LLM model to use for inference. Defaults to "retab-small".
150
- template_id: Template ID to use for filling. When provided, uses the template's
151
- pre-defined form fields and empty PDF. Only works for PDF documents.
152
- Mutually exclusive with document.
153
-
154
- Returns:
155
- EditResponse: Response containing:
156
- - form_data: List of form fields with filled values
157
- - filled_document: Document with filled form values (MIMEData)
158
-
159
- Raises:
160
- HTTPException: If the request fails.
161
-
162
- Supported document formats:
163
- - PDF: Native form field detection and filling
164
- - DOCX/DOC: Native editing to preserve styles and formatting
165
- - PPTX/PPT: Native editing for presentations
166
- - XLSX/XLS: Native editing for spreadsheets
167
- """
168
- request = self._prepare_fill_document(
169
- instructions=instructions,
170
- document=document,
171
- model=model,
172
- template_id=template_id,
173
- **extra_body,
174
- )
175
- response = await self._client._prepared_request(request)
176
- return EditResponse.model_validate(response)
@@ -12,6 +12,7 @@ from pydantic import HttpUrl
12
12
  from ...._resource import AsyncAPIResource, SyncAPIResource
13
13
  from ....utils.mime import prepare_mime_document
14
14
  from ....types.documents.edit import (
15
+ EditConfig,
15
16
  FormField,
16
17
  InferFormSchemaRequest,
17
18
  InferFormSchemaResponse,
@@ -163,6 +164,7 @@ class BaseTemplatesMixin:
163
164
  template_id: str,
164
165
  instructions: str,
165
166
  model: str = FieldUnset,
167
+ color: str = FieldUnset,
166
168
  **extra_body: Any,
167
169
  ) -> PreparedRequest:
168
170
  request_dict: dict[str, Any] = {
@@ -172,6 +174,8 @@ class BaseTemplatesMixin:
172
174
 
173
175
  if model is not FieldUnset:
174
176
  request_dict["model"] = model
177
+ if color is not FieldUnset:
178
+ request_dict["config"] = EditConfig(color=color)
175
179
  if extra_body:
176
180
  request_dict.update(extra_body)
177
181
 
@@ -368,6 +372,7 @@ class Templates(SyncAPIResource, BaseTemplatesMixin):
368
372
  template_id: str,
369
373
  instructions: str,
370
374
  model: str = FieldUnset,
375
+ color: str = FieldUnset,
371
376
  **extra_body: Any,
372
377
  ) -> EditResponse:
373
378
  """
@@ -380,6 +385,7 @@ class Templates(SyncAPIResource, BaseTemplatesMixin):
380
385
  template_id: The template ID to use for filling
381
386
  instructions: Instructions describing how to fill the form fields
382
387
  model: The LLM model to use for inference (default: "retab-small")
388
+ color: Hex color code for filled text (e.g. "#000080"). Defaults to dark blue.
383
389
 
384
390
  Returns:
385
391
  EditResponse: Response containing:
@@ -395,6 +401,7 @@ class Templates(SyncAPIResource, BaseTemplatesMixin):
395
401
  template_id=template_id,
396
402
  instructions=instructions,
397
403
  model=model,
404
+ color=color,
398
405
  **extra_body,
399
406
  )
400
407
  response = self._client._prepared_request(request)
@@ -586,6 +593,7 @@ class AsyncTemplates(AsyncAPIResource, BaseTemplatesMixin):
586
593
  template_id: str,
587
594
  instructions: str,
588
595
  model: str = FieldUnset,
596
+ color: str = FieldUnset,
589
597
  **extra_body: Any,
590
598
  ) -> EditResponse:
591
599
  """
@@ -598,6 +606,7 @@ class AsyncTemplates(AsyncAPIResource, BaseTemplatesMixin):
598
606
  template_id: The template ID to use for filling
599
607
  instructions: Instructions describing how to fill the form fields
600
608
  model: The LLM model to use for inference (default: "retab-small")
609
+ color: Hex color code for filled text (e.g. "#000080"). Defaults to dark blue.
601
610
 
602
611
  Returns:
603
612
  EditResponse: Response containing:
@@ -613,6 +622,7 @@ class AsyncTemplates(AsyncAPIResource, BaseTemplatesMixin):
613
622
  template_id=template_id,
614
623
  instructions=instructions,
615
624
  model=model,
625
+ color=color,
616
626
  **extra_body,
617
627
  )
618
628
  response = await self._client._prepared_request(request)
@@ -1,190 +1,28 @@
1
- from io import IOBase
2
- from pathlib import Path
3
- from typing import Any, Dict
4
-
5
- import PIL.Image
6
- from pydantic import HttpUrl
1
+ from typing import Any
7
2
 
8
3
  from ..._resource import AsyncAPIResource, SyncAPIResource
9
- from ...utils.mime import MIMEData, prepare_mime_document
10
- from ...types.standards import PreparedRequest
11
- from ...types.workflows import WorkflowRun
12
-
13
-
14
- # Type alias for document inputs
15
- DocumentInput = Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl
16
-
17
-
18
- class WorkflowsMixin:
19
- """Mixin providing shared methods for workflow operations."""
20
-
21
- def prepare_run(
22
- self,
23
- workflow_id: str,
24
- documents: Dict[str, DocumentInput],
25
- ) -> PreparedRequest:
26
- """Prepare a request to run a workflow with input documents.
27
-
28
- Args:
29
- workflow_id: The ID of the workflow to run
30
- documents: Mapping of start node IDs to their input documents.
31
- Each document can be a file path, bytes, file-like object,
32
- MIMEData, PIL Image, or HttpUrl.
33
-
34
- Returns:
35
- PreparedRequest: The prepared request
36
-
37
- Example:
38
- >>> client.workflows.run(
39
- ... workflow_id="wf_abc123",
40
- ... documents={
41
- ... "start-node-1": Path("invoice.pdf"),
42
- ... "start-node-2": Path("receipt.pdf"),
43
- ... }
44
- ... )
45
- """
46
- # Convert each document to MIMEData and then to the format expected by the backend
47
- documents_payload: Dict[str, Dict[str, Any]] = {}
48
- for node_id, document in documents.items():
49
- mime_data = prepare_mime_document(document)
50
- documents_payload[node_id] = {
51
- "filename": mime_data.filename,
52
- "content": mime_data.content,
53
- "mime_type": mime_data.mime_type,
54
- }
55
-
56
- data = {"documents": documents_payload}
57
- return PreparedRequest(method="POST", url=f"/v1/workflows/{workflow_id}/run", data=data)
58
-
59
- def prepare_get_run(self, run_id: str) -> PreparedRequest:
60
- """Prepare a request to get a workflow run by ID.
61
-
62
- Args:
63
- run_id: The ID of the workflow run to retrieve
64
-
65
- Returns:
66
- PreparedRequest: The prepared request
67
- """
68
- return PreparedRequest(method="GET", url=f"/v1/workflows/runs/{run_id}")
69
-
70
-
71
- class Workflows(SyncAPIResource, WorkflowsMixin):
72
- """Workflows API wrapper for synchronous operations."""
73
-
74
- def __init__(self, *args, **kwargs):
75
- super().__init__(*args, **kwargs)
76
-
77
- def run(
78
- self,
79
- workflow_id: str,
80
- documents: Dict[str, DocumentInput],
81
- ) -> WorkflowRun:
82
- """Run a workflow with the provided input documents.
83
-
84
- This creates a workflow run and starts execution in the background.
85
- The returned WorkflowRun will have status "running" - use get_run()
86
- to check for updates on the run status.
87
-
88
- Args:
89
- workflow_id: The ID of the workflow to run
90
- documents: Mapping of start node IDs to their input documents.
91
- Each document can be a file path, bytes, file-like object,
92
- MIMEData, PIL Image, or HttpUrl.
93
-
94
- Returns:
95
- WorkflowRun: The created workflow run with status "running"
96
-
97
- Raises:
98
- HTTPException: If the request fails (e.g., workflow not found,
99
- missing input documents for start nodes)
100
-
101
- Example:
102
- >>> run = client.workflows.run(
103
- ... workflow_id="wf_abc123",
104
- ... documents={
105
- ... "start-node-1": Path("invoice.pdf"),
106
- ... "start-node-2": Path("receipt.pdf"),
107
- ... }
108
- ... )
109
- >>> print(f"Run started: {run.id}, status: {run.status}")
110
- """
111
- request = self.prepare_run(workflow_id=workflow_id, documents=documents)
112
- response = self._client._prepared_request(request)
113
- return WorkflowRun.model_validate(response)
114
-
115
- def get_run(self, run_id: str) -> WorkflowRun:
116
- """Get a workflow run by ID.
117
-
118
- Args:
119
- run_id: The ID of the workflow run to retrieve
120
-
121
- Returns:
122
- WorkflowRun: The workflow run
123
-
124
- Raises:
125
- HTTPException: If the request fails (e.g., run not found)
126
- """
127
- request = self.prepare_get_run(run_id)
128
- response = self._client._prepared_request(request)
129
- return WorkflowRun.model_validate(response)
130
-
131
-
132
- class AsyncWorkflows(AsyncAPIResource, WorkflowsMixin):
133
- """Workflows API wrapper for asynchronous operations."""
134
-
135
- def __init__(self, *args, **kwargs):
136
- super().__init__(*args, **kwargs)
137
-
138
- async def run(
139
- self,
140
- workflow_id: str,
141
- documents: Dict[str, DocumentInput],
142
- ) -> WorkflowRun:
143
- """Run a workflow with the provided input documents.
144
-
145
- This creates a workflow run and starts execution in the background.
146
- The returned WorkflowRun will have status "running" - use get_run()
147
- to check for updates on the run status.
4
+ from .runs import WorkflowRuns, AsyncWorkflowRuns
148
5
 
149
- Args:
150
- workflow_id: The ID of the workflow to run
151
- documents: Mapping of start node IDs to their input documents.
152
- Each document can be a file path, bytes, file-like object,
153
- MIMEData, PIL Image, or HttpUrl.
154
6
 
155
- Returns:
156
- WorkflowRun: The created workflow run with status "running"
7
+ class Workflows(SyncAPIResource):
8
+ """Workflows API wrapper for synchronous operations.
157
9
 
158
- Raises:
159
- HTTPException: If the request fails (e.g., workflow not found,
160
- missing input documents for start nodes)
10
+ Sub-clients:
11
+ runs: Workflow run operations (create, get)
12
+ """
161
13
 
162
- Example:
163
- >>> run = await client.workflows.run(
164
- ... workflow_id="wf_abc123",
165
- ... documents={
166
- ... "start-node-1": Path("invoice.pdf"),
167
- ... "start-node-2": Path("receipt.pdf"),
168
- ... }
169
- ... )
170
- >>> print(f"Run started: {run.id}, status: {run.status}")
171
- """
172
- request = self.prepare_run(workflow_id=workflow_id, documents=documents)
173
- response = await self._client._prepared_request(request)
174
- return WorkflowRun.model_validate(response)
14
+ def __init__(self, client: Any) -> None:
15
+ super().__init__(client=client)
16
+ self.runs = WorkflowRuns(client=client)
175
17
 
176
- async def get_run(self, run_id: str) -> WorkflowRun:
177
- """Get a workflow run by ID.
178
18
 
179
- Args:
180
- run_id: The ID of the workflow run to retrieve
19
+ class AsyncWorkflows(AsyncAPIResource):
20
+ """Workflows API wrapper for asynchronous operations.
181
21
 
182
- Returns:
183
- WorkflowRun: The workflow run
22
+ Sub-clients:
23
+ runs: Workflow run operations (create, get)
24
+ """
184
25
 
185
- Raises:
186
- HTTPException: If the request fails (e.g., run not found)
187
- """
188
- request = self.prepare_get_run(run_id)
189
- response = await self._client._prepared_request(request)
190
- return WorkflowRun.model_validate(response)
26
+ def __init__(self, client: Any) -> None:
27
+ super().__init__(client=client)
28
+ self.runs = AsyncWorkflowRuns(client=client)
@@ -0,0 +1,3 @@
1
+ from .client import AsyncWorkflowRuns, WorkflowRuns
2
+
3
+ __all__ = ["WorkflowRuns", "AsyncWorkflowRuns"]
@@ -0,0 +1,190 @@
1
+ from io import IOBase
2
+ from pathlib import Path
3
+ from typing import Any, Dict
4
+
5
+ import PIL.Image
6
+ from pydantic import HttpUrl
7
+
8
+ from ...._resource import AsyncAPIResource, SyncAPIResource
9
+ from ....utils.mime import MIMEData, prepare_mime_document
10
+ from ....types.standards import PreparedRequest
11
+ from ....types.workflows import WorkflowRun
12
+
13
+
14
+ # Type alias for document inputs
15
+ DocumentInput = Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl
16
+
17
+
18
+ class WorkflowRunsMixin:
19
+ """Mixin providing shared methods for workflow run operations."""
20
+
21
+ def prepare_create(
22
+ self,
23
+ workflow_id: str,
24
+ documents: Dict[str, DocumentInput],
25
+ ) -> PreparedRequest:
26
+ """Prepare a request to run a workflow with input documents.
27
+
28
+ Args:
29
+ workflow_id: The ID of the workflow to run
30
+ documents: Mapping of start node IDs to their input documents.
31
+ Each document can be a file path, bytes, file-like object,
32
+ MIMEData, PIL Image, or HttpUrl.
33
+
34
+ Returns:
35
+ PreparedRequest: The prepared request
36
+
37
+ Example:
38
+ >>> client.workflows.runs.create(
39
+ ... workflow_id="wf_abc123",
40
+ ... documents={
41
+ ... "start-node-1": Path("invoice.pdf"),
42
+ ... "start-node-2": Path("receipt.pdf"),
43
+ ... }
44
+ ... )
45
+ """
46
+ # Convert each document to MIMEData and then to the format expected by the backend
47
+ documents_payload: Dict[str, Dict[str, Any]] = {}
48
+ for node_id, document in documents.items():
49
+ mime_data = prepare_mime_document(document)
50
+ documents_payload[node_id] = {
51
+ "filename": mime_data.filename,
52
+ "content": mime_data.content,
53
+ "mime_type": mime_data.mime_type,
54
+ }
55
+
56
+ data = {"documents": documents_payload}
57
+ return PreparedRequest(method="POST", url=f"/v1/workflows/{workflow_id}/run", data=data)
58
+
59
+ def prepare_get(self, run_id: str) -> PreparedRequest:
60
+ """Prepare a request to get a workflow run by ID.
61
+
62
+ Args:
63
+ run_id: The ID of the workflow run to retrieve
64
+
65
+ Returns:
66
+ PreparedRequest: The prepared request
67
+ """
68
+ return PreparedRequest(method="GET", url=f"/v1/workflows/runs/{run_id}")
69
+
70
+
71
+ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
72
+ """Workflow Runs API wrapper for synchronous operations."""
73
+
74
+ def __init__(self, *args, **kwargs):
75
+ super().__init__(*args, **kwargs)
76
+
77
+ def create(
78
+ self,
79
+ workflow_id: str,
80
+ documents: Dict[str, DocumentInput],
81
+ ) -> WorkflowRun:
82
+ """Run a workflow with the provided input documents.
83
+
84
+ This creates a workflow run and starts execution in the background.
85
+ The returned WorkflowRun will have status "running" - use get()
86
+ to check for updates on the run status.
87
+
88
+ Args:
89
+ workflow_id: The ID of the workflow to run
90
+ documents: Mapping of start node IDs to their input documents.
91
+ Each document can be a file path, bytes, file-like object,
92
+ MIMEData, PIL Image, or HttpUrl.
93
+
94
+ Returns:
95
+ WorkflowRun: The created workflow run with status "running"
96
+
97
+ Raises:
98
+ HTTPException: If the request fails (e.g., workflow not found,
99
+ missing input documents for start nodes)
100
+
101
+ Example:
102
+ >>> run = client.workflows.runs.create(
103
+ ... workflow_id="wf_abc123",
104
+ ... documents={
105
+ ... "start-node-1": Path("invoice.pdf"),
106
+ ... "start-node-2": Path("receipt.pdf"),
107
+ ... }
108
+ ... )
109
+ >>> print(f"Run started: {run.id}, status: {run.status}")
110
+ """
111
+ request = self.prepare_create(workflow_id=workflow_id, documents=documents)
112
+ response = self._client._prepared_request(request)
113
+ return WorkflowRun.model_validate(response)
114
+
115
+ def get(self, run_id: str) -> WorkflowRun:
116
+ """Get a workflow run by ID.
117
+
118
+ Args:
119
+ run_id: The ID of the workflow run to retrieve
120
+
121
+ Returns:
122
+ WorkflowRun: The workflow run
123
+
124
+ Raises:
125
+ HTTPException: If the request fails (e.g., run not found)
126
+ """
127
+ request = self.prepare_get(run_id)
128
+ response = self._client._prepared_request(request)
129
+ return WorkflowRun.model_validate(response)
130
+
131
+
132
+ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
133
+ """Workflow Runs API wrapper for asynchronous operations."""
134
+
135
+ def __init__(self, *args, **kwargs):
136
+ super().__init__(*args, **kwargs)
137
+
138
+ async def create(
139
+ self,
140
+ workflow_id: str,
141
+ documents: Dict[str, DocumentInput],
142
+ ) -> WorkflowRun:
143
+ """Run a workflow with the provided input documents.
144
+
145
+ This creates a workflow run and starts execution in the background.
146
+ The returned WorkflowRun will have status "running" - use get()
147
+ to check for updates on the run status.
148
+
149
+ Args:
150
+ workflow_id: The ID of the workflow to run
151
+ documents: Mapping of start node IDs to their input documents.
152
+ Each document can be a file path, bytes, file-like object,
153
+ MIMEData, PIL Image, or HttpUrl.
154
+
155
+ Returns:
156
+ WorkflowRun: The created workflow run with status "running"
157
+
158
+ Raises:
159
+ HTTPException: If the request fails (e.g., workflow not found,
160
+ missing input documents for start nodes)
161
+
162
+ Example:
163
+ >>> run = await client.workflows.runs.create(
164
+ ... workflow_id="wf_abc123",
165
+ ... documents={
166
+ ... "start-node-1": Path("invoice.pdf"),
167
+ ... "start-node-2": Path("receipt.pdf"),
168
+ ... }
169
+ ... )
170
+ >>> print(f"Run started: {run.id}, status: {run.status}")
171
+ """
172
+ request = self.prepare_create(workflow_id=workflow_id, documents=documents)
173
+ response = await self._client._prepared_request(request)
174
+ return WorkflowRun.model_validate(response)
175
+
176
+ async def get(self, run_id: str) -> WorkflowRun:
177
+ """Get a workflow run by ID.
178
+
179
+ Args:
180
+ run_id: The ID of the workflow run to retrieve
181
+
182
+ Returns:
183
+ WorkflowRun: The workflow run
184
+
185
+ Raises:
186
+ HTTPException: If the request fails (e.g., run not found)
187
+ """
188
+ request = self.prepare_get(run_id)
189
+ response = await self._client._prepared_request(request)
190
+ return WorkflowRun.model_validate(response)
@@ -107,10 +107,15 @@ class OCRResult(BaseModel):
107
107
  class InferFormSchemaRequest(BaseModel):
108
108
  """Request to infer form schema from a PDF or DOCX document."""
109
109
 
110
- document: MIMEData = Field(..., description="Input document (PDF or DOCX). DOCX files will be converted to PDF.")
110
+ document: MIMEData = Field(..., description="Input document (PDF, DOCX, XLSX or PPTX).")
111
111
  model: str = Field(default="retab-small", description="LLM model to use for inference")
112
112
 
113
113
 
114
+ class EditConfig(BaseModel):
115
+ """Configuration for edit requests."""
116
+ color: str = Field(default="#000080", description="Hex code of the color to use for the filled text")
117
+
118
+
114
119
  class EditRequest(BaseModel):
115
120
  """Request for the infer_and_fill_schema endpoint.
116
121
 
@@ -118,10 +123,11 @@ class EditRequest(BaseModel):
118
123
  - When `document` is provided: OCR + LLM inference to detect and fill form fields
119
124
  - When `template_id` is provided: Uses pre-defined form fields from the template (PDF only)
120
125
  """
121
- document: Optional[MIMEData] = Field(default=None, description="Input document (PDF or DOCX). DOCX files will be converted to PDF. Mutually exclusive with template_id.")
126
+ document: Optional[MIMEData] = Field(default=None, description="Input document (PDF, DOCX, XLSX or PPTX). Mutually exclusive with template_id.")
122
127
  model: str = Field(default="retab-small", description="LLM model to use for inference")
123
128
  instructions: str = Field(..., description="Instructions to fill the form")
124
129
  template_id: Optional[str] = Field(default=None, description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
130
+ config: EditConfig = Field(default_factory=EditConfig, description="Configuration for the edit request")
125
131
 
126
132
  class EditResponse(BaseModel):
127
133
  """Response from the fill_form endpoint.
@@ -5,7 +5,7 @@ from pydantic import BaseModel, Field
5
5
  import datetime
6
6
 
7
7
  from ..mime import BaseMIMEData, MIMEData
8
- from ..documents.edit import FormField
8
+ from ..documents.edit import FormField, EditConfig
9
9
 
10
10
 
11
11
  class EditTemplate(BaseModel):
@@ -48,3 +48,4 @@ class FillTemplateRequest(BaseModel):
48
48
  model: str = Field(default="retab-small", description="LLM model to use for inference")
49
49
  instructions: str = Field(..., description="Instructions to fill the form")
50
50
  template_id: str = Field(..., description="Template ID to use for filling. When provided, uses the template's pre-defined form fields and empty PDF. Only works for PDF documents. Mutually exclusive with document.")
51
+ config: EditConfig = Field(default_factory=EditConfig, description="Configuration for the fill request")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.84
3
+ Version: 0.0.86
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -9,15 +9,19 @@ retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,46
9
9
  retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
10
10
  retab/resources/documents/client.py,sha256=0ZOJojT4M9QZ53nheS_vuNZWcnmwTnKx3YqYyJ7_sGY,48912
11
11
  retab/resources/edit/__init__.py,sha256=yycIstpTSKsz2qXbrY3Buzd35UDcPWvb5hw6Eb2rLow,69
12
- retab/resources/edit/client.py,sha256=osWvuKj2SNH6-nQKsWcTYcm3jVENGlwGTvDnT45nDBY,6649
12
+ retab/resources/edit/client.py,sha256=DJKlwh8xui7IDRjwPmiGKTC1_HshXLYXX-xr93FhSbo,1270
13
+ retab/resources/edit/agent/__init__.py,sha256=i5IdOMhwOOQmnhPFeBbh7-ChqwQh5q7oLow1zJ0ZAwM,74
14
+ retab/resources/edit/agent/client.py,sha256=z5kIC7vAPQi98jFfHXymjYg7gf5bSQSCELFGBKBg1s4,5951
13
15
  retab/resources/edit/templates/__init__.py,sha256=n-zA_HXo7iGgeIclSwcsxmSueXJIRMo0iZjk_sax85I,90
14
- retab/resources/edit/templates/client.py,sha256=Eevzy5JaQmG5-hEshugQvrhgIBAjgZ8ZYZkpBSKEdBQ,19729
16
+ retab/resources/edit/templates/client.py,sha256=kEyqat5I84_QBeWSjptteSwvlMGRZ1UF9KDzH7p0f9s,20173
15
17
  retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
16
18
  retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
17
19
  retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
18
20
  retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQzIXho,15593
19
21
  retab/resources/workflows/__init__.py,sha256=-I0QNX7XKEr8ZJTV4-awMyKxZqGlSkKMdibiHiB7cZ0,89
20
- retab/resources/workflows/client.py,sha256=svKOmkqB1-P56IjzauWNdfQtzT0rlWRIu3EddwX-HiM,6743
22
+ retab/resources/workflows/client.py,sha256=G1dYV66Wsas_QWQ9O2N7s1VUt72TP1W1ZG-_cEWEURM,755
23
+ retab/resources/workflows/runs/__init__.py,sha256=5hPZ-70StN0U8bOlhm9H_ZXFljBjy8VoWQRu1_cGAVM,101
24
+ retab/resources/workflows/runs/client.py,sha256=8l87Sf5RNNLIJNyhCwCprqA9ffq3J9zSlwoQHdyrEN4,6771
21
25
  retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
26
  retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
23
27
  retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
@@ -29,12 +33,12 @@ retab/types/documents/__init__.py,sha256=t1jXdpYqi-zQMC_9uM0m7eA1hRU0MCROwUx89cc
29
33
  retab/types/documents/classify.py,sha256=Tb6d_7kuTlWLr7bPn782dHrjtUVBCvXV3o9zm7j2lmE,1128
30
34
  retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
31
35
  retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_igR6X9SboojfA,7267
32
- retab/types/documents/edit.py,sha256=YOsLE4nDf5XLrgkoAxKvU5pivfTSHSjrQSIm2Ezyfn8,5424
36
+ retab/types/documents/edit.py,sha256=b6UcYLOJkClpMu4QyYmdp-X4WtN8U_3oiMBc1KLklVY,5663
33
37
  retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
34
38
  retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
35
39
  retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
36
40
  retab/types/edit/__init__.py,sha256=M8hF97h7fX8RP9IsB6qpkw0eyvO0DFQvP6FmWL8caCQ,331
37
- retab/types/edit/templates.py,sha256=4ndnk-MlJE7roP_YktgxLpRSd68hdwNDWiqAFMy0Ddo,2291
41
+ retab/types/edit/templates.py,sha256=RLRIMdXzU-5_3XPf0iMSozjRTAP5Tliq0nrjlZn0l8E,2412
38
42
  retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
43
  retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
40
44
  retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
@@ -55,7 +59,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
55
59
  retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
56
60
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
57
61
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
58
- retab-0.0.84.dist-info/METADATA,sha256=8UeP_dWkqP9GOjbR1vpSGZwa9m528VEYNqkxLvbFrfE,4532
59
- retab-0.0.84.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- retab-0.0.84.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
61
- retab-0.0.84.dist-info/RECORD,,
62
+ retab-0.0.86.dist-info/METADATA,sha256=X2bfnXHFaYuJVUKWbvbhoxOBoboro6h5GxBY-bVOHc0,4532
63
+ retab-0.0.86.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ retab-0.0.86.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
65
+ retab-0.0.86.dist-info/RECORD,,
File without changes