retab 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
retab/client.py CHANGED
@@ -10,7 +10,7 @@ import backoff.types
10
10
  import httpx
11
11
  import truststore
12
12
 
13
- from .resources import documents, models, schemas, projects, extractions, edit
13
+ from .resources import documents, models, schemas, projects, extractions, edit, workflows
14
14
  from .types.standards import PreparedRequest, FieldUnset
15
15
 
16
16
 
@@ -189,7 +189,7 @@ class Retab(BaseRetab):
189
189
  self.models = models.Models(client=self)
190
190
  self.schemas = schemas.Schemas(client=self)
191
191
  self.edit = edit.Edit(client=self)
192
-
192
+ self.workflows = workflows.Workflows(client=self)
193
193
  def _request(
194
194
  self,
195
195
  method: str,
@@ -487,7 +487,8 @@ class AsyncRetab(BaseRetab):
487
487
  self.models = models.AsyncModels(client=self)
488
488
  self.schemas = schemas.AsyncSchemas(client=self)
489
489
  self.edit = edit.AsyncEdit(client=self)
490
-
490
+ self.workflows = workflows.AsyncWorkflows(client=self)
491
+
491
492
  def _parse_response(self, response: httpx.Response) -> Any:
492
493
  """Parse response based on content-type.
493
494
 
@@ -1,6 +1,6 @@
1
1
  from io import IOBase
2
2
  from pathlib import Path
3
- from typing import Any, Dict
3
+ from typing import Any, Dict, Optional
4
4
 
5
5
  import PIL.Image
6
6
  from pydantic import HttpUrl
@@ -21,15 +21,19 @@ class WorkflowRunsMixin:
21
21
  def prepare_create(
22
22
  self,
23
23
  workflow_id: str,
24
- documents: Dict[str, DocumentInput],
24
+ documents: Optional[Dict[str, DocumentInput]] = None,
25
+ json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
26
+ text_inputs: Optional[Dict[str, str]] = None,
25
27
  ) -> PreparedRequest:
26
- """Prepare a request to run a workflow with input documents.
28
+ """Prepare a request to run a workflow with input documents, JSON data, and/or text data.
27
29
 
28
30
  Args:
29
31
  workflow_id: The ID of the workflow to run
30
32
  documents: Mapping of start node IDs to their input documents.
31
33
  Each document can be a file path, bytes, file-like object,
32
34
  MIMEData, PIL Image, or HttpUrl.
35
+ json_inputs: Mapping of start_json node IDs to their input JSON data.
36
+ text_inputs: Mapping of start_text node IDs to their input text.
33
37
 
34
38
  Returns:
35
39
  PreparedRequest: The prepared request
@@ -40,20 +44,37 @@ class WorkflowRunsMixin:
40
44
  ... documents={
41
45
  ... "start-node-1": Path("invoice.pdf"),
42
46
  ... "start-node-2": Path("receipt.pdf"),
47
+ ... },
48
+ ... json_inputs={
49
+ ... "json-node-1": {"key": "value"},
50
+ ... },
51
+ ... text_inputs={
52
+ ... "text-node-1": "Hello, world!",
43
53
  ... }
44
54
  ... )
45
55
  """
56
+ data: Dict[str, Any] = {}
57
+
46
58
  # Convert each document to MIMEData and then to the format expected by the backend
47
- documents_payload: Dict[str, Dict[str, Any]] = {}
48
- for node_id, document in documents.items():
49
- mime_data = prepare_mime_document(document)
50
- documents_payload[node_id] = {
51
- "filename": mime_data.filename,
52
- "content": mime_data.content,
53
- "mime_type": mime_data.mime_type,
54
- }
55
-
56
- data = {"documents": documents_payload}
59
+ if documents:
60
+ documents_payload: Dict[str, Dict[str, Any]] = {}
61
+ for node_id, document in documents.items():
62
+ mime_data = prepare_mime_document(document)
63
+ documents_payload[node_id] = {
64
+ "filename": mime_data.filename,
65
+ "content": mime_data.content,
66
+ "mime_type": mime_data.mime_type,
67
+ }
68
+ data["documents"] = documents_payload
69
+
70
+ # Add JSON inputs directly
71
+ if json_inputs:
72
+ data["json_inputs"] = json_inputs
73
+
74
+ # Add text inputs directly
75
+ if text_inputs:
76
+ data["text_inputs"] = text_inputs
77
+
57
78
  return PreparedRequest(method="POST", url=f"/v1/workflows/{workflow_id}/run", data=data)
58
79
 
59
80
  def prepare_get(self, run_id: str) -> PreparedRequest:
@@ -77,9 +98,11 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
77
98
  def create(
78
99
  self,
79
100
  workflow_id: str,
80
- documents: Dict[str, DocumentInput],
101
+ documents: Optional[Dict[str, DocumentInput]] = None,
102
+ json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
103
+ text_inputs: Optional[Dict[str, str]] = None,
81
104
  ) -> WorkflowRun:
82
- """Run a workflow with the provided input documents.
105
+ """Run a workflow with the provided inputs.
83
106
 
84
107
  This creates a workflow run and starts execution in the background.
85
108
  The returned WorkflowRun will have status "running" - use get()
@@ -90,25 +113,37 @@ class WorkflowRuns(SyncAPIResource, WorkflowRunsMixin):
90
113
  documents: Mapping of start node IDs to their input documents.
91
114
  Each document can be a file path, bytes, file-like object,
92
115
  MIMEData, PIL Image, or HttpUrl.
116
+ json_inputs: Mapping of start_json node IDs to their input JSON data.
117
+ text_inputs: Mapping of start_text node IDs to their input text.
93
118
 
94
119
  Returns:
95
120
  WorkflowRun: The created workflow run with status "running"
96
121
 
97
122
  Raises:
98
123
  HTTPException: If the request fails (e.g., workflow not found,
99
- missing input documents for start nodes)
124
+ missing inputs for start nodes)
100
125
 
101
126
  Example:
102
127
  >>> run = client.workflows.runs.create(
103
128
  ... workflow_id="wf_abc123",
104
129
  ... documents={
105
130
  ... "start-node-1": Path("invoice.pdf"),
106
- ... "start-node-2": Path("receipt.pdf"),
131
+ ... },
132
+ ... json_inputs={
133
+ ... "json-node-1": {"key": "value"},
134
+ ... },
135
+ ... text_inputs={
136
+ ... "text-node-1": "Hello, world!",
107
137
  ... }
108
138
  ... )
109
139
  >>> print(f"Run started: {run.id}, status: {run.status}")
110
140
  """
111
- request = self.prepare_create(workflow_id=workflow_id, documents=documents)
141
+ request = self.prepare_create(
142
+ workflow_id=workflow_id,
143
+ documents=documents,
144
+ json_inputs=json_inputs,
145
+ text_inputs=text_inputs,
146
+ )
112
147
  response = self._client._prepared_request(request)
113
148
  return WorkflowRun.model_validate(response)
114
149
 
@@ -138,9 +173,11 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
138
173
  async def create(
139
174
  self,
140
175
  workflow_id: str,
141
- documents: Dict[str, DocumentInput],
176
+ documents: Optional[Dict[str, DocumentInput]] = None,
177
+ json_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
178
+ text_inputs: Optional[Dict[str, str]] = None,
142
179
  ) -> WorkflowRun:
143
- """Run a workflow with the provided input documents.
180
+ """Run a workflow with the provided inputs.
144
181
 
145
182
  This creates a workflow run and starts execution in the background.
146
183
  The returned WorkflowRun will have status "running" - use get()
@@ -151,25 +188,37 @@ class AsyncWorkflowRuns(AsyncAPIResource, WorkflowRunsMixin):
151
188
  documents: Mapping of start node IDs to their input documents.
152
189
  Each document can be a file path, bytes, file-like object,
153
190
  MIMEData, PIL Image, or HttpUrl.
191
+ json_inputs: Mapping of start_json node IDs to their input JSON data.
192
+ text_inputs: Mapping of start_text node IDs to their input text.
154
193
 
155
194
  Returns:
156
195
  WorkflowRun: The created workflow run with status "running"
157
196
 
158
197
  Raises:
159
198
  HTTPException: If the request fails (e.g., workflow not found,
160
- missing input documents for start nodes)
199
+ missing inputs for start nodes)
161
200
 
162
201
  Example:
163
202
  >>> run = await client.workflows.runs.create(
164
203
  ... workflow_id="wf_abc123",
165
204
  ... documents={
166
205
  ... "start-node-1": Path("invoice.pdf"),
167
- ... "start-node-2": Path("receipt.pdf"),
206
+ ... },
207
+ ... json_inputs={
208
+ ... "json-node-1": {"key": "value"},
209
+ ... },
210
+ ... text_inputs={
211
+ ... "text-node-1": "Hello, world!",
168
212
  ... }
169
213
  ... )
170
214
  >>> print(f"Run started: {run.id}, status: {run.status}")
171
215
  """
172
- request = self.prepare_create(workflow_id=workflow_id, documents=documents)
216
+ request = self.prepare_create(
217
+ workflow_id=workflow_id,
218
+ documents=documents,
219
+ json_inputs=json_inputs,
220
+ text_inputs=text_inputs,
221
+ )
173
222
  response = await self._client._prepared_request(request)
174
223
  return WorkflowRun.model_validate(response)
175
224
 
@@ -5,6 +5,7 @@ from ..mime import MIMEData
5
5
  class Category(BaseModel):
6
6
  name: str = Field(..., description="The name of the category")
7
7
  description: str = Field(..., description="The description of the category")
8
+ partition_key: str | None = Field(default=None, description="The key to partition the category")
8
9
 
9
10
 
10
11
  class SplitRequest(BaseModel):
@@ -13,20 +14,32 @@ class SplitRequest(BaseModel):
13
14
  model: str = Field(default="retab-small", description="The model to use to split the document")
14
15
 
15
16
 
17
+ class Partition(BaseModel):
18
+ key: str = Field(..., description="The partition key value (e.g., property ID, invoice number)")
19
+ pages: list[int] = Field(..., description="The pages of the partition (1-indexed)")
20
+ first_page_y_start: float = Field(default=0.0, description="The y coordinate of the first page of the partition")
21
+ last_page_y_end: float = Field(default=1.0, description="The y coordinate of the last page of the partition")
22
+
16
23
  class SplitResult(BaseModel):
17
24
  name: str = Field(..., description="The name of the category")
18
- start_page: int = Field(..., description="The start page of the category (1-indexed)")
19
- end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
25
+ pages: list[int] = Field(..., description="The pages of the category (1-indexed)")
26
+ partitions: list[Partition] = Field(default_factory=list, description="The partitions of the category")
20
27
 
21
28
 
22
29
  class SplitResponse(BaseModel):
23
30
  splits: list[SplitResult] = Field(..., description="The list of document splits with their page ranges")
24
31
 
25
32
 
33
+ class SplitOutputItem(BaseModel):
34
+ """Internal schema item for LLM structured output validation."""
35
+ name: str = Field(..., description="The name of the category")
36
+ start_page: int = Field(..., description="The start page of the category (1-indexed)")
37
+ end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
38
+
26
39
 
27
40
  class SplitOutputSchema(BaseModel):
28
41
  """Schema for LLM structured output."""
29
- splits: list[SplitResult] = Field(
30
- ...,
42
+ splits: list[SplitOutputItem] = Field(
43
+ ...,
31
44
  description="List of document sections, each classified into one of the provided categories with their page ranges"
32
45
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.87
3
+ Version: 0.0.89
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -1,6 +1,6 @@
1
1
  retab/__init__.py,sha256=s4GawWTRBYz4VY-CyAV5-ZdFtdw8V5oopGIYm9GgdSo,188
2
2
  retab/_resource.py,sha256=JfAU4UTa05ugWfbrpO7fsVr_pFewht99NkoIfK6kBQM,577
3
- retab/client.py,sha256=VrOzEtZQPR4uydO8QJJYkMOoAiC1TfPbkXmTnatSQ0w,30172
3
+ retab/client.py,sha256=ExQLR-xwFKIwqA1DoH3JxI1BU2RB7kWAiMbwR073w1c,30311
4
4
  retab/generate_types.py,sha256=cUu1IX65uU__MHivmEb_PZtzAi8DYsvppZvcY30hj90,8425
5
5
  retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,7 +21,7 @@ retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQ
21
21
  retab/resources/workflows/__init__.py,sha256=-I0QNX7XKEr8ZJTV4-awMyKxZqGlSkKMdibiHiB7cZ0,89
22
22
  retab/resources/workflows/client.py,sha256=G1dYV66Wsas_QWQ9O2N7s1VUt72TP1W1ZG-_cEWEURM,755
23
23
  retab/resources/workflows/runs/__init__.py,sha256=5hPZ-70StN0U8bOlhm9H_ZXFljBjy8VoWQRu1_cGAVM,101
24
- retab/resources/workflows/runs/client.py,sha256=8l87Sf5RNNLIJNyhCwCprqA9ffq3J9zSlwoQHdyrEN4,6771
24
+ retab/resources/workflows/runs/client.py,sha256=GopedV363XnGl0mL3bZHWaOay12uAeTqq4iIEJSadMA,8739
25
25
  retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
27
27
  retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
@@ -36,7 +36,7 @@ retab/types/documents/create_messages.py,sha256=Uym0SnVUGkyt1C5AOD37BsZ3puyeu_ig
36
36
  retab/types/documents/edit.py,sha256=b6UcYLOJkClpMu4QyYmdp-X4WtN8U_3oiMBc1KLklVY,5663
37
37
  retab/types/documents/extract.py,sha256=x_59fm69-icsxxGRgpFd0NN-SLRoMYqbvfCZuG7zyGc,18033
38
38
  retab/types/documents/parse.py,sha256=MXe7zh3DusWQhGe0Sr95nPy6cB8DRX8MA4Hmjj_AP7E,1300
39
- retab/types/documents/split.py,sha256=xRdJ6IpSRAPi_ZtAG2FNqg5A-v5tzfb1QQkW5UfO2pY,1246
39
+ retab/types/documents/split.py,sha256=Bjk5iJdS3v7I3rCvqpFUPlzgO4HINqh3uMPQJg-MqPc,2166
40
40
  retab/types/edit/__init__.py,sha256=M8hF97h7fX8RP9IsB6qpkw0eyvO0DFQvP6FmWL8caCQ,331
41
41
  retab/types/edit/templates.py,sha256=RLRIMdXzU-5_3XPf0iMSozjRTAP5Tliq0nrjlZn0l8E,2412
42
42
  retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,7 +59,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
59
59
  retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
60
60
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
61
61
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
62
- retab-0.0.87.dist-info/METADATA,sha256=Rz6B3ctJWOHF0hcaFxc2hEyBgpeBRgvScGxFNGjALMg,4532
63
- retab-0.0.87.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- retab-0.0.87.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
65
- retab-0.0.87.dist-info/RECORD,,
62
+ retab-0.0.89.dist-info/METADATA,sha256=1ppp_sgtdC53grfu4xxD91N_-BDa7FBdofWz_Vd1WTw,4532
63
+ retab-0.0.89.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ retab-0.0.89.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
65
+ retab-0.0.89.dist-info/RECORD,,
File without changes