retab 0.0.89__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/client.py +3 -1
- retab/resources/jobs/__init__.py +3 -0
- retab/resources/jobs/client.py +252 -0
- retab/types/jobs.py +90 -0
- {retab-0.0.89.dist-info → retab-0.0.90.dist-info}/METADATA +1 -1
- {retab-0.0.89.dist-info → retab-0.0.90.dist-info}/RECORD +8 -5
- {retab-0.0.89.dist-info → retab-0.0.90.dist-info}/WHEEL +0 -0
- {retab-0.0.89.dist-info → retab-0.0.90.dist-info}/top_level.txt +0 -0
retab/client.py
CHANGED
|
@@ -10,7 +10,7 @@ import backoff.types
|
|
|
10
10
|
import httpx
|
|
11
11
|
import truststore
|
|
12
12
|
|
|
13
|
-
from .resources import documents, models, schemas, projects, extractions, edit, workflows
|
|
13
|
+
from .resources import documents, models, schemas, projects, extractions, edit, workflows, jobs
|
|
14
14
|
from .types.standards import PreparedRequest, FieldUnset
|
|
15
15
|
|
|
16
16
|
|
|
@@ -190,6 +190,7 @@ class Retab(BaseRetab):
|
|
|
190
190
|
self.schemas = schemas.Schemas(client=self)
|
|
191
191
|
self.edit = edit.Edit(client=self)
|
|
192
192
|
self.workflows = workflows.Workflows(client=self)
|
|
193
|
+
self.jobs = jobs.Jobs(client=self)
|
|
193
194
|
def _request(
|
|
194
195
|
self,
|
|
195
196
|
method: str,
|
|
@@ -488,6 +489,7 @@ class AsyncRetab(BaseRetab):
|
|
|
488
489
|
self.schemas = schemas.AsyncSchemas(client=self)
|
|
489
490
|
self.edit = edit.AsyncEdit(client=self)
|
|
490
491
|
self.workflows = workflows.AsyncWorkflows(client=self)
|
|
492
|
+
self.jobs = jobs.AsyncJobs(client=self)
|
|
491
493
|
|
|
492
494
|
def _parse_response(self, response: httpx.Response) -> Any:
|
|
493
495
|
"""Parse response based on content-type.
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Jobs API Resource
|
|
3
|
+
|
|
4
|
+
Provides synchronous and asynchronous clients for the Jobs API.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..._resource import AsyncAPIResource, SyncAPIResource
|
|
10
|
+
from ...types.jobs import Job, JobListResponse, JobStatus, SupportedEndpoint
|
|
11
|
+
from ...types.standards import PreparedRequest
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseJobsMixin:
|
|
15
|
+
"""Shared methods for preparing Jobs API requests."""
|
|
16
|
+
|
|
17
|
+
def _prepare_create(
|
|
18
|
+
self,
|
|
19
|
+
endpoint: SupportedEndpoint,
|
|
20
|
+
request: dict[str, Any],
|
|
21
|
+
metadata: dict[str, str] | None = None,
|
|
22
|
+
) -> PreparedRequest:
|
|
23
|
+
data = {
|
|
24
|
+
"endpoint": endpoint,
|
|
25
|
+
"request": request,
|
|
26
|
+
}
|
|
27
|
+
if metadata is not None:
|
|
28
|
+
data["metadata"] = metadata
|
|
29
|
+
return PreparedRequest(method="POST", url="/v1/jobs", data=data)
|
|
30
|
+
|
|
31
|
+
def _prepare_retrieve(self, job_id: str) -> PreparedRequest:
|
|
32
|
+
return PreparedRequest(method="GET", url=f"/v1/jobs/{job_id}")
|
|
33
|
+
|
|
34
|
+
def _prepare_cancel(self, job_id: str) -> PreparedRequest:
|
|
35
|
+
return PreparedRequest(method="POST", url=f"/v1/jobs/{job_id}/cancel")
|
|
36
|
+
|
|
37
|
+
def _prepare_list(
|
|
38
|
+
self,
|
|
39
|
+
after: str | None = None,
|
|
40
|
+
limit: int = 20,
|
|
41
|
+
status: JobStatus | None = None,
|
|
42
|
+
) -> PreparedRequest:
|
|
43
|
+
params: dict[str, Any] = {"limit": limit}
|
|
44
|
+
if after is not None:
|
|
45
|
+
params["after"] = after
|
|
46
|
+
if status is not None:
|
|
47
|
+
params["status"] = status
|
|
48
|
+
return PreparedRequest(method="GET", url="/v1/jobs", params=params)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Jobs(SyncAPIResource, BaseJobsMixin):
|
|
52
|
+
"""
|
|
53
|
+
Synchronous Jobs API client.
|
|
54
|
+
|
|
55
|
+
The Jobs API allows you to submit long-running operations (extract, parse, split, classify, schema generation, edit/template fill, project extraction)
|
|
56
|
+
asynchronously and poll for their results.
|
|
57
|
+
|
|
58
|
+
Example:
|
|
59
|
+
>>> from retab import Retab
|
|
60
|
+
>>> client = Retab(api_key="your-api-key")
|
|
61
|
+
>>>
|
|
62
|
+
>>> # Create an async extraction job
|
|
63
|
+
>>> job = client.jobs.create(
|
|
64
|
+
... endpoint="/v1/documents/extract",
|
|
65
|
+
... request={
|
|
66
|
+
... "document": {"content": "...", "mime_type": "application/pdf"},
|
|
67
|
+
... "json_schema": {"type": "object", ...},
|
|
68
|
+
... "model": "gpt-4o",
|
|
69
|
+
... }
|
|
70
|
+
... )
|
|
71
|
+
>>>
|
|
72
|
+
>>> # Poll for completion
|
|
73
|
+
>>> while job.status not in ("completed", "failed", "cancelled", "expired"):
|
|
74
|
+
... import time
|
|
75
|
+
... time.sleep(5)
|
|
76
|
+
... job = client.jobs.retrieve(job.id)
|
|
77
|
+
>>>
|
|
78
|
+
>>> if job.status == "completed":
|
|
79
|
+
... print(job.response.body)
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
def create(
|
|
83
|
+
self,
|
|
84
|
+
endpoint: SupportedEndpoint,
|
|
85
|
+
request: dict[str, Any],
|
|
86
|
+
metadata: dict[str, str] | None = None,
|
|
87
|
+
) -> Job:
|
|
88
|
+
"""
|
|
89
|
+
Create a new asynchronous job.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
endpoint: The API endpoint to run as a job; any SupportedEndpoint value (e.g. "/v1/documents/extract" or "/v1/documents/parse")
|
|
93
|
+
request: The full request body for the target endpoint
|
|
94
|
+
metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
Job: The created job with status "queued"
|
|
98
|
+
"""
|
|
99
|
+
prepared = self._prepare_create(endpoint, request, metadata)
|
|
100
|
+
response = self._client._prepared_request(prepared)
|
|
101
|
+
return Job.model_validate(response)
|
|
102
|
+
|
|
103
|
+
def retrieve(self, job_id: str) -> Job:
|
|
104
|
+
"""
|
|
105
|
+
Retrieve a job by ID.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
job_id: The job ID to retrieve
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
Job: The job with current status and result (if completed)
|
|
112
|
+
"""
|
|
113
|
+
prepared = self._prepare_retrieve(job_id)
|
|
114
|
+
response = self._client._prepared_request(prepared)
|
|
115
|
+
return Job.model_validate(response)
|
|
116
|
+
|
|
117
|
+
def cancel(self, job_id: str) -> Job:
|
|
118
|
+
"""
|
|
119
|
+
Cancel a queued or in-progress job.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
job_id: The job ID to cancel
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
Job: The updated job with status "cancelled"
|
|
126
|
+
"""
|
|
127
|
+
prepared = self._prepare_cancel(job_id)
|
|
128
|
+
response = self._client._prepared_request(prepared)
|
|
129
|
+
return Job.model_validate(response)
|
|
130
|
+
|
|
131
|
+
def list(
|
|
132
|
+
self,
|
|
133
|
+
after: str | None = None,
|
|
134
|
+
limit: int = 20,
|
|
135
|
+
status: JobStatus | None = None,
|
|
136
|
+
) -> JobListResponse:
|
|
137
|
+
"""
|
|
138
|
+
List jobs with pagination and optional status filtering.
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
after: Pagination cursor (last ID from previous page)
|
|
142
|
+
limit: Number of jobs to return (1-100, default 20)
|
|
143
|
+
status: Filter by job status
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
JobListResponse: List of jobs with pagination info
|
|
147
|
+
"""
|
|
148
|
+
prepared = self._prepare_list(after, limit, status)
|
|
149
|
+
response = self._client._prepared_request(prepared)
|
|
150
|
+
return JobListResponse.model_validate(response)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class AsyncJobs(AsyncAPIResource, BaseJobsMixin):
|
|
154
|
+
"""
|
|
155
|
+
Asynchronous Jobs API client.
|
|
156
|
+
|
|
157
|
+
The Jobs API allows you to submit long-running operations (extract, parse, split, classify, schema generation, edit/template fill, project extraction)
|
|
158
|
+
asynchronously and poll for their results.
|
|
159
|
+
|
|
160
|
+
Example:
|
|
161
|
+
>>> from retab import AsyncRetab
|
|
162
|
+
>>> client = AsyncRetab(api_key="your-api-key")
|
|
163
|
+
>>>
|
|
164
|
+
>>> # Create an async extraction job
|
|
165
|
+
>>> job = await client.jobs.create(
|
|
166
|
+
... endpoint="/v1/documents/extract",
|
|
167
|
+
... request={
|
|
168
|
+
... "document": {"content": "...", "mime_type": "application/pdf"},
|
|
169
|
+
... "json_schema": {"type": "object", ...},
|
|
170
|
+
... "model": "gpt-4o",
|
|
171
|
+
... }
|
|
172
|
+
... )
|
|
173
|
+
>>>
|
|
174
|
+
>>> # Poll for completion
|
|
175
|
+
>>> while job.status not in ("completed", "failed", "cancelled", "expired"):
|
|
176
|
+
... import asyncio
|
|
177
|
+
... await asyncio.sleep(5)
|
|
178
|
+
... job = await client.jobs.retrieve(job.id)
|
|
179
|
+
>>>
|
|
180
|
+
>>> if job.status == "completed":
|
|
181
|
+
... print(job.response.body)
|
|
182
|
+
"""
|
|
183
|
+
|
|
184
|
+
async def create(
|
|
185
|
+
self,
|
|
186
|
+
endpoint: SupportedEndpoint,
|
|
187
|
+
request: dict[str, Any],
|
|
188
|
+
metadata: dict[str, str] | None = None,
|
|
189
|
+
) -> Job:
|
|
190
|
+
"""
|
|
191
|
+
Create a new asynchronous job.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
endpoint: The API endpoint to run as a job; any SupportedEndpoint value (e.g. "/v1/documents/extract" or "/v1/documents/parse")
|
|
195
|
+
request: The full request body for the target endpoint
|
|
196
|
+
metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
Job: The created job with status "queued"
|
|
200
|
+
"""
|
|
201
|
+
prepared = self._prepare_create(endpoint, request, metadata)
|
|
202
|
+
response = await self._client._prepared_request(prepared)
|
|
203
|
+
return Job.model_validate(response)
|
|
204
|
+
|
|
205
|
+
async def retrieve(self, job_id: str) -> Job:
|
|
206
|
+
"""
|
|
207
|
+
Retrieve a job by ID.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
job_id: The job ID to retrieve
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
Job: The job with current status and result (if completed)
|
|
214
|
+
"""
|
|
215
|
+
prepared = self._prepare_retrieve(job_id)
|
|
216
|
+
response = await self._client._prepared_request(prepared)
|
|
217
|
+
return Job.model_validate(response)
|
|
218
|
+
|
|
219
|
+
async def cancel(self, job_id: str) -> Job:
|
|
220
|
+
"""
|
|
221
|
+
Cancel a queued or in-progress job.
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
job_id: The job ID to cancel
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
Job: The updated job with status "cancelled"
|
|
228
|
+
"""
|
|
229
|
+
prepared = self._prepare_cancel(job_id)
|
|
230
|
+
response = await self._client._prepared_request(prepared)
|
|
231
|
+
return Job.model_validate(response)
|
|
232
|
+
|
|
233
|
+
async def list(
|
|
234
|
+
self,
|
|
235
|
+
after: str | None = None,
|
|
236
|
+
limit: int = 20,
|
|
237
|
+
status: JobStatus | None = None,
|
|
238
|
+
) -> JobListResponse:
|
|
239
|
+
"""
|
|
240
|
+
List jobs with pagination and optional status filtering.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
after: Pagination cursor (last ID from previous page)
|
|
244
|
+
limit: Number of jobs to return (1-100, default 20)
|
|
245
|
+
status: Filter by job status
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
JobListResponse: List of jobs with pagination info
|
|
249
|
+
"""
|
|
250
|
+
prepared = self._prepare_list(after, limit, status)
|
|
251
|
+
response = await self._client._prepared_request(prepared)
|
|
252
|
+
return JobListResponse.model_validate(response)
|
retab/types/jobs.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Jobs API Types
|
|
3
|
+
|
|
4
|
+
Pydantic models for the asynchronous Jobs API.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
JobStatus = Literal[
|
|
13
|
+
"validating",
|
|
14
|
+
"queued",
|
|
15
|
+
"in_progress",
|
|
16
|
+
"completed",
|
|
17
|
+
"failed",
|
|
18
|
+
"cancelled",
|
|
19
|
+
"expired",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
SupportedEndpoint = Literal[
|
|
23
|
+
"/v1/documents/extract",
|
|
24
|
+
"/v1/documents/parse",
|
|
25
|
+
"/v1/documents/split",
|
|
26
|
+
"/v1/documents/classify",
|
|
27
|
+
"/v1/schemas/generate",
|
|
28
|
+
"/v1/edit/agent/fill",
|
|
29
|
+
"/v1/edit/templates/fill",
|
|
30
|
+
"/v1/edit/templates/generate",
|
|
31
|
+
"/v1/projects/extract", # Requires "project_id" in request body
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class JobResponse(BaseModel):
|
|
36
|
+
"""Response stored when job completes successfully."""
|
|
37
|
+
status_code: int
|
|
38
|
+
body: dict[str, Any]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class JobError(BaseModel):
|
|
42
|
+
"""Error details when job fails."""
|
|
43
|
+
code: str
|
|
44
|
+
message: str
|
|
45
|
+
details: dict[str, Any] | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Job(BaseModel):
|
|
49
|
+
"""
|
|
50
|
+
Job object representing an asynchronous operation.
|
|
51
|
+
|
|
52
|
+
Use this to track the status of long-running operations like extract, parse,
|
|
53
|
+
split, classify, schema generation, and template operations.
|
|
54
|
+
"""
|
|
55
|
+
id: str
|
|
56
|
+
object: Literal["job"] = "job"
|
|
57
|
+
status: JobStatus
|
|
58
|
+
endpoint: SupportedEndpoint
|
|
59
|
+
request: dict[str, Any]
|
|
60
|
+
response: JobResponse | None = None
|
|
61
|
+
error: JobError | None = None
|
|
62
|
+
|
|
63
|
+
# Timestamps (Unix timestamps)
|
|
64
|
+
created_at: int
|
|
65
|
+
started_at: int | None = None
|
|
66
|
+
completed_at: int | None = None
|
|
67
|
+
expires_at: int
|
|
68
|
+
|
|
69
|
+
# User context
|
|
70
|
+
organization_id: str
|
|
71
|
+
metadata: dict[str, str] | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class CreateJobRequest(BaseModel):
|
|
75
|
+
"""Request body for creating a new job."""
|
|
76
|
+
endpoint: SupportedEndpoint
|
|
77
|
+
request: dict[str, Any]
|
|
78
|
+
metadata: dict[str, str] | None = Field(
|
|
79
|
+
default=None,
|
|
80
|
+
description="Max 16 pairs; keys ≤64 chars, values ≤512 chars"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class JobListResponse(BaseModel):
|
|
85
|
+
"""Response for listing jobs."""
|
|
86
|
+
object: Literal["list"] = "list"
|
|
87
|
+
data: list[Job]
|
|
88
|
+
first_id: str | None = None
|
|
89
|
+
last_id: str | None = None
|
|
90
|
+
has_more: bool = False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
retab/__init__.py,sha256=s4GawWTRBYz4VY-CyAV5-ZdFtdw8V5oopGIYm9GgdSo,188
|
|
2
2
|
retab/_resource.py,sha256=JfAU4UTa05ugWfbrpO7fsVr_pFewht99NkoIfK6kBQM,577
|
|
3
|
-
retab/client.py,sha256=
|
|
3
|
+
retab/client.py,sha256=Ds-Sy3ynN9GusN5rDrc2ogX3ATv-Dq1MuiZeDnLOWGk,30408
|
|
4
4
|
retab/generate_types.py,sha256=cUu1IX65uU__MHivmEb_PZtzAi8DYsvppZvcY30hj90,8425
|
|
5
5
|
retab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -16,6 +16,8 @@ retab/resources/edit/templates/__init__.py,sha256=n-zA_HXo7iGgeIclSwcsxmSueXJIRM
|
|
|
16
16
|
retab/resources/edit/templates/client.py,sha256=kEyqat5I84_QBeWSjptteSwvlMGRZ1UF9KDzH7p0f9s,20173
|
|
17
17
|
retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
|
|
18
18
|
retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
|
|
19
|
+
retab/resources/jobs/__init__.py,sha256=g7WnNAw69CExMSyfxU9ROcSj-KODjxeLe2YlUqi8l0c,69
|
|
20
|
+
retab/resources/jobs/client.py,sha256=Cf7bafUzECqCXbCeKW396Q4fRFOMgjKDtgQ3e_ThIQY,8115
|
|
19
21
|
retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
|
|
20
22
|
retab/resources/projects/client.py,sha256=5LPAhJt5-nqBP4VWYvo0k7cW6HLGF6K9xMiHKQzIXho,15593
|
|
21
23
|
retab/resources/workflows/__init__.py,sha256=-I0QNX7XKEr8ZJTV4-awMyKxZqGlSkKMdibiHiB7cZ0,89
|
|
@@ -25,6 +27,7 @@ retab/resources/workflows/runs/client.py,sha256=GopedV363XnGl0mL3bZHWaOay12uAeTq
|
|
|
25
27
|
retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
28
|
retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
|
|
27
29
|
retab/types/inference_settings.py,sha256=wIivYffvEE7v6lhbjbhAZGssK4uYr64Oq6cZKxzY5_M,1131
|
|
30
|
+
retab/types/jobs.py,sha256=iVCl2EmlvvgvdO217gOGqvkuX-38le89C9XR7gnEb3E,2086
|
|
28
31
|
retab/types/mime.py,sha256=ZLNCD3pvgn5cbGfJwzrdkjgB9dMHCbN67YEV9bx47zE,10063
|
|
29
32
|
retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
|
|
30
33
|
retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
|
|
@@ -59,7 +62,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
|
|
|
59
62
|
retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
|
|
60
63
|
retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
|
|
61
64
|
retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
|
|
62
|
-
retab-0.0.
|
|
63
|
-
retab-0.0.
|
|
64
|
-
retab-0.0.
|
|
65
|
-
retab-0.0.
|
|
65
|
+
retab-0.0.90.dist-info/METADATA,sha256=ea40JF37wk3xxuQRUv17mvliwsqiOHP8ufyna9ScpyY,4532
|
|
66
|
+
retab-0.0.90.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
67
|
+
retab-0.0.90.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
|
|
68
|
+
retab-0.0.90.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|