retab 0.0.89__tar.gz → 0.0.91__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {retab-0.0.89 → retab-0.0.91}/PKG-INFO +1 -1
  2. {retab-0.0.89 → retab-0.0.91}/retab/client.py +3 -1
  3. {retab-0.0.89 → retab-0.0.91}/retab/resources/documents/client.py +22 -21
  4. retab-0.0.91/retab/resources/jobs/__init__.py +3 -0
  5. retab-0.0.91/retab/resources/jobs/client.py +252 -0
  6. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/__init__.py +4 -2
  7. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/classify.py +4 -1
  8. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/split.py +13 -13
  9. retab-0.0.91/retab/types/jobs.py +90 -0
  10. {retab-0.0.89 → retab-0.0.91}/retab.egg-info/PKG-INFO +1 -1
  11. {retab-0.0.89 → retab-0.0.91}/retab.egg-info/SOURCES.txt +3 -0
  12. {retab-0.0.89 → retab-0.0.91}/setup.py +1 -1
  13. {retab-0.0.89 → retab-0.0.91}/README.md +0 -0
  14. {retab-0.0.89 → retab-0.0.91}/pyproject.toml +0 -0
  15. {retab-0.0.89 → retab-0.0.91}/retab/__init__.py +0 -0
  16. {retab-0.0.89 → retab-0.0.91}/retab/_resource.py +0 -0
  17. {retab-0.0.89 → retab-0.0.91}/retab/generate_types.py +0 -0
  18. {retab-0.0.89 → retab-0.0.91}/retab/py.typed +0 -0
  19. {retab-0.0.89 → retab-0.0.91}/retab/resources/__init__.py +0 -0
  20. {retab-0.0.89 → retab-0.0.91}/retab/resources/documents/__init__.py +0 -0
  21. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/__init__.py +0 -0
  22. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/agent/__init__.py +0 -0
  23. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/agent/client.py +0 -0
  24. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/client.py +0 -0
  25. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/templates/__init__.py +0 -0
  26. {retab-0.0.89 → retab-0.0.91}/retab/resources/edit/templates/client.py +0 -0
  27. {retab-0.0.89 → retab-0.0.91}/retab/resources/extractions/__init__.py +0 -0
  28. {retab-0.0.89 → retab-0.0.91}/retab/resources/extractions/client.py +0 -0
  29. {retab-0.0.89 → retab-0.0.91}/retab/resources/models.py +0 -0
  30. {retab-0.0.89 → retab-0.0.91}/retab/resources/projects/__init__.py +0 -0
  31. {retab-0.0.89 → retab-0.0.91}/retab/resources/projects/client.py +0 -0
  32. {retab-0.0.89 → retab-0.0.91}/retab/resources/schemas.py +0 -0
  33. {retab-0.0.89 → retab-0.0.91}/retab/resources/workflows/__init__.py +0 -0
  34. {retab-0.0.89 → retab-0.0.91}/retab/resources/workflows/client.py +0 -0
  35. {retab-0.0.89 → retab-0.0.91}/retab/resources/workflows/runs/__init__.py +0 -0
  36. {retab-0.0.89 → retab-0.0.91}/retab/resources/workflows/runs/client.py +0 -0
  37. {retab-0.0.89 → retab-0.0.91}/retab/types/__init__.py +0 -0
  38. {retab-0.0.89 → retab-0.0.91}/retab/types/chat.py +0 -0
  39. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/correct_orientation.py +0 -0
  40. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/create_messages.py +0 -0
  41. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/edit.py +0 -0
  42. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/extract.py +0 -0
  43. {retab-0.0.89 → retab-0.0.91}/retab/types/documents/parse.py +0 -0
  44. {retab-0.0.89 → retab-0.0.91}/retab/types/edit/__init__.py +0 -0
  45. {retab-0.0.89 → retab-0.0.91}/retab/types/edit/templates.py +0 -0
  46. {retab-0.0.89 → retab-0.0.91}/retab/types/extractions/__init__.py +0 -0
  47. {retab-0.0.89 → retab-0.0.91}/retab/types/extractions/types.py +0 -0
  48. {retab-0.0.89 → retab-0.0.91}/retab/types/inference_settings.py +0 -0
  49. {retab-0.0.89 → retab-0.0.91}/retab/types/mime.py +0 -0
  50. {retab-0.0.89 → retab-0.0.91}/retab/types/modality.py +0 -0
  51. {retab-0.0.89 → retab-0.0.91}/retab/types/pagination.py +0 -0
  52. {retab-0.0.89 → retab-0.0.91}/retab/types/projects/__init__.py +0 -0
  53. {retab-0.0.89 → retab-0.0.91}/retab/types/projects/metrics.py +0 -0
  54. {retab-0.0.89 → retab-0.0.91}/retab/types/projects/model.py +0 -0
  55. {retab-0.0.89 → retab-0.0.91}/retab/types/projects/predictions.py +0 -0
  56. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/__init__.py +0 -0
  57. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/chat.py +0 -0
  58. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/generate.py +0 -0
  59. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/layout.py +0 -0
  60. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/model.py +0 -0
  61. {retab-0.0.89 → retab-0.0.91}/retab/types/schemas/templates.py +0 -0
  62. {retab-0.0.89 → retab-0.0.91}/retab/types/standards.py +0 -0
  63. {retab-0.0.89 → retab-0.0.91}/retab/types/workflows/__init__.py +0 -0
  64. {retab-0.0.89 → retab-0.0.91}/retab/types/workflows/model.py +0 -0
  65. {retab-0.0.89 → retab-0.0.91}/retab/utils/__init__.py +0 -0
  66. {retab-0.0.89 → retab-0.0.91}/retab/utils/display.py +0 -0
  67. {retab-0.0.89 → retab-0.0.91}/retab/utils/hashing.py +0 -0
  68. {retab-0.0.89 → retab-0.0.91}/retab/utils/json_schema.py +0 -0
  69. {retab-0.0.89 → retab-0.0.91}/retab/utils/mime.py +0 -0
  70. {retab-0.0.89 → retab-0.0.91}/retab/utils/stream_context_managers.py +0 -0
  71. {retab-0.0.89 → retab-0.0.91}/retab.egg-info/dependency_links.txt +0 -0
  72. {retab-0.0.89 → retab-0.0.91}/retab.egg-info/requires.txt +0 -0
  73. {retab-0.0.89 → retab-0.0.91}/retab.egg-info/top_level.txt +0 -0
  74. {retab-0.0.89 → retab-0.0.91}/setup.cfg +0 -0
  75. {retab-0.0.89 → retab-0.0.91}/tests/test_projects.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.89
3
+ Version: 0.0.91
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -10,7 +10,7 @@ import backoff.types
10
10
  import httpx
11
11
  import truststore
12
12
 
13
- from .resources import documents, models, schemas, projects, extractions, edit, workflows
13
+ from .resources import documents, models, schemas, projects, extractions, edit, workflows, jobs
14
14
  from .types.standards import PreparedRequest, FieldUnset
15
15
 
16
16
 
@@ -190,6 +190,7 @@ class Retab(BaseRetab):
190
190
  self.schemas = schemas.Schemas(client=self)
191
191
  self.edit = edit.Edit(client=self)
192
192
  self.workflows = workflows.Workflows(client=self)
193
+ self.jobs = jobs.Jobs(client=self)
193
194
  def _request(
194
195
  self,
195
196
  method: str,
@@ -488,6 +489,7 @@ class AsyncRetab(BaseRetab):
488
489
  self.schemas = schemas.AsyncSchemas(client=self)
489
490
  self.edit = edit.AsyncEdit(client=self)
490
491
  self.workflows = workflows.AsyncWorkflows(client=self)
492
+ self.jobs = jobs.AsyncJobs(client=self)
491
493
 
492
494
  def _parse_response(self, response: httpx.Response) -> Any:
493
495
  """Parse response based on content-type.
@@ -16,7 +16,8 @@ from ...types.chat import ChatCompletionRetabMessage
16
16
  from ...types.documents.edit import EditRequest, EditResponse
17
17
  from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
18
18
  from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
19
- from ...types.documents.split import Category, SplitRequest, SplitResponse
19
+ from ...types.documents.split import Subdocument, SplitRequest, SplitResponse
20
+ from ...types.documents.classify import Category
20
21
  from ...types.documents.classify import ClassifyRequest, ClassifyResponse
21
22
  from ...types.mime import MIMEData
22
23
  from ...types.standards import PreparedRequest, FieldUnset
@@ -148,21 +149,21 @@ class BaseDocumentsMixin:
148
149
  def _prepare_split(
149
150
  self,
150
151
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
151
- categories: list[Category] | list[dict[str, str]],
152
+ subdocuments: list[Subdocument] | list[dict[str, str]],
152
153
  model: str,
153
154
  **extra_body: Any,
154
155
  ) -> PreparedRequest:
155
156
  mime_document = prepare_mime_document(document)
156
157
 
157
- # Convert dict categories to Category objects if needed
158
- category_objects = [
159
- Category(**cat) if isinstance(cat, dict) else cat
160
- for cat in categories
158
+ # Convert dict subdocuments to Subdocument objects if needed
159
+ subdocument_objects = [
160
+ Subdocument(**subdoc) if isinstance(subdoc, dict) else subdoc
161
+ for subdoc in subdocuments
161
162
  ]
162
163
 
163
164
  request_dict: dict[str, Any] = {
164
165
  "document": mime_document,
165
- "categories": category_objects,
166
+ "subdocuments": subdocument_objects,
166
167
  "model": model,
167
168
  }
168
169
 
@@ -644,20 +645,20 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
644
645
  def split(
645
646
  self,
646
647
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
647
- categories: list[Category] | list[dict[str, str]],
648
+ subdocuments: list[Subdocument] | list[dict[str, str]],
648
649
  model: str,
649
650
  **extra_body: Any,
650
651
  ) -> SplitResponse:
651
652
  """
652
- Split a document into sections based on provided categories.
653
+ Split a document into sections based on provided subdocuments.
653
654
 
654
655
  This method analyzes a multi-page document and classifies pages into
655
- user-defined categories, returning the page ranges for each section.
656
+ user-defined subdocuments, returning the page ranges for each section.
656
657
 
657
658
  Args:
658
659
  document: The document to split. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
659
- categories: List of categories to split the document into. Each category should have a 'name' and 'description'.
660
- Can be Category objects or dicts with 'name' and 'description' keys.
660
+ subdocuments: List of subdocuments to split the document into. Each subdocument should have a 'name' and 'description'.
661
+ Can be Subdocument objects or dicts with 'name' and 'description' keys.
661
662
  model: The AI model to use for document splitting (e.g., "gemini-2.5-flash").
662
663
 
663
664
  Returns:
@@ -672,7 +673,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
672
673
  response = retab.documents.split(
673
674
  document="invoice_batch.pdf",
674
675
  model="gemini-2.5-flash",
675
- categories=[
676
+ subdocuments=[
676
677
  {"name": "invoice", "description": "Invoice documents with billing information"},
677
678
  {"name": "receipt", "description": "Receipt documents for payments"},
678
679
  {"name": "contract", "description": "Legal contract documents"},
@@ -684,7 +685,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
684
685
  """
685
686
  request = self._prepare_split(
686
687
  document=document,
687
- categories=categories,
688
+ subdocuments=subdocuments,
688
689
  model=model,
689
690
  **extra_body,
690
691
  )
@@ -1039,20 +1040,20 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
1039
1040
  async def split(
1040
1041
  self,
1041
1042
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
1042
- categories: list[Category] | list[dict[str, str]],
1043
+ subdocuments: list[Subdocument] | list[dict[str, str]],
1043
1044
  model: str,
1044
1045
  **extra_body: Any,
1045
1046
  ) -> SplitResponse:
1046
1047
  """
1047
- Split a document into sections based on provided categories asynchronously.
1048
+ Split a document into sections based on provided subdocuments asynchronously.
1048
1049
 
1049
1050
  This method analyzes a multi-page document and classifies pages into
1050
- user-defined categories, returning the page ranges for each section.
1051
+ user-defined subdocuments, returning the page ranges for each section.
1051
1052
 
1052
1053
  Args:
1053
1054
  document: The document to split. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
1054
- categories: List of categories to split the document into. Each category should have a 'name' and 'description'.
1055
- Can be Category objects or dicts with 'name' and 'description' keys.
1055
+ subdocuments: List of subdocuments to split the document into. Each subdocument should have a 'name' and 'description'.
1056
+ Can be Subdocument objects or dicts with 'name' and 'description' keys.
1056
1057
  model: The AI model to use for document splitting (e.g., "gemini-2.5-flash").
1057
1058
 
1058
1059
  Returns:
@@ -1067,7 +1068,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
1067
1068
  response = await retab.documents.split(
1068
1069
  document="invoice_batch.pdf",
1069
1070
  model="gemini-2.5-flash",
1070
- categories=[
1071
+ subdocuments=[
1071
1072
  {"name": "invoice", "description": "Invoice documents with billing information"},
1072
1073
  {"name": "receipt", "description": "Receipt documents for payments"},
1073
1074
  {"name": "contract", "description": "Legal contract documents"},
@@ -1079,7 +1080,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
1079
1080
  """
1080
1081
  request = self._prepare_split(
1081
1082
  document=document,
1082
- categories=categories,
1083
+ subdocuments=subdocuments,
1083
1084
  model=model,
1084
1085
  **extra_body,
1085
1086
  )
@@ -0,0 +1,3 @@
1
+ from .client import AsyncJobs, Jobs
2
+
3
+ __all__ = ["Jobs", "AsyncJobs"]
@@ -0,0 +1,252 @@
1
+ """
2
+ Jobs API Resource
3
+
4
+ Provides synchronous and asynchronous clients for the Jobs API.
5
+ """
6
+
7
+ from typing import Any
8
+
9
+ from ..._resource import AsyncAPIResource, SyncAPIResource
10
+ from ...types.jobs import Job, JobListResponse, JobStatus, SupportedEndpoint
11
+ from ...types.standards import PreparedRequest
12
+
13
+
14
+ class BaseJobsMixin:
15
+ """Shared methods for preparing Jobs API requests."""
16
+
17
+ def _prepare_create(
18
+ self,
19
+ endpoint: SupportedEndpoint,
20
+ request: dict[str, Any],
21
+ metadata: dict[str, str] | None = None,
22
+ ) -> PreparedRequest:
23
+ data = {
24
+ "endpoint": endpoint,
25
+ "request": request,
26
+ }
27
+ if metadata is not None:
28
+ data["metadata"] = metadata
29
+ return PreparedRequest(method="POST", url="/v1/jobs", data=data)
30
+
31
+ def _prepare_retrieve(self, job_id: str) -> PreparedRequest:
32
+ return PreparedRequest(method="GET", url=f"/v1/jobs/{job_id}")
33
+
34
+ def _prepare_cancel(self, job_id: str) -> PreparedRequest:
35
+ return PreparedRequest(method="POST", url=f"/v1/jobs/{job_id}/cancel")
36
+
37
+ def _prepare_list(
38
+ self,
39
+ after: str | None = None,
40
+ limit: int = 20,
41
+ status: JobStatus | None = None,
42
+ ) -> PreparedRequest:
43
+ params: dict[str, Any] = {"limit": limit}
44
+ if after is not None:
45
+ params["after"] = after
46
+ if status is not None:
47
+ params["status"] = status
48
+ return PreparedRequest(method="GET", url="/v1/jobs", params=params)
49
+
50
+
51
+ class Jobs(SyncAPIResource, BaseJobsMixin):
52
+ """
53
+ Synchronous Jobs API client.
54
+
55
+ The Jobs API allows you to submit long-running extract or parse operations
56
+ asynchronously and poll for their results.
57
+
58
+ Example:
59
+ >>> from retab import Retab
60
+ >>> client = Retab(api_key="your-api-key")
61
+ >>>
62
+ >>> # Create an async extraction job
63
+ >>> job = client.jobs.create(
64
+ ... endpoint="/v1/documents/extract",
65
+ ... request={
66
+ ... "document": {"content": "...", "mime_type": "application/pdf"},
67
+ ... "json_schema": {"type": "object", ...},
68
+ ... "model": "gpt-4o",
69
+ ... }
70
+ ... )
71
+ >>>
72
+ >>> # Poll for completion
73
+ >>> while job.status not in ("completed", "failed", "cancelled"):
74
+ ... import time
75
+ ... time.sleep(5)
76
+ ... job = client.jobs.retrieve(job.id)
77
+ >>>
78
+ >>> if job.status == "completed":
79
+ ... print(job.response.body)
80
+ """
81
+
82
+ def create(
83
+ self,
84
+ endpoint: SupportedEndpoint,
85
+ request: dict[str, Any],
86
+ metadata: dict[str, str] | None = None,
87
+ ) -> Job:
88
+ """
89
+ Create a new asynchronous job.
90
+
91
+ Args:
92
+ endpoint: The API endpoint to call ("/v1/documents/extract" or "/v1/documents/parse")
93
+ request: The full request body for the target endpoint
94
+ metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
95
+
96
+ Returns:
97
+ Job: The created job with status "queued"
98
+ """
99
+ prepared = self._prepare_create(endpoint, request, metadata)
100
+ response = self._client._prepared_request(prepared)
101
+ return Job.model_validate(response)
102
+
103
+ def retrieve(self, job_id: str) -> Job:
104
+ """
105
+ Retrieve a job by ID.
106
+
107
+ Args:
108
+ job_id: The job ID to retrieve
109
+
110
+ Returns:
111
+ Job: The job with current status and result (if completed)
112
+ """
113
+ prepared = self._prepare_retrieve(job_id)
114
+ response = self._client._prepared_request(prepared)
115
+ return Job.model_validate(response)
116
+
117
+ def cancel(self, job_id: str) -> Job:
118
+ """
119
+ Cancel a queued or in-progress job.
120
+
121
+ Args:
122
+ job_id: The job ID to cancel
123
+
124
+ Returns:
125
+ Job: The updated job with status "cancelled"
126
+ """
127
+ prepared = self._prepare_cancel(job_id)
128
+ response = self._client._prepared_request(prepared)
129
+ return Job.model_validate(response)
130
+
131
+ def list(
132
+ self,
133
+ after: str | None = None,
134
+ limit: int = 20,
135
+ status: JobStatus | None = None,
136
+ ) -> JobListResponse:
137
+ """
138
+ List jobs with pagination and optional status filtering.
139
+
140
+ Args:
141
+ after: Pagination cursor (last ID from previous page)
142
+ limit: Number of jobs to return (1-100, default 20)
143
+ status: Filter by job status
144
+
145
+ Returns:
146
+ JobListResponse: List of jobs with pagination info
147
+ """
148
+ prepared = self._prepare_list(after, limit, status)
149
+ response = self._client._prepared_request(prepared)
150
+ return JobListResponse.model_validate(response)
151
+
152
+
153
+ class AsyncJobs(AsyncAPIResource, BaseJobsMixin):
154
+ """
155
+ Asynchronous Jobs API client.
156
+
157
+ The Jobs API allows you to submit long-running extract or parse operations
158
+ asynchronously and poll for their results.
159
+
160
+ Example:
161
+ >>> from retab import AsyncRetab
162
+ >>> client = AsyncRetab(api_key="your-api-key")
163
+ >>>
164
+ >>> # Create an async extraction job
165
+ >>> job = await client.jobs.create(
166
+ ... endpoint="/v1/documents/extract",
167
+ ... request={
168
+ ... "document": {"content": "...", "mime_type": "application/pdf"},
169
+ ... "json_schema": {"type": "object", ...},
170
+ ... "model": "gpt-4o",
171
+ ... }
172
+ ... )
173
+ >>>
174
+ >>> # Poll for completion
175
+ >>> while job.status not in ("completed", "failed", "cancelled"):
176
+ ... import asyncio
177
+ ... await asyncio.sleep(5)
178
+ ... job = await client.jobs.retrieve(job.id)
179
+ >>>
180
+ >>> if job.status == "completed":
181
+ ... print(job.response.body)
182
+ """
183
+
184
+ async def create(
185
+ self,
186
+ endpoint: SupportedEndpoint,
187
+ request: dict[str, Any],
188
+ metadata: dict[str, str] | None = None,
189
+ ) -> Job:
190
+ """
191
+ Create a new asynchronous job.
192
+
193
+ Args:
194
+ endpoint: The API endpoint to call ("/v1/documents/extract" or "/v1/documents/parse")
195
+ request: The full request body for the target endpoint
196
+ metadata: Optional metadata (max 16 pairs; keys ≤64 chars, values ≤512 chars)
197
+
198
+ Returns:
199
+ Job: The created job with status "queued"
200
+ """
201
+ prepared = self._prepare_create(endpoint, request, metadata)
202
+ response = await self._client._prepared_request(prepared)
203
+ return Job.model_validate(response)
204
+
205
+ async def retrieve(self, job_id: str) -> Job:
206
+ """
207
+ Retrieve a job by ID.
208
+
209
+ Args:
210
+ job_id: The job ID to retrieve
211
+
212
+ Returns:
213
+ Job: The job with current status and result (if completed)
214
+ """
215
+ prepared = self._prepare_retrieve(job_id)
216
+ response = await self._client._prepared_request(prepared)
217
+ return Job.model_validate(response)
218
+
219
+ async def cancel(self, job_id: str) -> Job:
220
+ """
221
+ Cancel a queued or in-progress job.
222
+
223
+ Args:
224
+ job_id: The job ID to cancel
225
+
226
+ Returns:
227
+ Job: The updated job with status "cancelled"
228
+ """
229
+ prepared = self._prepare_cancel(job_id)
230
+ response = await self._client._prepared_request(prepared)
231
+ return Job.model_validate(response)
232
+
233
+ async def list(
234
+ self,
235
+ after: str | None = None,
236
+ limit: int = 20,
237
+ status: JobStatus | None = None,
238
+ ) -> JobListResponse:
239
+ """
240
+ List jobs with pagination and optional status filtering.
241
+
242
+ Args:
243
+ after: Pagination cursor (last ID from previous page)
244
+ limit: Number of jobs to return (1-100, default 20)
245
+ status: Filter by job status
246
+
247
+ Returns:
248
+ JobListResponse: List of jobs with pagination info
249
+ """
250
+ prepared = self._prepare_list(after, limit, status)
251
+ response = await self._client._prepared_request(prepared)
252
+ return JobListResponse.model_validate(response)
@@ -1,12 +1,14 @@
1
1
  from .parse import ParseRequest, ParseResult, RetabUsage
2
- from .split import Category, SplitRequest, SplitResult, SplitResponse
3
- from .classify import ClassifyRequest, ClassifyResult, ClassifyResponse
2
+ from .split import Subdocument, SplitRequest, SplitResult, SplitResponse
3
+ from .classify import ClassifyRequest, ClassifyResult, ClassifyResponse, Category
4
+
4
5
 
5
6
  __all__ = [
6
7
  "ParseRequest",
7
8
  "ParseResult",
8
9
  "RetabUsage",
9
10
  "Category",
11
+ "Subdocument",
10
12
  "SplitRequest",
11
13
  "SplitResult",
12
14
  "SplitResponse",
@@ -1,13 +1,16 @@
1
1
  from pydantic import BaseModel, Field
2
2
  from ..mime import MIMEData
3
- from .split import Category
4
3
 
4
+ class Category(BaseModel):
5
+ name: str = Field(..., description="The name of the category")
6
+ description: str = Field(..., description="The description of the category")
5
7
 
6
8
  class ClassifyRequest(BaseModel):
7
9
  document: MIMEData = Field(..., description="The document to classify")
8
10
  categories: list[Category] = Field(..., description="The categories to classify the document into")
9
11
  model: str = Field(default="retab-small", description="The model to use for classification")
10
12
  first_n_pages: int | None = Field(default=None, description="Only use the first N pages of the document for classification. Useful for large documents where classification can be determined from early pages.")
13
+ context: str | None = Field(default=None, description="Additional context for classification (e.g., iteration context from a loop)")
11
14
 
12
15
 
13
16
  class ClassifyResult(BaseModel):
@@ -1,17 +1,17 @@
1
1
  from pydantic import BaseModel, Field
2
2
  from ..mime import MIMEData
3
3
 
4
-
5
- class Category(BaseModel):
6
- name: str = Field(..., description="The name of the category")
7
- description: str = Field(..., description="The description of the category")
8
- partition_key: str | None = Field(default=None, description="The key to partition the category")
4
+ class Subdocument(BaseModel):
5
+ name: str = Field(..., description="The name of the subdocument")
6
+ description: str = Field(..., description="The description of the subdocument")
7
+ partition_key: str | None = Field(default=None, description="The key to partition the subdocument")
9
8
 
10
9
 
11
10
  class SplitRequest(BaseModel):
12
11
  document: MIMEData = Field(..., description="The document to split")
13
- categories: list[Category] = Field(..., description="The categories to split the document into")
12
+ subdocuments: list[Subdocument] = Field(..., description="The subdocuments to split the document into")
14
13
  model: str = Field(default="retab-small", description="The model to use to split the document")
14
+ context: str | None = Field(default=None, description="Additional context for the split operation (e.g., iteration context from a loop)")
15
15
 
16
16
 
17
17
  class Partition(BaseModel):
@@ -21,9 +21,9 @@ class Partition(BaseModel):
21
21
  last_page_y_end: float = Field(default=1.0, description="The y coordinate of the last page of the partition")
22
22
 
23
23
  class SplitResult(BaseModel):
24
- name: str = Field(..., description="The name of the category")
25
- pages: list[int] = Field(..., description="The pages of the category (1-indexed)")
26
- partitions: list[Partition] = Field(default_factory=list, description="The partitions of the category")
24
+ name: str = Field(..., description="The name of the subdocument")
25
+ pages: list[int] = Field(..., description="The pages of the subdocument (1-indexed)")
26
+ partitions: list[Partition] = Field(default_factory=list, description="The partitions of the subdocument")
27
27
 
28
28
 
29
29
  class SplitResponse(BaseModel):
@@ -32,14 +32,14 @@ class SplitResponse(BaseModel):
32
32
 
33
33
  class SplitOutputItem(BaseModel):
34
34
  """Internal schema item for LLM structured output validation."""
35
- name: str = Field(..., description="The name of the category")
36
- start_page: int = Field(..., description="The start page of the category (1-indexed)")
37
- end_page: int = Field(..., description="The end page of the category (1-indexed, inclusive)")
35
+ name: str = Field(..., description="The name of the subdocument")
36
+ start_page: int = Field(..., description="The start page of the subdocument (1-indexed)")
37
+ end_page: int = Field(..., description="The end page of the subdocument (1-indexed, inclusive)")
38
38
 
39
39
 
40
40
  class SplitOutputSchema(BaseModel):
41
41
  """Schema for LLM structured output."""
42
42
  splits: list[SplitOutputItem] = Field(
43
43
  ...,
44
- description="List of document sections, each classified into one of the provided categories with their page ranges"
44
+ description="List of document sections, each classified into one of the provided subdocuments with their page ranges"
45
45
  )
@@ -0,0 +1,90 @@
1
+ """
2
+ Jobs API Types
3
+
4
+ Pydantic models for the asynchronous Jobs API.
5
+ """
6
+
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
+ JobStatus = Literal[
13
+ "validating",
14
+ "queued",
15
+ "in_progress",
16
+ "completed",
17
+ "failed",
18
+ "cancelled",
19
+ "expired",
20
+ ]
21
+
22
+ SupportedEndpoint = Literal[
23
+ "/v1/documents/extract",
24
+ "/v1/documents/parse",
25
+ "/v1/documents/split",
26
+ "/v1/documents/classify",
27
+ "/v1/schemas/generate",
28
+ "/v1/edit/agent/fill",
29
+ "/v1/edit/templates/fill",
30
+ "/v1/edit/templates/generate",
31
+ "/v1/projects/extract", # Requires "project_id" in request body
32
+ ]
33
+
34
+
35
+ class JobResponse(BaseModel):
36
+ """Response stored when job completes successfully."""
37
+ status_code: int
38
+ body: dict[str, Any]
39
+
40
+
41
+ class JobError(BaseModel):
42
+ """Error details when job fails."""
43
+ code: str
44
+ message: str
45
+ details: dict[str, Any] | None = None
46
+
47
+
48
+ class Job(BaseModel):
49
+ """
50
+ Job object representing an asynchronous operation.
51
+
52
+ Use this to track the status of long-running operations like extract, parse,
53
+ split, classify, schema generation, and template operations.
54
+ """
55
+ id: str
56
+ object: Literal["job"] = "job"
57
+ status: JobStatus
58
+ endpoint: SupportedEndpoint
59
+ request: dict[str, Any]
60
+ response: JobResponse | None = None
61
+ error: JobError | None = None
62
+
63
+ # Timestamps (Unix timestamps)
64
+ created_at: int
65
+ started_at: int | None = None
66
+ completed_at: int | None = None
67
+ expires_at: int
68
+
69
+ # User context
70
+ organization_id: str
71
+ metadata: dict[str, str] | None = None
72
+
73
+
74
+ class CreateJobRequest(BaseModel):
75
+ """Request body for creating a new job."""
76
+ endpoint: SupportedEndpoint
77
+ request: dict[str, Any]
78
+ metadata: dict[str, str] | None = Field(
79
+ default=None,
80
+ description="Max 16 pairs; keys ≤64 chars, values ≤512 chars"
81
+ )
82
+
83
+
84
+ class JobListResponse(BaseModel):
85
+ """Response for listing jobs."""
86
+ object: Literal["list"] = "list"
87
+ data: list[Job]
88
+ first_id: str | None = None
89
+ last_id: str | None = None
90
+ has_more: bool = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.89
3
+ Version: 0.0.91
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -24,6 +24,8 @@ retab/resources/edit/templates/__init__.py
24
24
  retab/resources/edit/templates/client.py
25
25
  retab/resources/extractions/__init__.py
26
26
  retab/resources/extractions/client.py
27
+ retab/resources/jobs/__init__.py
28
+ retab/resources/jobs/client.py
27
29
  retab/resources/projects/__init__.py
28
30
  retab/resources/projects/client.py
29
31
  retab/resources/workflows/__init__.py
@@ -33,6 +35,7 @@ retab/resources/workflows/runs/client.py
33
35
  retab/types/__init__.py
34
36
  retab/types/chat.py
35
37
  retab/types/inference_settings.py
38
+ retab/types/jobs.py
36
39
  retab/types/mime.py
37
40
  retab/types/modality.py
38
41
  retab/types/pagination.py
@@ -6,7 +6,7 @@ with open("requirements.txt") as f:
6
6
 
7
7
  setup(
8
8
  name="retab",
9
- version="0.0.89",
9
+ version="0.0.91",
10
10
  author="Retab",
11
11
  author_email="contact@retab.com",
12
12
  description="Retab official python library",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes