retab 0.0.69-py3-none-any.whl → 0.0.71-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/resources/documents/client.py +16 -0
- retab/resources/extractions/client.py +44 -181
- retab/types/documents/extract.py +1 -0
- retab/types/extractions/__init__.py +0 -0
- retab/types/extractions/types.py +3 -0
- retab/types/pagination.py +3 -1
- {retab-0.0.69.dist-info → retab-0.0.71.dist-info}/METADATA +1 -1
- {retab-0.0.69.dist-info → retab-0.0.71.dist-info}/RECORD +10 -8
- {retab-0.0.69.dist-info → retab-0.0.71.dist-info}/WHEEL +0 -0
- {retab-0.0.69.dist-info → retab-0.0.71.dist-info}/top_level.txt +0 -0
retab/resources/documents/client.py
CHANGED

@@ -12,6 +12,7 @@ from ..._resource import AsyncAPIResource, SyncAPIResource
 from ...utils.mime import prepare_mime_document
 from ...utils.stream_context_managers import as_async_context_manager, as_context_manager
 from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
+from ...types.chat import ChatCompletionRetabMessage
 from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
 from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
 from ...types.mime import MIMEData

@@ -125,6 +126,7 @@ class BaseDocumentsMixin:
         stream: bool = FieldUnset,
         store: bool = FieldUnset,
         metadata: dict[str, str] = FieldUnset,
+        additional_messages: list[ChatCompletionRetabMessage] = FieldUnset,
         **extra_body: Any,
     ) -> PreparedRequest:
         loaded_schema = load_json_schema(json_schema)

@@ -155,6 +157,8 @@ class BaseDocumentsMixin:
             request_dict["image_resolution_dpi"] = image_resolution_dpi
         if metadata is not FieldUnset:
             request_dict["metadata"] = metadata
+        if additional_messages is not FieldUnset:
+            request_dict["additional_messages"] = additional_messages

         # Merge any extra fields provided by the caller
         if extra_body:

@@ -240,6 +244,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         n_consensus: int = FieldUnset,
         store: bool = FieldUnset,
         metadata: dict[str, str] = FieldUnset,
+        additional_messages: list[ChatCompletionRetabMessage] = FieldUnset,
         **extra_body: Any,
     ) -> RetabParsedChatCompletion:
         """

@@ -257,6 +262,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             n_consensus: Number of consensus extractions to perform
             store: Whether to store the document in the Retab database
             metadata: User-defined metadata to associate with this extraction
+            additional_messages: Additional chat messages to append after the document content messages

         Returns:
             RetabParsedChatCompletion: Parsed response from the API

@@ -275,6 +281,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             n_consensus=n_consensus,
             store=store,
             metadata=metadata,
+            additional_messages=additional_messages,
             **extra_body,
         )
         response = self._client._prepared_request(request)

@@ -381,6 +388,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
         n_consensus: int = FieldUnset,
         store: bool = FieldUnset,
         metadata: dict[str, str] = FieldUnset,
+        additional_messages: list[ChatCompletionRetabMessage] = FieldUnset,
         **extra_body: Any,
     ) -> Generator[RetabParsedChatCompletion, None, None]:
         """

@@ -396,6 +404,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
             store: Whether to store the document in the Retab database
             metadata: User-defined metadata to associate with this extraction
+            additional_messages: Additional chat messages to append after the document content messages

         Returns:
             Generator[RetabParsedChatCompletion]: Stream of parsed responses

@@ -420,6 +429,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
             n_consensus=n_consensus,
             store=store,
             metadata=metadata,
+            additional_messages=additional_messages,
             **extra_body,
         )
         schema = load_json_schema(json_schema)

@@ -572,6 +582,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         n_consensus: int = FieldUnset,
         store: bool = FieldUnset,
         metadata: dict[str, str] = FieldUnset,
+        additional_messages: list[ChatCompletionRetabMessage] = FieldUnset,
         **extra_body: Any,
     ) -> RetabParsedChatCompletion:
         """

@@ -589,6 +600,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             n_consensus: Number of consensus extractions to perform
             store: Whether to store the document in the Retab database
             metadata: User-defined metadata to associate with this extraction
+            additional_messages: Additional chat messages to append after the document content messages

         Returns:
             RetabParsedChatCompletion: Parsed response from the API

@@ -607,6 +619,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             n_consensus=n_consensus,
             store=store,
             metadata=metadata,
+            additional_messages=additional_messages,
             **extra_body,
         )
         response = await self._client._prepared_request(request)

@@ -625,6 +638,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
         n_consensus: int = FieldUnset,
         store: bool = FieldUnset,
         metadata: dict[str, str] = FieldUnset,
+        additional_messages: list[ChatCompletionRetabMessage] = FieldUnset,
         **extra_body: Any,
     ) -> AsyncGenerator[RetabParsedChatCompletion, None]:
         """

@@ -640,6 +654,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
             store: Whether to store the document in the Retab database
             metadata: User-defined metadata to associate with this extraction
+            additional_messages: Additional chat messages to append after the document content messages
         Returns:
             AsyncGenerator[RetabParsedChatCompletion, None]: Stream of parsed responses.
         Raises:

@@ -663,6 +678,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
             n_consensus=n_consensus,
             store=store,
             metadata=metadata,
+            additional_messages=additional_messages,
             **extra_body,
         )
         schema = load_json_schema(json_schema)
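The headline change in documents/client.py is the new additional_messages parameter, threaded through prepare_extract, both extract methods, and both streaming variants, and forwarded only when it is not FieldUnset. A minimal usage sketch follows; the Retab client entry point and the argument values are assumptions based on the SDK's usual conventions, not something this diff confirms:

    # Hedged sketch: exercising the new additional_messages parameter (0.0.71).
    from retab import Retab  # assumed entry point

    client = Retab()

    completion = client.documents.extract(
        json_schema="invoice_schema.json",  # illustrative schema path
        document="invoice.pdf",             # illustrative document path
        model="gpt-4.1-mini",               # illustrative model name
        # New: extra chat messages appended after the document content messages,
        # e.g. to inject context or instructions without editing the schema.
        additional_messages=[
            {"role": "user", "content": "Amounts are in EUR unless stated otherwise."}
        ],
    )
    print(completion.choices[0].message.content)  # assumed OpenAI-style response shape

The same keyword flows through extract_stream and the async client, per the hunks above.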
retab/resources/extractions/client.py
CHANGED

@@ -1,9 +1,11 @@
 import json
+from datetime import datetime
 from typing import Any, Dict, List, Literal

 from ..._resource import AsyncAPIResource, SyncAPIResource
 from ...types.standards import PreparedRequest
-from ...types.pagination import PaginatedList
+from ...types.pagination import PaginatedList, PaginationOrder
+from ...types.extractions.types import HumanReviewStatus

 class ExtractionsMixin:
     def prepare_list(

@@ -11,12 +13,12 @@ class ExtractionsMixin:
         before: str | None = None,
         after: str | None = None,
         limit: int = 10,
-        order:
+        order: PaginationOrder = "desc",
         origin_dot_type: str | None = None,
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         **extra_params: Any,
     ) -> PreparedRequest:

@@ -28,8 +30,8 @@ class ExtractionsMixin:
             "order": order,
             "origin_dot_type": origin_dot_type,
             "origin_dot_id": origin_dot_id,
-            "from_date": from_date,
-            "to_date": to_date,
+            "from_date": from_date.isoformat() if from_date else None,
+            "to_date": to_date.isoformat() if to_date else None,
             "human_review_status": human_review_status,
             # Note: metadata must be JSON-serialized as the backend expects a JSON string
             "metadata": json.dumps(metadata) if metadata else None,

@@ -40,34 +42,13 @@ class ExtractionsMixin:
         params = {k: v for k, v in params.items() if v is not None}
         return PreparedRequest(method="GET", url="/v1/extractions", params=params)

-    def prepare_count(
-        self,
-        origin_dot_type: str | None = None,
-        origin_dot_id: str | None = None,
-        human_review_status: str | None = "review_required",
-        metadata: Dict[str, str] | None = None,
-        **extra_params: Any,
-    ) -> PreparedRequest:
-        """Prepare a request to count extractions."""
-        params = {
-            "origin_dot_type": origin_dot_type,
-            "origin_dot_id": origin_dot_id,
-            "human_review_status": human_review_status,
-            # Note: metadata must be JSON-serialized as the backend expects a JSON string
-            "metadata": json.dumps(metadata) if metadata else None,
-        }
-        if extra_params:
-            params.update(extra_params)
-        params = {k: v for k, v in params.items() if v is not None}
-        return PreparedRequest(method="GET", url="/v1/extractions/count", params=params)
-
     def prepare_download(
         self,
         order: Literal["asc", "desc"] = "desc",
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         format: Literal["jsonl", "csv", "xlsx"] = "jsonl",
         **extra_params: Any,

@@ -76,8 +57,8 @@ class ExtractionsMixin:
         params = {
             "order": order,
             "origin_dot_id": origin_dot_id,
-            "from_date": from_date,
-            "to_date": to_date,
+            "from_date": from_date.isoformat() if from_date else None,
+            "to_date": to_date.isoformat() if to_date else None,
             "human_review_status": human_review_status,
             # Note: metadata must be JSON-serialized as the backend expects a JSON string
             "metadata": json.dumps(metadata) if metadata else None,

@@ -88,36 +69,11 @@ class ExtractionsMixin:
         params = {k: v for k, v in params.items() if v is not None}
         return PreparedRequest(method="GET", url="/v1/extractions/download", params=params)

-    def prepare_get_payload_for_export(
-        self,
-        project_id: str,
-        extraction_ids: List[str],
-        json_schema: dict[str, Any],
-        delimiter: str = ";",
-        line_delimiter: str = "\n",
-        quote: str = '"',
-        **extra_body: Any,
-    ) -> PreparedRequest:
-        """Prepare a request to export extractions as CSV."""
-        data = {
-            "project_id": project_id,
-            "extraction_ids": extraction_ids,
-            "json_schema": json_schema,
-        }
-        if extra_body:
-            data.update(extra_body)
-        params = {
-            "delimiter": delimiter,
-            "line_delimiter": line_delimiter,
-            "quote": quote,
-        }
-        return PreparedRequest(method="POST", url="/v1/extractions/get_payload_for_export", data=data, params=params)
-
     def prepare_update(
         self,
         extraction_id: str,
         predictions: dict[str, Any] | None = None,
-        human_review_status:
+        human_review_status: HumanReviewStatus | None = None,
         json_schema: dict[str, Any] | None = None,
         inference_settings: dict[str, Any] | None = None,
         **extra_body: Any,

@@ -140,14 +96,9 @@ class ExtractionsMixin:
         """Prepare a request to get an extraction by ID."""
         return PreparedRequest(method="GET", url=f"/v1/extractions/{extraction_id}")

-    def
-        """Prepare a request to
-        return PreparedRequest(method="
-
-    def prepare_download_sample_document(self, extraction_id: str) -> PreparedRequest:
-        """Prepare a request to download the sample document for an extraction."""
-        return PreparedRequest(method="GET", url=f"/v1/extractions/{extraction_id}/sample-document")
-
+    def prepare_delete(self, extraction_id: str) -> PreparedRequest:
+        """Prepare a request to delete an extraction by ID."""
+        return PreparedRequest(method="DELETE", url=f"/v1/extractions/{extraction_id}")

 class Extractions(SyncAPIResource, ExtractionsMixin):
     """Extractions API wrapper"""

@@ -160,12 +111,12 @@ class Extractions(SyncAPIResource, ExtractionsMixin):
         before: str | None = None,
         after: str | None = None,
         limit: int = 10,
-        order:
+        order: PaginationOrder = "desc",
         origin_dot_type: str | None = None,
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         **extra_params: Any,
     ) -> PaginatedList:

@@ -186,31 +137,14 @@ class Extractions(SyncAPIResource, ExtractionsMixin):
         response = self._client._prepared_request(request)
         return PaginatedList(**response)

-    def count(
-        self,
-        origin_dot_type: str | None = None,
-        origin_dot_id: str | None = None,
-        human_review_status: str | None = "review_required",
-        metadata: Dict[str, str] | None = None,
-        **extra_params: Any,
-    ) -> dict[str, int]:
-        """Count extractions matching filters."""
-        request = self.prepare_count(
-            origin_dot_type=origin_dot_type,
-            origin_dot_id=origin_dot_id,
-            human_review_status=human_review_status,
-            metadata=metadata,
-            **extra_params,
-        )
-        return self._client._prepared_request(request)
-
+
     def download(
         self,
         order: Literal["asc", "desc"] = "desc",
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         format: Literal["jsonl", "csv", "xlsx"] = "jsonl",
         **extra_params: Any,

@@ -228,33 +162,12 @@ class Extractions(SyncAPIResource, ExtractionsMixin):
         )
         return self._client._prepared_request(request)

-    def get_payload_for_export(
-        self,
-        project_id: str,
-        extraction_ids: List[str],
-        json_schema: dict[str, Any],
-        delimiter: str = ";",
-        line_delimiter: str = "\n",
-        quote: str = '"',
-        **extra_body: Any,
-    ) -> dict[str, Any]:
-        """Export extractions as CSV. Returns csv_data, rows, and columns."""
-        request = self.prepare_get_payload_for_export(
-            project_id=project_id,
-            extraction_ids=extraction_ids,
-            json_schema=json_schema,
-            delimiter=delimiter,
-            line_delimiter=line_delimiter,
-            quote=quote,
-            **extra_body,
-        )
-        return self._client._prepared_request(request)
-
+
     def update(
         self,
         extraction_id: str,
         predictions: dict[str, Any] | None = None,
-        human_review_status:
+        human_review_status: HumanReviewStatus | None = None,
         json_schema: dict[str, Any] | None = None,
         inference_settings: dict[str, Any] | None = None,
         **extra_body: Any,

@@ -276,15 +189,10 @@ class Extractions(SyncAPIResource, ExtractionsMixin):
         request = self.prepare_get(extraction_id)
         return self._client._prepared_request(request)

-    def
-        """
-        request = self.
-
-
-    def download_sample_document(self, extraction_id: str) -> bytes:
-        """Download the sample document for an extraction."""
-        request = self.prepare_download_sample_document(extraction_id)
-        return self._client._prepared_request(request)
+    def delete(self, extraction_id: str) -> None:
+        """Delete an extraction by ID."""
+        request = self.prepare_delete(extraction_id)
+        self._client._prepared_request(request)


 class AsyncExtractions(AsyncAPIResource, ExtractionsMixin):

@@ -298,12 +206,12 @@ class AsyncExtractions(AsyncAPIResource, ExtractionsMixin):
         before: str | None = None,
         after: str | None = None,
         limit: int = 10,
-        order:
+        order: PaginationOrder = "desc",
         origin_dot_type: str | None = None,
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         **extra_params: Any,
     ) -> PaginatedList:

@@ -324,31 +232,13 @@ class AsyncExtractions(AsyncAPIResource, ExtractionsMixin):
         response = await self._client._prepared_request(request)
         return PaginatedList(**response)

-    async def count(
-        self,
-        origin_dot_type: str | None = None,
-        origin_dot_id: str | None = None,
-        human_review_status: str | None = "review_required",
-        metadata: Dict[str, str] | None = None,
-        **extra_params: Any,
-    ) -> dict[str, int]:
-        """Count extractions matching filters."""
-        request = self.prepare_count(
-            origin_dot_type=origin_dot_type,
-            origin_dot_id=origin_dot_id,
-            human_review_status=human_review_status,
-            metadata=metadata,
-            **extra_params,
-        )
-        return await self._client._prepared_request(request)
-
     async def download(
         self,
         order: Literal["asc", "desc"] = "desc",
         origin_dot_id: str | None = None,
-        from_date:
-        to_date:
-        human_review_status:
+        from_date: datetime | None = None,
+        to_date: datetime | None = None,
+        human_review_status: HumanReviewStatus | None = None,
         metadata: Dict[str, str] | None = None,
         format: Literal["jsonl", "csv", "xlsx"] = "jsonl",
         **extra_params: Any,

@@ -366,33 +256,11 @@ class AsyncExtractions(AsyncAPIResource, ExtractionsMixin):
         )
         return await self._client._prepared_request(request)

-    async def get_payload_for_export(
-        self,
-        project_id: str,
-        extraction_ids: List[str],
-        json_schema: dict[str, Any],
-        delimiter: str = ";",
-        line_delimiter: str = "\n",
-        quote: str = '"',
-        **extra_body: Any,
-    ) -> dict[str, Any]:
-        """Export extractions as CSV. Returns csv_data, rows, and columns."""
-        request = self.prepare_get_payload_for_export(
-            project_id=project_id,
-            extraction_ids=extraction_ids,
-            json_schema=json_schema,
-            delimiter=delimiter,
-            line_delimiter=line_delimiter,
-            quote=quote,
-            **extra_body,
-        )
-        return await self._client._prepared_request(request)
-
     async def update(
         self,
         extraction_id: str,
         predictions: dict[str, Any] | None = None,
-        human_review_status:
+        human_review_status: HumanReviewStatus | None = None,
         json_schema: dict[str, Any] | None = None,
         inference_settings: dict[str, Any] | None = None,
         **extra_body: Any,

@@ -414,12 +282,7 @@ class AsyncExtractions(AsyncAPIResource, ExtractionsMixin):
         request = self.prepare_get(extraction_id)
         return await self._client._prepared_request(request)

-    async def
-        """
-        request = self.
-
-
-    async def download_sample_document(self, extraction_id: str) -> bytes:
-        """Download the sample document for an extraction."""
-        request = self.prepare_download_sample_document(extraction_id)
-        return await self._client._prepared_request(request)
+    async def delete(self, extraction_id: str) -> None:
+        """Delete an extraction by ID."""
+        request = self.prepare_delete(extraction_id)
+        await self._client._prepared_request(request)
retab/types/documents/extract.py
CHANGED

@@ -39,6 +39,7 @@ class DocumentExtractRequest(BaseModel):
     parallel_ocr_keys: Optional[dict[str, str]] = Field(default=None, description="If set, keys to be used for the extraction of long lists of data using Parallel OCR", examples=[{"properties": "ID", "products": "identity.id"}])
     metadata: dict[str, str] = Field(default_factory=dict, description="User-defined metadata to associate with this extraction")
     extraction_id: Optional[str] = Field(default=None, description="Extraction ID to use for this extraction. If not provided, a new ID will be generated.")
+    additional_messages: Optional[list[ChatCompletionRetabMessage]] = Field(default=None, description="Additional chat messages to append after the document content messages. Useful for providing extra context or instructions.")

     # Add a model validator that rejects n_consensus > 1 if temperature is 0
     @field_validator("n_consensus")
retab/types/extractions/__init__.py
File without changes
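The new retab/types/extractions/types.py module (+3 lines per the diffstat, 102 bytes per the RECORD) is not rendered in this diff view. From the import `from ...types.extractions.types import HumanReviewStatus` and the "review_required" default carried by the removed count methods, it presumably defines a small Literal alias; any member value beyond "review_required" is a guess:

    # Hypothetical reconstruction of retab/types/extractions/types.py --
    # only "review_required" is attested anywhere in this diff.
    from typing import Literal

    HumanReviewStatus = Literal["review_required", "reviewed"]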
retab/types/pagination.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import Any, List, Literal
 from pydantic import BaseModel


@@ -10,3 +10,5 @@ class ListMetadata(BaseModel):
 class PaginatedList(BaseModel):
     data: List[Any]
     list_metadata: ListMetadata
+
+type PaginationOrder = Literal["asc", "desc"]
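Note that `type PaginationOrder = Literal["asc", "desc"]` uses the PEP 695 `type` statement, which the interpreter only accepts on Python 3.12 and later. On older Pythons the same alias would have to be spelled with typing.TypeAlias; a compatibility sketch, not what the wheel ships:

    # Pre-3.12 spelling of the same alias (the package itself uses the `type ...` statement).
    from typing import Literal, TypeAlias

    PaginationOrder: TypeAlias = Literal["asc", "desc"]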
{retab-0.0.69.dist-info → retab-0.0.71.dist-info}/RECORD
CHANGED

@@ -7,9 +7,9 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
 retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
 retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
-retab/resources/documents/client.py,sha256=
+retab/resources/documents/client.py,sha256=8WT0-PwfrCgUaA6Pj2_VTDQ66zRTDexDxj9koMG0Ygo,33161
 retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
-retab/resources/extractions/client.py,sha256=
+retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
 retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
 retab/resources/projects/client.py,sha256=nvqsDiVyeRrXsoYddwyXNmpZxaBLYWAGO2e0n6qkCzY,14988
 retab/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -17,13 +17,15 @@ retab/types/chat.py,sha256=x9VbtPMa4w6Gc0HrFC3ILl6cCnfEn5ytDnwJtZmlcys,1436
 retab/types/inference_settings.py,sha256=e4NDZHFdtrqi30Za1z0JhRU5PBO94yKxUGoK7S2kA3M,1053
 retab/types/mime.py,sha256=3Zk7vIbV8o4uJQiclVH-ncKKhs_ZeVi-UQV68TTu7s0,10039
 retab/types/modality.py,sha256=4B8LctdUBZVgIjtS2FjrJpljn2Eyse0XE1bpFsGb9O4,131
-retab/types/pagination.py,sha256=
+retab/types/pagination.py,sha256=A0Fw06baPTfEaYwo3kvNs4vaupzlqylBc6tQH-2DFuY,279
 retab/types/standards.py,sha256=7aGtuvzzkKidvqY8JB2Cjfn43V80FeKwrTtp162kjKc,1477
 retab/types/documents/__init__.py,sha256=RaD6PnvRJw7QEVTh_PYNX6gckpLcxUJH7FKaopRKJzY,114
 retab/types/documents/correct_orientation.py,sha256=e-ivsslI6L6Gl0YkcslXw_DH620xMGEYVp4tdeviXeM,261
 retab/types/documents/create_messages.py,sha256=Cox0QgIyLhTXIvw1Nzd2BCnB9-5KAYgw_gads5eTaDw,7272
-retab/types/documents/extract.py,sha256=
+retab/types/documents/extract.py,sha256=tc6SJSbHcZlDdKv67AdV2w5v-veXqze9ztFOorPkLVU,16721
 retab/types/documents/parse.py,sha256=Jd6i-1UXhAtgntRBZItEHGHeevyLdLmbTQa1-HNrico,1305
+retab/types/extractions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+retab/types/extractions/types.py,sha256=mnCYSfJoEKsXN2eG-PrahnnQyR6RDjP5VO9sHC1Opmg,102
 retab/types/projects/__init__.py,sha256=I7P_dems5_LOLgYQ-4Bzt9B6P6jRlQwP-D_9GxRDhVk,155
 retab/types/projects/metrics.py,sha256=J8aZdVbqlszfxosAZyTB7l6lp9WgdL5QgLMlLrckN7k,1946
 retab/types/projects/model.py,sha256=f5NSIvwQQXhax4gHu57CE0pUkU_5S8t3aSfzAAqFFlI,4615

@@ -42,7 +44,7 @@ retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
 retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
 retab/utils/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 retab/utils/usage/json_schema.py,sha256=kRbL5E5OStlhlNlBXlxHNwaVHKd6MVhyqIb0y4aj8JA,84322
-retab-0.0.
-retab-0.0.
-retab-0.0.
-retab-0.0.
+retab-0.0.71.dist-info/METADATA,sha256=85N0ECgTable-tLM2gHI6ZQV3Qp4XQSY14J1atXcOJY,4532
+retab-0.0.71.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+retab-0.0.71.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
+retab-0.0.71.dist-info/RECORD,,
{retab-0.0.69.dist-info → retab-0.0.71.dist-info}/WHEEL
File without changes

{retab-0.0.69.dist-info → retab-0.0.71.dist-info}/top_level.txt
File without changes