retab 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/_resource.py +5 -5
- retab/_utils/_model_cards/anthropic.yaml +59 -0
- retab/_utils/_model_cards/auto.yaml +43 -0
- retab/_utils/_model_cards/gemini.yaml +117 -0
- retab/_utils/_model_cards/openai.yaml +301 -0
- retab/_utils/_model_cards/xai.yaml +28 -0
- retab/_utils/ai_models.py +109 -71
- retab/_utils/chat.py +20 -20
- retab/_utils/responses.py +14 -14
- retab/_utils/usage/usage.py +5 -4
- retab/client.py +22 -22
- retab/resources/consensus/client.py +2 -2
- retab/resources/consensus/completions.py +26 -26
- retab/resources/consensus/completions_stream.py +27 -27
- retab/resources/consensus/responses.py +11 -11
- retab/resources/consensus/responses_stream.py +15 -15
- retab/resources/documents/client.py +297 -16
- retab/resources/documents/extractions.py +39 -39
- retab/resources/evaluations/documents.py +5 -5
- retab/resources/evaluations/iterations.py +7 -7
- retab/resources/jsonlUtils.py +7 -7
- retab/resources/processors/automations/endpoints.py +2 -2
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +2 -2
- retab/resources/processors/automations/outlook.py +2 -2
- retab/resources/processors/client.py +9 -9
- retab/resources/usage.py +4 -4
- retab/types/ai_models.py +41 -513
- retab/types/automations/mailboxes.py +1 -1
- retab/types/automations/webhooks.py +3 -3
- retab/types/chat.py +1 -1
- retab/types/completions.py +10 -10
- retab/types/documents/__init__.py +3 -0
- retab/types/documents/create_messages.py +2 -2
- retab/types/documents/extractions.py +19 -19
- retab/types/documents/parse.py +32 -0
- retab/types/extractions.py +4 -4
- retab/types/logs.py +2 -2
- retab/types/schemas/object.py +3 -3
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/METADATA +72 -72
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/RECORD +45 -39
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/WHEEL +0 -0
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/top_level.txt +0 -0
@@ -18,15 +18,15 @@ from ..._utils.ai_models import assert_valid_model_extraction
|
|
18
18
|
from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
|
19
19
|
from ..._utils.mime import MIMEData, prepare_mime_document
|
20
20
|
from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
|
21
|
-
from ...types.chat import
|
22
|
-
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest,
|
21
|
+
from ...types.chat import ChatCompletionRetabMessage
|
22
|
+
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice
|
23
23
|
from ...types.browser_canvas import BrowserCanvas
|
24
24
|
from ...types.modalities import Modality
|
25
25
|
from ...types.schemas.object import Schema
|
26
26
|
from ...types.standards import PreparedRequest
|
27
27
|
|
28
28
|
|
29
|
-
def maybe_parse_to_pydantic(schema: Schema, response:
|
29
|
+
def maybe_parse_to_pydantic(schema: Schema, response: RetabParsedChatCompletion, allow_partial: bool = False) -> RetabParsedChatCompletion:
|
30
30
|
if response.choices[0].message.content:
|
31
31
|
try:
|
32
32
|
if allow_partial:
|
@@ -97,8 +97,8 @@ class BaseExtractionsMixin:
|
|
97
97
|
model: str,
|
98
98
|
temperature: float,
|
99
99
|
completion: Any | None = None,
|
100
|
-
# The messages can be provided in different formats, we will convert them to the
|
101
|
-
messages: list[
|
100
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
101
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
102
102
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
103
103
|
anthropic_messages: list[MessageParam] | None = None,
|
104
104
|
anthropic_system_prompt: str | None = None,
|
@@ -152,9 +152,9 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
152
152
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
153
153
|
idempotency_key: str | None = None,
|
154
154
|
store: bool = False,
|
155
|
-
) ->
|
155
|
+
) -> RetabParsedChatCompletion:
|
156
156
|
"""
|
157
|
-
Process one or more documents using the
|
157
|
+
Process one or more documents using the Retab API.
|
158
158
|
|
159
159
|
Args:
|
160
160
|
json_schema: JSON schema defining the expected data structure
|
@@ -168,9 +168,9 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
168
168
|
reasoning_effort: The effort level for the model to reason about the input data.
|
169
169
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
170
170
|
idempotency_key: Idempotency key for request
|
171
|
-
store: Whether to store the document in the
|
171
|
+
store: Whether to store the document in the Retab database
|
172
172
|
Returns:
|
173
|
-
|
173
|
+
RetabParsedChatCompletion: Parsed response from the API
|
174
174
|
Raises:
|
175
175
|
ValueError: If neither document nor documents is provided, or if both are provided
|
176
176
|
HTTPException: If the request fails
|
@@ -195,7 +195,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
195
195
|
response = self._client._prepared_request(request)
|
196
196
|
|
197
197
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
198
|
-
return maybe_parse_to_pydantic(schema,
|
198
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
199
199
|
|
200
200
|
@as_context_manager
|
201
201
|
def stream(
|
@@ -212,9 +212,9 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
212
212
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
213
213
|
idempotency_key: str | None = None,
|
214
214
|
store: bool = False,
|
215
|
-
) -> Generator[
|
215
|
+
) -> Generator[RetabParsedChatCompletion, None, None]:
|
216
216
|
"""
|
217
|
-
Process one or more documents using the
|
217
|
+
Process one or more documents using the Retab API with streaming enabled.
|
218
218
|
|
219
219
|
Args:
|
220
220
|
json_schema: JSON schema defining the expected data structure
|
@@ -228,22 +228,22 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
228
228
|
reasoning_effort: The effort level for the model to reason about the input data.
|
229
229
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
230
230
|
idempotency_key: Idempotency key for request
|
231
|
-
store: Whether to store the document in the
|
231
|
+
store: Whether to store the document in the Retab database
|
232
232
|
|
233
233
|
Returns:
|
234
|
-
Generator[
|
234
|
+
Generator[RetabParsedChatCompletion]: Stream of parsed responses
|
235
235
|
Raises:
|
236
236
|
ValueError: If neither document nor documents is provided, or if both are provided
|
237
237
|
HTTPException: If the request fails
|
238
238
|
Usage:
|
239
239
|
```python
|
240
240
|
# Single document
|
241
|
-
with
|
241
|
+
with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
|
242
242
|
for response in stream:
|
243
243
|
print(response)
|
244
244
|
|
245
245
|
# Multiple documents
|
246
|
-
with
|
246
|
+
with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
247
247
|
for response in stream:
|
248
248
|
print(response)
|
249
249
|
```
|
@@ -266,16 +266,16 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
266
266
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
267
267
|
|
268
268
|
# Request the stream and return a context manager
|
269
|
-
ui_parsed_chat_completion_cum_chunk:
|
270
|
-
# Initialize the
|
271
|
-
ui_parsed_completion:
|
269
|
+
ui_parsed_chat_completion_cum_chunk: RetabParsedChatCompletionChunk | None = None
|
270
|
+
# Initialize the RetabParsedChatCompletion object
|
271
|
+
ui_parsed_completion: RetabParsedChatCompletion = RetabParsedChatCompletion(
|
272
272
|
id="",
|
273
273
|
created=0,
|
274
274
|
model="",
|
275
275
|
object="chat.completion",
|
276
276
|
likelihoods={},
|
277
277
|
choices=[
|
278
|
-
|
278
|
+
RetabParsedChoice(
|
279
279
|
index=0,
|
280
280
|
message=ParsedChatCompletionMessage(content="", role="assistant"),
|
281
281
|
finish_reason=None,
|
@@ -286,7 +286,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
286
286
|
for chunk_json in self._client._prepared_request_stream(request):
|
287
287
|
if not chunk_json:
|
288
288
|
continue
|
289
|
-
ui_parsed_chat_completion_cum_chunk =
|
289
|
+
ui_parsed_chat_completion_cum_chunk = RetabParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
|
290
290
|
# Basic stuff
|
291
291
|
ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
|
292
292
|
ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
|
@@ -311,8 +311,8 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
311
311
|
model: str,
|
312
312
|
temperature: float,
|
313
313
|
completion: Any | None = None,
|
314
|
-
# The messages can be provided in different formats, we will convert them to the
|
315
|
-
messages: list[
|
314
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
315
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
316
316
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
317
317
|
anthropic_messages: list[MessageParam] | None = None,
|
318
318
|
anthropic_system_prompt: str | None = None,
|
@@ -353,7 +353,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
353
353
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
354
354
|
idempotency_key: str | None = None,
|
355
355
|
store: bool = False,
|
356
|
-
) ->
|
356
|
+
) -> RetabParsedChatCompletion:
|
357
357
|
"""
|
358
358
|
Extract structured data from one or more documents asynchronously.
|
359
359
|
|
@@ -369,9 +369,9 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
369
369
|
reasoning_effort: The effort level for the model to reason about the input data.
|
370
370
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
371
371
|
idempotency_key: Idempotency key for request
|
372
|
-
store: Whether to store the document in the
|
372
|
+
store: Whether to store the document in the Retab database
|
373
373
|
Returns:
|
374
|
-
|
374
|
+
RetabParsedChatCompletion: Parsed response from the API.
|
375
375
|
Raises:
|
376
376
|
ValueError: If neither document nor documents is provided, or if both are provided
|
377
377
|
"""
|
@@ -392,7 +392,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
392
392
|
)
|
393
393
|
response = await self._client._prepared_request(request)
|
394
394
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
395
|
-
return maybe_parse_to_pydantic(schema,
|
395
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
396
396
|
|
397
397
|
@as_async_context_manager
|
398
398
|
async def stream(
|
@@ -409,7 +409,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
409
409
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
410
410
|
idempotency_key: str | None = None,
|
411
411
|
store: bool = False,
|
412
|
-
) -> AsyncGenerator[
|
412
|
+
) -> AsyncGenerator[RetabParsedChatCompletion, None]:
|
413
413
|
"""
|
414
414
|
Extract structured data from one or more documents asynchronously with streaming.
|
415
415
|
|
@@ -425,21 +425,21 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
425
425
|
reasoning_effort: The effort level for the model to reason about the input data.
|
426
426
|
n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
|
427
427
|
idempotency_key: Idempotency key for request
|
428
|
-
store: Whether to store the document in the
|
428
|
+
store: Whether to store the document in the Retab database
|
429
429
|
Returns:
|
430
|
-
AsyncGenerator[
|
430
|
+
AsyncGenerator[RetabParsedChatCompletion, None]: Stream of parsed responses.
|
431
431
|
Raises:
|
432
432
|
ValueError: If neither document nor documents is provided, or if both are provided
|
433
433
|
|
434
434
|
Usage:
|
435
435
|
```python
|
436
436
|
# Single document
|
437
|
-
async with
|
437
|
+
async with retab.documents.extractions.stream(json_schema, model, document=document) as stream:
|
438
438
|
async for response in stream:
|
439
439
|
print(response)
|
440
440
|
|
441
441
|
# Multiple documents
|
442
|
-
async with
|
442
|
+
async with retab.documents.extractions.stream(json_schema, model, documents=[doc1, doc2]) as stream:
|
443
443
|
async for response in stream:
|
444
444
|
print(response)
|
445
445
|
```
|
@@ -460,16 +460,16 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
460
460
|
idempotency_key=idempotency_key,
|
461
461
|
)
|
462
462
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
463
|
-
ui_parsed_chat_completion_cum_chunk:
|
464
|
-
# Initialize the
|
465
|
-
ui_parsed_completion:
|
463
|
+
ui_parsed_chat_completion_cum_chunk: RetabParsedChatCompletionChunk | None = None
|
464
|
+
# Initialize the RetabParsedChatCompletion object
|
465
|
+
ui_parsed_completion: RetabParsedChatCompletion = RetabParsedChatCompletion(
|
466
466
|
id="",
|
467
467
|
created=0,
|
468
468
|
model="",
|
469
469
|
object="chat.completion",
|
470
470
|
likelihoods={},
|
471
471
|
choices=[
|
472
|
-
|
472
|
+
RetabParsedChoice(
|
473
473
|
index=0,
|
474
474
|
message=ParsedChatCompletionMessage(content="", role="assistant"),
|
475
475
|
finish_reason=None,
|
@@ -481,7 +481,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
481
481
|
async for chunk_json in self._client._prepared_request_stream(request):
|
482
482
|
if not chunk_json:
|
483
483
|
continue
|
484
|
-
ui_parsed_chat_completion_cum_chunk =
|
484
|
+
ui_parsed_chat_completion_cum_chunk = RetabParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
|
485
485
|
# Basic stuff
|
486
486
|
ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
|
487
487
|
ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
|
@@ -507,8 +507,8 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
507
507
|
model: str,
|
508
508
|
temperature: float,
|
509
509
|
completion: Any | None = None,
|
510
|
-
# The messages can be provided in different formats, we will convert them to the
|
511
|
-
messages: list[
|
510
|
+
# The messages can be provided in different formats, we will convert them to the Retab-compatible format
|
511
|
+
messages: list[ChatCompletionRetabMessage] | None = None,
|
512
512
|
openai_messages: list[ChatCompletionMessageParam] | None = None,
|
513
513
|
anthropic_messages: list[MessageParam] | None = None,
|
514
514
|
anthropic_system_prompt: str | None = None,
|
@@ -10,7 +10,7 @@ from ..._utils.mime import prepare_mime_document
|
|
10
10
|
from ...types.evaluations import DocumentItem, EvaluationDocument, PatchEvaluationDocumentRequest
|
11
11
|
from ...types.mime import MIMEData
|
12
12
|
from ...types.standards import PreparedRequest, DeleteResponse, FieldUnset
|
13
|
-
from ...types.documents.extractions import
|
13
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
14
14
|
|
15
15
|
|
16
16
|
class DocumentsMixin:
|
@@ -134,13 +134,13 @@ class Documents(SyncAPIResource, DocumentsMixin):
|
|
134
134
|
request = self.prepare_delete(evaluation_id, document_id)
|
135
135
|
return self._client._prepared_request(request)
|
136
136
|
|
137
|
-
def llm_annotate(self, evaluation_id: str, document_id: str) ->
|
137
|
+
def llm_annotate(self, evaluation_id: str, document_id: str) -> RetabParsedChatCompletion:
|
138
138
|
"""
|
139
139
|
Annotate a document with an LLM. This method updates the document (within the evaluation) with the latest extraction.
|
140
140
|
"""
|
141
141
|
request = self.prepare_llm_annotate(evaluation_id, document_id)
|
142
142
|
response = self._client._prepared_request(request)
|
143
|
-
return
|
143
|
+
return RetabParsedChatCompletion(**response)
|
144
144
|
|
145
145
|
|
146
146
|
class AsyncDocuments(AsyncAPIResource, DocumentsMixin):
|
@@ -223,11 +223,11 @@ class AsyncDocuments(AsyncAPIResource, DocumentsMixin):
|
|
223
223
|
request = self.prepare_delete(evaluation_id, document_id)
|
224
224
|
return await self._client._prepared_request(request)
|
225
225
|
|
226
|
-
async def llm_annotate(self, evaluation_id: str, document_id: str) ->
|
226
|
+
async def llm_annotate(self, evaluation_id: str, document_id: str) -> RetabParsedChatCompletion:
|
227
227
|
"""
|
228
228
|
Annotate a document with an LLM.
|
229
229
|
This method updates the document (within the evaluation) with the latest extraction.
|
230
230
|
"""
|
231
231
|
request = self.prepare_llm_annotate(evaluation_id, document_id)
|
232
232
|
response = await self._client._prepared_request(request)
|
233
|
-
return
|
233
|
+
return RetabParsedChatCompletion(**response)
|
@@ -9,7 +9,7 @@ from ...types.inference_settings import InferenceSettings
|
|
9
9
|
from ...types.metrics import DistancesResult
|
10
10
|
from ...types.modalities import Modality
|
11
11
|
from ...types.standards import DeleteResponse, PreparedRequest, FieldUnset
|
12
|
-
from ...types.documents.extractions import
|
12
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
13
13
|
|
14
14
|
|
15
15
|
class IterationsMixin:
|
@@ -238,7 +238,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
238
238
|
response = self._client._prepared_request(request)
|
239
239
|
return Iteration(**response)
|
240
240
|
|
241
|
-
def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) ->
|
241
|
+
def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) -> RetabParsedChatCompletion:
|
242
242
|
"""
|
243
243
|
Process a single document within an iteration.
|
244
244
|
This method updates the iteration document with the latest extraction.
|
@@ -248,13 +248,13 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
248
248
|
document_id: The ID of the document
|
249
249
|
|
250
250
|
Returns:
|
251
|
-
|
251
|
+
RetabParsedChatCompletion: The parsed chat completion
|
252
252
|
Raises:
|
253
253
|
HTTPException if the request fails
|
254
254
|
"""
|
255
255
|
request = self.prepare_process_document(evaluation_id, iteration_id, document_id)
|
256
256
|
response = self._client._prepared_request(request)
|
257
|
-
return
|
257
|
+
return RetabParsedChatCompletion(**response)
|
258
258
|
|
259
259
|
def status(self, evaluation_id: str, iteration_id: str) -> IterationDocumentStatusResponse:
|
260
260
|
"""
|
@@ -417,7 +417,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
417
417
|
response = await self._client._prepared_request(request)
|
418
418
|
return Iteration(**response)
|
419
419
|
|
420
|
-
async def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) ->
|
420
|
+
async def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) -> RetabParsedChatCompletion:
|
421
421
|
"""
|
422
422
|
Process a single document within an iteration.
|
423
423
|
This method updates the iteration document with the latest extraction.
|
@@ -427,13 +427,13 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
427
427
|
document_id: The ID of the document
|
428
428
|
|
429
429
|
Returns:
|
430
|
-
|
430
|
+
RetabParsedChatCompletion: The parsed chat completion
|
431
431
|
Raises:
|
432
432
|
HTTPException if the request fails
|
433
433
|
"""
|
434
434
|
request = self.prepare_process_document(evaluation_id, iteration_id, document_id)
|
435
435
|
response = await self._client._prepared_request(request)
|
436
|
-
return
|
436
|
+
return RetabParsedChatCompletion(**response)
|
437
437
|
|
438
438
|
async def status(self, evaluation_id: str, iteration_id: str) -> IterationDocumentStatusResponse:
|
439
439
|
"""
|
retab/resources/jsonlUtils.py
CHANGED
@@ -18,18 +18,18 @@ from pydantic_core import PydanticUndefined
|
|
18
18
|
from tqdm import tqdm
|
19
19
|
|
20
20
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
21
|
-
from .._utils.ai_models import assert_valid_model_extraction,
|
21
|
+
from .._utils.ai_models import assert_valid_model_extraction, get_provider_for_model
|
22
22
|
from .._utils.chat import convert_to_anthropic_format, convert_to_openai_format, separate_messages
|
23
23
|
from .._utils.display import Metrics, display_metrics, process_dataset_and_compute_metrics
|
24
24
|
from .._utils.json_schema import load_json_schema
|
25
|
-
from ..types.chat import
|
25
|
+
from ..types.chat import ChatCompletionRetabMessage
|
26
26
|
from ..types.modalities import Modality
|
27
27
|
from ..types.schemas.object import Schema
|
28
28
|
from ..types.browser_canvas import BrowserCanvas
|
29
29
|
|
30
30
|
|
31
31
|
class FinetuningJSON(BaseModel):
|
32
|
-
messages: list[
|
32
|
+
messages: list[ChatCompletionRetabMessage]
|
33
33
|
|
34
34
|
|
35
35
|
FinetuningJSONL = list[FinetuningJSON]
|
@@ -242,7 +242,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
242
242
|
training_set = []
|
243
243
|
|
244
244
|
for pair_paths in tqdm(pairs_paths):
|
245
|
-
document_messages: list[
|
245
|
+
document_messages: list[ChatCompletionRetabMessage] = []
|
246
246
|
|
247
247
|
if isinstance(pair_paths["document_fpath"], str) or isinstance(pair_paths["document_fpath"], Path):
|
248
248
|
document_message = self._client.documents.create_messages(document=pair_paths["document_fpath"], modality=modality)
|
@@ -278,7 +278,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
278
278
|
Returns:
|
279
279
|
A tuple of (client instance, provider type string)
|
280
280
|
"""
|
281
|
-
provider =
|
281
|
+
provider = get_provider_for_model(model)
|
282
282
|
|
283
283
|
if provider == "OpenAI":
|
284
284
|
return OpenAI(api_key=self._client.headers["OpenAI-Api-Key"]), provider
|
@@ -299,7 +299,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
299
299
|
provider: str,
|
300
300
|
model: str,
|
301
301
|
temperature: float,
|
302
|
-
messages: list[
|
302
|
+
messages: list[ChatCompletionRetabMessage],
|
303
303
|
schema_obj: Schema,
|
304
304
|
) -> str:
|
305
305
|
"""Get completion from the appropriate model provider.
|
@@ -659,7 +659,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
659
659
|
system_message, user_messages, assistant_messages = separate_messages(messages)
|
660
660
|
system_and_user_messages = messages[:-1]
|
661
661
|
|
662
|
-
previous_annotation_message:
|
662
|
+
previous_annotation_message: ChatCompletionRetabMessage = {
|
663
663
|
"role": "user",
|
664
664
|
"content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + messages[-1]["content"],
|
665
665
|
}
|
@@ -120,7 +120,7 @@ class Endpoints(SyncAPIResource, EndpointsMixin):
|
|
120
120
|
need_validation=need_validation,
|
121
121
|
)
|
122
122
|
response = self._client._prepared_request(request)
|
123
|
-
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.
|
123
|
+
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
124
124
|
return Endpoint.model_validate(response)
|
125
125
|
|
126
126
|
def list(
|
@@ -229,7 +229,7 @@ class AsyncEndpoints(AsyncAPIResource, EndpointsMixin):
|
|
229
229
|
need_validation=need_validation,
|
230
230
|
)
|
231
231
|
response = await self._client._prepared_request(request)
|
232
|
-
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.
|
232
|
+
print(f"Endpoint ID: {response['id']}. Endpoint available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
233
233
|
|
234
234
|
return Endpoint.model_validate(response)
|
235
235
|
|
@@ -128,7 +128,7 @@ class Links(SyncAPIResource, LinksMixin):
|
|
128
128
|
)
|
129
129
|
response = self._client._prepared_request(request)
|
130
130
|
|
131
|
-
print(f"Extraction Link Created. Link available at https://www.
|
131
|
+
print(f"Extraction Link Created. Link available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
132
132
|
return Link.model_validate(response)
|
133
133
|
|
134
134
|
def list(
|
@@ -248,7 +248,7 @@ class AsyncLinks(AsyncAPIResource, LinksMixin):
|
|
248
248
|
password=password,
|
249
249
|
)
|
250
250
|
response = await self._client._prepared_request(request)
|
251
|
-
print(f"Extraction Link Created. Link available at https://www.
|
251
|
+
print(f"Extraction Link Created. Link available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
252
252
|
return Link.model_validate(response)
|
253
253
|
|
254
254
|
async def list(
|
@@ -147,7 +147,7 @@ class Logs(SyncAPIResource, LogsMixin):
|
|
147
147
|
request = self.prepare_rerun(processor_id, log_id)
|
148
148
|
response = self._client._prepared_request(request)
|
149
149
|
|
150
|
-
print(f"Webhook call run successfully. Log available at https://www.
|
150
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.dev/dashboard/processors/{processor_id}/logs/{log_id}")
|
151
151
|
|
152
152
|
return ExternalRequestLog.model_validate(response)
|
153
153
|
|
@@ -217,6 +217,6 @@ class AsyncLogs(AsyncAPIResource, LogsMixin):
|
|
217
217
|
request = self.prepare_rerun(processor_id, log_id)
|
218
218
|
response = await self._client._prepared_request(request)
|
219
219
|
|
220
|
-
print(f"Webhook call run successfully. Log available at https://www.
|
220
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.dev/dashboard/processors/{processor_id}/logs/{log_id}")
|
221
221
|
|
222
222
|
return ExternalRequestLog.model_validate(response)
|
@@ -144,7 +144,7 @@ class Mailboxes(SyncAPIResource, MailBoxesMixin):
|
|
144
144
|
)
|
145
145
|
response = self._client._prepared_request(request)
|
146
146
|
|
147
|
-
print(f"Email automation created. Mailbox available at https://www.
|
147
|
+
print(f"Email automation created. Mailbox available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
148
148
|
|
149
149
|
return Mailbox.model_validate(response)
|
150
150
|
|
@@ -278,7 +278,7 @@ class AsyncMailboxes(AsyncAPIResource, MailBoxesMixin):
|
|
278
278
|
)
|
279
279
|
response = await self._client._prepared_request(request)
|
280
280
|
|
281
|
-
print(f"Email automation created. Mailbox available at https://www.
|
281
|
+
print(f"Email automation created. Mailbox available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
282
282
|
|
283
283
|
return Mailbox.model_validate(response)
|
284
284
|
|
@@ -149,7 +149,7 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
|
|
149
149
|
)
|
150
150
|
response = self._client._prepared_request(request)
|
151
151
|
|
152
|
-
print(f"Outlook automation created. Outlook available at https://www.
|
152
|
+
print(f"Outlook automation created. Outlook available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
153
153
|
|
154
154
|
return Outlook.model_validate(response)
|
155
155
|
|
@@ -280,7 +280,7 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
|
|
280
280
|
fetch_params=fetch_params,
|
281
281
|
)
|
282
282
|
response = await self._client._prepared_request(request)
|
283
|
-
print(f"Outlook automation created. Outlook available at https://www.
|
283
|
+
print(f"Outlook automation created. Outlook available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
284
284
|
return Outlook.model_validate(response)
|
285
285
|
|
286
286
|
async def list(
|
@@ -12,7 +12,7 @@ from ..._resource import AsyncAPIResource, SyncAPIResource
|
|
12
12
|
from ..._utils.ai_models import assert_valid_model_extraction
|
13
13
|
from ..._utils.mime import MIMEData, prepare_mime_document
|
14
14
|
from ...types.browser_canvas import BrowserCanvas
|
15
|
-
from ...types.documents.extractions import
|
15
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
16
16
|
from ...types.logs import ProcessorConfig, UpdateProcessorRequest
|
17
17
|
from ...types.modalities import Modality
|
18
18
|
from ...types.pagination import ListMetadata
|
@@ -239,7 +239,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
239
239
|
n_consensus=n_consensus,
|
240
240
|
)
|
241
241
|
response = self._client._prepared_request(request)
|
242
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.
|
242
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
243
243
|
return ProcessorConfig.model_validate(response)
|
244
244
|
|
245
245
|
def list(
|
@@ -349,7 +349,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
349
349
|
temperature: float | None = None,
|
350
350
|
seed: int | None = None,
|
351
351
|
store: bool = True,
|
352
|
-
) ->
|
352
|
+
) -> RetabParsedChatCompletion:
|
353
353
|
"""Submit documents to a processor for processing.
|
354
354
|
|
355
355
|
Args:
|
@@ -361,11 +361,11 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
361
361
|
store: Whether to store the results
|
362
362
|
|
363
363
|
Returns:
|
364
|
-
|
364
|
+
RetabParsedChatCompletion: The processing result
|
365
365
|
"""
|
366
366
|
request = self.prepare_submit(processor_id=processor_id, document=document, documents=documents, temperature=temperature, seed=seed, store=store)
|
367
367
|
response = self._client._prepared_request(request)
|
368
|
-
return
|
368
|
+
return RetabParsedChatCompletion.model_validate(response)
|
369
369
|
|
370
370
|
|
371
371
|
class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
@@ -399,7 +399,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
399
399
|
n_consensus=n_consensus,
|
400
400
|
)
|
401
401
|
response = await self._client._prepared_request(request)
|
402
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.
|
402
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
403
403
|
|
404
404
|
return ProcessorConfig.model_validate(response)
|
405
405
|
|
@@ -470,7 +470,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
470
470
|
temperature: float | None = None,
|
471
471
|
seed: int | None = None,
|
472
472
|
store: bool = True,
|
473
|
-
) ->
|
473
|
+
) -> RetabParsedChatCompletion:
|
474
474
|
"""Submit documents to a processor for processing.
|
475
475
|
|
476
476
|
Args:
|
@@ -482,8 +482,8 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
482
482
|
store: Whether to store the results
|
483
483
|
|
484
484
|
Returns:
|
485
|
-
|
485
|
+
RetabParsedChatCompletion: The processing result
|
486
486
|
"""
|
487
487
|
request = self.prepare_submit(processor_id=processor_id, document=document, documents=documents, temperature=temperature, seed=seed, store=store)
|
488
488
|
response = await self._client._prepared_request(request)
|
489
|
-
return
|
489
|
+
return RetabParsedChatCompletion.model_validate(response)
|
retab/resources/usage.py
CHANGED
@@ -89,7 +89,7 @@ class Usage(SyncAPIResource, UsageMixin):
|
|
89
89
|
dict: Monthly usage data including credits consumed and limits
|
90
90
|
|
91
91
|
Raises:
|
92
|
-
|
92
|
+
RetabAPIError: If the API request fails
|
93
93
|
"""
|
94
94
|
request = self.prepare_monthly_credits_usage()
|
95
95
|
response = self._client._request(request.method, request.url, request.data, request.params)
|
@@ -176,7 +176,7 @@ class Usage(SyncAPIResource, UsageMixin):
|
|
176
176
|
],
|
177
177
|
response_format=CalendarEvent,
|
178
178
|
)
|
179
|
-
|
179
|
+
reclient.usage.log(
|
180
180
|
response_format=CalendarEvent,
|
181
181
|
completion=completion
|
182
182
|
)
|
@@ -203,7 +203,7 @@ class AsyncUsage(AsyncAPIResource, UsageMixin):
|
|
203
203
|
dict: Monthly usage data including credits consumed and limits
|
204
204
|
|
205
205
|
Raises:
|
206
|
-
|
206
|
+
RetabAPIError: If the API request fails
|
207
207
|
"""
|
208
208
|
request = self.prepare_monthly_credits_usage()
|
209
209
|
response = await self._client._request(request.method, request.url, request.data, request.params)
|
@@ -290,7 +290,7 @@ class AsyncUsage(AsyncAPIResource, UsageMixin):
|
|
290
290
|
],
|
291
291
|
response_format=CalendarEvent,
|
292
292
|
)
|
293
|
-
|
293
|
+
reclient.usage.log(
|
294
294
|
response_format=CalendarEvent,
|
295
295
|
completion=completion
|
296
296
|
)
|