retab 0.0.38__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/_utils/_model_cards/anthropic.yaml +59 -0
- retab/_utils/_model_cards/auto.yaml +43 -0
- retab/_utils/_model_cards/gemini.yaml +117 -0
- retab/_utils/_model_cards/openai.yaml +301 -0
- retab/_utils/_model_cards/xai.yaml +28 -0
- retab/_utils/ai_models.py +109 -71
- retab/_utils/responses.py +7 -7
- retab/_utils/usage/usage.py +2 -1
- retab/resources/consensus/completions.py +14 -14
- retab/resources/consensus/completions_stream.py +18 -18
- retab/resources/consensus/responses.py +5 -5
- retab/resources/consensus/responses_stream.py +5 -5
- retab/resources/documents/client.py +122 -27
- retab/resources/documents/extractions.py +22 -22
- retab/resources/evaluations/documents.py +5 -5
- retab/resources/evaluations/iterations.py +7 -7
- retab/resources/jsonlUtils.py +2 -2
- retab/resources/processors/client.py +7 -7
- retab/types/ai_models.py +41 -513
- retab/types/automations/webhooks.py +3 -3
- retab/types/completions.py +7 -7
- retab/types/documents/__init__.py +3 -0
- retab/types/documents/extractions.py +17 -17
- retab/types/documents/parse.py +32 -0
- retab/types/extractions.py +2 -2
- retab/types/logs.py +2 -2
- {retab-0.0.38.dist-info → retab-0.0.39.dist-info}/METADATA +4 -4
- {retab-0.0.38.dist-info → retab-0.0.39.dist-info}/RECORD +30 -24
- {retab-0.0.38.dist-info → retab-0.0.39.dist-info}/WHEEL +0 -0
- {retab-0.0.38.dist-info → retab-0.0.39.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
from io import IOBase
|
2
2
|
from pathlib import Path
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any, Literal
|
4
4
|
|
5
5
|
import PIL.Image
|
6
6
|
from pydantic import HttpUrl
|
@@ -12,7 +12,8 @@ from ..._utils.json_schema import load_json_schema, filter_auxiliary_fields_json
|
|
12
12
|
from ..._utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
|
13
13
|
from ..._utils.ai_models import assert_valid_model_extraction
|
14
14
|
from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
|
15
|
-
from ...types.documents.extractions import DocumentExtractRequest,
|
15
|
+
from ...types.documents.extractions import DocumentExtractRequest, RetabParsedChatCompletion
|
16
|
+
from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
|
16
17
|
from ...types.browser_canvas import BrowserCanvas
|
17
18
|
from ...types.mime import MIMEData
|
18
19
|
from ...types.modalities import Modality
|
@@ -21,7 +22,7 @@ from ...types.standards import PreparedRequest
|
|
21
22
|
from .extractions import AsyncExtractions, Extractions
|
22
23
|
|
23
24
|
|
24
|
-
def maybe_parse_to_pydantic(schema: Schema, response:
|
25
|
+
def maybe_parse_to_pydantic(schema: Schema, response: RetabParsedChatCompletion, allow_partial: bool = False) -> RetabParsedChatCompletion:
|
25
26
|
if response.choices[0].message.content:
|
26
27
|
try:
|
27
28
|
if allow_partial:
|
@@ -85,13 +86,33 @@ class BaseDocumentsMixin:
|
|
85
86
|
data={"document": mime_document.model_dump()},
|
86
87
|
)
|
87
88
|
|
89
|
+
def _prepare_parse(
|
90
|
+
self,
|
91
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
92
|
+
fast_mode: bool = False,
|
93
|
+
table_parsing_format: TableParsingFormat = "html",
|
94
|
+
image_resolution_dpi: int = 72,
|
95
|
+
browser_canvas: BrowserCanvas = "A4",
|
96
|
+
idempotency_key: str | None = None,
|
97
|
+
) -> PreparedRequest:
|
98
|
+
mime_document = prepare_mime_document(document)
|
99
|
+
|
100
|
+
parse_request = ParseRequest(
|
101
|
+
document=mime_document,
|
102
|
+
fast_mode=fast_mode,
|
103
|
+
table_parsing_format=table_parsing_format,
|
104
|
+
image_resolution_dpi=image_resolution_dpi,
|
105
|
+
browser_canvas=browser_canvas,
|
106
|
+
)
|
107
|
+
return PreparedRequest(method="POST", url="/v1/documents/parse", data=parse_request.model_dump(), idempotency_key=idempotency_key)
|
108
|
+
|
88
109
|
|
89
110
|
class Documents(SyncAPIResource, BaseDocumentsMixin):
|
90
111
|
"""Documents API wrapper"""
|
91
112
|
|
92
113
|
def __init__(self, client: Any) -> None:
|
93
114
|
super().__init__(client=client)
|
94
|
-
#self.extractions_api = Extractions(client=client)
|
115
|
+
# self.extractions_api = Extractions(client=client)
|
95
116
|
# self.batch = Batch(client=client)
|
96
117
|
|
97
118
|
def correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PIL.Image.Image:
|
@@ -198,10 +219,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
198
219
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
199
220
|
idempotency_key: str | None = None,
|
200
221
|
store: bool = False,
|
201
|
-
) ->
|
222
|
+
) -> RetabParsedChatCompletion:
|
202
223
|
"""
|
203
224
|
Process one or more documents using the Retab API for structured data extraction.
|
204
|
-
|
225
|
+
|
205
226
|
This method provides a direct interface to document extraction functionality,
|
206
227
|
intended to replace the current `.extractions.parse()` pattern.
|
207
228
|
|
@@ -218,10 +239,10 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
218
239
|
n_consensus: Number of consensus extractions to perform
|
219
240
|
idempotency_key: Idempotency key for request
|
220
241
|
store: Whether to store the document in the Retab database
|
221
|
-
|
242
|
+
|
222
243
|
Returns:
|
223
|
-
|
224
|
-
|
244
|
+
RetabParsedChatCompletion: Parsed response from the API
|
245
|
+
|
225
246
|
Raises:
|
226
247
|
ValueError: If neither document nor documents is provided, or if both are provided
|
227
248
|
HTTPException: If the request fails
|
@@ -258,16 +279,53 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
258
279
|
)
|
259
280
|
|
260
281
|
prepared_request = PreparedRequest(
|
261
|
-
method="POST",
|
262
|
-
url="/v1/documents/extract",
|
263
|
-
data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
|
264
|
-
idempotency_key=idempotency_key
|
282
|
+
method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
|
265
283
|
)
|
266
|
-
|
284
|
+
|
267
285
|
response = self._client._prepared_request(prepared_request)
|
268
286
|
|
269
287
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
270
|
-
return maybe_parse_to_pydantic(schema,
|
288
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
289
|
+
|
290
|
+
def parse(
|
291
|
+
self,
|
292
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
293
|
+
fast_mode: bool = False,
|
294
|
+
table_parsing_format: TableParsingFormat = "html",
|
295
|
+
image_resolution_dpi: int = 72,
|
296
|
+
browser_canvas: BrowserCanvas = "A4",
|
297
|
+
idempotency_key: str | None = None,
|
298
|
+
) -> ParseResult:
|
299
|
+
"""
|
300
|
+
Parse a document and extract text content from each page.
|
301
|
+
|
302
|
+
This method processes various document types and returns structured text content
|
303
|
+
along with usage information. Supports different parsing modes and formats.
|
304
|
+
|
305
|
+
Args:
|
306
|
+
document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
307
|
+
fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
|
308
|
+
table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
|
309
|
+
image_resolution_dpi: DPI for image processing. Defaults to 72.
|
310
|
+
browser_canvas: Canvas size for document rendering. Defaults to "A4".
|
311
|
+
idempotency_key: Optional idempotency key for the request.
|
312
|
+
|
313
|
+
Returns:
|
314
|
+
ParseResult: Parsed response containing document metadata, usage information, and page text content.
|
315
|
+
|
316
|
+
Raises:
|
317
|
+
HTTPException: If the request fails.
|
318
|
+
"""
|
319
|
+
request = self._prepare_parse(
|
320
|
+
document=document,
|
321
|
+
fast_mode=fast_mode,
|
322
|
+
table_parsing_format=table_parsing_format,
|
323
|
+
image_resolution_dpi=image_resolution_dpi,
|
324
|
+
browser_canvas=browser_canvas,
|
325
|
+
idempotency_key=idempotency_key,
|
326
|
+
)
|
327
|
+
response = self._client._prepared_request(request)
|
328
|
+
return ParseResult.model_validate(response)
|
271
329
|
|
272
330
|
|
273
331
|
class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
@@ -275,7 +333,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
275
333
|
|
276
334
|
def __init__(self, client: Any) -> None:
|
277
335
|
super().__init__(client=client)
|
278
|
-
#self.extractions_api = AsyncExtractions(client=client)
|
336
|
+
# self.extractions_api = AsyncExtractions(client=client)
|
279
337
|
|
280
338
|
async def create_messages(
|
281
339
|
self,
|
@@ -383,10 +441,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
383
441
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
384
442
|
idempotency_key: str | None = None,
|
385
443
|
store: bool = False,
|
386
|
-
) ->
|
444
|
+
) -> RetabParsedChatCompletion:
|
387
445
|
"""
|
388
446
|
Process one or more documents using the Retab API for structured data extraction asynchronously.
|
389
|
-
|
447
|
+
|
390
448
|
This method provides a direct interface to document extraction functionality,
|
391
449
|
intended to replace the current `.extractions.parse()` pattern.
|
392
450
|
|
@@ -403,10 +461,10 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
403
461
|
n_consensus: Number of consensus extractions to perform
|
404
462
|
idempotency_key: Idempotency key for request
|
405
463
|
store: Whether to store the document in the Retab database
|
406
|
-
|
464
|
+
|
407
465
|
Returns:
|
408
|
-
|
409
|
-
|
466
|
+
RetabParsedChatCompletion: Parsed response from the API
|
467
|
+
|
410
468
|
Raises:
|
411
469
|
ValueError: If neither document nor documents is provided, or if both are provided
|
412
470
|
HTTPException: If the request fails
|
@@ -443,13 +501,50 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
443
501
|
)
|
444
502
|
|
445
503
|
prepared_request = PreparedRequest(
|
446
|
-
method="POST",
|
447
|
-
url="/v1/documents/extract",
|
448
|
-
data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True),
|
449
|
-
idempotency_key=idempotency_key
|
504
|
+
method="POST", url="/v1/documents/extract", data=request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key
|
450
505
|
)
|
451
|
-
|
506
|
+
|
452
507
|
response = await self._client._prepared_request(prepared_request)
|
453
508
|
|
454
509
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
455
|
-
return maybe_parse_to_pydantic(schema,
|
510
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
511
|
+
|
512
|
+
async def parse(
|
513
|
+
self,
|
514
|
+
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
515
|
+
fast_mode: bool = False,
|
516
|
+
table_parsing_format: TableParsingFormat = "html",
|
517
|
+
image_resolution_dpi: int = 72,
|
518
|
+
browser_canvas: BrowserCanvas = "A4",
|
519
|
+
idempotency_key: str | None = None,
|
520
|
+
) -> ParseResult:
|
521
|
+
"""
|
522
|
+
Parse a document and extract text content from each page asynchronously.
|
523
|
+
|
524
|
+
This method processes various document types and returns structured text content
|
525
|
+
along with usage information. Supports different parsing modes and formats.
|
526
|
+
|
527
|
+
Args:
|
528
|
+
document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
529
|
+
fast_mode: Use fast mode for parsing (may reduce quality). Defaults to False.
|
530
|
+
table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
|
531
|
+
image_resolution_dpi: DPI for image processing. Defaults to 72.
|
532
|
+
browser_canvas: Canvas size for document rendering. Defaults to "A4".
|
533
|
+
idempotency_key: Optional idempotency key for the request.
|
534
|
+
|
535
|
+
Returns:
|
536
|
+
ParseResult: Parsed response containing document metadata, usage information, and page text content.
|
537
|
+
|
538
|
+
Raises:
|
539
|
+
HTTPException: If the request fails.
|
540
|
+
"""
|
541
|
+
request = self._prepare_parse(
|
542
|
+
document=document,
|
543
|
+
fast_mode=fast_mode,
|
544
|
+
table_parsing_format=table_parsing_format,
|
545
|
+
image_resolution_dpi=image_resolution_dpi,
|
546
|
+
browser_canvas=browser_canvas,
|
547
|
+
idempotency_key=idempotency_key,
|
548
|
+
)
|
549
|
+
response = await self._client._prepared_request(request)
|
550
|
+
return ParseResult.model_validate(response)
|
@@ -19,14 +19,14 @@ from ..._utils.json_schema import filter_auxiliary_fields_json, load_json_schema
|
|
19
19
|
from ..._utils.mime import MIMEData, prepare_mime_document
|
20
20
|
from ..._utils.stream_context_managers import as_async_context_manager, as_context_manager
|
21
21
|
from ...types.chat import ChatCompletionRetabMessage
|
22
|
-
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest,
|
22
|
+
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice
|
23
23
|
from ...types.browser_canvas import BrowserCanvas
|
24
24
|
from ...types.modalities import Modality
|
25
25
|
from ...types.schemas.object import Schema
|
26
26
|
from ...types.standards import PreparedRequest
|
27
27
|
|
28
28
|
|
29
|
-
def maybe_parse_to_pydantic(schema: Schema, response:
|
29
|
+
def maybe_parse_to_pydantic(schema: Schema, response: RetabParsedChatCompletion, allow_partial: bool = False) -> RetabParsedChatCompletion:
|
30
30
|
if response.choices[0].message.content:
|
31
31
|
try:
|
32
32
|
if allow_partial:
|
@@ -152,7 +152,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
152
152
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
153
153
|
idempotency_key: str | None = None,
|
154
154
|
store: bool = False,
|
155
|
-
) ->
|
155
|
+
) -> RetabParsedChatCompletion:
|
156
156
|
"""
|
157
157
|
Process one or more documents using the Retab API.
|
158
158
|
|
@@ -170,7 +170,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
170
170
|
idempotency_key: Idempotency key for request
|
171
171
|
store: Whether to store the document in the Retab database
|
172
172
|
Returns:
|
173
|
-
|
173
|
+
RetabParsedChatCompletion: Parsed response from the API
|
174
174
|
Raises:
|
175
175
|
ValueError: If neither document nor documents is provided, or if both are provided
|
176
176
|
HTTPException: If the request fails
|
@@ -195,7 +195,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
195
195
|
response = self._client._prepared_request(request)
|
196
196
|
|
197
197
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
198
|
-
return maybe_parse_to_pydantic(schema,
|
198
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
199
199
|
|
200
200
|
@as_context_manager
|
201
201
|
def stream(
|
@@ -212,7 +212,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
212
212
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
213
213
|
idempotency_key: str | None = None,
|
214
214
|
store: bool = False,
|
215
|
-
) -> Generator[
|
215
|
+
) -> Generator[RetabParsedChatCompletion, None, None]:
|
216
216
|
"""
|
217
217
|
Process one or more documents using the Retab API with streaming enabled.
|
218
218
|
|
@@ -231,7 +231,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
231
231
|
store: Whether to store the document in the Retab database
|
232
232
|
|
233
233
|
Returns:
|
234
|
-
Generator[
|
234
|
+
Generator[RetabParsedChatCompletion]: Stream of parsed responses
|
235
235
|
Raises:
|
236
236
|
ValueError: If neither document nor documents is provided, or if both are provided
|
237
237
|
HTTPException: If the request fails
|
@@ -266,16 +266,16 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
266
266
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
267
267
|
|
268
268
|
# Request the stream and return a context manager
|
269
|
-
ui_parsed_chat_completion_cum_chunk:
|
270
|
-
# Initialize the
|
271
|
-
ui_parsed_completion:
|
269
|
+
ui_parsed_chat_completion_cum_chunk: RetabParsedChatCompletionChunk | None = None
|
270
|
+
# Initialize the RetabParsedChatCompletion object
|
271
|
+
ui_parsed_completion: RetabParsedChatCompletion = RetabParsedChatCompletion(
|
272
272
|
id="",
|
273
273
|
created=0,
|
274
274
|
model="",
|
275
275
|
object="chat.completion",
|
276
276
|
likelihoods={},
|
277
277
|
choices=[
|
278
|
-
|
278
|
+
RetabParsedChoice(
|
279
279
|
index=0,
|
280
280
|
message=ParsedChatCompletionMessage(content="", role="assistant"),
|
281
281
|
finish_reason=None,
|
@@ -286,7 +286,7 @@ class Extractions(SyncAPIResource, BaseExtractionsMixin):
|
|
286
286
|
for chunk_json in self._client._prepared_request_stream(request):
|
287
287
|
if not chunk_json:
|
288
288
|
continue
|
289
|
-
ui_parsed_chat_completion_cum_chunk =
|
289
|
+
ui_parsed_chat_completion_cum_chunk = RetabParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
|
290
290
|
# Basic stuff
|
291
291
|
ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
|
292
292
|
ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
|
@@ -353,7 +353,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
353
353
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
354
354
|
idempotency_key: str | None = None,
|
355
355
|
store: bool = False,
|
356
|
-
) ->
|
356
|
+
) -> RetabParsedChatCompletion:
|
357
357
|
"""
|
358
358
|
Extract structured data from one or more documents asynchronously.
|
359
359
|
|
@@ -371,7 +371,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
371
371
|
idempotency_key: Idempotency key for request
|
372
372
|
store: Whether to store the document in the Retab database
|
373
373
|
Returns:
|
374
|
-
|
374
|
+
RetabParsedChatCompletion: Parsed response from the API.
|
375
375
|
Raises:
|
376
376
|
ValueError: If neither document nor documents is provided, or if both are provided
|
377
377
|
"""
|
@@ -392,7 +392,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
392
392
|
)
|
393
393
|
response = await self._client._prepared_request(request)
|
394
394
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
395
|
-
return maybe_parse_to_pydantic(schema,
|
395
|
+
return maybe_parse_to_pydantic(schema, RetabParsedChatCompletion.model_validate(response))
|
396
396
|
|
397
397
|
@as_async_context_manager
|
398
398
|
async def stream(
|
@@ -409,7 +409,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
409
409
|
n_consensus: int = PydanticUndefined, # type: ignore[assignment]
|
410
410
|
idempotency_key: str | None = None,
|
411
411
|
store: bool = False,
|
412
|
-
) -> AsyncGenerator[
|
412
|
+
) -> AsyncGenerator[RetabParsedChatCompletion, None]:
|
413
413
|
"""
|
414
414
|
Extract structured data from one or more documents asynchronously with streaming.
|
415
415
|
|
@@ -427,7 +427,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
427
427
|
idempotency_key: Idempotency key for request
|
428
428
|
store: Whether to store the document in the Retab database
|
429
429
|
Returns:
|
430
|
-
AsyncGenerator[
|
430
|
+
AsyncGenerator[RetabParsedChatCompletion, None]: Stream of parsed responses.
|
431
431
|
Raises:
|
432
432
|
ValueError: If neither document nor documents is provided, or if both are provided
|
433
433
|
|
@@ -460,16 +460,16 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
460
460
|
idempotency_key=idempotency_key,
|
461
461
|
)
|
462
462
|
schema = Schema(json_schema=load_json_schema(json_schema))
|
463
|
-
ui_parsed_chat_completion_cum_chunk:
|
464
|
-
# Initialize the
|
465
|
-
ui_parsed_completion:
|
463
|
+
ui_parsed_chat_completion_cum_chunk: RetabParsedChatCompletionChunk | None = None
|
464
|
+
# Initialize the RetabParsedChatCompletion object
|
465
|
+
ui_parsed_completion: RetabParsedChatCompletion = RetabParsedChatCompletion(
|
466
466
|
id="",
|
467
467
|
created=0,
|
468
468
|
model="",
|
469
469
|
object="chat.completion",
|
470
470
|
likelihoods={},
|
471
471
|
choices=[
|
472
|
-
|
472
|
+
RetabParsedChoice(
|
473
473
|
index=0,
|
474
474
|
message=ParsedChatCompletionMessage(content="", role="assistant"),
|
475
475
|
finish_reason=None,
|
@@ -481,7 +481,7 @@ class AsyncExtractions(AsyncAPIResource, BaseExtractionsMixin):
|
|
481
481
|
async for chunk_json in self._client._prepared_request_stream(request):
|
482
482
|
if not chunk_json:
|
483
483
|
continue
|
484
|
-
ui_parsed_chat_completion_cum_chunk =
|
484
|
+
ui_parsed_chat_completion_cum_chunk = RetabParsedChatCompletionChunk.model_validate(chunk_json).chunk_accumulator(ui_parsed_chat_completion_cum_chunk)
|
485
485
|
# Basic stuff
|
486
486
|
ui_parsed_completion.id = ui_parsed_chat_completion_cum_chunk.id
|
487
487
|
ui_parsed_completion.created = ui_parsed_chat_completion_cum_chunk.created
|
@@ -10,7 +10,7 @@ from ..._utils.mime import prepare_mime_document
|
|
10
10
|
from ...types.evaluations import DocumentItem, EvaluationDocument, PatchEvaluationDocumentRequest
|
11
11
|
from ...types.mime import MIMEData
|
12
12
|
from ...types.standards import PreparedRequest, DeleteResponse, FieldUnset
|
13
|
-
from ...types.documents.extractions import
|
13
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
14
14
|
|
15
15
|
|
16
16
|
class DocumentsMixin:
|
@@ -134,13 +134,13 @@ class Documents(SyncAPIResource, DocumentsMixin):
|
|
134
134
|
request = self.prepare_delete(evaluation_id, document_id)
|
135
135
|
return self._client._prepared_request(request)
|
136
136
|
|
137
|
-
def llm_annotate(self, evaluation_id: str, document_id: str) ->
|
137
|
+
def llm_annotate(self, evaluation_id: str, document_id: str) -> RetabParsedChatCompletion:
|
138
138
|
"""
|
139
139
|
Annotate a document with an LLM. This method updates the document (within the evaluation) with the latest extraction.
|
140
140
|
"""
|
141
141
|
request = self.prepare_llm_annotate(evaluation_id, document_id)
|
142
142
|
response = self._client._prepared_request(request)
|
143
|
-
return
|
143
|
+
return RetabParsedChatCompletion(**response)
|
144
144
|
|
145
145
|
|
146
146
|
class AsyncDocuments(AsyncAPIResource, DocumentsMixin):
|
@@ -223,11 +223,11 @@ class AsyncDocuments(AsyncAPIResource, DocumentsMixin):
|
|
223
223
|
request = self.prepare_delete(evaluation_id, document_id)
|
224
224
|
return await self._client._prepared_request(request)
|
225
225
|
|
226
|
-
async def llm_annotate(self, evaluation_id: str, document_id: str) ->
|
226
|
+
async def llm_annotate(self, evaluation_id: str, document_id: str) -> RetabParsedChatCompletion:
|
227
227
|
"""
|
228
228
|
Annotate a document with an LLM.
|
229
229
|
This method updates the document (within the evaluation) with the latest extraction.
|
230
230
|
"""
|
231
231
|
request = self.prepare_llm_annotate(evaluation_id, document_id)
|
232
232
|
response = await self._client._prepared_request(request)
|
233
|
-
return
|
233
|
+
return RetabParsedChatCompletion(**response)
|
@@ -9,7 +9,7 @@ from ...types.inference_settings import InferenceSettings
|
|
9
9
|
from ...types.metrics import DistancesResult
|
10
10
|
from ...types.modalities import Modality
|
11
11
|
from ...types.standards import DeleteResponse, PreparedRequest, FieldUnset
|
12
|
-
from ...types.documents.extractions import
|
12
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
13
13
|
|
14
14
|
|
15
15
|
class IterationsMixin:
|
@@ -238,7 +238,7 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
238
238
|
response = self._client._prepared_request(request)
|
239
239
|
return Iteration(**response)
|
240
240
|
|
241
|
-
def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) ->
|
241
|
+
def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) -> RetabParsedChatCompletion:
|
242
242
|
"""
|
243
243
|
Process a single document within an iteration.
|
244
244
|
This method updates the iteration document with the latest extraction.
|
@@ -248,13 +248,13 @@ class Iterations(SyncAPIResource, IterationsMixin):
|
|
248
248
|
document_id: The ID of the document
|
249
249
|
|
250
250
|
Returns:
|
251
|
-
|
251
|
+
RetabParsedChatCompletion: The parsed chat completion
|
252
252
|
Raises:
|
253
253
|
HTTPException if the request fails
|
254
254
|
"""
|
255
255
|
request = self.prepare_process_document(evaluation_id, iteration_id, document_id)
|
256
256
|
response = self._client._prepared_request(request)
|
257
|
-
return
|
257
|
+
return RetabParsedChatCompletion(**response)
|
258
258
|
|
259
259
|
def status(self, evaluation_id: str, iteration_id: str) -> IterationDocumentStatusResponse:
|
260
260
|
"""
|
@@ -417,7 +417,7 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
417
417
|
response = await self._client._prepared_request(request)
|
418
418
|
return Iteration(**response)
|
419
419
|
|
420
|
-
async def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) ->
|
420
|
+
async def process_document(self, evaluation_id: str, iteration_id: str, document_id: str) -> RetabParsedChatCompletion:
|
421
421
|
"""
|
422
422
|
Process a single document within an iteration.
|
423
423
|
This method updates the iteration document with the latest extraction.
|
@@ -427,13 +427,13 @@ class AsyncIterations(AsyncAPIResource, IterationsMixin):
|
|
427
427
|
document_id: The ID of the document
|
428
428
|
|
429
429
|
Returns:
|
430
|
-
|
430
|
+
RetabParsedChatCompletion: The parsed chat completion
|
431
431
|
Raises:
|
432
432
|
HTTPException if the request fails
|
433
433
|
"""
|
434
434
|
request = self.prepare_process_document(evaluation_id, iteration_id, document_id)
|
435
435
|
response = await self._client._prepared_request(request)
|
436
|
-
return
|
436
|
+
return RetabParsedChatCompletion(**response)
|
437
437
|
|
438
438
|
async def status(self, evaluation_id: str, iteration_id: str) -> IterationDocumentStatusResponse:
|
439
439
|
"""
|
retab/resources/jsonlUtils.py
CHANGED
@@ -18,7 +18,7 @@ from pydantic_core import PydanticUndefined
|
|
18
18
|
from tqdm import tqdm
|
19
19
|
|
20
20
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
21
|
-
from .._utils.ai_models import assert_valid_model_extraction,
|
21
|
+
from .._utils.ai_models import assert_valid_model_extraction, get_provider_for_model
|
22
22
|
from .._utils.chat import convert_to_anthropic_format, convert_to_openai_format, separate_messages
|
23
23
|
from .._utils.display import Metrics, display_metrics, process_dataset_and_compute_metrics
|
24
24
|
from .._utils.json_schema import load_json_schema
|
@@ -278,7 +278,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
|
|
278
278
|
Returns:
|
279
279
|
A tuple of (client instance, provider type string)
|
280
280
|
"""
|
281
|
-
provider =
|
281
|
+
provider = get_provider_for_model(model)
|
282
282
|
|
283
283
|
if provider == "OpenAI":
|
284
284
|
return OpenAI(api_key=self._client.headers["OpenAI-Api-Key"]), provider
|
@@ -12,7 +12,7 @@ from ..._resource import AsyncAPIResource, SyncAPIResource
|
|
12
12
|
from ..._utils.ai_models import assert_valid_model_extraction
|
13
13
|
from ..._utils.mime import MIMEData, prepare_mime_document
|
14
14
|
from ...types.browser_canvas import BrowserCanvas
|
15
|
-
from ...types.documents.extractions import
|
15
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
16
16
|
from ...types.logs import ProcessorConfig, UpdateProcessorRequest
|
17
17
|
from ...types.modalities import Modality
|
18
18
|
from ...types.pagination import ListMetadata
|
@@ -349,7 +349,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
349
349
|
temperature: float | None = None,
|
350
350
|
seed: int | None = None,
|
351
351
|
store: bool = True,
|
352
|
-
) ->
|
352
|
+
) -> RetabParsedChatCompletion:
|
353
353
|
"""Submit documents to a processor for processing.
|
354
354
|
|
355
355
|
Args:
|
@@ -361,11 +361,11 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
361
361
|
store: Whether to store the results
|
362
362
|
|
363
363
|
Returns:
|
364
|
-
|
364
|
+
RetabParsedChatCompletion: The processing result
|
365
365
|
"""
|
366
366
|
request = self.prepare_submit(processor_id=processor_id, document=document, documents=documents, temperature=temperature, seed=seed, store=store)
|
367
367
|
response = self._client._prepared_request(request)
|
368
|
-
return
|
368
|
+
return RetabParsedChatCompletion.model_validate(response)
|
369
369
|
|
370
370
|
|
371
371
|
class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
@@ -470,7 +470,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
470
470
|
temperature: float | None = None,
|
471
471
|
seed: int | None = None,
|
472
472
|
store: bool = True,
|
473
|
-
) ->
|
473
|
+
) -> RetabParsedChatCompletion:
|
474
474
|
"""Submit documents to a processor for processing.
|
475
475
|
|
476
476
|
Args:
|
@@ -482,8 +482,8 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
482
482
|
store: Whether to store the results
|
483
483
|
|
484
484
|
Returns:
|
485
|
-
|
485
|
+
RetabParsedChatCompletion: The processing result
|
486
486
|
"""
|
487
487
|
request = self.prepare_submit(processor_id=processor_id, document=document, documents=documents, temperature=temperature, seed=seed, store=store)
|
488
488
|
response = await self._client._prepared_request(request)
|
489
|
-
return
|
489
|
+
return RetabParsedChatCompletion.model_validate(response)
|