retab 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/_resource.py +5 -5
- retab/_utils/_model_cards/anthropic.yaml +59 -0
- retab/_utils/_model_cards/auto.yaml +43 -0
- retab/_utils/_model_cards/gemini.yaml +117 -0
- retab/_utils/_model_cards/openai.yaml +301 -0
- retab/_utils/_model_cards/xai.yaml +28 -0
- retab/_utils/ai_models.py +109 -71
- retab/_utils/chat.py +20 -20
- retab/_utils/responses.py +14 -14
- retab/_utils/usage/usage.py +5 -4
- retab/client.py +22 -22
- retab/resources/consensus/client.py +2 -2
- retab/resources/consensus/completions.py +26 -26
- retab/resources/consensus/completions_stream.py +27 -27
- retab/resources/consensus/responses.py +11 -11
- retab/resources/consensus/responses_stream.py +15 -15
- retab/resources/documents/client.py +297 -16
- retab/resources/documents/extractions.py +39 -39
- retab/resources/evaluations/documents.py +5 -5
- retab/resources/evaluations/iterations.py +7 -7
- retab/resources/jsonlUtils.py +7 -7
- retab/resources/processors/automations/endpoints.py +2 -2
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +2 -2
- retab/resources/processors/automations/outlook.py +2 -2
- retab/resources/processors/client.py +9 -9
- retab/resources/usage.py +4 -4
- retab/types/ai_models.py +41 -513
- retab/types/automations/mailboxes.py +1 -1
- retab/types/automations/webhooks.py +3 -3
- retab/types/chat.py +1 -1
- retab/types/completions.py +10 -10
- retab/types/documents/__init__.py +3 -0
- retab/types/documents/create_messages.py +2 -2
- retab/types/documents/extractions.py +19 -19
- retab/types/documents/parse.py +32 -0
- retab/types/extractions.py +4 -4
- retab/types/logs.py +2 -2
- retab/types/schemas/object.py +3 -3
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/METADATA +72 -72
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/RECORD +45 -39
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/WHEEL +0 -0
- {retab-0.0.37.dist-info → retab-0.0.39.dist-info}/top_level.txt +0 -0
retab/types/completions.py
CHANGED
@@ -7,15 +7,15 @@ from openai.types.shared_params.reasoning import Reasoning
 from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
 from pydantic import BaseModel, ConfigDict, Field
 
-from .._utils.ai_models import
+from .._utils.ai_models import get_provider_for_model
 from .ai_models import AIProvider
-from .chat import
+from .chat import ChatCompletionRetabMessage
 
 
-class
+class RetabChatCompletionsRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
-    messages: list[
+    messages: list[ChatCompletionRetabMessage] = Field(..., description="Messages to be parsed")
     response_format: ResponseFormatJSONSchema = Field(..., description="response format used to validate the output data.")
     temperature: float = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
     reasoning_effort: ChatCompletionReasoningEffort = Field(
@@ -34,13 +34,13 @@ class UiChatCompletionsRequest(BaseModel):
        Returns:
            AIProvider: The AI provider corresponding to the given model.
        """
-        return
+        return get_provider_for_model(self.model)
 
 
-class
+class RetabChatCompletionsParseRequest(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
     model: str = Field(..., description="Model used for chat completion")
-    messages: list[
+    messages: list[ChatCompletionRetabMessage] = Field(..., description="Messages to be parsed")
     json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
     temperature: float = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
     reasoning_effort: ChatCompletionReasoningEffort = Field(
@@ -59,10 +59,10 @@ class UiChatCompletionsParseRequest(BaseModel):
        Returns:
            AIProvider: The AI provider corresponding to the given model.
        """
-        return
+        return get_provider_for_model(self.model)
 
 
-class
+class RetabChatResponseCreateRequest(BaseModel):
     input: Union[str, ResponseInputParam] = Field(..., description="Input to be parsed")
     instructions: Optional[str] = None
 
@@ -87,4 +87,4 @@ class UiChatResponseCreateRequest(BaseModel):
        Returns:
            AIProvider: The AI provider corresponding to the given model.
        """
-        return
+        return get_provider_for_model(self.model)
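The hunks above rename the `Ui*`-prefixed request models to `Retab*` and route provider lookup through `get_provider_for_model`. A minimal sketch of the renamed parse-request model, assuming only the fields visible in this diff; the message and schema values are illustrative, and `reasoning_effort` is left at its (unshown) default:

```python
# Sketch only: uses just the fields shown in the diff above
# (model, messages, json_schema, temperature).
from retab.types.completions import RetabChatCompletionsParseRequest
from retab.types.chat import ChatCompletionRetabMessage
from retab._utils.ai_models import get_provider_for_model

request = RetabChatCompletionsParseRequest(
    model="gpt-4.1-nano",
    messages=[ChatCompletionRetabMessage(role="user", content="Extract the invoice total.")],
    json_schema={"type": "object", "properties": {"total": {"type": "number"}}},
    temperature=0.0,
)

# The helper imported in the first hunk maps a model name to its AIProvider;
# the method whose body becomes `return get_provider_for_model(self.model)` wraps the same call.
provider = get_provider_for_model(request.model)
```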
retab/types/documents/create_messages.py
CHANGED
@@ -14,7 +14,7 @@ from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_
 from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
 from ..._utils.display import count_image_tokens, count_text_tokens
 from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
-from ..chat import
+from ..chat import ChatCompletionRetabMessage
 from ..mime import MIMEData
 from ..modalities import Modality
 from ..browser_canvas import BrowserCanvas
@@ -44,7 +44,7 @@ class DocumentCreateInputRequest(DocumentCreateMessageRequest):
 class DocumentMessage(BaseModel):
     id: str = Field(description="A unique identifier for the document loading.")
     object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
-    messages: List[
+    messages: List[ChatCompletionRetabMessage] = Field(description="A list of messages containing the document content and metadata.")
     created: int = Field(description="The Unix timestamp (in seconds) of when the document was loaded.")
     modality: Modality = Field(description="The modality of the document to load.")
 
retab/types/documents/extractions.py
CHANGED
@@ -17,7 +17,7 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_fiel
 
 from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
 from ..ai_models import Amount
-from ..chat import
+from ..chat import ChatCompletionRetabMessage
 from ..mime import MIMEData
 from ..modalities import Modality
 from ..browser_canvas import BrowserCanvas
@@ -96,7 +96,7 @@ class FieldLocation(BaseModel):
     match_level: Literal["token", "line", "block"] | None = Field(default=None, description="The level of the match (token, line, block)")
 
 
-class
+class RetabParsedChoice(ParsedChoice):
     # Adaptable ParsedChoice that allows None for the finish_reason
     finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] | None = None # type: ignore
     field_locations: dict[str, list[FieldLocation]] | None = Field(default=None, description="The locations of the fields in the document, if available")
@@ -106,9 +106,9 @@ class UiParsedChoice(ParsedChoice):
 LikelihoodsSource = Literal["consensus", "log_probs"]
 
 
-class
+class RetabParsedChatCompletion(ParsedChatCompletion):
     extraction_id: str | None = None
-    choices: list[
+    choices: list[RetabParsedChoice] # type: ignore
     # Additional metadata fields (UIForm)
     likelihoods: Optional[dict[str, Any]] = Field(
         default=None, description="Object defining the uncertainties of the fields extracted when using consensus. Follows the same structure as the extraction object."
@@ -146,7 +146,7 @@ class UiResponse(Response):
 
 
 class LogExtractionRequest(BaseModel):
-    messages: list[
+    messages: list[ChatCompletionRetabMessage] | None = None # TODO: compatibility with Anthropic
     openai_messages: list[ChatCompletionMessageParam] | None = None
     openai_responses_input: list[ResponseInputItemParam] | None = None
     anthropic_messages: list[MessageParam] | None = None
@@ -159,7 +159,7 @@ class LogExtractionRequest(BaseModel):
        ),
        description="Document analyzed, if not provided a dummy one will be created with the text 'No document provided'",
    )
-    completion: dict |
+    completion: dict | RetabParsedChatCompletion | Message | ParsedChatCompletion | ChatCompletion | None = None
     openai_responses_output: Response | None = None
     json_schema: dict[str, Any]
     model: str
@@ -215,7 +215,7 @@ class LogExtractionResponse(BaseModel):
     error_message: str | None = None
 
 
-# DocumentExtractResponse =
+# DocumentExtractResponse = RetabParsedChatCompletion
 
 
 ###### I'll place here for now -- New Streaming API
@@ -227,7 +227,7 @@ class LogExtractionResponse(BaseModel):
 # - schema_validation_error: ErrorDetail | None = None # The error in the schema validation of the total accumulated content
 
 
-class
+class RetabParsedChoiceDeltaChunk(ChoiceDeltaChunk):
     flat_likelihoods: dict[str, float] = {}
     flat_parsed: dict[str, Any] = {}
     flat_deleted_keys: list[str] = []
@@ -236,13 +236,13 @@ class UiParsedChoiceDeltaChunk(ChoiceDeltaChunk):
     key_mapping: dict[str, Optional[str]] | None = Field(default=None, description="Mapping of consensus keys to original model keys")
 
 
-class
-    delta:
+class RetabParsedChoiceChunk(ChoiceChunk):
+    delta: RetabParsedChoiceDeltaChunk # type: ignore
 
 
-class
+class RetabParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
     extraction_id: str | None = None
-    choices: list[
+    choices: list[RetabParsedChoiceChunk] # type: ignore
     schema_validation_error: ErrorDetail | None = None
     # Timestamps
     request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the request")
@@ -273,16 +273,16 @@ class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
                return None
        return None
 
-    def chunk_accumulator(self, previous_cumulated_chunk: "
+    def chunk_accumulator(self, previous_cumulated_chunk: "RetabParsedChatCompletionChunk | None" = None) -> "RetabParsedChatCompletionChunk":
        """
-        Accumulate the chunk into the state, returning a new
+        Accumulate the chunk into the state, returning a new RetabParsedChatCompletionChunk with the accumulated content that could be yielded alone to generate the same state.
        """
 
-        def safe_get_delta(chnk: "
+        def safe_get_delta(chnk: "RetabParsedChatCompletionChunk | None", index: int) -> RetabParsedChoiceDeltaChunk:
            if chnk is not None and index < len(chnk.choices):
                return chnk.choices[index].delta
            else:
-                return
+                return RetabParsedChoiceDeltaChunk(
                    content="",
                    flat_parsed={},
                    flat_likelihoods={},
@@ -313,7 +313,7 @@ class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
        last_token_at = self.last_token_at
        request_at = self.request_at
 
-        return
+        return RetabParsedChatCompletionChunk(
            extraction_id=self.extraction_id,
            id=self.id,
            created=self.created,
@@ -321,8 +321,8 @@ class UiParsedChatCompletionChunk(StreamingBaseModel, ChatCompletionChunk):
            object=self.object,
            usage=usage,
            choices=[
-
-                    delta=
+                RetabParsedChoiceChunk(
+                    delta=RetabParsedChoiceDeltaChunk(
                        content=acc_content[i],
                        flat_parsed=acc_flat_parsed[i],
                        flat_likelihoods=acc_flat_likelihoods[i],
|
|
1
|
+
from typing import Literal
|
2
|
+
from pydantic import BaseModel, Field
|
3
|
+
|
4
|
+
from ..mime import MIMEData, BaseMIMEData
|
5
|
+
from ..browser_canvas import BrowserCanvas
|
6
|
+
|
7
|
+
TableParsingFormat = Literal["markdown", "yaml", "html", "json"]
|
8
|
+
|
9
|
+
|
10
|
+
class RetabUsage(BaseModel):
|
11
|
+
"""Usage information for document processing."""
|
12
|
+
|
13
|
+
page_count: int = Field(..., description="Number of pages processed")
|
14
|
+
credits: float = Field(..., description="Credits consumed for processing")
|
15
|
+
|
16
|
+
|
17
|
+
class ParseRequest(BaseModel):
|
18
|
+
"""Request model for document parsing."""
|
19
|
+
|
20
|
+
document: MIMEData = Field(..., description="Document to parse")
|
21
|
+
fast_mode: bool = Field(default=False, description="Use fast mode for parsing (may reduce quality)")
|
22
|
+
table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
|
23
|
+
image_resolution_dpi: int = Field(default=72, description="DPI for image processing")
|
24
|
+
browser_canvas: BrowserCanvas = Field(default="A4", description="Canvas size for document rendering")
|
25
|
+
|
26
|
+
|
27
|
+
class ParseResult(BaseModel):
|
28
|
+
"""Result of document parsing."""
|
29
|
+
|
30
|
+
document: BaseMIMEData = Field(..., description="Processed document metadata")
|
31
|
+
usage: RetabUsage = Field(..., description="Processing usage information")
|
32
|
+
pages: list[str] = Field(..., description="Text content of each page")
|
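The new module pairs a `ParseRequest` (document plus parsing options) with a `ParseResult` (per-page text plus `RetabUsage`). A small sketch of the shape, assuming a `MIMEData` instance is supplied from elsewhere; `retab.types.mime` is resolved from the relative import `..mime` above:

```python
from retab.types.documents.parse import ParseRequest, ParseResult
from retab.types.mime import MIMEData  # resolved from the relative import "..mime"


def build_request(document: MIMEData) -> ParseRequest:
    # Field values are illustrative; only fields declared in the new module are used.
    return ParseRequest(
        document=document,
        fast_mode=False,                  # default; True may reduce quality
        table_parsing_format="markdown",  # default is "html"; "yaml" and "json" are also allowed
        image_resolution_dpi=150,         # default is 72
    )


def page_texts(result: ParseResult) -> list[str]:
    # ParseResult pairs per-page text with RetabUsage (page_count, credits).
    return result.pages
```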
retab/types/extractions.py
CHANGED
@@ -6,8 +6,8 @@ from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
 from pydantic import BaseModel, Field, computed_field, model_validator
 
-from
-from
+from retab.types.chat import ChatCompletionRetabMessage
+from retab.types.documents.extractions import RetabParsedChatCompletion
 
 from .._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
 from .ai_models import Amount
@@ -35,7 +35,7 @@ class ExtractionTimingStep(BaseModel):
 
 class Extraction(BaseModel):
     id: str = Field(default_factory=lambda: "extr_" + nanoid.generate(), description="Unique identifier of the analysis")
-    messages: list[
+    messages: list[ChatCompletionRetabMessage] = Field(default_factory=list)
     messages_gcs: str = Field(..., description="GCS path to the messages")
     file_gcs_paths: list[str] = Field(..., description="GCS paths to the files")
     file_ids: list[str] = Field(..., description="IDs of the files")
@@ -44,7 +44,7 @@ class Extraction(BaseModel):
     file_id: str = Field(default="", description="ID of the first file (deprecated)")
 
     status: Literal["success", "failed"] = Field(..., description="Whether the analysis was successful")
-    completion:
+    completion: RetabParsedChatCompletion | ChatCompletion = Field(..., description="Response generated by the analysis")
     json_schema: Any = Field(..., description="Response format (JSON Schema or pydantic_v2.BaseModel)")
     model: str = Field(..., description="Model used for the analysis")
     temperature: float = Field(default=0.0, description="Temperature used for the analysis")
retab/types/logs.py
CHANGED
@@ -11,7 +11,7 @@ from .._utils.json_schema import compute_schema_data_id
 from .._utils.mime import generate_blake2b_hash_from_string
 from .._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
 from .ai_models import Amount
-from .documents.extractions import
+from .documents.extractions import RetabParsedChatCompletion
 from .mime import BaseMIMEData
 from .modalities import Modality
 from .pagination import ListMetadata
@@ -199,7 +199,7 @@ class AutomationLog(BaseModel):
     organization_id: str
     created_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(datetime.timezone.utc))
     automation_snapshot: AutomationConfig
-    completion:
+    completion: RetabParsedChatCompletion | ChatCompletion
     file_metadata: Optional[BaseMIMEData]
     external_request_log: Optional[ExternalRequestLog]
     extraction_id: Optional[str] = Field(default=None, description="ID of the extraction")
retab/types/schemas/object.py
CHANGED
@@ -27,7 +27,7 @@ from ..._utils.json_schema import (
 )
 from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
 from ...types.standards import StreamingBaseModel
-from ..chat import
+from ..chat import ChatCompletionRetabMessage
 
 
 class PartialSchema(BaseModel):
@@ -609,8 +609,8 @@ You can easily identify the fields that require a source by the `quote___[attrib
        return data
 
    @property
-    def messages(self) -> list[
-        return [
+    def messages(self) -> list[ChatCompletionRetabMessage]:
+        return [ChatCompletionRetabMessage(role="developer", content=self.system_prompt)]
 
    @model_validator(mode="after")
    def model_after_validator(self) -> Self:
{retab-0.0.37.dist-info → retab-0.0.39.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: retab
-Version: 0.0.
+Version: 0.0.39
 Summary: Retab official python library
 Home-page: https://github.com/Retab-dev/retab
 Author: Retab
@@ -41,18 +41,18 @@ Requires-Dist: tiktoken
 Requires-Dist: truststore
 Requires-Dist: ruff
 
-#
+# Retab
 
 <div align="center" style="margin-bottom: 1em;">
 
-<img src="https://raw.githubusercontent.com/
+<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/retab-logo.png" alt="Retab Logo" width="150">
 
 
 *The AI Automation Platform*
 
-Made with love by the team at [
+Made with love by the team at [Retab](https://retab.dev) 🤍.
 
-[Our Website](https://
+[Our Website](https://retab.dev) | [Documentation](https://docs.retab.dev/get-started/introduction) | [Discord](https://discord.com/invite/vc5tWRPqag) | [Twitter](https://x.com/retabdev)
 
 
 </div>
@@ -61,16 +61,16 @@ Made with love by the team at [UiForm](https://uiform.com) 🤍.
 
 ## How It Works
 
-
+Retab allows you to easily create document processing automations. Here is the general workflow:
 
 ```mermaid
 sequenceDiagram
-    User ->>
-
-
-    AI Provider -->>
-
-
+    User ->> Retab: File Upload
+    Retab -->> Retab: Preprocessing
+    Retab ->> AI Provider: Request on your behalf
+    AI Provider -->> Retab: Structured Generation
+    Retab ->> Webhook: Send result
+    Retab ->> User: Send Confirmation
 ```
 
 ---
@@ -95,17 +95,17 @@ You come with your own API key from your favorite AI provider, and we handle the
 We currently support [OpenAI](https://platform.openai.com/docs/overview), [Anthropic](https://www.anthropic.com/api), [Gemini](https://aistudio.google.com/prompts/new_chat) and [xAI](https://x.ai/api) models.
 
 <p align="center">
-<img src="https://raw.githubusercontent.com/
+<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/supported_models.png" alt="Supported Models" width="600">
 </p>
 
 ---
 
 ## Quickstart
 
-Explore our [Playground](https://www.
+Explore our [Playground](https://www.retab.dev/dashboard/playground) and create your first automations easily 🚀!
 
 <p align="center">
-<img src="https://raw.githubusercontent.com/
+<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/retab-playground.png" alt="Retab Playground" width="600">
 </p>
 
 ---
@@ -114,10 +114,10 @@ Explore our [Playground](https://www.uiform.com/dashboard/playground) and create
 
 ## Dev Mode 🔧
 
-You need more control? You can access the [Documentation](https://docs.
+You need more control? You can access the [Documentation](https://docs.retab.dev/get-started/introduction) of our **Python SDK**.
 
 1. **Setup the Python SDK**
-> Install the
+> Install the Retab Python SDK and configure your API keys to start processing documents with your preferred AI provider.
 
 2. **Create your JSON schema**
 > Define the structure of the data you want to extract from your documents using our schema format with custom prompting capabilities.
@@ -133,32 +133,32 @@ You need more control? You can access the [Documentation](https://docs.uiform.co
 
 ### Step 1: Setup of the Python SDK
 
-To get started, install the `
+To get started, install the `retab` package using pip:
 
 ```bash
-pip install
+pip install retab
 ```
 
-Then, [create your API key on
+Then, [create your API key on retab.dev](https://www.retab.dev).
 
 Create another API key by you favorite API key provider.
 
 **Reminder**: We currently support [OpenAI](https://platform.openai.com/docs/overview), [Anthropic](https://www.anthropic.com/api), [Gemini](https://aistudio.google.com/prompts/new_chat) and [xAI](https://x.ai/api) models.
 
-As we will use your API key to make requests to OpenAI on your behalf within an automation, you need to store your API key in the
+As we will use your API key to make requests to OpenAI on your behalf within an automation, you need to store your API key in the Retab secrets manager:
 
 ```
 OPENAI_API_KEY=sk-xxxxxxxxx
-
+RETAB_API_KEY=sk_retab_xxxxxxxxx
 ```
 
 ```bash
-import
+import retab
 import os
 
-
+reclient = retab.Retab()
 
-
+reclient.secrets.external_api_keys.create(
     provider="OpenAI",
     api_key=os.getenv("OPENAI_API_KEY")
 )
@@ -167,14 +167,14 @@ uiclient.secrets.external_api_keys.create(
 #### Process your first document with the create_messages method:
 
 ```bash
-from
+from retab import Retab
 from openai import OpenAI
 
-# Initialize
-
+# Initialize Retab client
+reclient = Retab()
 
 # Convert any document into LLM-ready format
-doc_msg =
+doc_msg = reclient.documents.create_messages(
     document = "invoice.pdf" # Works with PDFs, Excel, emails, etc.
 )
 
@@ -196,10 +196,10 @@ We use a standard JSON Schema with custom annotations (`X-SystemPrompt`, `X-Fiel
 
 These annotations help guide the LLM’s behavior and improve extraction accuracy.
 
-You can learn more about these in our [JSON Schema documentation](https://docs.
+You can learn more about these in our [JSON Schema documentation](https://docs.retab.dev/get-started/prompting-with-the-JSON-schema).
 
 ```bash
-from
+from retab import Retab
 from openai import OpenAI
 from pydantic import BaseModel, Field, ConfigDict
 
@@ -225,11 +225,11 @@ class Invoice(BaseModel):
 )
 
 # Process document and extract data
-
-doc_msg =
+reclient = Retab()
+doc_msg = reclient.documents.create_messages(
     document = "invoice.pdf"
 )
-schema_obj =
+schema_obj = reclient.schemas.load(
     pydantic_model = Invoice
 )
 
@@ -244,7 +244,7 @@ completion = client.beta.chat.completions.parse(
 print("Extracted data:", completion.choices[0].message.parsed)
 
 # Validate the response against the original schema if you want to remove the reasoning fields
-from
+from retab._utils.json_schema import filter_auxiliary_fields_json
 assert completion.choices[0].message.content is not None
 extraction = schema_obj.pydantic_model.model_validate(
     filter_auxiliary_fields_json(completion.choices[0].message.content, schema_obj.pydantic_model)
@@ -262,7 +262,7 @@ Below is an example of a simple FastAPI application with a webhook endpoint:
 ```bash
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
-from
+from retab.types.automations.webhooks import WebhookRequest
 from pydantic import BaseModel, Field, ConfigDict
 
 app = FastAPI()
@@ -293,53 +293,53 @@ curl -X POST "http://localhost:8000/webhook" \
 
 ### Step 4: Create your automation
 
-Finally, integrate the webhook with your automation system using the `
+Finally, integrate the webhook with your automation system using the `retab` client.
 
 This example demonstrates how to create an automation that triggers the webhook when a matching event occurs:
 
 ```bash
-from
+from retab import Retab
 
-# Initialize the
-
+# Initialize the Retab client
+reclient = Retab()
 
 # Create an automation that uses the webhook URL from Step 2
-automation =
-    email="invoices@mailbox.
+automation = reclient.processors.automations.mailboxes.create(
+    email="invoices@mailbox.retab.dev",
     model="gpt-4.1-nano",
     json_schema=Invoice.model_json_schema(), # use the pydantic model to create the json schema
    webhook_url="https://your-server.com/webhook", # Replace with your actual webhook URL
 )
 ```
 
-At any email sent to `invoices@mailbox.
+At any email sent to `invoices@mailbox.retab.dev`, the automation will send a POST request to your FastAPI webhook endpoint, where the payload can be processed.
 
-You can see the automation you just created on your [dashboard](https://www.
+You can see the automation you just created on your [dashboard](https://www.retab.dev/dashboard/processors)!
 
 ### Step 5: Test your automation
 
 Finally, you can test the automation rapidly with the test functions of the sdk:
 
 ```bash
-from
+from retab import Retab
 
-# Initialize the
-
+# Initialize the Retab client
+reclient = Retab()
 
 # If you just want to send a test request to your webhook
-log =
-    email="test-mailbox-local@devmail.
+log = reclient.processors.automations.mailboxes.tests.webhook(
+    email="test-mailbox-local@devmail.retab.dev",
 )
 
 # If you want to test the file processing logic:
-log =
-    email="test-mailbox-local@devmail.
+log = reclient.processors.automations.mailboxes.tests.process(
+    email="test-mailbox-local@devmail.retab.dev",
     document="your_invoice_email.eml"
 )
 
 # If you want to test a full email forwarding
-log =
-    email="
+log = reclient.processors.automations.mailboxes.tests.forward(
+    email="retab-quickstart@mailbox.retab.dev",
    document="your_invoice_email.eml"
 )
 ```
@@ -347,33 +347,33 @@ log = uiclient.processors.automations.mailboxes.tests.forward(
 > 💡 **Tip:** You can also test your webhook locally by overriding the webhook URL set in the automation.
 
 ```bash
-from
+from retab import Retab
 
-
+reclient = Retab()
 
 # If you just want to send a test request to your webhook
-log =
-    email="test-mailbox-local@devmail.
+log = reclient.processors.automations.mailboxes.tests.webhook(
+    email="test-mailbox-local@devmail.retab.dev",
    webhook_url="http://localhost:8000/webhook" # If you want to try your webhook locally, you can override the webhook url set in the automation
 )
 ```
 
 And that's it! You can start processing documents at scale!
-You have 1000 free requests to get started, and you can [subscribe](https://www.
+You have 1000 free requests to get started, and you can [subscribe](https://www.retab.dev) to the pro plan to get more.
 
 But this minimalistic example is just the beginning.
 
-Continue reading to learn more about how to use
+Continue reading to learn more about how to use Retab **to its full potential** 🔥.
 
 ---
 
 ## Go further
 
-- [Prompt Engineering Guide](https://docs.
-- [General Concepts](https://docs.
-- [Consensus](https://docs.
-- [Create mailboxes](https://docs.
-- [Create links](https://docs.
+- [Prompt Engineering Guide](https://docs.retab.dev/get-started/prompting-with-the-json-schema)
+- [General Concepts](https://docs.retab.dev/get-started/General-Concepts)
+- [Consensus](https://docs.retab.dev/SDK/General-Concepts#consensus)
+- [Create mailboxes](https://docs.retab.dev/SDK/Automations#mailbox)
+- [Create links](https://docs.retab.dev/SDK/Automations#link)
 - Finetuning (coming soon)
 - Prompt optimization (coming soon)
 - Data-Labelling with our AI-powered annotator (coming soon)
@@ -382,12 +382,12 @@ Continue reading to learn more about how to use UiForm **to its full potential**
 
 ## Jupyter Notebooks
 
-You can view minimal notebooks that demonstrate how to use
+You can view minimal notebooks that demonstrate how to use Retab to process documents:
 
-- [Mailbox creation quickstart](https://github.com/
-- [Upload Links creation quickstart](https://github.com/
-- [Document Extractions quickstart](https://github.com/
-- [Document Extractions quickstart - Async](https://github.com/
+- [Mailbox creation quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/mailboxes_quickstart.ipynb)
+- [Upload Links creation quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/links_quickstart.ipynb)
+- [Document Extractions quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/Quickstart.ipynb)
+- [Document Extractions quickstart - Async](https://github.com/Retab-dev/retab/blob/main/notebooks/Quickstart-Async.ipynb)
 
 ---
 
@@ -395,12 +395,12 @@ You can view minimal notebooks that demonstrate how to use UiForm to process doc
 
 Let's create the future of document processing together!
 
-Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/
+Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/retabdev) at us.
 
-We can't wait to see how you'll use
+We can't wait to see how you'll use Retab.
 
 - [Discord](https://discord.com/invite/vc5tWRPqag)
-- [Twitter](https://x.com/
+- [Twitter](https://x.com/retabdev)
 
 
 ## Roadmap