retab 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab/__init__.py +2 -2
- retab/_resource.py +5 -5
- retab/_utils/chat.py +20 -20
- retab/_utils/responses.py +7 -7
- retab/_utils/usage/usage.py +3 -3
- retab/client.py +22 -22
- retab/resources/consensus/client.py +2 -2
- retab/resources/consensus/completions.py +12 -12
- retab/resources/consensus/completions_stream.py +9 -9
- retab/resources/consensus/responses.py +6 -6
- retab/resources/consensus/responses_stream.py +10 -10
- retab/resources/documents/client.py +201 -15
- retab/resources/documents/extractions.py +17 -17
- retab/resources/jsonlUtils.py +5 -5
- retab/resources/processors/automations/endpoints.py +2 -2
- retab/resources/processors/automations/links.py +2 -2
- retab/resources/processors/automations/logs.py +2 -2
- retab/resources/processors/automations/mailboxes.py +2 -2
- retab/resources/processors/automations/outlook.py +2 -2
- retab/resources/processors/client.py +2 -2
- retab/resources/usage.py +4 -4
- retab/types/ai_models.py +4 -4
- retab/types/automations/mailboxes.py +1 -1
- retab/types/automations/webhooks.py +1 -1
- retab/types/chat.py +1 -1
- retab/types/completions.py +3 -3
- retab/types/documents/create_messages.py +2 -2
- retab/types/documents/extractions.py +2 -2
- retab/types/extractions.py +3 -3
- retab/types/schemas/object.py +3 -3
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/METADATA +72 -72
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/RECORD +34 -34
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
- {retab-0.0.37.dist-info → retab-0.0.38.dist-info}/top_level.txt +0 -0
@@ -149,7 +149,7 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
|
|
149
149
|
)
|
150
150
|
response = self._client._prepared_request(request)
|
151
151
|
|
152
|
-
print(f"Outlook automation created. Outlook available at https://www.
|
152
|
+
print(f"Outlook automation created. Outlook available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
153
153
|
|
154
154
|
return Outlook.model_validate(response)
|
155
155
|
|
@@ -280,7 +280,7 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
|
|
280
280
|
fetch_params=fetch_params,
|
281
281
|
)
|
282
282
|
response = await self._client._prepared_request(request)
|
283
|
-
print(f"Outlook automation created. Outlook available at https://www.
|
283
|
+
print(f"Outlook automation created. Outlook available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
284
284
|
return Outlook.model_validate(response)
|
285
285
|
|
286
286
|
async def list(
|
@@ -239,7 +239,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
|
|
239
239
|
n_consensus=n_consensus,
|
240
240
|
)
|
241
241
|
response = self._client._prepared_request(request)
|
242
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.
|
242
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
243
243
|
return ProcessorConfig.model_validate(response)
|
244
244
|
|
245
245
|
def list(
|
@@ -399,7 +399,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
|
|
399
399
|
n_consensus=n_consensus,
|
400
400
|
)
|
401
401
|
response = await self._client._prepared_request(request)
|
402
|
-
print(f"Processor ID: {response['id']}. Processor available at https://www.
|
402
|
+
print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
|
403
403
|
|
404
404
|
return ProcessorConfig.model_validate(response)
|
405
405
|
|
retab/resources/usage.py
CHANGED
@@ -89,7 +89,7 @@ class Usage(SyncAPIResource, UsageMixin):
|
|
89
89
|
dict: Monthly usage data including credits consumed and limits
|
90
90
|
|
91
91
|
Raises:
|
92
|
-
|
92
|
+
RetabAPIError: If the API request fails
|
93
93
|
"""
|
94
94
|
request = self.prepare_monthly_credits_usage()
|
95
95
|
response = self._client._request(request.method, request.url, request.data, request.params)
|
@@ -176,7 +176,7 @@ class Usage(SyncAPIResource, UsageMixin):
|
|
176
176
|
],
|
177
177
|
response_format=CalendarEvent,
|
178
178
|
)
|
179
|
-
|
179
|
+
reclient.usage.log(
|
180
180
|
response_format=CalendarEvent,
|
181
181
|
completion=completion
|
182
182
|
)
|
@@ -203,7 +203,7 @@ class AsyncUsage(AsyncAPIResource, UsageMixin):
|
|
203
203
|
dict: Monthly usage data including credits consumed and limits
|
204
204
|
|
205
205
|
Raises:
|
206
|
-
|
206
|
+
RetabAPIError: If the API request fails
|
207
207
|
"""
|
208
208
|
request = self.prepare_monthly_credits_usage()
|
209
209
|
response = await self._client._request(request.method, request.url, request.data, request.params)
|
@@ -290,7 +290,7 @@ class AsyncUsage(AsyncAPIResource, UsageMixin):
|
|
290
290
|
],
|
291
291
|
response_format=CalendarEvent,
|
292
292
|
)
|
293
|
-
|
293
|
+
reclient.usage.log(
|
294
294
|
response_format=CalendarEvent,
|
295
295
|
completion=completion
|
296
296
|
)
|
retab/types/ai_models.py
CHANGED
@@ -5,7 +5,7 @@ from pydantic import BaseModel, Field
|
|
5
5
|
|
6
6
|
from .inference_settings import InferenceSettings
|
7
7
|
|
8
|
-
AIProvider = Literal["OpenAI", "Gemini", "xAI", "
|
8
|
+
AIProvider = Literal["OpenAI", "Gemini", "xAI", "Retab"] # , "Anthropic", "xAI"]
|
9
9
|
OpenAICompatibleProvider = Literal["OpenAI", "xAI"] # , "xAI"]
|
10
10
|
GeminiModel = Literal[
|
11
11
|
"gemini-2.5-pro-preview-06-05",
|
@@ -52,8 +52,8 @@ OpenAIModel = Literal[
|
|
52
52
|
"gpt-4o-mini-realtime-preview-2024-12-17",
|
53
53
|
]
|
54
54
|
xAI_Model = Literal["grok-3-beta", "grok-3-mini-beta"]
|
55
|
-
|
56
|
-
LLMModel = Literal[OpenAIModel, "human", AnthropicModel, xAI_Model, GeminiModel,
|
55
|
+
RetabModel = Literal["auto", "auto-small"]
|
56
|
+
LLMModel = Literal[OpenAIModel, "human", AnthropicModel, xAI_Model, GeminiModel, RetabModel]
|
57
57
|
|
58
58
|
|
59
59
|
class FinetunedModel(BaseModel):
|
@@ -631,7 +631,7 @@ def get_model_card(model: str) -> ModelCard:
|
|
631
631
|
Raises:
|
632
632
|
ValueError: If no model card is found for the specified model
|
633
633
|
"""
|
634
|
-
# Extract base model name for fine-tuned models like "ft:gpt-4o:
|
634
|
+
# Extract base model name for fine-tuned models like "ft:gpt-4o:retab:4389573"
|
635
635
|
if model.startswith("ft:"):
|
636
636
|
# Split by colon and take the second part (index 1) which contains the base model
|
637
637
|
parts = model.split(":")
|
@@ -17,7 +17,7 @@ class Mailbox(AutomationConfig):
|
|
17
17
|
def object(self) -> str:
|
18
18
|
return "automation.mailbox"
|
19
19
|
|
20
|
-
EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.
|
20
|
+
EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.retab.dev')}$"
|
21
21
|
id: str = Field(default_factory=lambda: "mb_" + nanoid.generate(), description="Unique identifier for the mailbox")
|
22
22
|
|
23
23
|
# Email Specific config
|
@@ -2,7 +2,7 @@ from typing import Any, Optional
|
|
2
2
|
|
3
3
|
from pydantic import BaseModel, EmailStr
|
4
4
|
|
5
|
-
from
|
5
|
+
from retab.types.documents.extractions import UiParsedChatCompletion
|
6
6
|
|
7
7
|
from ..mime import BaseMIMEData, MIMEData
|
8
8
|
|
retab/types/chat.py
CHANGED
@@ -3,6 +3,6 @@ from typing import Literal, TypedDict, Union
|
|
3
3
|
from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
|
4
4
|
|
5
5
|
|
6
|
-
class
|
6
|
+
class ChatCompletionRetabMessage(TypedDict): # homemade replacement for ChatCompletionMessageParam because iterable messes the serialization with pydantic
|
7
7
|
role: Literal["user", "system", "assistant", "developer"]
|
8
8
|
content: Union[str, list[ChatCompletionContentPartParam]]
|
retab/types/completions.py
CHANGED
@@ -9,13 +9,13 @@ from pydantic import BaseModel, ConfigDict, Field
|
|
9
9
|
|
10
10
|
from .._utils.ai_models import find_provider_from_model
|
11
11
|
from .ai_models import AIProvider
|
12
|
-
from .chat import
|
12
|
+
from .chat import ChatCompletionRetabMessage
|
13
13
|
|
14
14
|
|
15
15
|
class UiChatCompletionsRequest(BaseModel):
|
16
16
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
17
17
|
model: str = Field(..., description="Model used for chat completion")
|
18
|
-
messages: list[
|
18
|
+
messages: list[ChatCompletionRetabMessage] = Field(..., description="Messages to be parsed")
|
19
19
|
response_format: ResponseFormatJSONSchema = Field(..., description="response format used to validate the output data.")
|
20
20
|
temperature: float = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
|
21
21
|
reasoning_effort: ChatCompletionReasoningEffort = Field(
|
@@ -40,7 +40,7 @@ class UiChatCompletionsRequest(BaseModel):
|
|
40
40
|
class UiChatCompletionsParseRequest(BaseModel):
|
41
41
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
42
42
|
model: str = Field(..., description="Model used for chat completion")
|
43
|
-
messages: list[
|
43
|
+
messages: list[ChatCompletionRetabMessage] = Field(..., description="Messages to be parsed")
|
44
44
|
json_schema: dict[str, Any] = Field(..., description="JSON schema format used to validate the output data.")
|
45
45
|
temperature: float = Field(default=0.0, description="Temperature for sampling. If not provided, the default temperature for the model will be used.", examples=[0.0])
|
46
46
|
reasoning_effort: ChatCompletionReasoningEffort = Field(
|
@@ -14,7 +14,7 @@ from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_
|
|
14
14
|
from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
|
15
15
|
from ..._utils.display import count_image_tokens, count_text_tokens
|
16
16
|
from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
|
17
|
-
from ..chat import
|
17
|
+
from ..chat import ChatCompletionRetabMessage
|
18
18
|
from ..mime import MIMEData
|
19
19
|
from ..modalities import Modality
|
20
20
|
from ..browser_canvas import BrowserCanvas
|
@@ -44,7 +44,7 @@ class DocumentCreateInputRequest(DocumentCreateMessageRequest):
|
|
44
44
|
class DocumentMessage(BaseModel):
|
45
45
|
id: str = Field(description="A unique identifier for the document loading.")
|
46
46
|
object: Literal["document_message"] = Field(default="document_message", description="The type of object being loaded.")
|
47
|
-
messages: List[
|
47
|
+
messages: List[ChatCompletionRetabMessage] = Field(description="A list of messages containing the document content and metadata.")
|
48
48
|
created: int = Field(description="The Unix timestamp (in seconds) of when the document was loaded.")
|
49
49
|
modality: Modality = Field(description="The modality of the document to load.")
|
50
50
|
|
@@ -17,7 +17,7 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_fiel
|
|
17
17
|
|
18
18
|
from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
|
19
19
|
from ..ai_models import Amount
|
20
|
-
from ..chat import
|
20
|
+
from ..chat import ChatCompletionRetabMessage
|
21
21
|
from ..mime import MIMEData
|
22
22
|
from ..modalities import Modality
|
23
23
|
from ..browser_canvas import BrowserCanvas
|
@@ -146,7 +146,7 @@ class UiResponse(Response):
|
|
146
146
|
|
147
147
|
|
148
148
|
class LogExtractionRequest(BaseModel):
|
149
|
-
messages: list[
|
149
|
+
messages: list[ChatCompletionRetabMessage] | None = None # TODO: compatibility with Anthropic
|
150
150
|
openai_messages: list[ChatCompletionMessageParam] | None = None
|
151
151
|
openai_responses_input: list[ResponseInputItemParam] | None = None
|
152
152
|
anthropic_messages: list[MessageParam] | None = None
|
retab/types/extractions.py
CHANGED
@@ -6,8 +6,8 @@ from openai.types.chat import ChatCompletion
|
|
6
6
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
7
7
|
from pydantic import BaseModel, Field, computed_field, model_validator
|
8
8
|
|
9
|
-
from
|
10
|
-
from
|
9
|
+
from retab.types.chat import ChatCompletionRetabMessage
|
10
|
+
from retab.types.documents.extractions import UiParsedChatCompletion
|
11
11
|
|
12
12
|
from .._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
|
13
13
|
from .ai_models import Amount
|
@@ -35,7 +35,7 @@ class ExtractionTimingStep(BaseModel):
|
|
35
35
|
|
36
36
|
class Extraction(BaseModel):
|
37
37
|
id: str = Field(default_factory=lambda: "extr_" + nanoid.generate(), description="Unique identifier of the analysis")
|
38
|
-
messages: list[
|
38
|
+
messages: list[ChatCompletionRetabMessage] = Field(default_factory=list)
|
39
39
|
messages_gcs: str = Field(..., description="GCS path to the messages")
|
40
40
|
file_gcs_paths: list[str] = Field(..., description="GCS paths to the files")
|
41
41
|
file_ids: list[str] = Field(..., description="IDs of the files")
|
retab/types/schemas/object.py
CHANGED
@@ -27,7 +27,7 @@ from ..._utils.json_schema import (
|
|
27
27
|
)
|
28
28
|
from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
|
29
29
|
from ...types.standards import StreamingBaseModel
|
30
|
-
from ..chat import
|
30
|
+
from ..chat import ChatCompletionRetabMessage
|
31
31
|
|
32
32
|
|
33
33
|
class PartialSchema(BaseModel):
|
@@ -609,8 +609,8 @@ You can easily identify the fields that require a source by the `quote___[attrib
|
|
609
609
|
return data
|
610
610
|
|
611
611
|
@property
|
612
|
-
def messages(self) -> list[
|
613
|
-
return [
|
612
|
+
def messages(self) -> list[ChatCompletionRetabMessage]:
|
613
|
+
return [ChatCompletionRetabMessage(role="developer", content=self.system_prompt)]
|
614
614
|
|
615
615
|
@model_validator(mode="after")
|
616
616
|
def model_after_validator(self) -> Self:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: retab
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.38
|
4
4
|
Summary: Retab official python library
|
5
5
|
Home-page: https://github.com/Retab-dev/retab
|
6
6
|
Author: Retab
|
@@ -41,18 +41,18 @@ Requires-Dist: tiktoken
|
|
41
41
|
Requires-Dist: truststore
|
42
42
|
Requires-Dist: ruff
|
43
43
|
|
44
|
-
#
|
44
|
+
# Retab
|
45
45
|
|
46
46
|
<div align="center" style="margin-bottom: 1em;">
|
47
47
|
|
48
|
-
<img src="https://raw.githubusercontent.com/
|
48
|
+
<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/retab-logo.png" alt="Retab Logo" width="150">
|
49
49
|
|
50
50
|
|
51
51
|
*The AI Automation Platform*
|
52
52
|
|
53
|
-
Made with love by the team at [
|
53
|
+
Made with love by the team at [Retab](https://retab.dev) 🤍.
|
54
54
|
|
55
|
-
[Our Website](https://
|
55
|
+
[Our Website](https://retab.dev) | [Documentation](https://docs.retab.dev/get-started/introduction) | [Discord](https://discord.com/invite/vc5tWRPqag) | [Twitter](https://x.com/retabAPI)
|
56
56
|
|
57
57
|
|
58
58
|
</div>
|
@@ -61,16 +61,16 @@ Made with love by the team at [UiForm](https://uiform.com) 🤍.
|
|
61
61
|
|
62
62
|
## How It Works
|
63
63
|
|
64
|
-
|
64
|
+
Retab allows you to easily create document processing automations. Here is the general workflow:
|
65
65
|
|
66
66
|
```mermaid
|
67
67
|
sequenceDiagram
|
68
|
-
User ->>
|
69
|
-
|
70
|
-
|
71
|
-
AI Provider -->>
|
72
|
-
|
73
|
-
|
68
|
+
User ->> Retab: File Upload
|
69
|
+
Retab -->> Retab: Preprocessing
|
70
|
+
Retab ->> AI Provider: Request on your behalf
|
71
|
+
AI Provider -->> Retab: Structured Generation
|
72
|
+
Retab ->> Webhook: Send result
|
73
|
+
Retab ->> User: Send Confirmation
|
74
74
|
```
|
75
75
|
|
76
76
|
---
|
@@ -95,17 +95,17 @@ You come with your own API key from your favorite AI provider, and we handle the
|
|
95
95
|
We currently support [OpenAI](https://platform.openai.com/docs/overview), [Anthropic](https://www.anthropic.com/api), [Gemini](https://aistudio.google.com/prompts/new_chat) and [xAI](https://x.ai/api) models.
|
96
96
|
|
97
97
|
<p align="center">
|
98
|
-
<img src="https://raw.githubusercontent.com/
|
98
|
+
<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/supported_models.png" alt="Supported Models" width="600">
|
99
99
|
</p>
|
100
100
|
|
101
101
|
---
|
102
102
|
|
103
103
|
## Quickstart
|
104
104
|
|
105
|
-
Explore our [Playground](https://www.
|
105
|
+
Explore our [Playground](https://www.retab.dev/dashboard/playground) and create your first automations easily 🚀!
|
106
106
|
|
107
107
|
<p align="center">
|
108
|
-
<img src="https://raw.githubusercontent.com/
|
108
|
+
<img src="https://raw.githubusercontent.com/Retab/retab/refs/heads/main/assets/retab-playground.png" alt="Retab Playground" width="600">
|
109
109
|
</p>
|
110
110
|
|
111
111
|
---
|
@@ -114,10 +114,10 @@ Explore our [Playground](https://www.uiform.com/dashboard/playground) and create
|
|
114
114
|
|
115
115
|
## Dev Mode 🔧
|
116
116
|
|
117
|
-
You need more control? You can access the [Documentation](https://docs.
|
117
|
+
You need more control? You can access the [Documentation](https://docs.retab.dev/get-started/introduction) of our **Python SDK**.
|
118
118
|
|
119
119
|
1. **Setup the Python SDK**
|
120
|
-
> Install the
|
120
|
+
> Install the Retab Python SDK and configure your API keys to start processing documents with your preferred AI provider.
|
121
121
|
|
122
122
|
2. **Create your JSON schema**
|
123
123
|
> Define the structure of the data you want to extract from your documents using our schema format with custom prompting capabilities.
|
@@ -133,32 +133,32 @@ You need more control? You can access the [Documentation](https://docs.uiform.co
|
|
133
133
|
|
134
134
|
### Step 1: Setup of the Python SDK
|
135
135
|
|
136
|
-
To get started, install the `
|
136
|
+
To get started, install the `retab` package using pip:
|
137
137
|
|
138
138
|
```bash
|
139
|
-
pip install
|
139
|
+
pip install retab
|
140
140
|
```
|
141
141
|
|
142
|
-
Then, [create your API key on
|
142
|
+
Then, [create your API key on retab.dev](https://www.retab.dev).
|
143
143
|
|
144
144
|
Create another API key from your favorite AI provider.
|
145
145
|
|
146
146
|
**Reminder**: We currently support [OpenAI](https://platform.openai.com/docs/overview), [Anthropic](https://www.anthropic.com/api), [Gemini](https://aistudio.google.com/prompts/new_chat) and [xAI](https://x.ai/api) models.
|
147
147
|
|
148
|
-
As we will use your API key to make requests to OpenAI on your behalf within an automation, you need to store your API key in the
|
148
|
+
As we will use your API key to make requests to OpenAI on your behalf within an automation, you need to store your API key in the Retab secrets manager:
|
149
149
|
|
150
150
|
```
|
151
151
|
OPENAI_API_KEY=sk-xxxxxxxxx
|
152
|
-
|
152
|
+
RETAB_API_KEY=sk_retab_xxxxxxxxx
|
153
153
|
```
|
154
154
|
|
155
155
|
```bash
|
156
|
-
import
|
156
|
+
import retab
|
157
157
|
import os
|
158
158
|
|
159
|
-
|
159
|
+
reclient = retab.Retab()
|
160
160
|
|
161
|
-
|
161
|
+
reclient.secrets.external_api_keys.create(
|
162
162
|
provider="OpenAI",
|
163
163
|
api_key=os.getenv("OPENAI_API_KEY")
|
164
164
|
)
|
@@ -167,14 +167,14 @@ uiclient.secrets.external_api_keys.create(
|
|
167
167
|
#### Process your first document with the create_messages method:
|
168
168
|
|
169
169
|
```bash
|
170
|
-
from
|
170
|
+
from retab import Retab
|
171
171
|
from openai import OpenAI
|
172
172
|
|
173
|
-
# Initialize
|
174
|
-
|
173
|
+
# Initialize Retab client
|
174
|
+
reclient = Retab()
|
175
175
|
|
176
176
|
# Convert any document into LLM-ready format
|
177
|
-
doc_msg =
|
177
|
+
doc_msg = reclient.documents.create_messages(
|
178
178
|
document = "invoice.pdf" # Works with PDFs, Excel, emails, etc.
|
179
179
|
)
|
180
180
|
|
@@ -196,10 +196,10 @@ We use a standard JSON Schema with custom annotations (`X-SystemPrompt`, `X-Fiel
|
|
196
196
|
|
197
197
|
These annotations help guide the LLM’s behavior and improve extraction accuracy.
|
198
198
|
|
199
|
-
You can learn more about these in our [JSON Schema documentation](https://docs.
|
199
|
+
You can learn more about these in our [JSON Schema documentation](https://docs.retab.dev/get-started/prompting-with-the-JSON-schema).
|
200
200
|
|
201
201
|
```bash
|
202
|
-
from
|
202
|
+
from retab import Retab
|
203
203
|
from openai import OpenAI
|
204
204
|
from pydantic import BaseModel, Field, ConfigDict
|
205
205
|
|
@@ -225,11 +225,11 @@ class Invoice(BaseModel):
|
|
225
225
|
)
|
226
226
|
|
227
227
|
# Process document and extract data
|
228
|
-
|
229
|
-
doc_msg =
|
228
|
+
reclient = Retab()
|
229
|
+
doc_msg = reclient.documents.create_messages(
|
230
230
|
document = "invoice.pdf"
|
231
231
|
)
|
232
|
-
schema_obj =
|
232
|
+
schema_obj = reclient.schemas.load(
|
233
233
|
pydantic_model = Invoice
|
234
234
|
)
|
235
235
|
|
@@ -244,7 +244,7 @@ completion = client.beta.chat.completions.parse(
|
|
244
244
|
print("Extracted data:", completion.choices[0].message.parsed)
|
245
245
|
|
246
246
|
# Validate the response against the original schema if you want to remove the reasoning fields
|
247
|
-
from
|
247
|
+
from retab._utils.json_schema import filter_auxiliary_fields_json
|
248
248
|
assert completion.choices[0].message.content is not None
|
249
249
|
extraction = schema_obj.pydantic_model.model_validate(
|
250
250
|
filter_auxiliary_fields_json(completion.choices[0].message.content, schema_obj.pydantic_model)
|
@@ -262,7 +262,7 @@ Below is an example of a simple FastAPI application with a webhook endpoint:
|
|
262
262
|
```bash
|
263
263
|
from fastapi import FastAPI, Request
|
264
264
|
from fastapi.responses import JSONResponse
|
265
|
-
from
|
265
|
+
from retab.types.automations.webhooks import WebhookRequest
|
266
266
|
from pydantic import BaseModel, Field, ConfigDict
|
267
267
|
|
268
268
|
app = FastAPI()
|
@@ -293,53 +293,53 @@ curl -X POST "http://localhost:8000/webhook" \
|
|
293
293
|
|
294
294
|
### Step 4: Create your automation
|
295
295
|
|
296
|
-
Finally, integrate the webhook with your automation system using the `
|
296
|
+
Finally, integrate the webhook with your automation system using the `retab` client.
|
297
297
|
|
298
298
|
This example demonstrates how to create an automation that triggers the webhook when a matching event occurs:
|
299
299
|
|
300
300
|
```bash
|
301
|
-
from
|
301
|
+
from retab import Retab
|
302
302
|
|
303
|
-
# Initialize the
|
304
|
-
|
303
|
+
# Initialize the Retab client
|
304
|
+
reclient = Retab()
|
305
305
|
|
306
306
|
# Create an automation that uses the webhook URL from Step 2
|
307
|
-
automation =
|
308
|
-
email="invoices@mailbox.
|
307
|
+
automation = reclient.processors.automations.mailboxes.create(
|
308
|
+
email="invoices@mailbox.retab.dev",
|
309
309
|
model="gpt-4.1-nano",
|
310
310
|
json_schema=Invoice.model_json_schema(), # use the pydantic model to create the json schema
|
311
311
|
webhook_url="https://your-server.com/webhook", # Replace with your actual webhook URL
|
312
312
|
)
|
313
313
|
```
|
314
314
|
|
315
|
-
At any email sent to `invoices@mailbox.
|
315
|
+
Whenever an email is sent to `invoices@mailbox.retab.dev`, the automation will send a POST request to your FastAPI webhook endpoint, where the payload can be processed.
|
316
316
|
|
317
|
-
You can see the automation you just created on your [dashboard](https://www.
|
317
|
+
You can see the automation you just created on your [dashboard](https://www.retab.dev/dashboard/processors)!
|
318
318
|
|
319
319
|
### Step 5: Test your automation
|
320
320
|
|
321
321
|
Finally, you can quickly test the automation with the SDK's test functions:
|
322
322
|
|
323
323
|
```bash
|
324
|
-
from
|
324
|
+
from retab import Retab
|
325
325
|
|
326
|
-
# Initialize the
|
327
|
-
|
326
|
+
# Initialize the Retab client
|
327
|
+
reclient = Retab()
|
328
328
|
|
329
329
|
# If you just want to send a test request to your webhook
|
330
|
-
log =
|
331
|
-
email="test-mailbox-local@devmail.
|
330
|
+
log = reclient.processors.automations.mailboxes.tests.webhook(
|
331
|
+
email="test-mailbox-local@devmail.retab.dev",
|
332
332
|
)
|
333
333
|
|
334
334
|
# If you want to test the file processing logic:
|
335
|
-
log =
|
336
|
-
email="test-mailbox-local@devmail.
|
335
|
+
log = reclient.processors.automations.mailboxes.tests.process(
|
336
|
+
email="test-mailbox-local@devmail.retab.dev",
|
337
337
|
document="your_invoice_email.eml"
|
338
338
|
)
|
339
339
|
|
340
340
|
# If you want to test a full email forwarding
|
341
|
-
log =
|
342
|
-
email="
|
341
|
+
log = reclient.processors.automations.mailboxes.tests.forward(
|
342
|
+
email="retab-quickstart@mailbox.retab.dev",
|
343
343
|
document="your_invoice_email.eml"
|
344
344
|
)
|
345
345
|
```
|
@@ -347,33 +347,33 @@ log = uiclient.processors.automations.mailboxes.tests.forward(
|
|
347
347
|
> 💡 **Tip:** You can also test your webhook locally by overriding the webhook URL set in the automation.
|
348
348
|
|
349
349
|
```bash
|
350
|
-
from
|
350
|
+
from retab import Retab
|
351
351
|
|
352
|
-
|
352
|
+
reclient = Retab()
|
353
353
|
|
354
354
|
# If you just want to send a test request to your webhook
|
355
|
-
log =
|
356
|
-
email="test-mailbox-local@devmail.
|
355
|
+
log = reclient.processors.automations.mailboxes.tests.webhook(
|
356
|
+
email="test-mailbox-local@devmail.retab.dev",
|
357
357
|
webhook_url="http://localhost:8000/webhook" # If you want to try your webhook locally, you can override the webhook url set in the automation
|
358
358
|
)
|
359
359
|
```
|
360
360
|
|
361
361
|
And that's it! You can start processing documents at scale!
|
362
|
-
You have 1000 free requests to get started, and you can [subscribe](https://www.
|
362
|
+
You have 1000 free requests to get started, and you can [subscribe](https://www.retab.dev) to the pro plan to get more.
|
363
363
|
|
364
364
|
But this minimalistic example is just the beginning.
|
365
365
|
|
366
|
-
Continue reading to learn more about how to use
|
366
|
+
Continue reading to learn more about how to use Retab **to its full potential** 🔥.
|
367
367
|
|
368
368
|
---
|
369
369
|
|
370
370
|
## Go further
|
371
371
|
|
372
|
-
- [Prompt Engineering Guide](https://docs.
|
373
|
-
- [General Concepts](https://docs.
|
374
|
-
- [Consensus](https://docs.
|
375
|
-
- [Create mailboxes](https://docs.
|
376
|
-
- [Create links](https://docs.
|
372
|
+
- [Prompt Engineering Guide](https://docs.retab.dev/get-started/prompting-with-the-json-schema)
|
373
|
+
- [General Concepts](https://docs.retab.dev/get-started/General-Concepts)
|
374
|
+
- [Consensus](https://docs.retab.dev/SDK/General-Concepts#consensus)
|
375
|
+
- [Create mailboxes](https://docs.retab.dev/SDK/Automations#mailbox)
|
376
|
+
- [Create links](https://docs.retab.dev/SDK/Automations#link)
|
377
377
|
- Finetuning (coming soon)
|
378
378
|
- Prompt optimization (coming soon)
|
379
379
|
- Data-Labelling with our AI-powered annotator (coming soon)
|
@@ -382,12 +382,12 @@ Continue reading to learn more about how to use UiForm **to its full potential**
|
|
382
382
|
|
383
383
|
## Jupyter Notebooks
|
384
384
|
|
385
|
-
You can view minimal notebooks that demonstrate how to use
|
385
|
+
You can view minimal notebooks that demonstrate how to use Retab to process documents:
|
386
386
|
|
387
|
-
- [Mailbox creation quickstart](https://github.com/
|
388
|
-
- [Upload Links creation quickstart](https://github.com/
|
389
|
-
- [Document Extractions quickstart](https://github.com/
|
390
|
-
- [Document Extractions quickstart - Async](https://github.com/
|
387
|
+
- [Mailbox creation quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/mailboxes_quickstart.ipynb)
|
388
|
+
- [Upload Links creation quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/links_quickstart.ipynb)
|
389
|
+
- [Document Extractions quickstart](https://github.com/Retab-dev/retab/blob/main/notebooks/Quickstart.ipynb)
|
390
|
+
- [Document Extractions quickstart - Async](https://github.com/Retab-dev/retab/blob/main/notebooks/Quickstart-Async.ipynb)
|
391
391
|
|
392
392
|
---
|
393
393
|
|
@@ -395,12 +395,12 @@ You can view minimal notebooks that demonstrate how to use UiForm to process doc
|
|
395
395
|
|
396
396
|
Let's create the future of document processing together!
|
397
397
|
|
398
|
-
Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/
|
398
|
+
Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/retabAPI) at us.
|
399
399
|
|
400
|
-
We can't wait to see how you'll use
|
400
|
+
We can't wait to see how you'll use Retab.
|
401
401
|
|
402
402
|
- [Discord](https://discord.com/invite/vc5tWRPqag)
|
403
|
-
- [Twitter](https://x.com/
|
403
|
+
- [Twitter](https://x.com/retabAPI)
|
404
404
|
|
405
405
|
|
406
406
|
## Roadmap
|