retab 0.0.40__tar.gz → 0.0.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- retab-0.0.42/PKG-INFO +119 -0
- retab-0.0.42/README.md +76 -0
- {retab-0.0.40 → retab-0.0.42}/retab/client.py +5 -5
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/completions.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/completions_stream.py +5 -5
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/responses.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/responses_stream.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/resources/documents/client.py +12 -11
- {retab-0.0.40 → retab-0.0.42}/retab/resources/documents/extractions.py +4 -4
- {retab-0.0.40 → retab-0.0.42}/retab/resources/evals.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/resources/evaluations/documents.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/resources/jsonlUtils.py +4 -4
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/endpoints.py +9 -5
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/links.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/logs.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/mailboxes.py +43 -32
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/outlook.py +25 -7
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/tests.py +8 -2
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/client.py +25 -16
- {retab-0.0.40 → retab-0.0.42}/retab/resources/prompt_optimization.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/resources/schemas.py +3 -3
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/mailboxes.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/types/completions.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/types/documents/create_messages.py +4 -4
- {retab-0.0.40 → retab-0.0.42}/retab/types/documents/extractions.py +3 -3
- {retab-0.0.40 → retab-0.0.42}/retab/types/documents/parse.py +3 -1
- {retab-0.0.40 → retab-0.0.42}/retab/types/evals.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/types/evaluations/iterations.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/types/evaluations/model.py +2 -2
- {retab-0.0.40 → retab-0.0.42}/retab/types/extractions.py +34 -9
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/prompt_optimization.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/retab/types/logs.py +3 -3
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/object.py +4 -4
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/templates.py +1 -1
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/usage/usage.py +1 -1
- retab-0.0.42/retab.egg-info/PKG-INFO +119 -0
- {retab-0.0.40 → retab-0.0.42}/retab.egg-info/SOURCES.txt +16 -16
- {retab-0.0.40 → retab-0.0.42}/setup.py +1 -1
- {retab-0.0.40 → retab-0.0.42}/tests/test_automations_mailboxes.py +1 -1
- retab-0.0.40/PKG-INFO +0 -418
- retab-0.0.40/README.md +0 -375
- retab-0.0.40/retab.egg-info/PKG-INFO +0 -418
- {retab-0.0.40 → retab-0.0.42}/pyproject.toml +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/_resource.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/py.typed +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/resources}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/consensus/client.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/documents/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/evaluations/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/evaluations/client.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/evaluations/iterations.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/files.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/finetuning.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/models.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/openai_example.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/processors/automations/client.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/secrets/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/secrets/client.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/secrets/external_api_keys.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/secrets/webhook.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/resources/usage.py +0 -0
- {retab-0.0.40/retab/_utils/usage → retab-0.0.42/retab/types}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/ai_models.py +0 -0
- {retab-0.0.40/retab/resources → retab-0.0.42/retab/types/automations}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/cron.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/endpoints.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/links.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/outlook.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/automations/webhooks.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/browser_canvas.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/chat.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/consensus.py +0 -0
- {retab-0.0.40/retab/types → retab-0.0.42/retab/types/db}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/db/annotations.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/db/files.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/documents/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/documents/correct_orientation.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/evaluations/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/evaluations/documents.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/events.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/inference_settings.py +0 -0
- {retab-0.0.40/retab/types/automations → retab-0.0.42/retab/types/jobs}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/base.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/batch_annotation.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/evaluation.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/finetune.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/jobs/webcrawl.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/metrics.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/mime.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/modalities.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/pagination.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/predictions.py +0 -0
- {retab-0.0.40/retab/types/db → retab-0.0.42/retab/types/schemas}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/enhance.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/evaluate.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/generate.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/schemas/layout.py +0 -0
- {retab-0.0.40/retab/types/jobs → retab-0.0.42/retab/types/secrets}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/secrets/external_api_keys.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab/types/standards.py +0 -0
- {retab-0.0.40/retab/types/schemas → retab-0.0.42/retab/utils}/__init__.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/_model_cards/anthropic.yaml +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/_model_cards/auto.yaml +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/_model_cards/gemini.yaml +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/_model_cards/openai.yaml +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/_model_cards/xai.yaml +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/ai_models.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/benchmarking.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/chat.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/display.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/json_schema.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/mime.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/responses.py +0 -0
- {retab-0.0.40/retab/_utils → retab-0.0.42/retab/utils}/stream_context_managers.py +0 -0
- {retab-0.0.40/retab/types/secrets → retab-0.0.42/retab/utils/usage}/__init__.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab.egg-info/dependency_links.txt +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab.egg-info/requires.txt +0 -0
- {retab-0.0.40 → retab-0.0.42}/retab.egg-info/top_level.txt +0 -0
- {retab-0.0.40 → retab-0.0.42}/setup.cfg +0 -0
- {retab-0.0.40 → retab-0.0.42}/tests/test_automations_links.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/tests/test_documents_api.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/tests/test_evaluations.py +0 -0
- {retab-0.0.40 → retab-0.0.42}/tests/test_preprocessor.py +0 -0
retab-0.0.42/PKG-INFO
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: retab
|
3
|
+
Version: 0.0.42
|
4
|
+
Summary: Retab official python library
|
5
|
+
Home-page: https://github.com/Retab-dev/retab
|
6
|
+
Author: Retab
|
7
|
+
Author-email: contact@retab.com
|
8
|
+
Project-URL: Team website, https://retab.com
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
11
|
+
Classifier: Operating System :: POSIX :: Linux
|
12
|
+
Classifier: Operating System :: MacOS
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
14
|
+
Requires-Python: >=3.6
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
Requires-Dist: Pillow
|
17
|
+
Requires-Dist: httpx
|
18
|
+
Requires-Dist: pydantic
|
19
|
+
Requires-Dist: pydantic_core
|
20
|
+
Requires-Dist: requests
|
21
|
+
Requires-Dist: tqdm
|
22
|
+
Requires-Dist: types-tqdm
|
23
|
+
Requires-Dist: backoff
|
24
|
+
Requires-Dist: termplotlib
|
25
|
+
Requires-Dist: Levenshtein
|
26
|
+
Requires-Dist: pandas
|
27
|
+
Requires-Dist: numpy
|
28
|
+
Requires-Dist: motor
|
29
|
+
Requires-Dist: rich
|
30
|
+
Requires-Dist: puremagic
|
31
|
+
Requires-Dist: pycountry
|
32
|
+
Requires-Dist: phonenumbers
|
33
|
+
Requires-Dist: email_validator
|
34
|
+
Requires-Dist: python-stdnum
|
35
|
+
Requires-Dist: nanoid
|
36
|
+
Requires-Dist: openai
|
37
|
+
Requires-Dist: google-genai
|
38
|
+
Requires-Dist: google-generativeai
|
39
|
+
Requires-Dist: anthropic
|
40
|
+
Requires-Dist: tiktoken
|
41
|
+
Requires-Dist: truststore
|
42
|
+
Requires-Dist: ruff
|
43
|
+
|
44
|
+
# Retab
|
45
|
+
|
46
|
+
<div align="center" style="margin-bottom: 1em;">
|
47
|
+
|
48
|
+
<img src="https://raw.githubusercontent.com/Retab-dev/retab/refs/heads/main/assets/retab-logo.png" alt="Retab Logo" width="150">
|
49
|
+
|
50
|
+
|
51
|
+
*The AI Automation Platform*
|
52
|
+
|
53
|
+
Made with love by the team at [Retab](https://retab.com) 🤍.
|
54
|
+
|
55
|
+
[Our Website](https://retab.com) | [Documentation](https://docs.retab.com/get-started/introduction) | [Discord](https://discord.com/invite/vc5tWRPqag) | [Twitter](https://x.com/retabdev)
|
56
|
+
|
57
|
+
|
58
|
+
</div>
|
59
|
+
|
60
|
+
---
|
61
|
+
|
62
|
+
### What is Retab?
|
63
|
+
|
64
|
+
Retab solves all the major challenges in document processing with LLMs:
|
65
|
+
|
66
|
+
1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
|
67
|
+
2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
|
68
|
+
3. **Processors**: Publish a live, stable, shareable document processor.
|
69
|
+
4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
|
70
|
+
5. **Evaluations**: Evaluate the performance of models against annotated datasets
|
71
|
+
6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
|
72
|
+
|
73
|
+
We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
|
74
|
+
|
75
|
+
Our goal is to make the process of analyzing documents and unstructured data as **easy** and **transparent** as possible.
|
76
|
+
|
77
|
+
**A new, lighter paradigm**
|
78
|
+
Large Language Models collapse entire layers of legacy OCR pipelines into a single, elegant abstraction. When a model can read, reason, and structure text natively, we no longer need brittle heuristics, handcrafted parsers, or heavyweight ETL jobs. Instead, we can expose a small, principled API: "give me the document, tell me the schema, and get back structured truth." Complexity evaporates, reliability rises, speed follows, and costs fall—because every component you remove is one that can no longer break. LLM‑first design lets us focus less on plumbing and more on the questions we actually want answered.
|
79
|
+
|
80
|
+
Many people haven't yet realized how powerful LLMs have become at document processing tasks - we're here to help **unlock these capabilities**.
|
81
|
+
|
82
|
+
---
|
83
|
+
|
84
|
+
## Go further
|
85
|
+
|
86
|
+
* [Quickstart](https://docs.retab.com/get-started/quickstart)
|
87
|
+
* [API Reference](https://docs.retab.com/api-reference/introduction)
|
88
|
+
|
89
|
+
---
|
90
|
+
|
91
|
+
## Code examples
|
92
|
+
|
93
|
+
You can check our GitHub repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
|
94
|
+
|
95
|
+
## Community
|
96
|
+
|
97
|
+
Let's create the future of document processing together!
|
98
|
+
|
99
|
+
Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/retabdev) at us.
|
100
|
+
|
101
|
+
We can't wait to see how you'll use Retab.
|
102
|
+
|
103
|
+
* [Discord](https://discord.com/invite/vc5tWRPqag)
|
104
|
+
* [Twitter](https://x.com/retabdev)
|
105
|
+
|
106
|
+
---
|
107
|
+
|
108
|
+
## Roadmap
|
109
|
+
|
110
|
+
We share our roadmap publicly on [GitHub](https://github.com/Retab-dev/retab).
|
111
|
+
|
112
|
+
Among the features we're working on:
|
113
|
+
|
114
|
+
* [ ] Node.js SDK
|
115
|
+
* [ ] Low-level speed optimizations for Evals Frontend
|
116
|
+
* [ ] Schema optimization autopilot
|
117
|
+
* [ ] Sources API
|
118
|
+
* [ ] Parse API for RAG
|
119
|
+
|
retab-0.0.42/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# Retab
|
2
|
+
|
3
|
+
<div align="center" style="margin-bottom: 1em;">
|
4
|
+
|
5
|
+
<img src="https://raw.githubusercontent.com/Retab-dev/retab/refs/heads/main/assets/retab-logo.png" alt="Retab Logo" width="150">
|
6
|
+
|
7
|
+
|
8
|
+
*The AI Automation Platform*
|
9
|
+
|
10
|
+
Made with love by the team at [Retab](https://retab.com) 🤍.
|
11
|
+
|
12
|
+
[Our Website](https://retab.com) | [Documentation](https://docs.retab.com/get-started/introduction) | [Discord](https://discord.com/invite/vc5tWRPqag) | [Twitter](https://x.com/retabdev)
|
13
|
+
|
14
|
+
|
15
|
+
</div>
|
16
|
+
|
17
|
+
---
|
18
|
+
|
19
|
+
### What is Retab?
|
20
|
+
|
21
|
+
Retab solves all the major challenges in document processing with LLMs:
|
22
|
+
|
23
|
+
1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
|
24
|
+
2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
|
25
|
+
3. **Processors**: Publish a live, stable, shareable document processor.
|
26
|
+
4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
|
27
|
+
5. **Evaluations**: Evaluate the performance of models against annotated datasets
|
28
|
+
6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
|
29
|
+
|
30
|
+
We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
|
31
|
+
|
32
|
+
Our goal is to make the process of analyzing documents and unstructured data as **easy** and **transparent** as possible.
|
33
|
+
|
34
|
+
**A new, lighter paradigm**
|
35
|
+
Large Language Models collapse entire layers of legacy OCR pipelines into a single, elegant abstraction. When a model can read, reason, and structure text natively, we no longer need brittle heuristics, handcrafted parsers, or heavyweight ETL jobs. Instead, we can expose a small, principled API: "give me the document, tell me the schema, and get back structured truth." Complexity evaporates, reliability rises, speed follows, and costs fall—because every component you remove is one that can no longer break. LLM‑first design lets us focus less on plumbing and more on the questions we actually want answered.
|
36
|
+
|
37
|
+
Many people haven't yet realized how powerful LLMs have become at document processing tasks - we're here to help **unlock these capabilities**.
|
38
|
+
|
39
|
+
---
|
40
|
+
|
41
|
+
## Go further
|
42
|
+
|
43
|
+
* [Quickstart](https://docs.retab.com/get-started/quickstart)
|
44
|
+
* [API Reference](https://docs.retab.com/api-reference/introduction)
|
45
|
+
|
46
|
+
---
|
47
|
+
|
48
|
+
## Code examples
|
49
|
+
|
50
|
+
You can check our GitHub repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
|
51
|
+
|
52
|
+
## Community
|
53
|
+
|
54
|
+
Let's create the future of document processing together!
|
55
|
+
|
56
|
+
Join our [discord community](https://discord.com/invite/vc5tWRPqag) to share tips, discuss best practices, and showcase what you build. Or just [tweet](https://x.com/retabdev) at us.
|
57
|
+
|
58
|
+
We can't wait to see how you'll use Retab.
|
59
|
+
|
60
|
+
* [Discord](https://discord.com/invite/vc5tWRPqag)
|
61
|
+
* [Twitter](https://x.com/retabdev)
|
62
|
+
|
63
|
+
---
|
64
|
+
|
65
|
+
## Roadmap
|
66
|
+
|
67
|
+
We share our roadmap publicly on [GitHub](https://github.com/Retab-dev/retab).
|
68
|
+
|
69
|
+
Among the features we're working on:
|
70
|
+
|
71
|
+
* [ ] Node.js SDK
|
72
|
+
* [ ] Low-level speed optimizations for Evals Frontend
|
73
|
+
* [ ] Schema optimization autopilot
|
74
|
+
* [ ] Sources API
|
75
|
+
* [ ] Parse API for RAG
|
76
|
+
|
@@ -34,7 +34,7 @@ class BaseRetab:
|
|
34
34
|
|
35
35
|
Args:
|
36
36
|
api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
|
37
|
-
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.
|
37
|
+
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
|
38
38
|
timeout (float): Request timeout in seconds. Defaults to 240.0
|
39
39
|
max_retries (int): Maximum number of retries for failed requests. Defaults to 3
|
40
40
|
openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
|
@@ -63,12 +63,12 @@ class BaseRetab:
|
|
63
63
|
|
64
64
|
if api_key is None:
|
65
65
|
raise ValueError(
|
66
|
-
"No API key provided. You can create an API key at https://retab.
|
66
|
+
"No API key provided. You can create an API key at https://retab.com\n"
|
67
67
|
"Then either pass it to the client (api_key='your-key') or set the RETAB_API_KEY environment variable"
|
68
68
|
)
|
69
69
|
|
70
70
|
if base_url is None:
|
71
|
-
base_url = os.environ.get("RETAB_API_BASE_URL", "https://api.retab.
|
71
|
+
base_url = os.environ.get("RETAB_API_BASE_URL", "https://api.retab.com")
|
72
72
|
|
73
73
|
truststore.inject_into_ssl()
|
74
74
|
self.api_key = api_key
|
@@ -154,7 +154,7 @@ class Retab(BaseRetab):
|
|
154
154
|
|
155
155
|
Args:
|
156
156
|
api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
|
157
|
-
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.
|
157
|
+
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
|
158
158
|
timeout (float): Request timeout in seconds. Defaults to 240.0
|
159
159
|
max_retries (int): Maximum number of retries for failed requests. Defaults to 3
|
160
160
|
openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
|
@@ -422,7 +422,7 @@ class AsyncRetab(BaseRetab):
|
|
422
422
|
|
423
423
|
Args:
|
424
424
|
api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
|
425
|
-
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.
|
425
|
+
base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
|
426
426
|
timeout (float): Request timeout in seconds. Defaults to 240.0
|
427
427
|
max_retries (int): Maximum number of retries for failed requests. Defaults to 3
|
428
428
|
openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
|
@@ -5,7 +5,7 @@ from openai.types.shared_params.response_format_json_schema import ResponseForma
|
|
5
5
|
from pydantic import BaseModel as ResponseFormatT
|
6
6
|
|
7
7
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
8
|
-
from ...
|
8
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
9
9
|
from ...types.chat import ChatCompletionRetabMessage
|
10
10
|
from ...types.completions import RetabChatCompletionsRequest
|
11
11
|
from ...types.documents.extractions import RetabParsedChatCompletion
|
@@ -9,9 +9,9 @@ from openai.types.shared_params.response_format_json_schema import ResponseForma
|
|
9
9
|
from pydantic import BaseModel as ResponseFormatT
|
10
10
|
|
11
11
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
12
|
-
from ...
|
13
|
-
from ...
|
14
|
-
from ...
|
12
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
13
|
+
from ...utils.json_schema import unflatten_dict
|
14
|
+
from ...utils.stream_context_managers import as_async_context_manager, as_context_manager
|
15
15
|
from ...types.chat import ChatCompletionRetabMessage
|
16
16
|
from ...types.completions import RetabChatCompletionsRequest
|
17
17
|
from ...types.documents.extractions import RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice
|
@@ -123,7 +123,7 @@ class Completions(SyncAPIResource, BaseCompletionsMixin):
|
|
123
123
|
|
124
124
|
Usage:
|
125
125
|
```python
|
126
|
-
with retab.
|
126
|
+
with retab.completions.stream(json_schema, messages, model, temperature, reasoning_effort) as stream:
|
127
127
|
for response in stream:
|
128
128
|
print(response)
|
129
129
|
```
|
@@ -210,7 +210,7 @@ class AsyncCompletions(AsyncAPIResource, BaseCompletionsMixin):
|
|
210
210
|
|
211
211
|
Usage:
|
212
212
|
```python
|
213
|
-
async with retab.
|
213
|
+
async with retab.completions.stream(json_schema, messages, model, temperature, reasoning_effort, n_consensus) as stream:
|
214
214
|
async for response in stream:
|
215
215
|
print(response)
|
216
216
|
```
|
@@ -7,7 +7,7 @@ from openai.types.shared_params.reasoning import Reasoning
|
|
7
7
|
from pydantic import BaseModel
|
8
8
|
|
9
9
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
10
|
-
from ...
|
10
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
11
11
|
from ...types.completions import RetabChatResponseCreateRequest
|
12
12
|
from ...types.documents.extractions import UiResponse
|
13
13
|
from ...types.schemas.object import Schema
|
@@ -6,8 +6,8 @@ from openai.types.shared_params.reasoning import Reasoning
|
|
6
6
|
from pydantic import BaseModel
|
7
7
|
|
8
8
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
9
|
-
from ...
|
10
|
-
from ...
|
9
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
10
|
+
from ...utils.stream_context_managers import as_async_context_manager, as_context_manager
|
11
11
|
from ...types.completions import RetabChatResponseCreateRequest
|
12
12
|
from ...types.documents.extractions import UiResponse
|
13
13
|
from ...types.schemas.object import Schema
|
@@ -8,15 +8,16 @@ from pydantic_core import PydanticUndefined
|
|
8
8
|
from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
|
9
9
|
|
10
10
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
11
|
-
from ...
|
12
|
-
from ...
|
13
|
-
from ...
|
11
|
+
from ...utils.json_schema import load_json_schema, filter_auxiliary_fields_json
|
12
|
+
from ...utils.mime import convert_mime_data_to_pil_image, prepare_mime_document
|
13
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
14
14
|
from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
|
15
15
|
from ...types.documents.extractions import DocumentExtractRequest, RetabParsedChatCompletion
|
16
16
|
from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
|
17
17
|
from ...types.browser_canvas import BrowserCanvas
|
18
18
|
from ...types.mime import MIMEData
|
19
19
|
from ...types.modalities import Modality
|
20
|
+
from ...types.ai_models import LLMModel
|
20
21
|
from ...types.schemas.object import Schema
|
21
22
|
from ...types.standards import PreparedRequest
|
22
23
|
from .extractions import AsyncExtractions, Extractions
|
@@ -89,7 +90,7 @@ class BaseDocumentsMixin:
|
|
89
90
|
def _prepare_parse(
|
90
91
|
self,
|
91
92
|
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
92
|
-
|
93
|
+
model: LLMModel,
|
93
94
|
table_parsing_format: TableParsingFormat = "html",
|
94
95
|
image_resolution_dpi: int = 72,
|
95
96
|
browser_canvas: BrowserCanvas = "A4",
|
@@ -99,7 +100,7 @@ class BaseDocumentsMixin:
|
|
99
100
|
|
100
101
|
parse_request = ParseRequest(
|
101
102
|
document=mime_document,
|
102
|
-
|
103
|
+
model=model,
|
103
104
|
table_parsing_format=table_parsing_format,
|
104
105
|
image_resolution_dpi=image_resolution_dpi,
|
105
106
|
browser_canvas=browser_canvas,
|
@@ -290,7 +291,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
290
291
|
def parse(
|
291
292
|
self,
|
292
293
|
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
293
|
-
|
294
|
+
model: LLMModel,
|
294
295
|
table_parsing_format: TableParsingFormat = "html",
|
295
296
|
image_resolution_dpi: int = 72,
|
296
297
|
browser_canvas: BrowserCanvas = "A4",
|
@@ -304,7 +305,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
304
305
|
|
305
306
|
Args:
|
306
307
|
document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
307
|
-
|
308
|
+
model: The AI model to use for document parsing.
|
308
309
|
table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
|
309
310
|
image_resolution_dpi: DPI for image processing. Defaults to 72.
|
310
311
|
browser_canvas: Canvas size for document rendering. Defaults to "A4".
|
@@ -318,7 +319,7 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
|
|
318
319
|
"""
|
319
320
|
request = self._prepare_parse(
|
320
321
|
document=document,
|
321
|
-
|
322
|
+
model=model,
|
322
323
|
table_parsing_format=table_parsing_format,
|
323
324
|
image_resolution_dpi=image_resolution_dpi,
|
324
325
|
browser_canvas=browser_canvas,
|
@@ -512,7 +513,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
512
513
|
async def parse(
|
513
514
|
self,
|
514
515
|
document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
|
515
|
-
|
516
|
+
model: LLMModel,
|
516
517
|
table_parsing_format: TableParsingFormat = "html",
|
517
518
|
image_resolution_dpi: int = 72,
|
518
519
|
browser_canvas: BrowserCanvas = "A4",
|
@@ -526,7 +527,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
526
527
|
|
527
528
|
Args:
|
528
529
|
document: The document to parse. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
|
529
|
-
|
530
|
+
model: The AI model to use for document parsing.
|
530
531
|
table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
|
531
532
|
image_resolution_dpi: DPI for image processing. Defaults to 72.
|
532
533
|
browser_canvas: Canvas size for document rendering. Defaults to "A4".
|
@@ -540,7 +541,7 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
|
|
540
541
|
"""
|
541
542
|
request = self._prepare_parse(
|
542
543
|
document=document,
|
543
|
-
|
544
|
+
model=model,
|
544
545
|
table_parsing_format=table_parsing_format,
|
545
546
|
image_resolution_dpi=image_resolution_dpi,
|
546
547
|
browser_canvas=browser_canvas,
|
@@ -14,10 +14,10 @@ from pydantic_core import PydanticUndefined
|
|
14
14
|
from pydantic import HttpUrl
|
15
15
|
|
16
16
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
17
|
-
from ...
|
18
|
-
from ...
|
19
|
-
from ...
|
20
|
-
from ...
|
17
|
+
from ...utils.ai_models import assert_valid_model_extraction
|
18
|
+
from ...utils.json_schema import filter_auxiliary_fields_json, load_json_schema, unflatten_dict
|
19
|
+
from ...utils.mime import MIMEData, prepare_mime_document
|
20
|
+
from ...utils.stream_context_managers import as_async_context_manager, as_context_manager
|
21
21
|
from ...types.chat import ChatCompletionRetabMessage
|
22
22
|
from ...types.documents.extractions import DocumentExtractRequest, LogExtractionRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice
|
23
23
|
from ...types.browser_canvas import BrowserCanvas
|
@@ -7,7 +7,7 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
|
|
7
7
|
from pydantic import HttpUrl
|
8
8
|
|
9
9
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
10
|
-
from ..
|
10
|
+
from ..utils.mime import prepare_mime_document
|
11
11
|
from ..types.evals import (
|
12
12
|
CreateIterationRequest,
|
13
13
|
DistancesResult,
|
@@ -6,7 +6,7 @@ import PIL.Image
|
|
6
6
|
from pydantic import HttpUrl
|
7
7
|
|
8
8
|
from ..._resource import AsyncAPIResource, SyncAPIResource
|
9
|
-
from ...
|
9
|
+
from ...utils.mime import prepare_mime_document
|
10
10
|
from ...types.evaluations import DocumentItem, EvaluationDocument, PatchEvaluationDocumentRequest
|
11
11
|
from ...types.mime import MIMEData
|
12
12
|
from ...types.standards import PreparedRequest, DeleteResponse, FieldUnset
|
@@ -18,10 +18,10 @@ from pydantic_core import PydanticUndefined
|
|
18
18
|
from tqdm import tqdm
|
19
19
|
|
20
20
|
from .._resource import AsyncAPIResource, SyncAPIResource
|
21
|
-
from ..
|
22
|
-
from ..
|
23
|
-
from ..
|
24
|
-
from ..
|
21
|
+
from ..utils.ai_models import assert_valid_model_extraction, get_provider_for_model
|
22
|
+
from ..utils.chat import convert_to_anthropic_format, convert_to_openai_format, separate_messages
|
23
|
+
from ..utils.display import Metrics, display_metrics, process_dataset_and_compute_metrics
|
24
|
+
from ..utils.json_schema import load_json_schema
|
25
25
|
from ..types.chat import ChatCompletionRetabMessage
|
26
26
|
from ..types.modalities import Modality
|
27
27
|
from ..types.schemas.object import Schema
|
@@ -3,7 +3,7 @@ from typing import Literal, Optional
|
|
3
3
|
from pydantic_core import PydanticUndefined
|
4
4
|
|
5
5
|
from ...._resource import AsyncAPIResource, SyncAPIResource
|
6
|
-
from ....
|
6
|
+
from ....utils.ai_models import assert_valid_model_extraction
|
7
7
|
from ....types.automations.endpoints import Endpoint, ListEndpoints, UpdateEndpointRequest
|
8
8
|
from ....types.standards import PreparedRequest
|
9
9
|
|
@@ -31,6 +31,7 @@ class EndpointsMixin:
|
|
31
31
|
|
32
32
|
def prepare_list(
|
33
33
|
self,
|
34
|
+
processor_id: str,
|
34
35
|
before: Optional[str] = None,
|
35
36
|
after: Optional[str] = None,
|
36
37
|
limit: Optional[int] = 10,
|
@@ -40,6 +41,7 @@ class EndpointsMixin:
|
|
40
41
|
webhook_url: Optional[str] = None,
|
41
42
|
) -> PreparedRequest:
|
42
43
|
params = {
|
44
|
+
"processor_id": processor_id,
|
43
45
|
"before": before,
|
44
46
|
"after": after,
|
45
47
|
"limit": limit,
|
@@ -120,11 +122,12 @@ class Endpoints(SyncAPIResource, EndpointsMixin):
|
|
120
122
|
need_validation=need_validation,
|
121
123
|
)
|
122
124
|
response = self._client._prepared_request(request)
|
123
|
-
print(f"Endpoint Created. Url: https://www.retab.
|
125
|
+
print(f"Endpoint Created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
124
126
|
return Endpoint.model_validate(response)
|
125
127
|
|
126
128
|
def list(
|
127
129
|
self,
|
130
|
+
processor_id: str,
|
128
131
|
before: Optional[str] = None,
|
129
132
|
after: Optional[str] = None,
|
130
133
|
limit: Optional[int] = 10,
|
@@ -145,7 +148,7 @@ class Endpoints(SyncAPIResource, EndpointsMixin):
|
|
145
148
|
Returns:
|
146
149
|
ListEndpoints: Paginated list of endpoint configurations with metadata
|
147
150
|
"""
|
148
|
-
request = self.prepare_list(before, after, limit, order, name, webhook_url)
|
151
|
+
request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
|
149
152
|
response = self._client._prepared_request(request)
|
150
153
|
return ListEndpoints.model_validate(response)
|
151
154
|
|
@@ -229,12 +232,13 @@ class AsyncEndpoints(AsyncAPIResource, EndpointsMixin):
|
|
229
232
|
need_validation=need_validation,
|
230
233
|
)
|
231
234
|
response = await self._client._prepared_request(request)
|
232
|
-
print(f"Endpoint Created. Url: https://www.retab.
|
235
|
+
print(f"Endpoint Created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
233
236
|
|
234
237
|
return Endpoint.model_validate(response)
|
235
238
|
|
236
239
|
async def list(
|
237
240
|
self,
|
241
|
+
processor_id: str,
|
238
242
|
before: Optional[str] = None,
|
239
243
|
after: Optional[str] = None,
|
240
244
|
limit: Optional[int] = 10,
|
@@ -242,7 +246,7 @@ class AsyncEndpoints(AsyncAPIResource, EndpointsMixin):
|
|
242
246
|
name: Optional[str] = None,
|
243
247
|
webhook_url: Optional[str] = None,
|
244
248
|
) -> ListEndpoints:
|
245
|
-
request = self.prepare_list(before, after, limit, order, name, webhook_url)
|
249
|
+
request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
|
246
250
|
response = await self._client._prepared_request(request)
|
247
251
|
return ListEndpoints.model_validate(response)
|
248
252
|
|
@@ -128,7 +128,7 @@ class Links(SyncAPIResource, LinksMixin):
|
|
128
128
|
)
|
129
129
|
response = self._client._prepared_request(request)
|
130
130
|
|
131
|
-
print(f"Link Created. Url: https://www.retab.
|
131
|
+
print(f"Link Created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
132
132
|
return Link.model_validate(response)
|
133
133
|
|
134
134
|
def list(
|
@@ -248,7 +248,7 @@ class AsyncLinks(AsyncAPIResource, LinksMixin):
|
|
248
248
|
password=password,
|
249
249
|
)
|
250
250
|
response = await self._client._prepared_request(request)
|
251
|
-
print(f"Link Created. Url: https://www.retab.
|
251
|
+
print(f"Link Created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
|
252
252
|
return Link.model_validate(response)
|
253
253
|
|
254
254
|
async def list(
|
@@ -147,7 +147,7 @@ class Logs(SyncAPIResource, LogsMixin):
|
|
147
147
|
request = self.prepare_rerun(processor_id, log_id)
|
148
148
|
response = self._client._prepared_request(request)
|
149
149
|
|
150
|
-
print(f"Webhook call run successfully. Log available at https://www.retab.
|
150
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.com/dashboard/processors/{processor_id}/logs/{log_id}")
|
151
151
|
|
152
152
|
return ExternalRequestLog.model_validate(response)
|
153
153
|
|
@@ -217,6 +217,6 @@ class AsyncLogs(AsyncAPIResource, LogsMixin):
|
|
217
217
|
request = self.prepare_rerun(processor_id, log_id)
|
218
218
|
response = await self._client._prepared_request(request)
|
219
219
|
|
220
|
-
print(f"Webhook call run successfully. Log available at https://www.retab.
|
220
|
+
print(f"Webhook call run successfully. Log available at https://www.retab.com/dashboard/processors/{processor_id}/logs/{log_id}")
|
221
221
|
|
222
222
|
return ExternalRequestLog.model_validate(response)
|