retab 0.0.42__tar.gz → 0.0.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {retab-0.0.42 → retab-0.0.44}/PKG-INFO +4 -6
- {retab-0.0.42 → retab-0.0.44}/README.md +3 -5
- {retab-0.0.42 → retab-0.0.44}/retab/__init__.py +2 -1
- {retab-0.0.42 → retab-0.0.44}/retab/client.py +26 -51
- retab-0.0.44/retab/generate_types.py +180 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/client.py +1 -1
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/responses.py +1 -1
- retab-0.0.44/retab/resources/deployments/__init__.py +3 -0
- retab-0.0.44/retab/resources/deployments/client.py +148 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/client.py +94 -68
- {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/extractions.py +55 -46
- {retab-0.0.42 → retab-0.0.44}/retab/resources/jsonlUtils.py +3 -4
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/endpoints.py +49 -39
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/links.py +52 -43
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/mailboxes.py +74 -59
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/outlook.py +104 -82
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/client.py +35 -30
- retab-0.0.44/retab/resources/projects/__init__.py +3 -0
- {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/client.py +62 -78
- {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/documents.py +48 -37
- {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/iterations.py +58 -40
- {retab-0.0.42 → retab-0.0.44}/retab/resources/usage.py +2 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/ai_models.py +2 -1
- retab-0.0.42/retab/types/evals.py → retab-0.0.44/retab/types/deprecated_evals.py +14 -26
- {retab-0.0.42 → retab-0.0.44}/retab/types/extractions.py +1 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/base.py +1 -1
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/evaluation.py +1 -1
- {retab-0.0.42 → retab-0.0.44}/retab/types/logs.py +5 -6
- {retab-0.0.42 → retab-0.0.44}/retab/types/mime.py +1 -10
- {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/__init__.py +12 -9
- {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/documents.py +3 -3
- {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/iterations.py +9 -43
- {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/model.py +25 -30
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/enhance.py +22 -5
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/evaluate.py +2 -2
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/object.py +27 -25
- {retab-0.0.42 → retab-0.0.44}/retab/types/standards.py +2 -2
- retab-0.0.44/retab/utils/__init__.py +3 -0
- retab-0.0.44/retab/utils/ai_models.py +253 -0
- retab-0.0.44/retab/utils/hashing.py +24 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/json_schema.py +1 -26
- {retab-0.0.42 → retab-0.0.44}/retab/utils/mime.py +0 -17
- {retab-0.0.42 → retab-0.0.44}/retab/utils/usage/usage.py +0 -1
- {retab-0.0.42 → retab-0.0.44}/retab.egg-info/PKG-INFO +4 -6
- {retab-0.0.42 → retab-0.0.44}/retab.egg-info/SOURCES.txt +13 -10
- {retab-0.0.42 → retab-0.0.44}/setup.py +1 -1
- {retab-0.0.42 → retab-0.0.44}/tests/test_evaluations.py +2 -16
- retab-0.0.42/retab/resources/evals.py +0 -825
- retab-0.0.42/retab/resources/evaluations/__init__.py +0 -3
- retab-0.0.42/retab/utils/ai_models.py +0 -138
- retab-0.0.42/retab/utils/usage/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/pyproject.toml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/_resource.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/py.typed +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/completions.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/completions_stream.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/responses_stream.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/files.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/finetuning.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/models.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/openai_example.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/client.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/logs.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/tests.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/prompt_optimization.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/schemas.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/client.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/external_api_keys.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/webhook.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/cron.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/endpoints.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/links.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/mailboxes.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/outlook.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/automations/webhooks.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/browser_canvas.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/chat.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/completions.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/consensus.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/db/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/db/annotations.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/db/files.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/documents/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/documents/correct_orientation.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/documents/create_messages.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/documents/extractions.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/documents/parse.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/events.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/inference_settings.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/batch_annotation.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/finetune.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/prompt_optimization.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/webcrawl.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/metrics.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/modalities.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/pagination.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/predictions.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/generate.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/layout.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/templates.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/secrets/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/types/secrets/external_api_keys.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/anthropic.yaml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/auto.yaml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/gemini.yaml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/openai.yaml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/xai.yaml +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/benchmarking.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/chat.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/display.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/responses.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab/utils/stream_context_managers.py +0 -0
- {retab-0.0.42/retab/utils → retab-0.0.44/retab/utils/usage}/__init__.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab.egg-info/dependency_links.txt +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab.egg-info/requires.txt +0 -0
- {retab-0.0.42 → retab-0.0.44}/retab.egg-info/top_level.txt +0 -0
- {retab-0.0.42 → retab-0.0.44}/setup.cfg +0 -0
- {retab-0.0.42 → retab-0.0.44}/tests/test_automations_links.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/tests/test_automations_mailboxes.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/tests/test_documents_api.py +0 -0
- {retab-0.0.42 → retab-0.0.44}/tests/test_preprocessor.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: retab
|
3
|
-
Version: 0.0.42
|
3
|
+
Version: 0.0.44
|
4
4
|
Summary: Retab official python library
|
5
5
|
Home-page: https://github.com/Retab-dev/retab
|
6
6
|
Author: Retab
|
@@ -61,13 +61,13 @@ Made with love by the team at [Retab](https://retab.com) 🤍.
|
|
61
61
|
|
62
62
|
### What is Retab?
|
63
63
|
|
64
|
-
Retab solves all the major challenges in document processing with
|
64
|
+
Retab solves all the major challenges in document processing with Large Language Models:
|
65
65
|
|
66
66
|
1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
|
67
67
|
2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
|
68
68
|
3. **Processors**: Publish a live, stable, shareable document processor.
|
69
69
|
4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
|
70
|
-
5. **
|
70
|
+
5. **Projects**: Evaluate the performance of models against annotated datasets
|
71
71
|
6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
|
72
72
|
|
73
73
|
We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
|
@@ -90,7 +90,7 @@ Many people haven't yet realized how powerful LLMs have become at document proce
|
|
90
90
|
|
91
91
|
## Code examples
|
92
92
|
|
93
|
-
|
93
|
+
You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
|
94
94
|
|
95
95
|
## Community
|
96
96
|
|
@@ -112,8 +112,6 @@ We share our roadmap publicly on [Github](https://github.com/Retab-dev/retab)
|
|
112
112
|
Among the features we're working on:
|
113
113
|
|
114
114
|
* [ ] Node.js SDK
|
115
|
-
* [ ] Low-level speed optimizations for Evals Frontend
|
116
115
|
* [ ] Schema optimization autopilot
|
117
116
|
* [ ] Sources API
|
118
|
-
* [ ] Parse API for RAG
|
119
117
|
|
@@ -18,13 +18,13 @@ Made with love by the team at [Retab](https://retab.com) 🤍.
|
|
18
18
|
|
19
19
|
### What is Retab?
|
20
20
|
|
21
|
-
Retab solves all the major challenges in document processing with
|
21
|
+
Retab solves all the major challenges in document processing with Large Language Models:
|
22
22
|
|
23
23
|
1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
|
24
24
|
2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
|
25
25
|
3. **Processors**: Publish a live, stable, shareable document processor.
|
26
26
|
4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
|
27
|
-
5. **
|
27
|
+
5. **Projects**: Evaluate the performance of models against annotated datasets
|
28
28
|
6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
|
29
29
|
|
30
30
|
We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
|
@@ -47,7 +47,7 @@ Many people haven't yet realized how powerful LLMs have become at document proce
|
|
47
47
|
|
48
48
|
## Code examples
|
49
49
|
|
50
|
-
|
50
|
+
You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
|
51
51
|
|
52
52
|
## Community
|
53
53
|
|
@@ -69,8 +69,6 @@ We share our roadmap publicly on [Github](https://github.com/Retab-dev/retab)
|
|
69
69
|
Among the features we're working on:
|
70
70
|
|
71
71
|
* [ ] Node.js SDK
|
72
|
-
* [ ] Low-level speed optimizations for Evals Frontend
|
73
72
|
* [ ] Schema optimization autopilot
|
74
73
|
* [ ] Sources API
|
75
|
-
* [ ] Parse API for RAG
|
76
74
|
|
@@ -7,10 +7,9 @@ import backoff
|
|
7
7
|
import backoff.types
|
8
8
|
import httpx
|
9
9
|
import truststore
|
10
|
-
from pydantic_core import PydanticUndefined
|
11
10
|
|
12
|
-
from .resources import consensus,
|
13
|
-
from .types.standards import PreparedRequest
|
11
|
+
from .resources import consensus, deployments, documents, files, finetuning, models, processors, schemas, secrets, usage, projects
|
12
|
+
from .types.standards import PreparedRequest, FieldUnset
|
14
13
|
|
15
14
|
|
16
15
|
class MaxRetriesExceeded(Exception):
|
@@ -43,20 +42,15 @@ class BaseRetab:
|
|
43
42
|
ValueError: If no API key is provided through arguments or environment variables
|
44
43
|
"""
|
45
44
|
|
46
|
-
# claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
|
47
|
-
# xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
|
48
|
-
# gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
|
49
|
-
|
50
45
|
def __init__(
|
51
46
|
self,
|
52
47
|
api_key: Optional[str] = None,
|
53
48
|
base_url: Optional[str] = None,
|
54
49
|
timeout: float = 240.0,
|
55
50
|
max_retries: int = 3,
|
56
|
-
openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
57
|
-
gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
58
|
-
|
59
|
-
xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
51
|
+
openai_api_key: Optional[str] = FieldUnset,
|
52
|
+
gemini_api_key: Optional[str] = FieldUnset,
|
53
|
+
xai_api_key: Optional[str] = FieldUnset,
|
60
54
|
) -> None:
|
61
55
|
if api_key is None:
|
62
56
|
api_key = os.environ.get("RETAB_API_KEY")
|
@@ -80,30 +74,21 @@ class BaseRetab:
|
|
80
74
|
"Content-Type": "application/json",
|
81
75
|
}
|
82
76
|
|
83
|
-
# Only check environment variables if the value is PydanticUndefined
|
84
|
-
if openai_api_key is PydanticUndefined:
|
77
|
+
# Only check environment variables if the value is FieldUnset
|
78
|
+
if openai_api_key is FieldUnset:
|
85
79
|
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
86
80
|
|
87
|
-
|
88
|
-
# claude_api_key = os.environ.get("CLAUDE_API_KEY")
|
89
|
-
|
90
|
-
# if xai_api_key is PydanticUndefined:
|
91
|
-
# xai_api_key = os.environ.get("XAI_API_KEY")
|
92
|
-
|
93
|
-
if gemini_api_key is PydanticUndefined:
|
81
|
+
if gemini_api_key is FieldUnset:
|
94
82
|
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
95
83
|
|
96
|
-
# Only add headers if the values are actual strings (not None or PydanticUndefined)
|
97
|
-
if openai_api_key and openai_api_key is not PydanticUndefined:
|
84
|
+
# Only add headers if the values are actual strings (not None or FieldUnset)
|
85
|
+
if openai_api_key and openai_api_key is not FieldUnset:
|
98
86
|
self.headers["OpenAI-Api-Key"] = openai_api_key
|
99
87
|
|
100
|
-
|
101
|
-
# self.headers["Anthropic-Api-Key"] = claude_api_key
|
102
|
-
|
103
|
-
if xai_api_key and xai_api_key is not PydanticUndefined:
|
88
|
+
if xai_api_key and xai_api_key is not FieldUnset:
|
104
89
|
self.headers["XAI-Api-Key"] = xai_api_key
|
105
90
|
|
106
|
-
if gemini_api_key and gemini_api_key is not PydanticUndefined:
|
91
|
+
if gemini_api_key and gemini_api_key is not FieldUnset:
|
107
92
|
self.headers["Gemini-Api-Key"] = gemini_api_key
|
108
93
|
|
109
94
|
def _prepare_url(self, endpoint: str) -> str:
|
@@ -150,7 +135,7 @@ class Retab(BaseRetab):
|
|
150
135
|
"""Synchronous client for interacting with the Retab API.
|
151
136
|
|
152
137
|
This client provides synchronous access to all Retab API resources including files, fine-tuning,
|
153
|
-
prompt optimization, documents, models,
|
138
|
+
prompt optimization, documents, models, processors, deployments, and schemas.
|
154
139
|
|
155
140
|
Args:
|
156
141
|
api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
|
@@ -158,8 +143,6 @@ class Retab(BaseRetab):
|
|
158
143
|
timeout (float): Request timeout in seconds. Defaults to 240.0
|
159
144
|
max_retries (int): Maximum number of retries for failed requests. Defaults to 3
|
160
145
|
openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
|
161
|
-
claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
|
162
|
-
xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
|
163
146
|
gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
|
164
147
|
|
165
148
|
Attributes:
|
@@ -168,7 +151,8 @@ class Retab(BaseRetab):
|
|
168
151
|
prompt_optimization: Access to prompt optimization operations
|
169
152
|
documents: Access to document operations
|
170
153
|
models: Access to model operations
|
171
|
-
|
154
|
+
processors: Access to processor operations
|
155
|
+
deployments: Access to deployment operations
|
172
156
|
schemas: Access to schema operations
|
173
157
|
responses: Access to responses API (OpenAI Responses API compatible interface)
|
174
158
|
"""
|
@@ -179,10 +163,8 @@ class Retab(BaseRetab):
|
|
179
163
|
base_url: Optional[str] = None,
|
180
164
|
timeout: float = 240.0,
|
181
165
|
max_retries: int = 3,
|
182
|
-
openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
183
|
-
gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
184
|
-
# claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
185
|
-
# xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
166
|
+
openai_api_key: Optional[str] = FieldUnset,
|
167
|
+
gemini_api_key: Optional[str] = FieldUnset,
|
186
168
|
) -> None:
|
187
169
|
super().__init__(
|
188
170
|
api_key=api_key,
|
@@ -191,20 +173,17 @@ class Retab(BaseRetab):
|
|
191
173
|
max_retries=max_retries,
|
192
174
|
openai_api_key=openai_api_key,
|
193
175
|
gemini_api_key=gemini_api_key,
|
194
|
-
# claude_api_key=claude_api_key,
|
195
|
-
# xai_api_key=xai_api_key,
|
196
176
|
)
|
197
177
|
|
198
178
|
self.client = httpx.Client(timeout=self.timeout)
|
199
|
-
self.
|
200
|
-
self.evaluations = evaluations.Evaluations(client=self)
|
179
|
+
self.projects = projects.Projects(client=self)
|
201
180
|
self.files = files.Files(client=self)
|
202
181
|
self.fine_tuning = finetuning.FineTuning(client=self)
|
203
|
-
# self.prompt_optimization = prompt_optimization.PromptOptimization(client=self)
|
204
182
|
self.documents = documents.Documents(client=self)
|
205
183
|
self.models = models.Models(client=self)
|
206
184
|
self.schemas = schemas.Schemas(client=self)
|
207
185
|
self.processors = processors.Processors(client=self)
|
186
|
+
self.deployments = deployments.Deployments(client=self)
|
208
187
|
self.secrets = secrets.Secrets(client=self)
|
209
188
|
self.usage = usage.Usage(client=self)
|
210
189
|
self.consensus = consensus.Consensus(client=self)
|
@@ -418,7 +397,7 @@ class AsyncRetab(BaseRetab):
|
|
418
397
|
"""Asynchronous client for interacting with the Retab API.
|
419
398
|
|
420
399
|
This client provides asynchronous access to all Retab API resources including files, fine-tuning,
|
421
|
-
prompt optimization, documents, models,
|
400
|
+
prompt optimization, documents, models, processors, deployments, and schemas.
|
422
401
|
|
423
402
|
Args:
|
424
403
|
api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
|
@@ -436,7 +415,8 @@ class AsyncRetab(BaseRetab):
|
|
436
415
|
prompt_optimization: Access to asynchronous prompt optimization operations
|
437
416
|
documents: Access to asynchronous document operations
|
438
417
|
models: Access to asynchronous model operations
|
439
|
-
|
418
|
+
processors: Access to asynchronous processor operations
|
419
|
+
deployments: Access to asynchronous deployment operations
|
440
420
|
schemas: Access to asynchronous schema operations
|
441
421
|
responses: Access to responses API (OpenAI Responses API compatible interface)
|
442
422
|
"""
|
@@ -447,10 +427,8 @@ class AsyncRetab(BaseRetab):
|
|
447
427
|
base_url: Optional[str] = None,
|
448
428
|
timeout: float = 240.0,
|
449
429
|
max_retries: int = 3,
|
450
|
-
openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
451
|
-
gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
452
|
-
# claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
453
|
-
# xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
|
430
|
+
openai_api_key: Optional[str] = FieldUnset,
|
431
|
+
gemini_api_key: Optional[str] = FieldUnset,
|
454
432
|
) -> None:
|
455
433
|
super().__init__(
|
456
434
|
api_key=api_key,
|
@@ -459,21 +437,18 @@ class AsyncRetab(BaseRetab):
|
|
459
437
|
max_retries=max_retries,
|
460
438
|
openai_api_key=openai_api_key,
|
461
439
|
gemini_api_key=gemini_api_key,
|
462
|
-
# claude_api_key=claude_api_key,
|
463
|
-
# xai_api_key=xai_api_key,
|
464
440
|
)
|
465
441
|
|
466
442
|
self.client = httpx.AsyncClient(timeout=self.timeout)
|
467
443
|
|
468
|
-
self.
|
469
|
-
self.evaluations = evaluations.AsyncEvaluations(client=self)
|
444
|
+
self.projects = projects.AsyncProjects(client=self)
|
470
445
|
self.files = files.AsyncFiles(client=self)
|
471
446
|
self.fine_tuning = finetuning.AsyncFineTuning(client=self)
|
472
|
-
# self.prompt_optimization = prompt_optimization.AsyncPromptOptimization(client=self)
|
473
447
|
self.documents = documents.AsyncDocuments(client=self)
|
474
448
|
self.models = models.AsyncModels(client=self)
|
475
449
|
self.schemas = schemas.AsyncSchemas(client=self)
|
476
450
|
self.processors = processors.AsyncProcessors(client=self)
|
451
|
+
self.deployments = deployments.AsyncDeployments(client=self)
|
477
452
|
self.secrets = secrets.AsyncSecrets(client=self)
|
478
453
|
self.usage = usage.AsyncUsage(client=self)
|
479
454
|
self.consensus = consensus.AsyncConsensus(client=self)
|
@@ -0,0 +1,180 @@
|
|
1
|
+
import collections.abc
|
2
|
+
import json
|
3
|
+
import os
|
4
|
+
import types
|
5
|
+
import typing
|
6
|
+
import enum
|
7
|
+
import sys
|
8
|
+
import inspect
|
9
|
+
from datetime import datetime, date
|
10
|
+
from typing import Any, Type, get_args, get_origin, Union, Literal, is_typeddict
|
11
|
+
from typing_extensions import is_typeddict as is_typeddict_ext
|
12
|
+
import typing_extensions
|
13
|
+
from pydantic_core import PydanticUndefined
|
14
|
+
from pydantic import BaseModel, EmailStr
|
15
|
+
import PIL.Image
|
16
|
+
|
17
|
+
to_compile: list[tuple[str, Type, bool]] = []
|
18
|
+
|
19
|
+
def is_base_model(field_type: Type) -> bool:
|
20
|
+
return getattr(field_type, "__name__", None) in ["BaseModel", "GenericModel", "ConfigDict", "Generic"]
|
21
|
+
|
22
|
+
def type_to_zod(field_type: Any, put_names: bool = True, ts: bool = False) -> str:
|
23
|
+
origin = get_origin(field_type) or field_type
|
24
|
+
optional = False
|
25
|
+
|
26
|
+
def make_union(args):
|
27
|
+
return args[0] if len(args) <= 1 else "z.union([" + ", ".join(args) + "])"
|
28
|
+
|
29
|
+
def make_ts_union(args):
|
30
|
+
return args[0] if len(args) <= 1 else " | ".join(args)
|
31
|
+
|
32
|
+
if isinstance(field_type, typing.ForwardRef):
|
33
|
+
return type_to_zod(typing._eval_type(field_type, globals(), locals(), []), ts=ts)
|
34
|
+
elif origin is typing.Annotated or origin is typing.Required or origin is typing_extensions.Required:
|
35
|
+
return type_to_zod(get_args(field_type)[0], put_names, ts=ts)
|
36
|
+
if origin is Union or origin is types.UnionType:
|
37
|
+
args = [x for x in get_args(field_type)]
|
38
|
+
if types.NoneType in args:
|
39
|
+
args.remove(types.NoneType)
|
40
|
+
optional = True
|
41
|
+
typename = make_union([type_to_zod(x) for x in args])
|
42
|
+
ts_typename = make_ts_union([type_to_zod(x, ts=True) for x in args])
|
43
|
+
elif issubclass(origin, BaseModel) or is_typeddict(origin) or is_typeddict_ext(origin):
|
44
|
+
if put_names:
|
45
|
+
typename = "Z" + origin.__name__
|
46
|
+
ts_typename = origin.__name__
|
47
|
+
to_compile.append((origin.__name__, field_type, True))
|
48
|
+
else:
|
49
|
+
excluded_fields = set()
|
50
|
+
typename = "z.object({\n"
|
51
|
+
ts_typename = "{\n"
|
52
|
+
props = [(n, f.annotation, f.default) for n, f in origin.model_fields.items()] if issubclass(origin, BaseModel) else \
|
53
|
+
[(n, f, PydanticUndefined) for n, f in origin.__annotations__.items()]
|
54
|
+
|
55
|
+
for field_name, field, default in props:
|
56
|
+
if field_name in excluded_fields:
|
57
|
+
continue
|
58
|
+
ts_compiled = type_to_zod(field, ts=True)
|
59
|
+
default_str = ""
|
60
|
+
if default is not PydanticUndefined and default is not None:
|
61
|
+
if isinstance(default, BaseModel):
|
62
|
+
default_str = f".default({json.dumps(default.model_dump(mode="json", exclude_unset=True))})"
|
63
|
+
else:
|
64
|
+
default_str = f".default({json.dumps(default)})"
|
65
|
+
typename += f" {field_name}: {type_to_zod(field)}{default_str},\n"
|
66
|
+
ts_typename += f" {field_name}{"?" if ts_compiled.endswith(" | undefined") or default is not PydanticUndefined else ""}: {ts_compiled},\n"
|
67
|
+
typename += "})"
|
68
|
+
ts_typename += "}"
|
69
|
+
|
70
|
+
based = origin.__bases__
|
71
|
+
for i in range(0, len(based)):
|
72
|
+
if is_base_model(based[i]) or based[i] is dict:
|
73
|
+
break
|
74
|
+
if issubclass(based[i], BaseModel):
|
75
|
+
excluded_fields.update(based[i].model_fields.keys())
|
76
|
+
typename += ".merge(Z" + based[i].__name__ + ".schema)"
|
77
|
+
ts_typename += " & " + based[i].__name__
|
78
|
+
elif origin is list or origin is typing.List or origin is collections.abc.Sequence or origin is collections.abc.Iterable:
|
79
|
+
typename = "z.array(" + type_to_zod(get_args(field_type)[0]) + ")"
|
80
|
+
ts_typename = "Array<" + type_to_zod(get_args(field_type)[0], ts=True) + ">"
|
81
|
+
elif origin is tuple:
|
82
|
+
args = get_args(field_type)
|
83
|
+
typename = "z.tuple([" + ", ".join([type_to_zod(x) for x in args]) + "])"
|
84
|
+
ts_typename = "[" + ", ".join([type_to_zod(x, ts=True) for x in args]) + "]"
|
85
|
+
elif origin is dict:
|
86
|
+
if len(get_args(field_type)) == 2:
|
87
|
+
typename = "z.record(" + type_to_zod(get_args(field_type)[0]) + ", " + type_to_zod(get_args(field_type)[1]) + ")"
|
88
|
+
ts_typename = "{[key: " + type_to_zod(get_args(field_type)[0], ts=True) + "]: " + type_to_zod(get_args(field_type)[1], ts=True) + "}"
|
89
|
+
else:
|
90
|
+
typename = "z.record(z.any())"
|
91
|
+
ts_typename = "{[key: string]: any}"
|
92
|
+
elif origin is Literal:
|
93
|
+
typename = make_union(["z.literal(" + json.dumps(x) + ")" for x in get_args(field_type)])
|
94
|
+
ts_typename = make_ts_union([json.dumps(x) for x in get_args(field_type)])
|
95
|
+
elif isinstance(field_type, typing.TypeVar):
|
96
|
+
typename = "z.any()"
|
97
|
+
ts_typename = "any"
|
98
|
+
elif isinstance(field_type, type) and issubclass(field_type, enum.Enum):
|
99
|
+
typename = "z.any()"
|
100
|
+
ts_typename = "any"
|
101
|
+
elif field_type is str or field_type is date or field_type is datetime:
|
102
|
+
typename = "z.string()"
|
103
|
+
ts_typename = "string"
|
104
|
+
elif field_type is int or field_type is float:
|
105
|
+
typename = "z.number()"
|
106
|
+
ts_typename = "number"
|
107
|
+
elif field_type is bool:
|
108
|
+
typename = "z.boolean()"
|
109
|
+
ts_typename = "boolean"
|
110
|
+
elif field_type is typing.Any:
|
111
|
+
typename = "z.any()"
|
112
|
+
ts_typename = "any"
|
113
|
+
elif field_type is bytes or field_type is PIL.Image.Image or field_type is typing.BinaryIO or origin is typing.IO or origin is typing_extensions.IO:
|
114
|
+
typename = "z.instanceof(Uint8Array)"
|
115
|
+
ts_typename = "Uint8Array"
|
116
|
+
elif field_type is EmailStr:
|
117
|
+
typename = "z.string().email()"
|
118
|
+
ts_typename = "string"
|
119
|
+
elif field_type is os.PathLike:
|
120
|
+
typename = "z.string()"
|
121
|
+
ts_typename = "string"
|
122
|
+
elif field_type is object:
|
123
|
+
typename = "z.object({}).passthrough()"
|
124
|
+
ts_typename = "object"
|
125
|
+
else:
|
126
|
+
raise ValueError(f"Unsupported type: {field_type} ({origin})")
|
127
|
+
if ts:
|
128
|
+
return ts_typename if not optional else ts_typename + " | null | undefined"
|
129
|
+
else:
|
130
|
+
return typename if not optional else typename + ".nullable().optional()"
|
131
|
+
|
132
|
+
|
133
|
+
# SET of names of python builtin types starting with a capital
|
134
|
+
builtin_types = {
|
135
|
+
"Any",
|
136
|
+
"BaseModel",
|
137
|
+
"NoneType",
|
138
|
+
"Literal",
|
139
|
+
"Union",
|
140
|
+
"List",
|
141
|
+
"Sequence",
|
142
|
+
"ConfigDict",
|
143
|
+
"Optional",
|
144
|
+
}
|
145
|
+
|
146
|
+
if __name__ == "__main__":
|
147
|
+
modules = []
|
148
|
+
for root, dirs, files in os.walk("retab/types"):
|
149
|
+
for module in files:
|
150
|
+
if module[-3:] != '.py':
|
151
|
+
continue
|
152
|
+
full_name = os.path.join(root, module[:-3]).replace(os.path.sep, '.')
|
153
|
+
__import__(full_name, locals(), globals())
|
154
|
+
modules.append(full_name)
|
155
|
+
|
156
|
+
|
157
|
+
for module_name in modules:
|
158
|
+
for name, obj in inspect.getmembers(sys.modules[module_name]):
|
159
|
+
if name[0] != name[0].lower() and name not in builtin_types:
|
160
|
+
to_compile.append((name, obj, False))
|
161
|
+
|
162
|
+
print("import * as z from 'zod';\n")
|
163
|
+
|
164
|
+
defined = {}
|
165
|
+
while len(to_compile) > 0:
|
166
|
+
name, model, necessary = to_compile.pop(0)
|
167
|
+
if name in defined: continue
|
168
|
+
defined[name] = True
|
169
|
+
try:
|
170
|
+
compiled = type_to_zod(model, False)
|
171
|
+
compiled_ts = type_to_zod(model, False, ts=True)
|
172
|
+
except Exception as e:
|
173
|
+
if not necessary:
|
174
|
+
print(f"Skipping {name} {model} due to error: {e}", file=sys.stderr)
|
175
|
+
continue
|
176
|
+
print(f"Error compiling {name} {model}", file=sys.stderr)
|
177
|
+
raise e
|
178
|
+
print("export const Z" + name + " = z.lazy(() => " + compiled + ");")
|
179
|
+
print("export type " + name + " = z.infer<typeof Z" + name + ">;\n")
|
180
|
+
|
@@ -21,7 +21,7 @@ class BaseConsensusMixin:
|
|
21
21
|
mode=mode,
|
22
22
|
)
|
23
23
|
|
24
|
-
return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(), idempotency_key=idempotency_key)
|
24
|
+
return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
|
25
25
|
|
26
26
|
|
27
27
|
class Consensus(SyncAPIResource, BaseConsensusMixin):
|
@@ -55,7 +55,7 @@ class BaseResponsesMixin:
|
|
55
55
|
instructions=instructions,
|
56
56
|
)
|
57
57
|
|
58
|
-
return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(), idempotency_key=idempotency_key)
|
58
|
+
return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
|
59
59
|
|
60
60
|
def prepare_parse(
|
61
61
|
self,
|
@@ -0,0 +1,148 @@
|
|
1
|
+
import base64
|
2
|
+
from io import IOBase
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any, List
|
5
|
+
|
6
|
+
import PIL.Image
|
7
|
+
from pydantic import HttpUrl
|
8
|
+
from ..._resource import AsyncAPIResource, SyncAPIResource
|
9
|
+
from ...utils.mime import MIMEData, prepare_mime_document
|
10
|
+
from ...types.documents.extractions import RetabParsedChatCompletion
|
11
|
+
from ...types.standards import PreparedRequest
|
12
|
+
|
13
|
+
|
14
|
+
class DeploymentsMixin:
    """Request-building logic shared by the sync and async deployments resources."""

    def prepare_submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: list[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> PreparedRequest:
        """Build the multipart POST request that submits documents to a deployment.

        Args:
            project_id: ID of the project (interpolated into the URL)
            iteration_id: ID of the iteration (interpolated into the URL)
            document: Single document to process (mutually exclusive with documents)
            documents: List of documents to process (mutually exclusive with document)
            temperature: Optional temperature override; omitted from the form when None
            seed: Optional seed for reproducibility; omitted from the form when None
            store: Whether to store the results

        Returns:
            PreparedRequest: The prepared request, carrying the form fields and files

        Raises:
            ValueError: If neither or both of 'document' and 'documents' are given
        """
        has_single = bool(document)
        has_many = bool(documents)

        # Exactly one of the two inputs must be supplied.
        if not (has_single or has_many):
            raise ValueError("Either 'document' or 'documents' must be provided")
        if has_single and has_many:
            raise ValueError("Provide either 'document' (single) or 'documents' (multiple), not both")

        # Form fields: only parameters that are not None are sent.
        candidate_fields = {"temperature": temperature, "seed": seed, "store": store}
        form_data = {key: value for key, value in candidate_fields.items() if value is not None}

        if has_single:
            # Single upload: one entry keyed by the "document" field name.
            single = prepare_mime_document(document)
            files = {"document": (single.filename, base64.b64decode(single.content), single.mime_type)}
        else:
            # Multiple documents upload - httpx supports multiple files with same field name using a list
            files = [
                ("documents", (part.filename, base64.b64decode(part.content), part.mime_type))
                for part in map(prepare_mime_document, documents)
            ]

        return PreparedRequest(
            method="POST",
            url=f"/v1/deployments/{project_id}/{iteration_id}/submit",
            form_data=form_data,
            files=files,
        )
|
79
|
+
|
80
|
+
|
81
|
+
class Deployments(SyncAPIResource, DeploymentsMixin):
    """Synchronous Deployments API wrapper for submitting documents to a deployment"""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    def submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> RetabParsedChatCompletion:
        """Submit one document or a list of documents to a deployment and return the result.

        Args:
            project_id: ID of the project
            iteration_id: ID of the iteration
            document: Single document to process (mutually exclusive with documents)
            documents: List of documents to process (mutually exclusive with document)
            temperature: Optional temperature override
            seed: Optional seed for reproducibility
            store: Whether to store the results

        Returns:
            RetabParsedChatCompletion: The response validated into the result model
        """
        # Request construction (including argument validation) lives in the mixin.
        prepared = self.prepare_submit(
            project_id=project_id,
            iteration_id=iteration_id,
            document=document,
            documents=documents,
            temperature=temperature,
            seed=seed,
            store=store,
        )
        raw_response = self._client._prepared_request(prepared)
        return RetabParsedChatCompletion.model_validate(raw_response)
|
114
|
+
|
115
|
+
|
116
|
+
class AsyncDeployments(AsyncAPIResource, DeploymentsMixin):
    """Asynchronous Deployments API wrapper for submitting documents to a deployment"""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    async def submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> RetabParsedChatCompletion:
        """Submit one document or a list of documents to a deployment and await the result.

        Args:
            project_id: ID of the project
            iteration_id: ID of the iteration
            document: Single document to process (mutually exclusive with documents)
            documents: List of documents to process (mutually exclusive with document)
            temperature: Optional temperature override
            seed: Optional seed for reproducibility
            store: Whether to store the results

        Returns:
            RetabParsedChatCompletion: The response validated into the result model
        """
        # Building the request is synchronous; only sending it is awaited.
        prepared = self.prepare_submit(
            project_id=project_id,
            iteration_id=iteration_id,
            document=document,
            documents=documents,
            temperature=temperature,
            seed=seed,
            store=store,
        )
        raw_response = await self._client._prepared_request(prepared)
        return RetabParsedChatCompletion.model_validate(raw_response)
|