retab 0.0.42__py3-none-any.whl → 0.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. retab/__init__.py +2 -1
  2. retab/client.py +16 -45
  3. retab/resources/consensus/client.py +1 -1
  4. retab/resources/consensus/responses.py +1 -1
  5. retab/resources/documents/client.py +94 -68
  6. retab/resources/documents/extractions.py +55 -46
  7. retab/resources/evaluations/client.py +32 -19
  8. retab/resources/evaluations/documents.py +12 -11
  9. retab/resources/evaluations/iterations.py +48 -30
  10. retab/resources/jsonlUtils.py +3 -4
  11. retab/resources/processors/automations/endpoints.py +49 -39
  12. retab/resources/processors/automations/links.py +52 -43
  13. retab/resources/processors/automations/mailboxes.py +74 -59
  14. retab/resources/processors/automations/outlook.py +104 -82
  15. retab/resources/processors/client.py +35 -30
  16. retab/resources/usage.py +2 -0
  17. retab/types/ai_models.py +1 -1
  18. retab/types/deprecated_evals.py +195 -0
  19. retab/types/evaluations/__init__.py +5 -2
  20. retab/types/evaluations/iterations.py +9 -43
  21. retab/types/evaluations/model.py +20 -22
  22. retab/types/extractions.py +1 -0
  23. retab/types/logs.py +5 -6
  24. retab/types/mime.py +1 -10
  25. retab/types/schemas/enhance.py +22 -5
  26. retab/types/schemas/evaluate.py +1 -1
  27. retab/types/schemas/object.py +26 -0
  28. retab/types/standards.py +2 -2
  29. retab/utils/__init__.py +3 -0
  30. retab/utils/ai_models.py +127 -12
  31. retab/utils/hashing.py +24 -0
  32. retab/utils/json_schema.py +1 -26
  33. retab/utils/mime.py +0 -17
  34. {retab-0.0.42.dist-info → retab-0.0.43.dist-info}/METADATA +3 -5
  35. {retab-0.0.42.dist-info → retab-0.0.43.dist-info}/RECORD +37 -51
  36. retab/_utils/__init__.py +0 -0
  37. retab/_utils/_model_cards/anthropic.yaml +0 -59
  38. retab/_utils/_model_cards/auto.yaml +0 -43
  39. retab/_utils/_model_cards/gemini.yaml +0 -117
  40. retab/_utils/_model_cards/openai.yaml +0 -301
  41. retab/_utils/_model_cards/xai.yaml +0 -28
  42. retab/_utils/ai_models.py +0 -138
  43. retab/_utils/benchmarking.py +0 -484
  44. retab/_utils/chat.py +0 -327
  45. retab/_utils/display.py +0 -440
  46. retab/_utils/json_schema.py +0 -2156
  47. retab/_utils/mime.py +0 -165
  48. retab/_utils/responses.py +0 -169
  49. retab/_utils/stream_context_managers.py +0 -52
  50. retab/_utils/usage/__init__.py +0 -0
  51. retab/_utils/usage/usage.py +0 -301
  52. {retab-0.0.42.dist-info → retab-0.0.43.dist-info}/WHEEL +0 -0
  53. {retab-0.0.42.dist-info → retab-0.0.43.dist-info}/top_level.txt +0 -0
@@ -1,117 +0,0 @@
1
- # gemini-2.5-pro family
2
- - model: "gemini-2.5-pro"
3
- pricing:
4
- text:
5
- prompt: 1.25
6
- cached_discount: 0.25
7
- completion: 10.00
8
- audio: null
9
- capabilities:
10
- modalities: ["text", "image"]
11
- endpoints: ["chat_completions"]
12
- features: ["streaming", "function_calling", "structured_outputs"]
13
- permissions:
14
- show_in_free_picker: true
15
- show_in_paid_picker: true
16
-
17
- - model: "gemini-2.5-pro-exp-03-25"
18
- inherits: "gemini-2.5-pro"
19
- permissions:
20
- show_in_free_picker: false
21
- show_in_paid_picker: false
22
-
23
- - model: "gemini-2.5-pro-preview-06-05"
24
- inherits: "gemini-2.5-pro"
25
- permissions:
26
- show_in_free_picker: false
27
- show_in_paid_picker: false
28
-
29
- - model: "gemini-2.5-pro-preview-05-06"
30
- inherits: "gemini-2.5-pro"
31
- permissions:
32
- show_in_free_picker: false
33
- show_in_paid_picker: false
34
-
35
- - model: "gemini-2.5-pro-preview-03-25"
36
- inherits: "gemini-2.5-pro"
37
- permissions:
38
- show_in_free_picker: false
39
- show_in_paid_picker: false
40
-
41
- - model: "gemini-2.5-flash"
42
- pricing:
43
- text:
44
- prompt: 0.30
45
- completion: 2.50
46
- audio:
47
- prompt: 1.00
48
- completion: 1000
49
- capabilities:
50
- modalities: ["text", "image", "audio"]
51
- endpoints: ["chat_completions"]
52
- features: ["streaming", "function_calling", "structured_outputs"]
53
- permissions:
54
- show_in_free_picker: true
55
- show_in_paid_picker: true
56
-
57
- - model: "gemini-2.5-flash-preview-05-20"
58
- pricing:
59
- text:
60
- prompt: 0.15
61
- completion: 0.60
62
- audio: null
63
- capabilities:
64
- modalities: ["text", "image"]
65
- endpoints: ["chat_completions"]
66
- features: ["streaming", "function_calling", "structured_outputs"]
67
-
68
- - model: "gemini-2.5-flash-preview-04-17"
69
- inherits: "gemini-2.5-flash-preview-05-20"
70
-
71
- # gemini-2.0-flash family
72
- - model: "gemini-2.0-flash"
73
- pricing:
74
- text:
75
- prompt: 0.1
76
- cached_discount: 0.25
77
- completion: 0.40
78
- audio:
79
- prompt: 0.7
80
- cached_discount: 0.25
81
- completion: 1000
82
- capabilities:
83
- modalities: ["text", "image"]
84
- endpoints: ["chat_completions"]
85
- features: ["streaming", "function_calling", "structured_outputs"]
86
- temperature_support: true
87
-
88
- - model: "gemini-2.0-flash-lite"
89
- pricing:
90
- text:
91
- prompt: 0.075
92
- completion: 0.30
93
- audio:
94
- prompt: 0.075
95
- completion: 1000
96
- capabilities:
97
- modalities: ["text", "image", "audio"]
98
- endpoints: ["chat_completions"]
99
- features: ["streaming", "structured_outputs"]
100
- temperature_support: true
101
-
102
- - model: "gemini-2.5-flash-lite-preview-06-17"
103
- pricing:
104
- text:
105
- prompt: 0.10
106
- completion: 0.40
107
- audio:
108
- prompt: 0.50
109
- completion: 0.40
110
- capabilities:
111
- modalities: ["text", "image", "audio"]
112
- endpoints: ["chat_completions"]
113
- features: ["streaming", "structured_outputs"]
114
- temperature_support: true
115
- permissions:
116
- show_in_free_picker: true
117
- show_in_paid_picker: true
@@ -1,301 +0,0 @@
1
-
2
- # Reasoning models
3
- # o1 family
4
- - model: "o1"
5
- pricing:
6
- text:
7
- prompt: 15.00
8
- cached_discount: 0.5
9
- completion: 60.00
10
- audio: null
11
- capabilities:
12
- modalities: ["text", "image"]
13
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
14
- features: ["streaming", "function_calling", "structured_outputs"]
15
- temperature_support: false
16
- reasoning_effort_support: true
17
-
18
- - model: "o1-2024-12-17"
19
- inherits: "o1"
20
-
21
- # o3 family
22
- - model: "o3"
23
- pricing:
24
- text:
25
- prompt: 2.0
26
- cached_discount: 0.25
27
- completion: 8.0
28
- audio: null
29
- ft_price_hike: 1.5
30
- capabilities:
31
- modalities: ["text", "image"]
32
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
33
- features: ["streaming", "function_calling", "structured_outputs", "schema_generation"]
34
- temperature_support: false
35
- reasoning_effort_support: true
36
- permissions:
37
- show_in_free_picker: false
38
- show_in_paid_picker: true
39
-
40
- - model: "o3-2025-04-16"
41
- inherits: "o3"
42
- permissions:
43
- show_in_free_picker: false
44
- show_in_paid_picker: false
45
-
46
- # o4-mini family
47
- - model: "o4-mini"
48
- pricing:
49
- text:
50
- prompt: 1.10
51
- cached_discount: 0.25
52
- completion: 4.40
53
- audio: null
54
- ft_price_hike: 1.5
55
- capabilities:
56
- modalities: ["text", "image"]
57
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
58
- features: ["streaming", "function_calling", "structured_outputs", "schema_generation"]
59
- temperature_support: false
60
- reasoning_effort_support: true
61
- permissions:
62
- show_in_free_picker: false
63
- show_in_paid_picker: true
64
-
65
- - model: "o4-mini-2025-04-16"
66
- inherits: "o4-mini"
67
- permissions:
68
- show_in_free_picker: false
69
- show_in_paid_picker: false
70
-
71
- # Chat models
72
- # gpt-4.1 family
73
- - model: "gpt-4.1"
74
- pricing:
75
- text:
76
- prompt: 2.00
77
- cached_discount: 0.25
78
- completion: 8.00
79
- audio: null
80
- ft_price_hike: 1.5
81
- capabilities:
82
- modalities: ["text", "image"]
83
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
84
- features: ["streaming", "function_calling", "structured_outputs", "schema_generation"]
85
- temperature_support: true
86
- reasoning_effort_support: false
87
- permissions:
88
- show_in_free_picker: true
89
- show_in_paid_picker: true
90
-
91
- - model: "gpt-4.1-2025-04-14"
92
- inherits: "gpt-4.1"
93
- permissions:
94
- show_in_free_picker: false
95
- show_in_paid_picker: false
96
-
97
- - model: "gpt-4.1-mini"
98
- pricing:
99
- text:
100
- prompt: 0.40
101
- cached_discount: 0.25
102
- completion: 1.60
103
- audio: null
104
- ft_price_hike: 1.5
105
- capabilities:
106
- modalities: ["text", "image"]
107
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
108
- features: ["streaming", "function_calling", "structured_outputs", "schema_generation"]
109
- temperature_support: true
110
- reasoning_effort_support: false
111
- permissions:
112
- show_in_free_picker: true
113
- show_in_paid_picker: true
114
-
115
- - model: "gpt-4.1-mini-2025-04-14"
116
- inherits: "gpt-4.1-mini"
117
- permissions:
118
- show_in_free_picker: false
119
- show_in_paid_picker: false
120
-
121
- - model: "gpt-4.1-nano"
122
- pricing:
123
- text:
124
- prompt: 0.10
125
- cached_discount: 0.25
126
- completion: 0.40
127
- audio: null
128
- ft_price_hike: 1.5
129
- capabilities:
130
- modalities: ["text", "image"]
131
- endpoints: ["chat_completions", "responses", "assistants", "batch"]
132
- features: ["streaming", "function_calling", "structured_outputs", "schema_generation"]
133
- temperature_support: true
134
- reasoning_effort_support: false
135
- permissions:
136
- show_in_free_picker: true
137
- show_in_paid_picker: true
138
-
139
- - model: "gpt-4.1-nano-2025-04-14"
140
- inherits: "gpt-4.1-nano"
141
- permissions:
142
- show_in_free_picker: false
143
- show_in_paid_picker: false
144
-
145
- # gpt-4o family
146
- - model: "chatgpt-4o-latest"
147
- pricing:
148
- text:
149
- prompt: 2.50
150
- cached_discount: 0.5
151
- completion: 10.00
152
- audio: null
153
- ft_price_hike: 1.5
154
- capabilities:
155
- modalities: ["text", "image"]
156
- endpoints: ["chat_completions", "responses", "assistants", "batch", "fine_tuning"]
157
- features: ["streaming", "function_calling", "structured_outputs", "fine_tuning", "distillation", "predicted_outputs"]
158
- temperature_support: true
159
-
160
- - model: "gpt-4o"
161
- inherits: "chatgpt-4o-latest"
162
-
163
- - model: "gpt-4o-2024-08-06"
164
- inherits: "chatgpt-4o-latest"
165
-
166
- - model: "gpt-4o-2024-11-20"
167
- inherits: "chatgpt-4o-latest"
168
-
169
- - model: "gpt-4o-2024-05-13"
170
- pricing:
171
- text:
172
- prompt: 5.00
173
- cached_discount: 0.5
174
- completion: 15.00
175
- audio: null
176
- ft_price_hike: 1.5
177
- capabilities:
178
- modalities: ["text", "image"]
179
- endpoints: ["chat_completions", "responses", "assistants", "batch", "fine_tuning"]
180
- features: ["streaming", "function_calling", "structured_outputs", "fine_tuning", "distillation", "predicted_outputs"]
181
- temperature_support: true
182
-
183
- # gpt-4o-audio family
184
- - model: "gpt-4o-audio-preview-2024-12-17"
185
- pricing:
186
- text:
187
- prompt: 2.50
188
- cached_discount: 0.5
189
- completion: 10.00
190
- audio:
191
- prompt: 40.00
192
- cached_discount: 0.2
193
- completion: 80.00
194
- capabilities:
195
- modalities: ["text", "audio"]
196
- endpoints: ["chat_completions"]
197
- features: ["streaming", "function_calling"]
198
- temperature_support: true
199
-
200
- - model: "gpt-4o-audio-preview-2024-10-01"
201
- pricing:
202
- text:
203
- prompt: 2.50
204
- cached_discount: 0.5
205
- completion: 10.00
206
- audio:
207
- prompt: 100.00
208
- cached_discount: 0.2
209
- completion: 200.00
210
- capabilities:
211
- modalities: ["text", "audio"]
212
- endpoints: ["chat_completions"]
213
- features: ["streaming", "function_calling"]
214
- temperature_support: true
215
-
216
- - model: "gpt-4o-realtime-preview-2024-12-17"
217
- pricing:
218
- text:
219
- prompt: 5.00
220
- cached_discount: 0.5
221
- completion: 20.00
222
- audio:
223
- prompt: 40.00
224
- cached_discount: 0.2
225
- completion: 80.00
226
- capabilities:
227
- modalities: ["text", "audio"]
228
- endpoints: ["chat_completions"]
229
- features: ["streaming", "function_calling"]
230
- temperature_support: true
231
-
232
- - model: "gpt-4o-realtime-preview-2024-10-01"
233
- pricing:
234
- text:
235
- prompt: 5.00
236
- cached_discount: 0.5
237
- completion: 20.00
238
- audio:
239
- prompt: 100.00
240
- cached_discount: 0.2
241
- completion: 200.00
242
- capabilities:
243
- modalities: ["text", "audio"]
244
- endpoints: ["chat_completions"]
245
- features: ["streaming", "function_calling"]
246
- temperature_support: true
247
-
248
- # gpt-4o-mini family
249
- - model: "gpt-4o-mini"
250
- pricing:
251
- text:
252
- prompt: 0.15
253
- cached_discount: 0.5
254
- completion: 0.60
255
- audio: null
256
- ft_price_hike: 2.0
257
- capabilities:
258
- modalities: ["text", "image"]
259
- endpoints: ["chat_completions", "responses", "assistants", "batch", "fine_tuning"]
260
- features: ["streaming", "function_calling", "structured_outputs", "fine_tuning"]
261
- temperature_support: true
262
-
263
- - model: "gpt-4o-mini-2024-07-18"
264
- inherits: "gpt-4o-mini"
265
-
266
- # gpt-4o-mini-audio family
267
- - model: "gpt-4o-mini-audio-preview-2024-12-17"
268
- pricing:
269
- text:
270
- prompt: 0.15
271
- cached_discount: 0.5
272
- completion: 0.60
273
- audio:
274
- prompt: 10.00
275
- cached_discount: 0.2
276
- completion: 20.00
277
- ft_price_hike: 2.0
278
- capabilities:
279
- modalities: ["text", "audio"]
280
- endpoints: ["chat_completions"]
281
- features: ["streaming", "function_calling"]
282
- temperature_support: true
283
-
284
- - model: "gpt-4o-mini-realtime-preview-2024-12-17"
285
- pricing:
286
- text:
287
- prompt: 0.60
288
- cached_discount: 0.5
289
- completion: 2.40
290
- audio:
291
- prompt: 10.00
292
- cached_discount: 0.2
293
- completion: 20.00
294
- ft_price_hike: 2.0
295
- capabilities:
296
- modalities: ["text", "audio"]
297
- endpoints: ["chat_completions"]
298
- features: ["streaming", "function_calling"]
299
- temperature_support: true
300
-
301
-
@@ -1,28 +0,0 @@
1
- # grok3-family
2
- - model: "grok-3"
3
- pricing:
4
- text:
5
- prompt: 3
6
- completion: 15
7
- audio: null
8
- capabilities:
9
- modalities: ["text"]
10
- endpoints: ["chat_completions"]
11
- features: ["streaming", "structured_outputs"]
12
- permissions:
13
- show_in_free_picker: true
14
- show_in_paid_picker: true
15
-
16
- - model: "grok-3-mini"
17
- pricing:
18
- text:
19
- prompt: 0.3
20
- completion: 0.5
21
- audio: null
22
- capabilities:
23
- modalities: ["text"]
24
- endpoints: ["chat_completions"]
25
- features: ["streaming", "structured_outputs"]
26
- permissions:
27
- show_in_free_picker: true
28
- show_in_paid_picker: true
retab/_utils/ai_models.py DELETED
@@ -1,138 +0,0 @@
1
- import os
2
- import yaml
3
- from typing import get_args
4
-
5
- from ..types.ai_models import AIProvider, GeminiModel, OpenAIModel, xAI_Model, RetabModel, PureLLMModel, ModelCard
6
-
7
- MODEL_CARDS_DIR = os.path.join(os.path.dirname(__file__), "_model_cards")
8
-
9
- def merge_model_cards(base: dict, override: dict) -> dict:
10
- result = base.copy()
11
- for key, value in override.items():
12
- if key == "inherits":
13
- continue
14
- if isinstance(value, dict) and key in result:
15
- result[key] = merge_model_cards(result[key], value)
16
- else:
17
- result[key] = value
18
- return result
19
-
20
- def load_model_cards(yaml_file: str) -> list[ModelCard]:
21
- raw_cards = yaml.safe_load(open(yaml_file))
22
- name_to_card = {c["model"]: c for c in raw_cards if "inherits" not in c}
23
-
24
- final_cards = []
25
- for card in raw_cards:
26
- if "inherits" in card:
27
- parent = name_to_card[card["inherits"]]
28
- merged = merge_model_cards(parent, card)
29
- final_cards.append(ModelCard(**merged))
30
- else:
31
- final_cards.append(ModelCard(**card))
32
- return final_cards
33
-
34
- # Load all model cards
35
- model_cards = sum([
36
- load_model_cards(os.path.join(MODEL_CARDS_DIR, "openai.yaml")),
37
- load_model_cards(os.path.join(MODEL_CARDS_DIR, "anthropic.yaml")),
38
- load_model_cards(os.path.join(MODEL_CARDS_DIR, "xai.yaml")),
39
- load_model_cards(os.path.join(MODEL_CARDS_DIR, "gemini.yaml")),
40
- load_model_cards(os.path.join(MODEL_CARDS_DIR, "auto.yaml")),
41
- ], [])
42
- model_cards_dict = {card.model: card for card in model_cards}
43
-
44
-
45
- # Validate that model cards
46
- all_model_names = set(model_cards_dict.keys())
47
- if all_model_names.symmetric_difference(set(get_args(PureLLMModel))):
48
- raise ValueError(f"Mismatch between model cards and PureLLMModel type: {all_model_names.symmetric_difference(set(get_args(PureLLMModel)))}")
49
-
50
-
51
- def get_model_from_model_id(model_id: str) -> str:
52
- """
53
- Get the model name from the model id.
54
- """
55
- if model_id.startswith("ft:"):
56
- parts = model_id.split(":")
57
- return parts[1]
58
- else:
59
- return model_id
60
-
61
-
62
- def get_model_card(model: str) -> ModelCard:
63
- """
64
- Get the model card for a specific model.
65
-
66
- Args:
67
- model: The model name to look up
68
-
69
- Returns:
70
- The ModelCard for the specified model
71
-
72
- Raises:
73
- ValueError: If no model card is found for the specified model
74
- """
75
- model_name = get_model_from_model_id(model)
76
- if model_name in model_cards_dict:
77
- model_card = ModelCard(**model_cards_dict[model_name].model_dump())
78
- if model_name != model:
79
- # Fine-tuned model -> Change the name
80
- model_card.model = model
81
- # Remove the fine-tuning feature (if exists)
82
- try:
83
- model_card.capabilities.features.remove("fine_tuning")
84
- except ValueError:
85
- pass
86
- return model_card
87
-
88
- raise ValueError(f"No model card found for model: {model_name}")
89
-
90
-
91
- def get_provider_for_model(model_id: str) -> AIProvider:
92
- """
93
- Determine the AI provider associated with the given model identifier.
94
- Returns one of: "Anthropic", "xAI", "OpenAI", "Gemini", "Retab" or None if unknown.
95
- """
96
- model_name = get_model_from_model_id(model_id)
97
- # if model_name in get_args(AnthropicModel):
98
- # return "Anthropic"
99
- # if model_name in get_args(xAI_Model):
100
- # return "xAI"
101
- if model_name in get_args(OpenAIModel):
102
- return "OpenAI"
103
- if model_name in get_args(GeminiModel):
104
- return "Gemini"
105
- if model_name in get_args(RetabModel):
106
- return "Retab"
107
- raise ValueError(f"Unknown model: {model_name}")
108
-
109
-
110
- def assert_valid_model_extraction(model: str) -> None:
111
- try:
112
- get_provider_for_model(model)
113
- except ValueError:
114
- raise ValueError(
115
- f"Invalid model for extraction: {model}.\nValid OpenAI models: {get_args(OpenAIModel)}\n"
116
- f"Valid xAI models: {get_args(xAI_Model)}\n"
117
- f"Valid Gemini models: {get_args(GeminiModel)}"
118
- ) from None
119
-
120
-
121
- def assert_valid_model_schema_generation(model: str) -> None:
122
- """Assert that the model is either a standard OpenAI model or a valid fine-tuned model.
123
-
124
- Valid formats:
125
- - Standard model: Must be in OpenAIModel
126
- - Fine-tuned model: Must be {base_model}:{id} where base_model is in OpenAIModel
127
-
128
- Raises:
129
- ValueError: If the model format is invalid
130
- """
131
- if get_model_from_model_id(model) in get_args(OpenAIModel):
132
- return
133
- else:
134
- raise ValueError(
135
- f"Invalid model format: {model}. Must be either:\n"
136
- f"1. A standard model: {get_args(OpenAIModel)}\n"
137
- f"2. A fine-tuned model in format 'base_model:id' where base_model is one of the standard openai models"
138
- ) from None