retab-0.0.40-py3-none-any.whl → retab-0.0.42-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. retab/client.py +5 -5
  2. retab/resources/consensus/completions.py +1 -1
  3. retab/resources/consensus/completions_stream.py +5 -5
  4. retab/resources/consensus/responses.py +1 -1
  5. retab/resources/consensus/responses_stream.py +2 -2
  6. retab/resources/documents/client.py +12 -11
  7. retab/resources/documents/extractions.py +4 -4
  8. retab/resources/evals.py +1 -1
  9. retab/resources/evaluations/documents.py +1 -1
  10. retab/resources/jsonlUtils.py +4 -4
  11. retab/resources/processors/automations/endpoints.py +9 -5
  12. retab/resources/processors/automations/links.py +2 -2
  13. retab/resources/processors/automations/logs.py +2 -2
  14. retab/resources/processors/automations/mailboxes.py +43 -32
  15. retab/resources/processors/automations/outlook.py +25 -7
  16. retab/resources/processors/automations/tests.py +8 -2
  17. retab/resources/processors/client.py +25 -16
  18. retab/resources/prompt_optimization.py +1 -1
  19. retab/resources/schemas.py +3 -3
  20. retab/types/automations/mailboxes.py +1 -1
  21. retab/types/completions.py +1 -1
  22. retab/types/documents/create_messages.py +4 -4
  23. retab/types/documents/extractions.py +3 -3
  24. retab/types/documents/parse.py +3 -1
  25. retab/types/evals.py +2 -2
  26. retab/types/evaluations/iterations.py +2 -2
  27. retab/types/evaluations/model.py +2 -2
  28. retab/types/extractions.py +34 -9
  29. retab/types/jobs/prompt_optimization.py +1 -1
  30. retab/types/logs.py +3 -3
  31. retab/types/schemas/object.py +4 -4
  32. retab/types/schemas/templates.py +1 -1
  33. retab/utils/__init__.py +0 -0
  34. retab/utils/_model_cards/anthropic.yaml +59 -0
  35. retab/utils/_model_cards/auto.yaml +43 -0
  36. retab/utils/_model_cards/gemini.yaml +117 -0
  37. retab/utils/_model_cards/openai.yaml +301 -0
  38. retab/utils/_model_cards/xai.yaml +28 -0
  39. retab/utils/ai_models.py +138 -0
  40. retab/utils/benchmarking.py +484 -0
  41. retab/utils/chat.py +327 -0
  42. retab/utils/display.py +440 -0
  43. retab/utils/json_schema.py +2156 -0
  44. retab/utils/mime.py +165 -0
  45. retab/utils/responses.py +169 -0
  46. retab/utils/stream_context_managers.py +52 -0
  47. retab/utils/usage/__init__.py +0 -0
  48. retab/utils/usage/usage.py +301 -0
  49. retab-0.0.42.dist-info/METADATA +119 -0
  50. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/RECORD +52 -36
  51. retab-0.0.40.dist-info/METADATA +0 -418
  52. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/WHEEL +0 -0
  53. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/top_level.txt +0 -0
retab/utils/benchmarking.py
@@ -0,0 +1,484 @@
+ import datetime
+ import re
+ import shutil
+
+ # The goal is to open a jsonl file and get an analysis of model performance with a one-liner.
+ ############# BENCHMARKING MODELS #############
+ from itertools import zip_longest
+ from typing import Any, Callable, Literal, Optional, cast
+
+ import pandas as pd  # type: ignore
+ from Levenshtein import distance as levenshtein_distance
+ from pydantic import BaseModel
+
+ from ..types.db.annotations import AnnotationParameters
+
+
+ def normalize_string(text: str) -> str:
+     """
+     Normalize a string by removing non-alphanumeric characters and lowercasing.
+
+     Args:
+         text: Input string to normalize
+
+     Returns:
+         Normalized string with only alphanumeric characters, all lowercase
+     """
+     if not text:
+         return ""
+     # Remove all non-alphanumeric characters and convert to lowercase
+     return re.sub(r"[^a-zA-Z0-9]", "", text).lower()
+
+
+ def hamming_distance_padded(s: str, t: str) -> int:
+     """
+     Compute the Hamming distance between two strings, padding the shorter one with spaces so extra characters count as mismatches.
+
+     Args:
+         s: The first string
+         t: The second string
+
+     Returns:
+         The Hamming distance between the two strings
+     """
+     # Normalize inputs
+     s = normalize_string(s)
+     t = normalize_string(t)
+
+     return sum(a != b for a, b in zip_longest(s, t, fillvalue=" "))
+
+
+ def hamming_similarity(str_1: str, str_2: str) -> float:
+     """
+     Compute the Hamming similarity between two strings.
+
+     Args:
+         str_1: The first string
+         str_2: The second string
+
+     Returns:
+         A float between 0 and 1, where 1 means the strings are identical
+     """
+     # Normalize inputs
+     str_1 = normalize_string(str_1)
+     str_2 = normalize_string(str_2)
+
+     max_length = max(len(str_1), len(str_2))
+
+     if max_length == 0:
+         return 1.0
+
+     dist = hamming_distance_padded(str_1, str_2)
+     return 1 - (dist / max_length)
+
+
+ def jaccard_similarity(str_1: str, str_2: str) -> float:
+     """
+     Compute the Jaccard similarity between the character sets of two strings.
+
+     Args:
+         str_1: The first string
+         str_2: The second string
+
+     Returns:
+         A float between 0 and 1, where 1 means the strings use the same set of characters
+     """
+     # Normalize inputs
+     str_1 = normalize_string(str_1)
+     str_2 = normalize_string(str_2)
+
+     set_a = set(str_1)
+     set_b = set(str_2)
+     intersection = set_a & set_b
+     union = set_a | set_b
+     if not union:
+         return 1.0
+     return len(intersection) / len(union)
+
+
+ def levenshtein_similarity(str_1: str, str_2: str) -> float:
+     """
+     Calculate similarity between two values using Levenshtein distance.
+     Returns a similarity score between 0.0 and 1.0.
+     """
+     # Normalize inputs
+     str_1 = normalize_string(str_1)
+     str_2 = normalize_string(str_2)
+
+     max_length = max(len(str_1), len(str_2))
+
+     if max_length == 0:
+         return 1.0
+
+     dist = levenshtein_distance(str_1, str_2)
+     return 1 - (dist / max_length)
+
+
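A minimal usage sketch of the string metrics above (illustrative only, not part of the diff; the sample strings are made up):

# Hypothetical inputs: normalization strips punctuation, spaces, and case before comparing
a, b = "Invoice #1234", "invoice 1234"
print(levenshtein_similarity(a, b))   # 1.0, normalized strings are identical
print(hamming_similarity(a, b))       # 1.0 for the same reason
print(jaccard_similarity(a, "1234 invoice"))  # 1.0, same character set regardless of order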
+ def key_normalization(key: str) -> str:
+     """Normalize a key so that keys differing only by list index compare as equal (the same kind of error at different list positions maps to the same key)."""
+     # Replace purely numeric path segments (list indices) with "*"
+     key_parts = key.split(".")
+     new_key_parts = []
+     for key_part in key_parts:
+         if key_part.isdigit():
+             new_key_parts.append("*")
+         else:
+             new_key_parts.append(key_part)
+     return ".".join(new_key_parts)
+
+
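For example (illustrative only, not part of the diff), index segments collapse to a wildcard:

key_normalization("line_items.0.unit_price")   # -> "line_items.*.unit_price"
key_normalization("line_items.12.unit_price")  # -> "line_items.*.unit_price" (same normalized key)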
+ def compare_primitive_values(val1: str | int | float | bool | None, val2: str | int | float | bool | None, str_metric_function: Callable[[str, str], float]) -> float:
+     # Handle leaf nodes (primitives) with type-specific comparisons
+     # Special handling for None values
+     # Both None means perfect match
+     if val1 is None and val2 is None:
+         return 1.0
+     # One is None and the other is falsy (False, "", 0, etc.): treated as a match
+     elif (val1 is None and not val2) or (val2 is None and not val1):
+         return 1.0
+     # One is None and the other is truthy (True, "string", 1, 1.5, etc.)
+     elif val1 is None or val2 is None:
+         return 0.0
+
+     # From now on, we can assume that val1 and val2 are not None.
+     # Type compatibility check
+     if isinstance(val1, bool) and isinstance(val2, bool):
+         return 1.0 if val1 is val2 else 0.0
+
+     # Numeric comparison (int, float)
+     if isinstance(val1, (int, float)) and isinstance(val2, (int, float)):
+         # For numbers close to zero, use absolute difference
+         if abs(val1) < 1e-4 and abs(val2) < 1e-4:
+             return 1.0 if abs(val1 - val2) < 1e-4 else 0.0
+         # Otherwise use relative difference
+         max_val = max(abs(val1), abs(val2))
+         return 1.0 - min(1.0, abs(val1 - val2) / max_val)
+
+     # String comparison - use the provided metric function
+     if isinstance(val1, str) and isinstance(val2, str):
+         return float(str_metric_function(val1, val2))
+
+     # If we get here, types are incompatible
+     return 0.0
+
+
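A few illustrative calls (not part of the diff; the values are made up):

compare_primitive_values(100.0, 90.0, levenshtein_similarity)   # 0.9, relative difference on numbers
compare_primitive_values(None, "", levenshtein_similarity)      # 1.0, None vs falsy counts as a match
compare_primitive_values("ACME Corp", "ACME", levenshtein_similarity)  # 0.5, delegates to the string metric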
+ dictionary_metrics = Literal["levenshtein_similarity", "jaccard_similarity", "hamming_similarity"]
+
+
+ def compute_dict_difference(dict1: dict[str, Any], dict2: dict[str, Any], metric: dictionary_metrics) -> dict[str, Any]:
+     """
+     Compute the difference between two dictionaries recursively.
+
+     Args:
+         dict1: The first dictionary (can be nested)
+         dict2: The second dictionary (can be nested)
+         metric: The metric to use for comparison ("levenshtein_similarity", "jaccard_similarity", "hamming_similarity")
+
+     Returns:
+         A dictionary containing the difference between the two dictionaries
+     """
+     result: dict[str, Any] = {}
+
+     if metric == "levenshtein_similarity":
+         metric_function = levenshtein_similarity
+     elif metric == "jaccard_similarity":
+         metric_function = jaccard_similarity
+     elif metric == "hamming_similarity":
+         metric_function = hamming_similarity
+     else:
+         raise ValueError(f"Invalid metric: {metric}")
+
+     def compare_values(val1: dict | list | tuple | str | int | float | bool | None, val2: dict | list | tuple | str | int | float | bool | None, path: str = "") -> Any:
+         # If both are dictionaries, process recursively
+         if isinstance(val1, dict) and isinstance(val2, dict):
+             nested_result: dict[str, Any] = {}
+             all_keys = set(val1.keys()) | set(val2.keys())
+
+             for key in all_keys:
+                 norm_key = key_normalization(key)
+                 sub_val1 = val1.get(key, None)
+                 sub_val2 = val2.get(key, None)
+
+                 if sub_val1 is None or sub_val2 is None:
+                     nested_result[norm_key] = None
+                 else:
+                     nested_result[norm_key] = compare_values(sub_val1, sub_val2, f"{path}.{norm_key}" if path else norm_key)
+
+             return nested_result
+
+         # If both are lists/arrays, compare items with detailed results
+         if isinstance(val1, (list, tuple)) and isinstance(val2, (list, tuple)):
+             # If both lists are empty, they're identical
+             if not val1 and not val2:
+                 return 1.0
+
+             # Create a detailed element-by-element comparison
+             array_result = {}
+             similarities = []
+
+             # Process each position in both arrays
+             for i, (item1, item2) in enumerate(zip_longest(val1, val2, fillvalue=None)):
+                 element_key = str(i)  # Use index as dictionary key
+                 element_path = f"{path}.{i}" if path else str(i)
+
+                 if item1 is None or item2 is None:
+                     # Handle lists of different lengths
+                     array_result[element_key] = None
+                     similarities.append(0.0)  # Penalize missing elements
+                 else:
+                     # Compare the elements
+                     comparison_result = compare_values(item1, item2, element_path)
+                     array_result[element_key] = comparison_result
+
+                     # Extract similarity metric for this element
+                     if isinstance(comparison_result, dict):
+                         # Calculate average from nested structure
+                         numeric_values = [v for v in _extract_numeric_values(comparison_result) if v is not None]
+                         if numeric_values:
+                             similarities.append(sum(numeric_values) / len(numeric_values))
+                     elif isinstance(comparison_result, (int, float)) and comparison_result is not None:
+                         similarities.append(float(comparison_result))
+
+             # Add overall similarity as a special key
+             array_result["_similarity"] = sum(similarities) / max(len(similarities), 1) if similarities else 1.0
+
+             return array_result
+
+         # If only one side is a container (dict, list, or tuple), the values are incomparable: return None
+         if isinstance(val1, dict) or isinstance(val2, dict) or isinstance(val1, (list, tuple)) or isinstance(val2, (list, tuple)):
+             return None
+
+         # Handle leaf nodes (primitives) with type-specific comparisons
+         return compare_primitive_values(val1, val2, metric_function)
+
+     def _extract_numeric_values(d: dict) -> list[float]:
+         """Extract all numeric values from a nested dictionary."""
+         result = []
+         for k, v in d.items():
+             if isinstance(v, dict):
+                 # Recursively extract from nested dictionaries
+                 result.extend(_extract_numeric_values(v))
+             elif isinstance(v, (int, float)) and not isinstance(v, bool):
+                 # Add numeric values
+                 result.append(v)
+             # Skip non-numeric values
+         return result
+
+     # Normalize top-level keys
+     dict1_normalized = {key_normalization(k): v for k, v in dict1.items()}
+     dict2_normalized = {key_normalization(k): v for k, v in dict2.items()}
+
+     # Process all keys from both dictionaries
+     keys_intersect = set(dict1_normalized.keys()) & set(dict2_normalized.keys())
+     keys_symmetric_difference = set(dict1_normalized.keys()) ^ set(dict2_normalized.keys())
+
+     for key in keys_symmetric_difference:
+         # When the key is not present in both dictionaries, we return None.
+         result[key] = None
+
+     for key in keys_intersect:
+         # compare_values can handle None values, so we don't need to check for that.
+         result[key] = compare_values(dict1_normalized[key], dict2_normalized[key], key)
+
+     return result
+
+
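A minimal sketch of what compute_dict_difference returns (illustrative only, not part of the diff; the field names and values are made up):

prediction = {"invoice_number": "INV-001", "total": 100.0, "items": [{"name": "Widget"}]}
ground_truth = {"invoice_number": "INV-001", "total": 90.0, "items": [{"name": "Widgets"}]}

diff = compute_dict_difference(prediction, ground_truth, "levenshtein_similarity")
# diff mirrors the input structure with per-field similarities, roughly:
# {"invoice_number": 1.0, "total": 0.9, "items": {"0": {"name": ...}, "_similarity": ...}}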
+ def aggregate_dict_differences(dict_differences: list[dict[str, Any]]) -> tuple[dict[str, Any], dict[str, Any]]:
+     """
+     Aggregate a list of dictionary differences into a single dictionary with average values,
+     handling nested dictionaries recursively.
+
+     Args:
+         dict_differences: A list of dictionaries containing similarity metrics (can be nested)
+
+     Returns:
+         A tuple containing:
+         - A dictionary with the average similarity metrics across all input dictionaries
+         - A dictionary with the uncertainty (standard deviation) for each metric
+     """
+     if not dict_differences:
+         return {}, {}
+
+     def aggregate_recursively(dicts_list: list[dict[str, Any]]) -> tuple[dict[str, Any], dict[str, Any]]:
+         # Initialize result dictionaries
+         result: dict[str, Any] = {}
+         uncertainty: dict[str, Any] = {}
+
+         # Collect all keys across all dictionaries
+         all_keys: set[str] = set()
+         for d in dicts_list:
+             all_keys.update(d.keys())
+
+         for key in all_keys:
+             # Collect values for this key from all dictionaries
+             values = []
+             for d in dicts_list:
+                 if key in d:
+                     values.append(d[key])
+
+             # Skip if no valid values
+             if not values:
+                 result[key] = None
+                 uncertainty[key] = None
+                 continue
+
+             # Check if values are nested dictionaries
+             if all(isinstance(v, dict) for v in values if v is not None):
+                 # Filter out None values
+                 nested_dicts = [v for v in values if v is not None]
+                 if nested_dicts:
+                     nested_result, nested_uncertainty = aggregate_recursively(nested_dicts)
+                     result[key] = nested_result
+                     uncertainty[key] = nested_uncertainty
+                 else:
+                     result[key] = None
+                     uncertainty[key] = None
+             else:
+                 # Handle leaf nodes (numeric values)
+                 numeric_values = [v for v in values if v is not None and isinstance(v, (int, float))]
+
+                 if numeric_values:
+                     mean = sum(numeric_values) / len(numeric_values)
+                     result[key] = mean
+
+                     if len(numeric_values) > 1:
+                         variance = sum((x - mean) ** 2 for x in numeric_values) / (len(numeric_values) - 1)
+                         uncertainty[key] = max(0, variance) ** 0.5
+                     else:
+                         uncertainty[key] = 0.0
+                 else:
+                     result[key] = None
+                     uncertainty[key] = None
+
+         return result, uncertainty
+
+     return aggregate_recursively(dict_differences)
+
+
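An illustrative aggregation over two per-document differences (not part of the diff; values are made up):

diffs = [
    {"total": 1.0, "vendor": {"name": 0.8}},
    {"total": 0.5, "vendor": {"name": 1.0}},
]
average, std = aggregate_dict_differences(diffs)
# average -> {"total": 0.75, "vendor": {"name": 0.9}}
# std     -> {"total": ~0.354, "vendor": {"name": ~0.141}} (sample standard deviation)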
358
+ class SingleFileEval(BaseModel):
359
+ """
360
+ A class for evaluating metrics between two dictionaries.
361
+ """
362
+
363
+ evaluation_id: str
364
+ file_id: str
365
+ schema_id: str
366
+ schema_data_id: str | None = None
367
+ dict_1: dict[str, Any]
368
+ dict_2: dict[str, Any]
369
+ inference_settings_1: AnnotationParameters
370
+ inference_settings_2: AnnotationParameters
371
+ created_at: datetime.datetime
372
+ organization_id: str
373
+ hamming_similarity: dict[str, Any]
374
+ jaccard_similarity: dict[str, Any]
375
+ levenshtein_similarity: dict[str, Any]
376
+
377
+
378
+ class EvalMetric(BaseModel):
379
+ average: dict[str, Any]
380
+ std: dict[str, Any]
381
+
382
+
383
+ class EvalMetrics(BaseModel):
384
+ schema_id: str
385
+ distances: dict[dictionary_metrics, EvalMetric]
386
+
387
+
+ def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = ".") -> dict[str, Any]:
+     """Flatten a nested dictionary with dot-separated keys."""
+     items: list[tuple[str, Any]] = []
+     for k, v in d.items():
+         new_key = f"{parent_key}{sep}{k}" if parent_key else k
+         if isinstance(v, dict):
+             items.extend(flatten_dict(v, new_key, sep=sep).items())
+         else:
+             items.append((new_key, v))
+     return dict(items)
+
+
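For instance (illustrative input, not part of the diff), nested keys become dot paths:

flatten_dict({"vendor": {"name": 0.9, "address": {"city": 1.0}}, "total": 0.75})
# -> {"vendor.name": 0.9, "vendor.address.city": 1.0, "total": 0.75}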
+ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optional[dict[str, Any]] = None, top_n: int = 20, ascending: bool = False) -> None:
+     """Plot a metric from analysis results using a horizontal bar chart with uncertainty.
+
+     Args:
+         analysis: Dictionary containing similarity scores (can be nested).
+         uncertainties: Dictionary containing uncertainty values (same structure as analysis).
+         top_n: Number of top fields to display.
+         ascending: Whether to sort in ascending order.
+     """
+     # Flatten the dictionaries
+     flattened_analysis = flatten_dict(analysis)
+     # Prepare data by matching fields
+     fields = list(flattened_analysis.keys())
+     similarities = [flattened_analysis[field] for field in fields]
+
+     # Prepare uncertainties if provided
+     uncertainties_list = None
+     if uncertainties:
+         flattened_uncertainties = flatten_dict(uncertainties)
+         uncertainties_list = [flattened_uncertainties.get(field, None) for field in fields]
+
+     # Create a DataFrame
+     df = pd.DataFrame(
+         {
+             "field": fields,
+             "similarity": similarities,
+         }
+     )
+
+     if uncertainties:
+         df["uncertainty"] = uncertainties_list
+
+     # Sort by similarity and select top N
+     df = df.sort_values(by="similarity", ascending=ascending).head(top_n)
+
+     # Calculate layout dimensions
+     label_width = max(len(field) for field in df["field"]) + 2  # Padding for alignment
+     terminal_width = shutil.get_terminal_size().columns
+     bar_width = terminal_width - label_width - 3  # Space for '| ' and extra padding
+
+     # Determine scaling factor based on maximum similarity
+     max_similarity = df["similarity"].max()
+     scale = bar_width / max_similarity if max_similarity > 0 else 1
+
+     # Generate and print bars
+     for index, row in df.iterrows():
+         field = row["field"]
+         similarity = row["similarity"]
+         if uncertainties:
+             uncertainty = row["uncertainty"]
+         else:
+             uncertainty = None
+
+         if similarity is None:
+             continue  # Skip fields with no similarity value
+         similarity = cast(float, similarity)
+         # Calculate bar length and uncertainty range
+         bar_len = round(similarity * scale)
+         if uncertainty is not None and uncertainty > 0:
+             uncertainty = cast(float, uncertainty)
+             uncertainty_start = max(0, round((similarity - uncertainty) * scale))
+             uncertainty_end = min(bar_width, round((similarity + uncertainty) * scale))
+         else:
+             uncertainty_start = bar_len
+             uncertainty_end = bar_len  # No uncertainty to display
+
+         # Build the bar string
+         bar_string = ""
+         for i in range(bar_width):
+             if i < bar_len:
+                 if i < uncertainty_start:
+                     char = "█"  # Solid block for the certain part
+                 else:
+                     char = "█"  # Also a solid block; the lower uncertainty range is not drawn differently
+             else:
+                 if i < uncertainty_end:
+                     char = "░"  # Light shade for the upper uncertainty range
+                 else:
+                     char = " "  # Space for empty area
+             bar_string += char
+
+         # Print the label and bar
+         score_field = f"[{similarity:.4f}]"
+
+         print(f"{field:<{label_width}} {score_field} | {bar_string}")