retab 0.0.36__tar.gz → 0.0.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {retab-0.0.36 → retab-0.0.37}/PKG-INFO +7 -6
  2. retab-0.0.37/pyproject.toml +46 -0
  3. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/ai_models.py +2 -2
  4. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/benchmarking.py +15 -16
  5. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/chat.py +9 -14
  6. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/display.py +0 -3
  7. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/json_schema.py +9 -14
  8. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/mime.py +11 -14
  9. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/responses.py +9 -3
  10. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/stream_context_managers.py +1 -1
  11. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/usage/usage.py +28 -28
  12. {retab-0.0.36/uiform → retab-0.0.37/retab}/client.py +32 -31
  13. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/client.py +17 -36
  14. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/completions.py +24 -47
  15. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/completions_stream.py +26 -38
  16. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/responses.py +31 -80
  17. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/responses_stream.py +31 -79
  18. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/documents/client.py +59 -45
  19. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/documents/extractions.py +181 -90
  20. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/evals.py +56 -43
  21. retab-0.0.37/retab/resources/evaluations/__init__.py +3 -0
  22. retab-0.0.37/retab/resources/evaluations/client.py +301 -0
  23. retab-0.0.37/retab/resources/evaluations/documents.py +233 -0
  24. retab-0.0.37/retab/resources/evaluations/iterations.py +452 -0
  25. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/files.py +2 -2
  26. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/jsonlUtils.py +220 -216
  27. retab-0.0.37/retab/resources/models.py +73 -0
  28. retab-0.0.37/retab/resources/processors/automations/client.py +244 -0
  29. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/automations/endpoints.py +77 -118
  30. retab-0.0.37/retab/resources/processors/automations/links.py +294 -0
  31. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/automations/logs.py +30 -19
  32. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/automations/mailboxes.py +136 -174
  33. retab-0.0.37/retab/resources/processors/automations/outlook.py +337 -0
  34. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/automations/tests.py +22 -25
  35. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/client.py +179 -164
  36. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/schemas.py +78 -66
  37. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/secrets/external_api_keys.py +1 -5
  38. retab-0.0.37/retab/resources/secrets/webhook.py +64 -0
  39. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/usage.py +39 -2
  40. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/ai_models.py +13 -13
  41. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/cron.py +19 -12
  42. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/endpoints.py +7 -4
  43. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/links.py +7 -3
  44. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/mailboxes.py +9 -9
  45. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/outlook.py +15 -11
  46. retab-0.0.37/retab/types/browser_canvas.py +3 -0
  47. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/chat.py +2 -2
  48. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/completions.py +9 -12
  49. retab-0.0.37/retab/types/consensus.py +19 -0
  50. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/db/annotations.py +3 -3
  51. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/db/files.py +8 -6
  52. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/documents/create_messages.py +18 -20
  53. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/documents/extractions.py +69 -24
  54. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/evals.py +5 -5
  55. retab-0.0.37/retab/types/evaluations/__init__.py +31 -0
  56. retab-0.0.37/retab/types/evaluations/documents.py +30 -0
  57. retab-0.0.37/retab/types/evaluations/iterations.py +112 -0
  58. retab-0.0.37/retab/types/evaluations/model.py +73 -0
  59. retab-0.0.37/retab/types/events.py +79 -0
  60. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/extractions.py +33 -10
  61. retab-0.0.37/retab/types/inference_settings.py +15 -0
  62. retab-0.0.37/retab/types/jobs/base.py +54 -0
  63. retab-0.0.37/retab/types/jobs/batch_annotation.py +12 -0
  64. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/jobs/evaluation.py +1 -2
  65. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/logs.py +37 -34
  66. retab-0.0.37/retab/types/metrics.py +32 -0
  67. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/mime.py +22 -20
  68. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/modalities.py +10 -10
  69. retab-0.0.37/retab/types/predictions.py +19 -0
  70. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/enhance.py +4 -2
  71. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/evaluate.py +7 -4
  72. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/generate.py +6 -3
  73. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/layout.py +1 -1
  74. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/object.py +13 -14
  75. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/templates.py +1 -3
  76. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/secrets/external_api_keys.py +0 -1
  77. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/standards.py +18 -1
  78. {retab-0.0.36 → retab-0.0.37}/retab.egg-info/PKG-INFO +7 -6
  79. retab-0.0.37/retab.egg-info/SOURCES.txt +116 -0
  80. {retab-0.0.36 → retab-0.0.37}/retab.egg-info/requires.txt +1 -0
  81. retab-0.0.37/retab.egg-info/top_level.txt +1 -0
  82. {retab-0.0.36 → retab-0.0.37}/setup.py +6 -6
  83. {retab-0.0.36 → retab-0.0.37}/tests/test_automations_links.py +34 -16
  84. retab-0.0.37/tests/test_automations_mailboxes.py +61 -0
  85. retab-0.0.37/tests/test_documents_api.py +320 -0
  86. retab-0.0.37/tests/test_evaluations.py +607 -0
  87. retab-0.0.37/tests/test_preprocessor.py +49 -0
  88. retab-0.0.36/pyproject.toml +0 -3
  89. retab-0.0.36/retab.egg-info/SOURCES.txt +0 -103
  90. retab-0.0.36/retab.egg-info/top_level.txt +0 -1
  91. retab-0.0.36/tests/test_automations_mailboxes.py +0 -56
  92. retab-0.0.36/tests/test_documents_api.py +0 -262
  93. retab-0.0.36/uiform/_utils/benchmarking copy.py +0 -588
  94. retab-0.0.36/uiform/resources/models.py +0 -45
  95. retab-0.0.36/uiform/resources/processors/automations/client.py +0 -78
  96. retab-0.0.36/uiform/resources/processors/automations/links.py +0 -356
  97. retab-0.0.36/uiform/resources/processors/automations/outlook.py +0 -444
  98. retab-0.0.36/uiform/resources/secrets/webhook.py +0 -62
  99. retab-0.0.36/uiform/types/consensus.py +0 -10
  100. retab-0.0.36/uiform/types/events.py +0 -76
  101. retab-0.0.36/uiform/types/jobs/base.py +0 -150
  102. retab-0.0.36/uiform/types/jobs/batch_annotation.py +0 -22
  103. {retab-0.0.36 → retab-0.0.37}/README.md +0 -0
  104. {retab-0.0.36/uiform → retab-0.0.37/retab}/__init__.py +0 -0
  105. {retab-0.0.36/uiform → retab-0.0.37/retab}/_resource.py +0 -0
  106. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/__init__.py +0 -0
  107. {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/usage/__init__.py +0 -0
  108. {retab-0.0.36/uiform → retab-0.0.37/retab}/py.typed +0 -0
  109. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/__init__.py +0 -0
  110. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/consensus/__init__.py +0 -0
  111. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/documents/__init__.py +0 -0
  112. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/finetuning.py +0 -0
  113. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/openai_example.py +0 -0
  114. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/__init__.py +0 -0
  115. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/processors/automations/__init__.py +0 -0
  116. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/prompt_optimization.py +0 -0
  117. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/secrets/__init__.py +0 -0
  118. {retab-0.0.36/uiform → retab-0.0.37/retab}/resources/secrets/client.py +0 -0
  119. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/__init__.py +0 -0
  120. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/__init__.py +0 -0
  121. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/automations/webhooks.py +0 -0
  122. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/db/__init__.py +0 -0
  123. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/documents/__init__.py +0 -0
  124. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/documents/correct_orientation.py +0 -0
  125. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/jobs/__init__.py +0 -0
  126. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/jobs/finetune.py +0 -0
  127. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/jobs/prompt_optimization.py +0 -0
  128. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/jobs/webcrawl.py +0 -0
  129. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/pagination.py +0 -0
  130. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/schemas/__init__.py +0 -0
  131. {retab-0.0.36/uiform → retab-0.0.37/retab}/types/secrets/__init__.py +0 -0
  132. {retab-0.0.36 → retab-0.0.37}/retab.egg-info/dependency_links.txt +0 -0
  133. {retab-0.0.36 → retab-0.0.37}/setup.cfg +0 -0
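Note: the file list above shows the top-level package directory renamed from uiform to retab, alongside new resources (evaluations, models, processor automations, webhook secrets) and new tests. For downstream code this is primarily an import-path change; a minimal sketch of the switch, assuming the public module layout follows the moved paths listed above (the exact API surface is not shown in this diff):

# Minimal sketch of the import change implied by the renamed package directory.
# Module paths are taken from the moved files (e.g. retab/types/mime.py) and are
# otherwise assumptions; adjust to the symbols your code actually uses.

# retab==0.0.36 shipped its modules under the "uiform" package:
# from uiform.types.mime import MIMEData

# retab==0.0.37 ships the same modules under "retab":
from retab.types.mime import MIMEData

print(MIMEData)  # the type used by the document-handling helpers shown later in this diff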
{retab-0.0.36 → retab-0.0.37}/PKG-INFO
@@ -1,11 +1,11 @@
 Metadata-Version: 2.1
 Name: retab
-Version: 0.0.36
-Summary: UiForm official python library
-Home-page: https://github.com/UiForm/uiform
-Author: UiForm
-Author-email: contact@uiform.com
-Project-URL: Team website, https://uiform.com
+Version: 0.0.37
+Summary: Retab official python library
+Home-page: https://github.com/Retab-dev/retab
+Author: Retab
+Author-email: contact@retab.com
+Project-URL: Team website, https://retab.com
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: POSIX :: Linux
@@ -39,6 +39,7 @@ Requires-Dist: google-generativeai
 Requires-Dist: anthropic
 Requires-Dist: tiktoken
 Requires-Dist: truststore
+Requires-Dist: ruff

 # UiForm

retab-0.0.37/pyproject.toml
@@ -0,0 +1,46 @@
+[build-system]
+requires = ["setuptools>=42"]
+build-backend = "setuptools.build_meta"
+
+[tool.ruff]
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv"
+]
+
+include = ["uiform/**/*.py", "uiform/*.py"]
+
+line-length = 180
+
+target-version = "py312"
+
+[tool.ruff.lint]
+ignore = ["E701"]
+
+[tool.ruff.format]
+docstring-code-format=false
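Note: the new pyproject.toml is mostly a Ruff configuration, matching the Requires-Dist: ruff line added to PKG-INFO above, and the bulk of the hunks that follow (quote normalization, trailing-whitespace and blank-line cleanup, unused-variable removal) is consistent with running Ruff over the tree. The include globs still point at uiform/**, which looks stale now that the sources live under retab/. A sketch of how such a config is typically exercised; the helper below is illustrative and not part of the package:

# Illustrative helper, not part of retab: runs the Ruff commands that read the
# [tool.ruff] table added above. Assumes the "ruff" executable is installed
# (it is listed as a requirement of retab 0.0.37).
import subprocess
import sys

def run_ruff(paths: list[str]) -> int:
    lint = subprocess.run(["ruff", "check", *paths])             # lint per [tool.ruff.lint]
    fmt = subprocess.run(["ruff", "format", "--check", *paths])  # verify formatting, no rewrite
    return lint.returncode or fmt.returncode

if __name__ == "__main__":
    sys.exit(run_ruff(sys.argv[1:] or ["."]))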
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/ai_models.py
@@ -61,7 +61,7 @@ def assert_valid_model_batch_processing(model: str) -> None:
 raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
 if not model_id or not model_id.strip():
 raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
-except ValueError as e:
+except ValueError:
 if ":" not in model:
 raise ValueError(
 f"Invalid model format: {model}. Must be either:\n"
@@ -90,7 +90,7 @@ def assert_valid_model_schema_generation(model: str) -> None:
 raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
 if not model_id or not model_id.strip():
 raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
-except ValueError as e:
+except ValueError:
 if ":" not in model:
 raise ValueError(
 f"Invalid model format: {model}. Must be either:\n"
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/benchmarking.py
@@ -5,7 +5,7 @@ import shutil
 # The goal is to leverage this piece of code to open a jsonl file and get an analysis of the performance of the model using a one-liner.
 ############# BENCHMARKING MODELS #############
 from itertools import zip_longest
-from typing import Any, Callable, Literal, Optional
+from typing import Any, Callable, Literal, Optional, cast

 import pandas as pd # type: ignore
 from Levenshtein import distance as levenshtein_distance
@@ -27,7 +27,7 @@ def normalize_string(text: str) -> str:
 if not text:
 return ""
 # Remove all non-alphanumeric characters and convert to lowercase
-return re.sub(r'[^a-zA-Z0-9]', '', text).lower()
+return re.sub(r"[^a-zA-Z0-9]", "", text).lower()


 def hamming_distance_padded(s: str, t: str) -> int:
@@ -45,7 +45,7 @@ def hamming_distance_padded(s: str, t: str) -> int:
 s = normalize_string(s)
 t = normalize_string(t)

-return sum(a != b for a, b in zip_longest(s, t, fillvalue=' '))
+return sum(a != b for a, b in zip_longest(s, t, fillvalue=" "))


 def hamming_similarity(str_1: str, str_2: str) -> float:
@@ -385,7 +385,7 @@ class EvalMetrics(BaseModel):
 distances: dict[dictionary_metrics, EvalMetric]


-def flatten_dict(d: dict[str, Any], parent_key: str = '', sep: str = '.') -> dict[str, Any]:
+def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = ".") -> dict[str, Any]:
 """Flatten a nested dictionary with dot-separated keys."""
 items: list[tuple[str, Any]] = []
 for k, v in d.items():
@@ -408,16 +408,14 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
 """
 # Flatten the dictionaries
 flattened_analysis = flatten_dict(analysis)
-if uncertainties:
-flattened_uncertainties = flatten_dict(uncertainties)
-else:
-uncertainties_list = None
-
 # Prepare data by matching fields
 fields = list(flattened_analysis.keys())
 similarities = [flattened_analysis[field] for field in fields]

+# Prepare uncertainties if provided
+uncertainties_list = None
 if uncertainties:
+flattened_uncertainties = flatten_dict(uncertainties)
 uncertainties_list = [flattened_uncertainties.get(field, None) for field in fields]

 # Create a DataFrame
@@ -454,10 +452,11 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio

 if similarity is None:
 continue # Skip fields with no similarity value
-
+similarity = cast(float, similarity)
 # Calculate bar length and uncertainty range
 bar_len = round(similarity * scale)
 if uncertainty is not None and uncertainty > 0:
+uncertainty = cast(float, uncertainty)
 uncertainty_start = max(0, round((similarity - uncertainty) * scale))
 uncertainty_end = min(bar_width, round((similarity + uncertainty) * scale))
 else:
@@ -465,21 +464,21 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
 uncertainty_end = bar_len # No uncertainty to display

 # Build the bar string
-bar_string = ''
+bar_string = ""
 for i in range(bar_width):
 if i < bar_len:
 if i < uncertainty_start:
-char = '' # Solid block for certain part
+char = "" # Solid block for certain part
 else:
-char = '' # Lighter block for uncertainty overlap
+char = "" # Lighter block for uncertainty overlap
 else:
 if i < uncertainty_end:
-char = '' # Dash for upper uncertainty range
+char = "" # Dash for upper uncertainty range
 else:
-char = ' ' # Space for empty area
+char = " " # Space for empty area
 bar_string += char

 # Print the label and bar
-score_field = f'[{similarity:.4f}]'
+score_field = f"[{similarity:.4f}]"

 print(f"{field:<{label_width}} {score_field} | {bar_string}")
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/chat.py
@@ -1,22 +1,17 @@
 import base64
-import io
 import logging
 from typing import List, Literal, Optional, Union, cast

 import requests
-from anthropic.types.content_block import ContentBlock
-from anthropic.types.image_block_param import ImageBlockParam, Source
+from anthropic.types.image_block_param import ImageBlockParam
 from anthropic.types.message_param import MessageParam
 from anthropic.types.text_block_param import TextBlockParam
-from anthropic.types.tool_result_block_param import ToolResultBlockParam
-from anthropic.types.tool_use_block_param import ToolUseBlockParam
 from google.genai.types import BlobDict, ContentDict, ContentUnionDict, PartDict # type: ignore
 from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
 from openai.types.chat.chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
 from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
 from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
 from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
-from PIL import Image

 from ..types.chat import ChatCompletionUiformMessage

@@ -64,7 +59,7 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
 continue
 parts: list[PartDict] = []

-message_content = message['content']
+message_content = message["content"]
 if isinstance(message_content, str):
 # Direct string content is treated as the prompt for the SDK
 parts.append(PartDict(text=message_content))
@@ -74,8 +69,8 @@
 if part["type"] == "text":
 parts.append(PartDict(text=part["text"]))
 elif part["type"] == "image_url":
-url = part['image_url'].get('url', '') # type: ignore
-if url.startswith('data:image'):
+url = part["image_url"].get("url", "") # type: ignore
+if url.startswith("data:image"):
 # Extract base64 data and add it to the formatted inputs
 media_type, data_content = url.split(";base64,")
 media_type = media_type.split("data:")[-1] # => "image/jpeg"
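Note (still in _utils/chat.py): the hunk above only changes quote style around the data-URL handling; a standalone sketch of that parsing step, illustrative only:

import base64

# Same split used in the hunk above, applied to a tiny fabricated data URL.
url = "data:image/jpeg;base64," + base64.b64encode(b"\xff\xd8\xff").decode()

if url.startswith("data:image"):
    media_type, data_content = url.split(";base64,")
    media_type = media_type.split("data:")[-1]  # => "image/jpeg"
    raw_bytes = base64.b64decode(data_content)  # bytes handed on to the provider SDK
    print(media_type, len(raw_bytes))           # image/jpeg 3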
@@ -133,24 +128,24 @@ def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) ->
 # -----------------------
 # Handle non-system roles
 # -----------------------
-if isinstance(message['content'], str):
+if isinstance(message["content"], str):
 # Direct string content is treated as a single text block
 content_blocks.append(
 {
 "type": "text",
-"text": message['content'],
+"text": message["content"],
 }
 )

-elif isinstance(message['content'], list):
+elif isinstance(message["content"], list):
 # Handle structured content
-for part in message['content']:
+for part in message["content"]:
 if part["type"] == "text":
 part = cast(ChatCompletionContentPartTextParam, part)
 content_blocks.append(
 {
 "type": "text",
-"text": part['text'], # type: ignore
+"text": part["text"], # type: ignore
 }
 )

{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/display.py
@@ -105,9 +105,6 @@ def count_image_tokens(image_url: str, detail: Literal["low", "high", "auto"] =
 total_tiles = tiles_wide * tiles_high

 return base_token_cost + (token_per_tile * total_tiles)
-
-
-


 def process_jsonl_file(jsonl_path: str) -> List[TokenCounts]:
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/json_schema.py
@@ -14,8 +14,8 @@ from email_validator import validate_email
 from pydantic import BaseModel, BeforeValidator, Field, create_model
 from pydantic.config import ConfigDict

-from uiform._utils.mime import generate_blake2b_hash_from_string
-from uiform.types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
+from ..types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
+from .mime import generate_blake2b_hash_from_string

 # **** Validation Functions ****

@@ -116,7 +116,7 @@ def validate_vat_number(v: Any) -> Optional[str]:
 try:
 if stdnum.eu.vat.is_valid(v_str):
 return stdnum.eu.vat.validate(v_str)
-except:
+except Exception:
 pass
 return None

@@ -150,7 +150,7 @@ def validate_email_address(v: Any) -> Optional[str]:
 return None
 try:
 return validate_email(v_str).normalized
-except:
+except Exception:
 return None


@@ -170,7 +170,7 @@ def validate_frenchpostcode(v: Any) -> Optional[str]:
 if not v_str.isdigit():
 return None
 return v_str
-except:
+except Exception:
 return None


@@ -201,7 +201,7 @@ def validate_un_code(v: Any) -> Optional[int]:
 val = int(float(v_str)) # handle numeric strings
 if 0 <= val <= 3481:
 return val
-except:
+except Exception:
 pass
 return None

@@ -242,7 +242,7 @@ def validate_integer(v: Any) -> Optional[int]:
 return None
 try:
 return int(float(v_str))
-except:
+except Exception:
 return None


@@ -257,7 +257,7 @@ def validate_float(v: Any) -> Optional[float]:
 return None
 try:
 return float(v_str)
-except:
+except Exception:
 return None


@@ -333,7 +333,7 @@ def validate_bool(v: Any) -> bool:
 return True
 elif v_str in false_values:
 return False
-except:
+except Exception:
 pass

 return False
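Note: every validator hunk above replaces a bare except: with except Exception:. The difference matters because a bare except also swallows BaseException subclasses such as KeyboardInterrupt and SystemExit; an illustrative sketch (hypothetical code, not the package's own):

# Why "except Exception" is preferred over a bare "except".
def to_int_bare(value: str) -> int | None:
    try:
        return int(value)
    except:             # also catches KeyboardInterrupt, SystemExit, GeneratorExit
        return None

def to_int(value: str) -> int | None:
    try:
        return int(value)
    except Exception:   # only ordinary errors (ValueError, TypeError, ...) are swallowed
        return None

print(to_int("42"), to_int("n/a"))  # 42 None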
@@ -2091,11 +2091,6 @@ def sanitize(instance: Any, schema: dict[str, Any]) -> Any:
 return __sanitize_instance(instance, expanded_schema)


-import copy
-import json
-from .mime import generate_blake2b_hash_from_string
-
-
 def compute_schema_data_id(json_schema: dict[str, Any]) -> str:
 """Returns the schema_data_id for a given JSON schema.

{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/mime.py
@@ -4,16 +4,17 @@ import io
 import json
 import mimetypes
 from pathlib import Path
-from typing import Literal, Sequence, TypeVar, get_args
+from typing import Sequence, TypeVar, get_args

 import httpx
 import PIL.Image
+import puremagic
 from pydantic import HttpUrl

 from ..types.mime import MIMEData
 from ..types.modalities import SUPPORTED_TYPES

-T = TypeVar('T')
+T = TypeVar("T")


 def generate_blake2b_hash_from_bytes(bytes_: bytes) -> str:
@@ -25,7 +26,7 @@ def generate_blake2b_hash_from_base64(base64_string: str) -> str:


 def generate_blake2b_hash_from_string(input_string: str) -> str:
-return generate_blake2b_hash_from_bytes(input_string.encode('utf-8'))
+return generate_blake2b_hash_from_bytes(input_string.encode("utf-8"))


 def generate_blake2b_hash_from_dict(input_dict: dict) -> str:
@@ -43,7 +44,7 @@ def convert_pil_image_to_mime_data(image: PIL.Image.Image) -> MIMEData:
 """
 # Convert PIL image to base64 string
 buffered = io.BytesIO()
-choosen_format = image.format if (image.format and image.format.lower() in ['png', 'jpeg', 'gif', 'webp']) else "JPEG"
+choosen_format = image.format if (image.format and image.format.lower() in ["png", "jpeg", "gif", "webp"]) else "JPEG"
 image.save(buffered, format=choosen_format)
 base64_content = base64.b64encode(buffered.getvalue()).decode("utf-8")

@@ -98,13 +99,11 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
 if isinstance(document, bytes):
 # `document` is already the raw bytes
 try:
-import puremagic
-
 extension = puremagic.from_string(document)
 if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
 extension = ".jpeg"
-except:
-extension = '.txt'
+except Exception:
+extension = ".txt"
 file_bytes = document
 filename = "uploaded_file" + extension
 elif isinstance(document, io.IOBase):
@@ -112,19 +111,17 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
 file_bytes = document.read()
 filename = getattr(document, "name", "uploaded_file")
 filename = Path(filename).name
-elif hasattr(document, 'unicode_string') and callable(getattr(document, 'unicode_string')):
+elif hasattr(document, "unicode_string") and callable(getattr(document, "unicode_string")):
 with httpx.Client() as client:
 url: str = document.unicode_string() # type: ignore
 response = client.get(url)
 response.raise_for_status()
 try:
-import puremagic
-
 extension = puremagic.from_string(response.content)
 if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
 extension = ".jpeg"
-except:
-extension = '.txt'
+except Exception:
+extension = ".txt"
 file_bytes = response.content # Fix: Use response.content instead of document
 filename = "uploaded_file" + extension
 else:
@@ -139,7 +136,7 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
 encoded_content = base64.b64encode(file_bytes).decode("utf-8")
 # Compute SHA-256 hash over the *base64-encoded* content
 hash_obj = hashlib.sha256(encoded_content.encode("utf-8"))
-content_hash = hash_obj.hexdigest()
+hash_obj.hexdigest()

 # Guess MIME type based on file extension
 guessed_type, _ = mimetypes.guess_type(filename)
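Note: the prepare_mime_document hunks hoist import puremagic to module level and keep the same extension-sniffing fallback. A standalone sketch of that detection step, assuming puremagic is installed and that from_string returns an extension string as it is used above (illustrative only):

import puremagic

def guess_extension(data: bytes) -> str:
    try:
        extension = puremagic.from_string(data)  # e.g. ".png", ".pdf"
        if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
            extension = ".jpeg"
    except Exception:
        extension = ".txt"                       # same fallback as the package
    return extension

print(guess_extension(b"%PDF-1.7 minimal"))      # expected ".pdf" for PDF-looking bytes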
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/responses.py
@@ -77,13 +77,19 @@ def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[C
 formatted_messages: list[ChatCompletionUiformMessage] = []

 for message in messages:
+if "role" not in message or "content" not in message:
+# Mandatory fields for a message
+if message.get("type") != "message":
+print(f"Not supported message type: {message.get('type')}... Skipping...")
+continue
+
+role = message["role"]
+content = message["content"]
+
 if "type" not in message:
 # The type is required by all other sub-types of ResponseInputItemParam except for EasyInputMessageParam and Message, which are messages.
 message["type"] = "message"

-if message["type"] != "message":
-print(f"Not supported message type: {message['type']}... Skipping...")
-continue
 role = message["role"]
 content = message["content"]
 formatted_content: str | list[ChatCompletionContentPartParam]
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/stream_context_managers.py
@@ -1,7 +1,7 @@
 from contextlib import AbstractAsyncContextManager, AbstractContextManager
 from typing import Any, AsyncGenerator, Callable, Generator, TypeVar, Union

-T = TypeVar('T')
+T = TypeVar("T")


 class AsyncGeneratorContextManager(AbstractAsyncContextManager[AsyncGenerator[T, None]]):
{retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/usage/usage.py
@@ -1,4 +1,4 @@
-from typing import Optional, Dict
+from typing import Optional

 from openai.types.completion_usage import CompletionUsage
 from pydantic import BaseModel, Field
@@ -71,14 +71,12 @@ def compute_api_call_cost(pricing: Pricing, usage: CompletionUsage, is_ft: bool
 total_cost = (total_text_cost + total_audio_cost) / 1e6

 # Apply fine-tuning price hike if applicable
-if is_ft and hasattr(pricing, 'ft_price_hike'):
+if is_ft and hasattr(pricing, "ft_price_hike"):
 total_cost *= pricing.ft_price_hike

 return Amount(value=total_cost, currency="USD")


-
-
 def compute_cost_from_model(model: str, usage: CompletionUsage) -> Amount:
 # Extract base model name for fine-tuned models like "ft:gpt-4o:uiform:4389573"
 is_ft = False
@@ -93,7 +91,7 @@ def compute_cost_from_model(model: str, usage: CompletionUsage) -> Amount:
 try:
 model_card = get_model_card(model)
 pricing = model_card.pricing
-except ValueError as e:
+except ValueError:
 raise ValueError(f"No pricing information found for model: {model}")

 return compute_api_call_cost(pricing, usage, is_ft)
@@ -124,46 +122,48 @@ class CompletionsUsage(BaseModel):
 model: Optional[str] = Field(default=None, description="When group_by=model, this field provides the model name of the grouped usage result.")
 batch: Optional[bool] = Field(default=None, description="When group_by=batch, this field tells whether the grouped usage result is batch or not.")

+
 ########################
 # DETAILED COST BREAKDOWN
 ########################

+
 class TokenCounts(BaseModel):
 """Detailed breakdown of token counts by type and category."""
-
+
 # Prompt token counts
 prompt_regular_text: int
 prompt_cached_text: int
 prompt_audio: int
-
+
 # Completion token counts
 completion_regular_text: int
 completion_audio: int
-
+
 # Total tokens (should match sum of all components)
 total_tokens: int


 class CostBreakdown(BaseModel):
 """Detailed breakdown of API call costs by token type and usage category."""
-
+
 # Total cost amount
 total: Amount
-
+
 # Text token costs broken down by category
 text_prompt_cost: Amount
 text_cached_cost: Amount
 text_completion_cost: Amount
 text_total_cost: Amount
-
+
 # Audio token costs broken down by category (if applicable)
 audio_prompt_cost: Optional[Amount] = None
 audio_completion_cost: Optional[Amount] = None
 audio_total_cost: Optional[Amount] = None
-
+
 # Token counts for reference
 token_counts: TokenCounts
-
+
 # Model and fine-tuning information
 model: str
 is_fine_tuned: bool = False
@@ -172,7 +172,7 @@ class CostBreakdown(BaseModel):
 def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsage, model: str, is_ft: bool = False) -> CostBreakdown:
 """
 Computes a detailed price breakdown for the given token usage, based on the pricing.
-
+
 Returns a CostBreakdown object containing costs broken down by token type and category.
 """
 # ----- Process prompt tokens -----
@@ -211,7 +211,7 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
 cost_audio_prompt = 0.0
 cost_audio_completion = 0.0
 total_audio_cost = 0.0
-
+
 if pricing.audio and (prompt_audio > 0 or completion_audio > 0):
 cost_audio_prompt = prompt_audio * pricing.audio.prompt
 cost_audio_completion = completion_audio * pricing.audio.completion
@@ -219,27 +219,27 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag

 # Convert to dollars (divide by 1M) and create Amount objects
 ft_multiplier = pricing.ft_price_hike if is_ft else 1.0
-
+
 # Create Amount objects for each cost category
 text_prompt_amount = Amount(value=(cost_text_prompt / 1e6) * ft_multiplier, currency="USD")
 text_cached_amount = Amount(value=(cost_text_cached / 1e6) * ft_multiplier, currency="USD")
 text_completion_amount = Amount(value=(cost_text_completion / 1e6) * ft_multiplier, currency="USD")
 text_total_amount = Amount(value=(total_text_cost / 1e6) * ft_multiplier, currency="USD")
-
+
 # Audio amounts (if applicable)
 audio_prompt_amount = None
 audio_completion_amount = None
 audio_total_amount = None
-
+
 if pricing.audio and (prompt_audio > 0 or completion_audio > 0):
 audio_prompt_amount = Amount(value=(cost_audio_prompt / 1e6) * ft_multiplier, currency="USD")
 audio_completion_amount = Amount(value=(cost_audio_completion / 1e6) * ft_multiplier, currency="USD")
 audio_total_amount = Amount(value=(total_audio_cost / 1e6) * ft_multiplier, currency="USD")
-
+
 # Total cost
 total_cost = (total_text_cost + total_audio_cost) / 1e6 * ft_multiplier
 total_amount = Amount(value=total_cost, currency="USD")
-
+
 # Create TokenCounts object with token usage breakdown
 token_counts = TokenCounts(
 prompt_regular_text=prompt_regular_text,
@@ -247,9 +247,9 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
 prompt_audio=prompt_audio,
 completion_regular_text=completion_regular_text,
 completion_audio=completion_audio,
-total_tokens=usage.total_tokens
+total_tokens=usage.total_tokens,
 )
-
+
 return CostBreakdown(
 total=total_amount,
 text_prompt_cost=text_prompt_amount,
@@ -261,28 +261,28 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
 audio_total_cost=audio_total_amount,
 token_counts=token_counts,
 model=model,
-is_fine_tuned=is_ft
+is_fine_tuned=is_ft,
 )


 def compute_cost_from_model_with_breakdown(model: str, usage: CompletionUsage) -> CostBreakdown:
 """
 Computes a detailed cost breakdown for an API call using the specified model and usage.
-
+
 Args:
 model: The model name (can be a fine-tuned model like "ft:gpt-4o:uiform:4389573")
 usage: Token usage statistics for the API call
-
+
 Returns:
 CostBreakdown object with detailed cost information
-
+
 Raises:
 ValueError: If no pricing information is found for the model
 """
 # Extract base model name for fine-tuned models like "ft:gpt-4o:uiform:4389573"
 original_model = model
 is_ft = False
-
+
 if model.startswith("ft:"):
 # Split by colon and take the second part (index 1) which contains the base model
 parts = model.split(":")
@@ -294,7 +294,7 @@ def compute_cost_from_model_with_breakdown(model: str, usage: CompletionUsage) -
 try:
 model_card = get_model_card(model)
 pricing = model_card.pricing
-except ValueError as e:
+except ValueError:
 raise ValueError(f"No pricing information found for model: {original_model}")

 return compute_api_call_cost_with_breakdown(pricing, usage, original_model, is_ft)
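Note: the cost helpers above take an openai CompletionUsage object. A hedged usage sketch; the function names come from the hunks, while the import path is an assumption based on the moved file {retab-0.0.36/uiform → retab-0.0.37/retab}/_utils/usage/usage.py:

# Hedged sketch: module path assumed from the file move listed above.
from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage, PromptTokensDetails
from retab._utils.usage.usage import compute_cost_from_model, compute_cost_from_model_with_breakdown

usage = CompletionUsage(
    prompt_tokens=1200,
    completion_tokens=300,
    total_tokens=1500,
    prompt_tokens_details=PromptTokensDetails(cached_tokens=0, audio_tokens=0),
    completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0, audio_tokens=0),
)

amount = compute_cost_from_model("gpt-4o", usage)  # Amount(value=..., currency="USD")
breakdown = compute_cost_from_model_with_breakdown("ft:gpt-4o:uiform:4389573", usage)
print(amount, breakdown.total, breakdown.is_fine_tuned)  # fine-tuned models apply ft_price_hike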