retab 0.0.36-py3-none-any.whl → 0.0.38-py3-none-any.whl

This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
Files changed (123)
  1. retab/__init__.py +4 -0
  2. {uiform → retab}/_resource.py +5 -5
  3. {uiform → retab}/_utils/ai_models.py +2 -2
  4. {uiform → retab}/_utils/benchmarking.py +15 -16
  5. {uiform → retab}/_utils/chat.py +29 -34
  6. {uiform → retab}/_utils/display.py +0 -3
  7. {uiform → retab}/_utils/json_schema.py +9 -14
  8. {uiform → retab}/_utils/mime.py +11 -14
  9. {uiform → retab}/_utils/responses.py +16 -10
  10. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  11. {uiform → retab}/_utils/usage/usage.py +31 -31
  12. {uiform → retab}/client.py +54 -53
  13. {uiform → retab}/resources/consensus/client.py +19 -38
  14. {uiform → retab}/resources/consensus/completions.py +36 -59
  15. {uiform → retab}/resources/consensus/completions_stream.py +35 -47
  16. {uiform → retab}/resources/consensus/responses.py +37 -86
  17. {uiform → retab}/resources/consensus/responses_stream.py +41 -89
  18. retab/resources/documents/client.py +455 -0
  19. {uiform → retab}/resources/documents/extractions.py +192 -101
  20. {uiform → retab}/resources/evals.py +56 -43
  21. retab/resources/evaluations/__init__.py +3 -0
  22. retab/resources/evaluations/client.py +301 -0
  23. retab/resources/evaluations/documents.py +233 -0
  24. retab/resources/evaluations/iterations.py +452 -0
  25. {uiform → retab}/resources/files.py +2 -2
  26. {uiform → retab}/resources/jsonlUtils.py +225 -221
  27. retab/resources/models.py +73 -0
  28. retab/resources/processors/automations/client.py +244 -0
  29. {uiform → retab}/resources/processors/automations/endpoints.py +79 -120
  30. retab/resources/processors/automations/links.py +294 -0
  31. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  32. retab/resources/processors/automations/mailboxes.py +397 -0
  33. retab/resources/processors/automations/outlook.py +337 -0
  34. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  35. {uiform → retab}/resources/processors/client.py +181 -166
  36. {uiform → retab}/resources/schemas.py +78 -66
  37. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  38. retab/resources/secrets/webhook.py +64 -0
  39. {uiform → retab}/resources/usage.py +41 -4
  40. {uiform → retab}/types/ai_models.py +17 -17
  41. {uiform → retab}/types/automations/cron.py +19 -12
  42. {uiform → retab}/types/automations/endpoints.py +7 -4
  43. {uiform → retab}/types/automations/links.py +7 -3
  44. {uiform → retab}/types/automations/mailboxes.py +10 -10
  45. {uiform → retab}/types/automations/outlook.py +15 -11
  46. {uiform → retab}/types/automations/webhooks.py +1 -1
  47. retab/types/browser_canvas.py +3 -0
  48. retab/types/chat.py +8 -0
  49. {uiform → retab}/types/completions.py +12 -15
  50. retab/types/consensus.py +19 -0
  51. {uiform → retab}/types/db/annotations.py +3 -3
  52. {uiform → retab}/types/db/files.py +8 -6
  53. {uiform → retab}/types/documents/create_messages.py +20 -22
  54. {uiform → retab}/types/documents/extractions.py +71 -26
  55. {uiform → retab}/types/evals.py +5 -5
  56. retab/types/evaluations/__init__.py +31 -0
  57. retab/types/evaluations/documents.py +30 -0
  58. retab/types/evaluations/iterations.py +112 -0
  59. retab/types/evaluations/model.py +73 -0
  60. retab/types/events.py +79 -0
  61. {uiform → retab}/types/extractions.py +36 -13
  62. retab/types/inference_settings.py +15 -0
  63. retab/types/jobs/base.py +54 -0
  64. retab/types/jobs/batch_annotation.py +12 -0
  65. {uiform → retab}/types/jobs/evaluation.py +1 -2
  66. {uiform → retab}/types/logs.py +37 -34
  67. retab/types/metrics.py +32 -0
  68. {uiform → retab}/types/mime.py +22 -20
  69. {uiform → retab}/types/modalities.py +10 -10
  70. retab/types/predictions.py +19 -0
  71. {uiform → retab}/types/schemas/enhance.py +4 -2
  72. {uiform → retab}/types/schemas/evaluate.py +7 -4
  73. {uiform → retab}/types/schemas/generate.py +6 -3
  74. {uiform → retab}/types/schemas/layout.py +1 -1
  75. {uiform → retab}/types/schemas/object.py +16 -17
  76. {uiform → retab}/types/schemas/templates.py +1 -3
  77. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  78. {uiform → retab}/types/standards.py +18 -1
  79. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/METADATA +78 -77
  80. retab-0.0.38.dist-info/RECORD +107 -0
  81. retab-0.0.38.dist-info/top_level.txt +1 -0
  82. retab-0.0.36.dist-info/RECORD +0 -96
  83. retab-0.0.36.dist-info/top_level.txt +0 -1
  84. uiform/__init__.py +0 -4
  85. uiform/_utils/benchmarking copy.py +0 -588
  86. uiform/resources/documents/client.py +0 -255
  87. uiform/resources/models.py +0 -45
  88. uiform/resources/processors/automations/client.py +0 -78
  89. uiform/resources/processors/automations/links.py +0 -356
  90. uiform/resources/processors/automations/mailboxes.py +0 -435
  91. uiform/resources/processors/automations/outlook.py +0 -444
  92. uiform/resources/secrets/webhook.py +0 -62
  93. uiform/types/chat.py +0 -8
  94. uiform/types/consensus.py +0 -10
  95. uiform/types/events.py +0 -76
  96. uiform/types/jobs/base.py +0 -150
  97. uiform/types/jobs/batch_annotation.py +0 -22
  98. {uiform → retab}/_utils/__init__.py +0 -0
  99. {uiform → retab}/_utils/usage/__init__.py +0 -0
  100. {uiform → retab}/py.typed +0 -0
  101. {uiform → retab}/resources/__init__.py +0 -0
  102. {uiform → retab}/resources/consensus/__init__.py +0 -0
  103. {uiform → retab}/resources/documents/__init__.py +0 -0
  104. {uiform → retab}/resources/finetuning.py +0 -0
  105. {uiform → retab}/resources/openai_example.py +0 -0
  106. {uiform → retab}/resources/processors/__init__.py +0 -0
  107. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  108. {uiform → retab}/resources/prompt_optimization.py +0 -0
  109. {uiform → retab}/resources/secrets/__init__.py +0 -0
  110. {uiform → retab}/resources/secrets/client.py +0 -0
  111. {uiform → retab}/types/__init__.py +0 -0
  112. {uiform → retab}/types/automations/__init__.py +0 -0
  113. {uiform → retab}/types/db/__init__.py +0 -0
  114. {uiform → retab}/types/documents/__init__.py +0 -0
  115. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  116. {uiform → retab}/types/jobs/__init__.py +0 -0
  117. {uiform → retab}/types/jobs/finetune.py +0 -0
  118. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  119. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  120. {uiform → retab}/types/pagination.py +0 -0
  121. {uiform → retab}/types/schemas/__init__.py +0 -0
  122. {uiform → retab}/types/secrets/__init__.py +0 -0
  123. {retab-0.0.36.dist-info → retab-0.0.38.dist-info}/WHEEL +0 -0
retab/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .client import AsyncRetab, Retab
+ from .types.schemas.object import Schema
+
+ __all__ = ["Retab", "AsyncRetab", "Schema"]
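The new top-level package re-exports the renamed clients that replace UiForm/AsyncUiForm from the old uiform package. A minimal usage sketch (the api_key keyword is an assumption for illustration; it is not shown in this diff):

    # Hypothetical migration sketch, not part of the diff.
    from retab import AsyncRetab, Retab, Schema

    client = Retab(api_key="sk-...")              # replaces UiForm(...)
    async_client = AsyncRetab(api_key="sk-...")   # replaces AsyncUiForm(...)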
{uiform → retab}/_resource.py RENAMED
@@ -5,13 +5,13 @@ import time
  from typing import TYPE_CHECKING
 
  if TYPE_CHECKING:
- from .client import AsyncUiForm, UiForm
+ from .client import AsyncRetab, Retab
 
 
  class SyncAPIResource:
- _client: UiForm
+ _client: Retab
 
- def __init__(self, client: UiForm) -> None:
+ def __init__(self, client: Retab) -> None:
  self._client = client
 
  def _sleep(self, seconds: float) -> None:
@@ -19,9 +19,9 @@ class SyncAPIResource:
 
 
  class AsyncAPIResource:
- _client: AsyncUiForm
+ _client: AsyncRetab
 
- def __init__(self, client: AsyncUiForm) -> None:
+ def __init__(self, client: AsyncRetab) -> None:
  self._client = client
 
  async def _sleep(self, seconds: float) -> None:
{uiform → retab}/_utils/ai_models.py RENAMED
@@ -61,7 +61,7 @@ def assert_valid_model_batch_processing(model: str) -> None:
  raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
  if not model_id or not model_id.strip():
  raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
- except ValueError as e:
+ except ValueError:
  if ":" not in model:
  raise ValueError(
  f"Invalid model format: {model}. Must be either:\n"
@@ -90,7 +90,7 @@ def assert_valid_model_schema_generation(model: str) -> None:
  raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
  if not model_id or not model_id.strip():
  raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
- except ValueError as e:
+ except ValueError:
  if ":" not in model:
  raise ValueError(
  f"Invalid model format: {model}. Must be either:\n"
{uiform → retab}/_utils/benchmarking.py RENAMED
@@ -5,7 +5,7 @@ import shutil
  # The goal is to leverage this piece of code to open a jsonl file and get an analysis of the performance of the model using a one-liner.
  ############# BENCHMARKING MODELS #############
  from itertools import zip_longest
- from typing import Any, Callable, Literal, Optional
+ from typing import Any, Callable, Literal, Optional, cast
 
  import pandas as pd # type: ignore
  from Levenshtein import distance as levenshtein_distance
@@ -27,7 +27,7 @@ def normalize_string(text: str) -> str:
  if not text:
  return ""
  # Remove all non-alphanumeric characters and convert to lowercase
- return re.sub(r'[^a-zA-Z0-9]', '', text).lower()
+ return re.sub(r"[^a-zA-Z0-9]", "", text).lower()
 
 
  def hamming_distance_padded(s: str, t: str) -> int:
@@ -45,7 +45,7 @@ def hamming_distance_padded(s: str, t: str) -> int:
  s = normalize_string(s)
  t = normalize_string(t)
 
- return sum(a != b for a, b in zip_longest(s, t, fillvalue=' '))
+ return sum(a != b for a, b in zip_longest(s, t, fillvalue=" "))
 
 
  def hamming_similarity(str_1: str, str_2: str) -> float:
@@ -385,7 +385,7 @@ class EvalMetrics(BaseModel):
  distances: dict[dictionary_metrics, EvalMetric]
 
 
- def flatten_dict(d: dict[str, Any], parent_key: str = '', sep: str = '.') -> dict[str, Any]:
+ def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = ".") -> dict[str, Any]:
  """Flatten a nested dictionary with dot-separated keys."""
  items: list[tuple[str, Any]] = []
  for k, v in d.items():
@@ -408,16 +408,14 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
  """
  # Flatten the dictionaries
  flattened_analysis = flatten_dict(analysis)
- if uncertainties:
- flattened_uncertainties = flatten_dict(uncertainties)
- else:
- uncertainties_list = None
-
  # Prepare data by matching fields
  fields = list(flattened_analysis.keys())
  similarities = [flattened_analysis[field] for field in fields]
 
+ # Prepare uncertainties if provided
+ uncertainties_list = None
  if uncertainties:
+ flattened_uncertainties = flatten_dict(uncertainties)
  uncertainties_list = [flattened_uncertainties.get(field, None) for field in fields]
 
  # Create a DataFrame
@@ -454,10 +452,11 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
 
  if similarity is None:
  continue # Skip fields with no similarity value
-
+ similarity = cast(float, similarity)
  # Calculate bar length and uncertainty range
  bar_len = round(similarity * scale)
  if uncertainty is not None and uncertainty > 0:
+ uncertainty = cast(float, uncertainty)
  uncertainty_start = max(0, round((similarity - uncertainty) * scale))
  uncertainty_end = min(bar_width, round((similarity + uncertainty) * scale))
  else:
@@ -465,21 +464,21 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
  uncertainty_end = bar_len # No uncertainty to display
 
  # Build the bar string
- bar_string = ''
+ bar_string = ""
  for i in range(bar_width):
  if i < bar_len:
  if i < uncertainty_start:
- char = '' # Solid block for certain part
+ char = "" # Solid block for certain part
  else:
- char = '' # Lighter block for uncertainty overlap
+ char = "" # Lighter block for uncertainty overlap
  else:
  if i < uncertainty_end:
- char = '' # Dash for upper uncertainty range
+ char = "" # Dash for upper uncertainty range
  else:
- char = ' ' # Space for empty area
+ char = " " # Space for empty area
  bar_string += char
 
  # Print the label and bar
- score_field = f'[{similarity:.4f}]'
+ score_field = f"[{similarity:.4f}]"
 
  print(f"{field:<{label_width}} {score_field} | {bar_string}")
{uiform → retab}/_utils/chat.py RENAMED
@@ -1,31 +1,26 @@
  import base64
- import io
  import logging
  from typing import List, Literal, Optional, Union, cast
 
  import requests
- from anthropic.types.content_block import ContentBlock
- from anthropic.types.image_block_param import ImageBlockParam, Source
+ from anthropic.types.image_block_param import ImageBlockParam
  from anthropic.types.message_param import MessageParam
  from anthropic.types.text_block_param import TextBlockParam
- from anthropic.types.tool_result_block_param import ToolResultBlockParam
- from anthropic.types.tool_use_block_param import ToolUseBlockParam
  from google.genai.types import BlobDict, ContentDict, ContentUnionDict, PartDict # type: ignore
  from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
  from openai.types.chat.chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
  from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
  from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
  from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
- from PIL import Image
 
- from ..types.chat import ChatCompletionUiformMessage
+ from ..types.chat import ChatCompletionRetabMessage
 
  MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
 
 
- def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage]) -> tuple[str, list[ContentUnionDict]]:
+ def convert_to_google_genai_format(messages: List[ChatCompletionRetabMessage]) -> tuple[str, list[ContentUnionDict]]:
  """
- Converts a list of ChatCompletionUiFormMessage to a format compatible with the google.genai SDK.
+ Converts a list of ChatCompletionRetabMessage to a format compatible with the google.genai SDK.
 
 
  Example:
@@ -45,7 +40,7 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
  ```
 
  Args:
- messages (List[ChatCompletionUiformMessage]): List of chat messages.
+ messages (List[ChatCompletionRetabMessage]): List of chat messages.
 
  Returns:
  List[Union[Dict[str, str], str]]: A list of formatted inputs for the google.genai SDK.
@@ -64,7 +59,7 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
  continue
  parts: list[PartDict] = []
 
- message_content = message['content']
+ message_content = message["content"]
  if isinstance(message_content, str):
  # Direct string content is treated as the prompt for the SDK
  parts.append(PartDict(text=message_content))
@@ -74,8 +69,8 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
  if part["type"] == "text":
  parts.append(PartDict(text=part["text"]))
  elif part["type"] == "image_url":
- url = part['image_url'].get('url', '') # type: ignore
- if url.startswith('data:image'):
+ url = part["image_url"].get("url", "") # type: ignore
+ if url.startswith("data:image"):
  # Extract base64 data and add it to the formatted inputs
  media_type, data_content = url.split(";base64,")
  media_type = media_type.split("data:")[-1] # => "image/jpeg"
@@ -99,12 +94,12 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
  return system_message, formatted_content
 
 
- def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) -> tuple[str, List[MessageParam]]:
+ def convert_to_anthropic_format(messages: List[ChatCompletionRetabMessage]) -> tuple[str, List[MessageParam]]:
  """
- Converts a list of ChatCompletionUiformMessage to a format compatible with the Anthropic SDK.
+ Converts a list of ChatCompletionRetabMessage to a format compatible with the Anthropic SDK.
 
  Args:
- messages (List[ChatCompletionUiformMessage]): List of chat messages.
+ messages (List[ChatCompletionRetabMessage]): List of chat messages.
 
  Returns:
  (system_message, formatted_messages):
@@ -133,24 +128,24 @@ def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) ->
  # -----------------------
  # Handle non-system roles
  # -----------------------
- if isinstance(message['content'], str):
+ if isinstance(message["content"], str):
  # Direct string content is treated as a single text block
  content_blocks.append(
  {
  "type": "text",
- "text": message['content'],
+ "text": message["content"],
  }
  )
 
- elif isinstance(message['content'], list):
+ elif isinstance(message["content"], list):
  # Handle structured content
- for part in message['content']:
+ for part in message["content"]:
  if part["type"] == "text":
  part = cast(ChatCompletionContentPartTextParam, part)
  content_blocks.append(
  {
  "type": "text",
- "text": part['text'], # type: ignore
+ "text": part["text"], # type: ignore
  }
  )
 
@@ -221,11 +216,11 @@ def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) ->
  return system_message, formatted_messages
 
 
- def convert_from_anthropic_format(messages: list[MessageParam], system_prompt: str) -> list[ChatCompletionUiformMessage]:
+ def convert_from_anthropic_format(messages: list[MessageParam], system_prompt: str) -> list[ChatCompletionRetabMessage]:
  """
- Converts a list of Anthropic MessageParam to a list of ChatCompletionUiformMessage.
+ Converts a list of Anthropic MessageParam to a list of ChatCompletionRetabMessage.
  """
- formatted_messages: list[ChatCompletionUiformMessage] = [ChatCompletionUiformMessage(role="developer", content=system_prompt)]
+ formatted_messages: list[ChatCompletionRetabMessage] = [ChatCompletionRetabMessage(role="developer", content=system_prompt)]
 
  for message in messages:
  role = message["role"]
@@ -234,7 +229,7 @@ def convert_from_anthropic_format(messages: list[MessageParam], system_prompt: s
  # Handle different content structures
  if isinstance(content_blocks, list) and len(content_blocks) == 1 and isinstance(content_blocks[0], dict) and content_blocks[0].get("type") == "text":
  # Simple text message
- formatted_messages.append(cast(ChatCompletionUiformMessage, {"role": role, "content": content_blocks[0].get("text", "")}))
+ formatted_messages.append(cast(ChatCompletionRetabMessage, {"role": role, "content": content_blocks[0].get("text", "")}))
  elif isinstance(content_blocks, list):
  # Message with multiple content parts or non-text content
  formatted_content: list[ChatCompletionContentPartParam] = []
@@ -253,22 +248,22 @@ def convert_from_anthropic_format(messages: list[MessageParam], system_prompt: s
 
  formatted_content.append(cast(ChatCompletionContentPartParam, {"type": "image_url", "image_url": {"url": image_url}}))
 
- formatted_messages.append(cast(ChatCompletionUiformMessage, {"role": role, "content": formatted_content}))
+ formatted_messages.append(cast(ChatCompletionRetabMessage, {"role": role, "content": formatted_content}))
 
  return formatted_messages
 
 
- def convert_to_openai_format(messages: List[ChatCompletionUiformMessage]) -> List[ChatCompletionMessageParam]:
+ def convert_to_openai_format(messages: List[ChatCompletionRetabMessage]) -> List[ChatCompletionMessageParam]:
  return cast(list[ChatCompletionMessageParam], messages)
 
 
- def convert_from_openai_format(messages: list[ChatCompletionMessageParam]) -> list[ChatCompletionUiformMessage]:
- return cast(list[ChatCompletionUiformMessage], messages)
+ def convert_from_openai_format(messages: list[ChatCompletionMessageParam]) -> list[ChatCompletionRetabMessage]:
+ return cast(list[ChatCompletionRetabMessage], messages)
 
 
  def separate_messages(
- messages: list[ChatCompletionUiformMessage],
- ) -> tuple[Optional[ChatCompletionUiformMessage], list[ChatCompletionUiformMessage], list[ChatCompletionUiformMessage]]:
+ messages: list[ChatCompletionRetabMessage],
+ ) -> tuple[Optional[ChatCompletionRetabMessage], list[ChatCompletionRetabMessage], list[ChatCompletionRetabMessage]]:
  """
  Separates messages into system, user and assistant messages.
 
@@ -296,12 +291,12 @@ def separate_messages(
  return system_message, user_messages, assistant_messages
 
 
- def str_messages(messages: list[ChatCompletionUiformMessage], max_length: int = 100) -> str:
+ def str_messages(messages: list[ChatCompletionRetabMessage], max_length: int = 100) -> str:
  """
  Converts a list of chat messages into a string representation with faithfully serialized structure.
 
  Args:
- messages (list[ChatCompletionUiformMessage]): The list of chat messages.
+ messages (list[ChatCompletionRetabMessage]): The list of chat messages.
  max_length (int): Maximum length for content before truncation.
 
  Returns:
@@ -312,7 +307,7 @@ def str_messages(messages: list[ChatCompletionUiformMessage], max_length: int =
  """Truncate text to max_len with ellipsis."""
  return text if len(text) <= max_len else f"{text[:max_len]}..."
 
- serialized: list[ChatCompletionUiformMessage] = []
+ serialized: list[ChatCompletionRetabMessage] = []
  for message in messages:
  role = message["role"]
  content = message["content"]
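A hedged sketch of the renamed converter surface above; the module paths are inferred from the file list, and the dict-style messages mirror the TypedDict access used throughout the diff:

    # Illustrative only; import paths are inferred, not confirmed by the diff.
    from retab._utils.chat import convert_to_anthropic_format, convert_to_openai_format
    from retab.types.chat import ChatCompletionRetabMessage

    messages: list[ChatCompletionRetabMessage] = [
        {"role": "developer", "content": "Extract the invoice total."},
        {"role": "user", "content": "Invoice #42, total due: 19.99 EUR"},
    ]

    system_prompt, anthropic_messages = convert_to_anthropic_format(messages)
    openai_messages = convert_to_openai_format(messages)  # a plain cast, per the diff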
{uiform → retab}/_utils/display.py RENAMED
@@ -105,9 +105,6 @@ def count_image_tokens(image_url: str, detail: Literal["low", "high", "auto"] =
  total_tiles = tiles_wide * tiles_high
 
  return base_token_cost + (token_per_tile * total_tiles)
-
-
-
 
 
  def process_jsonl_file(jsonl_path: str) -> List[TokenCounts]:
{uiform → retab}/_utils/json_schema.py RENAMED
@@ -14,8 +14,8 @@ from email_validator import validate_email
  from pydantic import BaseModel, BeforeValidator, Field, create_model
  from pydantic.config import ConfigDict
 
- from uiform._utils.mime import generate_blake2b_hash_from_string
- from uiform.types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
+ from ..types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
+ from .mime import generate_blake2b_hash_from_string
 
  # **** Validation Functions ****
 
@@ -116,7 +116,7 @@ def validate_vat_number(v: Any) -> Optional[str]:
  try:
  if stdnum.eu.vat.is_valid(v_str):
  return stdnum.eu.vat.validate(v_str)
- except:
+ except Exception:
  pass
  return None
 
@@ -150,7 +150,7 @@ def validate_email_address(v: Any) -> Optional[str]:
  return None
  try:
  return validate_email(v_str).normalized
- except:
+ except Exception:
  return None
 
 
@@ -170,7 +170,7 @@ def validate_frenchpostcode(v: Any) -> Optional[str]:
  if not v_str.isdigit():
  return None
  return v_str
- except:
+ except Exception:
  return None
 
 
@@ -201,7 +201,7 @@ def validate_un_code(v: Any) -> Optional[int]:
  val = int(float(v_str)) # handle numeric strings
  if 0 <= val <= 3481:
  return val
- except:
+ except Exception:
  pass
  return None
 
@@ -242,7 +242,7 @@ def validate_integer(v: Any) -> Optional[int]:
  return None
  try:
  return int(float(v_str))
- except:
+ except Exception:
  return None
 
 
@@ -257,7 +257,7 @@ def validate_float(v: Any) -> Optional[float]:
  return None
  try:
  return float(v_str)
- except:
+ except Exception:
  return None
 
 
@@ -333,7 +333,7 @@ def validate_bool(v: Any) -> bool:
  return True
  elif v_str in false_values:
  return False
- except:
+ except Exception:
  pass
 
  return False
@@ -2091,11 +2091,6 @@ def sanitize(instance: Any, schema: dict[str, Any]) -> Any:
  return __sanitize_instance(instance, expanded_schema)
 
 
- import copy
- import json
- from .mime import generate_blake2b_hash_from_string
-
-
  def compute_schema_data_id(json_schema: dict[str, Any]) -> str:
  """Returns the schema_data_id for a given JSON schema.
 
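The validators above share one lenient pattern: coerce when possible, otherwise return None (or False), now with `except Exception` instead of a bare `except`. A brief illustration (import path inferred from the file list):

    # Illustrative only; behaviour inferred from the hunks shown above.
    from retab._utils.json_schema import validate_float, validate_integer

    validate_integer("12.7")   # -> 12, via int(float(...))
    validate_integer("n/a")    # -> None, the failed coercion is swallowed
    validate_float("19.99")    # -> 19.99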
{uiform → retab}/_utils/mime.py RENAMED
@@ -4,16 +4,17 @@ import io
  import json
  import mimetypes
  from pathlib import Path
- from typing import Literal, Sequence, TypeVar, get_args
+ from typing import Sequence, TypeVar, get_args
 
  import httpx
  import PIL.Image
+ import puremagic
  from pydantic import HttpUrl
 
  from ..types.mime import MIMEData
  from ..types.modalities import SUPPORTED_TYPES
 
- T = TypeVar('T')
+ T = TypeVar("T")
 
 
  def generate_blake2b_hash_from_bytes(bytes_: bytes) -> str:
@@ -25,7 +26,7 @@ def generate_blake2b_hash_from_base64(base64_string: str) -> str:
 
 
  def generate_blake2b_hash_from_string(input_string: str) -> str:
- return generate_blake2b_hash_from_bytes(input_string.encode('utf-8'))
+ return generate_blake2b_hash_from_bytes(input_string.encode("utf-8"))
 
 
  def generate_blake2b_hash_from_dict(input_dict: dict) -> str:
@@ -43,7 +44,7 @@ def convert_pil_image_to_mime_data(image: PIL.Image.Image) -> MIMEData:
  """
  # Convert PIL image to base64 string
  buffered = io.BytesIO()
- choosen_format = image.format if (image.format and image.format.lower() in ['png', 'jpeg', 'gif', 'webp']) else "JPEG"
+ choosen_format = image.format if (image.format and image.format.lower() in ["png", "jpeg", "gif", "webp"]) else "JPEG"
  image.save(buffered, format=choosen_format)
  base64_content = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
@@ -98,13 +99,11 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
  if isinstance(document, bytes):
  # `document` is already the raw bytes
  try:
- import puremagic
-
  extension = puremagic.from_string(document)
  if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
  extension = ".jpeg"
- except:
- extension = '.txt'
+ except Exception:
+ extension = ".txt"
  file_bytes = document
  filename = "uploaded_file" + extension
  elif isinstance(document, io.IOBase):
@@ -112,19 +111,17 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
  file_bytes = document.read()
  filename = getattr(document, "name", "uploaded_file")
  filename = Path(filename).name
- elif hasattr(document, 'unicode_string') and callable(getattr(document, 'unicode_string')):
+ elif hasattr(document, "unicode_string") and callable(getattr(document, "unicode_string")):
  with httpx.Client() as client:
  url: str = document.unicode_string() # type: ignore
  response = client.get(url)
  response.raise_for_status()
  try:
- import puremagic
-
  extension = puremagic.from_string(response.content)
  if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
  extension = ".jpeg"
- except:
- extension = '.txt'
+ except Exception:
+ extension = ".txt"
  file_bytes = response.content # Fix: Use response.content instead of document
  filename = "uploaded_file" + extension
  else:
@@ -139,7 +136,7 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
  encoded_content = base64.b64encode(file_bytes).decode("utf-8")
  # Compute SHA-256 hash over the *base64-encoded* content
  hash_obj = hashlib.sha256(encoded_content.encode("utf-8"))
- content_hash = hash_obj.hexdigest()
+ hash_obj.hexdigest()
 
  # Guess MIME type based on file extension
  guessed_type, _ = mimetypes.guess_type(filename)
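A hedged sketch of prepare_mime_document, whose accepted input types appear in the truncated signature above; the MIMEData return type is an assumption and its attributes are not shown in this diff:

    # Illustrative only; import path and return type are assumptions.
    from pathlib import Path

    from retab._utils.mime import prepare_mime_document

    mime_doc = prepare_mime_document(Path("invoice.pdf"))   # Path input
    mime_doc2 = prepare_mime_document(b"%PDF-1.7 ...")      # raw bytes, type sniffed via puremagic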
{uiform → retab}/_utils/responses.py RENAMED
@@ -16,13 +16,13 @@ from openai.types.responses.response_input_message_content_list_param import Res
  from openai.types.responses.response_input_param import ResponseInputItemParam
  from openai.types.responses.response_input_text_param import ResponseInputTextParam
 
- from ..types.chat import ChatCompletionUiformMessage
+ from ..types.chat import ChatCompletionRetabMessage
  from ..types.documents.extractions import UiParsedChatCompletion, UiParsedChoice
 
 
- def convert_to_openai_format(messages: list[ChatCompletionUiformMessage]) -> list[ResponseInputItemParam]:
+ def convert_to_openai_format(messages: list[ChatCompletionRetabMessage]) -> list[ResponseInputItemParam]:
  """
- Converts a list of ChatCompletionUiformMessage to the OpenAI ResponseInputParam format.
+ Converts a list of ChatCompletionRetabMessage to the OpenAI ResponseInputParam format.
 
  Args:
  messages: List of chat messages in UIForm format
@@ -64,9 +64,9 @@ def convert_to_openai_format(messages: list[ChatCompletionUiformMessage]) -> lis
  return formatted_messages
 
 
- def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[ChatCompletionUiformMessage]:
+ def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[ChatCompletionRetabMessage]:
  """
- Converts messages from OpenAI ResponseInputParam format to ChatCompletionUiformMessage format.
+ Converts messages from OpenAI ResponseInputParam format to ChatCompletionRetabMessage format.
 
  Args:
  messages: Messages in OpenAI ResponseInputParam format
@@ -74,16 +74,22 @@ def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[C
  Returns:
  List of chat messages in UIForm format
  """
- formatted_messages: list[ChatCompletionUiformMessage] = []
+ formatted_messages: list[ChatCompletionRetabMessage] = []
 
  for message in messages:
+ if "role" not in message or "content" not in message:
+ # Mandatory fields for a message
+ if message.get("type") != "message":
+ print(f"Not supported message type: {message.get('type')}... Skipping...")
+ continue
+
+ role = message["role"]
+ content = message["content"]
+
  if "type" not in message:
  # The type is required by all other sub-types of ResponseInputItemParam except for EasyInputMessageParam and Message, which are messages.
  message["type"] = "message"
 
- if message["type"] != "message":
- print(f"Not supported message type: {message['type']}... Skipping...")
- continue
  role = message["role"]
  content = message["content"]
  formatted_content: str | list[ChatCompletionContentPartParam]
@@ -104,7 +110,7 @@ def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[C
  print(f"Not supported content type: {part['type']}... Skipping...")
 
  # Create message in UIForm format
- formatted_message = ChatCompletionUiformMessage(role=role, content=formatted_content)
+ formatted_message = ChatCompletionRetabMessage(role=role, content=formatted_content)
  formatted_messages.append(formatted_message)
 
  return formatted_messages
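A small sketch of the stricter guard added above: items without role/content are kept only when they are plain "message" items, everything else is skipped with a log line (import path inferred from the file list):

    # Illustrative only; input dicts follow the OpenAI ResponseInputItemParam shapes.
    from retab._utils.responses import convert_from_openai_format

    items = [
        {"role": "user", "content": "hello"},  # converted
        {"type": "function_call", "call_id": "1", "name": "lookup", "arguments": "{}"},  # skipped
    ]
    messages = convert_from_openai_format(items)  # -> one ChatCompletionRetabMessage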
{uiform → retab}/_utils/stream_context_managers.py RENAMED
@@ -1,7 +1,7 @@
  from contextlib import AbstractAsyncContextManager, AbstractContextManager
  from typing import Any, AsyncGenerator, Callable, Generator, TypeVar, Union
 
- T = TypeVar('T')
+ T = TypeVar("T")
 
 
  class AsyncGeneratorContextManager(AbstractAsyncContextManager[AsyncGenerator[T, None]]):