retab 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. retab-0.0.35.dist-info/METADATA +417 -0
  2. retab-0.0.35.dist-info/RECORD +111 -0
  3. retab-0.0.35.dist-info/WHEEL +5 -0
  4. retab-0.0.35.dist-info/top_level.txt +1 -0
  5. uiform/__init__.py +4 -0
  6. uiform/_resource.py +28 -0
  7. uiform/_utils/__init__.py +0 -0
  8. uiform/_utils/ai_models.py +100 -0
  9. uiform/_utils/benchmarking copy.py +588 -0
  10. uiform/_utils/benchmarking.py +485 -0
  11. uiform/_utils/chat.py +332 -0
  12. uiform/_utils/display.py +443 -0
  13. uiform/_utils/json_schema.py +2161 -0
  14. uiform/_utils/mime.py +168 -0
  15. uiform/_utils/responses.py +163 -0
  16. uiform/_utils/stream_context_managers.py +52 -0
  17. uiform/_utils/usage/__init__.py +0 -0
  18. uiform/_utils/usage/usage.py +300 -0
  19. uiform/client.py +701 -0
  20. uiform/py.typed +0 -0
  21. uiform/resources/__init__.py +0 -0
  22. uiform/resources/consensus/__init__.py +3 -0
  23. uiform/resources/consensus/client.py +114 -0
  24. uiform/resources/consensus/completions.py +252 -0
  25. uiform/resources/consensus/completions_stream.py +278 -0
  26. uiform/resources/consensus/responses.py +325 -0
  27. uiform/resources/consensus/responses_stream.py +373 -0
  28. uiform/resources/deployments/__init__.py +9 -0
  29. uiform/resources/deployments/client.py +78 -0
  30. uiform/resources/deployments/endpoints.py +322 -0
  31. uiform/resources/deployments/links.py +452 -0
  32. uiform/resources/deployments/logs.py +211 -0
  33. uiform/resources/deployments/mailboxes.py +496 -0
  34. uiform/resources/deployments/outlook.py +531 -0
  35. uiform/resources/deployments/tests.py +158 -0
  36. uiform/resources/documents/__init__.py +3 -0
  37. uiform/resources/documents/client.py +255 -0
  38. uiform/resources/documents/extractions.py +441 -0
  39. uiform/resources/evals.py +812 -0
  40. uiform/resources/files.py +24 -0
  41. uiform/resources/finetuning.py +62 -0
  42. uiform/resources/jsonlUtils.py +1046 -0
  43. uiform/resources/models.py +45 -0
  44. uiform/resources/openai_example.py +22 -0
  45. uiform/resources/processors/__init__.py +3 -0
  46. uiform/resources/processors/automations/__init__.py +9 -0
  47. uiform/resources/processors/automations/client.py +78 -0
  48. uiform/resources/processors/automations/endpoints.py +317 -0
  49. uiform/resources/processors/automations/links.py +356 -0
  50. uiform/resources/processors/automations/logs.py +211 -0
  51. uiform/resources/processors/automations/mailboxes.py +435 -0
  52. uiform/resources/processors/automations/outlook.py +444 -0
  53. uiform/resources/processors/automations/tests.py +158 -0
  54. uiform/resources/processors/client.py +474 -0
  55. uiform/resources/prompt_optimization.py +76 -0
  56. uiform/resources/schemas.py +369 -0
  57. uiform/resources/secrets/__init__.py +9 -0
  58. uiform/resources/secrets/client.py +20 -0
  59. uiform/resources/secrets/external_api_keys.py +109 -0
  60. uiform/resources/secrets/webhook.py +62 -0
  61. uiform/resources/usage.py +271 -0
  62. uiform/types/__init__.py +0 -0
  63. uiform/types/ai_models.py +645 -0
  64. uiform/types/automations/__init__.py +0 -0
  65. uiform/types/automations/cron.py +58 -0
  66. uiform/types/automations/endpoints.py +21 -0
  67. uiform/types/automations/links.py +28 -0
  68. uiform/types/automations/mailboxes.py +60 -0
  69. uiform/types/automations/outlook.py +68 -0
  70. uiform/types/automations/webhooks.py +21 -0
  71. uiform/types/chat.py +8 -0
  72. uiform/types/completions.py +93 -0
  73. uiform/types/consensus.py +10 -0
  74. uiform/types/db/__init__.py +0 -0
  75. uiform/types/db/annotations.py +24 -0
  76. uiform/types/db/files.py +36 -0
  77. uiform/types/deployments/__init__.py +0 -0
  78. uiform/types/deployments/cron.py +59 -0
  79. uiform/types/deployments/endpoints.py +28 -0
  80. uiform/types/deployments/links.py +36 -0
  81. uiform/types/deployments/mailboxes.py +67 -0
  82. uiform/types/deployments/outlook.py +76 -0
  83. uiform/types/deployments/webhooks.py +21 -0
  84. uiform/types/documents/__init__.py +0 -0
  85. uiform/types/documents/correct_orientation.py +13 -0
  86. uiform/types/documents/create_messages.py +226 -0
  87. uiform/types/documents/extractions.py +297 -0
  88. uiform/types/evals.py +207 -0
  89. uiform/types/events.py +76 -0
  90. uiform/types/extractions.py +85 -0
  91. uiform/types/jobs/__init__.py +0 -0
  92. uiform/types/jobs/base.py +150 -0
  93. uiform/types/jobs/batch_annotation.py +22 -0
  94. uiform/types/jobs/evaluation.py +133 -0
  95. uiform/types/jobs/finetune.py +6 -0
  96. uiform/types/jobs/prompt_optimization.py +41 -0
  97. uiform/types/jobs/webcrawl.py +6 -0
  98. uiform/types/logs.py +231 -0
  99. uiform/types/mime.py +257 -0
  100. uiform/types/modalities.py +68 -0
  101. uiform/types/pagination.py +6 -0
  102. uiform/types/schemas/__init__.py +0 -0
  103. uiform/types/schemas/enhance.py +53 -0
  104. uiform/types/schemas/evaluate.py +55 -0
  105. uiform/types/schemas/generate.py +32 -0
  106. uiform/types/schemas/layout.py +58 -0
  107. uiform/types/schemas/object.py +631 -0
  108. uiform/types/schemas/templates.py +107 -0
  109. uiform/types/secrets/__init__.py +0 -0
  110. uiform/types/secrets/external_api_keys.py +22 -0
  111. uiform/types/standards.py +39 -0
uiform/_utils/chat.py ADDED
@@ -0,0 +1,332 @@
1
+ import base64
2
+ import io
3
+ import logging
4
+ from typing import List, Literal, Optional, Union, cast
5
+
6
+ import requests
7
+ from anthropic.types.content_block import ContentBlock
8
+ from anthropic.types.image_block_param import ImageBlockParam, Source
9
+ from anthropic.types.message_param import MessageParam
10
+ from anthropic.types.text_block_param import TextBlockParam
11
+ from anthropic.types.tool_result_block_param import ToolResultBlockParam
12
+ from anthropic.types.tool_use_block_param import ToolUseBlockParam
13
+ from google.genai.types import BlobDict, ContentDict, ContentUnionDict, PartDict # type: ignore
14
+ from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
15
+ from openai.types.chat.chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
16
+ from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
17
+ from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
18
+ from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
19
+ from PIL import Image
20
+
21
+ from ..types.chat import ChatCompletionUiformMessage
22
+
23
+ MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]  # image MIME types; convert_to_anthropic_format casts an image source's media_type to this alias
24
+
25
+
26
def _google_part_from_data_url(url: str) -> Optional[PartDict]:
    """Turn a base64 ``data:image/...`` URL into an inline-data part; return None if it is malformed."""
    header, separator, encoded = url.partition(";base64,")
    if not separator:
        # Without the ";base64," marker there is no payload we know how to decode.
        logging.warning("Skipping image data URL that is not base64-encoded.")
        return None

    mime_type = header.split("data:")[-1]  # "data:image/jpeg" => "image/jpeg"
    try:
        image_bytes = base64.b64decode(encoded)
    except (ValueError, TypeError):
        # binascii.Error (bad padding / bad characters) is a ValueError subclass.
        logging.warning("Skipping image whose base64 payload could not be decoded.", exc_info=True)
        return None

    return {"inline_data": {"data": image_bytes, "mime_type": mime_type}}


def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage]) -> tuple[str, list[ContentUnionDict]]:
    """
    Converts a list of ChatCompletionUiformMessage to a format compatible with the google.genai SDK.

    System/developer messages are pulled out of the conversation and returned separately;
    every other message becomes one content entry whose role is "user" for user messages
    and "model" for anything else.

    Only text parts and base64 ``data:image`` URLs are converted. Remote image URLs,
    audio parts, file parts and undecodable images are skipped with a logged warning.

    Example:
        ```python
        from google import genai
        from google.genai import types

        client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
        system_message, contents = convert_to_google_genai_format(messages)
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=contents,
            config=types.GenerateContentConfig(system_instruction=system_message),
        )
        ```

    Args:
        messages (List[ChatCompletionUiformMessage]): List of chat messages.

    Returns:
        (system_message, formatted_content):
            system_message (str):
                The system message if one was found, otherwise an empty string.
            formatted_content (list[ContentUnionDict]):
                One content dict (``parts`` + ``role``) per non-system message.

    Raises:
        ValueError: If a second system/developer message follows a non-empty one.
    """
    system_message: str = ""
    formatted_content: list[ContentUnionDict] = []

    for message in messages:
        # -----------------------
        # Handle system message
        # -----------------------
        if message["role"] in ("system", "developer"):
            assert isinstance(message["content"], str), "System message content must be a string."
            if system_message != "":
                raise ValueError("Only one system message is allowed per chat.")
            system_message += message["content"]
            continue

        # -----------------------
        # Handle non-system roles
        # -----------------------
        parts: list[PartDict] = []
        message_content = message["content"]

        if isinstance(message_content, str):
            # Direct string content becomes a single text part
            parts.append({"text": message_content})
        elif isinstance(message_content, list):
            for part in message_content:
                part_type = part["type"]
                if part_type == "text":
                    parts.append({"text": part["text"]})  # type: ignore
                elif part_type == "image_url":
                    url = part["image_url"].get("url", "")  # type: ignore
                    if url.startswith("data:image"):
                        image_part = _google_part_from_data_url(url)
                        if image_part is not None:
                            parts.append(image_part)
                    else:
                        logging.warning("Only base64 data-URL images are supported for google.genai; skipping image URL.")
                else:
                    # input_audio, file and unknown part types have no conversion yet
                    logging.warning("Content part of type '%s' is not supported for google.genai; skipping it.", part_type)

        content: ContentDict = {"parts": parts, "role": ("user" if message["role"] == "user" else "model")}
        formatted_content.append(content)

    return system_message, formatted_content
100
+
101
+
102
# Seconds to wait on a remote image server; without a timeout ``requests`` can block indefinitely.
_IMAGE_FETCH_TIMEOUT_SECONDS = 30

# Content types kept as-is for downloaded images; anything else falls back to image/jpeg.
_ANTHROPIC_IMAGE_MEDIA_TYPES = ("image/jpeg", "image/png", "image/gif", "image/webp")


def _load_image_as_base64(image_url: str) -> Optional[tuple[str, str]]:
    """Resolve an image URL to ``(media_type, base64_data)``; return None when it cannot be loaded."""
    header, separator, payload = image_url.partition(";base64,")
    if separator:
        # Already inline: "data:image/jpeg;base64,xxxx" => ("image/jpeg", "xxxx")
        return header.split("data:")[-1], payload
    if image_url.startswith("data:"):
        # A data URL without a base64 payload can be neither split nor downloaded.
        logging.warning("Skipping image data URL that is not base64-encoded.")
        return None

    # It's a remote URL, so fetch, encode, and derive media type from headers
    try:
        response = requests.get(image_url, timeout=_IMAGE_FETCH_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException:
        logging.warning(
            "Failed to load image from URL: %s",
            image_url,
            exc_info=True,
            stack_info=True,
        )
        return None

    # Servers may append parameters ("image/png; charset=binary"); compare the bare type only.
    content_type = response.headers.get("Content-Type", "image/jpeg").split(";")[0].strip().lower()
    if content_type not in _ANTHROPIC_IMAGE_MEDIA_TYPES:
        logging.warning(
            "Unrecognized Content-Type '%s' - defaulting to image/jpeg",
            content_type,
        )
        content_type = "image/jpeg"

    return content_type, base64.b64encode(response.content).decode("utf-8")


def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) -> tuple[str, List[MessageParam]]:
    """
    Converts a list of ChatCompletionUiformMessage to a format compatible with the Anthropic SDK.

    Text parts become text blocks. Image parts become base64 image blocks: data URLs are
    split in place, remote URLs are downloaded and encoded. Images that cannot be loaded,
    audio parts and unknown part types are skipped with a logged warning.

    Args:
        messages (List[ChatCompletionUiformMessage]): List of chat messages.

    Returns:
        (system_message, formatted_messages):
            system_message (str):
                The system message if one was found, otherwise an empty string.
            formatted_messages (List[MessageParam]):
                A list of formatted messages ready for Anthropic.

    Raises:
        ValueError: If a second system/developer message follows a non-empty one.
    """
    formatted_messages: list[MessageParam] = []
    system_message: str = ""

    for message in messages:
        # -----------------------
        # Handle system message
        # -----------------------
        if message["role"] in ("system", "developer"):
            assert isinstance(message["content"], str), "System message content must be a string."
            if system_message != "":
                raise ValueError("Only one system message is allowed per chat.")
            system_message += message["content"]
            continue

        # -----------------------
        # Handle non-system roles
        # -----------------------
        content_blocks: list[Union[TextBlockParam, ImageBlockParam]] = []
        content = message["content"]

        if isinstance(content, str):
            # Direct string content is treated as a single text block
            content_blocks.append({"type": "text", "text": content})
        elif isinstance(content, list):
            for part in content:
                part_type = part["type"]
                if part_type == "text":
                    content_blocks.append({"type": "text", "text": part["text"]})  # type: ignore
                elif part_type == "image_url":
                    image = _load_image_as_base64(part["image_url"]["url"])  # type: ignore
                    if image is None:
                        # Skip adding this block if the image could not be loaded
                        continue
                    media_type, base64_data = image
                    content_blocks.append(
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": cast("MediaType", media_type),
                                "data": base64_data,
                            },
                        }
                    )
                elif part_type == "input_audio":
                    # No blocks appended since not supported
                    logging.warning("Audio input is not supported yet.")
                else:
                    logging.warning("Content part of type '%s' is not supported for Anthropic; skipping it.", part_type)

        formatted_messages.append({"role": message["role"], "content": content_blocks})  # type: ignore

    return system_message, formatted_messages
222
+
223
+
224
def convert_from_anthropic_format(messages: list[MessageParam], system_prompt: str) -> list[ChatCompletionUiformMessage]:
    """
    Converts a list of Anthropic MessageParam to a list of ChatCompletionUiformMessage.

    The result always starts with a "developer" message carrying ``system_prompt``.
    For each Anthropic message:
      - plain string content is kept as string content;
      - a single text block is flattened to string content;
      - otherwise text blocks become text parts and image blocks become ``image_url``
        parts (base64 sources as data URLs, url sources as the URL itself).
    Blocks that are not dicts, or of any other type, are left out.

    Args:
        messages (list[MessageParam]): Anthropic-formatted messages.
        system_prompt (str): Text used for the leading developer message.

    Returns:
        list[ChatCompletionUiformMessage]: The developer message followed by the converted messages.
    """
    formatted_messages: list[ChatCompletionUiformMessage] = [
        cast("ChatCompletionUiformMessage", {"role": "developer", "content": system_prompt})
    ]

    for message in messages:
        role = message["role"]
        content_blocks = message["content"]

        if isinstance(content_blocks, str):
            # Anthropic accepts a bare string as content; keep it instead of dropping the message
            formatted_messages.append(cast("ChatCompletionUiformMessage", {"role": role, "content": content_blocks}))
            continue

        if not isinstance(content_blocks, list):
            logging.warning("Skipping %s message whose content is neither a string nor a list.", role)
            continue

        if len(content_blocks) == 1 and isinstance(content_blocks[0], dict) and content_blocks[0].get("type") == "text":
            # Simple text message
            formatted_messages.append(cast("ChatCompletionUiformMessage", {"role": role, "content": content_blocks[0].get("text", "")}))
            continue

        # Message with multiple content parts or non-text content
        formatted_content: list[ChatCompletionContentPartParam] = []
        for block in content_blocks:
            if not isinstance(block, dict):
                continue

            block_type = block.get("type")
            if block_type == "text":
                formatted_content.append(cast("ChatCompletionContentPartParam", {"type": "text", "text": block.get("text", "")}))
            elif block_type == "image":
                source = block.get("source", {})
                if not isinstance(source, dict):
                    continue

                source_type = source.get("type")
                if source_type == "base64":
                    # Convert base64 image to data URL format
                    media_type = source.get("media_type", "image/jpeg")
                    data = source.get("data", "")
                    image_url = f"data:{media_type};base64,{data}"
                elif source_type == "url":
                    # Remote image reference: pass the URL through unchanged
                    image_url = source.get("url", "")
                else:
                    continue

                formatted_content.append(cast("ChatCompletionContentPartParam", {"type": "image_url", "image_url": {"url": image_url}}))

        formatted_messages.append(cast("ChatCompletionUiformMessage", {"role": role, "content": formatted_content}))

    return formatted_messages
259
+
260
+
261
def convert_to_openai_format(messages: List[ChatCompletionUiformMessage]) -> List[ChatCompletionMessageParam]:
    """Return ``messages`` re-typed as OpenAI chat-completion message params.

    No conversion happens: the very same list object is handed back, and the
    cast only informs the type checker.
    """
    openai_messages = cast("list[ChatCompletionMessageParam]", messages)
    return openai_messages
263
+
264
+
265
def convert_from_openai_format(messages: list[ChatCompletionMessageParam]) -> list[ChatCompletionUiformMessage]:
    """Return OpenAI chat-completion ``messages`` re-typed as ChatCompletionUiformMessage.

    No conversion happens: the very same list object is handed back, and the
    cast only informs the type checker.
    """
    uiform_messages = cast("list[ChatCompletionUiformMessage]", messages)
    return uiform_messages
267
+
268
+
269
def separate_messages(
    messages: list[ChatCompletionUiformMessage],
) -> tuple[Optional[ChatCompletionUiformMessage], list[ChatCompletionUiformMessage], list[ChatCompletionUiformMessage]]:
    """
    Splits a conversation by role.

    Messages whose role is "system" or "developer" compete for the single system slot
    (the last one in the list wins); "user" and "assistant" messages are collected in
    their original order. Messages with any other role are left out.

    Args:
        messages: List of chat messages containing system, user and assistant messages

    Returns:
        Tuple containing:
            - The system message if present, otherwise None
            - List of user messages
            - List of assistant messages
    """
    system_candidates = [entry for entry in messages if entry["role"] in ("system", "developer")]
    system_message: Optional[ChatCompletionUiformMessage] = system_candidates[-1] if system_candidates else None

    user_messages = [entry for entry in messages if entry["role"] == "user"]
    assistant_messages = [entry for entry in messages if entry["role"] == "assistant"]

    return system_message, user_messages, assistant_messages
297
+
298
+
299
def str_messages(messages: list[ChatCompletionUiformMessage], max_length: int = 100) -> str:
    """
    Renders chat messages as the ``repr`` of a shortened copy of the list.

    String content is cut to ``max_length`` characters (with "..." appended when cut).
    List content keeps only non-empty text parts and image_url parts, each shortened
    the same way; other part types are left out. Messages whose content is neither a
    string nor a list do not appear in the output.

    Args:
        messages (list[ChatCompletionUiformMessage]): The list of chat messages.
        max_length (int): Maximum length for content before truncation.

    Returns:
        str: A string representation of the messages with applied truncation.
    """

    def clip(value: str) -> str:
        """Shorten value to max_length characters, marking the cut with an ellipsis."""
        if len(value) > max_length:
            return value[:max_length] + "..."
        return value

    def clip_part(piece):
        """Return the shortened copy of a text/image part, or None when the part is not kept."""
        if piece["type"] == "text" and piece["text"]:
            return {"type": "text", "text": clip(piece["text"])}
        if piece["type"] == "image_url" and piece["image_url"]:
            link = piece["image_url"].get("url", "unknown image")
            return {"type": "image_url", "image_url": {"url": clip(link)}}
        return None

    rendered: list[ChatCompletionUiformMessage] = []
    for entry in messages:
        who, body = entry["role"], entry["content"]

        if isinstance(body, str):
            rendered.append({"role": who, "content": clip(body)})
        elif isinstance(body, list):
            kept = [shortened for shortened in map(clip_part, body) if shortened is not None]
            rendered.append({"role": who, "content": kept})

    return repr(rendered)