retab 0.0.35__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {uiform → retab}/_utils/ai_models.py +2 -2
- {uiform → retab}/_utils/benchmarking.py +15 -16
- {uiform → retab}/_utils/chat.py +9 -14
- {uiform → retab}/_utils/display.py +0 -3
- {uiform → retab}/_utils/json_schema.py +9 -14
- {uiform → retab}/_utils/mime.py +11 -14
- {uiform → retab}/_utils/responses.py +9 -3
- {uiform → retab}/_utils/stream_context_managers.py +1 -1
- {uiform → retab}/_utils/usage/usage.py +28 -28
- {uiform → retab}/client.py +32 -31
- {uiform → retab}/resources/consensus/client.py +17 -36
- {uiform → retab}/resources/consensus/completions.py +24 -47
- {uiform → retab}/resources/consensus/completions_stream.py +26 -38
- {uiform → retab}/resources/consensus/responses.py +31 -80
- {uiform → retab}/resources/consensus/responses_stream.py +31 -79
- {uiform → retab}/resources/documents/client.py +59 -45
- {uiform → retab}/resources/documents/extractions.py +181 -90
- {uiform → retab}/resources/evals.py +56 -43
- retab/resources/evaluations/__init__.py +3 -0
- retab/resources/evaluations/client.py +301 -0
- retab/resources/evaluations/documents.py +233 -0
- retab/resources/evaluations/iterations.py +452 -0
- {uiform → retab}/resources/files.py +2 -2
- {uiform → retab}/resources/jsonlUtils.py +220 -216
- retab/resources/models.py +73 -0
- retab/resources/processors/automations/client.py +244 -0
- {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
- retab/resources/processors/automations/links.py +294 -0
- {uiform → retab}/resources/processors/automations/logs.py +30 -19
- {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
- retab/resources/processors/automations/outlook.py +337 -0
- {uiform → retab}/resources/processors/automations/tests.py +22 -25
- {uiform → retab}/resources/processors/client.py +179 -164
- {uiform → retab}/resources/schemas.py +78 -66
- {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
- retab/resources/secrets/webhook.py +64 -0
- {uiform → retab}/resources/usage.py +39 -2
- {uiform → retab}/types/ai_models.py +13 -13
- {uiform → retab}/types/automations/cron.py +19 -12
- {uiform → retab}/types/automations/endpoints.py +7 -4
- {uiform → retab}/types/automations/links.py +7 -3
- {uiform → retab}/types/automations/mailboxes.py +9 -9
- {uiform → retab}/types/automations/outlook.py +15 -11
- retab/types/browser_canvas.py +3 -0
- {uiform → retab}/types/chat.py +2 -2
- {uiform → retab}/types/completions.py +9 -12
- retab/types/consensus.py +19 -0
- {uiform → retab}/types/db/annotations.py +3 -3
- {uiform → retab}/types/db/files.py +8 -6
- {uiform → retab}/types/documents/create_messages.py +18 -20
- {uiform → retab}/types/documents/extractions.py +69 -24
- {uiform → retab}/types/evals.py +5 -5
- retab/types/evaluations/__init__.py +31 -0
- retab/types/evaluations/documents.py +30 -0
- retab/types/evaluations/iterations.py +112 -0
- retab/types/evaluations/model.py +73 -0
- retab/types/events.py +79 -0
- {uiform → retab}/types/extractions.py +33 -10
- retab/types/inference_settings.py +15 -0
- retab/types/jobs/base.py +54 -0
- retab/types/jobs/batch_annotation.py +12 -0
- {uiform → retab}/types/jobs/evaluation.py +1 -2
- {uiform → retab}/types/logs.py +37 -34
- retab/types/metrics.py +32 -0
- {uiform → retab}/types/mime.py +22 -20
- {uiform → retab}/types/modalities.py +10 -10
- retab/types/predictions.py +19 -0
- {uiform → retab}/types/schemas/enhance.py +4 -2
- {uiform → retab}/types/schemas/evaluate.py +7 -4
- {uiform → retab}/types/schemas/generate.py +6 -3
- {uiform → retab}/types/schemas/layout.py +1 -1
- {uiform → retab}/types/schemas/object.py +13 -14
- {uiform → retab}/types/schemas/templates.py +1 -3
- {uiform → retab}/types/secrets/external_api_keys.py +0 -1
- {uiform → retab}/types/standards.py +18 -1
- {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
- retab-0.0.37.dist-info/RECORD +107 -0
- retab-0.0.37.dist-info/top_level.txt +1 -0
- retab-0.0.35.dist-info/RECORD +0 -111
- retab-0.0.35.dist-info/top_level.txt +0 -1
- uiform/_utils/benchmarking copy.py +0 -588
- uiform/resources/deployments/__init__.py +0 -9
- uiform/resources/deployments/client.py +0 -78
- uiform/resources/deployments/endpoints.py +0 -322
- uiform/resources/deployments/links.py +0 -452
- uiform/resources/deployments/logs.py +0 -211
- uiform/resources/deployments/mailboxes.py +0 -496
- uiform/resources/deployments/outlook.py +0 -531
- uiform/resources/deployments/tests.py +0 -158
- uiform/resources/models.py +0 -45
- uiform/resources/processors/automations/client.py +0 -78
- uiform/resources/processors/automations/links.py +0 -356
- uiform/resources/processors/automations/outlook.py +0 -444
- uiform/resources/secrets/webhook.py +0 -62
- uiform/types/consensus.py +0 -10
- uiform/types/deployments/cron.py +0 -59
- uiform/types/deployments/endpoints.py +0 -28
- uiform/types/deployments/links.py +0 -36
- uiform/types/deployments/mailboxes.py +0 -67
- uiform/types/deployments/outlook.py +0 -76
- uiform/types/deployments/webhooks.py +0 -21
- uiform/types/events.py +0 -76
- uiform/types/jobs/base.py +0 -150
- uiform/types/jobs/batch_annotation.py +0 -22
- uiform/types/secrets/__init__.py +0 -0
- {uiform → retab}/__init__.py +0 -0
- {uiform → retab}/_resource.py +0 -0
- {uiform → retab}/_utils/__init__.py +0 -0
- {uiform → retab}/_utils/usage/__init__.py +0 -0
- {uiform → retab}/py.typed +0 -0
- {uiform → retab}/resources/__init__.py +0 -0
- {uiform → retab}/resources/consensus/__init__.py +0 -0
- {uiform → retab}/resources/documents/__init__.py +0 -0
- {uiform → retab}/resources/finetuning.py +0 -0
- {uiform → retab}/resources/openai_example.py +0 -0
- {uiform → retab}/resources/processors/__init__.py +0 -0
- {uiform → retab}/resources/processors/automations/__init__.py +0 -0
- {uiform → retab}/resources/prompt_optimization.py +0 -0
- {uiform → retab}/resources/secrets/__init__.py +0 -0
- {uiform → retab}/resources/secrets/client.py +0 -0
- {uiform → retab}/types/__init__.py +0 -0
- {uiform → retab}/types/automations/__init__.py +0 -0
- {uiform → retab}/types/automations/webhooks.py +0 -0
- {uiform → retab}/types/db/__init__.py +0 -0
- {uiform/types/deployments → retab/types/documents}/__init__.py +0 -0
- {uiform → retab}/types/documents/correct_orientation.py +0 -0
- {uiform/types/documents → retab/types/jobs}/__init__.py +0 -0
- {uiform → retab}/types/jobs/finetune.py +0 -0
- {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
- {uiform → retab}/types/jobs/webcrawl.py +0 -0
- {uiform → retab}/types/pagination.py +0 -0
- {uiform/types/jobs → retab/types/schemas}/__init__.py +0 -0
- {uiform/types/schemas → retab/types/secrets}/__init__.py +0 -0
- {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
@@ -61,7 +61,7 @@ def assert_valid_model_batch_processing(model: str) -> None:
|
|
61
61
|
raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
|
62
62
|
if not model_id or not model_id.strip():
|
63
63
|
raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
|
64
|
-
except ValueError
|
64
|
+
except ValueError:
|
65
65
|
if ":" not in model:
|
66
66
|
raise ValueError(
|
67
67
|
f"Invalid model format: {model}. Must be either:\n"
|
@@ -90,7 +90,7 @@ def assert_valid_model_schema_generation(model: str) -> None:
|
|
90
90
|
raise ValueError(f"Invalid base model in fine-tuned model '{model}'. Base model must be one of: {get_args(OpenAIModel)}")
|
91
91
|
if not model_id or not model_id.strip():
|
92
92
|
raise ValueError(f"Model ID cannot be empty in fine-tuned model '{model}'")
|
93
|
-
except ValueError
|
93
|
+
except ValueError:
|
94
94
|
if ":" not in model:
|
95
95
|
raise ValueError(
|
96
96
|
f"Invalid model format: {model}. Must be either:\n"
|
@@ -5,7 +5,7 @@ import shutil
|
|
5
5
|
# The goal is to leverage this piece of code to open a jsonl file and get an analysis of the performance of the model using a one-liner.
|
6
6
|
############# BENCHMARKING MODELS #############
|
7
7
|
from itertools import zip_longest
|
8
|
-
from typing import Any, Callable, Literal, Optional
|
8
|
+
from typing import Any, Callable, Literal, Optional, cast
|
9
9
|
|
10
10
|
import pandas as pd # type: ignore
|
11
11
|
from Levenshtein import distance as levenshtein_distance
|
@@ -27,7 +27,7 @@ def normalize_string(text: str) -> str:
|
|
27
27
|
if not text:
|
28
28
|
return ""
|
29
29
|
# Remove all non-alphanumeric characters and convert to lowercase
|
30
|
-
return re.sub(r
|
30
|
+
return re.sub(r"[^a-zA-Z0-9]", "", text).lower()
|
31
31
|
|
32
32
|
|
33
33
|
def hamming_distance_padded(s: str, t: str) -> int:
|
@@ -45,7 +45,7 @@ def hamming_distance_padded(s: str, t: str) -> int:
|
|
45
45
|
s = normalize_string(s)
|
46
46
|
t = normalize_string(t)
|
47
47
|
|
48
|
-
return sum(a != b for a, b in zip_longest(s, t, fillvalue=
|
48
|
+
return sum(a != b for a, b in zip_longest(s, t, fillvalue=" "))
|
49
49
|
|
50
50
|
|
51
51
|
def hamming_similarity(str_1: str, str_2: str) -> float:
|
@@ -385,7 +385,7 @@ class EvalMetrics(BaseModel):
|
|
385
385
|
distances: dict[dictionary_metrics, EvalMetric]
|
386
386
|
|
387
387
|
|
388
|
-
def flatten_dict(d: dict[str, Any], parent_key: str =
|
388
|
+
def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = ".") -> dict[str, Any]:
|
389
389
|
"""Flatten a nested dictionary with dot-separated keys."""
|
390
390
|
items: list[tuple[str, Any]] = []
|
391
391
|
for k, v in d.items():
|
@@ -408,16 +408,14 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
|
|
408
408
|
"""
|
409
409
|
# Flatten the dictionaries
|
410
410
|
flattened_analysis = flatten_dict(analysis)
|
411
|
-
if uncertainties:
|
412
|
-
flattened_uncertainties = flatten_dict(uncertainties)
|
413
|
-
else:
|
414
|
-
uncertainties_list = None
|
415
|
-
|
416
411
|
# Prepare data by matching fields
|
417
412
|
fields = list(flattened_analysis.keys())
|
418
413
|
similarities = [flattened_analysis[field] for field in fields]
|
419
414
|
|
415
|
+
# Prepare uncertainties if provided
|
416
|
+
uncertainties_list = None
|
420
417
|
if uncertainties:
|
418
|
+
flattened_uncertainties = flatten_dict(uncertainties)
|
421
419
|
uncertainties_list = [flattened_uncertainties.get(field, None) for field in fields]
|
422
420
|
|
423
421
|
# Create a DataFrame
|
@@ -454,10 +452,11 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
|
|
454
452
|
|
455
453
|
if similarity is None:
|
456
454
|
continue # Skip fields with no similarity value
|
457
|
-
|
455
|
+
similarity = cast(float, similarity)
|
458
456
|
# Calculate bar length and uncertainty range
|
459
457
|
bar_len = round(similarity * scale)
|
460
458
|
if uncertainty is not None and uncertainty > 0:
|
459
|
+
uncertainty = cast(float, uncertainty)
|
461
460
|
uncertainty_start = max(0, round((similarity - uncertainty) * scale))
|
462
461
|
uncertainty_end = min(bar_width, round((similarity + uncertainty) * scale))
|
463
462
|
else:
|
@@ -465,21 +464,21 @@ def plot_metrics_with_uncertainty(analysis: dict[str, Any], uncertainties: Optio
|
|
465
464
|
uncertainty_end = bar_len # No uncertainty to display
|
466
465
|
|
467
466
|
# Build the bar string
|
468
|
-
bar_string =
|
467
|
+
bar_string = ""
|
469
468
|
for i in range(bar_width):
|
470
469
|
if i < bar_len:
|
471
470
|
if i < uncertainty_start:
|
472
|
-
char =
|
471
|
+
char = "█" # Solid block for certain part
|
473
472
|
else:
|
474
|
-
char =
|
473
|
+
char = "█" # Lighter block for uncertainty overlap
|
475
474
|
else:
|
476
475
|
if i < uncertainty_end:
|
477
|
-
char =
|
476
|
+
char = "░" # Dash for upper uncertainty range
|
478
477
|
else:
|
479
|
-
char =
|
478
|
+
char = " " # Space for empty area
|
480
479
|
bar_string += char
|
481
480
|
|
482
481
|
# Print the label and bar
|
483
|
-
score_field = f
|
482
|
+
score_field = f"[{similarity:.4f}]"
|
484
483
|
|
485
484
|
print(f"{field:<{label_width}} {score_field} | {bar_string}")
|
{uiform → retab}/_utils/chat.py
RENAMED
@@ -1,22 +1,17 @@
|
|
1
1
|
import base64
|
2
|
-
import io
|
3
2
|
import logging
|
4
3
|
from typing import List, Literal, Optional, Union, cast
|
5
4
|
|
6
5
|
import requests
|
7
|
-
from anthropic.types.
|
8
|
-
from anthropic.types.image_block_param import ImageBlockParam, Source
|
6
|
+
from anthropic.types.image_block_param import ImageBlockParam
|
9
7
|
from anthropic.types.message_param import MessageParam
|
10
8
|
from anthropic.types.text_block_param import TextBlockParam
|
11
|
-
from anthropic.types.tool_result_block_param import ToolResultBlockParam
|
12
|
-
from anthropic.types.tool_use_block_param import ToolUseBlockParam
|
13
9
|
from google.genai.types import BlobDict, ContentDict, ContentUnionDict, PartDict # type: ignore
|
14
10
|
from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
|
15
11
|
from openai.types.chat.chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam
|
16
12
|
from openai.types.chat.chat_completion_content_part_param import ChatCompletionContentPartParam
|
17
13
|
from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
|
18
14
|
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
|
19
|
-
from PIL import Image
|
20
15
|
|
21
16
|
from ..types.chat import ChatCompletionUiformMessage
|
22
17
|
|
@@ -64,7 +59,7 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
|
|
64
59
|
continue
|
65
60
|
parts: list[PartDict] = []
|
66
61
|
|
67
|
-
message_content = message[
|
62
|
+
message_content = message["content"]
|
68
63
|
if isinstance(message_content, str):
|
69
64
|
# Direct string content is treated as the prompt for the SDK
|
70
65
|
parts.append(PartDict(text=message_content))
|
@@ -74,8 +69,8 @@ def convert_to_google_genai_format(messages: List[ChatCompletionUiformMessage])
|
|
74
69
|
if part["type"] == "text":
|
75
70
|
parts.append(PartDict(text=part["text"]))
|
76
71
|
elif part["type"] == "image_url":
|
77
|
-
url = part[
|
78
|
-
if url.startswith(
|
72
|
+
url = part["image_url"].get("url", "") # type: ignore
|
73
|
+
if url.startswith("data:image"):
|
79
74
|
# Extract base64 data and add it to the formatted inputs
|
80
75
|
media_type, data_content = url.split(";base64,")
|
81
76
|
media_type = media_type.split("data:")[-1] # => "image/jpeg"
|
@@ -133,24 +128,24 @@ def convert_to_anthropic_format(messages: List[ChatCompletionUiformMessage]) ->
|
|
133
128
|
# -----------------------
|
134
129
|
# Handle non-system roles
|
135
130
|
# -----------------------
|
136
|
-
if isinstance(message[
|
131
|
+
if isinstance(message["content"], str):
|
137
132
|
# Direct string content is treated as a single text block
|
138
133
|
content_blocks.append(
|
139
134
|
{
|
140
135
|
"type": "text",
|
141
|
-
"text": message[
|
136
|
+
"text": message["content"],
|
142
137
|
}
|
143
138
|
)
|
144
139
|
|
145
|
-
elif isinstance(message[
|
140
|
+
elif isinstance(message["content"], list):
|
146
141
|
# Handle structured content
|
147
|
-
for part in message[
|
142
|
+
for part in message["content"]:
|
148
143
|
if part["type"] == "text":
|
149
144
|
part = cast(ChatCompletionContentPartTextParam, part)
|
150
145
|
content_blocks.append(
|
151
146
|
{
|
152
147
|
"type": "text",
|
153
|
-
"text": part[
|
148
|
+
"text": part["text"], # type: ignore
|
154
149
|
}
|
155
150
|
)
|
156
151
|
|
@@ -105,9 +105,6 @@ def count_image_tokens(image_url: str, detail: Literal["low", "high", "auto"] =
|
|
105
105
|
total_tiles = tiles_wide * tiles_high
|
106
106
|
|
107
107
|
return base_token_cost + (token_per_tile * total_tiles)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
108
|
|
112
109
|
|
113
110
|
def process_jsonl_file(jsonl_path: str) -> List[TokenCounts]:
|
@@ -14,8 +14,8 @@ from email_validator import validate_email
|
|
14
14
|
from pydantic import BaseModel, BeforeValidator, Field, create_model
|
15
15
|
from pydantic.config import ConfigDict
|
16
16
|
|
17
|
-
from
|
18
|
-
from
|
17
|
+
from ..types.schemas.layout import Column, FieldItem, Layout, RefObject, Row, RowList
|
18
|
+
from .mime import generate_blake2b_hash_from_string
|
19
19
|
|
20
20
|
# **** Validation Functions ****
|
21
21
|
|
@@ -116,7 +116,7 @@ def validate_vat_number(v: Any) -> Optional[str]:
|
|
116
116
|
try:
|
117
117
|
if stdnum.eu.vat.is_valid(v_str):
|
118
118
|
return stdnum.eu.vat.validate(v_str)
|
119
|
-
except:
|
119
|
+
except Exception:
|
120
120
|
pass
|
121
121
|
return None
|
122
122
|
|
@@ -150,7 +150,7 @@ def validate_email_address(v: Any) -> Optional[str]:
|
|
150
150
|
return None
|
151
151
|
try:
|
152
152
|
return validate_email(v_str).normalized
|
153
|
-
except:
|
153
|
+
except Exception:
|
154
154
|
return None
|
155
155
|
|
156
156
|
|
@@ -170,7 +170,7 @@ def validate_frenchpostcode(v: Any) -> Optional[str]:
|
|
170
170
|
if not v_str.isdigit():
|
171
171
|
return None
|
172
172
|
return v_str
|
173
|
-
except:
|
173
|
+
except Exception:
|
174
174
|
return None
|
175
175
|
|
176
176
|
|
@@ -201,7 +201,7 @@ def validate_un_code(v: Any) -> Optional[int]:
|
|
201
201
|
val = int(float(v_str)) # handle numeric strings
|
202
202
|
if 0 <= val <= 3481:
|
203
203
|
return val
|
204
|
-
except:
|
204
|
+
except Exception:
|
205
205
|
pass
|
206
206
|
return None
|
207
207
|
|
@@ -242,7 +242,7 @@ def validate_integer(v: Any) -> Optional[int]:
|
|
242
242
|
return None
|
243
243
|
try:
|
244
244
|
return int(float(v_str))
|
245
|
-
except:
|
245
|
+
except Exception:
|
246
246
|
return None
|
247
247
|
|
248
248
|
|
@@ -257,7 +257,7 @@ def validate_float(v: Any) -> Optional[float]:
|
|
257
257
|
return None
|
258
258
|
try:
|
259
259
|
return float(v_str)
|
260
|
-
except:
|
260
|
+
except Exception:
|
261
261
|
return None
|
262
262
|
|
263
263
|
|
@@ -333,7 +333,7 @@ def validate_bool(v: Any) -> bool:
|
|
333
333
|
return True
|
334
334
|
elif v_str in false_values:
|
335
335
|
return False
|
336
|
-
except:
|
336
|
+
except Exception:
|
337
337
|
pass
|
338
338
|
|
339
339
|
return False
|
@@ -2091,11 +2091,6 @@ def sanitize(instance: Any, schema: dict[str, Any]) -> Any:
|
|
2091
2091
|
return __sanitize_instance(instance, expanded_schema)
|
2092
2092
|
|
2093
2093
|
|
2094
|
-
import copy
|
2095
|
-
import json
|
2096
|
-
from .mime import generate_blake2b_hash_from_string
|
2097
|
-
|
2098
|
-
|
2099
2094
|
def compute_schema_data_id(json_schema: dict[str, Any]) -> str:
|
2100
2095
|
"""Returns the schema_data_id for a given JSON schema.
|
2101
2096
|
|
{uiform → retab}/_utils/mime.py
RENAMED
@@ -4,16 +4,17 @@ import io
|
|
4
4
|
import json
|
5
5
|
import mimetypes
|
6
6
|
from pathlib import Path
|
7
|
-
from typing import
|
7
|
+
from typing import Sequence, TypeVar, get_args
|
8
8
|
|
9
9
|
import httpx
|
10
10
|
import PIL.Image
|
11
|
+
import puremagic
|
11
12
|
from pydantic import HttpUrl
|
12
13
|
|
13
14
|
from ..types.mime import MIMEData
|
14
15
|
from ..types.modalities import SUPPORTED_TYPES
|
15
16
|
|
16
|
-
T = TypeVar(
|
17
|
+
T = TypeVar("T")
|
17
18
|
|
18
19
|
|
19
20
|
def generate_blake2b_hash_from_bytes(bytes_: bytes) -> str:
|
@@ -25,7 +26,7 @@ def generate_blake2b_hash_from_base64(base64_string: str) -> str:
|
|
25
26
|
|
26
27
|
|
27
28
|
def generate_blake2b_hash_from_string(input_string: str) -> str:
|
28
|
-
return generate_blake2b_hash_from_bytes(input_string.encode(
|
29
|
+
return generate_blake2b_hash_from_bytes(input_string.encode("utf-8"))
|
29
30
|
|
30
31
|
|
31
32
|
def generate_blake2b_hash_from_dict(input_dict: dict) -> str:
|
@@ -43,7 +44,7 @@ def convert_pil_image_to_mime_data(image: PIL.Image.Image) -> MIMEData:
|
|
43
44
|
"""
|
44
45
|
# Convert PIL image to base64 string
|
45
46
|
buffered = io.BytesIO()
|
46
|
-
choosen_format = image.format if (image.format and image.format.lower() in [
|
47
|
+
choosen_format = image.format if (image.format and image.format.lower() in ["png", "jpeg", "gif", "webp"]) else "JPEG"
|
47
48
|
image.save(buffered, format=choosen_format)
|
48
49
|
base64_content = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
49
50
|
|
@@ -98,13 +99,11 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
|
|
98
99
|
if isinstance(document, bytes):
|
99
100
|
# `document` is already the raw bytes
|
100
101
|
try:
|
101
|
-
import puremagic
|
102
|
-
|
103
102
|
extension = puremagic.from_string(document)
|
104
103
|
if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
|
105
104
|
extension = ".jpeg"
|
106
|
-
except:
|
107
|
-
extension =
|
105
|
+
except Exception:
|
106
|
+
extension = ".txt"
|
108
107
|
file_bytes = document
|
109
108
|
filename = "uploaded_file" + extension
|
110
109
|
elif isinstance(document, io.IOBase):
|
@@ -112,19 +111,17 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
|
|
112
111
|
file_bytes = document.read()
|
113
112
|
filename = getattr(document, "name", "uploaded_file")
|
114
113
|
filename = Path(filename).name
|
115
|
-
elif hasattr(document,
|
114
|
+
elif hasattr(document, "unicode_string") and callable(getattr(document, "unicode_string")):
|
116
115
|
with httpx.Client() as client:
|
117
116
|
url: str = document.unicode_string() # type: ignore
|
118
117
|
response = client.get(url)
|
119
118
|
response.raise_for_status()
|
120
119
|
try:
|
121
|
-
import puremagic
|
122
|
-
|
123
120
|
extension = puremagic.from_string(response.content)
|
124
121
|
if extension.lower() in [".jpg", ".jpeg", ".jfif"]:
|
125
122
|
extension = ".jpeg"
|
126
|
-
except:
|
127
|
-
extension =
|
123
|
+
except Exception:
|
124
|
+
extension = ".txt"
|
128
125
|
file_bytes = response.content # Fix: Use response.content instead of document
|
129
126
|
filename = "uploaded_file" + extension
|
130
127
|
else:
|
@@ -139,7 +136,7 @@ def prepare_mime_document(document: Path | str | bytes | io.IOBase | MIMEData |
|
|
139
136
|
encoded_content = base64.b64encode(file_bytes).decode("utf-8")
|
140
137
|
# Compute SHA-256 hash over the *base64-encoded* content
|
141
138
|
hash_obj = hashlib.sha256(encoded_content.encode("utf-8"))
|
142
|
-
|
139
|
+
hash_obj.hexdigest()
|
143
140
|
|
144
141
|
# Guess MIME type based on file extension
|
145
142
|
guessed_type, _ = mimetypes.guess_type(filename)
|
@@ -77,13 +77,19 @@ def convert_from_openai_format(messages: list[ResponseInputItemParam]) -> list[C
|
|
77
77
|
formatted_messages: list[ChatCompletionUiformMessage] = []
|
78
78
|
|
79
79
|
for message in messages:
|
80
|
+
if "role" not in message or "content" not in message:
|
81
|
+
# Mandatory fields for a message
|
82
|
+
if message.get("type") != "message":
|
83
|
+
print(f"Not supported message type: {message.get('type')}... Skipping...")
|
84
|
+
continue
|
85
|
+
|
86
|
+
role = message["role"]
|
87
|
+
content = message["content"]
|
88
|
+
|
80
89
|
if "type" not in message:
|
81
90
|
# The type is required by all other sub-types of ResponseInputItemParam except for EasyInputMessageParam and Message, which are messages.
|
82
91
|
message["type"] = "message"
|
83
92
|
|
84
|
-
if message["type"] != "message":
|
85
|
-
print(f"Not supported message type: {message['type']}... Skipping...")
|
86
|
-
continue
|
87
93
|
role = message["role"]
|
88
94
|
content = message["content"]
|
89
95
|
formatted_content: str | list[ChatCompletionContentPartParam]
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from contextlib import AbstractAsyncContextManager, AbstractContextManager
|
2
2
|
from typing import Any, AsyncGenerator, Callable, Generator, TypeVar, Union
|
3
3
|
|
4
|
-
T = TypeVar(
|
4
|
+
T = TypeVar("T")
|
5
5
|
|
6
6
|
|
7
7
|
class AsyncGeneratorContextManager(AbstractAsyncContextManager[AsyncGenerator[T, None]]):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Optional
|
2
2
|
|
3
3
|
from openai.types.completion_usage import CompletionUsage
|
4
4
|
from pydantic import BaseModel, Field
|
@@ -71,14 +71,12 @@ def compute_api_call_cost(pricing: Pricing, usage: CompletionUsage, is_ft: bool
|
|
71
71
|
total_cost = (total_text_cost + total_audio_cost) / 1e6
|
72
72
|
|
73
73
|
# Apply fine-tuning price hike if applicable
|
74
|
-
if is_ft and hasattr(pricing,
|
74
|
+
if is_ft and hasattr(pricing, "ft_price_hike"):
|
75
75
|
total_cost *= pricing.ft_price_hike
|
76
76
|
|
77
77
|
return Amount(value=total_cost, currency="USD")
|
78
78
|
|
79
79
|
|
80
|
-
|
81
|
-
|
82
80
|
def compute_cost_from_model(model: str, usage: CompletionUsage) -> Amount:
|
83
81
|
# Extract base model name for fine-tuned models like "ft:gpt-4o:uiform:4389573"
|
84
82
|
is_ft = False
|
@@ -93,7 +91,7 @@ def compute_cost_from_model(model: str, usage: CompletionUsage) -> Amount:
|
|
93
91
|
try:
|
94
92
|
model_card = get_model_card(model)
|
95
93
|
pricing = model_card.pricing
|
96
|
-
except ValueError
|
94
|
+
except ValueError:
|
97
95
|
raise ValueError(f"No pricing information found for model: {model}")
|
98
96
|
|
99
97
|
return compute_api_call_cost(pricing, usage, is_ft)
|
@@ -124,46 +122,48 @@ class CompletionsUsage(BaseModel):
|
|
124
122
|
model: Optional[str] = Field(default=None, description="When group_by=model, this field provides the model name of the grouped usage result.")
|
125
123
|
batch: Optional[bool] = Field(default=None, description="When group_by=batch, this field tells whether the grouped usage result is batch or not.")
|
126
124
|
|
125
|
+
|
127
126
|
########################
|
128
127
|
# DETAILED COST BREAKDOWN
|
129
128
|
########################
|
130
129
|
|
130
|
+
|
131
131
|
class TokenCounts(BaseModel):
|
132
132
|
"""Detailed breakdown of token counts by type and category."""
|
133
|
-
|
133
|
+
|
134
134
|
# Prompt token counts
|
135
135
|
prompt_regular_text: int
|
136
136
|
prompt_cached_text: int
|
137
137
|
prompt_audio: int
|
138
|
-
|
138
|
+
|
139
139
|
# Completion token counts
|
140
140
|
completion_regular_text: int
|
141
141
|
completion_audio: int
|
142
|
-
|
142
|
+
|
143
143
|
# Total tokens (should match sum of all components)
|
144
144
|
total_tokens: int
|
145
145
|
|
146
146
|
|
147
147
|
class CostBreakdown(BaseModel):
|
148
148
|
"""Detailed breakdown of API call costs by token type and usage category."""
|
149
|
-
|
149
|
+
|
150
150
|
# Total cost amount
|
151
151
|
total: Amount
|
152
|
-
|
152
|
+
|
153
153
|
# Text token costs broken down by category
|
154
154
|
text_prompt_cost: Amount
|
155
155
|
text_cached_cost: Amount
|
156
156
|
text_completion_cost: Amount
|
157
157
|
text_total_cost: Amount
|
158
|
-
|
158
|
+
|
159
159
|
# Audio token costs broken down by category (if applicable)
|
160
160
|
audio_prompt_cost: Optional[Amount] = None
|
161
161
|
audio_completion_cost: Optional[Amount] = None
|
162
162
|
audio_total_cost: Optional[Amount] = None
|
163
|
-
|
163
|
+
|
164
164
|
# Token counts for reference
|
165
165
|
token_counts: TokenCounts
|
166
|
-
|
166
|
+
|
167
167
|
# Model and fine-tuning information
|
168
168
|
model: str
|
169
169
|
is_fine_tuned: bool = False
|
@@ -172,7 +172,7 @@ class CostBreakdown(BaseModel):
|
|
172
172
|
def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsage, model: str, is_ft: bool = False) -> CostBreakdown:
|
173
173
|
"""
|
174
174
|
Computes a detailed price breakdown for the given token usage, based on the pricing.
|
175
|
-
|
175
|
+
|
176
176
|
Returns a CostBreakdown object containing costs broken down by token type and category.
|
177
177
|
"""
|
178
178
|
# ----- Process prompt tokens -----
|
@@ -211,7 +211,7 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
|
|
211
211
|
cost_audio_prompt = 0.0
|
212
212
|
cost_audio_completion = 0.0
|
213
213
|
total_audio_cost = 0.0
|
214
|
-
|
214
|
+
|
215
215
|
if pricing.audio and (prompt_audio > 0 or completion_audio > 0):
|
216
216
|
cost_audio_prompt = prompt_audio * pricing.audio.prompt
|
217
217
|
cost_audio_completion = completion_audio * pricing.audio.completion
|
@@ -219,27 +219,27 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
|
|
219
219
|
|
220
220
|
# Convert to dollars (divide by 1M) and create Amount objects
|
221
221
|
ft_multiplier = pricing.ft_price_hike if is_ft else 1.0
|
222
|
-
|
222
|
+
|
223
223
|
# Create Amount objects for each cost category
|
224
224
|
text_prompt_amount = Amount(value=(cost_text_prompt / 1e6) * ft_multiplier, currency="USD")
|
225
225
|
text_cached_amount = Amount(value=(cost_text_cached / 1e6) * ft_multiplier, currency="USD")
|
226
226
|
text_completion_amount = Amount(value=(cost_text_completion / 1e6) * ft_multiplier, currency="USD")
|
227
227
|
text_total_amount = Amount(value=(total_text_cost / 1e6) * ft_multiplier, currency="USD")
|
228
|
-
|
228
|
+
|
229
229
|
# Audio amounts (if applicable)
|
230
230
|
audio_prompt_amount = None
|
231
231
|
audio_completion_amount = None
|
232
232
|
audio_total_amount = None
|
233
|
-
|
233
|
+
|
234
234
|
if pricing.audio and (prompt_audio > 0 or completion_audio > 0):
|
235
235
|
audio_prompt_amount = Amount(value=(cost_audio_prompt / 1e6) * ft_multiplier, currency="USD")
|
236
236
|
audio_completion_amount = Amount(value=(cost_audio_completion / 1e6) * ft_multiplier, currency="USD")
|
237
237
|
audio_total_amount = Amount(value=(total_audio_cost / 1e6) * ft_multiplier, currency="USD")
|
238
|
-
|
238
|
+
|
239
239
|
# Total cost
|
240
240
|
total_cost = (total_text_cost + total_audio_cost) / 1e6 * ft_multiplier
|
241
241
|
total_amount = Amount(value=total_cost, currency="USD")
|
242
|
-
|
242
|
+
|
243
243
|
# Create TokenCounts object with token usage breakdown
|
244
244
|
token_counts = TokenCounts(
|
245
245
|
prompt_regular_text=prompt_regular_text,
|
@@ -247,9 +247,9 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
|
|
247
247
|
prompt_audio=prompt_audio,
|
248
248
|
completion_regular_text=completion_regular_text,
|
249
249
|
completion_audio=completion_audio,
|
250
|
-
total_tokens=usage.total_tokens
|
250
|
+
total_tokens=usage.total_tokens,
|
251
251
|
)
|
252
|
-
|
252
|
+
|
253
253
|
return CostBreakdown(
|
254
254
|
total=total_amount,
|
255
255
|
text_prompt_cost=text_prompt_amount,
|
@@ -261,28 +261,28 @@ def compute_api_call_cost_with_breakdown(pricing: Pricing, usage: CompletionUsag
|
|
261
261
|
audio_total_cost=audio_total_amount,
|
262
262
|
token_counts=token_counts,
|
263
263
|
model=model,
|
264
|
-
is_fine_tuned=is_ft
|
264
|
+
is_fine_tuned=is_ft,
|
265
265
|
)
|
266
266
|
|
267
267
|
|
268
268
|
def compute_cost_from_model_with_breakdown(model: str, usage: CompletionUsage) -> CostBreakdown:
|
269
269
|
"""
|
270
270
|
Computes a detailed cost breakdown for an API call using the specified model and usage.
|
271
|
-
|
271
|
+
|
272
272
|
Args:
|
273
273
|
model: The model name (can be a fine-tuned model like "ft:gpt-4o:uiform:4389573")
|
274
274
|
usage: Token usage statistics for the API call
|
275
|
-
|
275
|
+
|
276
276
|
Returns:
|
277
277
|
CostBreakdown object with detailed cost information
|
278
|
-
|
278
|
+
|
279
279
|
Raises:
|
280
280
|
ValueError: If no pricing information is found for the model
|
281
281
|
"""
|
282
282
|
# Extract base model name for fine-tuned models like "ft:gpt-4o:uiform:4389573"
|
283
283
|
original_model = model
|
284
284
|
is_ft = False
|
285
|
-
|
285
|
+
|
286
286
|
if model.startswith("ft:"):
|
287
287
|
# Split by colon and take the second part (index 1) which contains the base model
|
288
288
|
parts = model.split(":")
|
@@ -294,7 +294,7 @@ def compute_cost_from_model_with_breakdown(model: str, usage: CompletionUsage) -
|
|
294
294
|
try:
|
295
295
|
model_card = get_model_card(model)
|
296
296
|
pricing = model_card.pricing
|
297
|
-
except ValueError
|
297
|
+
except ValueError:
|
298
298
|
raise ValueError(f"No pricing information found for model: {original_model}")
|
299
299
|
|
300
300
|
return compute_api_call_cost_with_breakdown(pricing, usage, original_model, is_ft)
|