retab-0.0.35-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. retab-0.0.35.dist-info/METADATA +417 -0
  2. retab-0.0.35.dist-info/RECORD +111 -0
  3. retab-0.0.35.dist-info/WHEEL +5 -0
  4. retab-0.0.35.dist-info/top_level.txt +1 -0
  5. uiform/__init__.py +4 -0
  6. uiform/_resource.py +28 -0
  7. uiform/_utils/__init__.py +0 -0
  8. uiform/_utils/ai_models.py +100 -0
  9. uiform/_utils/benchmarking copy.py +588 -0
  10. uiform/_utils/benchmarking.py +485 -0
  11. uiform/_utils/chat.py +332 -0
  12. uiform/_utils/display.py +443 -0
  13. uiform/_utils/json_schema.py +2161 -0
  14. uiform/_utils/mime.py +168 -0
  15. uiform/_utils/responses.py +163 -0
  16. uiform/_utils/stream_context_managers.py +52 -0
  17. uiform/_utils/usage/__init__.py +0 -0
  18. uiform/_utils/usage/usage.py +300 -0
  19. uiform/client.py +701 -0
  20. uiform/py.typed +0 -0
  21. uiform/resources/__init__.py +0 -0
  22. uiform/resources/consensus/__init__.py +3 -0
  23. uiform/resources/consensus/client.py +114 -0
  24. uiform/resources/consensus/completions.py +252 -0
  25. uiform/resources/consensus/completions_stream.py +278 -0
  26. uiform/resources/consensus/responses.py +325 -0
  27. uiform/resources/consensus/responses_stream.py +373 -0
  28. uiform/resources/deployments/__init__.py +9 -0
  29. uiform/resources/deployments/client.py +78 -0
  30. uiform/resources/deployments/endpoints.py +322 -0
  31. uiform/resources/deployments/links.py +452 -0
  32. uiform/resources/deployments/logs.py +211 -0
  33. uiform/resources/deployments/mailboxes.py +496 -0
  34. uiform/resources/deployments/outlook.py +531 -0
  35. uiform/resources/deployments/tests.py +158 -0
  36. uiform/resources/documents/__init__.py +3 -0
  37. uiform/resources/documents/client.py +255 -0
  38. uiform/resources/documents/extractions.py +441 -0
  39. uiform/resources/evals.py +812 -0
  40. uiform/resources/files.py +24 -0
  41. uiform/resources/finetuning.py +62 -0
  42. uiform/resources/jsonlUtils.py +1046 -0
  43. uiform/resources/models.py +45 -0
  44. uiform/resources/openai_example.py +22 -0
  45. uiform/resources/processors/__init__.py +3 -0
  46. uiform/resources/processors/automations/__init__.py +9 -0
  47. uiform/resources/processors/automations/client.py +78 -0
  48. uiform/resources/processors/automations/endpoints.py +317 -0
  49. uiform/resources/processors/automations/links.py +356 -0
  50. uiform/resources/processors/automations/logs.py +211 -0
  51. uiform/resources/processors/automations/mailboxes.py +435 -0
  52. uiform/resources/processors/automations/outlook.py +444 -0
  53. uiform/resources/processors/automations/tests.py +158 -0
  54. uiform/resources/processors/client.py +474 -0
  55. uiform/resources/prompt_optimization.py +76 -0
  56. uiform/resources/schemas.py +369 -0
  57. uiform/resources/secrets/__init__.py +9 -0
  58. uiform/resources/secrets/client.py +20 -0
  59. uiform/resources/secrets/external_api_keys.py +109 -0
  60. uiform/resources/secrets/webhook.py +62 -0
  61. uiform/resources/usage.py +271 -0
  62. uiform/types/__init__.py +0 -0
  63. uiform/types/ai_models.py +645 -0
  64. uiform/types/automations/__init__.py +0 -0
  65. uiform/types/automations/cron.py +58 -0
  66. uiform/types/automations/endpoints.py +21 -0
  67. uiform/types/automations/links.py +28 -0
  68. uiform/types/automations/mailboxes.py +60 -0
  69. uiform/types/automations/outlook.py +68 -0
  70. uiform/types/automations/webhooks.py +21 -0
  71. uiform/types/chat.py +8 -0
  72. uiform/types/completions.py +93 -0
  73. uiform/types/consensus.py +10 -0
  74. uiform/types/db/__init__.py +0 -0
  75. uiform/types/db/annotations.py +24 -0
  76. uiform/types/db/files.py +36 -0
  77. uiform/types/deployments/__init__.py +0 -0
  78. uiform/types/deployments/cron.py +59 -0
  79. uiform/types/deployments/endpoints.py +28 -0
  80. uiform/types/deployments/links.py +36 -0
  81. uiform/types/deployments/mailboxes.py +67 -0
  82. uiform/types/deployments/outlook.py +76 -0
  83. uiform/types/deployments/webhooks.py +21 -0
  84. uiform/types/documents/__init__.py +0 -0
  85. uiform/types/documents/correct_orientation.py +13 -0
  86. uiform/types/documents/create_messages.py +226 -0
  87. uiform/types/documents/extractions.py +297 -0
  88. uiform/types/evals.py +207 -0
  89. uiform/types/events.py +76 -0
  90. uiform/types/extractions.py +85 -0
  91. uiform/types/jobs/__init__.py +0 -0
  92. uiform/types/jobs/base.py +150 -0
  93. uiform/types/jobs/batch_annotation.py +22 -0
  94. uiform/types/jobs/evaluation.py +133 -0
  95. uiform/types/jobs/finetune.py +6 -0
  96. uiform/types/jobs/prompt_optimization.py +41 -0
  97. uiform/types/jobs/webcrawl.py +6 -0
  98. uiform/types/logs.py +231 -0
  99. uiform/types/mime.py +257 -0
  100. uiform/types/modalities.py +68 -0
  101. uiform/types/pagination.py +6 -0
  102. uiform/types/schemas/__init__.py +0 -0
  103. uiform/types/schemas/enhance.py +53 -0
  104. uiform/types/schemas/evaluate.py +55 -0
  105. uiform/types/schemas/generate.py +32 -0
  106. uiform/types/schemas/layout.py +58 -0
  107. uiform/types/schemas/object.py +631 -0
  108. uiform/types/schemas/templates.py +107 -0
  109. uiform/types/secrets/__init__.py +0 -0
  110. uiform/types/secrets/external_api_keys.py +22 -0
  111. uiform/types/standards.py +39 -0
uiform/_utils/display.py
@@ -0,0 +1,443 @@
+ import base64
+ import json
+ from io import BytesIO
+ from math import ceil
+ from pathlib import Path
+ from typing import List, Literal, Optional, TypedDict
+
+ import numpy as np
+ import requests
+ import tiktoken  # For text tokenization
+ from PIL import Image
+ from rich.console import Console
+ from rich.table import Table
+
+
+ class TokenStats(TypedDict):
+     min: float
+     max: float
+     mean: float
+     median: float
+     p5: float
+     p95: float
+
+
+ class TokenCounts(TypedDict):
+     input_text_tokens: int
+     output_text_tokens: int
+     input_image_tokens: int
+     output_image_tokens: int
+
+
+ class MetricCategory(TypedDict):
+     num_examples: int
+     total_tokens: TokenStats
+     input_tokens: TokenStats
+     output_tokens: TokenStats
+     sum_total_tokens: float
+     sum_input_tokens: float
+     sum_output_tokens: float
+     num_examples_over_token_limit: int
+
+
+ class Metrics(TypedDict):
+     Text: MetricCategory
+     Image: MetricCategory
+     Total: MetricCategory
+
+
+ def count_text_tokens(content: str, encoding_name: str = "cl100k_base") -> int:
+     """
+     Count the number of tokens in a given text content using the specified encoding.
+     """
+     enc = tiktoken.get_encoding(encoding_name)
+     return len(enc.encode(content))
+
+
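For reference, a quick check of the counter above, assuming the default cl100k_base vocabulary (where "hello world" splits into "hello" and " world"):

assert count_text_tokens("hello world") == 2  # two tokens under cl100k_base
assert count_text_tokens("") == 0             # empty string encodes to no tokens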
+ def count_image_tokens(image_url: str, detail: Literal["low", "high", "auto"] = "high") -> int:
+     base_token_cost = 85  # flat cost charged for every image
+     token_per_tile = 170  # cost per 512×512 tile in high detail
+
+     # 1. Decide between detail=low and detail=high
+     #    (detail=auto is treated as high here)
+     if detail == "low":
+         # 2. Low detail => always 85 tokens
+         return base_token_cost
+     else:
+         assert detail in ("high", "auto")
+         # 3. High detail => two-step scaling + tile-based cost
+
+         # (a) Get the raw image dimensions
+         try:
+             if image_url.startswith("data:image"):
+                 header, encoded_data = image_url.split(",", 1)
+                 image_data = base64.b64decode(encoded_data)
+                 img = Image.open(BytesIO(image_data))
+             else:
+                 # Assume an HTTP(S) URL; anything else (e.g. a local path)
+                 # raises and falls through to the fallback below
+                 response = requests.get(image_url, timeout=5)
+                 response.raise_for_status()
+                 img = Image.open(BytesIO(response.content))
+
+             width, height = img.size
+         except Exception:
+             # If we fail to fetch or decode the image, fall back to the
+             # base cost plus one tile
+             return base_token_cost + token_per_tile
+
+         # (b) Scale down so neither dimension exceeds 2048
+         max_side = max(width, height)
+         if max_side > 2048:
+             scale_factor = 2048.0 / max_side
+             width = int(width * scale_factor)
+             height = int(height * scale_factor)
+
+         # (c) Upscale if the shortest side is under 768
+         min_side = min(width, height)
+         if min_side < 768:
+             upscale_factor = 768.0 / min_side
+             width = int(width * upscale_factor)
+             height = int(height * upscale_factor)
+
+         # (d) Count 512×512 tiles in the scaled image
+         tiles_wide = ceil(width / 512)
+         tiles_high = ceil(height / 512)
+         total_tiles = tiles_wide * tiles_high
+
+         return base_token_cost + (token_per_tile * total_tiles)
+
+
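As a sanity check of the tiling arithmetic above: a 1024×1024 image at detail="high" needs no rescaling (it fits within 2048 and its shorter side is at least 768), tiles into a 2×2 grid, and so should cost 85 + 4 × 170 = 765 tokens. A minimal sketch using a synthetic in-memory PNG, so no network fetch is involved:

import base64
from io import BytesIO
from PIL import Image

# Build a 1024×1024 test image and wrap it in a data URL
buf = BytesIO()
Image.new("RGB", (1024, 1024)).save(buf, format="PNG")
data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

assert count_image_tokens(data_url, detail="high") == 85 + 4 * 170  # 765
assert count_image_tokens(data_url, detail="low") == 85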
+ def process_jsonl_file(jsonl_path: str) -> List[TokenCounts]:
+     """
+     Process a JSONL file and calculate the text and image tokens for each example.
+     Returns a list of dictionaries with input/output token counts for text and images.
+     """
+     results = []
+
+     with open(jsonl_path, "r", encoding="utf-8") as file:
+         for line in file:
+             example = json.loads(line)
+             input_text_tokens = 0
+             output_text_tokens = 0
+             input_image_tokens = 0
+             output_image_tokens = 0
+
+             for message in example.get("messages", []):
+                 role = message.get("role")
+                 content = message.get("content")
+
+                 if isinstance(content, str):
+                     # Count text tokens based on role
+                     if role in ["developer", "system", "user"]:
+                         input_text_tokens += count_text_tokens(content)
+                     elif role == "assistant":
+                         output_text_tokens += count_text_tokens(content)
+
+                 elif isinstance(content, list):  # Multi-part content: images and text
+                     for item in content:
+                         if item.get("type") == "image_url" and "image_url" in item:
+                             image_url = item["image_url"]["url"]
+                             tokens = count_image_tokens(image_url)
+                             if role in ["developer", "system", "user"]:
+                                 input_image_tokens += tokens
+                             elif role == "assistant":
+                                 output_image_tokens += tokens
+                         elif item.get("type") == "text":
+                             if role in ["developer", "system", "user"]:
+                                 input_text_tokens += count_text_tokens(item["text"])
+                             elif role == "assistant":
+                                 output_text_tokens += count_text_tokens(item["text"])
+
+             results.append(
+                 TokenCounts(
+                     input_text_tokens=input_text_tokens,
+                     output_text_tokens=output_text_tokens,
+                     input_image_tokens=input_image_tokens,
+                     output_image_tokens=output_image_tokens,
+                 )
+             )
+
+     return results
+
+
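The reader above expects OpenAI-style chat messages: one JSON object per line with a `messages` array whose `content` is either a plain string or a list of typed parts. A hypothetical record and call (the path is a placeholder):

# dataset.jsonl — one record per line, e.g.:
# {"messages": [
#     {"role": "system", "content": "Extract the invoice total."},
#     {"role": "user", "content": [
#         {"type": "text", "text": "Here is the document:"},
#         {"type": "image_url", "image_url": {"url": "https://example.com/invoice.png", "detail": "high"}}
#     ]},
#     {"role": "assistant", "content": "{\"total\": 1234.5}"}
# ]}
counts = process_jsonl_file("dataset.jsonl")
print(counts[0]["input_text_tokens"], counts[0]["input_image_tokens"])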
+ def calculate_statistics(data: List[int]) -> TokenStats:
+     """
+     Calculate statistics for a list of numbers.
+     """
+     if not data:
+         return {"min": 0, "max": 0, "mean": 0, "median": 0, "p5": 0, "p95": 0}
+
+     return {
+         "min": float(min(data)),
+         "max": float(max(data)),
+         "mean": float(np.mean(data)),
+         "median": float(np.median(data)),
+         "p5": float(np.percentile(data, 5)),
+         "p95": float(np.percentile(data, 95)),
+     }
+
+
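For instance, with NumPy's default linear interpolation the percentiles fall between sorted values:

stats = calculate_statistics([100, 200, 300, 400])
# {"min": 100.0, "max": 400.0, "mean": 250.0, "median": 250.0, "p5": 115.0, "p95": 385.0}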
+ def process_dataset_and_compute_metrics(jsonl_path: Path | str, token_limit: int = 128000) -> Metrics:
+     """
+     Process the dataset to compute metrics for Text, Image, and Total tokens.
+     """
+     # Initialize metrics with one empty category per section
+     def _empty_category() -> MetricCategory:
+         return MetricCategory(
+             num_examples=0,
+             total_tokens=TokenStats(min=0, max=0, mean=0, median=0, p5=0, p95=0),
+             input_tokens=TokenStats(min=0, max=0, mean=0, median=0, p5=0, p95=0),
+             output_tokens=TokenStats(min=0, max=0, mean=0, median=0, p5=0, p95=0),
+             sum_total_tokens=0,
+             sum_input_tokens=0,
+             sum_output_tokens=0,
+             num_examples_over_token_limit=0,
+         )
+
+     metrics: Metrics = {"Text": _empty_category(), "Image": _empty_category(), "Total": _empty_category()}
+
+     # Accumulate token counts
+     input_text_tokens = []
+     output_text_tokens = []
+     messages_text_tokens = []
+
+     input_image_tokens = []
+     output_image_tokens = []
+     messages_image_tokens = []
+
+     input_total_tokens = []
+     output_total_tokens = []
+     messages_total_tokens = []
+
+     with open(jsonl_path, "r", encoding="utf-8") as file:
+         for line in file:
+             example = json.loads(line)
+
+             input_text_tokens_example = 0
+             output_text_tokens_example = 0
+
+             input_image_tokens_example = 0
+             output_image_tokens_example = 0
+
+             for message in example.get("messages", []):
+                 role = message.get("role")
+                 content = message.get("content")
+
+                 if isinstance(content, str):
+                     if role in ["developer", "system", "user"]:
+                         input_text_tokens_example += count_text_tokens(content)
+                     elif role == "assistant":
+                         output_text_tokens_example += count_text_tokens(content)
+                 elif isinstance(content, list):  # Handle image and text parts
+                     for item in content:
+                         if item.get("type") == "image_url" and "image_url" in item:
+                             image_url = item["image_url"]["url"]
+                             # "high" mirrors the count_image_tokens default when detail is omitted
+                             detail = item["image_url"].get("detail", "high")
+                             tokens = count_image_tokens(image_url, detail)
+                             if role in ["developer", "system", "user"]:
+                                 input_image_tokens_example += tokens
+                             elif role == "assistant":
+                                 output_image_tokens_example += tokens
+
+                         elif item.get("type") == "text":
+                             if role in ["developer", "system", "user"]:
+                                 input_text_tokens_example += count_text_tokens(item["text"])
+                             elif role == "assistant":
+                                 output_text_tokens_example += count_text_tokens(item["text"])
+
+             # Calculate totals for the example
+             example_total_tokens = input_text_tokens_example + output_text_tokens_example + input_image_tokens_example + output_image_tokens_example
+
+             # Add to accumulators
+             input_text_tokens.append(input_text_tokens_example)
+             output_text_tokens.append(output_text_tokens_example)
+             messages_text_tokens.append(input_text_tokens_example + output_text_tokens_example)
+
+             input_image_tokens.append(input_image_tokens_example)
+             output_image_tokens.append(output_image_tokens_example)
+             messages_image_tokens.append(input_image_tokens_example + output_image_tokens_example)
+
+             input_total_tokens.append(input_text_tokens_example + input_image_tokens_example)
+             output_total_tokens.append(output_text_tokens_example + output_image_tokens_example)
+             messages_total_tokens.append(example_total_tokens)
+
+             # Count examples over the token limit
+             if input_text_tokens_example > token_limit:
+                 metrics["Text"]["num_examples_over_token_limit"] += 1
+             if input_image_tokens_example > token_limit:
+                 metrics["Image"]["num_examples_over_token_limit"] += 1
+             if example_total_tokens > token_limit:
+                 metrics["Total"]["num_examples_over_token_limit"] += 1
+
+     # Update metrics for Text, Image, and Total
+     metrics["Text"]["num_examples"] = len(input_text_tokens)
+     metrics["Text"]["total_tokens"] = calculate_statistics(messages_text_tokens)
+     metrics["Text"]["input_tokens"] = calculate_statistics(input_text_tokens)
+     metrics["Text"]["output_tokens"] = calculate_statistics(output_text_tokens)
+     metrics["Text"]["sum_input_tokens"] = sum(input_text_tokens)
+     metrics["Text"]["sum_output_tokens"] = sum(output_text_tokens)
+     metrics["Text"]["sum_total_tokens"] = sum(messages_text_tokens)
+
+     metrics["Image"]["num_examples"] = len(input_image_tokens)
+     metrics["Image"]["total_tokens"] = calculate_statistics(messages_image_tokens)
+     metrics["Image"]["input_tokens"] = calculate_statistics(input_image_tokens)
+     metrics["Image"]["output_tokens"] = calculate_statistics(output_image_tokens)
+     metrics["Image"]["sum_input_tokens"] = sum(input_image_tokens)
+     metrics["Image"]["sum_output_tokens"] = sum(output_image_tokens)
+     metrics["Image"]["sum_total_tokens"] = sum(messages_image_tokens)
+
+     metrics["Total"]["num_examples"] = len(input_total_tokens)
+     metrics["Total"]["total_tokens"] = calculate_statistics(messages_total_tokens)
+     metrics["Total"]["input_tokens"] = calculate_statistics(input_total_tokens)
+     metrics["Total"]["output_tokens"] = calculate_statistics(output_total_tokens)
+     metrics["Total"]["sum_input_tokens"] = sum(input_total_tokens)
+     metrics["Total"]["sum_output_tokens"] = sum(output_total_tokens)
+     metrics["Total"]["sum_total_tokens"] = sum(messages_total_tokens)
+
+     return metrics
+
+
+ def display_metrics(metrics: Metrics, input_token_price: Optional[float] = None, output_token_price: Optional[float] = None) -> None:
+     """
+     Display the metrics dictionary in a compact table with min/max, mean/median, and p5/p95 on the same row.
+     """
+     console = Console(style="on grey23")
+     table = Table(title="Dataset Metrics", show_lines=True)
+
+     # Add columns
+     table.add_column("Metric", justify="left", style="#BDE8F6", no_wrap=True)
+     table.add_column("Text", justify="right", style="#C2BDF6")
+     table.add_column("Image", justify="right", style="#F6BDBD")
+     table.add_column("Total", justify="right", style="#F6E4BD")
+
+     # Add rows
+     table.add_row("Num Examples", str(metrics["Text"]["num_examples"]), str(metrics["Image"]["num_examples"]), str(metrics["Total"]["num_examples"]))
+
+     table.add_row(
+         "Examples Over Limit",
+         str(metrics["Text"]["num_examples_over_token_limit"]),
+         str(metrics["Image"]["num_examples_over_token_limit"]),
+         str(metrics["Total"]["num_examples_over_token_limit"]),
+     )
+
+     table.add_row("")  # Empty row for spacing
+
+     # Rows for input tokens
+     table.add_row(
+         "Min / Max Input Tokens",
+         f"{metrics['Text']['input_tokens']['min']:.0f} / {metrics['Text']['input_tokens']['max']:.0f}",
+         f"{metrics['Image']['input_tokens']['min']:.0f} / {metrics['Image']['input_tokens']['max']:.0f}",
+         f"{metrics['Total']['input_tokens']['min']:.0f} / {metrics['Total']['input_tokens']['max']:.0f}",
+     )
+
+     table.add_row(
+         "Mean / Median Input Tokens",
+         f"{metrics['Text']['input_tokens']['mean']:.0f} / {metrics['Text']['input_tokens']['median']:.0f}",
+         f"{metrics['Image']['input_tokens']['mean']:.0f} / {metrics['Image']['input_tokens']['median']:.0f}",
+         f"{metrics['Total']['input_tokens']['mean']:.0f} / {metrics['Total']['input_tokens']['median']:.0f}",
+     )
+
+     table.add_row(
+         "P5 / P95 Input Tokens",
+         f"{metrics['Text']['input_tokens']['p5']:.0f} / {metrics['Text']['input_tokens']['p95']:.0f}",
+         f"{metrics['Image']['input_tokens']['p5']:.0f} / {metrics['Image']['input_tokens']['p95']:.0f}",
+         f"{metrics['Total']['input_tokens']['p5']:.0f} / {metrics['Total']['input_tokens']['p95']:.0f}",
+     )
+
+     table.add_row("Sum Input Tokens", f"{metrics['Text']['sum_input_tokens']}", f"{metrics['Image']['sum_input_tokens']}", f"{metrics['Total']['sum_input_tokens']}")
+
+     table.add_row("")  # Empty row for spacing
+
+     # Rows for output tokens
+     table.add_row(
+         "Min / Max Output Tokens",
+         f"{metrics['Text']['output_tokens']['min']:.0f} / {metrics['Text']['output_tokens']['max']:.0f}",
+         f"{metrics['Image']['output_tokens']['min']:.0f} / {metrics['Image']['output_tokens']['max']:.0f}",
+         f"{metrics['Total']['output_tokens']['min']:.0f} / {metrics['Total']['output_tokens']['max']:.0f}",
+     )
+
+     table.add_row(
+         "Mean / Median Output Tokens",
+         f"{metrics['Text']['output_tokens']['mean']:.0f} / {metrics['Text']['output_tokens']['median']:.0f}",
+         f"{metrics['Image']['output_tokens']['mean']:.0f} / {metrics['Image']['output_tokens']['median']:.0f}",
+         f"{metrics['Total']['output_tokens']['mean']:.0f} / {metrics['Total']['output_tokens']['median']:.0f}",
+     )
+
+     table.add_row(
+         "P5 / P95 Output Tokens",
+         f"{metrics['Text']['output_tokens']['p5']:.0f} / {metrics['Text']['output_tokens']['p95']:.0f}",
+         f"{metrics['Image']['output_tokens']['p5']:.0f} / {metrics['Image']['output_tokens']['p95']:.0f}",
+         f"{metrics['Total']['output_tokens']['p5']:.0f} / {metrics['Total']['output_tokens']['p95']:.0f}",
+     )
+
+     table.add_row("Sum Output Tokens", f"{metrics['Text']['sum_output_tokens']}", f"{metrics['Image']['sum_output_tokens']}", f"{metrics['Total']['sum_output_tokens']}")
+
+     table.add_row("")  # Empty row for spacing
+
+     # Rows for total tokens (input + output)
+     table.add_row(
+         "Min / Max Tokens",
+         f"{metrics['Text']['total_tokens']['min']:.0f} / {metrics['Text']['total_tokens']['max']:.0f}",
+         f"{metrics['Image']['total_tokens']['min']:.0f} / {metrics['Image']['total_tokens']['max']:.0f}",
+         f"{metrics['Total']['total_tokens']['min']:.0f} / {metrics['Total']['total_tokens']['max']:.0f}",
+     )
+
+     table.add_row(
+         "Mean / Median Tokens",
+         f"{metrics['Text']['total_tokens']['mean']:.0f} / {metrics['Text']['total_tokens']['median']:.0f}",
+         f"{metrics['Image']['total_tokens']['mean']:.0f} / {metrics['Image']['total_tokens']['median']:.0f}",
+         f"{metrics['Total']['total_tokens']['mean']:.0f} / {metrics['Total']['total_tokens']['median']:.0f}",
+     )
+
+     table.add_row(
+         "P5 / P95 Tokens",
+         f"{metrics['Text']['total_tokens']['p5']:.0f} / {metrics['Text']['total_tokens']['p95']:.0f}",
+         f"{metrics['Image']['total_tokens']['p5']:.0f} / {metrics['Image']['total_tokens']['p95']:.0f}",
+         f"{metrics['Total']['total_tokens']['p5']:.0f} / {metrics['Total']['total_tokens']['p95']:.0f}",
+     )
+
+     table.add_row("Sum Total Tokens", f"{metrics['Text']['sum_total_tokens']}", f"{metrics['Image']['sum_total_tokens']}", f"{metrics['Total']['sum_total_tokens']}")
+
+     table.add_row("")  # Empty row for spacing
+
+     if input_token_price is not None:
+         table.add_row(
+             "Input Cost",
+             f"{metrics['Text']['sum_input_tokens'] * input_token_price:.2f} USD",
+             f"{metrics['Image']['sum_input_tokens'] * input_token_price:.2f} USD",
+             f"{metrics['Total']['sum_input_tokens'] * input_token_price:.2f} USD",
+         )
+
+     if output_token_price is not None:
+         table.add_row(
+             "Output Cost",
+             f"{metrics['Text']['sum_output_tokens'] * output_token_price:.2f} USD",
+             f"{metrics['Image']['sum_output_tokens'] * output_token_price:.2f} USD",
+             f"{metrics['Total']['sum_output_tokens'] * output_token_price:.2f} USD",
+         )
+
+     if input_token_price is not None and output_token_price is not None:
+         # Price input and output tokens at their respective rates
+         table.add_row(
+             "Total Cost",
+             f"{metrics['Text']['sum_input_tokens'] * input_token_price + metrics['Text']['sum_output_tokens'] * output_token_price:.2f} USD",
+             f"{metrics['Image']['sum_input_tokens'] * input_token_price + metrics['Image']['sum_output_tokens'] * output_token_price:.2f} USD",
+             f"{metrics['Total']['sum_input_tokens'] * input_token_price + metrics['Total']['sum_output_tokens'] * output_token_price:.2f} USD",
+         )
+
+     # Print the table
+     console.print(table)
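Putting the pieces together, a minimal driver sketch; the path is a placeholder and the per-token prices are illustrative only (they are USD per token, so e.g. $2.50 per million input tokens is 2.5e-6):

metrics = process_dataset_and_compute_metrics("dataset.jsonl", token_limit=128000)
display_metrics(
    metrics,
    input_token_price=2.5e-6,  # placeholder: USD per input token
    output_token_price=1e-5,   # placeholder: USD per output token
)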