scorebook 0.0.14__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scorebook/__init__.py +2 -0
- scorebook/dashboard/credentials.py +34 -4
- scorebook/eval_datasets/eval_dataset.py +2 -2
- scorebook/evaluate/_async/evaluate_async.py +27 -11
- scorebook/evaluate/_sync/evaluate.py +27 -11
- scorebook/metrics/README.md +121 -0
- scorebook/metrics/__init__.py +8 -0
- scorebook/metrics/accuracy.py +2 -6
- scorebook/metrics/bertscore.py +50 -0
- scorebook/metrics/bleu.py +82 -0
- scorebook/metrics/core/__init__.py +1 -0
- scorebook/metrics/{metric_base.py → core/metric_base.py} +1 -2
- scorebook/metrics/core/metric_registry.py +195 -0
- scorebook/metrics/exactmatch.py +95 -0
- scorebook/metrics/f1.py +96 -0
- scorebook/metrics/precision.py +84 -9
- scorebook/metrics/recall.py +94 -0
- scorebook/metrics/rouge.py +85 -0
- scorebook/score/score_helpers.py +28 -11
- scorebook/types.py +2 -2
- scorebook/utils/progress_bars.py +58 -786
- {scorebook-0.0.14.dist-info → scorebook-0.0.15.dist-info}/METADATA +32 -24
- scorebook-0.0.15.dist-info/RECORD +110 -0
- {scorebook-0.0.14.dist-info → scorebook-0.0.15.dist-info}/WHEEL +1 -1
- tutorials/README.md +147 -0
- tutorials/__init__.py +5 -0
- tutorials/examples/1-score/1-scoring_model_accuracy.py +47 -0
- tutorials/examples/1-score/2-scoring_model_bleu.py +46 -0
- tutorials/examples/1-score/3-scoring_model_f1.py +64 -0
- tutorials/examples/1-score/4-scoring_model_rouge.py +64 -0
- tutorials/examples/1-score/5-scoring_model_exact_match.py +84 -0
- tutorials/examples/1-score/6-scoring_with_bertscore.py +57 -0
- tutorials/examples/1-score/__init__.py +0 -0
- tutorials/examples/2-evaluate/1-evaluating_local_models.py +106 -0
- tutorials/examples/2-evaluate/2-evaluating_local_models_with_batching.py +108 -0
- tutorials/examples/2-evaluate/3-evaluating_cloud_models.py +109 -0
- tutorials/examples/2-evaluate/4-evaluating_cloud_models_with_batching.py +170 -0
- tutorials/examples/2-evaluate/5-hyperparameter_sweeps.py +122 -0
- tutorials/examples/2-evaluate/6-inference_pipelines.py +141 -0
- tutorials/examples/3-evaluation_datasets/1-evaluation_datasets_from_files.py +110 -0
- tutorials/examples/3-evaluation_datasets/2-evaluation_datasets_from_huggingface.py +101 -0
- tutorials/examples/3-evaluation_datasets/3-evaluation_datasets_from_huggingface_with_yaml_configs.py +110 -0
- tutorials/examples/3-evaluation_datasets/example_datasets/basic_questions.csv +11 -0
- tutorials/examples/3-evaluation_datasets/example_datasets/basic_questions.json +42 -0
- tutorials/examples/3-evaluation_datasets/example_yaml_configs/Cais-MMLU.yaml +19 -0
- tutorials/examples/3-evaluation_datasets/example_yaml_configs/TIGER-Lab-MMLU-Pro.yaml +18 -0
- tutorials/examples/4-adaptive_evaluations/1-adaptive_evaluation.py +114 -0
- tutorials/examples/4-adaptive_evaluations/2-adaptive_dataset_splits.py +106 -0
- tutorials/examples/5-upload_results/1-uploading_score_results.py +92 -0
- tutorials/examples/5-upload_results/2-uploading_evaluate_results.py +117 -0
- tutorials/examples/5-upload_results/3-uploading_your_results.py +153 -0
- tutorials/examples/6-providers/aws/__init__.py +1 -0
- tutorials/examples/6-providers/aws/batch_example.py +219 -0
- tutorials/examples/6-providers/portkey/__init__.py +1 -0
- tutorials/examples/6-providers/portkey/batch_example.py +120 -0
- tutorials/examples/6-providers/portkey/messages_example.py +121 -0
- tutorials/examples/6-providers/vertex/__init__.py +1 -0
- tutorials/examples/6-providers/vertex/batch_example.py +166 -0
- tutorials/examples/6-providers/vertex/messages_example.py +142 -0
- tutorials/examples/__init__.py +0 -0
- tutorials/notebooks/1-scoring.ipynb +162 -0
- tutorials/notebooks/2-evaluating.ipynb +316 -0
- tutorials/notebooks/3.1-adaptive_evaluation_phi.ipynb +354 -0
- tutorials/notebooks/3.2-adaptive_evaluation_gpt.ipynb +243 -0
- tutorials/notebooks/4-uploading_results.ipynb +175 -0
- tutorials/quickstarts/adaptive_evaluations/adaptive_evaluation_openai_demo.ipynb +229 -0
- tutorials/quickstarts/adaptive_evaluations/adaptive_evaluation_qwen_demo.ipynb +256 -0
- tutorials/quickstarts/classical_evaluations/classical_evaluation_demo.ipynb +277 -0
- tutorials/quickstarts/getting_started.ipynb +197 -0
- tutorials/utils/__init__.py +35 -0
- tutorials/utils/args_parser.py +132 -0
- tutorials/utils/output.py +23 -0
- tutorials/utils/setup.py +98 -0
- scorebook/metrics/metric_registry.py +0 -107
- scorebook-0.0.14.dist-info/RECORD +0 -53
- {scorebook-0.0.14.dist-info → scorebook-0.0.15.dist-info}/entry_points.txt +0 -0
- {scorebook-0.0.14.dist-info → scorebook-0.0.15.dist-info}/licenses/LICENSE +0 -0
scorebook/utils/progress_bars.py
CHANGED
|
@@ -1,792 +1,100 @@
|
|
|
1
1
|
"""Progress bar utilities for evaluation tracking."""
|
|
2
2
|
|
|
3
|
-
import re
|
|
4
|
-
import shutil
|
|
5
|
-
import threading
|
|
6
|
-
import time
|
|
7
3
|
from contextlib import contextmanager
|
|
8
|
-
from dataclasses import dataclass
|
|
9
|
-
from itertools import cycle
|
|
10
|
-
from typing import Callable, Generator, Optional, cast
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Generator, Optional
|
|
11
6
|
|
|
12
7
|
from tqdm.auto import tqdm
|
|
13
8
|
|
|
14
|
-
_IS_NOTEBOOK: Optional[bool] = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def _is_notebook() -> bool:
|
|
18
|
-
"""Detect if code is running in a Jupyter notebook environment.
|
|
19
|
-
|
|
20
|
-
Uses lazy evaluation with caching for efficiency.
|
|
21
|
-
"""
|
|
22
|
-
global _IS_NOTEBOOK
|
|
23
|
-
if _IS_NOTEBOOK is None:
|
|
24
|
-
try:
|
|
25
|
-
shell = get_ipython().__class__.__name__ # type: ignore[name-defined]
|
|
26
|
-
_IS_NOTEBOOK = shell == "ZMQInteractiveShell"
|
|
27
|
-
except NameError:
|
|
28
|
-
_IS_NOTEBOOK = False
|
|
29
|
-
return _IS_NOTEBOOK
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# Color codes - ANSI for terminals, plain text for notebooks
|
|
33
|
-
RESET = "\033[0m"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _make_color_func(ansi_code: str) -> Callable[[str], str]:
|
|
37
|
-
"""Create a color function that checks notebook status at runtime.
|
|
38
|
-
|
|
39
|
-
Args:
|
|
40
|
-
ansi_code: The ANSI escape code for the color (e.g., "32" for green)
|
|
41
|
-
|
|
42
|
-
Returns:
|
|
43
|
-
A function that formats text with the color, or returns plain text in notebooks
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
def color_func(text: str) -> str:
|
|
47
|
-
if _is_notebook():
|
|
48
|
-
return text
|
|
49
|
-
return f"\033[{ansi_code}m{text}\033[0m"
|
|
50
|
-
|
|
51
|
-
return color_func
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Color functions - automatically handle notebook vs terminal rendering
|
|
55
|
-
GREEN = _make_color_func("32") # Green
|
|
56
|
-
RED = _make_color_func("31") # Red
|
|
57
|
-
LIGHT_GREEN = _make_color_func("92") # Light green
|
|
58
|
-
LIGHT_RED = _make_color_func("91") # Light red
|
|
59
|
-
BLUE_BASE = _make_color_func("34") # Blue
|
|
60
|
-
BLUE_HIGHLIGHT = _make_color_func("1;34") # Bright blue
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
# Shimmer effect width (number of characters highlighted in sweep animation)
|
|
64
|
-
# Tested values: 2 (too subtle), 3 (optimal), 5 (too wide)
|
|
65
|
-
SHIMMER_WIDTH = 3
|
|
66
|
-
|
|
67
|
-
# Spinner blue shimmer colors for terminals (cycled for visual effect)
|
|
68
|
-
SPINNER_BLUE_COLORS = [
|
|
69
|
-
"\033[34m", # Standard blue
|
|
70
|
-
"\033[1;34m", # Bright blue
|
|
71
|
-
"\033[94m", # Light blue
|
|
72
|
-
"\033[36m", # Cyan
|
|
73
|
-
"\033[1;36m", # Bright cyan
|
|
74
|
-
"\033[96m", # Light cyan
|
|
75
|
-
]
|
|
76
|
-
|
|
77
|
-
# Progress bar configuration
|
|
78
|
-
PROGRESS_BAR_FORMAT = "{desc}|{bar}|" # Compact format for progress bars
|
|
79
|
-
HEADER_FORMAT = "{desc}" # Header shows only description, no bar
|
|
80
|
-
|
|
81
|
-
# Spinner update interval in seconds
|
|
82
|
-
# 0.08s = 12.5 Hz provides smooth animation without excessive CPU usage
|
|
83
|
-
# Lower values (0.05) cause flickering, higher values (0.2) appear choppy
|
|
84
|
-
SPINNER_INTERVAL_SECONDS = 0.08
|
|
85
|
-
|
|
86
|
-
# Terminal size fallback if detection fails
|
|
87
|
-
# 120 columns: Common wide terminal default
|
|
88
|
-
# 20 rows: Not used but required by shutil.get_terminal_size()
|
|
89
|
-
TERMINAL_FALLBACK_SIZE = (120, 20)
|
|
90
|
-
|
|
91
|
-
# Minimum spacing between header left and right sections
|
|
92
|
-
# Prevents sections from touching when terminal is narrow
|
|
93
|
-
MINIMUM_HEADER_SPACING = 3
|
|
94
|
-
|
|
95
|
-
# Spinner animation frames
|
|
96
|
-
SPINNER_FRAMES_UNICODE = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
|
|
97
|
-
SPINNER_FRAMES_ASCII = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def _select_spinner_frames() -> list[str]:
|
|
101
|
-
"""Select appropriate spinner frames based on terminal capabilities."""
|
|
102
|
-
import sys
|
|
103
|
-
|
|
104
|
-
encoding = sys.stdout.encoding or "ascii"
|
|
105
|
-
|
|
106
|
-
if encoding.lower() in ("utf-8", "utf8"):
|
|
107
|
-
return SPINNER_FRAMES_UNICODE
|
|
108
|
-
else:
|
|
109
|
-
return SPINNER_FRAMES_ASCII
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
# Use Braille characters for smooth rotation (fallback to ASCII if needed)
|
|
113
|
-
SPINNER_FRAMES = _select_spinner_frames()
|
|
114
|
-
|
|
115
|
-
# Progress bar labels
|
|
116
|
-
EVALUATIONS_LABEL = "Evaluations" # Label for run-level progress
|
|
117
|
-
ITEMS_LABEL = "Items" # Label for item-level progress
|
|
118
|
-
|
|
119
|
-
# Compiled regex pattern for ANSI escape codes (used for calculating visual length)
|
|
120
|
-
_ANSI_ESCAPE_PATTERN = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def _visual_length(text: str) -> int:
|
|
124
|
-
"""Calculate the visual length of text, excluding ANSI escape codes."""
|
|
125
|
-
return len(_ANSI_ESCAPE_PATTERN.sub("", text))
|
|
126
|
-
|
|
127
9
|
|
|
128
10
|
@dataclass
|
|
129
|
-
class EvaluationConfig:
|
|
130
|
-
"""Configuration for evaluation progress tracking."""
|
|
131
|
-
|
|
132
|
-
total_eval_runs: int
|
|
133
|
-
total_items: int
|
|
134
|
-
dataset_count: int
|
|
135
|
-
hyperparam_count: int
|
|
136
|
-
model_display: str
|
|
137
|
-
|
|
138
|
-
@property
|
|
139
|
-
def dataset_label(self) -> str:
|
|
140
|
-
"""Get the appropriate dataset label (singular/plural)."""
|
|
141
|
-
return "Dataset" if self.dataset_count == 1 else "Datasets"
|
|
142
|
-
|
|
143
|
-
@property
|
|
144
|
-
def hyperparam_label(self) -> str:
|
|
145
|
-
"""Get the appropriate hyperparameter label (singular/plural)."""
|
|
146
|
-
if self.hyperparam_count == 1:
|
|
147
|
-
return "Hyperparam Configuration"
|
|
148
|
-
return "Hyperparam Configurations"
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
class ProgressBarFormatter:
|
|
152
|
-
"""Handles formatting for progress bar descriptions and headers.
|
|
153
|
-
|
|
154
|
-
This class is responsible for:
|
|
155
|
-
- Formatting progress descriptions with aligned counts and percentages
|
|
156
|
-
- Building header sections with spinner, timing, and statistics
|
|
157
|
-
- Ensuring proper text alignment accounting for ANSI escape codes
|
|
158
|
-
|
|
159
|
-
The formatter maintains consistent column widths based on the maximum
|
|
160
|
-
number of digits needed for counts, ensuring progress bars don't shift
|
|
161
|
-
as numbers increment.
|
|
162
|
-
"""
|
|
163
|
-
|
|
164
|
-
def __init__(self, config: EvaluationConfig) -> None:
|
|
165
|
-
"""Initialize the formatter with configuration."""
|
|
166
|
-
self.config = config
|
|
167
|
-
self._label_width = max(len(EVALUATIONS_LABEL), len(ITEMS_LABEL))
|
|
168
|
-
self._count_width = max(len(str(config.total_eval_runs)), len(str(config.total_items)), 1)
|
|
169
|
-
|
|
170
|
-
def format_progress_description(self, label: str, completed: int, total: int) -> str:
|
|
171
|
-
"""Format a progress bar description with counts and percentage."""
|
|
172
|
-
label_str = label.ljust(self._label_width)
|
|
173
|
-
count_str = f"{completed:>{self._count_width}}/{total:>{self._count_width}}"
|
|
174
|
-
|
|
175
|
-
if total > 0:
|
|
176
|
-
percent = int((completed / total) * 100)
|
|
177
|
-
percent_str = f"{percent:>3d}%"
|
|
178
|
-
else:
|
|
179
|
-
percent_str = " --%"
|
|
180
|
-
|
|
181
|
-
return f"{label_str} {count_str} {percent_str} "
|
|
182
|
-
|
|
183
|
-
@staticmethod
|
|
184
|
-
def format_elapsed_time(elapsed_seconds: float) -> str:
|
|
185
|
-
"""Format elapsed time as mm:ss or hh:mm:ss."""
|
|
186
|
-
total_seconds = int(max(elapsed_seconds, 0))
|
|
187
|
-
hours, remainder = divmod(total_seconds, 3600)
|
|
188
|
-
minutes, seconds = divmod(remainder, 60)
|
|
189
|
-
|
|
190
|
-
if hours:
|
|
191
|
-
return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
|
|
192
|
-
return f"{minutes:02d}:{seconds:02d}"
|
|
193
|
-
|
|
194
|
-
def format_header(
|
|
195
|
-
self,
|
|
196
|
-
spinner_frame: str,
|
|
197
|
-
elapsed_seconds: float,
|
|
198
|
-
completed_runs: int,
|
|
199
|
-
failed_runs: int,
|
|
200
|
-
uploaded_runs: int,
|
|
201
|
-
upload_failed_runs: int,
|
|
202
|
-
shimmer_text: str = "",
|
|
203
|
-
) -> str:
|
|
204
|
-
"""Compose the header line with spinner, elapsed time, and run statistics."""
|
|
205
|
-
elapsed_str = ProgressBarFormatter.format_elapsed_time(elapsed_seconds)
|
|
206
|
-
left_section = self._build_left_section(spinner_frame, elapsed_str, shimmer_text)
|
|
207
|
-
right_section = ProgressBarFormatter._build_run_status_section(
|
|
208
|
-
completed_runs, failed_runs, uploaded_runs, upload_failed_runs
|
|
209
|
-
)
|
|
210
|
-
|
|
211
|
-
return ProgressBarFormatter._combine_header_sections(left_section, right_section)
|
|
212
|
-
|
|
213
|
-
def _build_left_section(
|
|
214
|
-
self, spinner_frame: str, elapsed_str: str, shimmer_text: str = ""
|
|
215
|
-
) -> str:
|
|
216
|
-
"""Build the left section of the header with spinner and evaluation info."""
|
|
217
|
-
# Apply shimmer effect to the model display name
|
|
218
|
-
evaluating_text = f"Evaluating {self.config.model_display}"
|
|
219
|
-
model_text = shimmer_text if shimmer_text else evaluating_text
|
|
220
|
-
|
|
221
|
-
return (
|
|
222
|
-
f"{spinner_frame} {model_text} ({elapsed_str}) | "
|
|
223
|
-
f"{self.config.dataset_count} {self.config.dataset_label} | "
|
|
224
|
-
f"{self.config.hyperparam_count} {self.config.hyperparam_label}"
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
@staticmethod
|
|
228
|
-
def _build_run_status_section(
|
|
229
|
-
completed_runs: int, failed_runs: int, uploaded_runs: int, upload_failed_runs: int
|
|
230
|
-
) -> tuple[str, str]:
|
|
231
|
-
"""Build the run status section with plain and colored versions."""
|
|
232
|
-
# Build base run statistics
|
|
233
|
-
run_parts = [f"RUNS PASSED: {completed_runs}"]
|
|
234
|
-
colored_run_parts = [GREEN(f"RUNS PASSED: {completed_runs}")]
|
|
235
|
-
|
|
236
|
-
if failed_runs > 0:
|
|
237
|
-
run_parts.append(f"RUNS FAILED: {failed_runs}")
|
|
238
|
-
colored_run_parts.append(RED(f"RUNS FAILED: {failed_runs}"))
|
|
239
|
-
|
|
240
|
-
# Add upload statistics if any uploads have occurred
|
|
241
|
-
if uploaded_runs > 0 or upload_failed_runs > 0:
|
|
242
|
-
run_parts.append(f"RUNS UPLOADED: {uploaded_runs}")
|
|
243
|
-
colored_run_parts.append(LIGHT_GREEN(f"RUNS UPLOADED: {uploaded_runs}"))
|
|
244
|
-
|
|
245
|
-
if upload_failed_runs > 0:
|
|
246
|
-
run_parts.append(f"UPLOADS FAILED: {upload_failed_runs}")
|
|
247
|
-
colored_run_parts.append(LIGHT_RED(f"UPLOADS FAILED: {upload_failed_runs}"))
|
|
248
|
-
|
|
249
|
-
plain = f"[{', '.join(run_parts)}]"
|
|
250
|
-
colored = f"[{', '.join(colored_run_parts)}]"
|
|
251
|
-
|
|
252
|
-
return plain, colored
|
|
253
|
-
|
|
254
|
-
@staticmethod
|
|
255
|
-
def _combine_header_sections(left_section: str, right_sections: tuple[str, str]) -> str:
|
|
256
|
-
"""Combine left and right header sections with appropriate spacing."""
|
|
257
|
-
plain_right, colored_right = right_sections
|
|
258
|
-
|
|
259
|
-
term_width = shutil.get_terminal_size(fallback=TERMINAL_FALLBACK_SIZE).columns
|
|
260
|
-
left_visual_length = _visual_length(left_section)
|
|
261
|
-
right_visual_length = len(plain_right)
|
|
262
|
-
|
|
263
|
-
# Check for terminal width overflow
|
|
264
|
-
total_content_width = left_visual_length + right_visual_length
|
|
265
|
-
if total_content_width >= term_width - MINIMUM_HEADER_SPACING:
|
|
266
|
-
# Terminal too narrow, truncate left section
|
|
267
|
-
max_left_width = term_width - right_visual_length - MINIMUM_HEADER_SPACING - 3
|
|
268
|
-
if max_left_width < 20:
|
|
269
|
-
# Terminal impossibly narrow, just show right section
|
|
270
|
-
return colored_right
|
|
271
|
-
|
|
272
|
-
# Truncate left section (strip ANSI codes for simplicity)
|
|
273
|
-
left_plain = _ANSI_ESCAPE_PATTERN.sub("", left_section)
|
|
274
|
-
left_truncated = left_plain[:max_left_width] + "..."
|
|
275
|
-
left_section = left_truncated
|
|
276
|
-
left_visual_length = len(left_truncated)
|
|
277
|
-
|
|
278
|
-
spacing = term_width - left_visual_length - right_visual_length
|
|
279
|
-
spacing = max(spacing, MINIMUM_HEADER_SPACING)
|
|
280
|
-
|
|
281
|
-
return f"{left_section}{' ' * spacing}{colored_right}"
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
class SpinnerManager:
|
|
285
|
-
"""Manages spinner animation for the progress header.
|
|
286
|
-
|
|
287
|
-
Features:
|
|
288
|
-
- Runs spinner animation in a background daemon thread
|
|
289
|
-
- Applies blue color cycling to spinner frames (terminal only)
|
|
290
|
-
- Provides shimmer sweep effect for text highlighting
|
|
291
|
-
- Thread-safe state management with locks
|
|
292
|
-
|
|
293
|
-
The spinner updates at SPINNER_INTERVAL_SECONDS frequency and
|
|
294
|
-
automatically stops when stop() is called. In notebook environments,
|
|
295
|
-
plain text frames are used without ANSI color codes. The daemon thread
|
|
296
|
-
ensures the program can exit cleanly even if the spinner doesn't stop.
|
|
297
|
-
"""
|
|
298
|
-
|
|
299
|
-
def __init__(self) -> None:
|
|
300
|
-
"""Initialize the spinner manager."""
|
|
301
|
-
self._frames = SpinnerManager._normalize_spinner_frames()
|
|
302
|
-
self._cycle: Optional[cycle] = None
|
|
303
|
-
self._stop_event = threading.Event()
|
|
304
|
-
self._thread: Optional[threading.Thread] = None
|
|
305
|
-
self.frame_width = len(self._frames[0]) if self._frames else 0
|
|
306
|
-
self._shimmer_position = 0 # Position of the shimmer sweep
|
|
307
|
-
self._spinner_color_index = 0 # Index for spinner color cycling
|
|
308
|
-
self._lock = threading.Lock() # Protects spinner state
|
|
309
|
-
|
|
310
|
-
@staticmethod
|
|
311
|
-
def _normalize_spinner_frames() -> list[str]:
|
|
312
|
-
"""Normalize spinner frames to have consistent width."""
|
|
313
|
-
if not SPINNER_FRAMES:
|
|
314
|
-
return []
|
|
315
|
-
|
|
316
|
-
width = max(len(frame) for frame in SPINNER_FRAMES)
|
|
317
|
-
return [frame.ljust(width) for frame in SPINNER_FRAMES]
|
|
318
|
-
|
|
319
|
-
def start(self, update_callback: Callable[[str], None]) -> None:
|
|
320
|
-
"""Start the spinner animation."""
|
|
321
|
-
if self._thread is not None or not self._frames:
|
|
322
|
-
return
|
|
323
|
-
|
|
324
|
-
self._stop_event.clear()
|
|
325
|
-
self._cycle = cycle(self._frames)
|
|
326
|
-
self._thread = threading.Thread(target=self._animate, args=(update_callback,), daemon=True)
|
|
327
|
-
self._thread.start()
|
|
328
|
-
|
|
329
|
-
def is_running(self) -> bool:
|
|
330
|
-
"""Check if the spinner animation is currently running."""
|
|
331
|
-
return self._thread is not None and self._thread.is_alive()
|
|
332
|
-
|
|
333
|
-
def stop(self) -> None:
|
|
334
|
-
"""Stop the spinner animation."""
|
|
335
|
-
if self._thread is None:
|
|
336
|
-
return
|
|
337
|
-
|
|
338
|
-
self._stop_event.set()
|
|
339
|
-
self._thread.join(timeout=5.0)
|
|
340
|
-
|
|
341
|
-
if self._thread.is_alive():
|
|
342
|
-
import logging
|
|
343
|
-
|
|
344
|
-
logger = logging.getLogger(__name__)
|
|
345
|
-
logger.warning("Spinner thread did not stop cleanly within 5 seconds")
|
|
346
|
-
# Thread is daemon, so it will be killed on exit anyway
|
|
347
|
-
|
|
348
|
-
self._thread = None
|
|
349
|
-
|
|
350
|
-
def get_initial_frame(self) -> str:
|
|
351
|
-
"""Get the first spinner frame with blue shimmer effect (terminals only)."""
|
|
352
|
-
if not self._frames:
|
|
353
|
-
return ""
|
|
354
|
-
frame = self._frames[0]
|
|
355
|
-
|
|
356
|
-
# Return plain frame for notebooks (no ANSI colors)
|
|
357
|
-
if _is_notebook():
|
|
358
|
-
return frame
|
|
359
|
-
|
|
360
|
-
# Add color codes for terminals
|
|
361
|
-
color = SPINNER_BLUE_COLORS[self._spinner_color_index % len(SPINNER_BLUE_COLORS)]
|
|
362
|
-
return f"{color}{frame}{RESET}"
|
|
363
|
-
|
|
364
|
-
def get_empty_frame(self) -> str:
|
|
365
|
-
"""Get an empty frame with the same width as spinner frames."""
|
|
366
|
-
return " " * self.frame_width
|
|
367
|
-
|
|
368
|
-
def get_next_spinner_frame(self) -> str:
|
|
369
|
-
"""Get the next spinner frame with blue shimmer effect (terminals only)."""
|
|
370
|
-
if not self._frames or not self._cycle:
|
|
371
|
-
return ""
|
|
372
|
-
|
|
373
|
-
frame = cast(str, next(self._cycle))
|
|
374
|
-
|
|
375
|
-
# Return plain frame for notebooks (no ANSI colors)
|
|
376
|
-
if _is_notebook():
|
|
377
|
-
return frame
|
|
378
|
-
|
|
379
|
-
# Add color codes for terminals (thread-safe)
|
|
380
|
-
with self._lock:
|
|
381
|
-
color = SPINNER_BLUE_COLORS[self._spinner_color_index % len(SPINNER_BLUE_COLORS)]
|
|
382
|
-
self._spinner_color_index += 1
|
|
383
|
-
return f"{color}{frame}{RESET}"
|
|
384
|
-
|
|
385
|
-
def get_shimmer_text(self, text: str) -> str:
|
|
386
|
-
"""Apply sweep shimmer effect to text, returning formatted string."""
|
|
387
|
-
if not text:
|
|
388
|
-
return text
|
|
389
|
-
|
|
390
|
-
# Get current shimmer position (thread-safe)
|
|
391
|
-
with self._lock:
|
|
392
|
-
shimmer_pos = self._shimmer_position
|
|
393
|
-
self._shimmer_position += 1
|
|
394
|
-
if self._shimmer_position >= len(text) + SHIMMER_WIDTH:
|
|
395
|
-
self._shimmer_position = -SHIMMER_WIDTH
|
|
396
|
-
|
|
397
|
-
# Build the text in segments using list (more efficient than string concat)
|
|
398
|
-
result_parts = []
|
|
399
|
-
i = 0
|
|
400
|
-
|
|
401
|
-
while i < len(text):
|
|
402
|
-
# Determine if we're in a highlight segment or base segment
|
|
403
|
-
if shimmer_pos <= i < shimmer_pos + SHIMMER_WIDTH:
|
|
404
|
-
# Start highlight segment
|
|
405
|
-
highlight_start = i
|
|
406
|
-
while i < len(text) and shimmer_pos <= i < shimmer_pos + SHIMMER_WIDTH:
|
|
407
|
-
i += 1
|
|
408
|
-
result_parts.append(BLUE_HIGHLIGHT(text[highlight_start:i]))
|
|
409
|
-
else:
|
|
410
|
-
# Start base segment
|
|
411
|
-
base_start = i
|
|
412
|
-
while i < len(text) and not (shimmer_pos <= i < shimmer_pos + SHIMMER_WIDTH):
|
|
413
|
-
i += 1
|
|
414
|
-
result_parts.append(BLUE_BASE(text[base_start:i]))
|
|
415
|
-
|
|
416
|
-
return "".join(result_parts)
|
|
417
|
-
|
|
418
|
-
def _animate(self, update_callback: Callable[[str], None]) -> None:
|
|
419
|
-
"""Continuously update the spinner animation."""
|
|
420
|
-
import logging
|
|
421
|
-
|
|
422
|
-
logger = logging.getLogger(__name__)
|
|
423
|
-
|
|
424
|
-
while not self._stop_event.is_set() and self._cycle is not None:
|
|
425
|
-
try:
|
|
426
|
-
frame = self.get_next_spinner_frame()
|
|
427
|
-
update_callback(frame)
|
|
428
|
-
time.sleep(SPINNER_INTERVAL_SECONDS)
|
|
429
|
-
except Exception as e:
|
|
430
|
-
logger.error(
|
|
431
|
-
f"Non-critical: Spinner animation thread encountered an error "
|
|
432
|
-
f"and will stop. Progress bars will continue without animation. "
|
|
433
|
-
f"Details: {e}",
|
|
434
|
-
exc_info=True,
|
|
435
|
-
)
|
|
436
|
-
break # Exit gracefully rather than crash silently
|
|
437
|
-
|
|
438
|
-
|
|
439
11
|
class EvaluationProgressBars:
|
|
440
|
-
"""
|
|
441
|
-
|
|
442
|
-
This class coordinates multiple progress displays:
|
|
443
|
-
- Terminal mode: header bar + evaluations bar + items bar
|
|
444
|
-
- Notebook mode: single simplified evaluations bar
|
|
12
|
+
"""Tracks progress for evaluation runs."""
|
|
445
13
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
14
|
+
_runs_bar: tqdm
|
|
15
|
+
_items_bar: tqdm
|
|
16
|
+
completed_runs: int = field(default=0, init=False)
|
|
17
|
+
failed_runs: int = field(default=0, init=False)
|
|
18
|
+
uploaded_runs: int = field(default=0, init=False)
|
|
19
|
+
upload_failed_runs: int = field(default=0, init=False)
|
|
449
20
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
2. start_progress_bars: Create and display bars
|
|
453
|
-
3. on_run_completed: Update when runs finish
|
|
454
|
-
4. on_upload_completed: Update when uploads finish
|
|
455
|
-
5. close_progress_bars: Clean up and show summary
|
|
456
|
-
"""
|
|
457
|
-
|
|
458
|
-
def __init__(self, config: EvaluationConfig) -> None:
|
|
459
|
-
"""Initialize progress bar manager.
|
|
21
|
+
def on_run_completed(self, items_processed: int, succeeded: bool) -> None:
|
|
22
|
+
"""Update progress when an evaluation run completes.
|
|
460
23
|
|
|
461
24
|
Args:
|
|
462
|
-
|
|
25
|
+
items_processed: Number of items processed in this run.
|
|
26
|
+
Pass 0 for adaptive evals (items tracked via on_item_progress).
|
|
27
|
+
succeeded: Whether the run completed successfully.
|
|
463
28
|
"""
|
|
464
|
-
self.
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
self._header_bar: Optional[tqdm] = None
|
|
470
|
-
self._evaluations_bar: Optional[tqdm] = None
|
|
471
|
-
self._items_bar: Optional[tqdm] = None
|
|
472
|
-
|
|
473
|
-
# State tracking
|
|
474
|
-
self.completed_runs = 0
|
|
475
|
-
self.failed_runs = 0
|
|
476
|
-
self.uploaded_runs = 0
|
|
477
|
-
self.upload_failed_runs = 0
|
|
478
|
-
self._start_time: Optional[float] = None
|
|
479
|
-
self._state_lock = threading.Lock() # Protects run counters
|
|
480
|
-
|
|
481
|
-
def start_progress_bars(self) -> None:
|
|
482
|
-
"""Start the evaluation progress bars."""
|
|
483
|
-
self._start_time = time.monotonic()
|
|
484
|
-
|
|
485
|
-
try:
|
|
486
|
-
self._initialize_progress_bars()
|
|
487
|
-
except Exception:
|
|
488
|
-
# Ensure spinner is stopped if initialization fails
|
|
489
|
-
self.spinner.stop()
|
|
490
|
-
raise
|
|
491
|
-
|
|
492
|
-
def _initialize_progress_bars(self) -> None:
|
|
493
|
-
"""Initialize progress bars based on environment."""
|
|
494
|
-
if _is_notebook():
|
|
495
|
-
# Simplified notebook version - just one progress bar for evaluation runs
|
|
496
|
-
spinner_frame = SPINNER_FRAMES[0] if SPINNER_FRAMES else ""
|
|
497
|
-
desc = (
|
|
498
|
-
f"{spinner_frame} Evaluating {self.config.model_display} | "
|
|
499
|
-
f"{self.config.dataset_count} {self.config.dataset_label} | "
|
|
500
|
-
f"{self.config.hyperparam_count} {self.config.hyperparam_label}"
|
|
501
|
-
)
|
|
502
|
-
self._evaluations_bar = tqdm(
|
|
503
|
-
total=self.config.total_eval_runs,
|
|
504
|
-
desc=desc,
|
|
505
|
-
unit="run",
|
|
506
|
-
leave=False,
|
|
507
|
-
bar_format="{desc} | {n}/{total} Runs {percentage:3.0f}%|{bar}|",
|
|
508
|
-
)
|
|
509
|
-
# Start spinner animation for notebooks
|
|
510
|
-
self.spinner.start(self._update_notebook_spinner)
|
|
29
|
+
self._runs_bar.update(1)
|
|
30
|
+
if items_processed > 0:
|
|
31
|
+
self._items_bar.update(items_processed)
|
|
32
|
+
if succeeded:
|
|
33
|
+
self.completed_runs += 1
|
|
511
34
|
else:
|
|
512
|
-
|
|
513
|
-
initial_frame = self.spinner.get_initial_frame()
|
|
514
|
-
evaluating_text = f"Evaluating {self.config.model_display}"
|
|
515
|
-
initial_shimmer = self.spinner.get_shimmer_text(evaluating_text)
|
|
516
|
-
header_desc = self.formatter.format_header(
|
|
517
|
-
initial_frame, 0.0, 0, 0, 0, 0, initial_shimmer
|
|
518
|
-
)
|
|
519
|
-
self._header_bar = tqdm(
|
|
520
|
-
total=0,
|
|
521
|
-
desc=header_desc,
|
|
522
|
-
leave=False,
|
|
523
|
-
dynamic_ncols=True,
|
|
524
|
-
bar_format=HEADER_FORMAT,
|
|
525
|
-
)
|
|
526
|
-
|
|
527
|
-
eval_desc = self.formatter.format_progress_description(
|
|
528
|
-
EVALUATIONS_LABEL, 0, self.config.total_eval_runs
|
|
529
|
-
)
|
|
530
|
-
self._evaluations_bar = tqdm(
|
|
531
|
-
total=self.config.total_eval_runs,
|
|
532
|
-
desc=eval_desc,
|
|
533
|
-
unit="run",
|
|
534
|
-
leave=False,
|
|
535
|
-
dynamic_ncols=True,
|
|
536
|
-
bar_format=PROGRESS_BAR_FORMAT,
|
|
537
|
-
)
|
|
538
|
-
|
|
539
|
-
items_desc = self.formatter.format_progress_description(
|
|
540
|
-
ITEMS_LABEL, 0, self.config.total_items
|
|
541
|
-
)
|
|
542
|
-
self._items_bar = tqdm(
|
|
543
|
-
total=self.config.total_items,
|
|
544
|
-
desc=items_desc,
|
|
545
|
-
unit="item",
|
|
546
|
-
leave=False,
|
|
547
|
-
dynamic_ncols=True,
|
|
548
|
-
bar_format=PROGRESS_BAR_FORMAT,
|
|
549
|
-
)
|
|
550
|
-
|
|
551
|
-
self._refresh_progress_descriptions()
|
|
552
|
-
self.spinner.start(self._update_header_spinner)
|
|
553
|
-
|
|
554
|
-
def on_run_completed(self, items_processed: int, succeeded: bool) -> None:
|
|
555
|
-
"""Update progress when an evaluation run completes."""
|
|
556
|
-
with self._state_lock:
|
|
557
|
-
if succeeded:
|
|
558
|
-
self.completed_runs += 1
|
|
559
|
-
else:
|
|
560
|
-
self.failed_runs += 1
|
|
561
|
-
|
|
562
|
-
if self._evaluations_bar is not None:
|
|
563
|
-
self._evaluations_bar.update(1)
|
|
35
|
+
self.failed_runs += 1
|
|
564
36
|
|
|
565
|
-
|
|
566
|
-
|
|
37
|
+
def on_item_progress(self, current: int, total: int) -> None:
|
|
38
|
+
"""Update progress for individual items (used by adaptive evaluations).
|
|
567
39
|
|
|
568
|
-
|
|
40
|
+
Args:
|
|
41
|
+
current: Current item count.
|
|
42
|
+
total: Total item count.
|
|
43
|
+
"""
|
|
44
|
+
self._items_bar.n = current
|
|
45
|
+
if total != self._items_bar.total:
|
|
46
|
+
self._items_bar.total = total
|
|
47
|
+
self._items_bar.refresh()
|
|
569
48
|
|
|
570
49
|
def on_upload_completed(self, succeeded: bool) -> None:
|
|
571
50
|
"""Update progress when an upload completes."""
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
self.uploaded_runs += 1
|
|
575
|
-
else:
|
|
576
|
-
self.upload_failed_runs += 1
|
|
577
|
-
|
|
578
|
-
# Trigger header refresh in terminal mode
|
|
579
|
-
if not _is_notebook() and self._header_bar is not None:
|
|
580
|
-
self._refresh_header()
|
|
581
|
-
|
|
582
|
-
def close_progress_bars(self) -> None:
|
|
583
|
-
"""Close all progress bars and cleanup resources."""
|
|
584
|
-
self.spinner.stop()
|
|
585
|
-
self._finalize_header()
|
|
586
|
-
|
|
587
|
-
if self._items_bar is not None:
|
|
588
|
-
self._items_bar.close()
|
|
589
|
-
self._items_bar = None
|
|
590
|
-
if self._evaluations_bar is not None:
|
|
591
|
-
self._evaluations_bar.close()
|
|
592
|
-
self._evaluations_bar = None
|
|
593
|
-
if self._header_bar is not None:
|
|
594
|
-
self._header_bar.close()
|
|
595
|
-
self._header_bar = None
|
|
596
|
-
|
|
597
|
-
self._start_time = None
|
|
598
|
-
|
|
599
|
-
# Print summary after clearing progress bars
|
|
600
|
-
self._print_summary()
|
|
601
|
-
|
|
602
|
-
def _refresh_progress_descriptions(self) -> None:
|
|
603
|
-
"""Refresh progress bar descriptions to maintain alignment as counts change."""
|
|
604
|
-
# Skip refresh in notebooks (spinner handles description updates)
|
|
605
|
-
if _is_notebook():
|
|
606
|
-
return
|
|
607
|
-
|
|
608
|
-
if self._evaluations_bar is not None:
|
|
609
|
-
eval_desc = self.formatter.format_progress_description(
|
|
610
|
-
EVALUATIONS_LABEL,
|
|
611
|
-
min(self._evaluations_bar.n, self.config.total_eval_runs),
|
|
612
|
-
self.config.total_eval_runs,
|
|
613
|
-
)
|
|
614
|
-
self._evaluations_bar.set_description_str(eval_desc, refresh=False)
|
|
615
|
-
|
|
616
|
-
if self._items_bar is not None:
|
|
617
|
-
items_desc = self.formatter.format_progress_description(
|
|
618
|
-
ITEMS_LABEL,
|
|
619
|
-
min(self._items_bar.n, self.config.total_items),
|
|
620
|
-
self.config.total_items,
|
|
621
|
-
)
|
|
622
|
-
self._items_bar.set_description_str(items_desc, refresh=False)
|
|
623
|
-
|
|
624
|
-
# Refresh both bars
|
|
625
|
-
if self._evaluations_bar is not None:
|
|
626
|
-
self._evaluations_bar.refresh()
|
|
627
|
-
if self._items_bar is not None:
|
|
628
|
-
self._items_bar.refresh()
|
|
629
|
-
|
|
630
|
-
def _update_notebook_spinner(self, frame: str) -> None:
|
|
631
|
-
"""Update the notebook progress bar spinner (notebooks only)."""
|
|
632
|
-
if self._evaluations_bar is not None:
|
|
633
|
-
desc = (
|
|
634
|
-
f"{frame} Evaluating {self.config.model_display} | "
|
|
635
|
-
f"{self.config.dataset_count} {self.config.dataset_label} | "
|
|
636
|
-
f"{self.config.hyperparam_count} {self.config.hyperparam_label}"
|
|
637
|
-
)
|
|
638
|
-
self._evaluations_bar.set_description_str(desc, refresh=False)
|
|
639
|
-
self._evaluations_bar.refresh()
|
|
640
|
-
|
|
641
|
-
def _update_header_spinner(self, frame: str) -> None:
|
|
642
|
-
"""Update the header with a new spinner frame (terminals only)."""
|
|
643
|
-
if self._header_bar is not None and self._start_time is not None:
|
|
644
|
-
elapsed = time.monotonic() - self._start_time
|
|
645
|
-
evaluating_text = f"Evaluating {self.config.model_display}"
|
|
646
|
-
shimmer_text = self.spinner.get_shimmer_text(evaluating_text)
|
|
647
|
-
|
|
648
|
-
# Read state with lock
|
|
649
|
-
with self._state_lock:
|
|
650
|
-
completed = self.completed_runs
|
|
651
|
-
failed = self.failed_runs
|
|
652
|
-
uploaded = self.uploaded_runs
|
|
653
|
-
upload_failed = self.upload_failed_runs
|
|
654
|
-
|
|
655
|
-
header_desc = self.formatter.format_header(
|
|
656
|
-
frame,
|
|
657
|
-
elapsed,
|
|
658
|
-
completed,
|
|
659
|
-
failed,
|
|
660
|
-
uploaded,
|
|
661
|
-
upload_failed,
|
|
662
|
-
shimmer_text,
|
|
663
|
-
)
|
|
664
|
-
self._header_bar.set_description_str(header_desc, refresh=False)
|
|
665
|
-
self._header_bar.refresh()
|
|
666
|
-
|
|
667
|
-
def _refresh_header(self) -> None:
|
|
668
|
-
"""Refresh the header bar with current statistics."""
|
|
669
|
-
if self._header_bar is None or self._start_time is None:
|
|
670
|
-
return
|
|
671
|
-
|
|
672
|
-
elapsed = time.monotonic() - self._start_time
|
|
673
|
-
|
|
674
|
-
# Get current spinner frame (or empty if stopped)
|
|
675
|
-
if self.spinner.is_running():
|
|
676
|
-
# Spinner running, will update via callback soon
|
|
677
|
-
return
|
|
51
|
+
if succeeded:
|
|
52
|
+
self.uploaded_runs += 1
|
|
678
53
|
else:
|
|
679
|
-
|
|
680
|
-
frame = self.spinner.get_empty_frame()
|
|
681
|
-
|
|
682
|
-
with self._state_lock:
|
|
683
|
-
completed = self.completed_runs
|
|
684
|
-
failed = self.failed_runs
|
|
685
|
-
uploaded = self.uploaded_runs
|
|
686
|
-
upload_failed = self.upload_failed_runs
|
|
687
|
-
|
|
688
|
-
header_desc = self.formatter.format_header(
|
|
689
|
-
frame, elapsed, completed, failed, uploaded, upload_failed, ""
|
|
690
|
-
)
|
|
691
|
-
self._header_bar.set_description_str(header_desc, refresh=True)
|
|
692
|
-
|
|
693
|
-
def _finalize_header(self) -> None:
|
|
694
|
-
"""Finalize the header line without spinner animation."""
|
|
695
|
-
# Only for terminal mode
|
|
696
|
-
if _is_notebook():
|
|
697
|
-
return
|
|
698
|
-
|
|
699
|
-
if self._header_bar is not None and self._start_time is not None:
|
|
700
|
-
elapsed = time.monotonic() - self._start_time
|
|
701
|
-
final_frame = self.spinner.get_empty_frame()
|
|
702
|
-
|
|
703
|
-
# Read state with lock
|
|
704
|
-
with self._state_lock:
|
|
705
|
-
completed = self.completed_runs
|
|
706
|
-
failed = self.failed_runs
|
|
707
|
-
uploaded = self.uploaded_runs
|
|
708
|
-
upload_failed = self.upload_failed_runs
|
|
709
|
-
|
|
710
|
-
# No shimmer for final header
|
|
711
|
-
final_desc = self.formatter.format_header(
|
|
712
|
-
final_frame, elapsed, completed, failed, uploaded, upload_failed, ""
|
|
713
|
-
)
|
|
714
|
-
self._header_bar.set_description_str(final_desc, refresh=True)
|
|
715
|
-
|
|
716
|
-
def _print_summary(self) -> None:
|
|
717
|
-
"""Print a clean summary after evaluation completes."""
|
|
718
|
-
# Build summary message
|
|
719
|
-
summary_parts = [f"Evaluating {self.config.model_display} Completed"]
|
|
720
|
-
|
|
721
|
-
# Add run completion info
|
|
722
|
-
total_runs = self.completed_runs + self.failed_runs
|
|
723
|
-
expected_runs = self.config.total_eval_runs
|
|
724
|
-
|
|
725
|
-
# Show if some runs didn't complete (cancelled/interrupted)
|
|
726
|
-
if total_runs < expected_runs:
|
|
727
|
-
summary_parts.append(
|
|
728
|
-
f"{self.completed_runs}/{total_runs} Runs Completed Successfully "
|
|
729
|
-
f"(out of {expected_runs} expected)"
|
|
730
|
-
)
|
|
731
|
-
elif self.failed_runs == 0:
|
|
732
|
-
summary_parts.append(f"{self.completed_runs} Runs Completed Successfully")
|
|
733
|
-
else:
|
|
734
|
-
summary_parts.append(f"{self.completed_runs}/{total_runs} Runs Completed Successfully")
|
|
735
|
-
|
|
736
|
-
# Add upload info if any uploads occurred
|
|
737
|
-
if self.uploaded_runs > 0 or self.upload_failed_runs > 0:
|
|
738
|
-
total_uploads = self.uploaded_runs + self.upload_failed_runs
|
|
739
|
-
if self.upload_failed_runs == 0:
|
|
740
|
-
summary_parts.append(f"{self.uploaded_runs} Runs Uploaded Successfully")
|
|
741
|
-
else:
|
|
742
|
-
summary_parts.append(
|
|
743
|
-
f"{self.uploaded_runs}/{total_uploads} Runs Uploaded Successfully"
|
|
744
|
-
)
|
|
745
|
-
|
|
746
|
-
# Join parts with ", " and print
|
|
747
|
-
summary = ", ".join(summary_parts)
|
|
748
|
-
print(summary)
|
|
54
|
+
self.upload_failed_runs += 1
|
|
749
55
|
|
|
750
56
|
|
|
751
57
|
@contextmanager
|
|
752
58
|
def evaluation_progress_context(
|
|
753
59
|
total_eval_runs: int,
|
|
754
60
|
total_items: int,
|
|
755
|
-
dataset_count: int,
|
|
756
|
-
hyperparam_count: int,
|
|
757
61
|
model_display: str,
|
|
758
62
|
enabled: bool = True,
|
|
759
63
|
) -> Generator[Optional[EvaluationProgressBars], None, None]:
|
|
760
64
|
"""Context manager for evaluation progress bars.
|
|
761
65
|
|
|
762
66
|
Args:
|
|
763
|
-
total_eval_runs: Total number of runs
|
|
764
|
-
total_items: Total number of
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
model_display: Human readable model/inference name for the header
|
|
768
|
-
enabled: Whether to show progress bars (default: True)
|
|
67
|
+
total_eval_runs: Total number of evaluation runs.
|
|
68
|
+
total_items: Total number of items across all runs.
|
|
69
|
+
model_display: Model name to display in progress description.
|
|
70
|
+
enabled: Whether to show progress bars.
|
|
769
71
|
|
|
770
72
|
Yields:
|
|
771
|
-
|
|
73
|
+
EvaluationProgressBars instance, or None if disabled.
|
|
772
74
|
"""
|
|
773
75
|
if not enabled:
|
|
774
76
|
yield None
|
|
775
77
|
return
|
|
776
78
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
model_display=model_display,
|
|
79
|
+
runs_bar = tqdm(
|
|
80
|
+
total=total_eval_runs,
|
|
81
|
+
desc=f"Evaluating {model_display}",
|
|
82
|
+
unit="run",
|
|
83
|
+
leave=False,
|
|
783
84
|
)
|
|
784
|
-
|
|
785
|
-
|
|
85
|
+
items_bar = tqdm(
|
|
86
|
+
total=total_items,
|
|
87
|
+
desc="Items",
|
|
88
|
+
unit="item",
|
|
89
|
+
leave=False,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
progress = EvaluationProgressBars(_runs_bar=runs_bar, _items_bar=items_bar)
|
|
786
93
|
try:
|
|
787
|
-
yield
|
|
94
|
+
yield progress
|
|
788
95
|
finally:
|
|
789
|
-
|
|
96
|
+
items_bar.close()
|
|
97
|
+
runs_bar.close()
|
|
790
98
|
|
|
791
99
|
|
|
792
100
|
@contextmanager
|
|
@@ -797,60 +105,24 @@ def scoring_progress_context(
|
|
|
797
105
|
"""Context manager for scoring progress display.
|
|
798
106
|
|
|
799
107
|
Args:
|
|
800
|
-
total_metrics: Total number of metrics to score
|
|
801
|
-
enabled: Whether to show progress bar
|
|
108
|
+
total_metrics: Total number of metrics to score.
|
|
109
|
+
enabled: Whether to show progress bar.
|
|
802
110
|
|
|
803
111
|
Yields:
|
|
804
|
-
|
|
112
|
+
tqdm progress bar instance, or None if disabled.
|
|
805
113
|
"""
|
|
806
114
|
if not enabled:
|
|
807
115
|
yield None
|
|
808
116
|
return
|
|
809
117
|
|
|
810
|
-
# Use appropriate spinner frames based on environment
|
|
811
|
-
spinner_frames = SPINNER_FRAMES if SPINNER_FRAMES else ["|"]
|
|
812
|
-
spinner_cycle_obj = cycle(spinner_frames)
|
|
813
|
-
|
|
814
|
-
# Get initial spinner frame
|
|
815
|
-
initial_frame = next(spinner_cycle_obj)
|
|
816
|
-
|
|
817
118
|
progress_bar = tqdm(
|
|
818
119
|
total=total_metrics,
|
|
819
|
-
desc=
|
|
120
|
+
desc="Scoring",
|
|
820
121
|
unit="metric",
|
|
821
122
|
leave=False,
|
|
822
|
-
bar_format="{desc} | {n}/{total} metrics {percentage:3.0f}%|{bar}|",
|
|
823
123
|
)
|
|
824
124
|
|
|
825
|
-
# Start spinner animation thread
|
|
826
|
-
stop_event = threading.Event()
|
|
827
|
-
current_metric_name = [""] # List to allow mutation in closure
|
|
828
|
-
|
|
829
|
-
def animate_spinner() -> None:
|
|
830
|
-
"""Update spinner and description in background thread."""
|
|
831
|
-
while not stop_event.is_set():
|
|
832
|
-
try:
|
|
833
|
-
frame = next(spinner_cycle_obj)
|
|
834
|
-
metric_suffix = f": {current_metric_name[0]}" if current_metric_name[0] else ""
|
|
835
|
-
progress_bar.set_description_str(
|
|
836
|
-
f"{frame} Scoring metrics{metric_suffix}", refresh=True
|
|
837
|
-
)
|
|
838
|
-
time.sleep(SPINNER_INTERVAL_SECONDS)
|
|
839
|
-
except Exception:
|
|
840
|
-
break
|
|
841
|
-
|
|
842
|
-
spinner_thread = threading.Thread(target=animate_spinner, daemon=True)
|
|
843
|
-
spinner_thread.start()
|
|
844
|
-
|
|
845
|
-
# Attach helper method to update current metric name
|
|
846
|
-
def set_current_metric(metric_name: str) -> None:
|
|
847
|
-
current_metric_name[0] = metric_name
|
|
848
|
-
|
|
849
|
-
progress_bar.set_current_metric = set_current_metric
|
|
850
|
-
|
|
851
125
|
try:
|
|
852
126
|
yield progress_bar
|
|
853
127
|
finally:
|
|
854
|
-
stop_event.set()
|
|
855
|
-
spinner_thread.join(timeout=1.0)
|
|
856
128
|
progress_bar.close()
|