scorebook 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,105 +1,787 @@
1
1
  """Progress bar utilities for evaluation tracking."""
2
2
 
3
+ import re
4
+ import shutil
5
+ import threading
6
+ import time
3
7
  from contextlib import contextmanager
4
- from typing import Generator, Optional
8
+ from dataclasses import dataclass
9
+ from itertools import cycle
10
+ from typing import Callable, Generator, Optional, cast
5
11
 
6
- from tqdm import tqdm
12
+ from tqdm.auto import tqdm
13
+
14
+ _IS_NOTEBOOK: Optional[bool] = None
15
+
16
+
17
def _is_notebook() -> bool:
    """Report whether the code is running inside a Jupyter notebook kernel.

    The answer is computed on the first call and stored in the module-level
    ``_IS_NOTEBOOK`` flag; subsequent calls return the cached value.

    Returns:
        True when ``get_ipython()`` exists and its class is named
        ``ZMQInteractiveShell``, False otherwise.
    """
    global _IS_NOTEBOOK
    if _IS_NOTEBOOK is not None:
        return _IS_NOTEBOOK

    try:
        shell_name = get_ipython().__class__.__name__  # type: ignore[name-defined]
    except NameError:
        # ``get_ipython`` is not defined in this interpreter.
        _IS_NOTEBOOK = False
    else:
        _IS_NOTEBOOK = shell_name == "ZMQInteractiveShell"
    return _IS_NOTEBOOK
30
+
31
+
32
+ # Color codes - ANSI for terminals, plain text for notebooks
33
+ RESET = "\033[0m"
34
+
35
+
36
def _make_color_func(ansi_code: str) -> Callable[[str], str]:
    """Build a text colorizer bound to a single ANSI color code.

    The notebook check happens each time the returned function is called,
    not when it is created.

    Args:
        ansi_code: The ANSI escape code for the color (e.g., "32" for green)

    Returns:
        A function that wraps text in the color escape sequence followed by a
        reset, or returns the text unchanged when running in a notebook.
    """

    def color_func(text: str) -> str:
        return text if _is_notebook() else f"\033[{ansi_code}m{text}\033[0m"

    return color_func
52
+
53
+
54
+ # Color functions - automatically handle notebook vs terminal rendering
55
+ GREEN = _make_color_func("32") # Green
56
+ RED = _make_color_func("31") # Red
57
+ LIGHT_GREEN = _make_color_func("92") # Light green
58
+ LIGHT_RED = _make_color_func("91") # Light red
59
+ BLUE_BASE = _make_color_func("34") # Blue
60
+ BLUE_HIGHLIGHT = _make_color_func("1;34") # Bright blue
61
+
62
+
63
+ # Shimmer effect width (number of characters highlighted in sweep animation)
64
+ # Tested values: 2 (too subtle), 3 (optimal), 5 (too wide)
65
+ SHIMMER_WIDTH = 3
66
+
67
+ # Spinner blue shimmer colors for terminals (cycled for visual effect)
68
+ SPINNER_BLUE_COLORS = [
69
+ "\033[34m", # Standard blue
70
+ "\033[1;34m", # Bright blue
71
+ "\033[94m", # Light blue
72
+ "\033[36m", # Cyan
73
+ "\033[1;36m", # Bright cyan
74
+ "\033[96m", # Light cyan
75
+ ]
76
+
77
+ # Progress bar configuration
78
+ PROGRESS_BAR_FORMAT = "{desc}|{bar}|" # Compact format for progress bars
79
+ HEADER_FORMAT = "{desc}" # Header shows only description, no bar
80
+
81
+ # Spinner update interval in seconds
82
+ # 0.08s = 12.5 Hz provides smooth animation without excessive CPU usage
83
+ # Lower values (0.05) cause flickering, higher values (0.2) appear choppy
84
+ SPINNER_INTERVAL_SECONDS = 0.08
85
+
86
+ # Terminal size fallback if detection fails
87
+ # 120 columns: Common wide terminal default
88
+ # 20 rows: Not used but required by shutil.get_terminal_size()
89
+ TERMINAL_FALLBACK_SIZE = (120, 20)
90
+
91
+ # Minimum spacing between header left and right sections
92
+ # Prevents sections from touching when terminal is narrow
93
+ MINIMUM_HEADER_SPACING = 3
94
+
95
+ # Spinner animation frames
96
+ SPINNER_FRAMES_UNICODE = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
97
+ SPINNER_FRAMES_ASCII = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
98
+
99
+
100
def _select_spinner_frames() -> list[str]:
    """Select spinner frames that the current stdout encoding can render.

    Returns:
        ``SPINNER_FRAMES_UNICODE`` when stdout's encoding resolves to UTF-8
        (under any registered alias), otherwise ``SPINNER_FRAMES_ASCII``.
    """
    import codecs
    import sys

    # sys.stdout may be None or a replacement stream without an ``encoding``
    # attribute; treat both as plain ASCII instead of raising at import time.
    encoding = getattr(sys.stdout, "encoding", None) or "ascii"

    try:
        # Normalise aliases ("utf8", "UTF_8", "U8", ...) to the canonical name.
        canonical_name = codecs.lookup(encoding).name
    except (LookupError, TypeError):
        # Unknown or non-string encoding: fall back to the safe frame set.
        return SPINNER_FRAMES_ASCII

    if canonical_name == "utf-8":
        return SPINNER_FRAMES_UNICODE
    return SPINNER_FRAMES_ASCII
110
+
111
+
112
+ # Use Braille characters for smooth rotation (fallback to ASCII if needed)
113
+ SPINNER_FRAMES = _select_spinner_frames()
114
+
115
+ # Progress bar labels
116
+ EVALUATIONS_LABEL = "Evaluations" # Label for run-level progress
117
+ ITEMS_LABEL = "Items" # Label for item-level progress
118
+
119
+ # Compiled regex pattern for ANSI escape codes (used for calculating visual length)
120
+ _ANSI_ESCAPE_PATTERN = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
121
+
122
+
123
def _visual_length(text: str) -> int:
    """Return the length of ``text`` once ANSI escape sequences are removed."""
    without_escapes = _ANSI_ESCAPE_PATTERN.sub("", text)
    return len(without_escapes)
126
+
127
+
128
@dataclass
class EvaluationConfig:
    """Settings describing one evaluation session for progress display."""

    # Number of evaluation runs expected in total.
    total_eval_runs: int
    # Number of evaluation items expected across all runs.
    total_items: int
    # Number of datasets; also selects the singular/plural dataset label.
    dataset_count: int
    # Number of hyperparameter configurations; selects the label plurality.
    hyperparam_count: int
    # Model name shown in the progress header and summary.
    model_display: str

    @property
    def dataset_label(self) -> str:
        """Return "Dataset" for exactly one dataset, otherwise "Datasets"."""
        if self.dataset_count == 1:
            return "Dataset"
        return "Datasets"

    @property
    def hyperparam_label(self) -> str:
        """Return the singular label for exactly one configuration, else the plural."""
        is_single = self.hyperparam_count == 1
        return "Hyperparam Configuration" if is_single else "Hyperparam Configurations"
149
+
150
+
151
class ProgressBarFormatter:
    """Handles formatting for progress bar descriptions and headers.

    This class is responsible for:
    - Formatting progress descriptions with aligned counts and percentages
    - Building the header line (spinner, elapsed time, run statistics)
    - Aligning header sections by visual width, ignoring ANSI escape codes

    Column widths are fixed at construction time from the configured totals,
    so the descriptions keep the same width while counts increase.
    """

    def __init__(self, config: "EvaluationConfig") -> None:
        """Initialize the formatter with configuration.

        Args:
            config: Evaluation configuration; its totals determine the width
                of the count columns.
        """
        self.config = config
        self._label_width = max(len(EVALUATIONS_LABEL), len(ITEMS_LABEL))
        self._count_width = max(len(str(config.total_eval_runs)), len(str(config.total_items)), 1)

    def format_progress_description(self, label: str, completed: int, total: int) -> str:
        """Format a progress bar description with counts and percentage.

        Args:
            label: Row label, left-justified to the shared label width.
            completed: Number of units finished so far.
            total: Number of units expected; values <= 0 render ``--%``.

        Returns:
            A string of the form ``"<label> <completed>/<total> <pct>% "``.
        """
        label_str = label.ljust(self._label_width)
        count_str = f"{completed:>{self._count_width}}/{total:>{self._count_width}}"

        if total > 0:
            # Integer arithmetic: float division would truncate e.g. 29/100
            # to 28% because 0.29 * 100 == 28.999999999999996.
            percent = int(completed * 100 // total)
            percent_str = f"{percent:>3d}%"
        else:
            percent_str = " --%"

        return f"{label_str} {count_str} {percent_str} "

    @staticmethod
    def format_elapsed_time(elapsed_seconds: float) -> str:
        """Format elapsed time as mm:ss, or hh:mm:ss once an hour has passed.

        Negative durations are clamped to zero; fractions are truncated.
        """
        total_seconds = int(max(elapsed_seconds, 0))
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)

        if hours:
            return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
        return f"{minutes:02d}:{seconds:02d}"

    def format_header(
        self,
        spinner_frame: str,
        elapsed_seconds: float,
        completed_runs: int,
        failed_runs: int,
        uploaded_runs: int,
        upload_failed_runs: int,
        shimmer_text: str = "",
    ) -> str:
        """Compose the header line with spinner, elapsed time, and run statistics.

        Args:
            spinner_frame: Spinner frame text (may contain ANSI codes).
            elapsed_seconds: Seconds elapsed since the evaluation started.
            completed_runs: Number of runs that succeeded.
            failed_runs: Number of runs that failed.
            uploaded_runs: Number of successful uploads.
            upload_failed_runs: Number of failed uploads.
            shimmer_text: Pre-rendered replacement for the "Evaluating ..."
                text; when empty the plain text is used.

        Returns:
            The full header line, padded to the terminal width.
        """
        elapsed_str = ProgressBarFormatter.format_elapsed_time(elapsed_seconds)
        left_section = self._build_left_section(spinner_frame, elapsed_str, shimmer_text)
        right_section = ProgressBarFormatter._build_run_status_section(
            completed_runs, failed_runs, uploaded_runs, upload_failed_runs
        )

        return ProgressBarFormatter._combine_header_sections(left_section, right_section)

    def _build_left_section(
        self, spinner_frame: str, elapsed_str: str, shimmer_text: str = ""
    ) -> str:
        """Build the left section of the header with spinner and evaluation info."""
        # Prefer the caller-supplied shimmer rendering; otherwise use plain text.
        evaluating_text = f"Evaluating {self.config.model_display}"
        model_text = shimmer_text if shimmer_text else evaluating_text

        return (
            f"{spinner_frame} {model_text} ({elapsed_str}) | "
            f"{self.config.dataset_count} {self.config.dataset_label} | "
            f"{self.config.hyperparam_count} {self.config.hyperparam_label}"
        )

    @staticmethod
    def _build_run_status_section(
        completed_runs: int, failed_runs: int, uploaded_runs: int, upload_failed_runs: int
    ) -> tuple[str, str]:
        """Build the run status section.

        Returns:
            A ``(plain, colored)`` pair: the same bracketed text, the first
            without color functions applied (used for width measurement) and
            the second with them applied (used for display).
        """
        run_parts = [f"RUNS PASSED: {completed_runs}"]
        colored_run_parts = [GREEN(f"RUNS PASSED: {completed_runs}")]

        if failed_runs > 0:
            run_parts.append(f"RUNS FAILED: {failed_runs}")
            colored_run_parts.append(RED(f"RUNS FAILED: {failed_runs}"))

        # Upload statistics appear only once at least one upload was attempted.
        if uploaded_runs > 0 or upload_failed_runs > 0:
            run_parts.append(f"RUNS UPLOADED: {uploaded_runs}")
            colored_run_parts.append(LIGHT_GREEN(f"RUNS UPLOADED: {uploaded_runs}"))

        if upload_failed_runs > 0:
            run_parts.append(f"UPLOADS FAILED: {upload_failed_runs}")
            colored_run_parts.append(LIGHT_RED(f"UPLOADS FAILED: {upload_failed_runs}"))

        plain = f"[{', '.join(run_parts)}]"
        colored = f"[{', '.join(colored_run_parts)}]"

        return plain, colored

    @staticmethod
    def _combine_header_sections(left_section: str, right_sections: tuple[str, str]) -> str:
        """Combine left and right header sections with appropriate spacing.

        Args:
            left_section: Left text, possibly containing ANSI codes.
            right_sections: ``(plain, colored)`` pair for the right text.

        Returns:
            The left text, padding, and colored right text. When the terminal
            is too narrow the left text is truncated with "..." (and loses its
            colors); when even that does not fit, only the right text.
        """
        plain_right, colored_right = right_sections

        term_width = shutil.get_terminal_size(fallback=TERMINAL_FALLBACK_SIZE).columns
        left_visual_length = _visual_length(left_section)
        right_visual_length = len(plain_right)

        total_content_width = left_visual_length + right_visual_length
        if total_content_width >= term_width - MINIMUM_HEADER_SPACING:
            # Reserve 3 extra columns for the "..." suffix.
            max_left_width = term_width - right_visual_length - MINIMUM_HEADER_SPACING - 3
            if max_left_width < 20:
                # Not enough room for a meaningful left section.
                return colored_right

            # Slice on the plain text so an escape sequence is never cut in half.
            left_plain = _ANSI_ESCAPE_PATTERN.sub("", left_section)
            left_truncated = left_plain[:max_left_width] + "..."
            left_section = left_truncated
            left_visual_length = len(left_truncated)

        spacing = term_width - left_visual_length - right_visual_length
        spacing = max(spacing, MINIMUM_HEADER_SPACING)

        return f"{left_section}{' ' * spacing}{colored_right}"
282
+
283
+
284
class SpinnerManager:
    """Manages spinner animation for the progress header.

    Features:
    - Runs the animation loop in a background daemon thread
    - Cycles through SPINNER_BLUE_COLORS for spinner frames (terminal only)
    - Provides a sweeping highlight ("shimmer") over a text string
    - Guards the color index and shimmer position with a lock

    The loop invokes the update callback every SPINNER_INTERVAL_SECONDS until
    stop() is called. In notebook environments frames are returned without
    ANSI color codes.
    """

    def __init__(self) -> None:
        """Initialize the spinner manager."""
        self._frames = SpinnerManager._normalize_spinner_frames()
        self._cycle: Optional[cycle] = None
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self.frame_width = len(self._frames[0]) if self._frames else 0
        self._shimmer_position = 0  # Left edge of the shimmer highlight
        self._spinner_color_index = 0  # Index for spinner color cycling
        self._lock = threading.Lock()  # Protects spinner state

    @staticmethod
    def _normalize_spinner_frames() -> list[str]:
        """Pad every spinner frame to the width of the widest frame."""
        if not SPINNER_FRAMES:
            return []

        width = max(len(frame) for frame in SPINNER_FRAMES)
        return [frame.ljust(width) for frame in SPINNER_FRAMES]

    def start(self, update_callback: Callable[[str], None]) -> None:
        """Start the spinner animation.

        Does nothing when a thread has already been started (and not yet
        cleared by stop()) or when there are no frames to show.

        Args:
            update_callback: Called from the background thread with each new
                frame string.
        """
        if self._thread is not None or not self._frames:
            return

        self._stop_event.clear()
        self._cycle = cycle(self._frames)
        self._thread = threading.Thread(target=self._animate, args=(update_callback,), daemon=True)
        self._thread.start()

    def is_running(self) -> bool:
        """Check if the spinner animation is currently running."""
        return self._thread is not None and self._thread.is_alive()

    def stop(self) -> None:
        """Stop the spinner animation, waiting up to 5 seconds for the thread."""
        if self._thread is None:
            return

        self._stop_event.set()
        self._thread.join(timeout=5.0)

        if self._thread.is_alive():
            import logging

            logger = logging.getLogger(__name__)
            logger.warning("Spinner thread did not stop cleanly within 5 seconds")
            # Thread is daemon, so it will be killed on exit anyway

        self._thread = None

    def get_initial_frame(self) -> str:
        """Get the first spinner frame, colored for terminals, plain for notebooks."""
        if not self._frames:
            return ""
        frame = self._frames[0]

        # Return plain frame for notebooks (no ANSI colors)
        if _is_notebook():
            return frame

        color = SPINNER_BLUE_COLORS[self._spinner_color_index % len(SPINNER_BLUE_COLORS)]
        return f"{color}{frame}{RESET}"

    def get_empty_frame(self) -> str:
        """Get a blank frame with the same width as spinner frames."""
        return " " * self.frame_width

    def get_next_spinner_frame(self) -> str:
        """Advance the spinner and return the next frame.

        Returns:
            An empty string before start() has been called or when there are
            no frames; the plain frame in notebooks; otherwise the frame
            wrapped in the next color from SPINNER_BLUE_COLORS.
        """
        if not self._frames or not self._cycle:
            return ""

        frame = cast(str, next(self._cycle))

        # Return plain frame for notebooks (no ANSI colors)
        if _is_notebook():
            return frame

        # The color index is shared state, so advance it under the lock.
        with self._lock:
            color = SPINNER_BLUE_COLORS[self._spinner_color_index % len(SPINNER_BLUE_COLORS)]
            self._spinner_color_index += 1
        return f"{color}{frame}{RESET}"

    def get_shimmer_text(self, text: str) -> str:
        """Apply the sweep shimmer effect to text and advance the sweep.

        A window of SHIMMER_WIDTH characters is rendered with BLUE_HIGHLIGHT
        and the rest with BLUE_BASE. Each call moves the window one character
        to the right; after it leaves the text it restarts just before it.

        Args:
            text: The text to render; an empty string is returned unchanged.

        Returns:
            The text assembled from colored segments.
        """
        if not text:
            return text

        # Read and advance the shared sweep position atomically.
        with self._lock:
            shimmer_pos = self._shimmer_position
            self._shimmer_position += 1
            if self._shimmer_position >= len(text) + SHIMMER_WIDTH:
                self._shimmer_position = -SHIMMER_WIDTH

        # Clip the highlight window to the bounds of the text.
        highlight_start = max(shimmer_pos, 0)
        highlight_end = min(shimmer_pos + SHIMMER_WIDTH, len(text))
        if highlight_start >= highlight_end:
            # Window is entirely outside the text: a single base segment.
            return BLUE_BASE(text)

        result_parts = []
        if highlight_start > 0:
            result_parts.append(BLUE_BASE(text[:highlight_start]))
        result_parts.append(BLUE_HIGHLIGHT(text[highlight_start:highlight_end]))
        if highlight_end < len(text):
            result_parts.append(BLUE_BASE(text[highlight_end:]))

        return "".join(result_parts)

    def _animate(self, update_callback: Callable[[str], None]) -> None:
        """Run the animation loop until stopped or until the callback fails.

        Args:
            update_callback: Receives each new spinner frame.
        """
        import logging

        logger = logging.getLogger(__name__)

        while not self._stop_event.is_set() and self._cycle is not None:
            try:
                frame = self.get_next_spinner_frame()
                update_callback(frame)
            except Exception as e:
                logger.error(
                    "Non-critical: Spinner animation thread encountered an error "
                    "and will stop. Progress bars will continue without animation. "
                    "Details: %s",
                    e,
                    exc_info=True,
                )
                break  # Exit gracefully rather than crash silently

            # Interruptible pause: wait() returns True as soon as stop() sets
            # the event, so shutdown does not have to sit out a full interval.
            if self._stop_event.wait(SPINNER_INTERVAL_SECONDS):
                break
7
437
 
8
438
 
9
439
class EvaluationProgressBars:
    """Manages progress bars for evaluation runs and item processing.

    This class coordinates multiple progress displays:
    - Terminal mode: header bar + evaluations bar + items bar
    - Notebook mode: single simplified evaluations bar

    Thread Safety:
        The run/upload counters are written and read under _state_lock
        because the spinner thread reads them while rendering the header.

    Lifecycle:
        1. __init__: Initialize with configuration
        2. start_progress_bars: Create and display bars
        3. on_run_completed: Update when runs finish
        4. on_upload_completed: Update when uploads finish
        5. close_progress_bars: Clean up and show summary
    """

    def __init__(self, config: "EvaluationConfig") -> None:
        """Initialize progress bar manager.

        Args:
            config: Configuration for the evaluation progress tracking
        """
        self.config = config
        self.formatter = ProgressBarFormatter(config)
        self.spinner = SpinnerManager()

        # Progress bar instances
        self._header_bar: Optional[tqdm] = None
        self._evaluations_bar: Optional[tqdm] = None
        self._items_bar: Optional[tqdm] = None

        # State tracking
        self.completed_runs = 0
        self.failed_runs = 0
        self.uploaded_runs = 0
        self.upload_failed_runs = 0
        self._start_time: Optional[float] = None
        self._state_lock = threading.Lock()  # Protects run counters

    def start_progress_bars(self) -> None:
        """Start the evaluation progress bars.

        Raises:
            Exception: Whatever bar initialization raised, re-raised after the
                spinner has been stopped and any bars already created closed.
        """
        self._start_time = time.monotonic()

        try:
            self._initialize_progress_bars()
        except Exception:
            # Initialization creates several bars before starting the spinner,
            # so a failure part-way can leave bars open; release everything.
            self.spinner.stop()
            self._close_bars()
            self._start_time = None
            raise

    def _initialize_progress_bars(self) -> None:
        """Create the bars for the current environment and start the spinner."""
        if _is_notebook():
            # Simplified notebook version - just one progress bar for evaluation runs
            spinner_frame = SPINNER_FRAMES[0] if SPINNER_FRAMES else ""
            self._evaluations_bar = tqdm(
                total=self.config.total_eval_runs,
                desc=self._notebook_description(spinner_frame),
                unit="run",
                leave=False,
                bar_format="{desc} | {n}/{total} Runs {percentage:3.0f}%|{bar}|",
            )
            # Start spinner animation for notebooks
            self.spinner.start(self._update_notebook_spinner)
        else:
            # Full terminal version with header, spinner, and multiple bars
            initial_frame = self.spinner.get_initial_frame()
            evaluating_text = f"Evaluating {self.config.model_display}"
            initial_shimmer = self.spinner.get_shimmer_text(evaluating_text)
            header_desc = self.formatter.format_header(
                initial_frame, 0.0, 0, 0, 0, 0, initial_shimmer
            )
            self._header_bar = tqdm(
                total=0,
                desc=header_desc,
                leave=False,
                dynamic_ncols=True,
                bar_format=HEADER_FORMAT,
            )

            eval_desc = self.formatter.format_progress_description(
                EVALUATIONS_LABEL, 0, self.config.total_eval_runs
            )
            self._evaluations_bar = tqdm(
                total=self.config.total_eval_runs,
                desc=eval_desc,
                unit="run",
                leave=False,
                dynamic_ncols=True,
                bar_format=PROGRESS_BAR_FORMAT,
            )

            items_desc = self.formatter.format_progress_description(
                ITEMS_LABEL, 0, self.config.total_items
            )
            self._items_bar = tqdm(
                total=self.config.total_items,
                desc=items_desc,
                unit="item",
                leave=False,
                dynamic_ncols=True,
                bar_format=PROGRESS_BAR_FORMAT,
            )

            self._refresh_progress_descriptions()
            self.spinner.start(self._update_header_spinner)

    def on_run_completed(self, items_processed: int, succeeded: bool) -> None:
        """Update progress when an evaluation run completes.

        Args:
            items_processed: Amount by which to advance the items bar.
            succeeded: True counts the run as completed, False as failed.
        """
        with self._state_lock:
            if succeeded:
                self.completed_runs += 1
            else:
                self.failed_runs += 1

        if self._evaluations_bar is not None:
            self._evaluations_bar.update(1)

        if self._items_bar is not None:
            self._items_bar.update(items_processed)

        self._refresh_progress_descriptions()

    def on_upload_completed(self, succeeded: bool) -> None:
        """Update progress when an upload completes.

        Args:
            succeeded: True counts the upload as successful, False as failed.
        """
        with self._state_lock:
            if succeeded:
                self.uploaded_runs += 1
            else:
                self.upload_failed_runs += 1

        # Trigger header refresh in terminal mode
        if not _is_notebook() and self._header_bar is not None:
            self._refresh_header()

    def close_progress_bars(self) -> None:
        """Stop the spinner, close all bars, and print the summary line."""
        self.spinner.stop()
        self._finalize_header()
        self._close_bars()

        self._start_time = None

        # Print summary after clearing progress bars
        self._print_summary()

    def _close_bars(self) -> None:
        """Close every bar that exists and drop the reference to it."""
        for attr_name in ("_items_bar", "_evaluations_bar", "_header_bar"):
            bar = getattr(self, attr_name)
            if bar is not None:
                bar.close()
                setattr(self, attr_name, None)

    def _snapshot_counts(self) -> tuple[int, int, int, int]:
        """Return (completed, failed, uploaded, upload_failed) read under the lock."""
        with self._state_lock:
            return (
                self.completed_runs,
                self.failed_runs,
                self.uploaded_runs,
                self.upload_failed_runs,
            )

    def _notebook_description(self, frame: str) -> str:
        """Build the notebook bar description for the given spinner frame."""
        return (
            f"{frame} Evaluating {self.config.model_display} | "
            f"{self.config.dataset_count} {self.config.dataset_label} | "
            f"{self.config.hyperparam_count} {self.config.hyperparam_label}"
        )

    def _refresh_progress_descriptions(self) -> None:
        """Refresh progress bar descriptions to maintain alignment as counts change."""
        # Skip refresh in notebooks (spinner handles description updates)
        if _is_notebook():
            return

        if self._evaluations_bar is not None:
            eval_desc = self.formatter.format_progress_description(
                EVALUATIONS_LABEL,
                # Clamp so the displayed count never exceeds the total.
                min(self._evaluations_bar.n, self.config.total_eval_runs),
                self.config.total_eval_runs,
            )
            self._evaluations_bar.set_description_str(eval_desc, refresh=False)

        if self._items_bar is not None:
            items_desc = self.formatter.format_progress_description(
                ITEMS_LABEL,
                min(self._items_bar.n, self.config.total_items),
                self.config.total_items,
            )
            self._items_bar.set_description_str(items_desc, refresh=False)

        # Redraw only after both descriptions have been updated.
        if self._evaluations_bar is not None:
            self._evaluations_bar.refresh()
        if self._items_bar is not None:
            self._items_bar.refresh()

    def _update_notebook_spinner(self, frame: str) -> None:
        """Update the notebook progress bar spinner (notebooks only)."""
        if self._evaluations_bar is not None:
            desc = self._notebook_description(frame)
            self._evaluations_bar.set_description_str(desc, refresh=False)
            self._evaluations_bar.refresh()

    def _update_header_spinner(self, frame: str) -> None:
        """Update the header with a new spinner frame (terminals only)."""
        if self._header_bar is not None and self._start_time is not None:
            elapsed = time.monotonic() - self._start_time
            evaluating_text = f"Evaluating {self.config.model_display}"
            shimmer_text = self.spinner.get_shimmer_text(evaluating_text)

            completed, failed, uploaded, upload_failed = self._snapshot_counts()

            header_desc = self.formatter.format_header(
                frame,
                elapsed,
                completed,
                failed,
                uploaded,
                upload_failed,
                shimmer_text,
            )
            self._header_bar.set_description_str(header_desc, refresh=False)
            self._header_bar.refresh()

    def _refresh_header(self) -> None:
        """Refresh the header bar with current statistics."""
        if self._header_bar is None or self._start_time is None:
            return

        if self.spinner.is_running():
            # Spinner running, will update via callback soon
            return

        # Spinner stopped, update manually with a blank frame
        elapsed = time.monotonic() - self._start_time
        frame = self.spinner.get_empty_frame()
        completed, failed, uploaded, upload_failed = self._snapshot_counts()

        header_desc = self.formatter.format_header(
            frame, elapsed, completed, failed, uploaded, upload_failed, ""
        )
        self._header_bar.set_description_str(header_desc, refresh=True)

    def _finalize_header(self) -> None:
        """Finalize the header line without spinner animation."""
        # Only for terminal mode
        if _is_notebook():
            return

        if self._header_bar is not None and self._start_time is not None:
            elapsed = time.monotonic() - self._start_time
            final_frame = self.spinner.get_empty_frame()
            completed, failed, uploaded, upload_failed = self._snapshot_counts()

            # No shimmer for final header
            final_desc = self.formatter.format_header(
                final_frame, elapsed, completed, failed, uploaded, upload_failed, ""
            )
            self._header_bar.set_description_str(final_desc, refresh=True)

    def _print_summary(self) -> None:
        """Print a one-line summary of run and upload results."""
        summary_parts = [f"Evaluating {self.config.model_display} Completed"]

        total_runs = self.completed_runs + self.failed_runs
        expected_runs = self.config.total_eval_runs

        # Show if some runs didn't complete (cancelled/interrupted)
        if total_runs < expected_runs:
            summary_parts.append(
                f"{self.completed_runs}/{total_runs} Runs Completed Successfully "
                f"(out of {expected_runs} expected)"
            )
        elif self.failed_runs == 0:
            summary_parts.append(f"{self.completed_runs} Runs Completed Successfully")
        else:
            summary_parts.append(f"{self.completed_runs}/{total_runs} Runs Completed Successfully")

        # Add upload info if any uploads occurred
        if self.uploaded_runs > 0 or self.upload_failed_runs > 0:
            total_uploads = self.uploaded_runs + self.upload_failed_runs
            if self.upload_failed_runs == 0:
                summary_parts.append(f"{self.uploaded_runs} Runs Uploaded Successfully")
            else:
                summary_parts.append(
                    f"{self.uploaded_runs}/{total_uploads} Runs Uploaded Successfully"
                )

        print(", ".join(summary_parts))
86
749
 
87
750
 
88
751
  @contextmanager
89
- def evaluation_progress(
90
- dataset_count: int, hyperparameter_config_count: int, run_count: int
91
- ) -> Generator[EvaluationProgressBars, None, None]:
752
+ def evaluation_progress_context(
753
+ total_eval_runs: int,
754
+ total_items: int,
755
+ dataset_count: int,
756
+ hyperparam_count: int,
757
+ model_display: str,
758
+ enabled: bool = True,
759
+ ) -> Generator[Optional[EvaluationProgressBars], None, None]:
92
760
  """Context manager for evaluation progress bars.
93
761
 
94
762
  Args:
95
- dataset_count: Number of datasets being evaluated
96
- hyperparameter_config_count: Number of hyperparameter configurations per dataset
97
- run_count: Total number of EvalRunSpecs
763
+ total_eval_runs: Total number of runs that will be executed
764
+ total_items: Total number of evaluation items across all runs
765
+ dataset_count: Number of datasets included in the evaluation
766
+ hyperparam_count: Number of hyperparameter configurations evaluated
767
+ model_display: Human readable model/inference name for the header
768
+ enabled: Whether to show progress bars (default: True)
98
769
 
99
770
  Yields:
100
- EvaluationProgressBars: Progress bar manager instance
771
+ Optional[EvaluationProgressBars]: Progress bar manager instance (None if disabled)
101
772
  """
102
- progress_bars = EvaluationProgressBars(dataset_count, hyperparameter_config_count, run_count)
773
+ if not enabled:
774
+ yield None
775
+ return
776
+
777
+ config = EvaluationConfig(
778
+ total_eval_runs=total_eval_runs,
779
+ total_items=total_items,
780
+ dataset_count=dataset_count,
781
+ hyperparam_count=hyperparam_count,
782
+ model_display=model_display,
783
+ )
784
+ progress_bars = EvaluationProgressBars(config)
103
785
  progress_bars.start_progress_bars()
104
786
  try:
105
787
  yield progress_bars