scorebook 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """Logging utilities for Scorebook evaluation framework."""
@@ -9,20 +9,33 @@ from tqdm import tqdm
9
9
  class EvaluationProgressBars:
10
10
  """Manages nested progress bars for evaluation tracking."""
11
11
 
12
- def __init__(self, datasets: List[Any], hyperparam_count: int) -> None:
12
+ def __init__(
13
+ self, datasets: List[Any], hyperparam_count: int, parallel: bool, total_eval_runs: int
14
+ ) -> None:
13
15
  """Initialize progress bar manager.
14
16
 
15
17
  Args:
16
18
  datasets: List of datasets being evaluated
17
19
  hyperparam_count: Number of hyperparameter configurations per dataset
20
+ parallel: Whether running in parallel mode
21
+ total_eval_runs: Total number of EvalRunSpecs (dataset_count * hyperparam_count)
18
22
  """
19
23
  self.datasets = datasets
20
24
  self.hyperparam_count = hyperparam_count
25
+ self.parallel = parallel
26
+ self.total_eval_runs = total_eval_runs
27
+
21
28
  self.dataset_pbar: Optional[tqdm] = None
22
29
  self.hyperparam_pbar: Optional[tqdm] = None
23
30
 
24
- def start_dataset_progress(self) -> None:
25
- """Start the outer progress bar for datasets."""
31
+ # Track progress per dataset
32
+ self.current_dataset_idx = 0
33
+ self.completed_hyperparams_per_dataset: dict[int, int] = {}
34
+ self.completed_eval_runs = 0
35
+
36
+ def start_progress_bars(self) -> None:
37
+ """Start both progress bars."""
38
+ # Top level: Datasets
26
39
  self.dataset_pbar = tqdm(
27
40
  total=len(self.datasets),
28
41
  desc="Datasets ",
@@ -33,57 +46,101 @@ class EvaluationProgressBars:
33
46
  bar_format="{desc} {percentage:3.0f}%|{bar:40}| {n_fmt}/{total_fmt}",
34
47
  )
35
48
 
36
- def update_dataset_progress(self) -> None:
37
- """Update the dataset progress bar."""
38
- if self.dataset_pbar:
39
- self.dataset_pbar.update(1)
49
+ # Bottom level: Hyperparameters/Eval runs
50
+ if self.parallel:
51
+ # In parallel mode: show eval runs completed out of total
52
+ self.hyperparam_pbar = tqdm(
53
+ total=self.total_eval_runs,
54
+ desc="Eval Runs ",
55
+ unit="run",
56
+ position=1,
57
+ leave=False,
58
+ ncols=80,
59
+ bar_format="{desc} {percentage:3.0f}%|{bar:40}| {n_fmt}/{total_fmt}",
60
+ )
61
+ else:
62
+ # In sequential mode: show hyperparams per dataset
63
+ self.hyperparam_pbar = tqdm(
64
+ total=self.hyperparam_count,
65
+ desc="Hyperparams",
66
+ unit="config",
67
+ position=1,
68
+ leave=False,
69
+ ncols=80,
70
+ bar_format="{desc} {percentage:3.0f}%|{bar:40}| {n_fmt}/{total_fmt}",
71
+ )
40
72
 
41
- def close_dataset_progress(self) -> None:
42
- """Close the dataset progress bar."""
43
- if self.dataset_pbar:
44
- self.dataset_pbar.close()
45
- self.dataset_pbar = None
73
+ def on_eval_run_completed(self, dataset_idx: int) -> None:
74
+ """Update progress when an eval run (EvalRunSpec) completes in parallel mode."""
75
+ if not self.parallel:
76
+ return
46
77
 
47
- @contextmanager
48
- def hyperparam_progress_context(self) -> Generator[tqdm, None, None]:
49
- """Context manager for hyperparameter progress bar."""
50
- self.hyperparam_pbar = tqdm(
51
- total=self.hyperparam_count,
52
- desc="Hyperparams",
53
- unit="config",
54
- position=1,
55
- leave=False,
56
- ncols=80,
57
- bar_format="{desc} {percentage:3.0f}%|{bar:40}| {n_fmt}/{total_fmt}",
78
+ self.completed_eval_runs += 1
79
+ if self.hyperparam_pbar:
80
+ self.hyperparam_pbar.update(1)
81
+
82
+ # Track how many runs completed for this dataset
83
+ self.completed_hyperparams_per_dataset[dataset_idx] = (
84
+ self.completed_hyperparams_per_dataset.get(dataset_idx, 0) + 1
58
85
  )
59
- try:
60
- yield self.hyperparam_pbar
61
- finally:
62
- self.hyperparam_pbar.close()
63
- self.hyperparam_pbar = None
64
86
 
65
- def update_hyperparam_progress(self) -> None:
66
- """Update the hyperparameter progress bar."""
87
+ # Check if this dataset is complete
88
+ if self.completed_hyperparams_per_dataset[dataset_idx] == self.hyperparam_count:
89
+ if self.dataset_pbar:
90
+ self.dataset_pbar.update(1)
91
+
92
+ def on_hyperparam_completed(self, dataset_idx: int) -> None:
93
+ """Update progress when a hyperparameter config completes in sequential mode."""
94
+ if self.parallel:
95
+ return
96
+
67
97
  if self.hyperparam_pbar:
68
98
  self.hyperparam_pbar.update(1)
69
99
 
100
+ # Track completed hyperparams for this dataset
101
+ self.completed_hyperparams_per_dataset[dataset_idx] = (
102
+ self.completed_hyperparams_per_dataset.get(dataset_idx, 0) + 1
103
+ )
104
+
105
+ # Check if this dataset is complete
106
+ if self.completed_hyperparams_per_dataset[dataset_idx] == self.hyperparam_count:
107
+ # Update dataset progress
108
+ if self.dataset_pbar:
109
+ self.dataset_pbar.update(1)
110
+
111
+ # Reset hyperparameter progress for next dataset (if any)
112
+ if dataset_idx < len(self.datasets) - 1:
113
+ if self.hyperparam_pbar:
114
+ self.hyperparam_pbar.reset()
115
+
116
+ def close_progress_bars(self) -> None:
117
+ """Close both progress bars."""
118
+ if self.hyperparam_pbar:
119
+ self.hyperparam_pbar.close()
120
+ self.hyperparam_pbar = None
121
+ if self.dataset_pbar:
122
+ self.dataset_pbar.close()
123
+ self.dataset_pbar = None
124
+
70
125
 
71
126
  @contextmanager
72
127
  def evaluation_progress(
73
- datasets: List[Any], hyperparam_count: int
128
+ datasets: List[Any], hyperparam_count: int, parallel: bool, total_eval_runs: int
74
129
  ) -> Generator[EvaluationProgressBars, None, None]:
75
130
  """Context manager for evaluation progress bars.
76
131
 
77
132
  Args:
78
133
  datasets: List of datasets being evaluated
79
134
  hyperparam_count: Number of hyperparameter configurations per dataset
135
+ parallel: Whether running in parallel mode
136
+ total_eval_runs: Total number of EvalRunSpecs
80
137
 
81
138
  Yields:
82
139
  EvaluationProgressBars: Progress bar manager instance
83
140
  """
84
- progress_bars = EvaluationProgressBars(datasets, hyperparam_count)
85
- progress_bars.start_dataset_progress()
141
+ progress_bars = EvaluationProgressBars(datasets, hyperparam_count, parallel, total_eval_runs)
142
+ progress_bars.start_progress_bars()
86
143
  try:
87
144
  yield progress_bars
88
145
  finally:
89
- progress_bars.close_dataset_progress()
146
+ progress_bars.close_progress_bars()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: scorebook
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: A Python project for LLM evaluation.
5
5
  Author: Euan Campbell
6
6
  Author-email: euan@trismik.com
@@ -11,16 +11,26 @@ Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Classifier: Programming Language :: Python :: 3.13
14
+ Provides-Extra: bedrock
14
15
  Provides-Extra: examples
15
16
  Provides-Extra: openai
16
17
  Provides-Extra: portkey
18
+ Provides-Extra: vertex
17
19
  Requires-Dist: accelerate ; extra == "examples"
20
+ Requires-Dist: boto3 (==1.40.0) ; extra == "bedrock"
18
21
  Requires-Dist: datasets (>=3.6.0)
22
+ Requires-Dist: fsspec[gcs] ; extra == "vertex"
23
+ Requires-Dist: google-cloud-storage ; extra == "vertex"
24
+ Requires-Dist: google-genai ; extra == "vertex"
25
+ Requires-Dist: notebook (>=7.4.5,<8.0.0)
19
26
  Requires-Dist: notebook ; extra == "examples"
20
27
  Requires-Dist: openai ; extra == "openai"
28
+ Requires-Dist: pandas ; extra == "vertex"
21
29
  Requires-Dist: portkey-ai ; extra == "portkey"
30
+ Requires-Dist: python-dotenv ; extra == "bedrock"
22
31
  Requires-Dist: python-dotenv ; extra == "openai"
23
32
  Requires-Dist: python-dotenv ; extra == "portkey"
33
+ Requires-Dist: python-dotenv ; extra == "vertex"
24
34
  Requires-Dist: torch ; extra == "examples"
25
35
  Requires-Dist: torchaudio ; extra == "examples"
26
36
  Requires-Dist: torchvision ; extra == "examples"
@@ -0,0 +1,31 @@
1
+ scorebook/__init__.py,sha256=7ac3KpXU3kKFekq8mZ3cVbF7oQ6Q9E-uqX7ijyte1Q0,406
2
+ scorebook/evaluator.py,sha256=mS3G3PI26nHzqkYX4tqusQZJL5Q1xTxzqshAdwscl0s,14170
3
+ scorebook/exceptions.py,sha256=emq2QY-4mW6VXlq1dxunPjt-xZpLQIxo8Ck_gYxz1VE,1827
4
+ scorebook/inference/__init__.py,sha256=tqSXSyVurc_YRfPypYed8iTH7Fwt7iFCXMxBXnqY-9I,242
5
+ scorebook/inference/bedrock.py,sha256=wllq0ysNFQKWJDEqoN-k96Jx43BHCAvfxm14zMRCf90,10074
6
+ scorebook/inference/openai.py,sha256=FqXua4v4PTYSHrdTm_9fM0Us8Mo2n2LSN94CwRipRw4,7658
7
+ scorebook/inference/portkey.py,sha256=OHSS-sa2aLxuO6fEfG8MsPlhXc_95_-6j7ImbCkY8KE,5952
8
+ scorebook/inference/vertex.py,sha256=jv_Nbt1NJQ6mMUyEuW_idxhj_3fugBojshtpGP9fMeY,9874
9
+ scorebook/metrics/__init__.py,sha256=be_riJNojebXw2xfkMsHHjl3HFKgk9jQWlLkXJHhheI,782
10
+ scorebook/metrics/accuracy.py,sha256=5KQ4hfOn9M94sB7WsXUelJWJiuKfoCGQEl5q5q9vNfo,1467
11
+ scorebook/metrics/metric_base.py,sha256=I3L0DGcRojFp93UGFnXG1tZ2UK9ilTcXXJG6lj5ddXA,857
12
+ scorebook/metrics/metric_registry.py,sha256=jWwt9P3zvtFLlEYrd60v7LS7X251nZczouE02zcCxWg,3402
13
+ scorebook/metrics/precision.py,sha256=AaYPYYKnY74Nwqp_p3jd2Ewf3VHNOJjoRWf5fhb-tXk,563
14
+ scorebook/types/__init__.py,sha256=dXY3Y-GiMipVExzVu7H5pbdFfg4HBMEKxqSTfENywSs,427
15
+ scorebook/types/eval_dataset.py,sha256=dCqOHjGaEb7pGG1VF4aGFn6hngFvlxpxddqsDtM4nTs,13870
16
+ scorebook/types/eval_result.py,sha256=R2zuWrx8p9_4A2W3Gmlp-xGgmelPdg8QB5PoV1hiqRc,4728
17
+ scorebook/types/eval_run_spec.py,sha256=nf7LGa_dG60Qb385W6O6qiu7VlJ03-dpo2X1PgKGcRQ,845
18
+ scorebook/types/inference_pipeline.py,sha256=-HcGGbwM34fGJ_FlXcyqj_pV6DjWIXRGgICiN_63UsU,3251
19
+ scorebook/utils/__init__.py,sha256=l_bfi9lAMz1oyGnuyKuzYasQKt2DJwffqsbfSl4-GIQ,452
20
+ scorebook/utils/async_utils.py,sha256=OeNvMrOT9P4rIyaCf5IbR3ZIFMtEzXgoAArNbINRtMU,728
21
+ scorebook/utils/build_prompt.py,sha256=L_Y84a1ewm3GvwnSSuUXfPO_M0QL1Dl8UgOS_l_zvh4,1617
22
+ scorebook/utils/io_helpers.py,sha256=ksOJ9ILcZqqt-HwRUYy1NMQbS6RuMh8i2ZzUADLMlQ8,913
23
+ scorebook/utils/jinja_helpers.py,sha256=ksIKHiKdj8N0o7ZJZGasfbSNoAY6K5d9X_KM6mcKYD4,4208
24
+ scorebook/utils/logging_utils.py,sha256=M4BXt369mJo037WYpvuWDoe3oGWVdHWaGo4Vbl0WDL0,60
25
+ scorebook/utils/mappers.py,sha256=OcUnPBrnSUxZNhAzJhVmVWUWmqIKFXLTrK-xLi6_SUg,1259
26
+ scorebook/utils/progress_bars.py,sha256=TBz41w3yFujsO9n8vUjeubgOrmdiAMI2P2SSVqTJzAA,5269
27
+ scorebook/utils/transform_helpers.py,sha256=UnVLtFvcJrtmBEmLsuA4rrX4iJlNUKxm2DkIOGLl-2o,1030
28
+ scorebook-0.0.3.dist-info/LICENSE,sha256=JLH1g9FhxHZf6CBCeQ_xAisPtICVObuNGW1bLPiTYEs,1068
29
+ scorebook-0.0.3.dist-info/METADATA,sha256=i0tLm4SNSiPTNEP8QU0ZjsfOqizw4uu3GWPVqdxrcso,11409
30
+ scorebook-0.0.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
31
+ scorebook-0.0.3.dist-info/RECORD,,
@@ -1,24 +0,0 @@
1
- scorebook/__init__.py,sha256=cYv8bT3_7o2MTxPVKiv51DcpaPtH_A9qOH5yF_FULZo,336
2
- scorebook/evaluator.py,sha256=Ce4KerLVPlaF63xng9RKH9M1l-ldo3mdrd3T2dBs_YE,8908
3
- scorebook/inference/__init__.py,sha256=sU_ZSN9eO7ajZ-QklNpx8_gf3jCdDn69J-SfU0z07-E,333
4
- scorebook/inference/openai.py,sha256=XD1dbPrEHQJVXOMtqCt9a0yQ-qR381N5XXhCrgz8jio,5826
5
- scorebook/inference/portkey.py,sha256=OHSS-sa2aLxuO6fEfG8MsPlhXc_95_-6j7ImbCkY8KE,5952
6
- scorebook/metrics/__init__.py,sha256=be_riJNojebXw2xfkMsHHjl3HFKgk9jQWlLkXJHhheI,782
7
- scorebook/metrics/accuracy.py,sha256=5KQ4hfOn9M94sB7WsXUelJWJiuKfoCGQEl5q5q9vNfo,1467
8
- scorebook/metrics/metric_base.py,sha256=I3L0DGcRojFp93UGFnXG1tZ2UK9ilTcXXJG6lj5ddXA,857
9
- scorebook/metrics/metric_registry.py,sha256=jWwt9P3zvtFLlEYrd60v7LS7X251nZczouE02zcCxWg,3402
10
- scorebook/metrics/precision.py,sha256=AaYPYYKnY74Nwqp_p3jd2Ewf3VHNOJjoRWf5fhb-tXk,563
11
- scorebook/types/__init__.py,sha256=xQMOae_fIBbeyeuqoa7SbNwjxAiVinPBbckOcUzo57U,358
12
- scorebook/types/eval_dataset.py,sha256=TeIeVHQ597NxedxaTEXohZO8gR5iAiDtJbCja_u69EI,11703
13
- scorebook/types/eval_result.py,sha256=y0vLN6RMgiz1lyai5ltmzDibBHE25-k9bTrQ7U27RZ8,4552
14
- scorebook/types/inference_pipeline.py,sha256=M3JgchpcVdhRJPzn3mh5ys6iivSt8eBmHIj4F5LcFYU,3167
15
- scorebook/utils/__init__.py,sha256=DmhS61OZ2nNWkGxDfVrMBwwiH7dmLAbg3MHuNgaXhQg,382
16
- scorebook/utils/async_utils.py,sha256=OeNvMrOT9P4rIyaCf5IbR3ZIFMtEzXgoAArNbINRtMU,728
17
- scorebook/utils/io_helpers.py,sha256=ksOJ9ILcZqqt-HwRUYy1NMQbS6RuMh8i2ZzUADLMlQ8,913
18
- scorebook/utils/mappers.py,sha256=OcUnPBrnSUxZNhAzJhVmVWUWmqIKFXLTrK-xLi6_SUg,1259
19
- scorebook/utils/progress_bars.py,sha256=BlKqYlXDbik5eUn5nf5f7QnMvnTj8CU_CfXKxCWp3Ww,2909
20
- scorebook/utils/transform_helpers.py,sha256=UnVLtFvcJrtmBEmLsuA4rrX4iJlNUKxm2DkIOGLl-2o,1030
21
- scorebook-0.0.1.dist-info/LICENSE,sha256=JLH1g9FhxHZf6CBCeQ_xAisPtICVObuNGW1bLPiTYEs,1068
22
- scorebook-0.0.1.dist-info/METADATA,sha256=oiwYbuJkRVkoFZkIAQej09LdG5xBLxhKPy2ozWTV-_w,10976
23
- scorebook-0.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- scorebook-0.0.1.dist-info/RECORD,,