chronobench 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ """
2
+ Chronobench: a registry-driven framework for benchmarking machine learning experiments.
3
+ """
4
+
5
+ from ._main import main
6
+ from .context import Context
7
+ from .registry import data_loaders, metrics, models, runner
8
+
9
# Public API of the package: the command-line entry point, the initializer
# context, and the registries / runner hook re-exported from ``.registry``.
__all__ = [
    "main",
    "Context",
    "data_loaders",
    "models",
    "metrics",
    "runner",
]
chronobench/_main.py ADDED
@@ -0,0 +1,61 @@
1
+ """
2
+ Entry point that dispatches CLI arguments to the appropriate execution mode.
3
+ """
4
+
5
+ from . import registry
6
+ from .modes.dry_run import main as main_dry_run
7
+ from .modes.run_many import main as main_run_many
8
+ from .utility.loading import load_environment, load_experiment_dict
9
+ from .utility.parsing_arguments import parse_args
10
+
11
+ __all__ = ["main"]
12
+
13
+
14
def main():
    """
    Command-line entry point: parse the arguments and run the selected mode.

    Nothing is passed to this function. Register data loaders, models and
    metrics on the module-level registries and decorate the training loop
    with :func:`chronobench.runner` before calling it::

        import chronobench as cb
        from src.loaders import IrisLoader

        cb.data_loaders.register(iris=IrisLoader)

        @cb.runner
        def runner(data_loader, model, metrics):
            ...

        cb.main()

    An experiment TOML entry picks its implementation through either a
    registered ``name`` or a dotted ``target`` import path, so stock library
    classes (e.g. ``sklearn.svm.SVC``) need no wrapper.

    When ``args.dry_run`` is set the configurations are only validated;
    otherwise every experiment is executed with the registered runner.
    """
    args = parse_args()
    environment = load_environment(args.environment)

    # Keyword arguments common to both execution modes.
    shared = {
        "experiments": load_experiment_dict(environment, args.experiments),
        "data_loaders": registry.data_loaders,
        "models": registry.models,
        "metrics": registry.metrics,
        "environment": environment,
    }

    if not args.dry_run:
        # The runner is only looked up when jobs are actually executed.
        main_run_many(debug=args.debug, runner=registry.get_runner(), **shared)
        return

    if args.debug:
        print("Warning: --debug has no effect in dry-run mode.")
    main_dry_run(**shared)
chronobench/context.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ Context dataclass passed to user-supplied initializer callbacks.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class Context:
    """
    Cross-initializer context handed to each user-supplied callback.

    Every field defaults to ``None`` and the context is filled in stages as
    initialization proceeds:

    - ``initialize_data_loaders`` receives ``Context()`` (all fields ``None``).
    - ``initialize_models`` receives a ``Context`` with the data-loader fields set.
    - ``initialize_metrics`` receives a fully populated ``Context``.

    Attributes
    ----------
    data_loader : object, optional
        The instantiated data loader for the current job.
    data_loader_name : str, optional
        The name of the current data loader as defined in the experiment
        configuration.
    data_loader_kwargs : dict, optional
        The keyword arguments used to instantiate the current data loader.
    model : object, optional
        The instantiated model for the current job.
    model_name : str, optional
        The name of the current model as defined in the experiment
        configuration.
    model_kwargs : dict, optional
        The keyword arguments used to instantiate the current model.
    """

    # Data-loader stage: available to model and metric initializers.
    data_loader: object | None = None
    data_loader_name: str | None = None
    data_loader_kwargs: dict | None = None
    # Model stage: available to metric initializers only.
    model: object | None = None
    model_name: str | None = None
    model_kwargs: dict | None = None
@@ -0,0 +1,220 @@
1
+ """
2
+ Mode for validating experiment configurations without executing any jobs.
3
+
4
+ The dry-run report is the primary output of this mode, so it is written
5
+ directly to stdout with :func:`print` rather than through :mod:`logging`. This
6
+ means it is always visible without the caller having to configure logging.
7
+ """
8
+
9
+ import shutil
10
+
11
+ from chronobench.context import Context
12
+ from chronobench.registry import resolve
13
+ from chronobench.utility.config_validation import (
14
+ validate_environment,
15
+ validate_experiment,
16
+ )
17
+ from chronobench.utility.handling_kwargs import (
18
+ expand_kwargs_data_loaders,
19
+ expand_kwargs_metrics,
20
+ expand_kwargs_models,
21
+ )
22
+
23
+ __all__ = ["main"]
24
+
25
+ _PREFIX = "[dry-run] "
26
+
27
+
28
def main(
    experiments,
    data_loaders,
    models,
    metrics,
    environment,
):
    """
    Dry-run every experiment and report which configurations fail.

    The environment configuration is validated once, then each experiment's
    TOML structure is validated and every data loader, model variant and
    metrics list is instantiated. Nothing is executed or written; a summary
    of all recorded failures is printed at the end.

    Parameters
    ----------
    experiments : dict
        Mapping from experiment name to its parsed configuration dict.
    data_loaders : Registry
        Registry used to resolve data-loader ``name`` selectors.
    models : Registry
        Registry used to resolve model ``name`` selectors.
    metrics : Registry
        Registry used to resolve metric ``name`` selectors.
    environment : dict
        Loaded environment configuration.

    Notes
    -----
    The stages run in order (data, models, metrics). The first successful
    result of a stage seeds the ``Context`` given to the next one, so
    cross-initializer dependencies are exercised. The metrics stage only
    receives a populated context when both earlier stages produced at least
    one success; otherwise it receives an empty ``Context``.
    """
    _full_line("=")
    failures = []

    try:
        validate_environment(environment)
    except Exception as e:
        print(f"{_PREFIX} FAILED Validating environment configuration")
        failures.append({"name": "Validation of environment", "error": e})

    for experiment_name, experiment in experiments.items():
        _full_with_header(experiment_name, "=")

        try:
            validate_experiment(experiment_name, experiment)
        except ValueError as e:
            print(f"{_PREFIX} FAILED Validating experiment configuration")
            failures.append({"name": "Validation of toml file", "error": e})
            # An invalid structure cannot be expanded; move to the next one.
            continue

        print(f"{_PREFIX}>>> Data")
        dl_results = _check_and_print(
            environment,
            expand_kwargs_data_loaders(experiment),
            data_loaders,
            experiment_name,
            "data",
            failures,
        )

        # Context fields contributed by the first data loader that passed.
        loader_fields = {}
        if dl_results:
            loader_label, loader_kwargs, loader = dl_results[0]
            loader_fields = {
                "data_loader": loader,
                "data_loader_name": loader_label,
                "data_loader_kwargs": loader_kwargs,
            }

        print(f"{_PREFIX}>>> Models")
        model_results = _check_and_print(
            environment,
            expand_kwargs_models(experiment),
            models,
            experiment_name,
            "models",
            failures,
            context=Context(**loader_fields),
        )

        if dl_results and model_results:
            model_label, model_kwargs, model = model_results[0]
            metric_ctx = Context(
                model=model,
                model_name=model_label,
                model_kwargs=model_kwargs,
                **loader_fields,
            )
        else:
            metric_ctx = Context()

        print(f"{_PREFIX}>>> Metrics")
        _check_and_print(
            environment,
            expand_kwargs_metrics(experiment),
            metrics,
            experiment_name,
            "metrics",
            failures,
            context=metric_ctx,
        )

    _print_summary(failures)
148
+
149
+
150
def _check_and_print(
    environment,
    pairs,
    registry,
    experiment_name,
    selector_key,
    failures,
    context=None,
):
    """
    Resolve each ``(selector, kwargs)`` pair and print PASSED or FAILED.

    Any exception raised while resolving a pair is recorded in ``failures``
    (mutated in place) instead of being propagated. When ``resolve`` returns
    a list, only its first element is kept for the returned tuple.

    Returns
    -------
    list of tuple
        ``(selector.label, kwargs, instance)`` for every pair that passed,
        in input order.
    """
    ctx = Context() if context is None else context
    # Only the metrics stage asks ``resolve`` to defer on an incomplete context.
    is_metrics_stage = selector_key == "metrics"

    passed = []
    for selector, kwargs in pairs:
        try:
            resolved = resolve(
                registry,
                selector,
                environment,
                ctx,
                defer_if_incomplete=is_metrics_stage,
                **kwargs,
            )
            if isinstance(resolved, list):
                first = resolved[0]
            else:
                first = resolved
            passed.append((selector.label, kwargs, first))
            print(f"{_PREFIX} PASSED {selector.label} {kwargs}")
        except Exception as error:
            print(f"{_PREFIX} FAILED {selector.label} {kwargs}")
            failures.append(
                {
                    "name": f"{experiment_name} / {selector_key} / {selector.label} {kwargs}",
                    "error": error,
                }
            )
    return passed
185
+
186
+
187
def _print_summary(failures):
    """
    Print the closing SUMMARY section of the dry-run report.

    Lists every entry of ``failures`` (dicts with ``name`` and ``error``)
    with a 1-based index, or a success message when the list is empty.
    """
    _full_with_header("SUMMARY", "=")

    if failures:
        print(f"{_PREFIX}{len(failures)} configuration(s) failed:")
        for index, entry in enumerate(failures, start=1):
            print(f"{_PREFIX} [{index}] {entry['name']}")
            error = entry["error"]
            print(f"{_PREFIX} {error.__class__.__name__}: {error}")
    else:
        print(f"{_PREFIX}All configurations passed successfully!")
        print(_PREFIX)

    _full_line("=")
205
+
206
+
207
def _prefix_len() -> int:
    """Return the number of characters in the dry-run line prefix."""
    return len(_PREFIX)
209
+
210
+
211
def _full_with_header(name: str, symbol: str):
    """
    Print a section header framed by two prefix-only lines.

    The header reads ``<symbol * 3> <name> `` and is right-padded with
    ``symbol`` so that prefix plus header span the terminal width.

    Parameters
    ----------
    name : str
        Title shown in the header.
    symbol : str
        Single character used for the leading marker and the padding.
    """
    width = shutil.get_terminal_size().columns - _prefix_len()
    header = symbol * 3 + " " + name + " "
    print(_PREFIX)
    # ``str.ljust`` leaves the text untouched when ``width`` is not larger
    # than the text, including negative widths from a terminal narrower than
    # the prefix. A nested format spec would raise ValueError in that case.
    print(f"{_PREFIX}{header.ljust(width, symbol)}")
    print(_PREFIX)
216
+
217
+
218
def _full_line(symbol: str):
    """Print the prefix followed by ``symbol`` repeated to the terminal width."""
    # The prefix takes up part of the line, so it is subtracted from the width.
    width = shutil.get_terminal_size().columns - _prefix_len()
    print(f"{_PREFIX}{symbol * width}")
@@ -0,0 +1,240 @@
1
+ """
2
+ Mode for executing a single experiment and saving results to CSV.
3
+ """
4
+
5
+ import csv
6
+ import logging
7
+ import multiprocessing
8
+ import pathlib
9
+ from datetime import datetime
10
+ from functools import partial
11
+ from typing import NamedTuple
12
+
13
try:
    from tqdm import tqdm
except ImportError:  # tqdm is an optional dependency

    class tqdm:  # type: ignore[no-redef]  # noqa: N801
        """
        No-op stand-in used when tqdm is not installed.

        Supports iteration over a wrapped iterable, use as a context manager
        and ``update``; nothing is displayed.
        """

        def __init__(self, iterable=None, *args, **kwargs):
            # Extra positional/keyword arguments are accepted and ignored.
            self._iterable = iterable if iterable is not None else []

        def __iter__(self):
            return iter(self._iterable)

        def __enter__(self):
            return self

        def __exit__(self, *args):
            # Returning False lets exceptions from the ``with`` body propagate.
            return False

        def update(self, n=1):
            # Progress is not tracked by the stand-in.
            pass
34
+
35
+
36
+ from chronobench.context import Context
37
+ from chronobench.utility.handling_kwargs import (
38
+ initialize_all_data_loaders,
39
+ initialize_all_metrics,
40
+ initialize_all_models,
41
+ )
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
class Job(NamedTuple):
    """
    One unit of work: a single (data loader, model) combination.

    Produced by the Cartesian product of all data loader variants and model
    variants before execution begins.
    """

    # Data-loader side: configured name, instantiation kwargs, and instance.
    loader_name: str
    loader_kwargs: dict
    data_loader: object
    # Model side: configured name, instantiation kwargs, and instance.
    model_name: str
    model_kwargs: dict
    model: object
60
+
61
+
62
def main(
    name,
    experiment,
    data_loaders,
    models,
    metrics,
    runner,
    debug,
    environment,
):
    """
    Run every job of one experiment and save the collected rows as CSV.

    A job is one (data loader variant, model variant) pair. Models are
    initialized once per data loader with a ``Context`` carrying that loader.
    With ``n-jobs`` equal to 1 the jobs run sequentially in this process;
    any other value runs them through a :class:`multiprocessing.Pool` with
    that many worker processes. The rows are handed to ``save_results``.

    Parameters
    ----------
    name : str
        Experiment name, used as the output subdirectory.
    experiment : dict
        Parsed experiment configuration.
    data_loaders : Registry
        Registry used to resolve data-loader ``name`` selectors.
    models : Registry
        Registry used to resolve model ``name`` selectors.
    metrics : Registry
        Registry used to resolve metric ``name`` selectors; resolved once
        per job inside ``_single_job``.
    runner : callable
        Callback with signature ``(data_loader, model, metrics)`` returning a
        ``dict`` of computed metric values.
    debug : bool
        When ``True``, limit execution to the first three jobs.
    environment : dict
        Loaded environment configuration; must contain ``n-jobs``.
    """
    # The same keyword arguments apply to every job, serial or parallel.
    run_job = partial(
        _single_job,
        metrics=metrics,
        environment=environment,
        experiment=experiment,
        runner=runner,
    )

    jobs = [
        Job(loader_name, loader_kwargs, data_loader, model_name, model_kwargs, model)
        for loader_name, loader_kwargs, data_loader in initialize_all_data_loaders(
            environment, experiment, data_loaders
        )
        for model_name, model_kwargs, model in initialize_all_models(
            environment,
            experiment,
            models,
            context=Context(
                data_loader=data_loader,
                data_loader_name=loader_name,
                data_loader_kwargs=loader_kwargs,
            ),
        )
    ]
    if debug:
        jobs = jobs[:3]

    n_workers = environment["n-jobs"]
    if n_workers == 1:
        result = [run_job(job) for job in tqdm(jobs)]
    else:
        with multiprocessing.Pool(processes=n_workers) as pool:
            with tqdm(total=len(jobs)) as pbar:
                pending = [
                    pool.apply_async(
                        run_job,
                        args=(job,),
                        callback=lambda _: pbar.update(1),
                    )
                    for job in jobs
                ]
                # Wait for the workers while the progress bar is still open.
                pool.close()
                pool.join()

            result = [handle.get() for handle in pending]

    save_results(environment, name, debug, result)
167
+
168
+
169
def _single_job(job, metrics, environment, experiment, runner):
    """
    Run one job and return its result row.

    Metrics are initialized for this job with a fully populated ``Context``,
    then ``runner(data_loader, model, metrics)`` is called. The row holds
    ``Dataset``, the loader kwargs, ``Model``, the model kwargs and finally
    the values returned by the runner. A kwarg name used by both the loader
    and the model is written as ``Dataset.<name>`` and ``Model.<name>``.
    """
    metric_instances = initialize_all_metrics(
        environment,
        experiment,
        metrics,
        context=Context(
            data_loader=job.data_loader,
            data_loader_name=job.loader_name,
            data_loader_kwargs=job.loader_kwargs,
            model=job.model,
            model_name=job.model_name,
            model_kwargs=job.model_kwargs,
        ),
    )
    computed_metrics = runner(job.data_loader, job.model, metric_instances)

    # Kwarg names present on both sides need a qualifying prefix.
    shared_names = job.loader_kwargs.keys() & job.model_kwargs.keys()

    row = {"Dataset": job.loader_name}
    for key, value in job.loader_kwargs.items():
        row[f"Dataset.{key}" if key in shared_names else key] = value
    row["Model"] = job.model_name
    for key, value in job.model_kwargs.items():
        row[f"Model.{key}" if key in shared_names else key] = value
    return {**row, **computed_metrics}
197
+
198
+
199
def save_results(
    environment,
    name,
    debug,
    rows,
) -> None:
    """
    Write experiment results to a timestamped CSV file.

    The output directory ``path-to-results/raw/<name>/`` is created if it
    does not exist. Column names are the union of all keys present in
    ``rows``, in order of first appearance. Values missing from a row are
    written as empty strings. The file is always encoded as UTF-8.

    Parameters
    ----------
    environment : dict
        Loaded environment configuration. Must contain ``path-to-results``.
    name : str
        Experiment name, used as the output subdirectory under
        ``path-to-results/raw/``.
    debug : bool
        When ``True``, the output filename is prefixed with ``DEBUG_``.
    rows : list of dict
        One dict per job; keys become CSV column headers.
    """
    base_path = pathlib.Path(environment["path-to-results"]).resolve() / "raw" / name
    if not base_path.exists():
        logger.info("Creating directory %s", base_path)
    # ``exist_ok`` covers the case where another process creates the
    # directory between the check above and this call.
    base_path.mkdir(parents=True, exist_ok=True)

    prefix = "DEBUG_" if debug else ""
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    results_path = base_path / f"{prefix}{timestamp}.csv"

    # dict.fromkeys de-duplicates while keeping first-seen column order.
    fieldnames = list(dict.fromkeys(k for row in rows for k in row))

    # An explicit encoding keeps the output independent of the platform
    # locale, so non-ASCII names or values cannot fail to encode.
    with open(results_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(rows)
@@ -0,0 +1,51 @@
1
+ """
2
+ Mode for executing multiple experiments sequentially.
3
+ """
4
+
5
+ from chronobench.modes.run import main as main_run
6
+
7
+
8
def main(
    experiments,
    data_loaders,
    models,
    metrics,
    runner,
    debug,
    environment,
):
    """
    Run all experiments one after another.

    Each entry of ``experiments`` is passed to
    :func:`chronobench.modes.run.main` together with the shared registries,
    runner, debug flag and environment.

    Parameters
    ----------
    experiments : dict
        Mapping from experiment name to its parsed configuration dict.
    data_loaders : Registry
        Registry used to resolve data-loader ``name`` selectors.
    models : Registry
        Registry used to resolve model ``name`` selectors.
    metrics : Registry
        Registry used to resolve metric ``name`` selectors.
    runner : callable
        Callback with signature ``(data_loader, model, metrics)`` returning a
        ``dict`` of computed metric values.
    debug : bool
        When ``True``, limit each experiment to the first three jobs.
    environment : dict
        Loaded environment configuration.
    """
    # Arguments that are identical for every experiment.
    shared = {
        "data_loaders": data_loaders,
        "models": models,
        "metrics": metrics,
        "runner": runner,
        "debug": debug,
        "environment": environment,
    }
    for experiment_name, experiment_config in experiments.items():
        main_run(name=experiment_name, experiment=experiment_config, **shared)