chronobench 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chronobench/__init__.py +16 -0
- chronobench/_main.py +61 -0
- chronobench/context.py +42 -0
- chronobench/modes/dry_run.py +220 -0
- chronobench/modes/run.py +240 -0
- chronobench/modes/run_many.py +51 -0
- chronobench/registry.py +346 -0
- chronobench/utility/config_validation.py +106 -0
- chronobench/utility/handling_kwargs.py +263 -0
- chronobench/utility/loading.py +175 -0
- chronobench/utility/parsing_arguments.py +53 -0
- chronobench-0.1.1.dist-info/METADATA +199 -0
- chronobench-0.1.1.dist-info/RECORD +16 -0
- chronobench-0.1.1.dist-info/WHEEL +5 -0
- chronobench-0.1.1.dist-info/licenses/LICENSE +19 -0
- chronobench-0.1.1.dist-info/top_level.txt +1 -0
chronobench/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Chronobench: a registry-driven framework for benchmarking machine learning experiments.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from ._main import main
|
|
6
|
+
from .context import Context
|
|
7
|
+
from .registry import data_loaders, metrics, models, runner
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"main",
|
|
11
|
+
"Context",
|
|
12
|
+
"data_loaders",
|
|
13
|
+
"models",
|
|
14
|
+
"metrics",
|
|
15
|
+
"runner",
|
|
16
|
+
]
|
chronobench/_main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Entry point that dispatches CLI arguments to the appropriate execution mode.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from . import registry
|
|
6
|
+
from .modes.dry_run import main as main_dry_run
|
|
7
|
+
from .modes.run_many import main as main_run_many
|
|
8
|
+
from .utility.loading import load_environment, load_experiment_dict
|
|
9
|
+
from .utility.parsing_arguments import parse_args
|
|
10
|
+
|
|
11
|
+
__all__ = ["main"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main():
    """
    Command-line entry point: parse arguments and hand off to a mode.

    Register data loaders, models and metrics on the module-level
    registries, and decorate the training loop with
    :func:`chronobench.runner`, before calling this function::

        import chronobench as cb
        from src.loaders import IrisLoader

        cb.data_loaders.register(iris=IrisLoader)

        @cb.runner
        def runner(data_loader, model, metrics):
            ...

        cb.main()

    Entries in an experiment TOML choose an implementation through either a
    registered ``name`` or a dotted ``target`` import path, which lets stock
    library classes (e.g. ``sklearn.svm.SVC``) be used without a wrapper.
    ``main()`` takes no arguments: the registries and the registered runner
    are read directly from :mod:`chronobench.registry`.

    With ``--dry-run`` the experiments are only validated and the runner is
    never looked up; otherwise every selected experiment is executed.
    """
    cli = parse_args()
    env = load_environment(cli.environment)
    selected = load_experiment_dict(env, cli.experiments)

    # Normal execution: run everything and stop here.
    if not cli.dry_run:
        main_run_many(
            experiments=selected,
            debug=cli.debug,
            data_loaders=registry.data_loaders,
            models=registry.models,
            metrics=registry.metrics,
            runner=registry.get_runner(),
            environment=env,
        )
        return

    # Dry-run: --debug only limits job execution, which never happens here.
    if cli.debug:
        print("Warning: --debug has no effect in dry-run mode.")
    main_dry_run(
        experiments=selected,
        data_loaders=registry.data_loaders,
        models=registry.models,
        metrics=registry.metrics,
        environment=env,
    )
|
chronobench/context.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context dataclass passed to user-supplied initializer callbacks.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
|
|
9
|
+
class Context:
|
|
10
|
+
"""
|
|
11
|
+
Cross-initializer context passed progressively to each callback.
|
|
12
|
+
|
|
13
|
+
The context is populated in stages as initialization proceeds:
|
|
14
|
+
|
|
15
|
+
- ``initialize_data_loaders`` receives ``Context()`` (all fields ``None``).
|
|
16
|
+
- ``initialize_models`` receives a ``Context`` with the data-loader fields set.
|
|
17
|
+
- ``initialize_metrics`` receives a fully populated ``Context``.
|
|
18
|
+
|
|
19
|
+
Attributes
|
|
20
|
+
----------
|
|
21
|
+
data_loader : object, optional
|
|
22
|
+
The instantiated data loader for the current job.
|
|
23
|
+
data_loader_name : str, optional
|
|
24
|
+
The name of the current data loader as defined in the experiment
|
|
25
|
+
configuration.
|
|
26
|
+
data_loader_kwargs : dict, optional
|
|
27
|
+
The keyword arguments used to instantiate the current data loader.
|
|
28
|
+
model : object, optional
|
|
29
|
+
The instantiated model for the current job.
|
|
30
|
+
model_name : str, optional
|
|
31
|
+
The name of the current model as defined in the experiment
|
|
32
|
+
configuration.
|
|
33
|
+
model_kwargs : dict, optional
|
|
34
|
+
The keyword arguments used to instantiate the current model.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
data_loader: object | None = None
|
|
38
|
+
data_loader_name: str | None = None
|
|
39
|
+
data_loader_kwargs: dict | None = None
|
|
40
|
+
model: object | None = None
|
|
41
|
+
model_name: str | None = None
|
|
42
|
+
model_kwargs: dict | None = None
|
|
chronobench/modes/dry_run.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mode for validating experiment configurations without executing any jobs.
|
|
3
|
+
|
|
4
|
+
The dry-run report is the primary output of this mode, so it is written
|
|
5
|
+
directly to stdout with :func:`print` rather than through :mod:`logging`. This
|
|
6
|
+
means it is always visible without the caller having to configure logging.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import shutil
|
|
10
|
+
|
|
11
|
+
from chronobench.context import Context
|
|
12
|
+
from chronobench.registry import resolve
|
|
13
|
+
from chronobench.utility.config_validation import (
|
|
14
|
+
validate_environment,
|
|
15
|
+
validate_experiment,
|
|
16
|
+
)
|
|
17
|
+
from chronobench.utility.handling_kwargs import (
|
|
18
|
+
expand_kwargs_data_loaders,
|
|
19
|
+
expand_kwargs_metrics,
|
|
20
|
+
expand_kwargs_models,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = ["main"]
|
|
24
|
+
|
|
25
|
+
_PREFIX = "[dry-run] "
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def main(
    experiments,
    data_loaders,
    models,
    metrics,
    environment,
):
    """
    Validate every experiment configuration and print a pass/fail report.

    The environment configuration is validated once. Each experiment is then
    validated structurally, and every data-loader, model and metric selector
    it expands to is resolved. All failures are collected and listed in a
    closing summary. This function itself never calls a runner and never
    writes result files.

    Parameters
    ----------
    experiments : dict
        Mapping from experiment name to its parsed configuration dict.
    data_loaders : Registry
        Registry used to resolve data-loader ``name`` selectors.
    models : Registry
        Registry used to resolve model ``name`` selectors.
    metrics : Registry
        Registry used to resolve metric ``name`` selectors.
    environment : dict
        Loaded environment configuration.

    Notes
    -----
    Stages run in the order data loaders, models, metrics. The first data
    loader that resolves successfully populates the ``Context`` given to the
    model stage. The metric stage receives a ``Context`` holding the first
    data loader and the first model, or an empty ``Context`` when either
    stage produced no result.

    An experiment whose structural validation raises ``ValueError`` is
    recorded as a failure and its three stages are skipped. The record is
    labelled with the experiment name so that several failing experiments
    remain distinguishable in the summary.
    """
    _full_line("=")
    failures = []

    try:
        validate_environment(environment)
    except Exception as e:
        print(f"{_PREFIX} FAILED Validating environment configuration")
        failures.append(
            {
                "name": "Validation of environment",
                "error": e,
            }
        )

    for experiment_name, experiment in experiments.items():
        _full_with_header(experiment_name, "=")

        try:
            validate_experiment(experiment_name, experiment)
        except ValueError as e:
            print(f"{_PREFIX} FAILED Validating experiment configuration")
            failures.append(
                {
                    # Prefixed with the experiment name, like every other
                    # per-experiment failure record.
                    "name": f"{experiment_name} / Validation of toml file",
                    "error": e,
                }
            )
            continue

        print(f"{_PREFIX}>>> Data")
        dl_results = _check_and_print(
            environment,
            expand_kwargs_data_loaders(experiment),
            data_loaders,
            experiment_name,
            "data",
            failures,
        )

        # Context fields contributed by the first data loader that resolved;
        # stays empty when none did, which yields a blank Context below.
        dl_fields = {}
        if dl_results:
            dl_name, dl_kwargs, dl_instance = dl_results[0]
            dl_fields = {
                "data_loader": dl_instance,
                "data_loader_name": dl_name,
                "data_loader_kwargs": dl_kwargs,
            }

        print(f"{_PREFIX}>>> Models")
        model_results = _check_and_print(
            environment,
            expand_kwargs_models(experiment),
            models,
            experiment_name,
            "models",
            failures,
            context=Context(**dl_fields),
        )

        # Metrics only get a populated context when both earlier stages
        # produced at least one instance.
        if dl_results and model_results:
            model_name, model_kwargs, model_instance = model_results[0]
            metric_ctx = Context(
                **dl_fields,
                model=model_instance,
                model_name=model_name,
                model_kwargs=model_kwargs,
            )
        else:
            metric_ctx = Context()

        print(f"{_PREFIX}>>> Metrics")
        _check_and_print(
            environment,
            expand_kwargs_metrics(experiment),
            metrics,
            experiment_name,
            "metrics",
            failures,
            context=metric_ctx,
        )

    _print_summary(failures)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _check_and_print(
    environment,
    pairs,
    registry,
    experiment_name,
    selector_key,
    failures,
    context=None,
):
    """
    Resolve each ``(selector, kwargs)`` pair and print PASSED or FAILED.

    Parameters
    ----------
    environment : dict
        Loaded environment configuration, forwarded to ``resolve``.
    pairs : iterable
        ``(selector, kwargs)`` tuples to try; each selector exposes a
        ``label`` attribute used in the report.
    registry : Registry
        Registry forwarded to ``resolve``.
    experiment_name : str
        Name of the experiment, used in failure records.
    selector_key : str
        Stage label (``"data"``, ``"models"`` or ``"metrics"``) used in
        failure records. ``"metrics"`` additionally turns on
        ``defer_if_incomplete`` for ``resolve``.
    failures : list
        Mutated in place: one ``{"name", "error"}`` dict is appended per
        pair that raises.
    context : Context, optional
        Context forwarded to ``resolve``; an empty ``Context`` when omitted.

    Returns
    -------
    list of tuple
        ``(label, kwargs, instance)`` for every pair that resolved. When
        ``resolve`` returns a list, only its first element is kept.
    """
    ctx = Context() if context is None else context
    is_metric_stage = selector_key == "metrics"
    passed = []

    for selector, kwargs in pairs:
        try:
            resolved = resolve(
                registry,
                selector,
                environment,
                ctx,
                defer_if_incomplete=is_metric_stage,
                **kwargs,
            )
            # A list result contributes only its first element; an empty
            # list raises IndexError here and is reported as a failure.
            first = resolved[0] if isinstance(resolved, list) else resolved
            passed.append((selector.label, kwargs, first))
            print(f"{_PREFIX} PASSED {selector.label} {kwargs}")
        except Exception as e:
            print(f"{_PREFIX} FAILED {selector.label} {kwargs}")
            record = {
                "name": f"{experiment_name} / {selector_key} / {selector.label} {kwargs}",
                "error": e,
            }
            failures.append(record)

    return passed
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _print_summary(failures):
    """
    Print the closing SUMMARY section of the dry-run report.

    Parameters
    ----------
    failures : list of dict
        Failure records, each with a ``"name"`` and an ``"error"`` entry.
        An empty list prints a success message instead of a numbered list.
    """
    _full_with_header("SUMMARY", "=")

    if failures:
        print(f"{_PREFIX}{len(failures)} configuration(s) failed:")
        for index, entry in enumerate(failures, start=1):
            print(f"{_PREFIX} [{index}] {entry['name']}")
            error = entry["error"]
            print(f"{_PREFIX} {error.__class__.__name__}: {error}")
    else:
        print(f"{_PREFIX}All configurations passed successfully!")
        # Blank prefixed line separating the message from the closing rule.
        print(_PREFIX)

    _full_line("=")
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _prefix_len() -> int:
    """Return the character count of the module-level ``_PREFIX`` string."""
    prefix_width = len(_PREFIX)
    return prefix_width
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _full_with_header(name: str, symbol: str):
    """
    Print a three-line section header filling the terminal width.

    The output is a blank prefixed line, then ``symbol * 3``, a space,
    ``name`` and a space, right-padded with ``symbol`` up to the terminal
    width minus the prefix length, then another blank prefixed line.

    Parameters
    ----------
    name : str
        Title shown in the header.
    symbol : str
        Single character used for the leading marker and the padding.
    """
    width = shutil.get_terminal_size().columns - _prefix_len()
    title = symbol * 3 + " " + name + " "
    print(_PREFIX)
    # ``str.ljust`` returns the title unchanged when ``width`` is zero or
    # negative (terminal narrower than the prefix). A negative width inside
    # a format spec would raise ValueError instead.
    print(f"{_PREFIX}{title.ljust(width, symbol)}")
    print(_PREFIX)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _full_line(symbol: str):
    """
    Print one prefixed rule made of ``symbol`` repeated across the terminal.

    The repeat count is the terminal column count minus the prefix length.
    """
    columns = shutil.get_terminal_size().columns
    rule = symbol * (columns - _prefix_len())
    print(f"{_PREFIX}{rule}")
|
chronobench/modes/run.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mode for executing a single experiment and saving results to CSV.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import logging
|
|
7
|
+
import multiprocessing
|
|
8
|
+
import pathlib
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from functools import partial
|
|
11
|
+
from typing import NamedTuple
|
|
12
|
+
|
|
13
|
+
try:
    from tqdm import tqdm
except ImportError:  # tqdm is an optional dependency

    class tqdm:  # type: ignore[no-redef]  # noqa: N801
        """Silent replacement for ``tqdm`` used when the package is missing."""

        def __init__(self, iterable=None, *args, **kwargs):
            # Any further options (such as ``total``) are accepted and ignored.
            if iterable is None:
                iterable = []
            self._iterable = iterable

        def __iter__(self):
            # Iterate the wrapped iterable without any progress display.
            return iter(self._iterable)

        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            # False: exceptions raised inside the ``with`` body propagate.
            return False

        def update(self, n=1):
            # Nothing to draw, so progress updates are dropped.
            return None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
from chronobench.context import Context
|
|
37
|
+
from chronobench.utility.handling_kwargs import (
|
|
38
|
+
initialize_all_data_loaders,
|
|
39
|
+
initialize_all_metrics,
|
|
40
|
+
initialize_all_models,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Job(NamedTuple):
    """
    A single (data loader, model) pairing scheduled for execution.

    Jobs are built up front as the Cartesian product of every data-loader
    variant with every model variant.
    """

    # Data-loader side: configured name, constructor kwargs, instance.
    loader_name: str
    loader_kwargs: dict
    data_loader: object
    # Model side: configured name, constructor kwargs, instance.
    model_name: str
    model_kwargs: dict
    model: object
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def main(
    name,
    experiment,
    data_loaders,
    models,
    metrics,
    runner,
    debug,
    environment,
):
    """
    Run every job of one experiment and save the collected rows as CSV.

    A job is one element of the Cartesian product of data-loader variants
    and model variants. Models are initialised once per data loader, with a
    ``Context`` describing that loader. If ``environment["n-jobs"]`` equals
    ``1`` the jobs run one after another in this process; otherwise they are
    submitted to a :class:`multiprocessing.Pool` with that many processes.
    The resulting rows are passed to :func:`save_results`.

    Parameters
    ----------
    name : str
        Experiment name, passed on to :func:`save_results`.
    experiment : dict
        Parsed experiment configuration.
    data_loaders : Registry
        Registry used to initialise the data-loader variants.
    models : Registry
        Registry used to initialise the model variants for each data loader.
    metrics : Registry
        Registry forwarded to every job, which initialises its own metrics.
    runner : callable
        Callback with signature ``(data_loader, model, metrics)`` returning
        a ``dict`` of computed metric values.
    debug : bool
        When ``True``, only the first three jobs are executed.
    environment : dict
        Loaded environment configuration; ``"n-jobs"`` is read here.
    """
    jobs = []
    for loader_name, loader_kwargs, data_loader in initialize_all_data_loaders(
        environment, experiment, data_loaders
    ):
        loader_context = Context(
            data_loader=data_loader,
            data_loader_name=loader_name,
            data_loader_kwargs=loader_kwargs,
        )
        variants = initialize_all_models(
            environment, experiment, models, context=loader_context
        )
        jobs.extend(
            Job(
                loader_name,
                loader_kwargs,
                data_loader,
                model_name,
                model_kwargs,
                model,
            )
            for model_name, model_kwargs, model in variants
        )
    if debug:
        jobs = jobs[:3]

    # Everything except the job itself is identical for every call.
    run_job = partial(
        _single_job,
        metrics=metrics,
        environment=environment,
        experiment=experiment,
        runner=runner,
    )
    worker_count = environment["n-jobs"]

    if worker_count == 1:
        rows = [run_job(job) for job in tqdm(jobs)]
    else:
        with multiprocessing.Pool(processes=worker_count) as pool, tqdm(
            total=len(jobs)
        ) as progress:
            pending = [
                pool.apply_async(
                    run_job,
                    args=(job,),
                    callback=lambda _: progress.update(1),
                )
                for job in jobs
            ]
            # Wait for all workers while the progress bar is still open.
            pool.close()
            pool.join()

        # ``get`` re-raises here any exception a worker raised.
        rows = [handle.get() for handle in pending]

    save_results(environment, name, debug, rows)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _single_job(job, metrics, environment, experiment, runner):
    """
    Execute one job and return its result row.

    Metrics are initialised for this job with a ``Context`` describing its
    data loader and model, then ``runner`` is called with the data loader,
    the model and those metric instances.

    Returns
    -------
    dict
        ``"Dataset"`` and the data-loader kwargs, ``"Model"`` and the model
        kwargs, then the runner's metrics. A kwarg name used by both the
        data loader and the model is written as ``Dataset.<name>`` and
        ``Model.<name>`` so the two values do not overwrite each other.
    """
    job_context = Context(
        data_loader=job.data_loader,
        data_loader_name=job.loader_name,
        data_loader_kwargs=job.loader_kwargs,
        model=job.model,
        model_name=job.model_name,
        model_kwargs=job.model_kwargs,
    )
    fresh_metrics = initialize_all_metrics(
        environment, experiment, metrics, context=job_context
    )
    scores = runner(job.data_loader, job.model, fresh_metrics)

    # Kwarg names present on both sides need a qualifying prefix.
    shared = job.loader_kwargs.keys() & job.model_kwargs.keys()

    row = {"Dataset": job.loader_name}
    for key, value in job.loader_kwargs.items():
        column = f"Dataset.{key}" if key in shared else key
        row[column] = value

    row["Model"] = job.model_name
    for key, value in job.model_kwargs.items():
        column = f"Model.{key}" if key in shared else key
        row[column] = value

    return {**row, **scores}
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def save_results(
    environment,
    name,
    debug,
    rows,
) -> None:
    """
    Write experiment results to a new timestamped CSV file.

    The file is created in ``<path-to-results>/raw/<name>/``, which is
    created first if necessary. The header is the union of all keys found
    in ``rows``, in order of first appearance; a key missing from a row is
    written as an empty string. The file is always encoded as UTF-8, so the
    output does not depend on the platform's locale.

    Parameters
    ----------
    environment : dict
        Loaded environment configuration. Must contain ``path-to-results``.
    name : str
        Experiment name, used as the output subdirectory under
        ``path-to-results/raw/``.
    debug : bool
        When ``True``, the output filename is prefixed with ``DEBUG_``.
    rows : list of dict
        One dict per job; keys become CSV column headers.
    """
    base_path = pathlib.Path(environment["path-to-results"]).resolve() / "raw" / name
    if not base_path.exists():
        logger.info("Creating directory %s", base_path)
    # ``exist_ok=True`` makes the creation safe even if another process
    # creates the directory between the check above and this call.
    base_path.mkdir(parents=True, exist_ok=True)

    prefix = "DEBUG_" if debug else ""
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    results_path = base_path / f"{prefix}{timestamp}.csv"

    # dict.fromkeys de-duplicates while keeping first-seen column order.
    fieldnames = list(dict.fromkeys(k for row in rows for k in row))

    with open(results_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(rows)
|
|
chronobench/modes/run_many.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mode for executing multiple experiments sequentially.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from chronobench.modes.run import main as main_run
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main(
    experiments,
    data_loaders,
    models,
    metrics,
    runner,
    debug,
    environment,
):
    """
    Run all given experiments one after another.

    Every experiment is handed to :func:`chronobench.modes.run.main`
    together with the same registries, runner, debug flag and environment.

    Parameters
    ----------
    experiments : dict
        Mapping from experiment name to its parsed configuration dict.
    data_loaders : Registry
        Registry used to resolve data-loader ``name`` selectors.
    models : Registry
        Registry used to resolve model ``name`` selectors.
    metrics : Registry
        Registry used to resolve metric ``name`` selectors.
    runner : callable
        Callback with signature ``(data_loader, model, metrics)`` returning
        a ``dict`` of computed metric values.
    debug : bool
        Forwarded unchanged to each experiment run.
    environment : dict
        Loaded environment configuration.
    """
    # Arguments that are identical for every experiment.
    shared = {
        "data_loaders": data_loaders,
        "models": models,
        "metrics": metrics,
        "runner": runner,
        "debug": debug,
        "environment": environment,
    }
    for experiment_name, config in experiments.items():
        main_run(name=experiment_name, experiment=config, **shared)
|