pynnlf 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pynnlf/__about__.py +1 -0
- pynnlf/__init__.py +5 -0
- pynnlf/api.py +17 -0
- pynnlf/discovery.py +63 -0
- pynnlf/engine.py +1238 -0
- pynnlf/hyperparams.py +38 -0
- pynnlf/model_utils.py +186 -0
- pynnlf/runner.py +108 -0
- pynnlf/scaffold/README_WORKSPACE.md +0 -0
- pynnlf/scaffold/data/README_data.md +40 -0
- pynnlf/scaffold/data/ds0_test.csv +4081 -0
- pynnlf/scaffold/models/README_models.md +61 -0
- pynnlf/scaffold/models/hyperparameters.yaml +264 -0
- pynnlf/scaffold/models/m10_rf.py +65 -0
- pynnlf/scaffold/models/m11_svr.py +53 -0
- pynnlf/scaffold/models/m12_rnn.py +152 -0
- pynnlf/scaffold/models/m13_lstm.py +208 -0
- pynnlf/scaffold/models/m14_gru.py +139 -0
- pynnlf/scaffold/models/m15_transformer.py +138 -0
- pynnlf/scaffold/models/m16_prophet.py +216 -0
- pynnlf/scaffold/models/m17_xgb.py +66 -0
- pynnlf/scaffold/models/m18_nbeats.py +107 -0
- pynnlf/scaffold/models/m1_naive.py +49 -0
- pynnlf/scaffold/models/m2_snaive.py +49 -0
- pynnlf/scaffold/models/m3_ets.py +133 -0
- pynnlf/scaffold/models/m4_arima.py +123 -0
- pynnlf/scaffold/models/m5_sarima.py +128 -0
- pynnlf/scaffold/models/m6_lr.py +76 -0
- pynnlf/scaffold/models/m7_ann.py +148 -0
- pynnlf/scaffold/models/m8_dnn.py +141 -0
- pynnlf/scaffold/models/m9_rt.py +74 -0
- pynnlf/scaffold/models/mXX_template.py +68 -0
- pynnlf/scaffold/specs/batch.yaml +4 -0
- pynnlf/scaffold/specs/experiment.yaml +4 -0
- pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
- pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
- pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
- pynnlf/scaffold/specs/tests_ci.yaml +8 -0
- pynnlf/scaffold/specs/tests_full.yaml +23 -0
- pynnlf/tests_runner.py +211 -0
- pynnlf/tools/strip_notebook_artifacts.py +32 -0
- pynnlf/workspace.py +63 -0
- pynnlf/yamlio.py +28 -0
- pynnlf-0.2.2.dist-info/METADATA +168 -0
- pynnlf-0.2.2.dist-info/RECORD +47 -0
- pynnlf-0.2.2.dist-info/WHEEL +5 -0
- pynnlf-0.2.2.dist-info/top_level.txt +1 -0
pynnlf/hyperparams.py
ADDED
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from typing import Any
+from .yamlio import load_yaml
+
+def load_hyperparameters(path) -> dict[str, Any]:
+    """
+    Load hyperparameters from YAML.
+
+    Args:
+        path (str | Path): Path to models/hyperparameters.yaml.
+
+    Returns:
+        dict: {model_name: {hp_no: hyperparameter_dict}}
+    """
+    return load_yaml(path)
+
+def get_hp(hparams: dict[str, Any], model_name: str, hp_no: str) -> dict[str, Any]:
+    """
+    Get hyperparameter dict for a specific model and hp_no.
+
+    Args:
+        hparams (dict): Output of load_hyperparameters().
+        model_name (str): e.g., "m6_lr" (model file stem)
+        hp_no (str): e.g., "hp1"
+
+    Returns:
+        dict: Hyperparameter dict for the model/hp.
+    """
+    if model_name not in hparams:
+        raise KeyError(f"Model '{model_name}' not found in hyperparameters.yaml")
+    if hp_no not in hparams[model_name]:
+        raise KeyError(f"HP '{hp_no}' not found for model '{model_name}'")
+    hp = hparams[model_name][hp_no]
+    if not isinstance(hp, dict):
+        raise TypeError("Hyperparameter entry must be a mapping/dict")
+    return hp
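Together these two helpers give a two-step lookup: load the whole YAML mapping once, then pull a single validated `{model, hp_no}` entry. A minimal usage sketch — the YAML layout `{model_name: {hp_no: {...}}}` comes from the docstrings above, while the workspace path and the `m6_lr`/`hp1` keys are illustrative placeholders:

```python
# Usage sketch for load_hyperparameters/get_hp. Path and keys are hypothetical.
from pynnlf.hyperparams import load_hyperparameters, get_hp

hparams = load_hyperparameters("my_workspace/models/hyperparameters.yaml")
hp = get_hp(hparams, model_name="m6_lr", hp_no="hp1")  # KeyError if either key is missing
print(hp)  # a plain dict of hyperparameters for that model/hp combination
```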
pynnlf/model_utils.py
ADDED
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from pathlib import Path
+import json
+
+from .hyperparams import load_hyperparameters, get_hp
+from .engine import run_experiment_engine
+
+def _load_json(path: Path) -> dict:
+    """
+    Load JSON file into dict (legacy JSON workflow).
+
+    Args:
+        path (Path): JSON file path.
+
+    Returns:
+        dict: Parsed JSON.
+    """
+    return json.loads(path.read_text(encoding="utf-8"))
+
+def _workspace_root_from_spec(spec_path: Path) -> Path:
+    """
+    Infer workspace root from a spec path (legacy JSON workflow).
+
+    Assumes structure:
+        <workspace>/specs/experiment.json
+        <workspace>/specs/batch.json
+
+    Args:
+        spec_path (Path): Path to spec JSON.
+
+    Returns:
+        Path: Workspace root directory.
+    """
+    return spec_path.parent.parent
+
+def run_single(spec_path: str | Path) -> None:
+    """
+    Run a single experiment from a 4-key JSON spec (legacy workflow).
+
+    Spec contains only:
+        dataset, forecast_horizon, model, hyperparameter
+
+    Args:
+        spec_path (str | Path): Path to <workspace>/specs/experiment.json
+
+    Returns:
+        None
+    """
+    spec_path = Path(spec_path)
+    ws = _workspace_root_from_spec(spec_path)
+
+    spec = _load_json(spec_path)
+    cfg = _load_json(ws / "specs" / "pynnlf_config.json")
+
+    ds_id = spec["dataset"]
+    fh_id = spec["forecast_horizon"]
+    m_id = spec["model"]
+    hp_no = spec["hyperparameter"]
+
+    data_dir = ws / cfg["paths"]["data_dir"]
+    out_dir = ws / cfg["paths"]["output_dir"]
+    hp_path = ws / cfg["paths"]["hyperparameters_path"]  # models/hyperparameters.json
+
+    dataset_file = cfg["datasets"][ds_id]
+    fh_min = int(cfg["forecast_horizons"][fh_id])
+    model_name = cfg["models"][m_id]
+
+    dataset_path = data_dir / dataset_file
+    models_dir = ws / "models"
+
+    hparams = load_hyperparameters(hp_path)
+    hp = get_hp(hparams, model_name, hp_no)
+
+    run_experiment_engine(
+        dataset_path=dataset_path,
+        forecast_horizon_min=fh_min,
+        model_name=model_name,
+        hyperparameter_no=hp_no,
+        hyperparameter=hp,
+        output_dir=out_dir,
+        models_dir=models_dir,
+        config=cfg,
+    )
+
+def run_batch(spec_path: str | Path) -> None:
+    """
+    Run batch experiments from a batch JSON spec (legacy workflow).
+
+    Batch spec contains:
+        datasets: [dsX...]
+        forecast_horizons: [fhX...]
+        model_and_hp: [[mX, hpY], ...]
+
+    Runs all combinations:
+        datasets × forecast_horizons × model_and_hp
+
+    Args:
+        spec_path (str | Path): Path to <workspace>/specs/batch.json
+
+    Returns:
+        None
+    """
+    spec_path = Path(spec_path)
+    ws = _workspace_root_from_spec(spec_path)
+
+    batch = _load_json(spec_path)
+    cfg = _load_json(ws / "specs" / "pynnlf_config.json")
+
+    data_dir = ws / cfg["paths"]["data_dir"]
+    out_dir = ws / cfg["paths"]["output_dir"]
+    hp_path = ws / cfg["paths"]["hyperparameters_path"]
+    models_dir = ws / "models"
+
+    hparams = load_hyperparameters(hp_path)
+
+    ds_files = [cfg["datasets"][d] for d in batch["datasets"]]
+    fh_mins = [int(cfg["forecast_horizons"][h]) for h in batch["forecast_horizons"]]
+    model_and_hp = [(cfg["models"][m], hp) for (m, hp) in batch["model_and_hp"]]
+
+    for ds_file in ds_files:
+        for fh_min in fh_mins:
+            for model_name, hp_no in model_and_hp:
+                dataset_path = data_dir / ds_file
+                hp = get_hp(hparams, model_name, hp_no)
+
+                run_experiment_engine(
+                    dataset_path=dataset_path,
+                    forecast_horizon_min=fh_min,
+                    model_name=model_name,
+                    hyperparameter_no=hp_no,
+                    hyperparameter=hp,
+                    output_dir=out_dir,
+                    models_dir=models_dir,
+                    config=cfg,
+                )
+
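The batch-spec shape described in the `run_batch` docstring above maps directly to a small JSON file. A hypothetical `specs/batch.json` — the `ds`/`fh`/`m`/`hp` IDs are placeholders and must exist as keys in `pynnlf_config.json`:

```json
{
  "datasets": ["ds0"],
  "forecast_horizons": ["fh1"],
  "model_and_hp": [["m6", "hp1"], ["m10", "hp1"]]
}
```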
+def remove_jump_df(train_df_y):
+    """
+    Truncate a time series at the first jump (gap) in its datetime index.
+
+    Args:
+        train_df_y (pd.Series): Time series data with a datetime index.
+
+    Returns:
+        train_df_y_updated (pd.Series): The series up to (excluding) the first
+            timestamp whose spacing exceeds the initial sampling frequency.
+    """
+    time_diff = train_df_y.index.to_series().diff().dt.total_seconds()
+    initial_freq = time_diff.iloc[1]  # spacing of the first interval, in seconds
+    jump_indices = time_diff[time_diff > initial_freq].index
+    if not jump_indices.empty:
+        jump_index = jump_indices[0]
+        jump_pos = train_df_y.index.get_loc(jump_index)
+        train_df_y_updated = train_df_y.iloc[:jump_pos]
+    else:
+        train_df_y_updated = train_df_y
+    return train_df_y_updated
+
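A quick self-contained check of the truncation behavior — a toy half-hourly series with a deliberate three-hour gap (the values are arbitrary):

```python
import pandas as pd

from pynnlf.model_utils import remove_jump_df

# Half-hourly index with a 3-hour gap after the third point.
idx = pd.to_datetime([
    "2021-01-01 00:00", "2021-01-01 00:30", "2021-01-01 01:00",
    "2021-01-01 04:00", "2021-01-01 04:30",
])
s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0], index=idx)

trimmed = remove_jump_df(s)
print(len(trimmed))        # 3 — everything from the gap onward is dropped
print(trimmed.index[-1])   # 2021-01-01 01:00:00
```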
+def separate_lag_and_exogenous_features(train_df_X, target_column='y', lag_prefix='y_lag'):
+    """
+    Separate the lag features and exogenous variables in the training dataframe.
+
+    Args:
+        train_df_X (pd.DataFrame): The dataframe containing both lag features and exogenous variables.
+        target_column (str): The name of the target column (e.g., 'y').
+        lag_prefix (str): The prefix used for lag columns (e.g., 'y_lag').
+
+    Returns:
+        X_lags (pd.DataFrame): DataFrame containing only the lag features.
+        X_exog (pd.DataFrame): DataFrame containing only the exogenous variables.
+    """
+    # Identify lag features (columns that start with lag_prefix)
+    lag_features = [col for col in train_df_X.columns if col.startswith(lag_prefix)]
+
+    # Identify exogenous variables (everything except the target and lag features)
+    exog_features = [col for col in train_df_X.columns if col not in [target_column] + lag_features]
+
+    # Create dataframes for lag features and exogenous features
+    X_lags = train_df_X[lag_features]
+    X_exog = train_df_X[exog_features]
+
+    return X_lags, X_exog
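A small worked example of the split. The `y`/`y_lag` names follow the function's defaults; the specific lag suffixes and exogenous column names are illustrative, not taken from the engine:

```python
import pandas as pd

from pynnlf.model_utils import separate_lag_and_exogenous_features

df = pd.DataFrame({
    "y":          [10.0, 11.0],   # target (excluded from both outputs)
    "y_lag_1":    [9.0, 10.0],    # lag feature
    "y_lag_48":   [8.5, 9.5],     # lag feature
    "temp_C":     [21.0, 22.0],   # exogenous
    "is_holiday": [0, 1],         # exogenous
})

X_lags, X_exog = separate_lag_and_exogenous_features(df)
print(list(X_lags.columns))   # ['y_lag_1', 'y_lag_48']
print(list(X_exog.columns))   # ['temp_C', 'is_holiday']
```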
pynnlf/runner.py
ADDED
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from pathlib import Path
+
+from .yamlio import load_yaml
+from .hyperparams import load_hyperparameters, get_hp
+from .discovery import discover_model_name, discover_dataset_path
+from .engine import run_experiment_engine
+
+def _workspace_root_from_spec(spec_path: Path) -> Path:
+    """
+    Infer workspace root from a spec path.
+
+    Args:
+        spec_path (Path): <workspace>/specs/experiment.yaml or batch.yaml
+
+    Returns:
+        Path: workspace root
+    """
+    return spec_path.parent.parent
+
+def run_single(spec_path: str | Path) -> None:
+    """
+    Run a single experiment from a 4-key YAML spec.
+
+    Args:
+        spec_path (str | Path): <workspace>/specs/experiment.yaml
+
+    Returns:
+        None
+    """
+    spec_path = Path(spec_path)
+    ws = _workspace_root_from_spec(spec_path)
+
+    spec = load_yaml(spec_path)
+    cfg = load_yaml(ws / "specs" / "pynnlf_config.yaml")
+
+    ds_id = spec["dataset"]            # e.g. ds19
+    fh_id = spec["forecast_horizon"]   # e.g. fh1
+    m_id = spec["model"]               # e.g. m19
+    hp_no = spec["hyperparameter"]     # e.g. hp1
+
+    data_dir = ws / cfg["paths"]["data_dir"]
+    out_dir = ws / cfg["paths"]["output_dir"]
+    models_dir = ws / "models"
+    hp_path = ws / cfg["paths"]["hyperparameters_path"]  # models/hyperparameters.yaml
+
+    # auto-discovery (no config edits)
+    dataset_path = discover_dataset_path(data_dir, ds_id)
+    model_name = discover_model_name(models_dir, m_id)
+
+    fh_min = int(cfg["forecast_horizons"][fh_id])
+
+    hparams = load_hyperparameters(hp_path)
+    hp = get_hp(hparams, model_name, hp_no)
+
+    run_experiment_engine(
+        dataset_path=dataset_path,
+        forecast_horizon_min=fh_min,
+        model_name=model_name,
+        hyperparameter_no=hp_no,
+        hyperparameter=hp,
+        output_dir=out_dir,
+        models_dir=models_dir,
+        config=cfg,
+    )
+
+def run_batch(spec_path: str | Path) -> None:
+    """
+    Run batch experiments from a YAML batch spec (cartesian product).
+
+    Args:
+        spec_path (str | Path): <workspace>/specs/batch.yaml
+
+    Returns:
+        None
+    """
+    spec_path = Path(spec_path)
+    ws = _workspace_root_from_spec(spec_path)
+
+    batch = load_yaml(spec_path)
+    cfg = load_yaml(ws / "specs" / "pynnlf_config.yaml")
+
+    data_dir = ws / cfg["paths"]["data_dir"]
+    out_dir = ws / cfg["paths"]["output_dir"]
+    models_dir = ws / "models"
+    hp_path = ws / cfg["paths"]["hyperparameters_path"]
+
+    hparams = load_hyperparameters(hp_path)
+
+    for ds_id in batch["datasets"]:
+        dataset_path = discover_dataset_path(data_dir, ds_id)
+        for fh_id in batch["forecast_horizons"]:
+            fh_min = int(cfg["forecast_horizons"][fh_id])
+            for m_id, hp_no in batch["model_and_hp"]:
+                model_name = discover_model_name(models_dir, m_id)
+                hp = get_hp(hparams, model_name, hp_no)
+                run_experiment_engine(
+                    dataset_path=dataset_path,
+                    forecast_horizon_min=fh_min,
+                    model_name=model_name,
+                    hyperparameter_no=hp_no,
+                    hyperparameter=hp,
+                    output_dir=out_dir,
+                    models_dir=models_dir,
+                    config=cfg,
+                )
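Both entry points take only a spec path; everything else (config, data, models, hyperparameters) is resolved from the workspace layout. A hypothetical invocation sketch — the workspace path is a placeholder:

```python
from pynnlf.runner import run_single, run_batch

# One experiment: dataset/horizon/model/hp read from the 4-key spec.
run_single("my_workspace/specs/experiment.yaml")

# Cartesian product over the lists in batch.yaml.
run_batch("my_workspace/specs/batch.yaml")
```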
pynnlf/scaffold/README_WORKSPACE.md
File without changes

pynnlf/scaffold/data/README_data.md
ADDED
@@ -0,0 +1,40 @@
+# Data folder (auto-discovery)
+
+This folder is **user-editable**. You can add new datasets here without editing any package code or config mappings.
+
+## Add a new dataset (no config edits)
+
+1) Drop your CSV into this folder.
+
+2) The file name **must start** with the dataset ID prefix, e.g.:
+
+   - `ds19_mydata.csv` (example)
+
+3) Auto-discovery uses the prefix (`ds19`) to find the dataset file.
+
+## Dataset requirements (minimum)
+
+- CSV must have a **datetime index** in the first column (same style as `ds0_test.csv`).
+- CSV must contain a target column named `netload_kW` (the engine renames it to `y`).
+
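For concreteness, a hypothetical CSV meeting those two requirements — the `timestamp` header name and half-hourly spacing are illustrative assumptions; only the `netload_kW` column name is prescribed:

```csv
timestamp,netload_kW
2021-01-01 00:00:00,432.1
2021-01-01 00:30:00,418.6
2021-01-01 01:00:00,395.2
```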
+## Run the dataset (4-key spec only)
+
+Edit `../specs/experiment.yaml`:
+
+```yaml
+dataset: ds19
+forecast_horizon: fh1
+model: m6
+hyperparameter: hp1
+```
+
+Then run:
+
+```bash
+python -c "import pynnlf; pynnlf.run_experiment('PATH_TO_WORKSPACE/specs/experiment.yaml')"
+```
+
+## Gotchas
+
+- Auto-discovery requires **exactly one** match for a dataset ID. If you have `ds19_a.csv` and `ds19_b.csv`, the runner will raise an error asking you to rename.
+- If your file is huge, keep it out of Git history (use `.gitignore`).
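The prefix matching and exactly-one-match rule described in this README suggest a simple glob-based lookup. A minimal sketch of that contract — an assumption for illustration only; the actual `pynnlf/discovery.py` may differ in naming and error wording:

```python
from pathlib import Path

def find_dataset(data_dir: Path, ds_id: str) -> Path:
    """Hypothetical re-implementation of the README's discovery contract."""
    # Match CSVs whose file names start with the dataset ID prefix.
    matches = sorted(data_dir.glob(f"{ds_id}*.csv"))
    if not matches:
        raise FileNotFoundError(f"No CSV starting with '{ds_id}' in {data_dir}")
    if len(matches) > 1:
        # Mirrors the "exactly one match" gotcha: ambiguity is an error.
        names = [m.name for m in matches]
        raise ValueError(f"Ambiguous dataset ID '{ds_id}': {names}; rename so only one matches")
    return matches[0]
```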