pynnlf-0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pynnlf/__about__.py +1 -0
  2. pynnlf/__init__.py +5 -0
  3. pynnlf/api.py +17 -0
  4. pynnlf/discovery.py +63 -0
  5. pynnlf/engine.py +1238 -0
  6. pynnlf/hyperparams.py +38 -0
  7. pynnlf/model_utils.py +186 -0
  8. pynnlf/runner.py +108 -0
  9. pynnlf/scaffold/README_WORKSPACE.md +0 -0
  10. pynnlf/scaffold/data/README_data.md +40 -0
  11. pynnlf/scaffold/data/ds0_test.csv +4081 -0
  12. pynnlf/scaffold/models/README_models.md +61 -0
  13. pynnlf/scaffold/models/hyperparameters.yaml +264 -0
  14. pynnlf/scaffold/models/m10_rf.py +65 -0
  15. pynnlf/scaffold/models/m11_svr.py +53 -0
  16. pynnlf/scaffold/models/m12_rnn.py +152 -0
  17. pynnlf/scaffold/models/m13_lstm.py +208 -0
  18. pynnlf/scaffold/models/m14_gru.py +139 -0
  19. pynnlf/scaffold/models/m15_transformer.py +138 -0
  20. pynnlf/scaffold/models/m16_prophet.py +216 -0
  21. pynnlf/scaffold/models/m17_xgb.py +66 -0
  22. pynnlf/scaffold/models/m18_nbeats.py +107 -0
  23. pynnlf/scaffold/models/m1_naive.py +49 -0
  24. pynnlf/scaffold/models/m2_snaive.py +49 -0
  25. pynnlf/scaffold/models/m3_ets.py +133 -0
  26. pynnlf/scaffold/models/m4_arima.py +123 -0
  27. pynnlf/scaffold/models/m5_sarima.py +128 -0
  28. pynnlf/scaffold/models/m6_lr.py +76 -0
  29. pynnlf/scaffold/models/m7_ann.py +148 -0
  30. pynnlf/scaffold/models/m8_dnn.py +141 -0
  31. pynnlf/scaffold/models/m9_rt.py +74 -0
  32. pynnlf/scaffold/models/mXX_template.py +68 -0
  33. pynnlf/scaffold/specs/batch.yaml +4 -0
  34. pynnlf/scaffold/specs/experiment.yaml +4 -0
  35. pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
  36. pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
  37. pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
  38. pynnlf/scaffold/specs/tests_ci.yaml +8 -0
  39. pynnlf/scaffold/specs/tests_full.yaml +23 -0
  40. pynnlf/tests_runner.py +211 -0
  41. pynnlf/tools/strip_notebook_artifacts.py +32 -0
  42. pynnlf/workspace.py +63 -0
  43. pynnlf/yamlio.py +28 -0
  44. pynnlf-0.2.2.dist-info/METADATA +168 -0
  45. pynnlf-0.2.2.dist-info/RECORD +47 -0
  46. pynnlf-0.2.2.dist-info/WHEEL +5 -0
  47. pynnlf-0.2.2.dist-info/top_level.txt +1 -0
pynnlf/hyperparams.py ADDED
@@ -0,0 +1,38 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ from pathlib import Path
+ from typing import Any
+
+ from .yamlio import load_yaml
+
+ def load_hyperparameters(path: str | Path) -> dict[str, Any]:
+     """
+     Load hyperparameters from YAML.
+
+     Args:
+         path (str | Path): Path to models/hyperparameters.yaml.
+
+     Returns:
+         dict: {model_name: {hp_no: hyperparameter_dict}}
+     """
+     return load_yaml(path)
+
+ def get_hp(hparams: dict[str, Any], model_name: str, hp_no: str) -> dict[str, Any]:
+     """
+     Get the hyperparameter dict for a specific model and hp_no.
+
+     Args:
+         hparams (dict): Output of load_hyperparameters().
+         model_name (str): e.g., "m6_lr" (model file stem).
+         hp_no (str): e.g., "hp1".
+
+     Returns:
+         dict: Hyperparameter dict for the model/hp combination.
+     """
+     if model_name not in hparams:
+         raise KeyError(f"Model '{model_name}' not found in hyperparameters.yaml")
+     if hp_no not in hparams[model_name]:
+         raise KeyError(f"HP '{hp_no}' not found for model '{model_name}'")
+     hp = hparams[model_name][hp_no]
+     if not isinstance(hp, dict):
+         raise TypeError("Hyperparameter entry must be a mapping/dict")
+     return hp
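Note: the docstrings above pin down the expected YAML shape: a mapping of model name to hp label to parameter dict. A minimal sketch of that structure and its lookup (the `fit_intercept` entry is illustrative, not taken from the package):

```python
import yaml

# Hypothetical models/hyperparameters.yaml content, matching the documented
# {model_name: {hp_no: hyperparameter_dict}} structure.
doc = """
m6_lr:
  hp1:
    fit_intercept: true
"""

hparams = yaml.safe_load(doc)   # shape of load_hyperparameters() output
hp = hparams["m6_lr"]["hp1"]    # what get_hp(hparams, "m6_lr", "hp1") returns
assert hp == {"fit_intercept": True}
```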
pynnlf/model_utils.py ADDED
@@ -0,0 +1,186 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ import json
+ from pathlib import Path
+
+ from .hyperparams import load_hyperparameters, get_hp
+ from .engine import run_experiment_engine
+
+ def _load_json(path: Path) -> dict:
+     """
+     Load a JSON file into a dict (legacy JSON workflow).
+
+     Args:
+         path (Path): JSON file path.
+
+     Returns:
+         dict: Parsed JSON.
+     """
+     return json.loads(path.read_text(encoding="utf-8"))
+
+ def _workspace_root_from_spec(spec_path: Path) -> Path:
+     """
+     Infer the workspace root from a spec path (legacy JSON workflow).
+
+     Assumes the structure:
+         <workspace>/specs/experiment.json
+         <workspace>/specs/batch.json
+
+     Args:
+         spec_path (Path): Path to the spec JSON.
+
+     Returns:
+         Path: Workspace root directory.
+     """
+     return spec_path.parent.parent
+
+ def run_single(spec_path: str | Path) -> None:
+     """
+     Run a single experiment from a 4-key JSON spec (legacy workflow).
+
+     The spec contains only:
+         dataset, forecast_horizon, model, hyperparameter
+
+     Args:
+         spec_path (str | Path): Path to <workspace>/specs/experiment.json
+
+     Returns:
+         None
+     """
+     spec_path = Path(spec_path)
+     ws = _workspace_root_from_spec(spec_path)
+
+     spec = _load_json(spec_path)
+     cfg = _load_json(ws / "specs" / "pynnlf_config.json")
+
+     ds_id = spec["dataset"]
+     fh_id = spec["forecast_horizon"]
+     m_id = spec["model"]
+     hp_no = spec["hyperparameter"]
+
+     data_dir = ws / cfg["paths"]["data_dir"]
+     out_dir = ws / cfg["paths"]["output_dir"]
+     hp_path = ws / cfg["paths"]["hyperparameters_path"]  # models/hyperparameters.json
+
+     dataset_file = cfg["datasets"][ds_id]
+     fh_min = int(cfg["forecast_horizons"][fh_id])
+     model_name = cfg["models"][m_id]
+
+     dataset_path = data_dir / dataset_file
+     models_dir = ws / "models"
+
+     hparams = load_hyperparameters(hp_path)
+     hp = get_hp(hparams, model_name, hp_no)
+
+     run_experiment_engine(
+         dataset_path=dataset_path,
+         forecast_horizon_min=fh_min,
+         model_name=model_name,
+         hyperparameter_no=hp_no,
+         hyperparameter=hp,
+         output_dir=out_dir,
+         models_dir=models_dir,
+         config=cfg,
+     )
+
+ def run_batch(spec_path: str | Path) -> None:
+     """
+     Run batch experiments from a batch JSON spec (legacy workflow).
+
+     The batch spec contains:
+         datasets: [dsX, ...]
+         forecast_horizons: [fhX, ...]
+         model_and_hp: [[mX, hpY], ...]
+
+     Runs all combinations:
+         datasets × forecast_horizons × model_and_hp
+
+     Args:
+         spec_path (str | Path): Path to <workspace>/specs/batch.json
+
+     Returns:
+         None
+     """
+     spec_path = Path(spec_path)
+     ws = _workspace_root_from_spec(spec_path)
+
+     batch = _load_json(spec_path)
+     cfg = _load_json(ws / "specs" / "pynnlf_config.json")
+
+     data_dir = ws / cfg["paths"]["data_dir"]
+     out_dir = ws / cfg["paths"]["output_dir"]
+     hp_path = ws / cfg["paths"]["hyperparameters_path"]
+     models_dir = ws / "models"
+
+     hparams = load_hyperparameters(hp_path)
+
+     ds_files = [cfg["datasets"][d] for d in batch["datasets"]]
+     fh_mins = [int(cfg["forecast_horizons"][h]) for h in batch["forecast_horizons"]]
+     model_and_hp = [(cfg["models"][m], hp) for (m, hp) in batch["model_and_hp"]]
+
+     for ds_file in ds_files:
+         for fh_min in fh_mins:
+             for model_name, hp_no in model_and_hp:
+                 dataset_path = data_dir / ds_file
+                 hp = get_hp(hparams, model_name, hp_no)
+
+                 run_experiment_engine(
+                     dataset_path=dataset_path,
+                     forecast_horizon_min=fh_min,
+                     model_name=model_name,
+                     hyperparameter_no=hp_no,
+                     hyperparameter=hp,
+                     output_dir=out_dir,
+                     models_dir=models_dir,
+                     config=cfg,
+                 )
+
+ def remove_jump_df(train_df_y):
+     """
+     Truncate a time series at the first gap in its sampling interval.
+
+     The sampling frequency is inferred from the first time delta; any later
+     delta larger than that is treated as a jump, and everything from the
+     first jump onward is dropped.
+
+     Args:
+         train_df_y (pd.Series): Time series indexed by datetime.
+
+     Returns:
+         pd.Series: Time series truncated at the first jump (unchanged if no
+             jump is found).
+     """
+     time_diff = train_df_y.index.to_series().diff().dt.total_seconds()
+     initial_freq = time_diff.iloc[1]  # first real delta (iloc[0] is NaN)
+     jump_indices = time_diff[time_diff > initial_freq].index
+     if not jump_indices.empty:
+         jump_index = jump_indices[0]
+         jump_pos = train_df_y.index.get_loc(jump_index)
+         train_df_y_updated = train_df_y.iloc[:jump_pos]
+     else:
+         train_df_y_updated = train_df_y
+     return train_df_y_updated
+
+ def separate_lag_and_exogenous_features(train_df_X, target_column='y', lag_prefix='y_lag'):
+     """
+     Separate lag features from exogenous variables in the training dataframe.
+
+     Args:
+         train_df_X (pd.DataFrame): Dataframe containing both lag features
+             and exogenous variables.
+         target_column (str): Name of the target column (e.g., 'y').
+         lag_prefix (str): Prefix used for lag columns (e.g., 'y_lag').
+
+     Returns:
+         X_lags (pd.DataFrame): Only the lag features.
+         X_exog (pd.DataFrame): Only the exogenous variables.
+     """
+     # Lag features: columns that start with the lag prefix
+     lag_features = [col for col in train_df_X.columns if col.startswith(lag_prefix)]
+
+     # Exogenous variables: everything except the target and lag features
+     exog_features = [col for col in train_df_X.columns if col not in [target_column] + lag_features]
+
+     X_lags = train_df_X[lag_features]
+     X_exog = train_df_X[exog_features]
+
+     return X_lags, X_exog
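Note: a small usage sketch of the two data helpers above; the column names, timestamps, and values are invented for illustration:

```python
import pandas as pd

from pynnlf.model_utils import remove_jump_df, separate_lag_and_exogenous_features

# Half-hourly series with a 2-hour gap before the last point (toy data).
idx = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:30",
                      "2024-01-01 01:00", "2024-01-01 03:00"])
s = pd.Series([1.0, 2.0, 3.0, 4.0], index=idx)
print(len(remove_jump_df(s)))  # 3 -- everything from the first jump onward is dropped

# Frame with two lag columns and one exogenous column, following the
# 'y_lag' prefix convention documented above.
df = pd.DataFrame({
    "y_lag1": [1.0, 2.0, 3.0],
    "y_lag2": [0.5, 1.0, 1.5],
    "temperature": [20.1, 19.8, 21.0],
})
X_lags, X_exog = separate_lag_and_exogenous_features(df)
print(list(X_lags.columns))  # ['y_lag1', 'y_lag2']
print(list(X_exog.columns))  # ['temperature']
```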
pynnlf/runner.py ADDED
@@ -0,0 +1,108 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ from pathlib import Path
+
+ from .yamlio import load_yaml
+ from .hyperparams import load_hyperparameters, get_hp
+ from .discovery import discover_model_name, discover_dataset_path
+ from .engine import run_experiment_engine
+
+ def _workspace_root_from_spec(spec_path: Path) -> Path:
+     """
+     Infer the workspace root from a spec path.
+
+     Args:
+         spec_path (Path): <workspace>/specs/experiment.yaml or batch.yaml
+
+     Returns:
+         Path: Workspace root.
+     """
+     return spec_path.parent.parent
+
+ def run_single(spec_path: str | Path) -> None:
+     """
+     Run a single experiment from a 4-key YAML spec.
+
+     Args:
+         spec_path (str | Path): <workspace>/specs/experiment.yaml
+
+     Returns:
+         None
+     """
+     spec_path = Path(spec_path)
+     ws = _workspace_root_from_spec(spec_path)
+
+     spec = load_yaml(spec_path)
+     cfg = load_yaml(ws / "specs" / "pynnlf_config.yaml")
+
+     ds_id = spec["dataset"]           # e.g. ds19
+     fh_id = spec["forecast_horizon"]  # e.g. fh1
+     m_id = spec["model"]              # e.g. m19
+     hp_no = spec["hyperparameter"]    # e.g. hp1
+
+     data_dir = ws / cfg["paths"]["data_dir"]
+     out_dir = ws / cfg["paths"]["output_dir"]
+     models_dir = ws / "models"
+     hp_path = ws / cfg["paths"]["hyperparameters_path"]  # models/hyperparameters.yaml
+
+     # auto-discovery (no config edits)
+     dataset_path = discover_dataset_path(data_dir, ds_id)
+     model_name = discover_model_name(models_dir, m_id)
+
+     fh_min = int(cfg["forecast_horizons"][fh_id])
+
+     hparams = load_hyperparameters(hp_path)
+     hp = get_hp(hparams, model_name, hp_no)
+
+     run_experiment_engine(
+         dataset_path=dataset_path,
+         forecast_horizon_min=fh_min,
+         model_name=model_name,
+         hyperparameter_no=hp_no,
+         hyperparameter=hp,
+         output_dir=out_dir,
+         models_dir=models_dir,
+         config=cfg,
+     )
+
+ def run_batch(spec_path: str | Path) -> None:
+     """
+     Run batch experiments from a YAML batch spec (cartesian product).
+
+     Args:
+         spec_path (str | Path): <workspace>/specs/batch.yaml
+
+     Returns:
+         None
+     """
+     spec_path = Path(spec_path)
+     ws = _workspace_root_from_spec(spec_path)
+
+     batch = load_yaml(spec_path)
+     cfg = load_yaml(ws / "specs" / "pynnlf_config.yaml")
+
+     data_dir = ws / cfg["paths"]["data_dir"]
+     out_dir = ws / cfg["paths"]["output_dir"]
+     models_dir = ws / "models"
+     hp_path = ws / cfg["paths"]["hyperparameters_path"]
+
+     hparams = load_hyperparameters(hp_path)
+
+     for ds_id in batch["datasets"]:
+         dataset_path = discover_dataset_path(data_dir, ds_id)
+         for fh_id in batch["forecast_horizons"]:
+             fh_min = int(cfg["forecast_horizons"][fh_id])
+             for m_id, hp_no in batch["model_and_hp"]:
+                 model_name = discover_model_name(models_dir, m_id)
+                 hp = get_hp(hparams, model_name, hp_no)
+                 run_experiment_engine(
+                     dataset_path=dataset_path,
+                     forecast_horizon_min=fh_min,
+                     model_name=model_name,
+                     hyperparameter_no=hp_no,
+                     hyperparameter=hp,
+                     output_dir=out_dir,
+                     models_dir=models_dir,
+                     config=cfg,
+                 )
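Note: per the run_batch() docstring, the batch spec is a cartesian product over three lists. A minimal sketch, written as the dict load_yaml() would produce from batch.yaml (the ID combinations are illustrative and must exist in the workspace):

```python
from itertools import product

# Hypothetical batch spec, using the keys run_batch() reads.
batch = {
    "datasets": ["ds0", "ds19"],
    "forecast_horizons": ["fh1"],
    "model_and_hp": [["m1", "hp1"], ["m6", "hp1"]],
}

# run_batch() nests its loops over exactly these three lists, so this spec
# triggers 2 datasets x 1 horizon x 2 model/hp pairs = 4 engine runs.
runs = list(product(batch["datasets"],
                    batch["forecast_horizons"],
                    batch["model_and_hp"]))
print(len(runs))  # 4
```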
pynnlf/scaffold/README_WORKSPACE.md ADDED (empty file)
pynnlf/scaffold/data/README_data.md ADDED
@@ -0,0 +1,40 @@
+ # Data folder (auto-discovery)
+
+ This folder is **user-editable**. You can add new datasets here without editing any package code or config mappings.
+
+ ## Add a new dataset (no config edits)
+
+ 1) Drop your CSV into this folder.
+
+ 2) The file name **must start** with the dataset ID prefix, e.g.:
+
+    - `ds19_mydata.csv` (example)
+
+ 3) Auto-discovery uses the prefix (`ds19`) to find the dataset file.
+
+ ## Dataset requirements (minimum)
+
+ - CSV must have a **datetime index** in the first column (same style as `ds0_test.csv`).
+ - CSV must contain a target column named `netload_kW` (the engine renames it to `y`).
+
+ ## Run the dataset (4-key spec only)
+
+ Edit `../specs/experiment.yaml`:
+
+ ```yaml
+ dataset: ds19
+ forecast_horizon: fh1
+ model: m6
+ hyperparameter: hp1
+ ```
+
+ Then run:
+
+ ```bash
+ python -c "import pynnlf; pynnlf.run_experiment('PATH_TO_WORKSPACE/specs/experiment.yaml')"
+ ```
+
+ ## Gotchas
+
+ - Auto-discovery requires **exactly one** match for a dataset ID. If you have `ds19_a.csv` and `ds19_b.csv`, the runner will raise an error asking you to rename one.
+ - If your file is huge, keep it out of Git history (use `.gitignore`).
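Note: discovery.py itself is not shown in this excerpt, but the prefix rule plus the exactly-one-match constraint described above imply roughly the following shape. This is a sketch of the documented behaviour, not the package's actual implementation (which may differ, e.g. in how it keeps `ds1` from matching `ds19` files):

```python
from pathlib import Path

def discover_dataset_path_sketch(data_dir: Path, ds_id: str) -> Path:
    """Prefix-based discovery sketch: 'ds19' must match exactly one CSV.

    Assumes an underscore after the ID (as in ds19_mydata.csv), which also
    keeps 'ds1' from matching 'ds19_*' files.
    """
    matches = sorted(data_dir.glob(f"{ds_id}_*.csv"))
    if len(matches) != 1:
        raise FileNotFoundError(
            f"expected exactly one CSV starting with '{ds_id}_' in {data_dir}, "
            f"found {len(matches)} -- rename files so the ID is unambiguous"
        )
    return matches[0]
```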