pynnlf 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pynnlf/__about__.py +1 -0
  2. pynnlf/__init__.py +5 -0
  3. pynnlf/api.py +17 -0
  4. pynnlf/discovery.py +63 -0
  5. pynnlf/engine.py +1238 -0
  6. pynnlf/hyperparams.py +38 -0
  7. pynnlf/model_utils.py +186 -0
  8. pynnlf/runner.py +108 -0
  9. pynnlf/scaffold/README_WORKSPACE.md +0 -0
  10. pynnlf/scaffold/data/README_data.md +40 -0
  11. pynnlf/scaffold/data/ds0_test.csv +4081 -0
  12. pynnlf/scaffold/models/README_models.md +61 -0
  13. pynnlf/scaffold/models/hyperparameters.yaml +264 -0
  14. pynnlf/scaffold/models/m10_rf.py +65 -0
  15. pynnlf/scaffold/models/m11_svr.py +53 -0
  16. pynnlf/scaffold/models/m12_rnn.py +152 -0
  17. pynnlf/scaffold/models/m13_lstm.py +208 -0
  18. pynnlf/scaffold/models/m14_gru.py +139 -0
  19. pynnlf/scaffold/models/m15_transformer.py +138 -0
  20. pynnlf/scaffold/models/m16_prophet.py +216 -0
  21. pynnlf/scaffold/models/m17_xgb.py +66 -0
  22. pynnlf/scaffold/models/m18_nbeats.py +107 -0
  23. pynnlf/scaffold/models/m1_naive.py +49 -0
  24. pynnlf/scaffold/models/m2_snaive.py +49 -0
  25. pynnlf/scaffold/models/m3_ets.py +133 -0
  26. pynnlf/scaffold/models/m4_arima.py +123 -0
  27. pynnlf/scaffold/models/m5_sarima.py +128 -0
  28. pynnlf/scaffold/models/m6_lr.py +76 -0
  29. pynnlf/scaffold/models/m7_ann.py +148 -0
  30. pynnlf/scaffold/models/m8_dnn.py +141 -0
  31. pynnlf/scaffold/models/m9_rt.py +74 -0
  32. pynnlf/scaffold/models/mXX_template.py +68 -0
  33. pynnlf/scaffold/specs/batch.yaml +4 -0
  34. pynnlf/scaffold/specs/experiment.yaml +4 -0
  35. pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
  36. pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
  37. pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
  38. pynnlf/scaffold/specs/tests_ci.yaml +8 -0
  39. pynnlf/scaffold/specs/tests_full.yaml +23 -0
  40. pynnlf/tests_runner.py +211 -0
  41. pynnlf/tools/strip_notebook_artifacts.py +32 -0
  42. pynnlf/workspace.py +63 -0
  43. pynnlf/yamlio.py +28 -0
  44. pynnlf-0.2.2.dist-info/METADATA +168 -0
  45. pynnlf-0.2.2.dist-info/RECORD +47 -0
  46. pynnlf-0.2.2.dist-info/WHEEL +5 -0
  47. pynnlf-0.2.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,61 @@
+ # Models folder (auto-discovery)
+
+ This folder is **user-editable**. You can add new forecasting models here without editing any package code or config mappings.
+
+ ## Add a new model (no config edits)
+
+ 1) Create a new Python file in this folder named like:
+
+    - `m19_my_model.py` (example)
+
+ 2) The file name **must start** with the model ID prefix (`m19_`).
+
+ 3) Inside that file, define **two functions** whose names match the **file stem** (file name without `.py`).
+
+ Example file: `m19_my_model.py`
+
+ Required functions:
+
+ - `train_model_m19_my_model(hyperparameter, train_df_X, train_df_y, forecast_horizon=None)`
+ - `produce_forecast_m19_my_model(model, train_df_X, test_df_X, train_df_y=None, forecast_horizon=None)`
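For orientation, here is a minimal sketch of a `m19_my_model.py` that satisfies this contract. The constant-level logic and the `bias` hyperparameter are illustrative assumptions (mirroring the YAML example below), not part of the package:

```python
# m19_my_model.py -- illustrative sketch only; the `bias` logic is an assumption
import numpy as np

def train_model_m19_my_model(hyperparameter, train_df_X, train_df_y, forecast_horizon=None):
    # "Training" here just records the mean of the target plus a configurable bias.
    bias = float(hyperparameter["bias"])
    level = float(np.asarray(train_df_y).ravel().mean())
    return {"level": level + bias}

def produce_forecast_m19_my_model(model, train_df_X, test_df_X, train_df_y=None, forecast_horizon=None):
    # Return 1D arrays; per the rules below, the engine aligns outputs to timestamps.
    train_df_y_hat = np.full(len(train_df_X), model["level"])
    test_df_y_hat = np.full(len(test_df_X), model["level"])
    return train_df_y_hat, test_df_y_hat
```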
+
+ ## Add hyperparameters (YAML)
+
+ Hyperparameters live in:
+
+ - `hyperparameters.yaml`
+
+ The top-level key **must equal** the model file stem.
+
+ Example:
+
+ ```yaml
+ m19_my_model:
+   hp1:
+     bias: 0.0
+   hp2:
+     bias: 5.0
+ ```
+
+ ## Run the model (4-key spec only)
+
+ Edit `../specs/experiment.yaml`:
+
+ ```yaml
+ dataset: ds0
+ forecast_horizon: fh1
+ model: m19
+ hyperparameter: hp1
+ ```
+
+ Then run:
+
+ ```bash
+ python -c "import pynnlf; pynnlf.run_experiment('PATH_TO_WORKSPACE/specs/experiment.yaml')"
+ ```
+
+ ## Rules / gotchas
+
+ - Auto-discovery requires **exactly one** match per model ID. If you have both `m19_a.py` and `m19_b.py`, the runner will raise an error asking you to rename one of them.
+ - Your model outputs can be `pd.Series` or 1D `np.ndarray`. The engine will align outputs to timestamps.
+ - Keep model files import-safe: include all required imports inside your model file.
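To make the output rule concrete, both of these return shapes would be acceptable (illustrative values; the positional alignment of plain arrays is inferred from the rule above):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-01-01", periods=3, freq="h")
y_hat_as_series = pd.Series([1.0, 2.0, 3.0], index=idx)  # carries its own timestamps
y_hat_as_array = np.array([1.0, 2.0, 3.0])               # engine aligns it to the test index
```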
@@ -0,0 +1,264 @@
+ m1_naive:
+   hp1:
+     hyperparameter_1: No hyperparameter
+
+ m2_snaive:
+   hp1:
+     days: 1
+   hp2:
+     days: 7
+
+ m3_ets:
+   hp1:
+     trend: false
+     damped_trend: false
+     seasonal_periods_days: null
+   hp2:
+     trend: false
+     damped_trend: false
+     seasonal_periods_days: 1
+   hp3:
+     trend: add
+     damped_trend: false
+     seasonal_periods_days: null
+
+ m4_arima:
+   hp1:
+     p: 1
+     d: 0
+     q: 1
+   hp2:
+     p: 1
+     d: 1
+     q: 1
+
+ m5_sarima:
+   hp1:
+     p: 1
+     d: 0
+     q: 1
+     P: 0
+     D: 1
+     Q: 0
+     seasonal_period_days: 1
+   hp2:
+     p: 1
+     d: 0
+     q: 1
+     P: 0
+     D: 1
+     Q: 0
+     seasonal_period_days: 7
+
+ m6_lr:
+   hp1:
+     num_features: 50
+
+ m7_ann:
+   hp1:
+     seed: 99
+     hidden_size: 10
+     activation_function: relu
+     learning_rate: 0.001
+     solver: adam
+     epochs: 500
+   hp2:
+     seed: 99
+     hidden_size: 10
+     activation_function: relu
+     learning_rate: 0.01
+     solver: adam
+     epochs: 500
+
+ m8_dnn:
+   hp1:
+     seed: 1
+     n_hidden: 3
+     hidden_size: 10
+     activation_function: relu
+     learning_rate: 0.001
+     solver: adam
+     epochs: 500
+   hp2:
+     seed: 1
+     n_hidden: 3
+     hidden_size: 10
+     activation_function: relu
+     learning_rate: 0.01
+     solver: adam
+     epochs: 500
+   hp3:
+     seed: 1
+     n_hidden: 4
+     hidden_size: 10
+     activation_function: relu
+     learning_rate: 0.01
+     solver: adam
+     epochs: 500
+
+ m9_rt:
+   hp1:
+     seed: 1
+     max_depth: 3
+     min_samples_split: 2
+     min_samples_leaf: 1
+     max_features: sqrt
+   hp2:
+     seed: 1
+     max_depth: 30
+     min_samples_split: 2
+     min_samples_leaf: 1
+     max_features: sqrt
+   hp3:
+     seed: 1
+     max_depth: 15
+     min_samples_split: 2
+     min_samples_leaf: 1
+     max_features: sqrt
+
+ m10_rf:
+   hp1:
+     seed: 1
+     n_estimators: 100
+     max_depth: 3
+     min_samples_split: 2
+     min_samples_leaf: 1
+   hp2:
+     seed: 1
+     n_estimators: 50
+     max_depth: 3
+     min_samples_split: 2
+     min_samples_leaf: 1
+
+ m11_svr:
+   hp1:
+     seed: 1
+     kernel: rbf
+     C: 100
+     gamma: 0.001
+     epsilon: 0.3
+
+ m12_rnn:
+   hp1:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 2
+     learning_rate: 0.001
+   hp2:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 100
+     learning_rate: 0.001
+
+ m13_lstm:
+   hp1:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 2
+     learning_rate: 0.001
+   hp2:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 100
+     learning_rate: 0.001
+
+ m14_gru:
+   hp1:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 2
+     learning_rate: 0.001
+   hp2:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     epochs: 100
+     learning_rate: 0.001
+
+ m15_transformer:
+   hp1:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     nhead: 4
+     epochs: 2
+     learning_rate: 0.001
+   hp2:
+     seed: 1
+     input_size: 1
+     hidden_size: 64
+     num_layers: 1
+     output_size: 1
+     batch_size: 4096
+     nhead: 4
+     epochs: 100
+     learning_rate: 0.001
+
+ m16_prophet:
+   hp1:
+     seed: 1
+     seasonality_prior_scale: 10
+     seasonality_mode: additive
+     weekly_seasonality: true
+     daily_seasonality: true
+     growth: linear
+
+ m17_xgb:
+   hp1:
+     xgb_seed: 1
+     n_estimators: 200
+     learning_rate: 0.1
+     max_depth: 6
+     subsample: 0.8
+     colsample_bytree: 0.8
+   hp2:
+     xgb_seed: 1
+     n_estimators: 100
+     learning_rate: 0.1
+     max_depth: 3
+     subsample: 0.8
+     colsample_bytree: 0.8
+
+ m18_nbeats:
+   hp1:
+     seed: 1
+     hidden_size: 64
+     num_layers: 1
+     num_blocks: 3
+     output_size: 1
+     epochs: 2
+     lr: 0.001
+   hp2:
+     seed: 1
+     hidden_size: 64
+     num_layers: 1
+     num_blocks: 3
+     output_size: 1
+     epochs: 100
+     lr: 0.001
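As an illustration of how a spec's `model`/`hyperparameter` pair could be resolved against this file, here is a sketch using plain PyYAML; pynnlf ships its own `yamlio` helpers, so the exact loading path is an assumption:

```python
import yaml  # PyYAML; illustrative only

with open("models/hyperparameters.yaml") as f:
    all_hps = yaml.safe_load(f)

# The top-level key equals the model file stem (e.g. "m10_rf");
# the second level picks one named hyperparameter set.
hp = all_hps["m10_rf"]["hp1"]
print(hp["n_estimators"])  # -> 100
```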
@@ -0,0 +1,65 @@
+ # IMPORT REQUIRED LIBRARIES
+ from sklearn.ensemble import RandomForestRegressor
+ import pandas as pd
+
+ def train_model_m10_rf(hyperparameter, train_df_X, train_df_y):
+     '''Train a random forest model for point forecasting.
+
+     Args:
+         hyperparameter (dict): model hyperparameters (seed, n_estimators, max_depth, min_samples_split, min_samples_leaf)
+         train_df_X (df): feature matrix for training
+         train_df_y (df): target matrix for training
+
+     Returns:
+         model (dict): trained model packed with its components
+     '''
+
+     # UNPACK HYPERPARAMETERS
+     seed = int(hyperparameter['seed'])
+     n_estimators = int(hyperparameter['n_estimators'])
+     max_depth = int(hyperparameter['max_depth'])
+     min_samples_split = int(hyperparameter['min_samples_split'])
+     min_samples_leaf = int(hyperparameter['min_samples_leaf'])
+
+     # TRAIN MODEL
+     rf = RandomForestRegressor(
+         n_estimators=n_estimators,            # number of trees
+         max_depth=max_depth,                  # maximum depth of a tree
+         min_samples_split=min_samples_split,  # min samples to split a node
+         min_samples_leaf=min_samples_leaf,    # min samples in a leaf
+         random_state=seed
+     )
+
+     rf.fit(train_df_X, train_df_y.values.ravel())  # flatten the target to 1D to avoid a DataConversionWarning
+
+     # PACK MODEL
+     model = {"rf": rf}
+
+     return model
+
+ def produce_forecast_m10_rf(model, train_df_X, test_df_X):
+     """Create forecasts on the train and test sets using the trained model.
+
+     Args:
+         model (dict): all parameters of the trained model
+         train_df_X (df): predictors of the train set
+         test_df_X (df): predictors of the test set
+
+     Returns:
+         train_df_y_hat (df): forecast on the train set
+         test_df_y_hat (df): forecast on the test set
+     """
+
+     # UNPACK MODEL
+     rf = model['rf']
+
+     # PRODUCE FORECAST
+     train_df_y_hat = pd.DataFrame(rf.predict(train_df_X), index=train_df_X.index, columns=['y_hat'])
+     test_df_y_hat = pd.DataFrame(rf.predict(test_df_X), index=test_df_X.index, columns=['y_hat'])
+
+     return train_df_y_hat, test_df_y_hat
+
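A quick, self-contained smoke test of this function pair; the toy data and hyperparameter values are hypothetical, not from the package:

```python
import numpy as np
import pandas as pd

# Illustrative smoke test for the m10_rf functions above
hp = {"seed": 1, "n_estimators": 100, "max_depth": 3,
      "min_samples_split": 2, "min_samples_leaf": 1}
idx = pd.date_range("2024-01-01", periods=48, freq="h")
rng = np.random.default_rng(0)
X = pd.DataFrame({"lag_1": rng.random(48), "hour": idx.hour}, index=idx)
y = pd.DataFrame({"y": rng.random(48)}, index=idx)

model = train_model_m10_rf(hp, X, y)
train_hat, test_hat = produce_forecast_m10_rf(model, X, X)
print(test_hat.head())  # DataFrame indexed by timestamp with a 'y_hat' column
```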
@@ -0,0 +1,53 @@
+ # IMPORT REQUIRED LIBRARIES
+ from sklearn.svm import SVR
+
+ def train_model_m11_svr(hyperparameter, train_df_X, train_df_y):
+     '''Train a support vector regression (SVR) model for point forecasting.
+
+     Args:
+         hyperparameter (dict): model hyperparameters (seed, kernel, C, gamma, epsilon)
+         train_df_X (df): feature matrix for training
+         train_df_y (df): target matrix for training
+
+     Returns:
+         model (dict): trained model packed with its components
+     '''
+
+     # UNPACK HYPERPARAMETERS
+     seed = hyperparameter['seed']  # unused: scikit-learn's SVR is deterministic and takes no random_state
+     kernel = hyperparameter['kernel']
+     C = hyperparameter['C']
+     gamma = hyperparameter['gamma']
+     epsilon = hyperparameter['epsilon']
+
+     # TRAIN MODEL
+     train_df_y = train_df_y.values.ravel()  # flatten the target array if necessary
+     svr = SVR(kernel=kernel, C=C, gamma=gamma, epsilon=epsilon)
+     svr.fit(train_df_X, train_df_y)
+
+     # PACK MODEL
+     model = {"svr": svr}
+
+     return model
+
+ def produce_forecast_m11_svr(model, train_df_X, test_df_X):
+     """Create forecasts on the train and test sets using the trained model.
+
+     Args:
+         model (dict): all parameters of the trained model
+         train_df_X (df): predictors of the train set
+         test_df_X (df): predictors of the test set
+
+     Returns:
+         train_df_y_hat (ndarray): forecast on the train set
+         test_df_y_hat (ndarray): forecast on the test set
+     """
+
+     # UNPACK MODEL AND PRODUCE FORECAST (1D arrays; the engine aligns them to timestamps)
+     svr = model['svr']
+     train_df_y_hat = svr.predict(train_df_X)
+     test_df_y_hat = svr.predict(test_df_X)
+
+     return train_df_y_hat, test_df_y_hat
+
@@ -0,0 +1,152 @@
+ # IMPORT REQUIRED LIBRARIES
+ import time
+ import pandas as pd
+ import numpy as np
+ import random
+ import os
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torch.utils.data import DataLoader, TensorDataset
+ from pynnlf.model_utils import separate_lag_and_exogenous_features
+
+ def train_model_m12_rnn(hyperparameter, train_df_X, train_df_y):
+     '''Train an RNN model for point forecasting.
+
+     The RNN block learns the temporal patterns of the lagged series. Its last
+     hidden state is concatenated with the exogenous features (e.g. calendar
+     variables) and passed through a fully connected layer to make the final
+     prediction.
+     '''
+
+     # UNPACK HYPERPARAMETERS
+     seed = int(hyperparameter['seed'])
+     input_size = int(hyperparameter['input_size'])
+     hidden_size = int(hyperparameter['hidden_size'])
+     num_layers = int(hyperparameter['num_layers'])
+     output_size = int(hyperparameter['output_size'])
+     batch_size = int(hyperparameter['batch_size'])
+     epochs = int(hyperparameter['epochs'])
+     learning_rate = hyperparameter['learning_rate']
+
+     # DEFINE MODEL AND TRAINING FUNCTION
+     class RNNModel(nn.Module):
+         def __init__(self, input_size, hidden_size, num_layers, exog_size, output_size=1):
+             super(RNNModel, self).__init__()
+             # RNN layer over the lag sequence
+             self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
+             # Fully connected (FC) layer over [last hidden state | exogenous features]
+             self.fc = nn.Linear(hidden_size + exog_size, output_size)
+
+         def forward(self, x, exogenous_data):
+             # Pass the lag sequence through the RNN
+             out, h_n = self.rnn(x)
+             # Take the last timestep's hidden state: (batch_size, hidden_size)
+             last_hidden_state = out[:, -1, :]
+             # Concatenate the hidden state with the exogenous variables
+             combined_input = torch.cat((last_hidden_state, exogenous_data), dim=1)
+             # Final prediction through the FC layer
+             return self.fc(combined_input)
+
+     def train_rnn_with_minibatches(model, train_loader, epochs, learning_rate=0.001):
+         criterion = nn.MSELoss()
+         optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+         for epoch in range(epochs):
+             start_time = time.time()
+             model.train()
+             for batch_no, (X_lags_batch, X_exog_batch, y_batch) in enumerate(train_loader, start=1):
+                 print(f'Epoch [{epoch+1}/{epochs}], batch [{batch_no}/{len(train_loader)}]')
+
+                 # Forward pass
+                 predictions = model(X_lags_batch, X_exog_batch)
+                 loss = criterion(predictions, y_batch)
+
+                 # Backward pass
+                 optimizer.zero_grad()
+                 loss.backward()
+                 optimizer.step()
+
+             epoch_time = time.time() - start_time
+             print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, time taken: {epoch_time:.2f} seconds')
+
+     def set_seed(seed):
+         random.seed(seed)
+         np.random.seed(seed)
+         torch.manual_seed(seed)
+         os.environ["PYTHONHASHSEED"] = str(seed)
+
+     # PREPARE TRAIN DATA
+     X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X)
+     X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
+     X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)
+     y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)
+
+     total_lag_features = X_lags_tensor.shape[1]
+     sequence_length = total_lag_features // input_size
+     exog_size = X_exog_tensor.shape[1]
+
+     # Reshape the lag features to 3D: (samples, sequence_length, input_size)
+     X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)
+
+     # INITIALIZE MODEL + DATALOADER
+     set_seed(seed)
+     rnn = RNNModel(input_size, hidden_size, num_layers, exog_size, output_size)
+     train_data = TensorDataset(X_lags_tensor, X_exog_tensor, y_tensor)
+     train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
+
+     # TRAIN MODEL
+     train_rnn_with_minibatches(rnn, train_loader, epochs=epochs, learning_rate=learning_rate)
+
+     # PACK MODEL
+     model = {"rnn": rnn, 'hyperparameter': hyperparameter, "train_df_X": train_df_X, "train_df_y": train_df_y}
+     return model
+
+ def produce_forecast_m12_rnn(model, train_df_X, test_df_X):
+     """Create forecasts on the train and test sets using the trained RNN model."""
+
+     # UNPACK MODEL
+     rnn = model['rnn']
+     hyperparameter = model['hyperparameter']
+
+     # UNPACK HYPERPARAMETERS
+     input_size = int(hyperparameter['input_size'])
+     batch_size = int(hyperparameter['batch_size'])
+
+     # PRODUCE FORECAST
+     def produce_forecast(rnn, X):
+         X_lags, X_exog = separate_lag_and_exogenous_features(X)
+         X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
+         X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)
+
+         total_lag_features = X_lags_tensor.shape[1]
+         sequence_length = total_lag_features // input_size
+         X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)
+
+         rnn.eval()  # switch to inference mode
+         predictions = []
+         for i in range(0, len(X_lags_tensor), batch_size):
+             batch_X_lags = X_lags_tensor[i:i+batch_size]
+             batch_X_exog = X_exog_tensor[i:i+batch_size]
+
+             with torch.no_grad():
+                 batch_pred = rnn(batch_X_lags, batch_X_exog)
+
+             predictions.append(batch_pred)
+
+         predictions = torch.cat(predictions, dim=0)
+         return predictions.numpy()
+
+     train_df_y_hat = produce_forecast(rnn, train_df_X)
+     test_df_y_hat = produce_forecast(rnn, test_df_X)
+
+     return train_df_y_hat, test_df_y_hat
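To make the architecture described in the docstring concrete, here is a shape walkthrough of the RNN-plus-exogenous design using standalone PyTorch modules; all dimensions are illustrative values, not package defaults:

```python
import torch
import torch.nn as nn

# Shape walkthrough for the last-hidden-state + exogenous-concat design above
batch, seq_len, input_size, hidden, exog = 4, 24, 1, 64, 7
rnn = nn.RNN(input_size, hidden, num_layers=1, batch_first=True)
fc = nn.Linear(hidden + exog, 1)

x = torch.randn(batch, seq_len, input_size)      # lag sequence
out, h_n = rnn(x)                                # out: (batch, seq_len, hidden)
last = out[:, -1, :]                             # last timestep: (batch, hidden)
exog_data = torch.randn(batch, exog)             # calendar-style features
pred = fc(torch.cat((last, exog_data), dim=1))   # (batch, 1) final prediction
print(pred.shape)  # torch.Size([4, 1])
```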