pynnlf 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pynnlf/__about__.py +1 -0
  2. pynnlf/__init__.py +5 -0
  3. pynnlf/api.py +17 -0
  4. pynnlf/discovery.py +63 -0
  5. pynnlf/engine.py +1238 -0
  6. pynnlf/hyperparams.py +38 -0
  7. pynnlf/model_utils.py +186 -0
  8. pynnlf/runner.py +108 -0
  9. pynnlf/scaffold/README_WORKSPACE.md +0 -0
  10. pynnlf/scaffold/data/README_data.md +40 -0
  11. pynnlf/scaffold/data/ds0_test.csv +4081 -0
  12. pynnlf/scaffold/models/README_models.md +61 -0
  13. pynnlf/scaffold/models/hyperparameters.yaml +264 -0
  14. pynnlf/scaffold/models/m10_rf.py +65 -0
  15. pynnlf/scaffold/models/m11_svr.py +53 -0
  16. pynnlf/scaffold/models/m12_rnn.py +152 -0
  17. pynnlf/scaffold/models/m13_lstm.py +208 -0
  18. pynnlf/scaffold/models/m14_gru.py +139 -0
  19. pynnlf/scaffold/models/m15_transformer.py +138 -0
  20. pynnlf/scaffold/models/m16_prophet.py +216 -0
  21. pynnlf/scaffold/models/m17_xgb.py +66 -0
  22. pynnlf/scaffold/models/m18_nbeats.py +107 -0
  23. pynnlf/scaffold/models/m1_naive.py +49 -0
  24. pynnlf/scaffold/models/m2_snaive.py +49 -0
  25. pynnlf/scaffold/models/m3_ets.py +133 -0
  26. pynnlf/scaffold/models/m4_arima.py +123 -0
  27. pynnlf/scaffold/models/m5_sarima.py +128 -0
  28. pynnlf/scaffold/models/m6_lr.py +76 -0
  29. pynnlf/scaffold/models/m7_ann.py +148 -0
  30. pynnlf/scaffold/models/m8_dnn.py +141 -0
  31. pynnlf/scaffold/models/m9_rt.py +74 -0
  32. pynnlf/scaffold/models/mXX_template.py +68 -0
  33. pynnlf/scaffold/specs/batch.yaml +4 -0
  34. pynnlf/scaffold/specs/experiment.yaml +4 -0
  35. pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
  36. pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
  37. pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
  38. pynnlf/scaffold/specs/tests_ci.yaml +8 -0
  39. pynnlf/scaffold/specs/tests_full.yaml +23 -0
  40. pynnlf/tests_runner.py +211 -0
  41. pynnlf/tools/strip_notebook_artifacts.py +32 -0
  42. pynnlf/workspace.py +63 -0
  43. pynnlf/yamlio.py +28 -0
  44. pynnlf-0.2.2.dist-info/METADATA +168 -0
  45. pynnlf-0.2.2.dist-info/RECORD +47 -0
  46. pynnlf-0.2.2.dist-info/WHEEL +5 -0
  47. pynnlf-0.2.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,216 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ import pandas as pd
3
+ from prophet import Prophet
4
+ import numpy as np
5
+ from pynnlf.model_utils import remove_jump_df, separate_lag_and_exogenous_features
6
+
7
def train_model_m16_prophet(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    ''' Train a Prophet model for point forecasting.

    The target series is cleaned of post-jump rows, put on an explicit
    frequency, and trimmed so one forecast horizon separates the train and
    test sets. Exogenous (non-lag) features are registered as Prophet
    regressors and merged into the training frame.

    Args:
        hyperparameter (df) : hyperparameter values of the model
            (keys read: "seed", "seasonality_prior_scale", "seasonality_mode",
            "weekly_seasonality", "daily_seasonality", "growth")
        train_df_X (df) : features matrix for training; index must be named
            'datetime' (the exogenous merge below joins on it)
        train_df_y (df) : target matrix for training, datetime-indexed
        forecast_horizon (int) : forecast horizon for the model, in minutes

    Returns:
        model (dict) : {"prophet": fitted Prophet model,
                        "y": the frame passed to Prophet.fit (ds/y + regressors),
                        "hyperparameter": the input hyperparameters}
    '''

    # UNPACK HYPERPARAMETER
    seed = hyperparameter["seed"]  # unpacked but unused: Prophet exposes no seed argument
    seasonality_prior_scale = hyperparameter["seasonality_prior_scale"]
    seasonality_mode = hyperparameter["seasonality_mode"]
    weekly_seasonality = hyperparameter["weekly_seasonality"]
    daily_seasonality = hyperparameter["daily_seasonality"]
    growth = hyperparameter["growth"]

    # UPDATE train_df to exclude all rows after a sudden jump in the timestep
    train_df_y_updated = remove_jump_df(train_df_y)
    train_df_X_updated = remove_jump_df(train_df_X)

    # Calculate the frequency of the timesteps using the first and second index
    # values; also let pandas infer a frequency string so asfreq() can make the
    # index explicit (Prophet needs a regular series).
    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)

    # INTRODUCE GAP BETWEEN TRAIN AND TEST SET TO AVOID DATA LEAKAGE
    # (drop the last horizon-1 timesteps; horizon is converted from minutes
    # to a number of timesteps)
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
    if n_timestep_forecast_horizon == 1:
        pass
    else:
        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]
        train_df_X_updated = train_df_X_updated[:-(n_timestep_forecast_horizon - 1)]

    # 'y' is the training series frame; split X into lag features (unused here)
    # and exogenous features that become Prophet regressors.
    y = train_df_y_updated.copy()
    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X_updated)

    # Initialize the Prophet model with hyperparameters
    prophet_model = Prophet(
        seasonality_prior_scale=seasonality_prior_scale,  # seasonality strength
        seasonality_mode=seasonality_mode,                # 'additive' or 'multiplicative'
        weekly_seasonality=weekly_seasonality,
        daily_seasonality=daily_seasonality,
        growth=growth                                     # 'linear' or 'logistic'
        # random_state = seed, # cannot set seed in prophet
    )
    # Each exogenous column must be declared before fitting
    for col in X_exog.columns:
        prophet_model.add_regressor(col)

    # Add exogenous features to the y DataFrame; the join key is the index
    # name 'datetime', then the index is flattened and renamed to Prophet's
    # required 'ds' column.
    y = y.merge(X_exog, on='datetime')
    y.reset_index(inplace=True)
    y.rename(columns={'datetime': 'ds'}, inplace=True)

    # Train model
    prophet_model.fit(y)

    # PACK MODEL — 'y' is kept so the forecast step can warm-start refits
    model = {"prophet": prophet_model, "y": y, "hyperparameter": hyperparameter}

    return model
75
+
76
def produce_forecast_m16_prophet(model, train_df_X, test_df_X, train_df_y, forecast_horizon):
    """Create forecasts on the train and test sets using the trained model.

    Train-set forecasts come from a single predict() over the training
    exogenous frame. Test-set forecasts are produced by periodically refitting
    Prophet (warm-started from the previous fit) as new observations arrive:
    the model is refit n_update (=100) times, and only one timestep per refit
    is forecast — all other rows of test_df_y_hat remain NaN.

    Args:
        model (dictionary): all parameters of the trained model
            (keys: 'prophet', 'y', 'hyperparameter')
        train_df_X (df): predictors of train set (datetime-indexed)
        test_df_X (df): predictors of test set (datetime-indexed)
        train_df_y (df): target of train set (unused here)
        forecast_horizon (int): forecast horizon for the model, in minutes

    Returns:
        train_df_y_hat (df) : forecast result at train set (index 'datetime')
        test_df_y_hat (df) : forecast result at test set (column 'y_hat';
            sparse — NaN except at the refit timesteps)
    """

    # UNPACK MODEL
    prophet_model = model['prophet']
    y = model['y']
    hyperparameter = model['hyperparameter']

    # UNPACK HYPERPARAMETER (needed to rebuild the model for each warm-start refit)
    seasonality_prior_scale = hyperparameter["seasonality_prior_scale"]
    seasonality_mode = hyperparameter["seasonality_mode"]
    weekly_seasonality = hyperparameter["weekly_seasonality"]
    daily_seasonality = hyperparameter["daily_seasonality"]
    growth = hyperparameter["growth"]

    # Set up X_exog which is used for prediction
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))  # NOTE(review): computed but unused below

    train_df_X_updated = remove_jump_df(train_df_X)
    test_df_X_updated = remove_jump_df(test_df_X)  # NOTE(review): unused below

    # Exogenous training features, reshaped to Prophet's 'ds' + regressors layout
    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X_updated)

    X_exog.reset_index(inplace=True)
    X_exog.rename(columns={'datetime': 'ds'}, inplace=True)

    # Forecast train set in one shot from the already-fitted model
    train_df_y_hat = prophet_model.predict(X_exog)

    train_df_y_hat = train_df_y_hat[['ds', 'yhat']]

    train_df_y_hat.set_index('ds', inplace=True)
    train_df_y_hat.index.name = 'datetime'

    # Set up function to warm start the model for updating the fit
    def warm_start_params(m):
        """
        Retrieve parameters from a trained model in the format used to initialize a new Stan model.
        Note that the new Stan model must have these same settings:
        n_changepoints, seasonality features, mcmc sampling
        for the retrieved parameters to be valid for the new model.

        Parameters
        ----------
        m: A trained model of the Prophet class.

        Returns
        -------
        A Dictionary containing retrieved parameters of m.
        """
        res = {}
        # Scalar parameters: MAP fit stores a single sample, MCMC stores many
        for pname in ['k', 'm', 'sigma_obs']:
            if m.mcmc_samples == 0:
                res[pname] = m.params[pname][0][0]
            else:
                res[pname] = np.mean(m.params[pname])
        # Vector parameters (changepoint deltas, regressor betas)
        for pname in ['delta', 'beta']:
            if m.mcmc_samples == 0:
                res[pname] = m.params[pname][0]
            else:
                res[pname] = np.mean(m.params[pname], axis=0)
        return res

    # PRODUCE FORECAST FOR TEST SET

    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
    # The model is refitted 100 times only, so there will be only 100 forecast results.

    test_df_y_hat = pd.DataFrame(index = test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # in the case of CV 10, which is when test df < train df,
    # don't compute the test forecast
    if (test_df_X.index[-1] < train_df_X.index[0]):
        # this is the case when we use CV10, where the test set is before the train set
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    # Exogenous test features in Prophet layout
    _, X_test = separate_lag_and_exogenous_features(test_df_X)
    X_test.reset_index(inplace=True)
    X_test.rename(columns={'datetime': 'ds'}, inplace=True)

    # NOTE(review): if len(test set) < n_update + 1, n_timesteps_per_update is 0
    # and every iteration re-forecasts row 0 with empty new_rows — confirm the
    # test set is always longer than 101 rows.
    n_update = 100
    n_timesteps_per_update = int(len(test_df_y_hat) / (n_update + 1))

    # TRANSFORM test_df_X to a series with only the last lag
    # (lag column name embeds the horizon, e.g. 'y_lag_0 days 00:30:00m')
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]

    # Rebuild the realized target series from the lag column: the value lagged
    # by one horizon, timestamped one horizon earlier, is the true observation.
    new_y = pd.DataFrame(test_df_y_last)
    new_y.rename(columns={new_y.columns[0]: 'y'}, inplace=True)
    new_y.insert(0, 'ds', new_y.index - pd.Timedelta(minutes=forecast_horizon))
    new_y.reset_index(drop = True, inplace=True)

    # Drop the first row on both sides, then attach regressor values by 'ds'
    new_y = new_y.drop(0, axis=0).reset_index(drop=True)
    X_exog_complete = pd.concat([X_exog, X_test], axis=0)
    X_exog_complete = X_exog_complete.drop(0, axis=0).reset_index(drop=True)
    new_y = pd.merge(new_y, X_exog_complete, on='ds', how='left')

    for i in range(n_update):
        # for i in range(2): #for test only
        print('Processing i = ', i + 1, ' out of ', n_update),
        if i == 0:
            # First prediction uses the model as trained, on the first test row
            X_test_curr = X_test.iloc[:1,:]
            test_df_y_hat.iloc[i, 0] = prophet_model.predict(X_test_curr)['yhat'].values[0]
        else:
            # Extend the training frame with the observations realized since
            # the previous refit
            new_rows = new_y.iloc[(i-1)*n_timesteps_per_update : i*n_timesteps_per_update, :]
            y = pd.concat([y, new_rows], ignore_index=True)

            current_params = warm_start_params(prophet_model)

            # Rebuild with identical settings so the warm-start params are valid
            prophet_model = Prophet(
                seasonality_prior_scale=seasonality_prior_scale,
                seasonality_mode=seasonality_mode,
                weekly_seasonality=weekly_seasonality,
                daily_seasonality=daily_seasonality,
                growth=growth,
            )

            prophet_model = prophet_model.fit(y, init=current_params) # warm-starting from the prev model
            # Forecast a single timestep per refit; the in-between rows stay NaN
            X_test_curr = X_test.iloc[i*n_timesteps_per_update : (1+i*n_timesteps_per_update),:]
            test_df_y_hat.iloc[i*n_timesteps_per_update, 0] = prophet_model.predict(X_test_curr)['yhat'].values[0]

    return train_df_y_hat, test_df_y_hat
216
+
@@ -0,0 +1,66 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ import numpy as np
3
+ import pandas as pd
4
+ from xgboost import XGBRegressor
5
+
6
def train_model_m17_xgb(hyperparameter, train_df_X, train_df_y):
    '''Train an XGBoost regressor for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values (keys: "xgb_seed",
            "n_estimators", "learning_rate", "max_depth", "subsample",
            "colsample_bytree")
        train_df_X (df) : features matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : {"xgb": fitted XGBRegressor}
    '''

    # INITIALIZE MODEL — hyperparameters are cast to the numeric types
    # XGBRegressor expects, since they may arrive as strings/objects
    regressor = XGBRegressor(
        n_estimators=int(hyperparameter["n_estimators"]),
        learning_rate=float(hyperparameter["learning_rate"]),
        max_depth=int(hyperparameter["max_depth"]),
        subsample=float(hyperparameter["subsample"]),
        colsample_bytree=float(hyperparameter["colsample_bytree"]),
        random_state=int(hyperparameter["xgb_seed"]),
    )

    # TRAIN MODEL
    regressor.fit(train_df_X, train_df_y)

    # PACK MODEL
    return {"xgb": regressor}
43
+
44
def produce_forecast_m17_xgb(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dictionary): all parameters of the trained model (key "xgb")
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set

    Returns:
        train_df_y_hat : forecast result at train set
        test_df_y_hat : forecast result at test set
    """

    # UNPACK MODEL AND PRODUCE FORECAST for both sets
    regressor = model["xgb"]
    return regressor.predict(train_df_X), regressor.predict(test_df_X)
66
+
@@ -0,0 +1,107 @@
1
+ from pyexpat import model
2
+
3
+
4
def train_model_m18_nbeats(hyperparameter, train_df_X, train_df_y):
    """
    Train an N-BEATS-style model for point forecasting.

    The network is a sum of `num_blocks` fully-connected stacks over the
    lag+exogenous feature vector, trained full-batch with Adam on MSE loss.

    Args:
        hyperparameter (dict) : model hyperparameters (keys: 'output_size',
            'hidden_size', 'num_blocks', 'num_layers', 'lr', 'epochs', 'seed')
        train_df_X (DataFrame) : predictors for training
        train_df_y (DataFrame) : target for training (same row count as X)

    Returns:
        model : trained PyTorch NBeats model
    """
    import random

    import numpy as np
    import torch
    import torch.nn as nn

    # ---- Unpack hyperparameters ----
    input_size = train_df_X.shape[1]
    output_size = int(hyperparameter['output_size'])
    hidden_size = int(hyperparameter['hidden_size'])
    num_blocks = int(hyperparameter['num_blocks'])
    num_layers = int(hyperparameter['num_layers'])
    lr = hyperparameter['lr']
    epochs = int(hyperparameter['epochs'])
    seed = int(hyperparameter['seed'])

    # ---- Set seeds for reproducibility ----
    # (fix: the original called torch.manual_seed twice; manual_seed_all
    # already seeds every CUDA device, so a separate cuda.manual_seed is
    # redundant as well)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # ---- Define NBeats model inside the function ----
    class NBeatsModel(nn.Module):
        """Sum of `num_blocks` independent MLP blocks (simplified N-BEATS)."""

        def __init__(self, input_size, output_size, hidden_size, num_blocks, num_layers):
            super().__init__()
            blocks = []
            for _ in range(num_blocks):
                layers = []
                for l in range(num_layers):
                    # First layer maps from the input features, the rest are hidden->hidden
                    layers.append(nn.Linear(input_size if l == 0 else hidden_size, hidden_size))
                    layers.append(nn.ReLU())
                layers.append(nn.Linear(hidden_size, output_size))
                blocks.append(nn.Sequential(*layers))
            self.blocks = nn.ModuleList(blocks)

        def forward(self, x):
            # Model output is the sum of all block outputs
            out = 0
            for block in self.blocks:
                out = out + block(x)
            return out

    model = NBeatsModel(input_size, output_size, hidden_size, num_blocks, num_layers)

    # ---- Training setup ----
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    X_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
    y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32)

    # ---- Training loop (full-batch gradient descent) ----
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X_tensor)
        loss = criterion(output, y_tensor)
        loss.backward()
        optimizer.step()

    return model
78
+
79
def produce_forecast_m18_nbeats(model, train_df_X, test_df_X):
    """
    Create forecasts on the train and test sets using the trained NBeats model.

    Args:
        model : trained NBeats PyTorch model
        train_df_X (DataFrame) : predictors of train set
        test_df_X (DataFrame) : predictors of test set

    Returns:
        train_df_y_hat (DataFrame) : forecast result at train set ('y_hat')
        test_df_y_hat (DataFrame) : forecast result at test set ('y_hat')
    """
    import pandas as pd
    import torch

    model.eval()

    def _forecast_frame(features_df):
        # Run the network without gradient tracking and wrap the result in a
        # DataFrame aligned with the input's index.
        features = torch.tensor(features_df.values, dtype=torch.float32)
        with torch.no_grad():
            predictions = model(features).cpu().numpy()
        return pd.DataFrame(predictions, index=features_df.index, columns=['y_hat'])

    return _forecast_frame(train_df_X), _forecast_frame(test_df_X)
107
+
@@ -0,0 +1,49 @@
1
def train_model_m1_naive(hyperparameter, train_df_X, train_df_y):
    '''Train a naive (persistence) model for point forecasting.

    The naive model has no hyperparameters and requires no fitting — the
    forecast is simply the last observed lag — so training is a no-op.

    Args:
        hyperparameter (df) : unused; the naive model has no hyperparameters
        train_df_X (df) : features matrix for training (unused)
        train_df_y (df) : target matrix for training (unused)

    Returns:
        model (dict) : empty dict — nothing needs to be persisted
    '''
    # PACK MODEL — nothing to store for the naive model
    return {}
25
+
26
def produce_forecast_m1_naive(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecasts on the train and test sets with the naive model.

    The persistence forecast is the lag feature exactly one forecast horizon
    back, e.g. column 'y_lag_0 days 00:30:00m' for a 30-minute horizon.

    Args:
        model (dictionary): all parameters of the trained model (unused)
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set
        forecast_horizon (int): forecast horizon in minutes

    Returns:
        train_df_y_hat (df) : forecast result at train set
        test_df_y_hat (df) : forecast result at test set
    """
    import pandas as pd

    # Column name embeds the horizon rendered as a pandas Timedelta
    lag_column = f'y_lag_{pd.Timedelta(minutes=forecast_horizon)}m'

    # PRODUCE FORECAST: persistence of the last observation
    return train_df_X[lag_column], test_df_X[lag_column]
49
+
@@ -0,0 +1,49 @@
1
def train_model_m2_snaive(hyperparameter, train_df_X, train_df_y):
    '''Train a seasonal naive model for point forecasting.

    No fitting is required: the forecast is the value observed a whole number
    of days earlier, so "training" only resolves which lag column to read.

    Args:
        hyperparameter (df) : hyperparameter values (key 'days': seasonal lag
            in whole days)
        train_df_X (df) : features matrix for training (unused)
        train_df_y (df) : target matrix for training (unused)

    Returns:
        model (dict) : {"col_name": name of the seasonal lag column}
    '''
    # Resolve the lag column, e.g. 'y_lag_1 days 00:00:00m' for days == 1
    lag_days = hyperparameter['days']

    # PACK MODEL — only the column name is needed at forecast time
    return {"col_name": f'y_lag_{lag_days} days 00:00:00m'}
26
+
27
def produce_forecast_m2_snaive(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets with the seasonal naive model.

    Args:
        model (dictionary): all parameters of the trained model
            (key 'col_name': seasonal lag column, depends on the lag day)
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set

    Returns:
        train_df_y_hat (df) : forecast result at train set
        test_df_y_hat (df) : forecast result at test set
    """
    # UNPACK MODEL and read the seasonal lag column from both sets
    seasonal_lag_column = model['col_name']
    return train_df_X[seasonal_lag_column], test_df_X[seasonal_lag_column]
49
+
@@ -0,0 +1,133 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ import pandas as pd
3
+ from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
4
+ from pynnlf.model_utils import remove_jump_df
5
+ import numpy as np
6
+ import datetime as dt
7
+
8
def train_model_m3_ets(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    '''Train a state-space exponential smoothing (ETS) model for point forecasting.

    See:
    https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.exponential_smoothing.ExponentialSmoothing.html

    Args:
        hyperparameter (df) : hyperparameter values (keys: 'trend',
            'damped_trend', 'seasonal_periods_days')
        train_df_X (df) : features matrix for training (unused)
        train_df_y (df) : target matrix for training, datetime-indexed,
            with the series in column 'y'
        forecast_horizon (int) : forecast horizon in mins

    Returns:
        model (dict) : {"model_fitted": fitted ExponentialSmoothing results}
    '''

    # UNPACK HYPERPARAMETER
    trend = hyperparameter['trend']
    damped_trend = hyperparameter['damped_trend']
    seasonal_periods_days = hyperparameter['seasonal_periods_days']  # currently unused (seasonal=None)

    # Keep only the rows before any sudden jump in the timestep, then put the
    # series on an explicit frequency so statsmodels can forecast from it.
    cleaned_y = remove_jump_df(train_df_y)
    step = cleaned_y.index[1] - cleaned_y.index[0]
    cleaned_y = cleaned_y.asfreq(pd.infer_freq(cleaned_y.index))

    # Trim the tail so a gap of one forecast horizon separates the train and
    # test sets (avoids data leakage); horizon is converted to timesteps.
    steps_per_horizon = int(forecast_horizon / (step.total_seconds() / 60))
    if steps_per_horizon != 1:
        cleaned_y = cleaned_y[:-(steps_per_horizon - 1)]

    # Build and fit the state-space Exponential Smoothing model
    fitted = ExponentialSmoothing(
        cleaned_y['y'],
        trend=trend,
        seasonal=None,  # seasonality can be enabled later
        damped_trend=damped_trend,
    ).fit()

    # PACK MODEL
    return {"model_fitted": fitted}
63
+
64
def produce_forecast_m3_ets(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecasts on the train and test sets using the trained ETS model.

    Train-set forecasts are the model's fitted values. Test-set forecasts are
    produced step by step: after each test timestep, the realized observation
    (recovered from the lag column) is appended to the fitted model and a new
    horizon-ahead forecast is taken.

    Args:
        model (dictionary): all parameters of the trained model
            (key 'model_fitted': fitted ExponentialSmoothing results)
        train_df_X (df): predictors of train set (datetime-indexed)
        test_df_X (df): predictors of test set (datetime-indexed)
        forecast_horizon (int): forecast horizon in mins

    Returns:
        train_df_y_hat (df) : forecast result at train set (column 'y')
        test_df_y_hat (df) : forecast result at test set (column 'y_hat')
    """

    # Convert the horizon in minutes into a number of timesteps, using the
    # spacing of the first two test timestamps
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))

    train_df_X_updated = remove_jump_df(train_df_X)  # NOTE(review): unused below
    test_df_X_updated = remove_jump_df(test_df_X)

    # UNPACK MODEL
    model_fitted = model['model_fitted']

    # PRODUCE FORECAST FOR TRAIN SET — in-sample fitted values
    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
    train_df_y_hat.columns = ['y']

    train_df_y_hat.index.name = 'datetime'

    # TRANSFORM test_df_X to a series with only the last lag
    # (lag column name embeds the horizon, e.g. 'y_lag_0 days 00:30:00m')
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]

    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
    # THIS CODE RESULTS IN 2 MINS
    test_df_y_hat = pd.DataFrame(index = test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # in the case of CV 10, which is when test df < train df,
    # don't compute the test forecast
    if (test_df_X.index[-1] < train_df_X.index[0]):
        # this is the case when we use CV10, where the test set is before the train set
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    for i in range(len(test_df_y_last)):
        # for i in range(2): #for test only
        print('Processing i = ', i + 1, ' out of ', len(test_df_y_last)),
        if i == 0:
            # First step: forecast straight from the trained model; the
            # horizon-ahead value is the last element of the forecast path
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
        else:
            # Recover the realized observation: the lag value at test time t
            # is the true y at t - horizon
            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'], index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
            # NOTE(review): test_df_X_updated.index.freq may be None if the
            # cleaned test index has no explicit freq — confirm upstream.
            new_row = new_row.asfreq(test_df_X_updated.index.freq)

            # Append the observation to the fitted results (statsmodels
            # `append` re-applies the filter with the same parameters),
            # then forecast one horizon ahead again
            model_fitted = model_fitted.append(new_row)
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1] # to update based on the forecast horizon

    return train_df_y_hat, test_df_y_hat
133
+