pynnlf-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pynnlf/__about__.py +1 -0
- pynnlf/__init__.py +5 -0
- pynnlf/api.py +17 -0
- pynnlf/discovery.py +63 -0
- pynnlf/engine.py +1238 -0
- pynnlf/hyperparams.py +38 -0
- pynnlf/model_utils.py +186 -0
- pynnlf/runner.py +108 -0
- pynnlf/scaffold/README_WORKSPACE.md +0 -0
- pynnlf/scaffold/data/README_data.md +40 -0
- pynnlf/scaffold/data/ds0_test.csv +4081 -0
- pynnlf/scaffold/models/README_models.md +61 -0
- pynnlf/scaffold/models/hyperparameters.yaml +264 -0
- pynnlf/scaffold/models/m10_rf.py +65 -0
- pynnlf/scaffold/models/m11_svr.py +53 -0
- pynnlf/scaffold/models/m12_rnn.py +152 -0
- pynnlf/scaffold/models/m13_lstm.py +208 -0
- pynnlf/scaffold/models/m14_gru.py +139 -0
- pynnlf/scaffold/models/m15_transformer.py +138 -0
- pynnlf/scaffold/models/m16_prophet.py +216 -0
- pynnlf/scaffold/models/m17_xgb.py +66 -0
- pynnlf/scaffold/models/m18_nbeats.py +107 -0
- pynnlf/scaffold/models/m1_naive.py +49 -0
- pynnlf/scaffold/models/m2_snaive.py +49 -0
- pynnlf/scaffold/models/m3_ets.py +133 -0
- pynnlf/scaffold/models/m4_arima.py +123 -0
- pynnlf/scaffold/models/m5_sarima.py +128 -0
- pynnlf/scaffold/models/m6_lr.py +76 -0
- pynnlf/scaffold/models/m7_ann.py +148 -0
- pynnlf/scaffold/models/m8_dnn.py +141 -0
- pynnlf/scaffold/models/m9_rt.py +74 -0
- pynnlf/scaffold/models/mXX_template.py +68 -0
- pynnlf/scaffold/specs/batch.yaml +4 -0
- pynnlf/scaffold/specs/experiment.yaml +4 -0
- pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
- pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
- pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
- pynnlf/scaffold/specs/tests_ci.yaml +8 -0
- pynnlf/scaffold/specs/tests_full.yaml +23 -0
- pynnlf/tests_runner.py +211 -0
- pynnlf/tools/strip_notebook_artifacts.py +32 -0
- pynnlf/workspace.py +63 -0
- pynnlf/yamlio.py +28 -0
- pynnlf-0.2.2.dist-info/METADATA +168 -0
- pynnlf-0.2.2.dist-info/RECORD +47 -0
- pynnlf-0.2.2.dist-info/WHEEL +5 -0
- pynnlf-0.2.2.dist-info/top_level.txt +1 -0
pynnlf/scaffold/models/m16_prophet.py
@@ -0,0 +1,216 @@
+# IMPORT REQUIRED LIBRARIES
+import pandas as pd
+from prophet import Prophet
+import numpy as np
+from pynnlf.model_utils import remove_jump_df, separate_lag_and_exogenous_features
+
+def train_model_m16_prophet(hyperparameter, train_df_X, train_df_y, forecast_horizon):
+    ''' Train a Prophet model for point forecasting.
+
+    Args:
+        hyperparameter (df) : hyperparameter values of the model
+        train_df_X (df) : feature matrix for training
+        train_df_y (df) : target matrix for training
+        forecast_horizon (int) : forecast horizon for the model
+
+    Returns:
+        model (dict) : trained model with all features
+    '''
+
+    # UNPACK HYPERPARAMETER
+    seed = hyperparameter["seed"]
+    seasonality_prior_scale = hyperparameter["seasonality_prior_scale"]
+    seasonality_mode = hyperparameter["seasonality_mode"]
+    weekly_seasonality = hyperparameter["weekly_seasonality"]
+    daily_seasonality = hyperparameter["daily_seasonality"]
+    growth = hyperparameter["growth"]
+
+    # UPDATE train_df to exclude all rows after a sudden jump in the timestep
+    train_df_y_updated = remove_jump_df(train_df_y)
+    train_df_X_updated = remove_jump_df(train_df_X)
+
+    # Calculate the frequency of the timesteps using the first and second index values
+    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
+    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
+    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)
+
+    # INTRODUCE GAP BETWEEN TRAIN AND TEST SET TO AVOID DATA LEAKAGE
+    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
+    if n_timestep_forecast_horizon == 1:
+        pass
+    else:
+        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]
+        train_df_X_updated = train_df_X_updated[:-(n_timestep_forecast_horizon - 1)]
+
+    # train_df_y_updated holds the training series in column 'y'
+    y = train_df_y_updated.copy()
+    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X_updated)
+
+    # Initialize the Prophet model with hyperparameters
+    prophet_model = Prophet(
+        seasonality_prior_scale=seasonality_prior_scale,  # seasonality strength
+        seasonality_mode=seasonality_mode,                # 'additive' or 'multiplicative'
+        weekly_seasonality=weekly_seasonality,            # enable weekly seasonality
+        daily_seasonality=daily_seasonality,              # enable daily seasonality
+        growth=growth                                     # 'linear' or 'logistic' growth
+        # random_state = seed,  # cannot set seed in prophet
+    )
+    for col in X_exog.columns:
+        prophet_model.add_regressor(col)
+
+    # Add exogenous features to the y DataFrame
+    y = y.merge(X_exog, on='datetime')
+    y.reset_index(inplace=True)
+    y.rename(columns={'datetime': 'ds'}, inplace=True)
+
+    # Train model
+    prophet_model.fit(y)
+
+    # PACK MODEL
+    model = {"prophet": prophet_model, "y": y, "hyperparameter": hyperparameter}
+
+    return model
+
+def produce_forecast_m16_prophet(model, train_df_X, test_df_X, train_df_y, forecast_horizon):
+    """Create forecasts for the train and test sets using the trained model.
+
+    Args:
+        model (dictionary): all parameters of the trained model
+        train_df_X (df): predictors of train set
+        test_df_X (df): predictors of test set
+        train_df_y (df): target of train set
+        forecast_horizon (int): forecast horizon for the model
+
+    Returns:
+        train_df_y_hat (df) : forecast result on train set
+        test_df_y_hat (df) : forecast result on test set
+    """
+
+    # UNPACK MODEL
+    prophet_model = model['prophet']
+    y = model['y']
+    hyperparameter = model['hyperparameter']
+
+    # UNPACK HYPERPARAMETER
+    seasonality_prior_scale = hyperparameter["seasonality_prior_scale"]
+    seasonality_mode = hyperparameter["seasonality_mode"]
+    weekly_seasonality = hyperparameter["weekly_seasonality"]
+    daily_seasonality = hyperparameter["daily_seasonality"]
+    growth = hyperparameter["growth"]
+
+    # Set up X_exog which is used for prediction
+    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
+    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
+
+    train_df_X_updated = remove_jump_df(train_df_X)
+    test_df_X_updated = remove_jump_df(test_df_X)
+
+    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X_updated)
+
+    X_exog.reset_index(inplace=True)
+    X_exog.rename(columns={'datetime': 'ds'}, inplace=True)
+
+    # Forecast train set
+    train_df_y_hat = prophet_model.predict(X_exog)
+    train_df_y_hat = train_df_y_hat[['ds', 'yhat']]
+    train_df_y_hat.set_index('ds', inplace=True)
+    train_df_y_hat.index.name = 'datetime'
+
+    # Set up function to warm-start the model when updating the fit
+    def warm_start_params(m):
+        """
+        Retrieve parameters from a trained model in the format used to initialize a new Stan model.
+        Note that the new Stan model must have these same settings:
+        n_changepoints, seasonality features, mcmc sampling
+        for the retrieved parameters to be valid for the new model.
+
+        Parameters
+        ----------
+        m: A trained model of the Prophet class.
+
+        Returns
+        -------
+        A dictionary containing the retrieved parameters of m.
+        """
+        res = {}
+        for pname in ['k', 'm', 'sigma_obs']:
+            if m.mcmc_samples == 0:
+                res[pname] = m.params[pname][0][0]
+            else:
+                res[pname] = np.mean(m.params[pname])
+        for pname in ['delta', 'beta']:
+            if m.mcmc_samples == 0:
+                res[pname] = m.params[pname][0]
+            else:
+                res[pname] = np.mean(m.params[pname], axis=0)
+        return res
+
+    # PRODUCE FORECAST FOR TEST SET
+
+    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
+    # The model is refitted only 100 times, so there will be only 100 forecast results.
+
+    test_df_y_hat = pd.DataFrame(index=test_df_X.index)
+    test_df_y_hat['y_hat'] = np.nan
+
+    # In the case of CV 10, which is when the test df precedes the train df,
+    # don't compute the test forecast
+    if test_df_X.index[-1] < train_df_X.index[0]:
+        # this is the case when we use CV10, where the test set is before the train set
+        print("Test set is before train set / CV 10, no test forecast can be made")
+        return train_df_y_hat, test_df_y_hat
+
+    _, X_test = separate_lag_and_exogenous_features(test_df_X)
+    X_test.reset_index(inplace=True)
+    X_test.rename(columns={'datetime': 'ds'}, inplace=True)
+
+    n_update = 100
+    n_timesteps_per_update = int(len(test_df_y_hat) / (n_update + 1))
+
+    # TRANSFORM test_df_X to a series with only the last lag
+    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
+    last_observation = f'y_lag_{horizon_timedelta}m'
+    test_df_y_last = test_df_X[last_observation]
+
+    new_y = pd.DataFrame(test_df_y_last)
+    new_y.rename(columns={new_y.columns[0]: 'y'}, inplace=True)
+    new_y.insert(0, 'ds', new_y.index - pd.Timedelta(minutes=forecast_horizon))
+    new_y.reset_index(drop=True, inplace=True)
+
+    new_y = new_y.drop(0, axis=0).reset_index(drop=True)
+    X_exog_complete = pd.concat([X_exog, X_test], axis=0)
+    X_exog_complete = X_exog_complete.drop(0, axis=0).reset_index(drop=True)
+    new_y = pd.merge(new_y, X_exog_complete, on='ds', how='left')
+
+    for i in range(n_update):
+        print('Processing i = ', i + 1, ' out of ', n_update)
+        if i == 0:
+            X_test_curr = X_test.iloc[:1, :]
+            test_df_y_hat.iloc[i, 0] = prophet_model.predict(X_test_curr)['yhat'].values[0]
+        else:
+            new_rows = new_y.iloc[(i - 1) * n_timesteps_per_update : i * n_timesteps_per_update, :]
+            y = pd.concat([y, new_rows], ignore_index=True)
+
+            current_params = warm_start_params(prophet_model)
+
+            prophet_model = Prophet(
+                seasonality_prior_scale=seasonality_prior_scale,
+                seasonality_mode=seasonality_mode,
+                weekly_seasonality=weekly_seasonality,
+                daily_seasonality=daily_seasonality,
+                growth=growth,
+            )
+
+            prophet_model = prophet_model.fit(y, init=current_params)  # add the new rows, warm-starting from the previous fit
+            X_test_curr = X_test.iloc[i * n_timesteps_per_update : (1 + i * n_timesteps_per_update), :]
+            test_df_y_hat.iloc[i * n_timesteps_per_update, 0] = prophet_model.predict(X_test_curr)['yhat'].values[0]
+
+    return train_df_y_hat, test_df_y_hat
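Note on the warm-start loop above: rather than refitting Prophet from scratch at every test timestep, the code refits n_update = 100 times, each time seeding the new Stan optimization with the previous fit's parameters via fit(..., init=...). A minimal standalone sketch of that pattern, assuming synthetic hourly data (column names and values are illustrative only, not part of pynnlf):

import pandas as pd, numpy as np
from prophet import Prophet

idx = pd.date_range('2024-01-01', periods=500, freq='h')
df = pd.DataFrame({'ds': idx,
                   'y': np.sin(np.arange(500) / 24) + np.random.normal(0, 0.1, 500)})

m1 = Prophet(daily_seasonality=True, weekly_seasonality=False).fit(df.iloc[:400])

def warm_start_params(m):
    # same helper as in m16_prophet.py above, condensed
    res = {}
    for p in ['k', 'm', 'sigma_obs']:
        res[p] = m.params[p][0][0] if m.mcmc_samples == 0 else np.mean(m.params[p])
    for p in ['delta', 'beta']:
        res[p] = m.params[p][0] if m.mcmc_samples == 0 else np.mean(m.params[p], axis=0)
    return res

# Refit on the extended history, initialising Stan at the previous optimum (much faster).
m2 = Prophet(daily_seasonality=True, weekly_seasonality=False)
m2.fit(df, init=warm_start_params(m1))
print(m2.predict(df.tail(1))[['ds', 'yhat']])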
pynnlf/scaffold/models/m17_xgb.py
@@ -0,0 +1,66 @@
+# IMPORT REQUIRED LIBRARIES
+import numpy as np
+import pandas as pd
+from xgboost import XGBRegressor
+
+def train_model_m17_xgb(hyperparameter, train_df_X, train_df_y):
+    ''' Train and test an XGBoost model for point forecasting.
+
+    Args:
+        hyperparameter (df) : hyperparameter values of the model
+        train_df_X (df) : feature matrix for training
+        train_df_y (df) : target matrix for training
+
+    Returns:
+        model (dict) : trained model with all features
+    '''
+
+    # UNPACK HYPERPARAMETER
+    xgb_seed = int(hyperparameter["xgb_seed"])
+    n_estimators = hyperparameter["n_estimators"]
+    learning_rate = hyperparameter["learning_rate"]
+    max_depth = hyperparameter["max_depth"]
+    subsample = hyperparameter["subsample"]
+    colsample_bytree = hyperparameter["colsample_bytree"]
+
+    # INITIALIZE AND TRAIN MODEL
+    xgb = XGBRegressor(
+        n_estimators=int(n_estimators),
+        learning_rate=float(learning_rate),
+        max_depth=int(max_depth),
+        subsample=float(subsample),
+        colsample_bytree=float(colsample_bytree),
+        random_state=xgb_seed,
+    )
+    xgb.fit(train_df_X, train_df_y)
+
+    # PACK MODEL
+    model = {"xgb": xgb}
+
+    return model
+
+def produce_forecast_m17_xgb(model, train_df_X, test_df_X):
+    """Create forecasts for the train and test sets using the trained model.
+
+    Args:
+        model (dictionary): all parameters of the trained model
+        train_df_X (df): predictors of train set
+        test_df_X (df): predictors of test set
+
+    Returns:
+        train_df_y_hat (df) : forecast result on train set
+        test_df_y_hat (df) : forecast result on test set
+    """
+
+    # UNPACK MODEL
+    xgb = model["xgb"]
+
+    # PRODUCE FORECAST
+    train_df_y_hat = xgb.predict(train_df_X)
+    test_df_y_hat = xgb.predict(test_df_X)
+
+    return train_df_y_hat, test_df_y_hat
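The explicit int()/float() casts above exist because hyperparameter values arrive as a row of a hyperparameter table rather than as native Python numbers. A quick end-to-end sketch with synthetic data; the flat import path is an assumption (in the installed package the file lives under pynnlf/scaffold/models), and a plain dict stands in for the hyperparameter row:

import numpy as np, pandas as pd
from m17_xgb import train_model_m17_xgb, produce_forecast_m17_xgb  # hypothetical import path

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(200, 4)), columns=[f'f{i}' for i in range(4)])
y = pd.DataFrame({'y': X.sum(axis=1) + rng.normal(0, 0.1, 200)})

hp = {"xgb_seed": 42, "n_estimators": 200, "learning_rate": 0.05,
      "max_depth": 4, "subsample": 0.8, "colsample_bytree": 0.8}

model = train_model_m17_xgb(hp, X.iloc[:150], y.iloc[:150])
train_hat, test_hat = produce_forecast_m17_xgb(model, X.iloc[:150], X.iloc[150:])
print(test_hat[:5])  # numpy array of point forecasts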
pynnlf/scaffold/models/m18_nbeats.py
@@ -0,0 +1,107 @@
+def train_model_m18_nbeats(hyperparameter, train_df_X, train_df_y):
+    """
+    Train and test an N-BEATS model for point forecasting.
+    Uses an N-BEATS-style architecture for predicting time series with lag + exogenous features.
+
+    Args:
+        hyperparameter (dict) : model hyperparameters
+        train_df_X (DataFrame) : predictors for training
+        train_df_y (DataFrame) : target for training
+
+    Returns:
+        model : trained PyTorch NBeats model
+    """
+    # ---- Unpack hyperparameters ----
+    input_size = train_df_X.shape[1]
+    output_size = int(hyperparameter['output_size'])
+    hidden_size = int(hyperparameter['hidden_size'])
+    num_blocks = int(hyperparameter['num_blocks'])
+    num_layers = int(hyperparameter['num_layers'])
+    lr = hyperparameter['lr']
+    epochs = int(hyperparameter['epochs'])
+    seed = int(hyperparameter['seed'])
+
+    # ---- Set seeds for reproducibility ----
+    import torch, numpy as np, random
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+    # ---- Define NBeats model inside the function ----
+    import torch.nn as nn
+    class NBeatsModel(nn.Module):
+        def __init__(self, input_size, output_size, hidden_size, num_blocks, num_layers):
+            super(NBeatsModel, self).__init__()
+            blocks = []
+            for _ in range(num_blocks):
+                block = []
+                for l in range(num_layers):
+                    block.append(nn.Linear(input_size if l == 0 else hidden_size, hidden_size))
+                    block.append(nn.ReLU())
+                block.append(nn.Linear(hidden_size, output_size))
+                blocks.append(nn.Sequential(*block))
+            self.blocks = nn.ModuleList(blocks)
+
+        def forward(self, x):
+            # the forecast is the sum of all block outputs
+            out = 0
+            for block in self.blocks:
+                out += block(x)
+            return out
+
+    model = NBeatsModel(input_size, output_size, hidden_size, num_blocks, num_layers)
+
+    # ---- Training setup ----
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    criterion = nn.MSELoss()
+    X_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
+    y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32)
+
+    # ---- Training loop (full-batch gradient descent) ----
+    model.train()
+    for epoch in range(epochs):
+        optimizer.zero_grad()
+        output = model(X_tensor)
+        loss = criterion(output, y_tensor)
+        loss.backward()
+        optimizer.step()
+
+    return model
+
+def produce_forecast_m18_nbeats(model, train_df_X, test_df_X):
+    """
+    Create forecasts for the train and test sets using the trained NBeats model.
+
+    Args:
+        model : trained NBeats PyTorch model
+        train_df_X (DataFrame) : predictors of train set
+        test_df_X (DataFrame) : predictors of test set
+
+    Returns:
+        train_df_y_hat (DataFrame) : forecast result on train set
+        test_df_y_hat (DataFrame) : forecast result on test set
+    """
+    import torch
+    import pandas as pd
+    model.eval()
+
+    with torch.no_grad():
+        X_train_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
+        X_test_tensor = torch.tensor(test_df_X.values, dtype=torch.float32)
+
+        y_train_hat = model(X_train_tensor).detach().cpu().numpy()
+        y_test_hat = model(X_test_tensor).detach().cpu().numpy()
+
+    train_df_y_hat = pd.DataFrame(y_train_hat, index=train_df_X.index, columns=['y_hat'])
+    test_df_y_hat = pd.DataFrame(y_test_hat, index=test_df_X.index, columns=['y_hat'])
+
+    return train_df_y_hat, test_df_y_hat
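The NBeatsModel defined inside the trainer is a simplified N-BEATS: each block is a plain fully connected stack and the block outputs are summed, without the backcast/residual decomposition of the original architecture. A smoke-test sketch using the same hyperparameter keys (import path hypothetical, data synthetic):

import numpy as np, pandas as pd
from m18_nbeats import train_model_m18_nbeats, produce_forecast_m18_nbeats  # hypothetical import path

X = pd.DataFrame(np.random.rand(64, 8).astype('float32'))
y = pd.DataFrame(np.random.rand(64, 1).astype('float32'))

hp = {'output_size': 1, 'hidden_size': 32, 'num_blocks': 3,
      'num_layers': 2, 'lr': 1e-3, 'epochs': 50, 'seed': 0}

model = train_model_m18_nbeats(hp, X, y)
train_hat, test_hat = produce_forecast_m18_nbeats(model, X.iloc[:48], X.iloc[48:])
assert train_hat.shape == (48, 1) and test_hat.shape == (16, 1)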
pynnlf/scaffold/models/m1_naive.py
@@ -0,0 +1,49 @@
+def train_model_m1_naive(hyperparameter, train_df_X, train_df_y):
+    ''' Train and test a naive model for point forecasting.
+
+    Args:
+        hyperparameter (df) : hyperparameter values of the model
+        train_df_X (df) : feature matrix for training
+        train_df_y (df) : target matrix for training
+
+    Returns:
+        model (dict) : trained model with all features
+    '''
+
+    # UNPACK HYPERPARAMETER
+    # no hyperparameter for the naive model
+
+    # TRAIN MODEL
+    # no training is required for the naive model
+
+    # PACK MODEL
+    model = {}
+
+    return model
+
+def produce_forecast_m1_naive(model, train_df_X, test_df_X, forecast_horizon):
+    """Create forecasts for the train and test sets using the trained model.
+
+    Args:
+        model (dictionary): all parameters of the trained model
+        train_df_X (df): predictors of train set
+        test_df_X (df): predictors of test set
+        forecast_horizon (int): forecast horizon for the model
+
+    Returns:
+        train_df_y_hat (df) : forecast result on train set
+        test_df_y_hat (df) : forecast result on test set
+    """
+
+    import pandas as pd
+
+    # PRODUCE FORECAST: the forecast is the last observation, read from the lag column
+    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
+    last_observation = f'y_lag_{horizon_timedelta}m'
+    train_df_y_hat = train_df_X[last_observation]
+    test_df_y_hat = test_df_X[last_observation]
+
+    return train_df_y_hat, test_df_y_hat
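The naive forecast is just the value of the lag feature that matches the horizon. The column name embeds str(pd.Timedelta(...)) plus a trailing 'm', which is easy to get wrong by hand; a small sketch of the convention with illustrative values:

import pandas as pd

forecast_horizon = 60  # minutes
col = f'y_lag_{pd.Timedelta(minutes=forecast_horizon)}m'
print(col)  # -> y_lag_0 days 01:00:00m

idx = pd.date_range('2024-01-01', periods=4, freq='15min')
test_df_X = pd.DataFrame({col: [1.0, 2.0, 3.0, 4.0]}, index=idx)
test_df_y_hat = test_df_X[col]  # the naive forecast is the lagged value itself
print(test_df_y_hat)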
pynnlf/scaffold/models/m2_snaive.py
@@ -0,0 +1,49 @@
+def train_model_m2_snaive(hyperparameter, train_df_X, train_df_y):
+    ''' Train and test a seasonal naive model for point forecasting.
+
+    Args:
+        hyperparameter (df) : hyperparameter values of the model
+        train_df_X (df) : feature matrix for training
+        train_df_y (df) : target matrix for training
+
+    Returns:
+        model (dict) : trained model with all features
+    '''
+
+    # UNPACK HYPERPARAMETER
+    days = hyperparameter['days']
+    col_name = f'y_lag_{days} days 00:00:00m'
+
+    # TRAIN MODEL
+    # no training is required for the seasonal naive model
+
+    # PACK MODEL
+    model = {"col_name": col_name}
+
+    return model
+
+def produce_forecast_m2_snaive(model, train_df_X, test_df_X):
+    """Create forecasts for the train and test sets using the trained model.
+
+    Args:
+        model (dictionary): all parameters of the trained model
+        train_df_X (df): predictors of train set
+        test_df_X (df): predictors of test set
+
+    Returns:
+        train_df_y_hat (df) : forecast result on train set
+        test_df_y_hat (df) : forecast result on test set
+    """
+
+    # UNPACK MODEL
+    col_name = model['col_name']  # depends on the seasonal lag in days
+
+    # PRODUCE FORECAST
+    train_df_y_hat = train_df_X[col_name]
+    test_df_y_hat = test_df_X[col_name]
+
+    return train_df_y_hat, test_df_y_hat
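m2_snaive builds the same kind of column name directly from the day count; the hard-coded ' days 00:00:00' suffix is exactly what str(pd.Timedelta(days=n)) produces, as this one-line check shows:

import pandas as pd

days = 7
assert f'y_lag_{days} days 00:00:00m' == f'y_lag_{pd.Timedelta(days=days)}m'
# both evaluate to 'y_lag_7 days 00:00:00m'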
pynnlf/scaffold/models/m3_ets.py
@@ -0,0 +1,133 @@
+# IMPORT REQUIRED LIBRARIES
+import pandas as pd
+from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
+from pynnlf.model_utils import remove_jump_df
+import numpy as np
+import datetime as dt
+
+def train_model_m3_ets(hyperparameter, train_df_X, train_df_y, forecast_horizon):
+    ''' Train and test an exponential smoothing (ETS) model for point forecasting.
+    https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.exponential_smoothing.ExponentialSmoothing.html
+
+    Args:
+        hyperparameter (df) : hyperparameter values of the model
+        train_df_X (df) : feature matrix for training
+        train_df_y (df) : target matrix for training
+        forecast_horizon (int) : forecast horizon in mins
+
+    Returns:
+        model (dict) : trained model with all features
+    '''
+
+    # UNPACK HYPERPARAMETER
+    trend = hyperparameter['trend']
+    damped_trend = hyperparameter['damped_trend']
+    seasonal_periods_days = hyperparameter['seasonal_periods_days']
+
+    # UPDATE train_df_y to exclude all rows after a sudden jump in the timestep
+    train_df_y_updated = remove_jump_df(train_df_y)
+
+    # TRAIN MODEL
+    # Calculate the frequency of the timesteps using the first and second index values
+    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
+    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
+    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)
+
+    # INTRODUCE GAP BETWEEN TRAIN AND TEST SET TO AVOID DATA LEAKAGE
+    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
+    if n_timestep_forecast_horizon == 1:
+        pass
+    else:
+        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]
+
+    # train_df_y_updated holds the training series in column 'y'
+    y = train_df_y_updated['y']
+
+    # Build and fit the state-space Exponential Smoothing model
+    model_fitted = ExponentialSmoothing(
+        y,
+        trend=trend,
+        seasonal=None,  # can be updated later
+        damped_trend=damped_trend
+    ).fit()
+
+    # Print the model summary
+    # print(model_fitted.summary())
+
+    # PACK MODEL
+    model = {"model_fitted": model_fitted}
+
+    return model
+
+def produce_forecast_m3_ets(model, train_df_X, test_df_X, forecast_horizon):
+    """Create forecasts for the train and test sets using the trained model.
+
+    Args:
+        model (dictionary): all parameters of the trained model
+        train_df_X (df): predictors of train set
+        test_df_X (df): predictors of test set
+        forecast_horizon (int): forecast horizon in mins
+
+    Returns:
+        train_df_y_hat (df) : forecast result on train set
+        test_df_y_hat (df) : forecast result on test set
+    """
+
+    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
+    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
+
+    train_df_X_updated = remove_jump_df(train_df_X)
+    test_df_X_updated = remove_jump_df(test_df_X)
+
+    # UNPACK MODEL
+    model_fitted = model['model_fitted']
+
+    # PRODUCE FORECAST FOR TRAIN SET
+    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
+    train_df_y_hat.columns = ['y']
+    train_df_y_hat.index.name = 'datetime'
+
+    # TRANSFORM test_df_X to a series with only the last lag
+    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
+    last_observation = f'y_lag_{horizon_timedelta}m'
+    test_df_y_last = test_df_X[last_observation]
+
+    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
+    # This loop takes about 2 minutes to run
+    test_df_y_hat = pd.DataFrame(index=test_df_X.index)
+    test_df_y_hat['y_hat'] = np.nan
+
+    # In the case of CV 10, which is when the test df precedes the train df,
+    # don't compute the test forecast
+    if test_df_X.index[-1] < train_df_X.index[0]:
+        # this is the case when we use CV10, where the test set is before the train set
+        print("Test set is before train set / CV 10, no test forecast can be made")
+        return train_df_y_hat, test_df_y_hat
+
+    for i in range(len(test_df_y_last)):
+        print('Processing i = ', i + 1, ' out of ', len(test_df_y_last))
+        if i == 0:
+            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
+        else:
+            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'],
+                                   index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
+            new_row = new_row.asfreq(test_df_X_updated.index.freq)
+
+            model_fitted = model_fitted.append(new_row)
+            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]  # h-step-ahead value
+
+    return train_df_y_hat, test_df_y_hat
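The test loop above leans on the statsmodels results API: res.append(new_obs) extends the fitted results object with new observations while keeping the estimated parameters (no re-estimation by default), and forecast(steps=h).iloc[-1] reads off the h-step-ahead value. A rolling-origin sketch of that pattern, assuming a synthetic hourly series:

import numpy as np, pandas as pd
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing

idx = pd.date_range('2024-01-01', periods=300, freq='h')
y = pd.Series(np.sin(np.arange(300) / 24) + np.random.normal(0, 0.05, 300), index=idx)

res = ExponentialSmoothing(y[:250], trend=True, damped_trend=True).fit()

preds = []
for t in range(250, 260):
    preds.append(res.forecast(steps=1).iloc[-1])  # one-step-ahead forecast
    new_obs = y.iloc[t:t + 1]                     # observation that just arrived
    res = res.append(new_obs)                     # extend the state without re-estimating parameters
print(pd.Series(preds, index=idx[250:260]))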