pynnlf 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pynnlf/__about__.py +1 -0
- pynnlf/__init__.py +5 -0
- pynnlf/api.py +17 -0
- pynnlf/discovery.py +63 -0
- pynnlf/engine.py +1238 -0
- pynnlf/hyperparams.py +38 -0
- pynnlf/model_utils.py +186 -0
- pynnlf/runner.py +108 -0
- pynnlf/scaffold/README_WORKSPACE.md +0 -0
- pynnlf/scaffold/data/README_data.md +40 -0
- pynnlf/scaffold/data/ds0_test.csv +4081 -0
- pynnlf/scaffold/models/README_models.md +61 -0
- pynnlf/scaffold/models/hyperparameters.yaml +264 -0
- pynnlf/scaffold/models/m10_rf.py +65 -0
- pynnlf/scaffold/models/m11_svr.py +53 -0
- pynnlf/scaffold/models/m12_rnn.py +152 -0
- pynnlf/scaffold/models/m13_lstm.py +208 -0
- pynnlf/scaffold/models/m14_gru.py +139 -0
- pynnlf/scaffold/models/m15_transformer.py +138 -0
- pynnlf/scaffold/models/m16_prophet.py +216 -0
- pynnlf/scaffold/models/m17_xgb.py +66 -0
- pynnlf/scaffold/models/m18_nbeats.py +107 -0
- pynnlf/scaffold/models/m1_naive.py +49 -0
- pynnlf/scaffold/models/m2_snaive.py +49 -0
- pynnlf/scaffold/models/m3_ets.py +133 -0
- pynnlf/scaffold/models/m4_arima.py +123 -0
- pynnlf/scaffold/models/m5_sarima.py +128 -0
- pynnlf/scaffold/models/m6_lr.py +76 -0
- pynnlf/scaffold/models/m7_ann.py +148 -0
- pynnlf/scaffold/models/m8_dnn.py +141 -0
- pynnlf/scaffold/models/m9_rt.py +74 -0
- pynnlf/scaffold/models/mXX_template.py +68 -0
- pynnlf/scaffold/specs/batch.yaml +4 -0
- pynnlf/scaffold/specs/experiment.yaml +4 -0
- pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
- pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
- pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
- pynnlf/scaffold/specs/tests_ci.yaml +8 -0
- pynnlf/scaffold/specs/tests_full.yaml +23 -0
- pynnlf/tests_runner.py +211 -0
- pynnlf/tools/strip_notebook_artifacts.py +32 -0
- pynnlf/workspace.py +63 -0
- pynnlf/yamlio.py +28 -0
- pynnlf-0.2.2.dist-info/METADATA +168 -0
- pynnlf-0.2.2.dist-info/RECORD +47 -0
- pynnlf-0.2.2.dist-info/WHEEL +5 -0
- pynnlf-0.2.2.dist-info/top_level.txt +1 -0
pynnlf/scaffold/models/m4_arima.py
@@ -0,0 +1,123 @@

# IMPORT REQUIRED LIBRARIES
from statsmodels.tsa.arima.model import ARIMA
from pynnlf.model_utils import remove_jump_df
import pandas as pd
import numpy as np
import datetime as dt

def train_model_m4_arima(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    '''Train an ARIMA model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values of the model (p, d, q)
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training
        forecast_horizon (int) : forecast horizon in minutes

    Returns:
        model (dict) : trained model with all fitted parameters
    '''

    # UNPACK HYPERPARAMETERS
    p = hyperparameter['p']
    d = hyperparameter['d']
    q = hyperparameter['q']

    # UPDATE train_df_y to exclude all rows after a sudden jump in the timestep
    train_df_y_updated = remove_jump_df(train_df_y)

    # TRAIN MODEL
    # Derive the timestep frequency from the first two index values
    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)

    # INTRODUCE A GAP BETWEEN TRAIN AND TEST SETS TO AVOID DATA LEAKAGE
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
    if n_timestep_forecast_horizon > 1:
        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]

    # 'y' is the column holding the training series
    y = train_df_y_updated['y']

    # Build and fit the ARIMA model
    model_fitted = ARIMA(y, order=(p, d, q), freq=inferred_frequency).fit()

    # PACK MODEL
    model = {"model_fitted": model_fitted}

    return model

def produce_forecast_m4_arima(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set
        forecast_horizon (int): forecast horizon in minutes

    Returns:
        train_df_y_hat (df) : forecast result on the train set
        test_df_y_hat (df) : forecast result on the test set
    """
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))

    train_df_X_updated = remove_jump_df(train_df_X)
    test_df_X_updated = remove_jump_df(test_df_X)

    # UNPACK MODEL
    model_fitted = model['model_fitted']

    # PRODUCE FORECAST FOR TRAIN SET
    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
    train_df_y_hat.columns = ['y']
    train_df_y_hat.index.name = 'datetime'

    # REDUCE test_df_X to the single column holding the most recent lag
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]

    # REFIT THE MODEL STEP BY STEP AND PRODUCE THE TEST-SET FORECAST
    # (this loop takes roughly 2 minutes)
    test_df_y_hat = pd.DataFrame(index=test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # In the CV 10 case the test set precedes the train set,
    # so no test forecast can be made.
    if test_df_X.index[-1] < train_df_X.index[0]:
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    for i in range(len(test_df_y_last)):
        print('Processing i =', i + 1, 'out of', len(test_df_y_last))
        if i == 0:
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
        else:
            # Append the newly observed value (lagged by the forecast horizon)
            # to the fitted model, then forecast one horizon ahead.
            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'],
                                   index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
            new_row = new_row.asfreq(test_df_X_updated.index.freq)

            model_fitted = model_fitted.append(new_row)
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]

    return train_df_y_hat, test_df_y_hat
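A minimal usage sketch for the pair above, on synthetic half-hourly data. The import path, the toy series, and the lag-column name (built with the same f'y_lag_{pd.Timedelta(...)}m' convention the function itself uses) are assumptions for illustration, not part of the package's documented API:

import numpy as np
import pandas as pd
from pynnlf.scaffold.models.m4_arima import train_model_m4_arima, produce_forecast_m4_arima  # hypothetical import path

# Synthetic 30-minute series split 150/50 into train and test.
idx = pd.date_range("2024-01-01", periods=200, freq="30min")
y = pd.DataFrame({"y": np.sin(np.arange(200) / 10)}, index=idx)
lag_col = f"y_lag_{pd.Timedelta(minutes=30)}m"  # column-naming convention assumed from the code above
X = pd.DataFrame({lag_col: y["y"].shift(1)}, index=idx).bfill()

model = train_model_m4_arima({"p": 1, "d": 0, "q": 1},
                             X.iloc[:150], y.iloc[:150], forecast_horizon=30)
train_hat, test_hat = produce_forecast_m4_arima(model, X.iloc[:150], X.iloc[150:],
                                                forecast_horizon=30)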
pynnlf/scaffold/models/m5_sarima.py
@@ -0,0 +1,128 @@

# IMPORT REQUIRED LIBRARIES
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pynnlf.model_utils import remove_jump_df
import pandas as pd
import numpy as np
import datetime as dt

def train_model_m5_sarima(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    '''Train a SARIMA model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values of the model (p, d, q, P, D, Q, seasonal_period_days)
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training
        forecast_horizon (int) : forecast horizon in minutes

    Returns:
        model (dict) : trained model with all fitted parameters
    '''

    # UNPACK HYPERPARAMETERS
    p = hyperparameter['p']
    d = hyperparameter['d']
    q = hyperparameter['q']
    P = hyperparameter['P']
    D = hyperparameter['D']
    Q = hyperparameter['Q']
    seasonal_period_days = hyperparameter['seasonal_period_days']

    # UPDATE train_df_y to exclude all rows after a sudden jump in the timestep
    train_df_y_updated = remove_jump_df(train_df_y)

    # TRAIN MODEL
    # Derive the timestep frequency from the first two index values
    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
    # Seasonal period in timesteps (total_seconds() stays correct for frequencies of a day or longer)
    s = int(seasonal_period_days * 24 * 60 / (timestep_frequency.total_seconds() / 60))
    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)

    # INTRODUCE A GAP BETWEEN TRAIN AND TEST SETS TO AVOID DATA LEAKAGE
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
    if n_timestep_forecast_horizon > 1:
        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]

    # 'y' is the column holding the training series
    y = train_df_y_updated['y']

    # Build and fit the state-space SARIMA model
    model_fitted = SARIMAX(y, order=(p, d, q), seasonal_order=(P, D, Q, s), freq=inferred_frequency).fit()

    # PACK MODEL
    model = {"model_fitted": model_fitted}

    return model

def produce_forecast_m5_sarima(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set
        forecast_horizon (int): forecast horizon in minutes

    Returns:
        train_df_y_hat (df) : forecast result on the train set
        test_df_y_hat (df) : forecast result on the test set
    """
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))

    train_df_X_updated = remove_jump_df(train_df_X)
    test_df_X_updated = remove_jump_df(test_df_X)

    # UNPACK MODEL
    model_fitted = model['model_fitted']

    # PRODUCE FORECAST FOR TRAIN SET
    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
    train_df_y_hat.columns = ['y']
    train_df_y_hat.index.name = 'datetime'

    # REDUCE test_df_X to the single column holding the most recent lag
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]

    # REFIT THE MODEL STEP BY STEP AND PRODUCE THE TEST-SET FORECAST
    # (this loop takes roughly 2 minutes)
    test_df_y_hat = pd.DataFrame(index=test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # In the CV 10 case the test set precedes the train set,
    # so no test forecast can be made.
    if test_df_X.index[-1] < train_df_X.index[0]:
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    for i in range(len(test_df_y_last)):
        print('Processing i =', i + 1, 'out of', len(test_df_y_last))
        if i == 0:
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
        else:
            # Append the newly observed value (lagged by the forecast horizon)
            # to the fitted model, then forecast one horizon ahead.
            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'],
                                   index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
            new_row = new_row.asfreq(test_df_X_updated.index.freq)

            model_fitted = model_fitted.append(new_row)
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]

    return train_df_y_hat, test_df_y_hat
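The only structural difference from m4_arima is the seasonal order, whose period s is derived from seasonal_period_days and the timestep. A quick worked check of that arithmetic (values chosen purely for illustration):

import pandas as pd

# 30-minute data with a one-day seasonal cycle:
timestep_frequency = pd.Timedelta(minutes=30)
seasonal_period_days = 1
s = int(seasonal_period_days * 24 * 60 / (timestep_frequency.total_seconds() / 60))
assert s == 48  # one day = 48 half-hour timesteps, so seasonal_order becomes (P, D, Q, 48)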
pynnlf/scaffold/models/m6_lr.py
@@ -0,0 +1,76 @@

from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression

def train_model_m6_lr(hyperparameter, train_df_X, train_df_y):
    '''Train a linear model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter value of the model (number of features to keep)
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained feature selector and regression model
    '''

    # UNPACK HYPERPARAMETER
    num_feature = int(hyperparameter['num_features'])

    # FEATURE SELECTOR
    def select_features(train_df_X, train_df_y, num_feature):
        '''Fit a selector that keeps the K best features.

        Args:
            train_df_X (df) : feature matrix for training
            train_df_y (df) : target matrix for training
            num_feature (int) : number of features to keep

        Returns:
            fs_lr (model) : fitted feature selector
        '''
        train_df_y = train_df_y.values.ravel()
        fs_lr = SelectKBest(f_regression, k=num_feature)
        fs_lr.fit(train_df_X, train_df_y)

        return fs_lr

    fs_lr = select_features(train_df_X, train_df_y, num_feature)

    # TRAIN MODEL
    train_df_X = fs_lr.transform(train_df_X)
    m06_lr = LinearRegression()
    m06_lr.fit(train_df_X, train_df_y)

    # PACK MODEL
    model = {"feature_selector": fs_lr, "regression_model": m06_lr}

    return model

def produce_forecast_m6_lr(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (ndarray) : forecast result on the train set
        test_df_y_hat (ndarray) : forecast result on the test set
    """
    # UNPACK MODEL
    fs_lr = model['feature_selector']
    m06_lr = model['regression_model']

    # SELECT K BEST FEATURES
    train_df_X = fs_lr.transform(train_df_X)
    test_df_X = fs_lr.transform(test_df_X)

    # PRODUCE FORECAST
    train_df_y_hat = m06_lr.predict(train_df_X)
    test_df_y_hat = m06_lr.predict(test_df_X)

    return train_df_y_hat, test_df_y_hat
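The selector-plus-regressor pair packed into the model dict is functionally the same as a two-step scikit-learn Pipeline; a sketch on synthetic data (column names and shapes are illustrative only):

import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(100, 5)), columns=[f"x{i}" for i in range(5)])
y = 2.0 * X["x0"] + rng.normal(scale=0.1, size=100)

# Same SelectKBest + LinearRegression steps as train_model_m6_lr / produce_forecast_m6_lr.
pipe = Pipeline([("select", SelectKBest(f_regression, k=2)),
                 ("lr", LinearRegression())])
pipe.fit(X, y)
y_hat = pipe.predict(X)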
pynnlf/scaffold/models/m7_ann.py
@@ -0,0 +1,148 @@

# IMPORT REQUIRED LIBRARIES
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import random
import os

def train_model_m7_ann(hyperparameter, train_df_X, train_df_y):
    '''Train a single-hidden-layer ANN for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values of the model
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained model with all features
    '''

    # Set random seeds for reproducibility
    def set_seed(seed):
        random.seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    # UNPACK HYPERPARAMETERS (cast to int where PyTorch requires it)
    seed = int(hyperparameter['seed'])
    hidden_size = int(hyperparameter['hidden_size'])
    activation_function = hyperparameter['activation_function']
    learning_rate = hyperparameter['learning_rate']
    solver = hyperparameter['solver']
    epochs = int(hyperparameter['epochs'])

    # Use proper format for X and y
    X = torch.tensor(train_df_X.values, dtype=torch.float32)
    y = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    # Define the ANN model
    class ANNModel(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super(ANNModel, self).__init__()
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.fc2 = nn.Linear(hidden_size, output_size)
            self.relu = nn.ReLU()  # Activation function

        def forward(self, x):
            x = self.fc1(x)
            # activation_function is captured from the enclosing scope
            if activation_function == 'relu':
                x = self.relu(x)
            elif activation_function == 'sigmoid':
                x = torch.sigmoid(x)
            else:
                x = torch.tanh(x)
            x = self.fc2(x)
            return x

    # Model initialization
    input_size = X.shape[1]
    output_size = y.shape[1]

    set_seed(seed)

    model_ann = ANNModel(input_size, hidden_size, output_size)
    if solver == 'adam':
        optimizer = optim.Adam(model_ann.parameters(), lr=learning_rate)
    elif solver == 'sgd':
        optimizer = optim.SGD(model_ann.parameters(), lr=learning_rate)
    else:
        raise ValueError('Solver not found')

    # Loss function: mean squared error for regression
    criterion = nn.MSELoss()

    # TRAIN MODEL (full-batch training loop)
    for epoch in range(epochs):
        model_ann.train()

        # Forward pass
        output = model_ann(X)
        loss = criterion(output, y)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Update weights
        optimizer.step()

        if epoch % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # PACK MODEL
    model = {"model_ann": model_ann}

    return model

def produce_forecast_m7_ann(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (df) : forecast result on the train set
        test_df_y_hat (df) : forecast result on the test set
    """

    # UNPACK MODEL
    model_ann = model["model_ann"]

    # PREPARE FORMAT
    train_df_X_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
    test_df_X_tensor = torch.tensor(test_df_X.values, dtype=torch.float32)

    # PRODUCE FORECAST
    # Switch model to evaluation mode for inference
    model_ann.eval()

    # TRAIN SET FORECAST (gradients disabled to save memory)
    with torch.no_grad():
        train_df_y_hat_tensor = model_ann(train_df_X_tensor)

    # TEST SET FORECAST (gradients disabled to save memory)
    with torch.no_grad():
        test_df_y_hat_tensor = model_ann(test_df_X_tensor)

    # Create DataFrames of the results (convert tensors to NumPy first)
    train_df_y_hat = pd.DataFrame(train_df_y_hat_tensor.numpy(), index=train_df_X.index, columns=['y_hat'])
    test_df_y_hat = pd.DataFrame(test_df_y_hat_tensor.numpy(), index=test_df_X.index, columns=['y_hat'])

    return train_df_y_hat, test_df_y_hat
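Note that the training loop above is full-batch: every epoch runs one forward/backward pass over the entire training frame. For larger datasets a mini-batch loop is the usual alternative; the sketch below illustrates that variant with PyTorch's DataLoader on synthetic tensors, and is not code from the package:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

X = torch.randn(512, 8)   # synthetic features
y = torch.randn(512, 1)   # synthetic target
net = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 1))
optimizer = optim.Adam(net.parameters(), lr=1e-3)
criterion = nn.MSELoss()

loader = DataLoader(TensorDataset(X, y), batch_size=64, shuffle=True)
for epoch in range(10):
    for xb, yb in loader:  # one optimizer step per mini-batch
        optimizer.zero_grad()
        loss = criterion(net(xb), yb)
        loss.backward()
        optimizer.step()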
pynnlf/scaffold/models/m8_dnn.py
@@ -0,0 +1,141 @@

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

def train_model_m8_dnn(hyperparameter, train_df_X, train_df_y):
    '''Train a deep neural network for point forecasting.

    Args:
        hyperparameter (dict) : hyperparameter values of the model
        train_df_X (DataFrame) : feature matrix for training
        train_df_y (DataFrame) : target matrix for training

    Returns:
        model (dict) : trained model with all features
    '''

    # UNPACK HYPERPARAMETERS (cast to int where PyTorch requires it)
    seed = int(hyperparameter['seed'])
    torch.manual_seed(seed)  # Set seed for PyTorch

    n_hidden = int(hyperparameter['n_hidden'])
    hidden_size = int(hyperparameter['hidden_size'])
    activation_function = hyperparameter['activation_function']
    learning_rate = hyperparameter['learning_rate']
    solver = hyperparameter['solver']
    epochs = int(hyperparameter['epochs'])

    # Use proper format for X and y
    X = torch.tensor(train_df_X.values, dtype=torch.float32)
    y = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    # Define the DNN model
    class DNNModel(nn.Module):
        def __init__(self, input_size, hidden_size, output_size, n_hidden, activation_function):
            super(DNNModel, self).__init__()
            self.layers = nn.ModuleList()
            self.activation_function = activation_function

            # Input layer
            self.layers.append(nn.Linear(input_size, hidden_size))

            # Hidden layers
            for _ in range(n_hidden - 1):
                self.layers.append(nn.Linear(hidden_size, hidden_size))

            # Output layer
            self.layers.append(nn.Linear(hidden_size, output_size))

        def forward(self, x):
            for layer in self.layers[:-1]:  # Iterate through hidden layers
                x = layer(x)
                if self.activation_function == 'relu':
                    x = nn.ReLU()(x)
                elif self.activation_function == 'sigmoid':
                    x = torch.sigmoid(x)
                elif self.activation_function == 'tanh':
                    x = torch.tanh(x)

            # Apply the output layer without an activation function
            x = self.layers[-1](x)
            return x

    # Model initialization
    input_size = X.shape[1]
    output_size = y.shape[1]
    model_dnn = DNNModel(input_size, hidden_size, output_size, n_hidden, activation_function)

    if solver == 'adam':
        optimizer = optim.Adam(model_dnn.parameters(), lr=learning_rate)
    elif solver == 'sgd':
        optimizer = optim.SGD(model_dnn.parameters(), lr=learning_rate)
    else:
        raise ValueError('Solver not found')

    # Loss function: mean squared error for regression
    criterion = nn.MSELoss()

    # TRAIN MODEL (full-batch training loop)
    for epoch in range(epochs):
        model_dnn.train()

        # Forward pass
        output = model_dnn(X)
        loss = criterion(output, y)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Update weights
        optimizer.step()

        if epoch % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # PACK MODEL
    model = {"model_dnn": model_dnn}

    return model

def produce_forecast_m8_dnn(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (DataFrame): predictors of the train set
        test_df_X (DataFrame): predictors of the test set

    Returns:
        train_df_y_hat (DataFrame) : forecast result on the train set
        test_df_y_hat (DataFrame) : forecast result on the test set
    """

    # UNPACK MODEL
    model_dnn = model["model_dnn"]

    # PREPARE FORMAT
    train_df_X_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
    test_df_X_tensor = torch.tensor(test_df_X.values, dtype=torch.float32)

    # PRODUCE FORECAST
    # Switch model to evaluation mode for inference
    model_dnn.eval()

    # TRAIN SET FORECAST (gradients disabled to save memory)
    with torch.no_grad():
        train_df_y_hat_tensor = model_dnn(train_df_X_tensor)

    # TEST SET FORECAST (gradients disabled to save memory)
    with torch.no_grad():
        test_df_y_hat_tensor = model_dnn(test_df_X_tensor)

    # Create DataFrames of the results
    train_df_y_hat = pd.DataFrame(train_df_y_hat_tensor.numpy(), index=train_df_X.index, columns=['y_hat'])
    test_df_y_hat = pd.DataFrame(test_df_y_hat_tensor.numpy(), index=test_df_X.index, columns=['y_hat'])

    return train_df_y_hat, test_df_y_hat
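In the ModuleList construction above, n_hidden counts the weight layers before the output: the input layer, plus n_hidden - 1 hidden-to-hidden layers, plus one output layer. A standalone restatement of that construction, with an assertion on the resulting layer count (sizes chosen for illustration):

import torch.nn as nn

input_size, hidden_size, output_size, n_hidden = 8, 16, 1, 3
layers = nn.ModuleList()
layers.append(nn.Linear(input_size, hidden_size))       # input layer
for _ in range(n_hidden - 1):                           # hidden-to-hidden layers
    layers.append(nn.Linear(hidden_size, hidden_size))
layers.append(nn.Linear(hidden_size, output_size))      # output layer
assert len(layers) == n_hidden + 1                      # 4 Linear layers for n_hidden = 3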
pynnlf/scaffold/models/m9_rt.py
@@ -0,0 +1,74 @@

# IMPORT REQUIRED LIBRARIES
from sklearn.tree import DecisionTreeRegressor
import pandas as pd

def train_model_m9_rt(hyperparameter, train_df_X, train_df_y):
    '''Train a regression tree model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values of the model
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained model with all features
    '''

    # UNPACK HYPERPARAMETERS
    seed = hyperparameter['seed']
    max_depth = hyperparameter['max_depth']
    min_samples_split = hyperparameter['min_samples_split']
    min_samples_leaf = hyperparameter['min_samples_leaf']
    max_features = hyperparameter['max_features']

    # TRAIN MODEL
    # Initialize the regression tree with the key hyperparameters
    regressor = DecisionTreeRegressor(
        criterion='squared_error',
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=seed
    )

    # Train the model
    regressor.fit(train_df_X, train_df_y)

    # PACK MODEL
    model = {"rt": regressor}

    return model

def produce_forecast_m9_rt(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (df) : forecast result on the train set
        test_df_y_hat (df) : forecast result on the test set
    """

    # UNPACK MODEL
    regressor = model['rt']

    # PRODUCE FORECAST
    train_df_y_hat = pd.DataFrame(regressor.predict(train_df_X), index=train_df_X.index, columns=['y_hat'])
    test_df_y_hat = pd.DataFrame(regressor.predict(test_df_X), index=test_df_X.index, columns=['y_hat'])

    return train_df_y_hat, test_df_y_hat
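A minimal end-to-end sketch of the two functions above on synthetic data; the hyperparameter keys mirror those unpacked in train_model_m9_rt, while the data, column name, and the assumption that both functions are in scope are illustrative only:

import numpy as np
import pandas as pd

# Assumes train_model_m9_rt and produce_forecast_m9_rt from this file are importable/in scope.
idx = pd.date_range("2024-01-01", periods=120, freq="30min")
X = pd.DataFrame({"y_lag_30m": np.random.default_rng(1).normal(size=120)}, index=idx)
y = pd.DataFrame({"y": 0.8 * X["y_lag_30m"]}, index=idx)

hyperparameter = {"seed": 42, "max_depth": 5, "min_samples_split": 2,
                  "min_samples_leaf": 1, "max_features": 1.0}
model = train_model_m9_rt(hyperparameter, X.iloc[:100], y.iloc[:100])
train_hat, test_hat = produce_forecast_m9_rt(model, X.iloc[:100], X.iloc[100:])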