pynnlf 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pynnlf/__about__.py +1 -0
  2. pynnlf/__init__.py +5 -0
  3. pynnlf/api.py +17 -0
  4. pynnlf/discovery.py +63 -0
  5. pynnlf/engine.py +1238 -0
  6. pynnlf/hyperparams.py +38 -0
  7. pynnlf/model_utils.py +186 -0
  8. pynnlf/runner.py +108 -0
  9. pynnlf/scaffold/README_WORKSPACE.md +0 -0
  10. pynnlf/scaffold/data/README_data.md +40 -0
  11. pynnlf/scaffold/data/ds0_test.csv +4081 -0
  12. pynnlf/scaffold/models/README_models.md +61 -0
  13. pynnlf/scaffold/models/hyperparameters.yaml +264 -0
  14. pynnlf/scaffold/models/m10_rf.py +65 -0
  15. pynnlf/scaffold/models/m11_svr.py +53 -0
  16. pynnlf/scaffold/models/m12_rnn.py +152 -0
  17. pynnlf/scaffold/models/m13_lstm.py +208 -0
  18. pynnlf/scaffold/models/m14_gru.py +139 -0
  19. pynnlf/scaffold/models/m15_transformer.py +138 -0
  20. pynnlf/scaffold/models/m16_prophet.py +216 -0
  21. pynnlf/scaffold/models/m17_xgb.py +66 -0
  22. pynnlf/scaffold/models/m18_nbeats.py +107 -0
  23. pynnlf/scaffold/models/m1_naive.py +49 -0
  24. pynnlf/scaffold/models/m2_snaive.py +49 -0
  25. pynnlf/scaffold/models/m3_ets.py +133 -0
  26. pynnlf/scaffold/models/m4_arima.py +123 -0
  27. pynnlf/scaffold/models/m5_sarima.py +128 -0
  28. pynnlf/scaffold/models/m6_lr.py +76 -0
  29. pynnlf/scaffold/models/m7_ann.py +148 -0
  30. pynnlf/scaffold/models/m8_dnn.py +141 -0
  31. pynnlf/scaffold/models/m9_rt.py +74 -0
  32. pynnlf/scaffold/models/mXX_template.py +68 -0
  33. pynnlf/scaffold/specs/batch.yaml +4 -0
  34. pynnlf/scaffold/specs/experiment.yaml +4 -0
  35. pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
  36. pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
  37. pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
  38. pynnlf/scaffold/specs/tests_ci.yaml +8 -0
  39. pynnlf/scaffold/specs/tests_full.yaml +23 -0
  40. pynnlf/tests_runner.py +211 -0
  41. pynnlf/tools/strip_notebook_artifacts.py +32 -0
  42. pynnlf/workspace.py +63 -0
  43. pynnlf/yamlio.py +28 -0
  44. pynnlf-0.2.2.dist-info/METADATA +168 -0
  45. pynnlf-0.2.2.dist-info/RECORD +47 -0
  46. pynnlf-0.2.2.dist-info/WHEEL +5 -0
  47. pynnlf-0.2.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,123 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ from statsmodels.tsa.arima.model import ARIMA
3
+ from pynnlf.model_utils import remove_jump_df
4
+ import pandas as pd
5
+ import numpy as np
6
+ import datetime as dt
7
+
8
def train_model_m4_arima(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    ''' Fit an ARIMA(p, d, q) model on the training target series.

    Args:
        hyperparameter (df) : hyperparameter values providing the non-seasonal
            orders 'p', 'd' and 'q'
        train_df_X (df) : features matrix for training (unused by ARIMA)
        train_df_y (df) : target matrix for training, column 'y'
        forecast_horizon (int) : forecast horizon in mins

    Returns:
        model (dict) : {"model_fitted": fitted statsmodels ARIMA results object}
    '''

    # UNPACK HYPERPARAMETER into the (p, d, q) order tuple
    order = (hyperparameter['p'], hyperparameter['d'], hyperparameter['q'])

    # Drop everything after a sudden jump in the timestamps so the series
    # handed to statsmodels stays contiguous.
    target = remove_jump_df(train_df_y)

    # Timestep size from the first two timestamps, plus the pandas-inferred
    # frequency string used to regularise the index for statsmodels.
    step = target.index[1] - target.index[0]
    freq = pd.infer_freq(target.index)
    target = target.asfreq(freq)

    # INTRODUCE GAP BETWEEN TRAIN AND TEST SET TO AVOID DATA LEAKAGE:
    # drop the last (horizon - 1) rows so no training target overlaps the
    # multi-step test window.
    steps_ahead = int(forecast_horizon / (step.total_seconds() / 60))
    if steps_ahead != 1:
        target = target[:-(steps_ahead - 1)]

    # Fit the model on the 'y' column and pack it for the caller.
    fitted = ARIMA(target['y'], order=order, freq=freq).fit()
    return {"model_fitted": fitted}
55
+
56
def produce_forecast_m4_arima(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecast at the train and test set using the trained ARIMA model

    Args:
        model (dictionary): all parameters of the trained model
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set
        forecast_horizon (int): forecast horizon in mins

    Returns:
        train_df_y_hat (df) : forecast result at train set
        test_df_y_hat (df) : forecast result at test set (all-NaN when the
            test window precedes the train window, see CV 10 case below)

    """
    # Number of timesteps covered by the forecast horizon, derived from the
    # spacing of the first two test timestamps.
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))

    # Truncate both sets after any sudden jump in the timestamps so the
    # index frequency stays regular for statsmodels.
    train_df_X_updated = remove_jump_df(train_df_X)
    test_df_X_updated = remove_jump_df(test_df_X)

    # UNPACK MODEL
    model_fitted = model['model_fitted']

    # PRODUCE FORECAST FOR TRAIN SET
    # The in-sample fitted values double as the train-set forecast.
    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
    train_df_y_hat.columns = ['y']

    # train_df_y_hat_2 = pd.DataFrame(model_fitted.forecast(n_timestep_forecast_horizon-1))
    # train_df_y_hat_2.columns = ['y']
    # train_df_y_hat = pd.concat([train_df_y_hat, train_df_y_hat_2])

    train_df_y_hat.index.name = 'datetime'

    # TRANSFORM test_df_X to a series with only the last lag
    # NOTE(review): the column key embeds the *string form* of a pd.Timedelta
    # (e.g. 'y_lag_0 days 00:30:00m') — assumed to match the lag-feature
    # naming produced by the feature builder; confirm against the engine.
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]


    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
    # THIS CODE RESULTS IN 2 MINS
    test_df_y_hat = pd.DataFrame(index = test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # in the case of CV 10, which is when test df < train df
    # don't compute the test forecast
    if (test_df_X.index[-1] < train_df_X.index[0]):
        # this is the case when we use CV10, where the test set is before the train set
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    # Rolling-origin evaluation: after the first point, each newly observed
    # lag value is appended to the fitted results (statsmodels results
    # .append extends the sample; by default it does not re-estimate the
    # parameters) and the horizon is forecast again from the new origin.
    for i in range(len(test_df_y_last)):
        # for i in range(2): #for test only
        print('Processing i = ', i + 1, ' out of ', len(test_df_y_last)),
        if i == 0:
            # First point: forecast straight from the model fitted on train data.
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
        else:
            # The observation available at step i is the target value from
            # forecast_horizon minutes earlier, re-indexed to its own timestamp.
            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'], index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
            new_row = new_row.asfreq(test_df_X_updated.index.freq)

            model_fitted = model_fitted.append(new_row)
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1] # to update based on the forecast horizon


    # test_df_y_hat = m06_lr.predict(test_df_X)

    return train_df_y_hat, test_df_y_hat
123
+
@@ -0,0 +1,128 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
3
+ from pynnlf.model_utils import remove_jump_df
4
+ import pandas as pd
5
+ import numpy as np
6
+ import datetime as dt
7
+
8
def train_model_m5_sarima(hyperparameter, train_df_X, train_df_y, forecast_horizon):
    ''' Fit a seasonal ARIMA (SARIMAX) model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values: non-seasonal orders 'p',
            'd', 'q', seasonal orders 'P', 'D', 'Q', and
            'seasonal_period_days' (length of one season, in days)
        train_df_X (df) : features matrix for training (unused by SARIMA)
        train_df_y (df) : target matrix for training, column 'y'
        forecast_horizon (int) : forecast horizon in mins

    Returns:
        model (dict) : {"model_fitted": fitted SARIMAX results object}
    '''

    # UNPACK HYPERPARAMETER
    p = hyperparameter['p']
    d = hyperparameter['d']
    q = hyperparameter['q']
    P = hyperparameter['P']
    D = hyperparameter['D']
    Q = hyperparameter['Q']
    seasonal_period_days = hyperparameter['seasonal_period_days']

    # UPDATE train_df_y to exclude all rows after a sudden jump in the timestep
    train_df_y_updated = remove_jump_df(train_df_y)

    # TRAIN MODEL
    # Calculate the frequency of the timesteps using the first and second index values
    timestep_frequency = train_df_y_updated.index[1] - train_df_y_updated.index[0]
    # Seasonal period expressed in timesteps.
    # BUGFIX: use total_seconds() instead of .seconds — .seconds only holds the
    # sub-day component of a timedelta (0 for a whole-day step), which made the
    # seasonal period wrong (or raised ZeroDivisionError) for daily or coarser
    # data; total_seconds() matches the horizon computation below.
    s = int(seasonal_period_days * 24 * 60 / (timestep_frequency.total_seconds() / 60))
    inferred_frequency = pd.infer_freq(train_df_y_updated.index)
    train_df_y_updated = train_df_y_updated.asfreq(inferred_frequency)

    # INTRODUCE GAP BETWEEN TRAIN AND TEST SET TO AVOID DATA LEAKAGE
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))
    if n_timestep_forecast_horizon != 1:
        # Drop the last (horizon - 1) rows so no training target overlaps the
        # multi-step test window.
        train_df_y_updated = train_df_y_updated[:-(n_timestep_forecast_horizon - 1)]

    # The training series lives in the 'y' column
    y = train_df_y_updated['y']

    # Build and fit the state-space SARIMA model
    model_fitted = SARIMAX(y, order=(p, d, q), seasonal_order=(P, D, Q, s), freq=inferred_frequency).fit()

    # PACK MODEL
    model = {"model_fitted": model_fitted}

    return model
60
+
61
def produce_forecast_m5_sarima(model, train_df_X, test_df_X, forecast_horizon):
    """Create forecast at the train and test set using the trained SARIMA model

    Args:
        model (dictionary): all parameters of the trained model
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set
        forecast_horizon (int): forecast horizon in mins

    Returns:
        train_df_y_hat (df) : forecast result at train set
        test_df_y_hat (df) : forecast result at test set (all-NaN when the
            test window precedes the train window, see CV 10 case below)

    """
    # Number of timesteps covered by the forecast horizon, derived from the
    # spacing of the first two test timestamps.
    timestep_frequency = test_df_X.index[1] - test_df_X.index[0]
    n_timestep_forecast_horizon = int(forecast_horizon / (timestep_frequency.total_seconds() / 60))

    # Truncate both sets after any sudden jump in the timestamps so the
    # index frequency stays regular for statsmodels.
    train_df_X_updated = remove_jump_df(train_df_X)
    test_df_X_updated = remove_jump_df(test_df_X)

    # UNPACK MODEL
    model_fitted = model['model_fitted']

    # PRODUCE FORECAST FOR TRAIN SET
    # The in-sample fitted values double as the train-set forecast.
    train_df_y_hat = pd.DataFrame(model_fitted.fittedvalues)
    train_df_y_hat.columns = ['y']

    # train_df_y_hat_2 = pd.DataFrame(model_fitted.forecast(n_timestep_forecast_horizon-1))
    # train_df_y_hat_2.columns = ['y']
    # train_df_y_hat = pd.concat([train_df_y_hat, train_df_y_hat_2])

    train_df_y_hat.index.name = 'datetime'

    # TRANSFORM test_df_X to a series with only the last lag
    # NOTE(review): the column key embeds the *string form* of a pd.Timedelta
    # (e.g. 'y_lag_0 days 00:30:00m') — assumed to match the lag-feature
    # naming produced by the feature builder; confirm against the engine.
    horizon_timedelta = pd.Timedelta(minutes=forecast_horizon)
    last_observation = f'y_lag_{horizon_timedelta}m'
    test_df_y_last = test_df_X[last_observation]


    # REFIT THE MODEL AND PRODUCE NEW FORECAST FOR TEST SET
    # THIS CODE RESULTS IN 2 MINS
    test_df_y_hat = pd.DataFrame(index = test_df_X.index)
    test_df_y_hat['y_hat'] = np.nan

    # in the case of CV 10, which is when test df < train df
    # don't compute the test forecast
    if (test_df_X.index[-1] < train_df_X.index[0]):
        # this is the case when we use CV10, where the test set is before the train set
        print("Test set is before train set / CV 10, no test forecast can be made")
        return train_df_y_hat, test_df_y_hat

    # Rolling-origin evaluation: after the first point, each newly observed
    # lag value is appended to the fitted results (statsmodels results
    # .append extends the sample; by default it does not re-estimate the
    # parameters) and the horizon is forecast again from the new origin.
    for i in range(len(test_df_y_last)):
        # for i in range(2): #for test only
        print('Processing i = ', i + 1, ' out of ', len(test_df_y_last)),
        if i == 0:
            # First point: forecast straight from the model fitted on train data.
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1]
        else:
            # The observation available at step i is the target value from
            # forecast_horizon minutes earlier, re-indexed to its own timestamp.
            new_row = pd.DataFrame([test_df_y_last.values[i]], columns=['y'], index=[test_df_y_last.index[i] - dt.timedelta(minutes=forecast_horizon)])
            new_row = new_row.asfreq(test_df_X_updated.index.freq)

            model_fitted = model_fitted.append(new_row)
            test_df_y_hat.iloc[i, 0] = model_fitted.forecast(steps=n_timestep_forecast_horizon).iloc[-1] # to update based on the forecast horizon


    # test_df_y_hat = m06_lr.predict(test_df_X)

    return train_df_y_hat, test_df_y_hat
128
+
@@ -0,0 +1,76 @@
1
+ import sklearn
2
+ from sklearn.feature_selection import SelectKBest, chi2, f_regression
3
+ from sklearn.linear_model import LinearRegression
4
+
5
def train_model_m6_lr(hyperparameter, train_df_X, train_df_y):
    ''' Train a linear regression on the k best features for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values providing 'num_features'
        train_df_X (df) : features matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dictionary) : {"feature_selector": fitted SelectKBest,
            "regression_model": fitted LinearRegression}
    '''

    # UNPACK HYPERPARAMETER
    k = int(hyperparameter['num_features'])

    # FEATURE SELECTION: keep the k features with the strongest univariate
    # F-test relation to the target. SelectKBest wants a 1-D target, hence
    # the ravel (the original DataFrame is left untouched).
    selector = SelectKBest(f_regression, k=k)
    selector.fit(train_df_X, train_df_y.values.ravel())

    # TRAIN MODEL on the reduced feature matrix. The target is passed as-is
    # (2-D), which fixes the shape of the downstream predict() output.
    regression = LinearRegression()
    regression.fit(selector.transform(train_df_X), train_df_y)

    # PACK MODEL
    return {"feature_selector": selector, "regression_model": regression}
50
+
51
def produce_forecast_m6_lr(model, train_df_X, test_df_X):
    """Create forecasts for the train and test sets with the trained model

    Args:
        model (dictionary): all parameters of the trained model
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set

    Returns:
        train_df_y_hat : forecast result at train set
        test_df_y_hat : forecast result at test set

    """
    # UNPACK MODEL
    selector = model['feature_selector']
    regression = model['regression_model']

    # Reduce both sets to the features chosen at training time, then predict.
    train_df_y_hat = regression.predict(selector.transform(train_df_X))
    test_df_y_hat = regression.predict(selector.transform(test_df_X))

    return train_df_y_hat, test_df_y_hat
76
+
@@ -0,0 +1,148 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import numpy as np
6
+ import pandas as pd
7
+ import random
8
+ import os
9
+
10
def train_model_m7_ann(hyperparameter, train_df_X, train_df_y):
    ''' Train a single-hidden-layer feed-forward network for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values: 'seed', 'hidden_size',
            'activation_function' ('relu' | 'sigmoid' | anything else -> tanh),
            'learning_rate', 'solver' ('adam' | 'sgd') and 'epochs'
        train_df_X (df) : features matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : {"model_ann": trained torch module}
    '''

    def _seed_everything(seed_value):
        # Pin every RNG source so repeated runs are reproducible.
        random.seed(seed_value)
        os.environ["PYTHONHASHSEED"] = str(seed_value)
        np.random.seed(seed_value)
        torch.manual_seed(seed_value)
        torch.cuda.manual_seed(seed_value)
        torch.backends.cudnn.deterministic = True

    # UNPACK HYPERPARAMETER
    seed = int(hyperparameter['seed'])
    hidden_size = hyperparameter['hidden_size']
    activation_function = hyperparameter['activation_function']
    learning_rate = hyperparameter['learning_rate']
    solver = hyperparameter['solver']
    epochs = hyperparameter['epochs']

    # Tensors in the shapes torch expects: 2-D float32 features, (n, 1) target.
    features = torch.tensor(train_df_X.values, dtype=torch.float32)
    target = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    class ANNModel(nn.Module):
        # One hidden layer; the activation is chosen from the closed-over
        # hyperparameter at forward time.
        def __init__(self, input_size, hidden_size, output_size):
            super(ANNModel, self).__init__()
            self.fc1 = nn.Linear(input_size, hidden_size)
            self.fc2 = nn.Linear(hidden_size, output_size)
            self.relu = nn.ReLU()

        def forward(self, x):
            hidden = self.fc1(x)
            if activation_function == 'relu':
                hidden = self.relu(hidden)
            elif activation_function == 'sigmoid':
                hidden = torch.sigmoid(hidden)
            else:
                hidden = torch.tanh(hidden)
            return self.fc2(hidden)

    # Seed immediately before weight initialisation so results are reproducible.
    _seed_everything(seed)
    model_ann = ANNModel(features.shape[1], hidden_size, target.shape[1])

    # Optimiser selection; an unknown solver is a configuration error.
    if solver == 'adam':
        optimizer = optim.Adam(model_ann.parameters(), lr=learning_rate)
    elif solver == 'sgd':
        optimizer = optim.SGD(model_ann.parameters(), lr=learning_rate)
    else:
        raise ValueError('Solver not found')

    criterion = nn.MSELoss()  # squared-error loss for regression

    # TRAIN MODEL: full-batch gradient descent.
    for epoch in range(epochs):
        model_ann.train()
        loss = criterion(model_ann(features), target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # PACK MODEL
    return {"model_ann": model_ann}
107
+
108
def produce_forecast_m7_ann(model, train_df_X, test_df_X):
    """Create forecast at the train and test set using the trained model

    Args:
        model (dictionary): all parameters of the trained model; key
            "model_ann" holds the trained torch module
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set

    Returns:
        train_df_y_hat (df) : forecast result at train set, column 'y_hat'
        test_df_y_hat (df) : forecast result at test set, column 'y_hat'

    """

    # UNPACK MODEL
    model_ann = model["model_ann"]

    # PREPARE FORMAT
    train_df_X_tensor = torch.tensor(train_df_X.values, dtype=torch.float32)
    test_df_X_tensor = torch.tensor(test_df_X.values, dtype=torch.float32)

    # PRODUCE FORECAST
    # Switch model to evaluation mode for inference
    model_ann.eval()

    # Disable gradient tracking to save memory during inference
    with torch.no_grad():
        train_df_y_hat_tensor = model_ann(train_df_X_tensor)
        test_df_y_hat_tensor = model_ann(test_df_X_tensor)

    # Create DataFrames of the result.
    # FIX: convert tensors to numpy explicitly instead of handing raw torch
    # tensors to pandas — matches the sibling m8_dnn implementation.
    train_df_y_hat = pd.DataFrame(train_df_y_hat_tensor.numpy(), index=train_df_X.index, columns=['y_hat'])
    test_df_y_hat = pd.DataFrame(test_df_y_hat_tensor.numpy(), index=test_df_X.index, columns=['y_hat'])

    return train_df_y_hat, test_df_y_hat
146
+
147
+
148
+ # # MESSY
@@ -0,0 +1,141 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
def train_model_m8_dnn(hyperparameter, train_df_X, train_df_y):
    ''' Train a multi-layer feed-forward network (DNN) for point forecasting.

    Args:
        hyperparameter (dict) : hyperparameter values: 'seed', 'n_hidden',
            'hidden_size', 'activation_function' ('relu' | 'sigmoid' | 'tanh'),
            'learning_rate', 'solver' ('adam' | 'sgd') and 'epochs'
        train_df_X (DataFrame) : features matrix for training
        train_df_y (DataFrame) : target matrix for training

    Returns:
        model (dict) : {"model_dnn": trained torch module}
    '''

    # Seed torch before any weight is initialised, for reproducibility.
    torch.manual_seed(hyperparameter['seed'])

    # UNPACK HYPERPARAMETER
    n_hidden = hyperparameter['n_hidden']
    hidden_size = hyperparameter['hidden_size']
    activation_function = hyperparameter['activation_function']
    learning_rate = hyperparameter['learning_rate']
    solver = hyperparameter['solver']
    epochs = hyperparameter['epochs']

    # 2-D float32 features and an (n, 1) target, as torch expects.
    features = torch.tensor(train_df_X.values, dtype=torch.float32)
    target = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    class DNNModel(nn.Module):
        # Stack of Linear layers; the configured activation sits between them.
        def __init__(self, input_size, hidden_size, output_size, n_hidden, activation_function):
            super(DNNModel, self).__init__()
            self.activation_function = activation_function
            # Input layer, (n_hidden - 1) inner layers, then the output layer
            # — constructed in this exact order.
            stack = [nn.Linear(input_size, hidden_size)]
            stack += [nn.Linear(hidden_size, hidden_size) for _ in range(n_hidden - 1)]
            stack.append(nn.Linear(hidden_size, output_size))
            self.layers = nn.ModuleList(stack)

        def forward(self, x):
            # Activation after every layer except the output head.
            for layer in self.layers[:-1]:
                x = layer(x)
                if self.activation_function == 'relu':
                    x = nn.ReLU()(x)
                elif self.activation_function == 'sigmoid':
                    x = torch.sigmoid(x)
                elif self.activation_function == 'tanh':
                    x = torch.tanh(x)
            # Linear output: no activation on the final layer.
            return self.layers[-1](x)

    # Model initialization
    model_dnn = DNNModel(features.shape[1], hidden_size, target.shape[1], n_hidden, activation_function)

    # Optimiser selection; an unknown solver is a configuration error.
    if solver == 'adam':
        optimizer = optim.Adam(model_dnn.parameters(), lr=learning_rate)
    elif solver == 'sgd':
        optimizer = optim.SGD(model_dnn.parameters(), lr=learning_rate)
    else:
        raise ValueError('Solver not found')

    criterion = nn.MSELoss()  # squared-error loss for regression

    # TRAIN MODEL: full-batch gradient descent.
    for epoch in range(epochs):
        model_dnn.train()
        loss = criterion(model_dnn(features), target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # PACK MODEL
    return {"model_dnn": model_dnn}
103
+
104
def produce_forecast_m8_dnn(model, train_df_X, test_df_X):
    """Create forecast at the train and test set using the trained DNN

    Args:
        model (dict): all parameters of the trained model; key "model_dnn"
            holds the trained torch module
        train_df_X (DataFrame): predictors of train set
        test_df_X (DataFrame): predictors of test set

    Returns:
        train_df_y_hat (DataFrame) : forecast result at train set, column 'y_hat'
        test_df_y_hat (DataFrame) : forecast result at test set, column 'y_hat'
    """

    # UNPACK MODEL
    net = model["model_dnn"]

    # Inference mode: evaluation behaviour and no autograd bookkeeping.
    net.eval()
    with torch.no_grad():
        train_pred = net(torch.tensor(train_df_X.values, dtype=torch.float32))
        test_pred = net(torch.tensor(test_df_X.values, dtype=torch.float32))

    # Wrap the raw predictions back into single-column DataFrames aligned
    # with the original indices.
    train_df_y_hat = pd.DataFrame(train_pred.numpy(), index=train_df_X.index, columns=['y_hat'])
    test_df_y_hat = pd.DataFrame(test_pred.numpy(), index=test_df_X.index, columns=['y_hat'])

    return train_df_y_hat, test_df_y_hat
141
+
@@ -0,0 +1,74 @@
1
+ # IMPORT IMPORTANT LIBRARY
2
+ from sklearn.tree import DecisionTreeRegressor
3
+ from sklearn.metrics import mean_squared_error
4
+ import pandas as pd
5
+
6
def train_model_m9_rt(hyperparameter, train_df_X, train_df_y):
    ''' Train a regression tree model for point forecasting.

    Args:
        hyperparameter (df) : hyperparameter values: 'seed', 'max_depth',
            'min_samples_split', 'min_samples_leaf', 'max_features'
        train_df_X (df) : features matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : {"rt": fitted DecisionTreeRegressor}
    '''

    # Build the regression tree directly from the unpacked hyperparameters;
    # random_state pins the (otherwise random) feature sub-sampling.
    regressor = DecisionTreeRegressor(
        criterion='squared_error',
        max_depth=hyperparameter['max_depth'],
        min_samples_split=hyperparameter['min_samples_split'],
        min_samples_leaf=hyperparameter['min_samples_leaf'],
        max_features=hyperparameter['max_features'],
        random_state=hyperparameter['seed'],
    )

    # TRAIN MODEL
    regressor.fit(train_df_X, train_df_y)

    # PACK MODEL
    return {"rt": regressor}
46
+
47
+
48
def produce_forecast_m9_rt(model, train_df_X, test_df_X):
    """Create forecast at the train and test set using the trained tree

    Args:
        model (dictionary): all parameters of the trained model
        train_df_X (df): predictors of train set
        test_df_X (df): predictors of test set

    Returns:
        train_df_y_hat (df) : forecast result at train set, column 'y_hat'
        test_df_y_hat (df) : forecast result at test set, column 'y_hat'

    """

    # UNPACK MODEL
    tree = model['rt']

    def _forecast_frame(frame):
        # Predict, then wrap the raw values back into a single-column
        # DataFrame aligned with the input index.
        return pd.DataFrame(tree.predict(frame), index=frame.index, columns=['y_hat'])

    # PRODUCE FORECAST for both sets
    return _forecast_frame(train_df_X), _forecast_frame(test_df_X)
74
+