pynnlf-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pynnlf/__about__.py +1 -0
- pynnlf/__init__.py +5 -0
- pynnlf/api.py +17 -0
- pynnlf/discovery.py +63 -0
- pynnlf/engine.py +1238 -0
- pynnlf/hyperparams.py +38 -0
- pynnlf/model_utils.py +186 -0
- pynnlf/runner.py +108 -0
- pynnlf/scaffold/README_WORKSPACE.md +0 -0
- pynnlf/scaffold/data/README_data.md +40 -0
- pynnlf/scaffold/data/ds0_test.csv +4081 -0
- pynnlf/scaffold/models/README_models.md +61 -0
- pynnlf/scaffold/models/hyperparameters.yaml +264 -0
- pynnlf/scaffold/models/m10_rf.py +65 -0
- pynnlf/scaffold/models/m11_svr.py +53 -0
- pynnlf/scaffold/models/m12_rnn.py +152 -0
- pynnlf/scaffold/models/m13_lstm.py +208 -0
- pynnlf/scaffold/models/m14_gru.py +139 -0
- pynnlf/scaffold/models/m15_transformer.py +138 -0
- pynnlf/scaffold/models/m16_prophet.py +216 -0
- pynnlf/scaffold/models/m17_xgb.py +66 -0
- pynnlf/scaffold/models/m18_nbeats.py +107 -0
- pynnlf/scaffold/models/m1_naive.py +49 -0
- pynnlf/scaffold/models/m2_snaive.py +49 -0
- pynnlf/scaffold/models/m3_ets.py +133 -0
- pynnlf/scaffold/models/m4_arima.py +123 -0
- pynnlf/scaffold/models/m5_sarima.py +128 -0
- pynnlf/scaffold/models/m6_lr.py +76 -0
- pynnlf/scaffold/models/m7_ann.py +148 -0
- pynnlf/scaffold/models/m8_dnn.py +141 -0
- pynnlf/scaffold/models/m9_rt.py +74 -0
- pynnlf/scaffold/models/mXX_template.py +68 -0
- pynnlf/scaffold/specs/batch.yaml +4 -0
- pynnlf/scaffold/specs/experiment.yaml +4 -0
- pynnlf/scaffold/specs/pynnlf_config.yaml +69 -0
- pynnlf/scaffold/specs/testing_benchmark.csv +613 -0
- pynnlf/scaffold/specs/testing_benchmark_metadata.md +12 -0
- pynnlf/scaffold/specs/tests_ci.yaml +8 -0
- pynnlf/scaffold/specs/tests_full.yaml +23 -0
- pynnlf/tests_runner.py +211 -0
- pynnlf/tools/strip_notebook_artifacts.py +32 -0
- pynnlf/workspace.py +63 -0
- pynnlf/yamlio.py +28 -0
- pynnlf-0.2.2.dist-info/METADATA +168 -0
- pynnlf-0.2.2.dist-info/RECORD +47 -0
- pynnlf-0.2.2.dist-info/WHEEL +5 -0
- pynnlf-0.2.2.dist-info/top_level.txt +1 -0

pynnlf/scaffold/models/m13_lstm.py

@@ -0,0 +1,208 @@

# IMPORT REQUIRED LIBRARIES
import time
import pandas as pd
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from pynnlf.model_utils import separate_lag_and_exogenous_features

def train_model_m13_lstm(hyperparameter, train_df_X, train_df_y):
    '''Train an LSTM model for point forecasting.

    The LSTM block learns the temporal patterns of the lag features. We take the last
    hidden state of the LSTM and concatenate it with the exogenous features (e.g. calendar
    features), then a fully connected layer produces the final prediction.
    A future improvement could be to refine the architecture of the fully connected
    layer after the LSTM.

    Args:
        hyperparameter (df) : hyperparameter values of the model
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained model together with its hyperparameters and training data
    '''

    # UNPACK HYPERPARAMETER
    seed = int(hyperparameter['seed'])
    input_size = int(hyperparameter['input_size'])    # 1: only lag features are fed to the LSTM; exogenous features (e.g. calendar) go to the fully connected layer together with the last hidden state
    hidden_size = int(hyperparameter['hidden_size'])  # size of the hidden state; many-to-one, so only the last hidden state is passed on
    num_layers = int(hyperparameter['num_layers'])    # 1 by default to keep the model simple
    output_size = int(hyperparameter['output_size'])  # 1: a single value is predicted
    batch_size = int(hyperparameter['batch_size'])    # mini-batches keep memory usage manageable
    epochs = int(hyperparameter['epochs'])
    learning_rate = hyperparameter['learning_rate']   # kept as a float

    # DEFINE MODEL AND TRAINING FUNCTION
    class LSTMModel(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, exog_size, output_size=1):
            super(LSTMModel, self).__init__()

            # LSTM layer
            self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

            # Fully connected (FC) layer; its input is the concatenation of the
            # LSTM output and the exogenous variables
            self.fc = nn.Linear(hidden_size + exog_size, output_size)

        def forward(self, x, exogenous_data):
            # Pass the input through the LSTM
            out, (h_n, c_n) = self.lstm(x)

            # Take the hidden state of the last timestep (many-to-one)
            last_hidden_state = out[:, -1, :]  # Shape: (batch_size, hidden_size)

            # Concatenate the LSTM output with the exogenous variables
            combined_input = torch.cat((last_hidden_state, exogenous_data), dim=1)  # Shape: (batch_size, hidden_size + exog_size)

            # Pass the combined input through the FC layer
            out = self.fc(combined_input)
            return out

    def train_lstm_with_minibatches(model, train_loader, epochs, learning_rate=0.001):
        # Loss function (mean squared error) and optimizer (Adam)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        for epoch in range(epochs):
            print(f'Epoch [{epoch+1}/{epochs}]')
            start_time = time.time()

            model.train()  # Set model to training mode

            # Iterate over mini-batches
            batch_no = 1
            for X_lags_batch, X_exog_batch, y_batch in train_loader:
                print(f'Epoch [{epoch+1}/{epochs}] and batch [{batch_no}/{len(train_loader)}]')
                batch_no += 1

                # Forward pass
                predictions = model(X_lags_batch, X_exog_batch)
                loss = criterion(predictions, y_batch)

                # Backward pass
                optimizer.zero_grad()  # Zero gradients from the previous step
                loss.backward()        # Backpropagate the error
                optimizer.step()       # Update the model's weights

            epoch_time = time.time() - start_time
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, time taken: {epoch_time:.2f} seconds')

    def set_seed(seed=seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)

    # PREPARE TRAIN DATA: SEPARATE LAG AND EXOGENOUS FEATURES
    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X)
    X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
    X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)  # Shape: (n_samples, exog_size)
    y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    total_lag_features = X_lags_tensor.shape[1]  # Number of lag feature columns
    sequence_length = total_lag_features // input_size
    exog_size = X_exog_tensor.shape[1]           # Number of exogenous features

    # Reshape X_lags_tensor to 3D: (n_samples, sequence_length, input_size)
    X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

    # INITIALIZE MODEL AND MAKE TRAINING BATCHES
    set_seed(seed=seed)  # Set random seed for reproducibility
    lstm = LSTMModel(input_size, hidden_size, num_layers, exog_size, output_size)
    # TensorDataset with features and target, and a DataLoader for mini-batching
    train_data = TensorDataset(X_lags_tensor, X_exog_tensor, y_tensor)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # TRAIN MODEL
    train_lstm_with_minibatches(lstm, train_loader, epochs=epochs, learning_rate=learning_rate)

    # PACK MODEL
    model = {"lstm": lstm, "hyperparameter": hyperparameter, "train_df_X": train_df_X, "train_df_y": train_df_y}

    return model

def produce_forecast_m13_lstm(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (df) : forecast on the train set
        test_df_y_hat (df) : forecast on the test set
    """

    # UNPACK MODEL
    lstm = model['lstm']
    hyperparameter = model['hyperparameter']

    # UNPACK HYPERPARAMETER (only the values needed for reshaping and batching)
    input_size = int(hyperparameter['input_size'])
    batch_size = int(hyperparameter['batch_size'])

    # PRODUCE FORECAST
    def produce_forecast(lstm, X):
        # Split X into lag and exogenous features
        X_lags, X_exog = separate_lag_and_exogenous_features(X)
        X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
        X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)

        total_lag_features = X_lags_tensor.shape[1]
        sequence_length = total_lag_features // input_size

        # Reshape X_lags_tensor to 3D: (n_samples, sequence_length, input_size)
        X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

        # Predicting on the full tensor at once can run out of memory, so predict in mini-batches
        predictions = []
        for i in range(0, len(X_lags_tensor), batch_size):
            # Current mini-batch of lag and exogenous features
            batch_X_lags = X_lags_tensor[i:i+batch_size]
            batch_X_exog = X_exog_tensor[i:i+batch_size]

            with torch.no_grad():
                # Predictions for the mini-batch
                batch_pred = lstm(batch_X_lags, batch_X_exog)

            predictions.append(batch_pred)

        # Concatenate all mini-batch predictions
        predictions = torch.cat(predictions, dim=0)

        return predictions.detach().numpy()

    train_df_y_hat = produce_forecast(lstm, train_df_X)
    test_df_y_hat = produce_forecast(lstm, test_df_X)

    return train_df_y_hat, test_df_y_hat
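
The hyperparameters above are looked up by key and cast with int(...), so anything dict-like with scalar values works. A minimal usage sketch, with hypothetical values chosen only for illustration (the packaged scaffold/models/hyperparameters.yaml presumably holds the real ones), assuming train_df_X, train_df_y and test_df_X are pandas DataFrames whose columns follow the lag/exogenous split expected by separate_lag_and_exogenous_features:

import pandas as pd

# Hypothetical hyperparameter values for illustration only
hyperparameter = pd.Series({
    "seed": 42, "input_size": 1, "hidden_size": 32, "num_layers": 1,
    "output_size": 1, "batch_size": 64, "epochs": 5, "learning_rate": 0.001,
})

model = train_model_m13_lstm(hyperparameter, train_df_X, train_df_y)                      # train on the training split
train_df_y_hat, test_df_y_hat = produce_forecast_m13_lstm(model, train_df_X, test_df_X)   # numpy arrays of predictions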

pynnlf/scaffold/models/m14_gru.py

@@ -0,0 +1,139 @@

import time
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from pynnlf.model_utils import separate_lag_and_exogenous_features

def train_model_m14_gru(hyperparameter, train_df_X, train_df_y):
    '''Train a GRU model for point forecasting.

    Uses the GRU for temporal patterns and a fully connected layer that combines the
    last hidden state with the exogenous features.

    Args:
        hyperparameter (df) : hyperparameter values of the model
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained model together with its hyperparameters and training data
    '''

    # UNPACK HYPERPARAMETER
    seed = int(hyperparameter['seed'])
    input_size = int(hyperparameter['input_size'])
    hidden_size = int(hyperparameter['hidden_size'])
    num_layers = int(hyperparameter['num_layers'])
    output_size = int(hyperparameter['output_size'])
    batch_size = int(hyperparameter['batch_size'])
    epochs = int(hyperparameter['epochs'])
    learning_rate = hyperparameter['learning_rate']

    # DEFINE MODEL
    class GRUModel(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, exog_size, output_size=1):
            super(GRUModel, self).__init__()
            self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
            self.fc = nn.Linear(hidden_size + exog_size, output_size)

        def forward(self, x, exogenous_data):
            out, h_n = self.gru(x)
            last_hidden_state = out[:, -1, :]
            combined_input = torch.cat((last_hidden_state, exogenous_data), dim=1)
            out = self.fc(combined_input)
            return out

    def train_gru_with_minibatches(model, train_loader, epochs, learning_rate=0.001):
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        for epoch in range(epochs):
            print(f'Epoch [{epoch+1}/{epochs}]')
            start_time = time.time()
            model.train()
            batch_no = 1
            for X_lags_batch, X_exog_batch, y_batch in train_loader:
                print(f'Epoch [{epoch+1}/{epochs}] and batch [{batch_no}/{len(train_loader)}]')
                batch_no += 1

                predictions = model(X_lags_batch, X_exog_batch)
                loss = criterion(predictions, y_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, time taken: {time.time() - start_time:.2f}s')

    def set_seed(seed=seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)

    # PREPARE TRAIN DATA
    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X)
    X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
    X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)
    y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)

    total_lag_features = X_lags_tensor.shape[1]
    sequence_length = total_lag_features // input_size
    exog_size = X_exog_tensor.shape[1]

    X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

    # INITIALIZE MODEL + DATALOADER
    set_seed(seed=seed)
    gru = GRUModel(input_size, hidden_size, num_layers, exog_size, output_size)
    train_data = TensorDataset(X_lags_tensor, X_exog_tensor, y_tensor)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # TRAIN MODEL
    train_gru_with_minibatches(gru, train_loader, epochs=epochs, learning_rate=learning_rate)

    # PACK MODEL
    model = {"gru": gru, "hyperparameter": hyperparameter, "train_df_X": train_df_X, "train_df_y": train_df_y}
    return model

def produce_forecast_m14_gru(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained GRU model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (df) : forecast on the train set
        test_df_y_hat (df) : forecast on the test set
    """
    gru = model['gru']
    hyperparameter = model['hyperparameter']
    input_size = int(hyperparameter['input_size'])
    batch_size = int(hyperparameter['batch_size'])

    def produce_forecast(gru, X):
        X_lags, X_exog = separate_lag_and_exogenous_features(X)
        X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
        X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)

        total_lag_features = X_lags_tensor.shape[1]
        sequence_length = total_lag_features // input_size
        X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

        # Predict in mini-batches to keep memory usage bounded
        predictions = []
        for i in range(0, len(X_lags_tensor), batch_size):
            batch_X_lags = X_lags_tensor[i:i+batch_size]
            batch_X_exog = X_exog_tensor[i:i+batch_size]
            with torch.no_grad():
                batch_pred = gru(batch_X_lags, batch_X_exog)
            predictions.append(batch_pred)
        return torch.cat(predictions, dim=0).detach().numpy()

    train_df_y_hat = produce_forecast(gru, train_df_X)
    test_df_y_hat = produce_forecast(gru, test_df_X)
    return train_df_y_hat, test_df_y_hat
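
The GRU model above, like the LSTM, delegates the feature split to separate_lag_and_exogenous_features from pynnlf/model_utils.py, which is not shown in this diff. A minimal sketch of what such a splitter could look like, under the assumption that lag columns are identified by a "lag" name prefix; the actual model_utils implementation may use a different convention:

import pandas as pd

def split_lag_and_exog_sketch(df_X: pd.DataFrame):
    # Assumed convention: lag features carry a "lag" prefix (e.g. "lag_1", "lag_48");
    # every other column (calendar dummies, etc.) is treated as exogenous.
    lag_cols = [c for c in df_X.columns if c.startswith("lag")]
    exog_cols = [c for c in df_X.columns if c not in lag_cols]
    return df_X[lag_cols], df_X[exog_cols]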

pynnlf/scaffold/models/m15_transformer.py

@@ -0,0 +1,138 @@

import time
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from pynnlf.model_utils import separate_lag_and_exogenous_features

def train_model_m15_transformer(hyperparameter, train_df_X, train_df_y):
    '''Train a Transformer model for point forecasting.

    Uses a Transformer encoder for temporal patterns and a fully connected layer that
    combines the last encoder output with the exogenous features.

    Args:
        hyperparameter (df) : hyperparameter values of the model
        train_df_X (df) : feature matrix for training
        train_df_y (df) : target matrix for training

    Returns:
        model (dict) : trained model together with its hyperparameters and training data
    '''

    # UNPACK HYPERPARAMETER
    seed = int(hyperparameter['seed'])
    input_size = int(hyperparameter['input_size'])
    hidden_size = int(hyperparameter['hidden_size'])
    num_layers = int(hyperparameter['num_layers'])
    output_size = int(hyperparameter['output_size'])
    batch_size = int(hyperparameter['batch_size'])
    epochs = int(hyperparameter['epochs'])
    nhead = int(hyperparameter['nhead'])
    learning_rate = hyperparameter['learning_rate']

    # TRANSFORMER MODEL
    class TransformerModel(nn.Module):
        def __init__(self, input_size, hidden_size, num_layers, exog_size, output_size=1):
            super(TransformerModel, self).__init__()
            # Linear embedding of the lag inputs into the model dimension
            self.embedding = nn.Linear(input_size, hidden_size)
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=nhead,
                dim_feedforward=hidden_size * 2,
                batch_first=True
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
            # Fully connected output layer
            self.fc = nn.Linear(hidden_size + exog_size, output_size)

        def forward(self, x, exogenous_data):
            x = self.embedding(x)
            x = self.transformer_encoder(x)
            last_hidden_state = x[:, -1, :]
            combined_input = torch.cat((last_hidden_state, exogenous_data), dim=1)
            out = self.fc(combined_input)
            return out

    def train_transformer_with_minibatches(model, train_loader, epochs, learning_rate=learning_rate):
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        for epoch in range(epochs):
            print(f'Epoch [{epoch+1}/{epochs}]')
            start_time = time.time()
            model.train()
            batch_no = 1
            for X_lags_batch, X_exog_batch, y_batch in train_loader:
                print(f'Epoch [{epoch+1}/{epochs}] batch [{batch_no}/{len(train_loader)}]')
                batch_no += 1
                predictions = model(X_lags_batch, X_exog_batch)
                loss = criterion(predictions, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            end_time = time.time()
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, time: {end_time - start_time:.2f}s')

    def set_seed(seed=seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PYTHONHASHSEED"] = str(seed)

    # --- DATA PREP ---
    X_lags, X_exog = separate_lag_and_exogenous_features(train_df_X)
    X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
    X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)
    y_tensor = torch.tensor(train_df_y.values, dtype=torch.float32).view(-1, 1)
    total_lag_features = X_lags_tensor.shape[1]
    sequence_length = total_lag_features // input_size
    exog_size = X_exog_tensor.shape[1]
    X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

    # --- INIT MODEL AND DATALOADER ---
    set_seed(seed)
    transformer = TransformerModel(input_size, hidden_size, num_layers, exog_size, output_size)
    train_data = TensorDataset(X_lags_tensor, X_exog_tensor, y_tensor)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    train_transformer_with_minibatches(transformer, train_loader, epochs=epochs, learning_rate=learning_rate)

    model = {"transformer": transformer, "hyperparameter": hyperparameter, "train_df_X": train_df_X, "train_df_y": train_df_y}
    return model

def produce_forecast_m15_transformer(model, train_df_X, test_df_X):
    """Create forecasts on the train and test sets using the trained Transformer model.

    Args:
        model (dict): all parameters of the trained model
        train_df_X (df): predictors of the train set
        test_df_X (df): predictors of the test set

    Returns:
        train_df_y_hat (df) : forecast on the train set
        test_df_y_hat (df) : forecast on the test set
    """
    transformer = model['transformer']
    hyperparameter = model['hyperparameter']
    batch_size = int(hyperparameter['batch_size'])
    input_size = int(hyperparameter['input_size'])

    def produce_forecast(transformer, X):
        X_lags, X_exog = separate_lag_and_exogenous_features(X)
        X_lags_tensor = torch.tensor(X_lags.values, dtype=torch.float32)
        X_exog_tensor = torch.tensor(X_exog.values, dtype=torch.float32)
        total_lag_features = X_lags_tensor.shape[1]
        sequence_length = total_lag_features // input_size
        X_lags_tensor = X_lags_tensor.view(-1, sequence_length, input_size)

        # Predict in mini-batches to keep memory usage bounded
        predictions = []
        for i in range(0, len(X_lags_tensor), batch_size):
            batch_X_lags = X_lags_tensor[i:i+batch_size]
            batch_X_exog = X_exog_tensor[i:i+batch_size]
            with torch.no_grad():
                batch_pred = transformer(batch_X_lags, batch_X_exog)
            predictions.append(batch_pred)
        return torch.cat(predictions, dim=0).detach().numpy()

    train_df_y_hat = produce_forecast(transformer, train_df_X)
    test_df_y_hat = produce_forecast(transformer, test_df_X)
    return train_df_y_hat, test_df_y_hat
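
All three sequence models (LSTM, GRU, Transformer) rely on the same reshape: the flat lag matrix of shape (n_samples, total_lag_features) is viewed as (n_samples, sequence_length, input_size) before entering the recurrent or attention block. A tiny self-contained illustration with hypothetical numbers:

import torch

# 4 samples with 6 lag columns and input_size = 1 -> sequences of length 6, one feature per step
X_lags_tensor = torch.arange(24, dtype=torch.float32).reshape(4, 6)
input_size = 1
sequence_length = X_lags_tensor.shape[1] // input_size   # 6
X_seq = X_lags_tensor.view(-1, sequence_length, input_size)
print(X_seq.shape)  # torch.Size([4, 6, 1])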