pydmoo 0.0.18__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydmoo/algorithms/base/__init__.py +20 -0
- pydmoo/algorithms/base/core/__init__.py +0 -0
- pydmoo/algorithms/base/core/algorithm.py +416 -0
- pydmoo/algorithms/base/core/genetic.py +129 -0
- pydmoo/algorithms/base/dmoo/__init__.py +0 -0
- pydmoo/algorithms/base/dmoo/dmoead.py +131 -0
- pydmoo/algorithms/base/dmoo/dmoeadde.py +131 -0
- pydmoo/algorithms/base/dmoo/dmopso.py +0 -0
- pydmoo/algorithms/base/dmoo/dnsga2.py +137 -0
- pydmoo/algorithms/base/moo/__init__.py +0 -0
- pydmoo/algorithms/base/moo/moead.py +199 -0
- pydmoo/algorithms/base/moo/moeadde.py +105 -0
- pydmoo/algorithms/base/moo/mopso.py +0 -0
- pydmoo/algorithms/base/moo/nsga2.py +122 -0
- pydmoo/algorithms/modern/__init__.py +94 -0
- pydmoo/algorithms/modern/moead_imkt.py +161 -0
- pydmoo/algorithms/modern/moead_imkt_igp.py +56 -0
- pydmoo/algorithms/modern/moead_imkt_lstm.py +109 -0
- pydmoo/algorithms/modern/moead_imkt_n.py +117 -0
- pydmoo/algorithms/modern/moead_imkt_n_igp.py +56 -0
- pydmoo/algorithms/modern/moead_imkt_n_lstm.py +111 -0
- pydmoo/algorithms/modern/moead_ktmm.py +112 -0
- pydmoo/algorithms/modern/moeadde_imkt.py +161 -0
- pydmoo/algorithms/modern/moeadde_imkt_clstm.py +223 -0
- pydmoo/algorithms/modern/moeadde_imkt_igp.py +56 -0
- pydmoo/algorithms/modern/moeadde_imkt_lstm.py +212 -0
- pydmoo/algorithms/modern/moeadde_imkt_n.py +117 -0
- pydmoo/algorithms/modern/moeadde_imkt_n_clstm.py +146 -0
- pydmoo/algorithms/modern/moeadde_imkt_n_igp.py +56 -0
- pydmoo/algorithms/modern/moeadde_imkt_n_lstm.py +114 -0
- pydmoo/algorithms/modern/moeadde_ktmm.py +112 -0
- pydmoo/algorithms/modern/nsga2_imkt.py +162 -0
- pydmoo/algorithms/modern/nsga2_imkt_clstm.py +223 -0
- pydmoo/algorithms/modern/nsga2_imkt_igp.py +56 -0
- pydmoo/algorithms/modern/nsga2_imkt_lstm.py +248 -0
- pydmoo/algorithms/modern/nsga2_imkt_n.py +117 -0
- pydmoo/algorithms/modern/nsga2_imkt_n_clstm.py +146 -0
- pydmoo/algorithms/modern/nsga2_imkt_n_igp.py +57 -0
- pydmoo/algorithms/modern/nsga2_imkt_n_lstm.py +154 -0
- pydmoo/algorithms/modern/nsga2_ktmm.py +112 -0
- pydmoo/algorithms/utils/__init__.py +0 -0
- pydmoo/algorithms/utils/utils.py +166 -0
- pydmoo/core/__init__.py +0 -0
- pydmoo/{response → core}/ar_model.py +4 -4
- pydmoo/{response → core}/bounds.py +35 -2
- pydmoo/core/distance.py +45 -0
- pydmoo/core/inverse.py +55 -0
- pydmoo/core/lstm/__init__.py +0 -0
- pydmoo/core/lstm/base.py +291 -0
- pydmoo/core/lstm/lstm.py +491 -0
- pydmoo/core/manifold.py +93 -0
- pydmoo/core/predictions.py +50 -0
- pydmoo/core/sample_gaussian.py +56 -0
- pydmoo/core/sample_uniform.py +63 -0
- pydmoo/{response/tca_model.py → core/transfer.py} +3 -3
- pydmoo/problems/__init__.py +53 -49
- pydmoo/problems/dyn.py +94 -13
- pydmoo/problems/dynamic/cec2015.py +10 -5
- pydmoo/problems/dynamic/df.py +6 -3
- pydmoo/problems/dynamic/gts.py +69 -34
- pydmoo/problems/real_world/__init__.py +0 -0
- pydmoo/problems/real_world/dsrp.py +168 -0
- pydmoo/problems/real_world/dwbdp.py +189 -0
- {pydmoo-0.0.18.dist-info → pydmoo-0.1.0.dist-info}/METADATA +11 -10
- pydmoo-0.1.0.dist-info/RECORD +70 -0
- {pydmoo-0.0.18.dist-info → pydmoo-0.1.0.dist-info}/WHEEL +1 -1
- pydmoo-0.0.18.dist-info/RECORD +0 -15
- /pydmoo/{response → algorithms}/__init__.py +0 -0
- {pydmoo-0.0.18.dist-info → pydmoo-0.1.0.dist-info}/licenses/LICENSE +0 -0
pydmoo/core/lstm/lstm.py
ADDED
@@ -0,0 +1,491 @@
import torch
import torch.nn as nn
import torch.optim as optim

from .base import TimeSeriesBase


class PredictorLSTM(nn.Module):
    """Lightweight LSTM network for time-series forecasting.

    Attributes
    ----------
    input_dim : int
        Number of input features per time step.
    hidden_dim : int
        Number of hidden units in LSTM layers.
    num_layers : int
        Number of LSTM layers.
    lstm : nn.LSTM
        Core LSTM network module.
    output_layer : nn.Linear
        Final output projection layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout):
        """Initialize the LSTM predictor model.

        Parameters
        ----------
        input_dim : int
            Number of input features per time step.
        hidden_dim : int
            Number of hidden units in LSTM layers.
        num_layers : int
            Number of LSTM layers.
        output_dim : int
            Number of output features to predict.
        dropout : float
            Dropout probability between LSTM layers (applied only when num_layers > 1).
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # LSTM architecture with cuDNN optimization
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        # Output projection layer
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor, hidden=None):
        """Forward pass through the LSTM network.

        Parameters
        ----------
        x : torch.Tensor
            Input sequence tensor of shape (batch_size, sequence_length, input_dim).
        hidden : tuple, optional
            Initial hidden state tuple (h_0, c_0) where:
            - h_0 : torch.Tensor, shape (num_layers, batch_size, hidden_dim)
            - c_0 : torch.Tensor, shape (num_layers, batch_size, hidden_dim)

        Returns
        -------
        pred : torch.Tensor
            Prediction for the next time step with shape (batch_size, output_dim).
        hidden_out : tuple
            Final hidden state tuple (h_n, c_n) with the same shapes as the input hidden state.
        """
        # LSTM forward pass
        output, hidden_out = self.lstm(x, hidden)

        # Extract hidden state from the last time step
        last_hidden = output[:, -1, :]  # shape: (batch_size, hidden_dim)

        # Generate prediction for next time step
        pred = self.output_layer(last_hidden)  # shape: (batch_size, output_dim)

        return pred, hidden_out


class PredictorMultiLayerLSTM(nn.Module):
    """Multi-layer LSTM model for time series prediction.

    Uses sequential LSTM layers to encode temporal patterns and predict the next time step.

    Attributes
    ----------
    input_dim : int
        Number of input features per time step.
    hidden_dim : int
        Number of hidden units in each LSTM layer.
    lstm_layers : nn.ModuleList
        Stack of LSTM layers.
    output_layer : nn.Sequential
        Output network to generate predictions.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        """Initialize the multi-layer LSTM predictor model.

        Parameters
        ----------
        input_dim : int
            Number of input features per time step.
        hidden_dim : int
            Number of hidden units in each LSTM layer.
        num_layers : int
            Number of LSTM layers to stack.
        output_dim : int
            Number of output features to predict.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # Create stack of LSTM layers
        # First layer: input_dim -> hidden_dim; subsequent layers: hidden_dim -> hidden_dim
        self.lstm_layers = nn.ModuleList(
            [
                nn.LSTM(
                    input_size=input_dim if i == 0 else hidden_dim,
                    hidden_size=hidden_dim,
                    batch_first=True,  # Input shape: (batch_size, sequence_length, input_dim)
                    num_layers=1,  # Each layer is separate for individual hidden state control
                    dropout=0,
                )
                for i in range(num_layers)
            ]
        )

        # Output network to transform LSTM encoding to prediction
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),  # Reduce dimension for computational efficiency
            nn.ReLU(),  # Introduce non-linearity
            nn.Dropout(0.1),  # Prevent overfitting
            nn.Linear(hidden_dim // 2, output_dim),  # Final prediction layer
        )

    def forward(self, x: torch.Tensor, hidden_states=None):
        """Forward pass through the LSTM network.

        Parameters
        ----------
        x : torch.Tensor
            Input sequence tensor of shape (batch_size, sequence_length, input_dim).
        hidden_states : list of tuples, optional
            Previous hidden states for each LSTM layer as (hidden_state, cell_state) tuples.

        Returns
        -------
        prediction : torch.Tensor
            Predicted values for the next time step with shape (batch_size, output_dim).
        new_hidden_states : list of tuples
            Updated hidden states for each LSTM layer.
        """
        # Initialize hidden states if not provided
        if hidden_states is None:
            hidden_states = [None] * len(self.lstm_layers)

        # Process input through each LSTM layer sequentially
        output = x  # Start with raw input: shape (batch_size, sequence_length, input_dim)
        new_hidden_states = []  # Store updated hidden states for all layers

        # Pass data through each LSTM layer
        for i, lstm_layer in enumerate(self.lstm_layers):
            # Each LSTM layer processes the sequence
            # output shape: (batch_size, sequence_length, hidden_dim)
            output, (h_n, c_n) = lstm_layer(output, hidden_states[i] if hidden_states[i] is not None else None)
            new_hidden_states.append((h_n, c_n))

        # Extract final time step encoding for prediction
        # last_output shape: (batch_size, hidden_dim)
        last_output = output[:, -1, :]

        # Generate prediction for next time step
        # prediction shape: (batch_size, output_dim)
        prediction = self.output_layer(last_output)

        return prediction, new_hidden_states


class LSTMpredictor(TimeSeriesBase):
    """LSTM-based time series predictor with a sliding-window approach."""

    def __init__(
        self,
        sequence_length,
        hidden_dim: int = 64,
        num_layers: int = 1,
        dropout: float = 0.0,
        epochs: int = 50,
        batch_size: int = 32,
        lr: float = 0.001,
        device: str = "cpu",
        patience: int = 5,
        seed: int | None = None,
        model_type: str = "lstm",
        incremental_learning: bool = False,
    ):
        """Initialize the LSTM predictor with training configuration.

        Parameters
        ----------
        sequence_length : int
            Number of historical time steps used for prediction.
        hidden_dim : int
            Number of hidden units in LSTM layers, by default 64.
        num_layers : int
            Number of LSTM layers, by default 1.
        dropout : float
            Dropout probability between layers, by default 0.0.
        epochs : int
            Maximum number of training epochs, by default 50.
        batch_size : int
            Number of samples per training batch, by default 32.
        lr : float
            Learning rate for the optimizer, by default 0.001.
        device : str
            Computation device ('cpu' or 'cuda'), by default "cpu".
        patience : int
            Early stopping patience (epochs without improvement), by default 5.
        seed : int, optional
            Random seed for reproducibility, by default None.
        model_type : str
            Type of model architecture, by default "lstm".
            Supported values: 'lstm', 'transformer'.
        incremental_learning : bool
            Whether to use incremental learning mode, by default False.
            If True, the model is reused and updated with new data.
        """
        self.sequence_length = sequence_length
        if self.sequence_length < 1:
            raise ValueError("The sequence length should be greater than 0.")

        # Model hyperparameters
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout = dropout if num_layers > 1 else 0.0

        # Training parameters
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

        # Device configuration
        self.device = torch.device(device) if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Early stopping parameters
        self.patience = patience
        self._improvement_threshold = 0.001

        # Reproducibility
        self.seed = seed
        if self.seed is not None:
            self._set_random_seed(self.seed)

        # Initialize parent class
        super().__init__(self.sequence_length, self.device, model_type=model_type)

        # Incremental learning setup
        self.model = None
        self.incremental_learning = incremental_learning

    def convert_train_predict(self, time_series_data: list[list[float]]) -> torch.Tensor:
        """Complete pipeline: convert data, train the model, and generate a prediction.

        Parameters
        ----------
        time_series_data : list[list[float]]
            Input time series data as a list of feature vectors.
            Shape: (n_timesteps, n_features).
            Example: [[1.0, 2.0], [1.1, 2.1], [1.2, 2.2]] for 3 timesteps, 2 features.

        Returns
        -------
        next_prediction : torch.Tensor
            Predicted values for the next time step.
            Shape: (n_features,).
            Example: tensor([1.3, 2.3]) for 2 features.

        Notes
        -----
        This method provides an end-to-end workflow:
        1. Convert raw data to tensor format.
        2. Train the LSTM model on the entire dataset.
        3. Generate a prediction for the next time step.
        The trained model is stored in self.model for future predictions.
        """
        # Convert input data to tensor format
        tensor_data = self.convert_to_tensor(time_series_data)

        # Train model on the prepared data
        trained_model, _ = self.train_model(tensor_data)

        # Generate prediction using trained model
        next_prediction = self.predict_future(trained_model, tensor_data, n_steps=1)

        return next_prediction[0]

    def train_model(self, series_data: torch.Tensor):
        """Train the LSTM model on the provided time series data.

        Parameters
        ----------
        series_data : torch.Tensor
            Input time series data of shape (n_timesteps, n_features).

        Returns
        -------
        tuple[nn.Module, list[float]]
            Trained model and list of training losses.

        Notes
        -----
        Training workflow:
        1. Prepare training sequences and targets using a sliding window.
        2. Create a DataLoader for efficient batch processing.
        3. Train the model using the configured optimization parameters.
        """
        # Prepare training sequences and targets
        sequences_tensor, targets_tensor = self.prepare_training_data(series_data)

        # Create DataLoader with appropriate batch size
        dataloader = self.create_training_dataloader(
            sequences_tensor, targets_tensor, batch_size=min(self.batch_size, sequences_tensor.shape[0])
        )

        # Train LSTM model
        trained_model, losses = self._train_model(dataloader)

        return trained_model, losses

    def _construct_model(self, dataloader: torch.utils.data.DataLoader) -> nn.Module:
        """Construct the LSTM model using prepared sequential data.

        Parameters
        ----------
        dataloader : torch.utils.data.DataLoader
            DataLoader containing training sequences and corresponding targets.

        Returns
        -------
        model : nn.Module
            Configured LSTM model instance.
        """
        # Extract model dimensions from dataset
        input_dim = dataloader.dataset[0][0].shape[-1]
        output_dim = dataloader.dataset[0][1].shape[-1]

        # Initialize model based on incremental learning configuration
        if (self.model is None) or (not self.incremental_learning):
            # Create a new model for initial training or non-incremental scenarios
            model = PredictorLSTM(
                input_dim=input_dim,
                hidden_dim=self.hidden_dim,
                num_layers=self.num_layers,
                output_dim=output_dim,
                dropout=self.dropout,
            ).to(self.device)
            if self.incremental_learning:
                self.model = model  # Store model reference for future incremental learning
        else:
            # Reuse the existing model for incremental learning
            model = self.model

        return model

    def _train_model(self, dataloader: torch.utils.data.DataLoader) -> tuple[nn.Module, list[float]]:
        """Train the LSTM model on prepared sequence data.

        Parameters
        ----------
        dataloader : torch.utils.data.DataLoader
            DataLoader containing training sequences and targets.

        Returns
        -------
        model : nn.Module
            Trained LSTM model instance.
        losses : list[float]
            List of average losses per epoch.

        Notes
        -----
        Training configuration:
        - Loss: mean squared error (MSE)
        - Optimizer: Adam with L2 regularization
        - Learning rate scheduler: StepLR with decay
        - Gradient clipping: prevents gradient explosion
        - Early stopping: monitors training loss improvement

        The training process includes:
        1. Model initialization with appropriate dimensions.
        2. Batch-wise forward and backward passes.
        3. Learning rate scheduling.
        4. Early stopping based on loss improvement.
        5. Progress logging at regular intervals.
        """
        # Construct model
        model = self._construct_model(dataloader)

        # Training configuration
        criterion, optimizer, scheduler = self._optimizer_scheduler(model)

        # Early stopping initialization
        best_loss = float("inf")  # positive infinity
        patience_counter = 0

        # Training loop
        losses = []
        for epoch in range(self.epochs):
            model.train()
            epoch_loss = 0.0
            batch_count = 0

            for batch_idx, (X_batch, y_batch) in enumerate(dataloader):
                X_batch, y_batch = X_batch.to(self.device), y_batch.to(self.device)

                # Forward pass
                optimizer.zero_grad()
                predictions, _ = model(X_batch)  # stateless; each sequence is treated independently
                loss = criterion(predictions, y_batch)

                # Backward pass with gradient clipping
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                # Accumulate loss
                current_loss = loss.item()
                epoch_loss += current_loss
                batch_count += 1

                # Print batch progress
                if (batch_idx + 1) % 100 == 0:
                    print(f"Batch {batch_idx}, Loss: {current_loss:.4f}")

            # Update learning rate
            scheduler.step()

            # Calculate average epoch loss
            avg_epoch_loss = epoch_loss / batch_count if batch_count > 0 else 0.0
            losses.append(avg_epoch_loss)

            # Early stopping check
            if avg_epoch_loss < best_loss * (1 - self._improvement_threshold):  # relative improvement
                best_loss = avg_epoch_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= self.patience:
                    # print(f"Early stopping at epoch {epoch + 1}")
                    break

            # Print epoch progress
            if (epoch + 1) % 100 == 0:
                current_lr = optimizer.param_groups[0]["lr"]
                print(f"Epoch [{epoch + 1}/{self.epochs}], Average Loss: {avg_epoch_loss:.6f}, LR: {current_lr:.6f}")

        # Store model for future incremental learning
        if self.incremental_learning:
            self.model = model
        return model, losses

    def _optimizer_scheduler(self, model: nn.Module):
        """Configure the loss function, optimizer, and learning rate scheduler.

        Parameters
        ----------
        model : torch.nn.Module
            The model to optimize.

        Returns
        -------
        tuple
            criterion : loss function
            optimizer : configured optimizer
            scheduler : learning rate scheduler
        """
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=self.lr, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)
        return criterion, optimizer, scheduler
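
For orientation, here is a minimal usage sketch of the new predictor (editorial, not part of the diff). It assumes the TimeSeriesBase helpers called internally (convert_to_tensor, prepare_training_data, create_training_dataloader, predict_future) behave as their names suggest; the toy history below is illustrative only.

# Hypothetical usage sketch: train on a short 2-feature history and predict the next step.
from pydmoo.core.lstm.lstm import LSTMpredictor

history = [[1.0, 2.0], [1.1, 2.1], [1.2, 2.2], [1.3, 2.3], [1.4, 2.4]]  # (n_timesteps, n_features)

predictor = LSTMpredictor(sequence_length=3, hidden_dim=16, epochs=10, seed=0)
next_step = predictor.convert_train_predict(history)  # expected: tensor of shape (2,)
print(next_step)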
pydmoo/core/manifold.py
ADDED
@@ -0,0 +1,93 @@
import numpy as np


def manifold_prediction(X0, X1):
    """
    Predict the next manifold and compute its dispersion metric.

    Given two sequential populations in a manifold, this function:
    1. Centers the populations by removing their means
    2. Computes a dispersion metric based on the set distance between them

    Parameters
    ----------
    X0 : ndarray, shape (N, n)
        Population at time t-1, where:
        - N: number of points/samples
        - n: dimensionality of the manifold
    X1 : ndarray, shape (N, n)
        Population at time t

    Returns
    -------
    tuple (ndarray, float)
        - C1: ndarray, shape (N, n)
            Centered manifold at time t
        - dispersion: float
            Set distance D(C1, C0) between the centered manifolds at time t and t-1

    Notes
    -----
    1. The manifold is constructed by centering the input data
    2. The dispersion metric quantifies the distance between consecutive manifolds
    3. Uses the module-level set_distance() function
    4. Both input arrays must have the same shape (N, n)
    """
    # Center the populations by removing column means
    C0 = X0 - np.mean(X0, axis=0)  # Centered manifold at t-1
    C1 = X1 - np.mean(X1, axis=0)  # Centered manifold at t

    return C1, set_distance(C1, C0)


def set_distance(A, B):
    """
    Compute the average minimum distance between two sets of points.

    The distance metric is defined as:
        D(A, B) = (1/|A|) ∑_{x∈A} min_{y∈B} ||x - y||

    Parameters
    ----------
    A : ndarray, shape (m, d)
        First set of points containing m samples in d-dimensional space.
    B : ndarray, shape (n, d)
        Second set of points containing n samples in d-dimensional space.

    Returns
    -------
    float
        The average minimum Euclidean distance between points in A and their
        nearest neighbors in B.

    Notes
    -----
    1. Uses the Euclidean (L2) norm for distance computation.
    2. Raises ValueError for empty sets.
    3. Computational complexity is O(m*n*d) where:
       - m = number of points in A
       - n = number of points in B
       - d = dimensionality

    Examples
    --------
    >>> A = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> B = np.array([[1.1, 2.1], [3.1, 4.1], [5.0, 6.0]])
    >>> set_distance(A, B)
    0.14142135623730953
    """
    # Compute pairwise Euclidean distances using broadcasting:
    # A[:, np.newaxis, :] reshapes to (m, 1, d)
    # B[np.newaxis, :, :] reshapes to (1, n, d)
    # Resulting subtraction produces (m, n, d) array
    distances = np.linalg.norm(A[:, np.newaxis, :] - B[np.newaxis, :, :], axis=2)

    # Find minimum distance for each point in A to any point in B
    min_distances = np.min(distances, axis=1)

    # Return average minimum distance
    return np.mean(min_distances)
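
A small illustrative check of how these two helpers combine (editorial, not part of the diff): a pure translation of the population is absorbed by the centering step, so the reported dispersion is zero.

import numpy as np
from pydmoo.core.manifold import manifold_prediction

X0 = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])  # population at time t-1
X1 = X0 + np.array([0.5, -0.5])                       # same population, shifted at time t

C1, dispersion = manifold_prediction(X0, X1)
print(np.allclose(C1.mean(axis=0), 0.0))  # True: C1 is centered
print(dispersion)                          # 0.0: the translation is removed by centering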
pydmoo/core/predictions.py
ADDED
@@ -0,0 +1,50 @@
import numpy as np


def igp_based_predictor(PS, PF, F_, sigma_n_2):
    """IGP-based predictor (Algorithm 2), built on inverse Gaussian process regression, IGPR (Algorithm 1).

    Computes X_ = F_ @ PF.T @ (PF @ PF.T + sigma_n_2 * I)^(-1) @ PS, mapping the
    target objective vectors F_ back to decision space via a linear-kernel GP
    fitted on the population pair (PF, PS).
    """
    # Regularized linear-kernel Gram matrix on the objective vectors
    K_g = np.dot(PF, PF.T)
    K_g_noise = K_g + sigma_n_2 * np.eye(len(PF))
    k_inv = np.linalg.inv(K_g_noise)
    # k_inv = cholesky_inverse_numpy(K_g_noise)

    X_ = np.dot(np.dot(np.dot(F_, PF.T), k_inv), PS)
    return X_


def cholesky_inverse_numpy(A):
    """
    Compute the inverse of a symmetric positive definite (SPD) matrix using Cholesky decomposition.

    Parameters
    ----------
    A : numpy.ndarray
        A symmetric positive definite matrix (must satisfy A = A.T and all eigenvalues > 0).

    Returns
    -------
    numpy.ndarray
        The inverse matrix A^{-1} computed via Cholesky decomposition.

    Raises
    ------
    AssertionError
        If the input matrix is not symmetric or not positive definite.
    """
    # 1. Check if matrix is symmetric and positive definite
    assert np.allclose(A, A.T), "Matrix must be symmetric"
    assert np.all(np.linalg.eigvals(A) > 0), "Matrix must be positive definite"

    # 2. Compute Cholesky decomposition A = L L^T
    # L is a lower triangular matrix with positive diagonal entries
    L = np.linalg.cholesky(A)

    # 3. Compute inverse of L (triangular matrix inversion)
    # Since L is lower triangular, its inverse can be computed efficiently
    inv_L = np.linalg.inv(L)  # Alternative: solve triangular systems

    # 4. Compute inverse of A using A^{-1} = (L^{-1})^T L^{-1}
    A_inv = inv_L.T @ inv_L

    return A_inv
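
An illustrative check of the two helpers above (editorial, not part of the diff; the module path pydmoo.core.predictions is inferred from the +50 line count in the file listing). It verifies the Cholesky-based inverse against numpy.linalg.inv and shows the output shape of the IGP predictor on toy data.

import numpy as np
from pydmoo.core.predictions import cholesky_inverse_numpy, igp_based_predictor

A = np.array([[4.0, 1.0], [1.0, 3.0]])  # small symmetric positive definite matrix
print(np.allclose(cholesky_inverse_numpy(A), np.linalg.inv(A)))  # True

# Toy shapes: 5 individuals, 3 decision variables, 2 objectives, 4 target points.
rng = np.random.default_rng(0)
PS, PF, F_ = rng.random((5, 3)), rng.random((5, 2)), rng.random((4, 2))
print(igp_based_predictor(PS, PF, F_, sigma_n_2=1e-3).shape)  # (4, 3)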
pydmoo/core/sample_gaussian.py
ADDED
@@ -0,0 +1,56 @@
import numpy as np


def univariate_gaussian_sample(mean, std, n_samples=100, random_state=None):
    """
    Generate samples from a 1-dimensional Gaussian distribution.

    Parameters
    ----------
    mean : array_like, shape (n_features,)
        Mean values of the Gaussian distribution for each feature.
    std : array_like, shape (n_features,)
        Standard deviation of the Gaussian distribution for each feature.
    n_samples : int, optional
        Number of samples to generate (default=100).
    random_state : numpy.random.Generator or numpy.random.RandomState
        Random number generator used for sampling.

    Returns
    -------
    ndarray, shape (n_samples, n_features)
        Random samples from the specified Gaussian distribution.

    Notes
    -----
    This function generates independent 1D Gaussian samples for each feature.
    """
    return random_state.normal(mean, std, size=(n_samples, len(mean)))


def multivariate_gaussian_sample(mean, cov, n_samples=1, random_state=None):
    """
    Generate samples from a multivariate Gaussian distribution.

    Parameters
    ----------
    mean : array_like, shape (n_features,)
        Mean vector of the distribution.
    cov : array_like, shape (n_features, n_features)
        Covariance matrix of the distribution.
    n_samples : int, optional
        Number of samples to generate (default=1).
    random_state : numpy.random.Generator or numpy.random.RandomState
        Random number generator used for sampling.

    Returns
    -------
    ndarray, shape (n_samples, n_features)
        Random samples from the multivariate Gaussian distribution.

    Raises
    ------
    ValueError
        If the covariance matrix is not positive-semidefinite.

    Notes
    -----
    Uses the generator's multivariate_normal method for sampling.
    """
    return random_state.multivariate_normal(mean, cov, size=n_samples)
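
An illustrative usage note (editorial, not part of the diff): both samplers expect a NumPy random generator to be passed explicitly via random_state, since the default None is never substituted internally.

import numpy as np
from pydmoo.core.sample_gaussian import multivariate_gaussian_sample, univariate_gaussian_sample

rng = np.random.default_rng(42)

uni = univariate_gaussian_sample(mean=[0.0, 5.0], std=[1.0, 0.5], n_samples=4, random_state=rng)
print(uni.shape)  # (4, 2)

mvn = multivariate_gaussian_sample(mean=[0.0, 0.0], cov=0.1 * np.eye(2), n_samples=3, random_state=rng)
print(mvn.shape)  # (3, 2)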