podimodels-0.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PODImodels/PODImodels.py +1033 -0
- PODImodels/PODdata.py +522 -0
- PODImodels/__init__.py +61 -0
- PODImodels/podImodelabstract.py +840 -0
- podimodels-0.0.3.dist-info/METADATA +211 -0
- podimodels-0.0.3.dist-info/RECORD +9 -0
- podimodels-0.0.3.dist-info/WHEEL +5 -0
- podimodels-0.0.3.dist-info/licenses/LICENSE +21 -0
- podimodels-0.0.3.dist-info/top_level.txt +1 -0
PODImodels/podImodelabstract.py
@@ -0,0 +1,840 @@
"""
POD-based Interpolation Model Abstract Base Class
=================================================

This module defines the abstract base class for all POD-based interpolation models.
It provides a common interface and shared functionality for building reduced-order
models that combine Proper Orthogonal Decomposition with various machine learning
techniques.

Classes
-------
PODImodelAbstract
    Abstract base class for POD-based interpolation models.
"""

from abc import ABC, abstractmethod
import warnings
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pyvista as pv
from .PODdata import vtk_writer
from scipy.linalg import svd
from typing import Optional, Union, Tuple, List


class PODImodelAbstract(ABC):
    """
    Abstract base class for POD-based interpolation models.

    This class provides a common interface for building reduced-order models that
    combine Proper Orthogonal Decomposition (POD) with various machine learning
    techniques. All concrete interpolation model classes should inherit from this
    class and implement the abstract methods.

    The class handles common functionality including:

    - Data scaling and preprocessing
    - POD decomposition for dimensionality reduction
    - Model validation and error assessment
    - VTK file output for visualization

    Parameters
    ----------
    rank : int, optional
        The number of POD modes to retain. Default is 10.
    with_scaler_x : bool, optional
        Whether to apply MinMax scaling to input features. Default is True.
    with_scaler_y : bool, optional
        Whether to apply MinMax scaling to target values. Default is True.
    POD_algo : {'svd', 'eigen'}, optional
        The algorithm to use for POD computation. 'svd' uses singular value
        decomposition, 'eigen' uses eigenvalue decomposition. Default is 'eigen'.

    Attributes
    ----------
    rank : int
        Number of POD modes to retain.
    with_scaler_x : bool
        Flag for input scaling.
    with_scaler_y : bool
        Flag for output scaling.
    POD_algo : str
        POD algorithm type.
    scalar_X : MinMaxScaler, optional
        Scaler for input features (created if with_scaler_x=True).
    scalar_Y : MinMaxScaler, optional
        Scaler for output features (created if with_scaler_y=True).
    v : np.ndarray
        Truncated POD modes matrix.
    v_all : np.ndarray
        Full POD modes matrix.
    s : np.ndarray
        Truncated singular values.
    s_all : np.ndarray
        Full singular values.
    coeffs : np.ndarray
        POD coefficients matrix.

    Notes
    -----
    Subclasses must implement `fit_tmp` and `predict_tmp` methods to define
    the specific machine learning algorithm used for interpolation.

    Examples
    --------
    >>> # Define a concrete implementation (example)
    >>> class MyPODModel(PODImodelAbstract):
    ...     def __init__(self, **kwargs):
    ...         super().__init__(**kwargs)
    ...         # Initialize specific model
    ...
    ...     def fit_tmp(self, x, y):
    ...         # Implement specific fitting logic
    ...         pass
    ...
    ...     def predict_tmp(self, x):
    ...         # Implement specific prediction logic
    ...         return predictions
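    >>> # Hypothetical usage of the sketch above (``MyPODModel`` is
    >>> # illustrative and not shipped with the package):
    >>> model = MyPODModel(rank=5, POD_algo="svd")
    >>> error = model.validate(x, y)  # doctest: +SKIP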
    """

    @abstractmethod
    def __init__(
        self,
        rank: int = 10,
        with_scaler_x: bool = True,
        with_scaler_y: bool = True,
        POD_algo: str = "eigen",
    ):
        """
        Abstract initialization method.

        Parameters
        ----------
        rank : int, optional
            The number of POD modes to retain. Default is 10.
        with_scaler_x : bool, optional
            Whether to apply MinMax scaling to input features. Default is True.
        with_scaler_y : bool, optional
            Whether to apply MinMax scaling to target values. Default is True.
        POD_algo : {'svd', 'eigen'}, optional
            The algorithm to use for POD computation. Default is 'eigen'.
        """
        self.rank: int = rank
        self.with_scaler_x: bool = with_scaler_x
        self.with_scaler_y: bool = with_scaler_y
        self.POD_algo: str = POD_algo
        self.scalar_X: Optional[MinMaxScaler] = None
        self.scalar_Y: Optional[MinMaxScaler] = None
        self.v: Optional[np.ndarray] = None
        self.v_all: Optional[np.ndarray] = None
        self.s: Optional[np.ndarray] = None
        self.s_all: Optional[np.ndarray] = None
        self.coeffs: Optional[np.ndarray] = None

    @abstractmethod
    def fit_tmp(self, x: np.ndarray, y: np.ndarray) -> None:
        """
        Abstract method for model-specific fitting logic.

        This method should be implemented by subclasses to define the specific
        machine learning algorithm used for learning the input-output mapping.

        Parameters
        ----------
        x : np.ndarray
            Preprocessed input features of shape (n_samples, n_features).
        y : np.ndarray
            Preprocessed target values of shape (n_samples, n_targets).
        """

    @abstractmethod
    def predict_tmp(self, x: np.ndarray) -> np.ndarray:
        """
        Abstract method for model-specific prediction logic.

        This method should be implemented by subclasses to define how predictions
        are made using the trained model.

        Parameters
        ----------
        x : np.ndarray
            Input features for prediction.

        Returns
        -------
        np.ndarray
            Predicted values.
        """

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """
        Fit the model to the training data.

        This method handles the complete training pipeline including input validation,
        POD decomposition (if applicable), data scaling, and calling the model-specific
        fitting method.

        Parameters
        ----------
        x : np.ndarray
            Input features of shape (n_samples, n_input_features).
        y : np.ndarray
            Target values of shape (n_samples, n_output_features).

        Raises
        ------
        ValueError
            If the number of samples in x and y don't match, or if inputs are not 2D.
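
        Examples
        --------
        >>> # A minimal sketch; ``MyPODModel`` is the hypothetical subclass
        >>> # from the class docstring and the shapes are illustrative.
        >>> rng = np.random.default_rng(0)
        >>> x, y = rng.random((50, 3)), rng.random((50, 400))
        >>> model = MyPODModel(rank=10)  # doctest: +SKIP
        >>> model.fit(x, y)  # doctest: +SKIP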
        """
        if x.shape[0] != y.shape[0]:
            raise ValueError("Number of samples in x and y must match.")
        if x.ndim != 2 or y.ndim != 2:
            raise ValueError("Input and output data must be 2D numpy arrays.")

        # POD-based subclasses are detected by naming convention: any class
        # whose name contains "POD" has its targets projected onto POD
        # coefficients before the model-specific fit.
        if "POD" in self.__class__.__name__:
            y = self.performPOD(y)

        if self.with_scaler_x:
            self.scalar_X = MinMaxScaler()
            x = self.scalar_X.fit_transform(x)
        if self.with_scaler_y:
            self.scalar_Y = MinMaxScaler()
            y = self.scalar_Y.fit_transform(y)

        self.fit_tmp(x, y)

    def predict(self, x: np.ndarray) -> np.ndarray:
        """
        Predict target values for given input features.

        This method serves as a wrapper around the model-specific prediction method,
        ensuring a consistent interface across all model types.

        Parameters
        ----------
        x : np.ndarray
            Input features of shape (n_samples, n_input_features).

        Returns
        -------
        np.ndarray
            Predicted target values of shape (n_samples, n_output_features).
        """
        return self.predict_tmp(x)

    def frobenius_norm(
        self,
        x: np.ndarray,
        y: np.ndarray,
        separate_err: bool = False,
        lift_y: Optional[np.ndarray] = None,
    ) -> Union[float, np.ndarray]:
        """
        Calculate the Frobenius norm of prediction errors.

        Computes the relative Frobenius norm between true and predicted values,
        which provides a measure of the overall prediction accuracy.

        Parameters
        ----------
        x : np.ndarray
            Input features for prediction.
        y : np.ndarray
            True target values.
        separate_err : bool, optional
            If True, return the error for each sample separately.
            If False, return the overall aggregated error. Default is False.
        lift_y : np.ndarray, optional
            If provided, this array is added to both true and predicted values
            before error calculation. Default is None.

        Returns
        -------
        np.ndarray or float
            If separate_err=True, returns array of relative errors for each sample.
            If separate_err=False, returns overall relative Frobenius norm error.

        Notes
        -----
        The relative Frobenius norm is calculated as:
        ||y_true - y_pred||_F / ||y_true||_F

        For separate errors, it is calculated per sample as:
        ||y_true[i] - y_pred[i]||_2 / ||y_true[i]||_2
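
        Examples
        --------
        >>> # Illustrative only; ``model`` is assumed to be a fitted
        >>> # concrete subclass.
        >>> err = model.frobenius_norm(x_test, y_test)  # doctest: +SKIP
        >>> per_sample = model.frobenius_norm(
        ...     x_test, y_test, separate_err=True
        ... )  # doctest: +SKIP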
        """
        if separate_err:
            err = []
            y_pred = self.predict(x)
            if lift_y is not None:
                # Out-of-place addition: avoid mutating the caller's arrays.
                y_pred = y_pred + lift_y
                y = y + lift_y
            for i in range(len(x)):
                err.append(np.linalg.norm(y[i] - y_pred[i]) / np.linalg.norm(y[i]))
            return np.array(err)
        else:
            if lift_y is not None:
                y_pred = self.predict(x) + lift_y
                y = y + lift_y
                return np.linalg.norm(y - y_pred) / np.linalg.norm(y)
            return np.linalg.norm(y - self.predict(x)) / np.linalg.norm(y)

    def inf_norm(
        self, x: np.ndarray, y: np.ndarray, separate_err: bool = False
    ) -> Union[float, np.ndarray]:
        """
        Calculate the infinity norm of prediction errors.

        Computes the maximum absolute error between true and predicted values,
        which provides a measure of the worst-case prediction error.

        Parameters
        ----------
        x : np.ndarray
            Input features for prediction.
        y : np.ndarray
            True target values.
        separate_err : bool, optional
            If True, return the error for each sample separately.
            If False, return the overall aggregated error. Default is False.

        Returns
        -------
        np.ndarray or float
            If separate_err=True, returns array of infinity norm errors for each sample.
            If separate_err=False, returns overall infinity norm error.

        Notes
        -----
        The infinity norm is the maximum absolute difference:
        ||y_true - y_pred||_∞ = max|y_true - y_pred|
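
        Examples
        --------
        >>> # Illustrative only; ``model`` is assumed to be a fitted
        >>> # concrete subclass.
        >>> worst_case = model.inf_norm(x_test, y_test)  # doctest: +SKIP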
        """
        if separate_err:
            err = []
            y_pred = self.predict(x)
            for i in range(len(x)):
                err.append(np.linalg.norm(y[i] - y_pred[i], ord=np.inf))
            return np.array(err)
        else:
            # For a 2D array, np.linalg.norm(..., ord=np.inf) would return the
            # maximum absolute row sum; the documented behaviour is the
            # element-wise maximum absolute error, computed explicitly here.
            return np.max(np.abs(y - self.predict(x)))

    def performPOD(self, y: np.ndarray) -> np.ndarray:
        """
        Perform Proper Orthogonal Decomposition on the training data.

        This method applies POD to reduce the dimensionality of the target data
        from the full field representation to a reduced set of POD coefficients.
        It handles the truncation to the specified rank and validates the input.

        Parameters
        ----------
        y : np.ndarray
            Training data matrix of shape (n_samples, n_features) for which
            POD decomposition is to be performed.

        Returns
        -------
        np.ndarray
            POD coefficients matrix of shape (n_samples, rank) representing
            the training data in the reduced POD space.

        Raises
        ------
        ValueError
            If the specified rank is greater than the number of available modes.

        Notes
        -----
        This method calls `reduction` if POD has not been computed yet, then
        truncates the modes and coefficients to the specified rank. The POD
        decomposition follows: y ≈ coeffs @ modes, where coeffs are the returned
        values and modes are stored in self.v.
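
        Examples
        --------
        >>> # Sketch of the low-rank factorisation; ``model`` and ``y`` are
        >>> # assumed, and y_approx matches y only if the rank is large enough.
        >>> coeffs = model.performPOD(y)  # doctest: +SKIP
        >>> y_approx = coeffs @ model.v   # doctest: +SKIP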
        """
        # __init__ pre-assigns v_all = None, so a hasattr() check would always
        # be True; test against None to decide whether POD must be computed.
        if self.v_all is None:
            self.reduction(y)
        if self.rank > self.v_all.shape[0]:
            raise ValueError("Rank is greater than the number of modes.")
        self.s = self.s_all[: self.rank]
        self.v = self.v_all[: self.rank]
        return self.coeffs[:, : self.rank]

    def reduction(self, y: np.ndarray) -> None:
        """
        Perform POD using the specified algorithm (SVD or eigenvalue decomposition).

        This method computes the full POD decomposition of the training data using
        either singular value decomposition or eigenvalue decomposition, depending
        on the POD_algo parameter.

        Parameters
        ----------
        y : np.ndarray
            Training data matrix of shape (n_samples, n_features).

        Raises
        ------
        ValueError
            If an invalid POD algorithm is specified.

        Notes
        -----
        Two algorithms are supported:

        1. 'svd': Direct SVD decomposition
           - More accurate for well-conditioned problems
           - Better numerical stability
           - Recommended for most applications

        2. 'eigen': Eigenvalue decomposition of the covariance matrix
           - More memory efficient for wide matrices (n_features >> n_samples)
           - Potentially less stable for ill-conditioned problems
           - Useful when n_samples << n_features

        The method stores the full decomposition in attributes:

        - s_all: all singular values
        - v_all: all POD modes (right singular vectors)
        - coeffs: all POD coefficients (scaled left singular vectors)
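
        Examples
        --------
        >>> # Both algorithms reproduce the snapshot matrix up to round-off;
        >>> # an illustrative sketch with an assumed snapshot matrix ``y``.
        >>> model.POD_algo = "svd"                      # doctest: +SKIP
        >>> model.reduction(y)                          # doctest: +SKIP
        >>> np.allclose(y, model.coeffs @ model.v_all)  # doctest: +SKIP
        True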
        """
        if self.POD_algo == "svd":
            u, self.s_all, self.v_all = svd(y, full_matrices=False)
            self.coeffs = u @ np.diag(self.s_all)
            print("POD_SVD reduction completed.")
        elif self.POD_algo == "eigen":
            N, M = y.shape

            # Method of snapshots: eigen-decompose the (N x N) snapshot
            # correlation matrix instead of the (M x M) covariance matrix.
            C = y @ y.T
            eigenvalues, U = np.linalg.eigh(C)

            sorted_indices = np.argsort(eigenvalues)[::-1]
            sorted_eigenvalues = eigenvalues[sorted_indices]
            U = U[:, sorted_indices]

            # Clip tiny negative eigenvalues caused by round-off before sqrt.
            self.s_all = np.sqrt(np.maximum(sorted_eigenvalues, 0.0))
            self.coeffs = U @ np.diag(self.s_all)

            self.v_all = np.zeros((N, M))
            tolerance = 1e-10
            for i in range(N):
                if self.s_all[i] > tolerance:
                    u_i = U[:, i]
                    self.v_all[i, :] = (1 / self.s_all[i]) * (u_i.T @ y)
            print("POD_eigen reduction completed.")
        else:
            raise ValueError("Invalid POD method.")

    def truncation_error(self) -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculate the truncation error of the POD decomposition.

        This method computes the relative truncation error based on the singular
        values obtained from the POD decomposition. The truncation error quantifies
        the amount of information lost by retaining only a subset of the POD modes.

        Returns
        -------
        truncation_error : np.ndarray
            Relative energy truncation error for each possible rank r, defined as
            1 - (sum of the r largest squared singular values) /
            (sum of all squared singular values).
        projection_error : np.ndarray
            Square root of the truncation error, i.e. the relative projection
            error in the Frobenius norm for each possible rank.
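
        Examples
        --------
        >>> # Illustrative rank selection: the smallest rank whose relative
        >>> # projection error drops below 1e-3 (assumes reduction() was run).
        >>> trunc, proj = model.truncation_error()   # doctest: +SKIP
        >>> rank = int(np.argmax(proj <= 1e-3)) + 1  # doctest: +SKIP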
        """
        # s_all and s are pre-assigned None in __init__, so test against None
        # rather than with hasattr().
        if self.s_all is None or self.s is None:
            raise ValueError("POD decomposition has not been performed yet.")
        total_energy = np.sum(self.s_all**2)
        retained_energy = np.cumsum(self.s_all**2)
        truncation_error = 1 - retained_energy / total_energy
        projection_error = np.sqrt(truncation_error)

        return truncation_error, projection_error

    def reconstruct(
        self,
        x: np.ndarray,
        y: np.ndarray,
        refVTMName: str,
        saveFileName: str,
        dataType: str,
        x_train: Optional[np.ndarray] = None,
        y_train: Optional[np.ndarray] = None,
        x_test: Optional[np.ndarray] = None,
        y_test: Optional[np.ndarray] = None,
        is2D: bool = False,
    ) -> None:
        """
        Reconstruct the model predictions and save results to VTK files.

        This method trains the model and generates VTK files containing the true values,
        reconstructed values, and prediction errors for visualization and analysis.

        Parameters
        ----------
        x : np.ndarray
            Input features. Used for train-test split if specific splits not provided.
        y : np.ndarray
            Target values. Used for train-test split if specific splits not provided.
        refVTMName : str
            Path to the reference VTM file that provides the mesh structure.
        saveFileName : str
            Base filename for saving the reconstruction results.
        dataType : {'scalar', 'vector'}
            Type of data being reconstructed for VTK output.
        x_train : np.ndarray, optional
            Specific training input features. If None, automatic split is used.
        y_train : np.ndarray, optional
            Specific training target values. If None, automatic split is used.
        x_test : np.ndarray, optional
            Specific testing input features. If None, automatic split is used.
        y_test : np.ndarray, optional
            Specific testing target values. If None, automatic split is used.
        is2D : bool, optional
            Whether the data is 2D (for vector fields). Default is False.

        Notes
        -----
        The method creates a VTK file with three sets of fields for each test sample:

        - 'true_{i}': Original target values
        - 'rec_{i}': Reconstructed/predicted values
        - 'err_{i}': Absolute error (true - predicted)

        If no specific train/test split is provided, the method uses an 80-20 split
        with random_state=42.

        Examples
        --------
        >>> model.reconstruct(X, Y, 'mesh.vtm', 'results', 'vector', is2D=True)
        # Creates results.vtm with true, reconstructed, and error fields
        """
        if x_train is None or y_train is None or x_test is None or y_test is None:
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, train_size=0.8, random_state=42
            )
        self.fit(x_train, y_train)

        # Write the field data into a VTK file.
        refVTM = pv.MultiBlock(refVTMName)
        field_name = (
            [f"true_{i}" for i in range(x_test.shape[0])]
            + [f"rec_{i}" for i in range(x_test.shape[0])]
            + [f"err_{i}" for i in range(x_test.shape[0])]
        )

        # Predict once, then stack true values, reconstructions, and errors
        # for all test samples before writing them out.
        y_pred = self.predict(x_test)
        vtk_writer(
            np.vstack((y_test, y_pred, y_test - y_pred)),
            field_name,
            dataType,
            refVTM,
            saveFileName,
            is2D=is2D,
        )

    def validate(
        self,
        x: np.ndarray,
        y: np.ndarray,
        training_ratio: float = 0.8,
        rand_seed: int = 42,
        norm: str = "Frobenius",
        separate_err: bool = False,
        lift_y: Optional[np.ndarray] = None,
    ) -> Union[float, np.ndarray]:
        """
        Validate the model using a train-test split.

        This method provides a convenient way to assess model performance by
        automatically splitting the data, training the model, and computing
        prediction errors on the test set.

        Parameters
        ----------
        x : np.ndarray
            Input features of shape (n_samples, n_input_features).
        y : np.ndarray
            Target values of shape (n_samples, n_output_features).
        training_ratio : float, optional
            Fraction of data to use for training (0 < training_ratio < 1).
            Default is 0.8.
        rand_seed : int, optional
            Random seed for reproducible train-test splits. Default is 42.
        norm : {'Frobenius', 'inf'}, optional
            Type of norm to use for error calculation. Default is 'Frobenius'.
        separate_err : bool, optional
            If True, return the error for each test sample separately.
            If False, return the overall aggregated error. Default is False.
        lift_y : np.ndarray, optional
            If provided, this array is added to both true and predicted values
            before error calculation. Default is None.

        Returns
        -------
        float or np.ndarray
            The calculated norm of the prediction error on the test set.
            If separate_err=True, returns an array of errors for each test sample.
            If separate_err=False, returns a single aggregated error value.

        Raises
        ------
        ValueError
            If an invalid norm type is specified.

        Examples
        --------
        >>> model = SomeConcreteModel(rank=10)
        >>> error = model.validate(X, Y, training_ratio=0.7, norm='Frobenius')
        >>> print(f"Validation error: {error:.6f}")
        >>> # Using lifting to adjust predictions
        >>> lift = np.mean(Y, axis=0)
        >>> error_lifted = model.validate(X, Y, lift_y=lift, norm='inf')
        >>> print(f"Validation error with lifting: {error_lifted:.6f}")
        """
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=training_ratio, random_state=rand_seed
        )
        self.fit(x_train, y_train)

        if norm == "Frobenius":
            return self.frobenius_norm(
                x_test, y_test, lift_y=lift_y, separate_err=separate_err
            )
        elif norm == "inf":
            return self.inf_norm(x_test, y_test, separate_err=separate_err)
        else:
            # Raise instead of print-and-assert: assertions are stripped under
            # ``python -O`` and would let an invalid norm pass silently.
            raise ValueError("norm must be 'Frobenius' or 'inf'")

    def fixed_validate(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
        norm: str = "Frobenius",
        separate_err: bool = False,
        lift_y: Optional[np.ndarray] = None,
    ) -> Union[float, np.ndarray]:
        """
        Validate the model with fixed training and testing datasets.

        This method allows for validation with predetermined train-test splits,
        which is useful for consistent benchmarking and when specific data
        partitioning is required.

        Parameters
        ----------
        x_train : np.ndarray
            Training input features of shape (n_train_samples, n_input_features).
        y_train : np.ndarray
            Training target values of shape (n_train_samples, n_output_features).
        x_test : np.ndarray
            Testing input features of shape (n_test_samples, n_input_features).
        y_test : np.ndarray
            Testing target values of shape (n_test_samples, n_output_features).
        norm : {'Frobenius', 'inf'}, optional
            Type of norm to use for error calculation. Default is 'Frobenius'.
        separate_err : bool, optional
            If True, return the error for each test sample separately.
            If False, return the overall aggregated error. Default is False.
        lift_y : np.ndarray, optional
            If provided, this array is added to both true and predicted values
            before error calculation. Default is None.

        Returns
        -------
        float or np.ndarray
            The calculated norm of the prediction error. If separate_err=True,
            returns an array of errors for each test sample.

        Raises
        ------
        ValueError
            If an invalid norm type is specified.
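
        Examples
        --------
        >>> # Illustrative sketch with a predetermined split; all arrays are
        >>> # assumed to be prepared by the caller.
        >>> err = model.fixed_validate(
        ...     x_train, y_train, x_test, y_test, norm="Frobenius"
        ... )  # doctest: +SKIP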
        """
        self.fit(x_train, y_train)

        if norm == "Frobenius":
            return self.frobenius_norm(
                x_test, y_test, separate_err=separate_err, lift_y=lift_y
            )
        elif norm == "inf":
            return self.inf_norm(x_test, y_test, separate_err=separate_err)
        else:
            raise ValueError("norm must be 'Frobenius' or 'inf'")

    def multi_validate(
        self,
        x: np.ndarray,
        y: np.ndarray,
        ranks: List[int],
        training_ratio: float = 0.8,
        rand_seed: int = 42,
        norm: str = "Frobenius",
        separate_err: bool = False,
        lift_y: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        Validate the model performance across multiple POD ranks.

        This method systematically evaluates model performance for different
        numbers of POD modes, which is useful for determining the optimal
        rank-accuracy trade-off.

        Parameters
        ----------
        x : np.ndarray
            Input features of shape (n_samples, n_input_features).
        y : np.ndarray
            Target values of shape (n_samples, n_output_features).
        ranks : list of int
            List of POD ranks to evaluate.
        training_ratio : float, optional
            Fraction of data to use for training. Default is 0.8.
        rand_seed : int, optional
            Random seed for reproducible train-test splits. Default is 42.
        norm : {'Frobenius', 'inf'}, optional
            Type of norm to use for error calculation. Default is 'Frobenius'.
        separate_err : bool, optional
            If True, return the error for each test sample separately for each rank.
            If False, return overall aggregated errors. Default is False.
        lift_y : np.ndarray, optional
            If provided, this array is added to both true and predicted values
            before error calculation. Default is None.

        Returns
        -------
        np.ndarray
            Array of validation errors corresponding to each rank in the input list.

        Raises
        ------
        ValueError
            If an invalid norm type is specified.

        Notes
        -----
        The method uses the same train-test split for all ranks to ensure
        fair comparison. The original rank setting is modified during the
        process and should be reset if needed after calling this method.

        Examples
        --------
        >>> ranks = [5, 10, 15, 20, 25]
        >>> errors = model.multi_validate(X, Y, ranks, training_ratio=0.75)
        >>> optimal_rank = ranks[np.argmin(errors)]
        >>> print(f"Optimal rank: {optimal_rank}")
        """
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=training_ratio, random_state=rand_seed
        )
        errors = []
        for i in ranks:
            self.rank = i
            self.fit(x_train, y_train)
            if norm == "Frobenius":
                errors.append(
                    self.frobenius_norm(
                        x_test, y_test, separate_err=separate_err, lift_y=lift_y
                    )
                )
            elif norm == "inf":
                errors.append(self.inf_norm(x_test, y_test, separate_err=separate_err))
            else:
                raise ValueError("norm must be 'Frobenius' or 'inf'")
        return np.array(errors)

    def multi_validate_fixed(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
        ranks: List[int],
        norm: str = "Frobenius",
        separate_err: bool = False,
        lift_y: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """
        Validate the model across multiple POD ranks with fixed datasets.

        This method combines the functionality of multi-rank validation with
        fixed train-test splits, providing consistent evaluation across different
        POD ranks using predetermined data partitions.

        Parameters
        ----------
        x_train : np.ndarray
            Training input features of shape (n_train_samples, n_input_features).
        y_train : np.ndarray
            Training target values of shape (n_train_samples, n_output_features).
        x_test : np.ndarray
            Testing input features of shape (n_test_samples, n_input_features).
        y_test : np.ndarray
            Testing target values of shape (n_test_samples, n_output_features).
        ranks : list of int
            List of POD ranks to evaluate.
        norm : {'Frobenius', 'inf'}, optional
            Type of norm to use for error calculation. Default is 'Frobenius'.
        separate_err : bool, optional
            If True, return the error for each test sample separately for each rank.
            If False, return overall aggregated errors. Default is False.
        lift_y : np.ndarray, optional
            If provided, this array is added to both true and predicted values
            before error calculation. Default is None.

        Returns
        -------
        np.ndarray
            Array of validation errors corresponding to each rank. Shape depends
            on separate_err: if False, shape is (len(ranks),); if True, shape is
            (len(ranks), n_test_samples).

        Raises
        ------
        ValueError
            If an invalid norm type is specified.

        Notes
        -----
        This method is particularly useful for:

        - Systematic rank selection studies
        - Benchmarking with consistent datasets
        - Error analysis across different dimensionality reductions

        The original rank setting is modified during the process and should be
        reset if needed after calling this method.
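
        Examples
        --------
        >>> # Illustrative rank study on a fixed split; arrays are assumed.
        >>> ranks = [5, 10, 20]
        >>> errors = model.multi_validate_fixed(
        ...     x_train, y_train, x_test, y_test, ranks
        ... )  # doctest: +SKIP
        >>> best_rank = ranks[int(np.argmin(errors))]  # doctest: +SKIP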
        """
        errors = []
        for i in ranks:
            self.rank = i
            self.fit(x_train, y_train)
            if norm == "Frobenius":
                errors.append(
                    self.frobenius_norm(
                        x_test, y_test, separate_err=separate_err, lift_y=lift_y
                    )
                )
            elif norm == "inf":
                errors.append(self.inf_norm(x_test, y_test, separate_err=separate_err))
            else:
                raise ValueError("norm must be 'Frobenius' or 'inf'")
        return np.array(errors)

    def check_input(self, x: np.ndarray) -> np.ndarray:
        """
        Clip scaled input features to the band [-0.5, 1.5] around [0, 1].

        Inputs are expected to lie in the MinMax-scaled range [0, 1]; values
        outside the tolerance band are clipped and recorded in
        ``self.list_warning`` as (row, column, original value) triples.
        """
        tolerance = 0.5
        list_warning = []
        for i in range(x.shape[0]):
            for j in range(x.shape[1]):
                if x[i, j] > 1 + tolerance:
                    list_warning.append(np.array([i, j, x[i, j]]))
                    x[i, j] = 1 + tolerance
                elif x[i, j] < -tolerance:
                    list_warning.append(np.array([i, j, x[i, j]]))
                    x[i, j] = -tolerance

        if len(list_warning) > 0:
            warnings.warn(
                f"{len(list_warning)} input features are out of the expected range [0, 1]."
            )

        self.list_warning = list_warning
        return x
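

# ----------------------------------------------------------------------
# Illustrative smoke test (an editor's sketch, not part of the released
# module). ``DemoPODModel`` is a hypothetical concrete subclass that pairs
# the POD pipeline with scikit-learn's LinearRegression; the data below is
# synthetic.
if __name__ == "__main__":
    from sklearn.linear_model import LinearRegression

    class DemoPODModel(PODImodelAbstract):
        # "POD" in the class name activates performPOD() inside fit().
        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.model = LinearRegression()

        def fit_tmp(self, x, y):
            # x and y arrive already scaled (and y POD-reduced) from fit().
            self.model.fit(x, y)

        def predict_tmp(self, x):
            # predict() does not scale inputs, so apply the fitted scalers here.
            if self.with_scaler_x:
                x = self.scalar_X.transform(x)
            coeffs = self.model.predict(x)
            if self.with_scaler_y:
                coeffs = self.scalar_Y.inverse_transform(coeffs)
            # Lift the POD coefficients back to the full field.
            return coeffs @ self.v

    rng = np.random.default_rng(0)
    x = rng.random((40, 2))
    y = np.sin(x @ rng.random((2, 200)))  # synthetic snapshot matrix
    model = DemoPODModel(rank=5, POD_algo="svd")
    print("relative Frobenius error:", model.validate(x, y))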