PODImodels 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,840 @@
+ """
+ POD-based Interpolation Model Abstract Base Class
+ =================================================
+
+ This module defines the abstract base class for all POD-based interpolation models.
+ It provides a common interface and shared functionality for building reduced-order
+ models that combine Proper Orthogonal Decomposition with various machine learning
+ techniques.
+
+ Classes
+ -------
+ PODImodelAbstract
+     Abstract base class for POD-based interpolation models.
+ """
+
+ from abc import ABC, abstractmethod
+ import warnings
+ import numpy as np
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import MinMaxScaler
+ import pyvista as pv
+ from .PODdata import vtk_writer
+ from scipy.linalg import svd
+ from typing import Optional, Union, Tuple, List
+
+
+ class PODImodelAbstract(ABC):
+     """
+     Abstract base class for POD-based interpolation models.
+
+     This class provides a common interface for building reduced-order models that
+     combine Proper Orthogonal Decomposition (POD) with various machine learning
+     techniques. All concrete interpolation model classes should inherit from this
+     class and implement the abstract methods.
+
+     The class handles common functionality including:
+     - Data scaling and preprocessing
+     - POD decomposition for dimensionality reduction
+     - Model validation and error assessment
+     - VTK file output for visualization
+
+     Parameters
+     ----------
+     rank : int, optional
+         The number of POD modes to retain. Default is 10.
+     with_scaler_x : bool, optional
+         Whether to apply MinMax scaling to input features. Default is True.
+     with_scaler_y : bool, optional
+         Whether to apply MinMax scaling to target values. Default is True.
+     POD_algo : {'svd', 'eigen'}, optional
+         The algorithm to use for POD computation. 'svd' uses singular value
+         decomposition, 'eigen' uses eigenvalue decomposition. Default is 'eigen'.
+
+     Attributes
+     ----------
+     rank : int
+         Number of POD modes to retain.
+     with_scaler_x : bool
+         Flag for input scaling.
+     with_scaler_y : bool
+         Flag for output scaling.
+     POD_algo : str
+         POD algorithm type.
+     scalar_X : MinMaxScaler, optional
+         Scaler for input features (created if with_scaler_x=True).
+     scalar_Y : MinMaxScaler, optional
+         Scaler for output features (created if with_scaler_y=True).
+     v : np.ndarray
+         Truncated POD modes matrix.
+     v_all : np.ndarray
+         Full POD modes matrix.
+     s : np.ndarray
+         Truncated singular values.
+     s_all : np.ndarray
+         Full singular values.
+     coeffs : np.ndarray
+         POD coefficients matrix.
+
+     Notes
+     -----
+     Subclasses must implement `fit_tmp` and `predict_tmp` methods to define
+     the specific machine learning algorithm used for interpolation.
+
+     Examples
+     --------
+     >>> # Define a concrete implementation (example)
+     >>> class MyPODModel(PODImodelAbstract):
+     ...     def __init__(self, **kwargs):
+     ...         super().__init__(**kwargs)
+     ...         # Initialize specific model
+     ...
+     ...     def fit_tmp(self, x, y):
+     ...         # Implement specific fitting logic
+     ...         pass
+     ...
+     ...     def predict_tmp(self, x):
+     ...         # Implement specific prediction logic
+     ...         return predictions
+     """
+
+     @abstractmethod
+     def __init__(
+         self,
+         rank: int = 10,
+         with_scaler_x: bool = True,
+         with_scaler_y: bool = True,
+         POD_algo: str = "eigen",
+     ):
+         """
+         Abstract initialization method.
+
+         Parameters
+         ----------
+         rank : int, optional
+             The number of POD modes to retain. Default is 10.
+         with_scaler_x : bool, optional
+             Whether to apply MinMax scaling to input features. Default is True.
+         with_scaler_y : bool, optional
+             Whether to apply MinMax scaling to target values. Default is True.
+         POD_algo : {'svd', 'eigen'}, optional
+             The algorithm to use for POD computation. Default is 'eigen'.
+         """
+         self.rank: int = rank
+         self.with_scaler_x: bool = with_scaler_x
+         self.with_scaler_y: bool = with_scaler_y
+         self.POD_algo: str = POD_algo
+         self.scalar_X: Optional[MinMaxScaler] = None
+         self.scalar_Y: Optional[MinMaxScaler] = None
+         self.v: Optional[np.ndarray] = None
+         self.v_all: Optional[np.ndarray] = None
+         self.s: Optional[np.ndarray] = None
+         self.s_all: Optional[np.ndarray] = None
+         self.coeffs: Optional[np.ndarray] = None
+
+     @abstractmethod
+     def fit_tmp(self, x: np.ndarray, y: np.ndarray) -> None:
+         """
+         Abstract method for model-specific fitting logic.
+
+         This method should be implemented by subclasses to define the specific
+         machine learning algorithm used for learning the input-output mapping.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Preprocessed input features of shape (n_samples, n_features).
+         y : np.ndarray
+             Preprocessed target values of shape (n_samples, n_targets).
+         """
+
+     @abstractmethod
+     def predict_tmp(self, x: np.ndarray) -> np.ndarray:
+         """
+         Abstract method for model-specific prediction logic.
+
+         This method should be implemented by subclasses to define how predictions
+         are made using the trained model.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features for prediction.
+
+         Returns
+         -------
+         np.ndarray
+             Predicted values.
+         """
+
+     def fit(self, x: np.ndarray, y: np.ndarray) -> None:
+         """
+         Fit the model to the training data.
+
+         This method handles the complete training pipeline including input validation,
+         POD decomposition (if applicable), data scaling, and calling the model-specific
+         fitting method.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features of shape (n_samples, n_input_features).
+         y : np.ndarray
+             Target values of shape (n_samples, n_output_features).
+
+         Raises
+         ------
+         ValueError
+             If the number of samples in x and y don't match, or if inputs are not 2D.
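+
+         Examples
+         --------
+         A minimal sketch, assuming MyPODModel is a concrete subclass as in the
+         class-level example and X, Y are 2D snapshot arrays:
+
+         >>> model = MyPODModel(rank=5)  # doctest: +SKIP
+         >>> model.fit(X, Y)             # doctest: +SKIP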
+         """
+         if x.shape[0] != y.shape[0]:
+             raise ValueError("Number of samples in x and y must match.")
+         if x.ndim != 2 or y.ndim != 2:
+             raise ValueError("Input and output data must be 2D numpy arrays.")
+
+         if "POD" in self.__class__.__name__:
+             y = self.performPOD(y)
+
+         if self.with_scaler_x:
+             self.scalar_X = MinMaxScaler()
+             x = self.scalar_X.fit_transform(x)
+         if self.with_scaler_y:
+             self.scalar_Y = MinMaxScaler()
+             y = self.scalar_Y.fit_transform(y)
+
+         self.fit_tmp(x, y)
+
+     def predict(self, x: np.ndarray) -> np.ndarray:
+         """
+         Predict target values for given input features.
+
+         This method serves as a wrapper around the model-specific prediction method,
+         ensuring a consistent interface across all model types.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features of shape (n_samples, n_input_features).
+
+         Returns
+         -------
+         np.ndarray
+             Predicted target values of shape (n_samples, n_output_features).
+         """
+         return self.predict_tmp(x)
+
+     def frobenius_norm(
+         self,
+         x: np.ndarray,
+         y: np.ndarray,
+         separate_err: bool = False,
+         lift_y: Optional[np.ndarray] = None,
+     ) -> Union[float, np.ndarray]:
+         """
+         Calculate the Frobenius norm of prediction errors.
+
+         Computes the relative Frobenius norm between true and predicted values,
+         which provides a measure of the overall prediction accuracy.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features for prediction.
+         y : np.ndarray
+             True target values.
+         separate_err : bool, optional
+             If True, return the error for each sample separately.
+             If False, return the overall aggregated error. Default is False.
+         lift_y : np.ndarray, optional
+             If provided, this array is added to both true and predicted values
+             before error calculation. Default is None.
+
+         Returns
+         -------
+         np.ndarray or float
+             If separate_err=True, returns array of relative errors for each sample.
+             If separate_err=False, returns overall relative Frobenius norm error.
+
+         Notes
+         -----
+         The relative Frobenius norm is calculated as:
+         ||y_true - y_pred||_F / ||y_true||_F
+
+         For separate errors, it's calculated per sample as:
+         ||y_true[i] - y_pred[i]||_2 / ||y_true[i]||_2
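+
+         Examples
+         --------
+         The relative Frobenius norm formula, computed directly with NumPy on a
+         small illustrative pair of arrays (independent of any fitted model):
+
+         >>> import numpy as np
+         >>> y_true = np.array([[1.0, 2.0], [3.0, 4.0]])
+         >>> y_pred = np.array([[1.1, 1.9], [2.9, 4.2]])
+         >>> round(float(np.linalg.norm(y_true - y_pred) / np.linalg.norm(y_true)), 4)
+         0.0483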
+         """
+         if separate_err:
+             err = []
+             y_pred = self.predict(x)
+             if lift_y is not None:
+                 # Out-of-place addition so the caller's arrays are not mutated.
+                 y_pred = y_pred + lift_y
+                 y = y + lift_y
+             for i in range(len(x)):
+                 err.append(np.linalg.norm(y[i] - y_pred[i]) / np.linalg.norm(y[i]))
+             return np.array(err)
+         else:
+             y_pred = self.predict(x)
+             if lift_y is not None:
+                 y_pred = y_pred + lift_y
+                 y = y + lift_y
+             return np.linalg.norm(y - y_pred) / np.linalg.norm(y)
+
+     def inf_norm(
+         self, x: np.ndarray, y: np.ndarray, separate_err: bool = False
+     ) -> Union[float, np.ndarray]:
+         """
+         Calculate the infinity norm of prediction errors.
+
+         Computes the maximum absolute error between true and predicted values,
+         which provides a measure of the worst-case prediction error.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features for prediction.
+         y : np.ndarray
+             True target values.
+         separate_err : bool, optional
+             If True, return the error for each sample separately.
+             If False, return the overall aggregated error. Default is False.
+
+         Returns
+         -------
+         np.ndarray or float
+             If separate_err=True, returns array of infinity norm errors for each sample.
+             If separate_err=False, returns overall infinity norm error.
+
+         Notes
+         -----
+         Per sample (separate_err=True), the error is the vector infinity norm,
+         i.e. the maximum absolute component of the error vector:
+         ||y_true[i] - y_pred[i]||_∞ = max_j |y_true[i, j] - y_pred[i, j]|
+
+         Aggregated (separate_err=False), NumPy's matrix infinity norm of the 2D
+         error matrix is used, which is the maximum absolute row sum rather than
+         the maximum absolute entry.
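+
+         Examples
+         --------
+         The two NumPy infinity norms on a small illustrative error matrix
+         (independent of any fitted model); the entries are chosen to be exact
+         in binary floating point:
+
+         >>> import numpy as np
+         >>> e = np.array([[0.5, -0.25], [0.25, 0.125]])
+         >>> float(np.linalg.norm(e[0], ord=np.inf))  # vector norm: max |entry|
+         0.5
+         >>> float(np.linalg.norm(e, ord=np.inf))     # matrix norm: max abs row sum
+         0.75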
+         """
+         if separate_err:
+             err = []
+             y_pred = self.predict(x)
+             for i in range(len(x)):
+                 err.append(np.linalg.norm(y[i] - y_pred[i], ord=np.inf))
+             return np.array(err)
+         else:
+             return np.linalg.norm(y - self.predict(x), ord=np.inf)
+
+     def performPOD(self, y: np.ndarray) -> np.ndarray:
+         """
+         Perform Proper Orthogonal Decomposition on the training data.
+
+         This method applies POD to reduce the dimensionality of the target data
+         from the full field representation to a reduced set of POD coefficients.
+         It handles the truncation to the specified rank and validates the input.
+
+         Parameters
+         ----------
+         y : np.ndarray
+             Training data matrix of shape (n_samples, n_features) for which
+             POD decomposition is to be performed.
+
+         Returns
+         -------
+         np.ndarray
+             POD coefficients matrix of shape (n_samples, rank) representing
+             the training data in the reduced POD space.
+
+         Raises
+         ------
+         ValueError
+             If the specified rank is greater than the number of available modes.
+
+         Notes
+         -----
+         This method calls `reduction` if POD has not been computed yet, then
+         truncates the modes and coefficients to the specified rank. The POD
+         decomposition follows: y ≈ coeffs @ modes, where coeffs are the returned
+         values and modes are stored in self.v.
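+
+         Examples
+         --------
+         The identity this method relies on, shown directly with SVD on a small
+         random snapshot matrix (an illustrative sketch, independent of any
+         model instance):
+
+         >>> import numpy as np
+         >>> from scipy.linalg import svd
+         >>> rng = np.random.default_rng(0)
+         >>> y = rng.standard_normal((4, 20))
+         >>> u, s, v = svd(y, full_matrices=False)
+         >>> coeffs = u @ np.diag(s)
+         >>> bool(np.allclose(coeffs @ v, y))  # full rank: exact reconstruction
+         True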
+         """
+         # __init__ initializes v_all to None, so check for None rather than
+         # using hasattr (which would always be True and skip the reduction).
+         if self.v_all is None:
+             self.reduction(y)
+         if self.rank > self.v_all.shape[0]:
+             raise ValueError("Rank is greater than the number of modes.")
+         self.s = self.s_all[: self.rank]
+         self.v = self.v_all[: self.rank]
+         return self.coeffs[:, : self.rank]
+
+     def reduction(self, y: np.ndarray) -> None:
+         """
+         Perform POD using the specified algorithm (SVD or eigenvalue decomposition).
+
+         This method computes the full POD decomposition of the training data using
+         either singular value decomposition or eigenvalue decomposition, depending
+         on the POD_algo parameter.
+
+         Parameters
+         ----------
+         y : np.ndarray
+             Training data matrix of shape (n_samples, n_features).
+
+         Raises
+         ------
+         ValueError
+             If an invalid POD algorithm is specified.
+
+         Notes
+         -----
+         Two algorithms are supported:
+
+         1. 'svd': Direct SVD decomposition
+            - More accurate for well-conditioned problems
+            - Better numerical stability
+            - Recommended for most applications
+
+         2. 'eigen': Eigenvalue decomposition of the covariance matrix
+            - More memory efficient for wide matrices (n_features >> n_samples)
+            - Potentially less stable for ill-conditioned problems
+            - Useful when n_samples << n_features
+
+         The method stores the full decomposition in attributes:
+         - s_all: all singular values
+         - v_all: all POD modes (right singular vectors)
+         - coeffs: all POD coefficients (scaled left singular vectors)
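+
+         Examples
+         --------
+         Why the two branches agree, sketched directly with NumPy (illustrative
+         only): the eigenvalues of y @ y.T are the squared singular values of y.
+
+         >>> import numpy as np
+         >>> rng = np.random.default_rng(0)
+         >>> y = rng.standard_normal((5, 50))
+         >>> s_svd = np.linalg.svd(y, compute_uv=False)
+         >>> s_eig = np.sqrt(np.sort(np.linalg.eigvalsh(y @ y.T))[::-1])
+         >>> bool(np.allclose(s_svd, s_eig))
+         True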
+         """
+         if self.POD_algo == "svd":
+             u, self.s_all, self.v_all = svd(y, full_matrices=False)
+             self.coeffs = u @ np.diag(self.s_all)
+             print("POD_SVD reduction completed.")
+         elif self.POD_algo == "eigen":
+             N, M = y.shape
+
+             C = y @ y.T
+             eigenvalues, U = np.linalg.eigh(C)
+
+             sorted_indices = np.argsort(eigenvalues)[::-1]
+             sorted_eigenvalues = eigenvalues[sorted_indices]
+             U = U[:, sorted_indices]
+
+             # eigh can return tiny negative eigenvalues from round-off;
+             # clamp them to zero before taking the square root.
+             self.s_all = np.sqrt(np.maximum(sorted_eigenvalues, 0.0))
+             self.coeffs = U @ np.diag(self.s_all)
+
+             self.v_all = np.zeros((N, M))
+             tolerance = 1e-10
+             for i in range(N):
+                 if self.s_all[i] > tolerance:
+                     u_i = U[:, i]
+                     self.v_all[i, :] = (1 / self.s_all[i]) * (u_i.T @ y)
+             print("POD_eigen reduction completed.")
+         else:
+             raise ValueError("Invalid POD method.")
+
+     def truncation_error(self) -> Tuple[np.ndarray, np.ndarray]:
+         """
+         Calculate the truncation error of the POD decomposition.
+
+         This method computes the relative truncation error based on the singular
+         values obtained from the POD decomposition. The truncation error quantifies
+         the amount of information lost by retaining only a subset of the POD modes.
+
+         Returns
+         -------
+         truncation_error : np.ndarray
+             Relative energy truncation error for each possible rank k:
+             1 - (sum of the first k squared singular values) / (sum of all
+             squared singular values).
+         projection_error : np.ndarray
+             Square root of the energy truncation error, i.e. the relative
+             projection error in the Frobenius norm for each rank k.
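+
+         Examples
+         --------
+         The energy-based formula, computed directly from a small illustrative
+         set of singular values (independent of any fitted model):
+
+         >>> import numpy as np
+         >>> s = np.array([3.0, 2.0, 1.0])
+         >>> 1 - np.cumsum(s**2) / np.sum(s**2)  # energy lost after ranks 1, 2, 3
+         array([0.35714286, 0.07142857, 0.        ])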
+         """
+         # s_all and s are initialized to None in __init__, so test for None
+         # instead of hasattr (which would always be True).
+         if self.s_all is None or self.s is None:
+             raise ValueError("POD decomposition has not been performed yet.")
+         total_energy = np.sum(self.s_all**2)
+         retained_energy = np.cumsum(self.s_all**2)
+         truncation_error = 1 - retained_energy / total_energy
+         # Clamp tiny negative round-off values before the square root.
+         projection_error = np.sqrt(np.maximum(truncation_error, 0.0))
+
+         return truncation_error, projection_error
+
+     def reconstruct(
+         self,
+         x: np.ndarray,
+         y: np.ndarray,
+         refVTMName: str,
+         saveFileName: str,
+         dataType: str,
+         x_train: Optional[np.ndarray] = None,
+         y_train: Optional[np.ndarray] = None,
+         x_test: Optional[np.ndarray] = None,
+         y_test: Optional[np.ndarray] = None,
+         is2D: bool = False,
+     ) -> None:
+         """
+         Reconstruct the model predictions and save results to VTK files.
+
+         This method trains the model and generates VTK files containing the true values,
+         reconstructed values, and prediction errors for visualization and analysis.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features. Used for train-test split if specific splits not provided.
+         y : np.ndarray
+             Target values. Used for train-test split if specific splits not provided.
+         refVTMName : str
+             Path to the reference VTM file that provides the mesh structure.
+         saveFileName : str
+             Base filename for saving the reconstruction results.
+         dataType : {'scalar', 'vector'}
+             Type of data being reconstructed for VTK output.
+         x_train : np.ndarray, optional
+             Specific training input features. If None, automatic split is used.
+         y_train : np.ndarray, optional
+             Specific training target values. If None, automatic split is used.
+         x_test : np.ndarray, optional
+             Specific testing input features. If None, automatic split is used.
+         y_test : np.ndarray, optional
+             Specific testing target values. If None, automatic split is used.
+         is2D : bool, optional
+             Whether the data is 2D (for vector fields). Default is False.
+
+         Notes
+         -----
+         The method creates a VTK file with three sets of fields for each test sample:
+         - 'true_{i}': Original target values
+         - 'rec_{i}': Reconstructed/predicted values
+         - 'err_{i}': Signed error (true - predicted)
+
+         If no specific train/test split is provided, the method uses an 80-20 split
+         with random_state=42.
+
+         Examples
+         --------
+         >>> model.reconstruct(X, Y, 'mesh.vtm', 'results', 'vector', is2D=True)
+         # Creates results.vtm with true, reconstructed, and error fields
+         """
+         if x_train is None or y_train is None or x_test is None or y_test is None:
+             x_train, x_test, y_train, y_test = train_test_split(
+                 x, y, train_size=0.8, random_state=42
+             )
+         self.fit(x_train, y_train)
+
+         # Write the true, reconstructed, and error fields into a VTK file.
+         refVTM = pv.MultiBlock(refVTMName)
+         field_name = (
+             [f"true_{i}" for i in range(x_test.shape[0])]
+             + [f"rec_{i}" for i in range(x_test.shape[0])]
+             + [f"err_{i}" for i in range(x_test.shape[0])]
+         )
+
+         # Predict once, then stack true values, reconstructions, and errors.
+         y_pred = self.predict(x_test)
+         vtk_writer(
+             np.vstack((y_test, y_pred, y_test - y_pred)),
+             field_name,
+             dataType,
+             refVTM,
+             saveFileName,
+             is2D=is2D,
+         )
+
+     def validate(
+         self,
+         x: np.ndarray,
+         y: np.ndarray,
+         training_ratio: float = 0.8,
+         rand_seed: int = 42,
+         norm: str = "Frobenius",
+         separate_err: bool = False,
+         lift_y: Optional[np.ndarray] = None,
+     ) -> Union[float, np.ndarray]:
+         """
+         Validate the model using a train-test split.
+
+         This method provides a convenient way to assess model performance by
+         automatically splitting the data, training the model, and computing
+         prediction errors on the test set.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features of shape (n_samples, n_input_features).
+         y : np.ndarray
+             Target values of shape (n_samples, n_output_features).
+         training_ratio : float, optional
+             Fraction of data to use for training (0 < training_ratio < 1).
+             Default is 0.8.
+         rand_seed : int, optional
+             Random seed for reproducible train-test splits. Default is 42.
+         norm : {'Frobenius', 'inf'}, optional
+             Type of norm to use for error calculation. Default is 'Frobenius'.
+         separate_err : bool, optional
+             If True, return the error for each test sample separately.
+             If False, return the overall aggregated error. Default is False.
+         lift_y : np.ndarray, optional
+             If provided, this array is added to both true and predicted values
+             before error calculation. Only applied when norm='Frobenius'.
+             Default is None.
+
+         Returns
+         -------
+         float or np.ndarray
+             The calculated norm of the prediction error on the test set.
+             If separate_err=True, returns an array of errors for each test sample.
+             If separate_err=False, returns a single aggregated error value.
+
+         Raises
+         ------
+         ValueError
+             If an invalid norm type is specified.
+
+         Examples
+         --------
+         >>> model = SomeConcreteModel(rank=10)
+         >>> error = model.validate(X, Y, training_ratio=0.7, norm='Frobenius')
+         >>> print(f"Validation error: {error:.6f}")
+         >>> # Using lifting to adjust predictions (Frobenius norm only)
+         >>> lift = np.mean(Y, axis=0)
+         >>> error_lifted = model.validate(X, Y, lift_y=lift)
+         >>> print(f"Validation error with lifting: {error_lifted:.6f}")
+         """
+         x_train, x_test, y_train, y_test = train_test_split(
+             x, y, train_size=training_ratio, random_state=rand_seed
+         )
+         self.fit(x_train, y_train)
+
+         if norm == "Frobenius":
+             return self.frobenius_norm(
+                 x_test, y_test, lift_y=lift_y, separate_err=separate_err
+             )
+         elif norm == "inf":
+             return self.inf_norm(x_test, y_test, separate_err=separate_err)
+         else:
+             raise ValueError("norm must be 'Frobenius' or 'inf'.")
+
+     def fixed_validate(
+         self,
+         x_train: np.ndarray,
+         y_train: np.ndarray,
+         x_test: np.ndarray,
+         y_test: np.ndarray,
+         norm: str = "Frobenius",
+         separate_err: bool = False,
+         lift_y: Optional[np.ndarray] = None,
+     ) -> Union[float, np.ndarray]:
+         """
+         Validate the model with fixed training and testing datasets.
+
+         This method allows for validation with predetermined train-test splits,
+         which is useful for consistent benchmarking and when specific data
+         partitioning is required.
+
+         Parameters
+         ----------
+         x_train : np.ndarray
+             Training input features of shape (n_train_samples, n_input_features).
+         y_train : np.ndarray
+             Training target values of shape (n_train_samples, n_output_features).
+         x_test : np.ndarray
+             Testing input features of shape (n_test_samples, n_input_features).
+         y_test : np.ndarray
+             Testing target values of shape (n_test_samples, n_output_features).
+         norm : {'Frobenius', 'inf'}, optional
+             Type of norm to use for error calculation. Default is 'Frobenius'.
+         separate_err : bool, optional
+             If True, return the error for each test sample separately.
+             If False, return the overall aggregated error. Default is False.
+         lift_y : np.ndarray, optional
+             If provided, this array is added to both true and predicted values
+             before error calculation. Only applied when norm='Frobenius'.
+             Default is None.
+
+         Returns
+         -------
+         float or np.ndarray
+             The calculated norm of the prediction error. If separate_err=True,
+             returns an array of errors for each test sample.
+
+         Raises
+         ------
+         ValueError
+             If an invalid norm type is specified.
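+
+         Examples
+         --------
+         A minimal sketch, assuming a concrete subclass and predetermined splits
+         (X_tr, Y_tr, X_te, Y_te are placeholder arrays):
+
+         >>> model = SomeConcreteModel(rank=10)                  # doctest: +SKIP
+         >>> err = model.fixed_validate(X_tr, Y_tr, X_te, Y_te)  # doctest: +SKIP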
+         """
+         self.fit(x_train, y_train)
+
+         if norm == "Frobenius":
+             return self.frobenius_norm(
+                 x_test, y_test, separate_err=separate_err, lift_y=lift_y
+             )
+         elif norm == "inf":
+             return self.inf_norm(x_test, y_test, separate_err=separate_err)
+         else:
+             raise ValueError("norm must be 'Frobenius' or 'inf'.")
+
+     def multi_validate(
+         self,
+         x: np.ndarray,
+         y: np.ndarray,
+         ranks: List[int],
+         training_ratio: float = 0.8,
+         rand_seed: int = 42,
+         norm: str = "Frobenius",
+         separate_err: bool = False,
+         lift_y: Optional[np.ndarray] = None,
+     ) -> np.ndarray:
+         """
+         Validate the model performance across multiple POD ranks.
+
+         This method systematically evaluates model performance for different
+         numbers of POD modes, which is useful for determining the optimal
+         rank-accuracy trade-off.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Input features of shape (n_samples, n_input_features).
+         y : np.ndarray
+             Target values of shape (n_samples, n_output_features).
+         ranks : list of int
+             List of POD ranks to evaluate.
+         training_ratio : float, optional
+             Fraction of data to use for training. Default is 0.8.
+         rand_seed : int, optional
+             Random seed for reproducible train-test splits. Default is 42.
+         norm : {'Frobenius', 'inf'}, optional
+             Type of norm to use for error calculation. Default is 'Frobenius'.
+         separate_err : bool, optional
+             If True, return the error for each test sample separately for each rank.
+             If False, return overall aggregated errors. Default is False.
+         lift_y : np.ndarray, optional
+             If provided, this array is added to both true and predicted values
+             before error calculation. Only applied when norm='Frobenius'.
+             Default is None.
+
+         Returns
+         -------
+         np.ndarray
+             Array of validation errors corresponding to each rank in the input list.
+
+         Raises
+         ------
+         ValueError
+             If an invalid norm type is specified.
+
+         Notes
+         -----
+         The method uses the same train-test split for all ranks to ensure
+         fair comparison. The original rank setting is modified during the
+         process and should be reset if needed after calling this method.
+
+         Examples
+         --------
+         >>> ranks = [5, 10, 15, 20, 25]
+         >>> errors = model.multi_validate(X, Y, ranks, training_ratio=0.75)
+         >>> optimal_rank = ranks[np.argmin(errors)]
+         >>> print(f"Optimal rank: {optimal_rank}")
+         """
+         x_train, x_test, y_train, y_test = train_test_split(
+             x, y, train_size=training_ratio, random_state=rand_seed
+         )
+         errors = []
+         for i in ranks:
+             self.rank = i
+             self.fit(x_train, y_train)
+             if norm == "Frobenius":
+                 errors.append(
+                     self.frobenius_norm(
+                         x_test, y_test, separate_err=separate_err, lift_y=lift_y
+                     )
+                 )
+             elif norm == "inf":
+                 errors.append(self.inf_norm(x_test, y_test, separate_err=separate_err))
+             else:
+                 raise ValueError("norm must be 'Frobenius' or 'inf'.")
+         return np.array(errors)
+
+     def multi_validate_fixed(
+         self,
+         x_train: np.ndarray,
+         y_train: np.ndarray,
+         x_test: np.ndarray,
+         y_test: np.ndarray,
+         ranks: List[int],
+         norm: str = "Frobenius",
+         separate_err: bool = False,
+         lift_y: Optional[np.ndarray] = None,
+     ) -> np.ndarray:
+         """
+         Validate the model across multiple POD ranks with fixed datasets.
+
+         This method combines the functionality of multi-rank validation with
+         fixed train-test splits, providing consistent evaluation across different
+         POD ranks using predetermined data partitions.
+
+         Parameters
+         ----------
+         x_train : np.ndarray
+             Training input features of shape (n_train_samples, n_input_features).
+         y_train : np.ndarray
+             Training target values of shape (n_train_samples, n_output_features).
+         x_test : np.ndarray
+             Testing input features of shape (n_test_samples, n_input_features).
+         y_test : np.ndarray
+             Testing target values of shape (n_test_samples, n_output_features).
+         ranks : list of int
+             List of POD ranks to evaluate.
+         norm : {'Frobenius', 'inf'}, optional
+             Type of norm to use for error calculation. Default is 'Frobenius'.
+         separate_err : bool, optional
+             If True, return the error for each test sample separately for each rank.
+             If False, return overall aggregated errors. Default is False.
+         lift_y : np.ndarray, optional
+             If provided, this array is added to both true and predicted values
+             before error calculation. Only applied when norm='Frobenius'.
+             Default is None.
+
+         Returns
+         -------
+         np.ndarray
+             Array of validation errors corresponding to each rank. Shape depends
+             on separate_err: if False, shape is (len(ranks),); if True, shape is
+             (len(ranks), n_test_samples).
+
+         Raises
+         ------
+         ValueError
+             If an invalid norm type is specified.
+
+         Notes
+         -----
+         This method is particularly useful for:
+         - Systematic rank selection studies
+         - Benchmarking with consistent datasets
+         - Error analysis across different dimensionality reductions
+
+         The original rank setting is modified during the process and should be
+         reset if needed after calling this method.
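+
+         Examples
+         --------
+         A minimal sketch, assuming a concrete subclass and predetermined splits
+         (X_tr, Y_tr, X_te, Y_te are placeholder arrays):
+
+         >>> ranks = [5, 10, 20]
+         >>> errs = model.multi_validate_fixed(X_tr, Y_tr, X_te, Y_te, ranks)  # doctest: +SKIP
+         >>> best_rank = ranks[int(np.argmin(errs))]                           # doctest: +SKIP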
+         """
+         errors = []
+         for i in ranks:
+             self.rank = i
+             self.fit(x_train, y_train)
+             if norm == "Frobenius":
+                 errors.append(
+                     self.frobenius_norm(
+                         x_test, y_test, separate_err=separate_err, lift_y=lift_y
+                     )
+                 )
+             elif norm == "inf":
+                 errors.append(self.inf_norm(x_test, y_test, separate_err=separate_err))
+             else:
+                 raise ValueError("norm must be 'Frobenius' or 'inf'.")
+         return np.array(errors)
+
+     def check_input(self, x: np.ndarray) -> np.ndarray:
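+         """
+         Clamp scaled input features to the range [-tolerance, 1 + tolerance].
+
+         Inputs are expected to lie roughly in [0, 1] after MinMax scaling.
+         Entries further than tolerance (0.5) outside that range are clipped
+         in place and recorded in self.list_warning as (row, column, original
+         value) triples.
+
+         Parameters
+         ----------
+         x : np.ndarray
+             Scaled input features of shape (n_samples, n_features).
+
+         Returns
+         -------
+         np.ndarray
+             The input array with out-of-range entries clipped.
+         """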
823
+ tolerance = 0.5
824
+ list_warning = []
825
+ for i in range(x.shape[0]):
826
+ for j in range(x.shape[1]):
827
+ if x[i, j] > 1 + tolerance:
828
+ list_warning.append(np.array([i, j, x[i, j]]))
829
+ x[i, j] = 1 + tolerance
830
+ elif x[i, j] < -tolerance:
831
+ list_warning.append(np.array([i, j, x[i, j]]))
832
+ x[i, j] = -tolerance
833
+
834
+ # if len(list_warning) > 0:
835
+ # warnings.warn(
836
+ # f"{len(list_warning)} input features are out of the expected range [0, 1]. "
837
+ # )
838
+
839
+ self.list_warning = list_warning
840
+ return x