POPSRegression 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- popsregression-0.1.0/LICENSE +21 -0
- popsregression-0.1.0/PKG-INFO +100 -0
- popsregression-0.1.0/POPSRegression/POPSRegression.py +394 -0
- popsregression-0.1.0/POPSRegression/__init__.py +1 -0
- popsregression-0.1.0/POPSRegression.egg-info/PKG-INFO +100 -0
- popsregression-0.1.0/POPSRegression.egg-info/SOURCES.txt +10 -0
- popsregression-0.1.0/POPSRegression.egg-info/dependency_links.txt +1 -0
- popsregression-0.1.0/POPSRegression.egg-info/requires.txt +3 -0
- popsregression-0.1.0/POPSRegression.egg-info/top_level.txt +1 -0
- popsregression-0.1.0/README.md +84 -0
- popsregression-0.1.0/pyproject.toml +29 -0
- popsregression-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 TD Swinburne (Tom)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: POPSRegression
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bayesian regression for low-noise data using POPS algorithm
|
|
5
|
+
Author-email: Thomas D Swinburne <thomas.swinburne@cnrs.fr>, Danny Perez <danny_perez@lanl.gov>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tomswinburne/POPS-Regression
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/tomswinburne/POPS-Regression/issues
|
|
9
|
+
Project-URL: Documentation, https://github.com/tomswinburne/POPS-Regression/blob/main/README.md
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: scikit-learn>=0.24.0
|
|
14
|
+
Requires-Dist: scipy>=1.6.0
|
|
15
|
+
Requires-Dist: numpy>=1.20.0
|
|
16
|
+
|
|
17
|
+
# POPSRegression
|
|
18
|
+
Regression scheme from the paper
|
|
19
|
+
|
|
20
|
+
*Parameter uncertainties for imperfect surrogate models in the low-noise regime*
|
|
21
|
+
|
|
22
|
+
TD Swinburne and D Perez, [arXiv 2024](https://arxiv.org/abs/2402.01810v3)
|
|
23
|
+
|
|
24
|
+
```bibtex
|
|
25
|
+
@misc{swinburne2024,
|
|
26
|
+
title={Parameter uncertainties for imperfect surrogate models in the low-noise regime},
|
|
27
|
+
author={Thomas D Swinburne and Danny Perez},
|
|
28
|
+
year={2024},
|
|
29
|
+
eprint={2402.01810},
|
|
30
|
+
archivePrefix={arXiv},
|
|
31
|
+
primaryClass={stat.ML},
|
|
32
|
+
url={https://arxiv.org/abs/2402.01810v3},
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Bayesian regression for low-noise data (vanishing aleatoric uncertainty).
|
|
38
|
+
|
|
39
|
+
Fits the weights of a regression model using BayesianRidge, then estimates weight uncertainties accounting for model misspecification using the POPS (Pointwise Optimal Parameter Sets) algorithm.
|
|
40
|
+
|
|
41
|
+
The method can easily handle high-dimensional linear problems with minimal overhead compared to any linear regression scheme.
|
|
42
|
+
|
|
43
|
+
Bayesian regression is often used in computational science to fit the weights of a surrogate model which approximates some complex calculation.
|
|
44
|
+
In many important cases the target calculation is near-deterministic, or low-noise, meaning the true data has vanishing aleatoric uncertainty.
|
|
45
|
+
However, there can be large misspecification uncertainty, i.e. the model weights are intrinsically uncertain as the model is unable to exactly match training data.
|
|
46
|
+
Existing Bayesian regression schemes based on loss minimization can only estimate epistemic and aleatoric uncertainties.
|
|
47
|
+
|
|
48
|
+
## Example usage
|
|
49
|
+
Here, usage follows `sklearn.linear_model`, inheriting `BayesianRidge`
|
|
50
|
+
|
|
51
|
+
After running `BayesianRidge.fit(..)`, the `alpha_` attribute is fixed to `np.inf` as aleatoric uncertainty is assumed negligible.
|
|
52
|
+
|
|
53
|
+
The `sigma_` matrix still contains epistemic weight uncertainties, whilst `misspecification_sigma_` contains the POPS uncertainties.
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
|
|
57
|
+
from POPSRegression import POPSRegression
|
|
58
|
+
|
|
59
|
+
X_train,X_test,y_train,y_test = ...
|
|
60
|
+
|
|
61
|
+
# Sobol resampling of hypercube with 1.0 samples / training point
|
|
62
|
+
model = POPSRegression(resampling_method='sobol',resample_density=1.)
|
|
63
|
+
|
|
64
|
+
# fit the model, sample POPS hypercube
|
|
65
|
+
model.fit(X_train,y_train)
|
|
66
|
+
|
|
67
|
+
# Return hypercube std, max/min and epistemic uncertainty from inference
|
|
68
|
+
y_pred, y_std, y_max, y_min, y_std_epistemic = \
|
|
69
|
+
model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# can also return max/min
|
|
75
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# returns std by default
|
|
79
|
+
y_pred, y_std = model.predict(X_test)
|
|
80
|
+
|
|
81
|
+
# can also return max/min
|
|
82
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
83
|
+
|
|
84
|
+
# can also resample the hypercube vectors
|
|
85
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True,resample=True)
|
|
86
|
+
|
|
87
|
+
# can also return the epistemic uncertainty (decreases as 1/sqrt(n_samples))
|
|
88
|
+
y_pred, y_std, y_max, y_min, y_std_epistemic = model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
As can be seen, the final error bars give very good coverage of the test output
|
|
92
|
+
|
|
93
|
+
Extreme low-dimensional case, fitting N data points to a quartic polynomial (P=5 parameters) to some complex oscillatory function
|
|
94
|
+
|
|
95
|
+
Green: two sigma of `sigma_` weight uncertainty from Bayesian Regression (i.e. without `alpha_` term for aleatoric error)
|
|
96
|
+
|
|
97
|
+
Orange: two sigma of `sigma_` and `misspecification_sigma_` posterior from POPS Regression
|
|
98
|
+
|
|
99
|
+

|
|
100
|
+
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.linear_model import BayesianRidge
|
|
3
|
+
from scipy.linalg import pinvh, eigh
|
|
4
|
+
from sklearn.base import BaseEstimator, RegressorMixin, _fit_context
|
|
5
|
+
from sklearn.utils._param_validation import Interval, StrOptions
|
|
6
|
+
from numbers import Real, Integral
|
|
7
|
+
from scipy.stats import qmc
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
###############################################################################
|
|
11
|
+
# POPS (Pointwise Optimal Parameter Sets) regression
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class POPSRegression(BayesianRidge):
    """
    Bayesian regression for low-noise data (vanishing aleatoric uncertainty).

    Fits the weights of a regression model using BayesianRidge, then estimates
    weight uncertainties (``sigma_`` in ``BayesianRidge``) accounting for model
    misspecification using the POPS (Pointwise Optimal Parameter Sets)
    algorithm [1]. After fitting, the ``alpha_`` attribute is fixed to
    ``np.inf`` as aleatoric uncertainty is assumed negligible.

    Bayesian regression is often used in computational science to fit the
    weights of a surrogate model which approximates some complex calculation.
    In many important cases the target calculation is near-deterministic, or
    low-noise, meaning the true data has vanishing aleatoric uncertainty.
    However, there can be large misspecification uncertainty, i.e. the model
    weights are intrinsically uncertain as the model is unable to exactly
    match training data. Existing Bayesian regression schemes based on loss
    minimization can only estimate epistemic and aleatoric uncertainties; in
    the low-noise limit, weight uncertainties are therefore significantly
    underestimated.

    Parameters
    ----------
    max_iter : int, default=300
        Maximum number of iterations.
    tol : float, default=1e-3
        Stop the algorithm if w has converged.
    alpha_1 : float, default=1e-6
        Shape parameter for the Gamma distribution prior over alpha.
    alpha_2 : float, default=1e-6
        Inverse scale (rate) parameter for the Gamma prior over alpha.
    lambda_1 : float, default=1e-6
        Shape parameter for the Gamma distribution prior over lambda.
    lambda_2 : float, default=1e-6
        Inverse scale (rate) parameter for the Gamma prior over lambda.
    alpha_init : float, default=None
        Initial value for alpha (precision of the noise).
    lambda_init : float, default=None
        Initial value for lambda (precision of the weights).
    compute_score : bool, default=False
        If True, compute the objective function at each step of the model.
    fit_intercept : bool, default=False
        If True, a constant feature is appended to the design matrix in
        ``fit`` instead of using BayesianRidge's own intercept handling.
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    verbose : bool, default=False
        Verbose mode when fitting the model.
    mode_threshold : float, default=1e-8
        Relative eigenvalue threshold used to select the principal modes of
        the pointwise corrections.
    resample_density : float, default=1.0
        Number of hypercube samples drawn per training point (a minimum of
        100 samples is always drawn).
    resampling_method : str, default='uniform'
        Method of resampling for the POPS algorithm. Must be one of
        'sobol', 'latin', 'halton', 'grid', or 'uniform'.
    percentile_clipping : float, default=0.0
        Percentile to clip from each end of the distribution when determining
        the hypercube bounds, i.e. spans [x, 100-x]. Must be between 0 and 50,
        but in practice should be between 0.0% and 0.5% for robust bounds.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Coefficients of the regression model (mean of distribution).
    intercept_ : float
        Independent term in decision function. Set to 0.0 because
        ``fit_intercept`` is internally disabled.
    alpha_ : float
        Set to ``np.inf`` after fitting (aleatoric noise suppressed).
    lambda_ : float
        Estimated precision of the weights.
    sigma_ : array-like of shape (n_features, n_features)
        Epistemic variance-covariance matrix of the weights.
    misspecification_sigma_ : array-like of shape (n_features, n_features)
        POPS misspecification covariance of the weights.
    scores_ : list
        If computed, value of the objective function (to be maximized).

    Notes
    -----
    The POPS algorithm extends Bayesian Ridge Regression by incorporating
    probabilistic optimization of predictive subspaces, which can lead to
    improved performance in high-dimensional settings.

    References
    ----------
    .. [1] Swinburne, T.D. and Perez, D. (2024).
       Parameter uncertainties for imperfect surrogate models in the
       low-noise regime, arXiv:2402.01810v3
    """

    _parameter_constraints: dict = {
        "max_iter": [Interval(Integral, 1, None, closed="left")],
        "tol": [Interval(Real, 0, None, closed="neither")],
        "alpha_1": [Interval(Real, 0, None, closed="left")],
        "alpha_2": [Interval(Real, 0, None, closed="left")],
        "lambda_1": [Interval(Real, 0, None, closed="left")],
        "lambda_2": [Interval(Real, 0, None, closed="left")],
        "alpha_init": [None, Interval(Real, 0, None, closed="left")],
        "lambda_init": [None, Interval(Real, 0, None, closed="left")],
        "compute_score": ["boolean"],
        "fit_intercept": ["boolean"],
        "copy_X": ["boolean"],
        "verbose": ["verbose"],
        # BUGFIX: 'grid' is accepted by _resample_hypercube and documented,
        # but was previously missing here, so parameter validation rejected it.
        "resampling_method": [
            StrOptions({"uniform", "sobol", "latin", "halton", "grid"})
        ],
        "mode_threshold": [Interval(Real, 0, None, closed="neither")],
        "resample_density": [Interval(Real, 0, None, closed="neither")],
        "percentile_clipping": [Interval(Real, 0, 50.0, closed="both")],
    }

    def __init__(
        self,
        *,
        max_iter=300,
        tol=1.0e-3,
        alpha_1=1.0e-6,
        alpha_2=1.0e-6,
        lambda_1=1.0e-6,
        lambda_2=1.0e-6,
        alpha_init=None,
        lambda_init=None,
        compute_score=False,
        fit_intercept=False,
        copy_X=True,
        verbose=False,
        mode_threshold=1.0e-8,
        resample_density=1.0,
        resampling_method='uniform',
        percentile_clipping=0.0,
    ):
        super().__init__(
            max_iter=max_iter,
            tol=tol,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            lambda_1=lambda_1,
            lambda_2=lambda_2,
            alpha_init=alpha_init,
            lambda_init=lambda_init,
            compute_score=compute_score,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            verbose=verbose,
        )
        self.mode_threshold = mode_threshold
        self.fit_intercept_flag = False
        self.resample_density = resample_density
        self.resampling_method = resampling_method
        self.percentile_clipping = percentile_clipping
        self._validate_params()

        # POPS handles the intercept as an explicit constant feature (see
        # fit), so BayesianRidge's own intercept fitting is disabled.
        if self.fit_intercept:
            print("Warning: fit_intercept is set to False for POPS regression. A constant feature will be added to the design matrix.")
            self.fit_intercept_flag = True
            self.fit_intercept = False

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, sample_weight=None):
        """
        Fit the POPS regression model.

        Extends BayesianRidge.fit with POPS-specific computations: leverage
        scores, pointwise-optimal corrections, and the bounding hypercube of
        those corrections, which is then sampled to estimate
        ``misspecification_sigma_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,)
            The target values.
        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if self.fit_intercept_flag:
            print("Warning: fit_intercept is set to False for POPS regression. Adding a constant feature for regression.")
            X = np.hstack([X, np.ones((X.shape[0], 1))])

        super().fit(X, y, sample_weight)

        # Suppress aleatoric uncertainty: the data is assumed noise-free.
        self.alpha_ = np.inf

        n_samples, n_features = X.shape

        # Prior (lambda_ from BayesianRidge) regularizes the normal matrix so
        # the pseudo-inverse below is well conditioned.
        prior = self.lambda_ * np.eye(n_features) / n_samples
        inverse_design_matrix = pinvh(X.T @ X / n_samples + prior)

        # Leverage scores and pointwise corrections to the mean prediction.
        errors = y - X @ self.coef_
        X_inverse_design_matrix = X @ inverse_design_matrix
        self.leverage_scores = (X_inverse_design_matrix * X).sum(1)
        self.pointwise_correction = (
            X_inverse_design_matrix * (errors / self.leverage_scores)[:, None]
        )

        # Determine bounding hypercube from pointwise fits, then sample it.
        self.hypercube_support, self.hypercube_bounds = self._hypercube_fit(
            self.pointwise_correction, self.percentile_clipping
        )
        self.hypercube_samples, self.misspecification_sigma_ = (
            self._resample_hypercube()
        )

        # BUGFIX: fit() previously fell off the end and returned None,
        # breaking the sklearn convention and call chaining.
        return self

    def _hypercube_fit(self, pointwise_correction, percentile_clipping=0.0):
        """
        Fit a hypercube to the pointwise corrections.

        Computes the principal components of the pointwise corrections and
        determines the bounding box (hypercube) in the space of these
        components.

        Parameters
        ----------
        pointwise_correction : numpy.ndarray
            Array of pointwise corrections, shape (n_samples, n_features).
        percentile_clipping : float, default=0.0
            Percentile clipped from each end when bounding the projections.

        Returns
        -------
        projections : numpy.ndarray
            The principal component vectors that define the hypercube space.
        bounds : list of numpy.ndarray
            The [min, max] bounds of the hypercube along each component.

        Notes
        -----
        Steps: eigendecompose the correction covariance; keep components whose
        eigenvalue exceeds ``mode_threshold`` times the largest; project the
        corrections onto them; bound the projections (with optional percentile
        clipping). The hypercube represents parameter-estimate uncertainty and
        is used for subsequent resampling.
        """
        e_values, e_vectors = eigh(pointwise_correction.T @ pointwise_correction)

        # Discard near-null modes relative to the dominant eigenvalue.
        mask = e_values > self.mode_threshold * e_values.max()
        e_vectors = e_vectors[:, mask]
        e_values = e_values[mask]

        projections = e_vectors.copy()
        projected = pointwise_correction @ projections
        bounds = [np.percentile(projected, percentile_clipping, axis=0)]
        bounds += [np.percentile(projected, 100.0 - percentile_clipping, axis=0)]

        return projections, bounds

    def _resample_hypercube(self, size=None, resampling_method=None):
        """
        Resample points from the hypercube.

        Generates new samples from the hypercube defined by the bounding box
        of the pointwise corrections and projects them back to feature space.

        Parameters
        ----------
        size : int, optional
            The number of samples to generate. If None, the number is
            ``resample_density * n_training_points`` (minimum 100; rounded
            down to a power of two for 'sobol').
        resampling_method : str, optional
            Overrides ``self.resampling_method`` if given. One of
            'latin', 'sobol', 'grid', 'halton', 'uniform'.

        Returns
        -------
        hypercube_samples : numpy.ndarray
            Shape (n_features, n_samples): resampled weight corrections.
        hypercube_sigma : numpy.ndarray
            Shape (n_features, n_features): sample covariance of the above.
        """
        if resampling_method is None:
            resampling_method = self.resampling_method

        # Validate resampling_method parameter (also reachable via the public
        # resample() which bypasses sklearn parameter validation).
        valid_methods = ['latin', 'sobol', 'grid', 'halton', 'uniform']
        if resampling_method not in valid_methods:
            raise ValueError(f"Invalid resampling_method. Must be one of {valid_methods}")

        low = self.hypercube_bounds[0]
        high = self.hypercube_bounds[1]
        if size is None:
            n_resample = int(self.resample_density * self.leverage_scores.size)
        else:
            n_resample = size
        n_resample = max(n_resample, 100)

        # Draw unit-cube samples of shape (n_modes, n_resample).
        if resampling_method == 'latin':
            sampler = qmc.LatinHypercube(d=low.size)
            samples = sampler.random(n_resample).T
        elif resampling_method == 'sobol':
            # Sobol sequences are balanced only for power-of-two sizes.
            sampler = qmc.Sobol(d=low.size)
            n_resample = 2 ** int(np.log(n_resample) / np.log(2.0))
            samples = sampler.random(n_resample).T
        elif resampling_method == 'grid':
            # NOTE: one shared 1-D grid broadcast across all modes.
            samples = np.linspace(0, 1, n_resample).T
        elif resampling_method == 'halton':
            sampler = qmc.Halton(d=low.size)
            samples = sampler.random(n_resample).T
        elif resampling_method == 'uniform':
            samples = np.random.uniform(size=(low.size, n_resample))
        samples = low[:, None] + (high - low)[:, None] * samples

        # Project back to feature space and form the sample covariance.
        hypercube_samples = self.hypercube_support @ samples
        hypercube_sigma = hypercube_samples @ hypercube_samples.T
        hypercube_sigma /= hypercube_samples.shape[1]

        return hypercube_samples, hypercube_sigma

    def resample(self, resampling_method=None):
        """
        Resample the hypercube samples and update the model's internal state.

        Parameters
        ----------
        resampling_method : str, optional (default=None)
            The method to use for resampling. If None, the model's default
            resampling_method will be used. Valid options are:
            'latin', 'sobol', 'grid', 'halton', 'uniform'.

        Returns
        -------
        None

        Notes
        -----
        Updates ``hypercube_samples`` and ``misspecification_sigma_`` in
        place; can be called multiple times to obtain different uncertainty
        estimates.
        """
        # BUGFIX: the resampled values were previously discarded, so this
        # method silently had no effect. Store them as documented.
        self.hypercube_samples, self.misspecification_sigma_ = (
            self._resample_hypercube(resampling_method=resampling_method)
        )

    def predict(self, X, return_bounds=False, return_epistemic_std=False,
                resample=False):
        """
        Make predictions using the POPS model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples for prediction.
        return_bounds : bool, default=False
            If True, also return the max and min bounds of the prediction.
        return_epistemic_std : bool, default=False
            If True, also return the epistemic standard deviation.
        resample : bool, default=False
            If True, redraw the hypercube samples before predicting
            (matches the usage documented in the package README).

        Returns
        -------
        y_pred : array-like of shape (n_samples,)
            The predicted mean values.
        y_std : array-like of shape (n_samples,)
            Combined misspecification + epistemic standard deviation.
        y_max : array-like of shape (n_samples,), optional
            Upper prediction bound. Only returned if return_bounds is True.
        y_min : array-like of shape (n_samples,), optional
            Lower prediction bound. Only returned if return_bounds is True.
        y_epistemic_std : array-like of shape (n_samples,), optional
            Epistemic std only. Returned if return_epistemic_std is True.
        """
        if resample:
            self.resample()

        # alpha_ = inf suppresses the aleatoric term in BayesianRidge's
        # predictive std, leaving the purely epistemic contribution.
        y_pred, y_epistemic_std = super().predict(X, return_std=True)

        # Combine misspecification and epistemic uncertainty in quadrature.
        y_misspecification_var = (X @ self.misspecification_sigma_ * X).sum(1)
        y_std = np.sqrt(y_misspecification_var + y_epistemic_std ** 2)

        res = [y_pred, y_std]
        if return_bounds:
            hypercube_pred = X @ self.hypercube_samples
            y_max = hypercube_pred.max(1) + y_pred
            y_min = hypercube_pred.min(1) + y_pred
            res += [y_max, y_min]

        if return_epistemic_std:
            res += [y_epistemic_std]

        return tuple(res)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .POPSRegression import POPSRegression
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: POPSRegression
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bayesian regression for low-noise data using POPS algorithm
|
|
5
|
+
Author-email: Thomas D Swinburne <thomas.swinburne@cnrs.fr>, Danny Perez <danny_perez@lanl.gov>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tomswinburne/POPS-Regression
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/tomswinburne/POPS-Regression/issues
|
|
9
|
+
Project-URL: Documentation, https://github.com/tomswinburne/POPS-Regression/blob/main/README.md
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: scikit-learn>=0.24.0
|
|
14
|
+
Requires-Dist: scipy>=1.6.0
|
|
15
|
+
Requires-Dist: numpy>=1.20.0
|
|
16
|
+
|
|
17
|
+
# POPSRegression
|
|
18
|
+
Regression scheme from the paper
|
|
19
|
+
|
|
20
|
+
*Parameter uncertainties for imperfect surrogate models in the low-noise regime*
|
|
21
|
+
|
|
22
|
+
TD Swinburne and D Perez, [arXiv 2024](https://arxiv.org/abs/2402.01810v3)
|
|
23
|
+
|
|
24
|
+
```bibtex
|
|
25
|
+
@misc{swinburne2024,
|
|
26
|
+
title={Parameter uncertainties for imperfect surrogate models in the low-noise regime},
|
|
27
|
+
author={Thomas D Swinburne and Danny Perez},
|
|
28
|
+
year={2024},
|
|
29
|
+
eprint={2402.01810},
|
|
30
|
+
archivePrefix={arXiv},
|
|
31
|
+
primaryClass={stat.ML},
|
|
32
|
+
url={https://arxiv.org/abs/2402.01810v3},
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Bayesian regression for low-noise data (vanishing aleatoric uncertainty).
|
|
38
|
+
|
|
39
|
+
Fits the weights of a regression model using BayesianRidge, then estimates weight uncertainties accounting for model misspecification using the POPS (Pointwise Optimal Parameter Sets) algorithm.
|
|
40
|
+
|
|
41
|
+
The method can easily handle high-dimensional linear problems with minimal overhead compared to any linear regression scheme.
|
|
42
|
+
|
|
43
|
+
Bayesian regression is often used in computational science to fit the weights of a surrogate model which approximates some complex calcualtion.
|
|
44
|
+
In many important cases the target calcualtion is near-deterministic, or low-noise, meaning the true data has vanishing aleatoric uncertainty.
|
|
45
|
+
However, there can be large misspecification uncertainty, i.e. the model weights are instrinsically uncertain as the model is unable to exactly match training data.
|
|
46
|
+
Existing Bayesian regression schemes based on loss minimization can only estimate epistemic and aleatoric uncertainties.
|
|
47
|
+
|
|
48
|
+
## Example usage
|
|
49
|
+
Here, usage follows `sklearn.linear_model`, inheriting `BayesianRidge`
|
|
50
|
+
|
|
51
|
+
After running `BayesianRidge.fit(..)`, the `alpha_` attribute is fixed to `np.inf` as aleatoric uncertainty is assumed negligable.
|
|
52
|
+
|
|
53
|
+
The `sigma_` matrix still contains epistemic weight uncertainties, whilst `misspecification_sigma_` contains the POPS uncertainties.
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
|
|
57
|
+
from POPSRegression import POPSRegression
|
|
58
|
+
|
|
59
|
+
X_train,X_test,y_train,y_test = ...
|
|
60
|
+
|
|
61
|
+
# Sobol resampling of hypercube with 1.0 samples / training point
|
|
62
|
+
model = POPSRegression(resampling_method='sobol',resample_density=1.)
|
|
63
|
+
|
|
64
|
+
# fit the model, sample POPS hypercube
|
|
65
|
+
model.fit(X_train,y_train)
|
|
66
|
+
|
|
67
|
+
# Return hypercube std, max/min and epistemic uncertaint from inference
|
|
68
|
+
y_pred, y_std, y_max, y_min, y_std_epistmic = \
|
|
69
|
+
model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# can also return max/min
|
|
75
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# returns std by default
|
|
79
|
+
y_pred, y_std = model.predict(X_test)
|
|
80
|
+
|
|
81
|
+
# can also return max/min
|
|
82
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
83
|
+
|
|
84
|
+
# can also resample the hypercube vectors
|
|
85
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True,resample=True)
|
|
86
|
+
|
|
87
|
+
# can also return the epistemic uncertainty (descreases as 1/sqrt(n_samples))
|
|
88
|
+
y_pred, y_std, y_max, y_min, y_std_epistmic = model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
As can be seen, the final error bars give very good coverage of the test output
|
|
92
|
+
|
|
93
|
+
Extreme low-dimensional case, fitting N data points to a quartic polynomial (P=5 parameters) to some complex oscillatory function
|
|
94
|
+
|
|
95
|
+
Green: two sigma of `sigma_` weight uncertainty from Bayesian Regression (i.e. without `alpha_` term for aleatoric error)
|
|
96
|
+
|
|
97
|
+
Orange: two sigma of `sigma_` and `misspecification_sigma_` posterior from POPS Regression
|
|
98
|
+
|
|
99
|
+

|
|
100
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
POPSRegression/POPSRegression.py
|
|
5
|
+
POPSRegression/__init__.py
|
|
6
|
+
POPSRegression.egg-info/PKG-INFO
|
|
7
|
+
POPSRegression.egg-info/SOURCES.txt
|
|
8
|
+
POPSRegression.egg-info/dependency_links.txt
|
|
9
|
+
POPSRegression.egg-info/requires.txt
|
|
10
|
+
POPSRegression.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
POPSRegression
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# POPSRegression
|
|
2
|
+
Regression scheme from the paper
|
|
3
|
+
|
|
4
|
+
*Parameter uncertainties for imperfect surrogate models in the low-noise regime*
|
|
5
|
+
|
|
6
|
+
TD Swinburne and D Perez, [arXiv 2024](https://arxiv.org/abs/2402.01810v3)
|
|
7
|
+
|
|
8
|
+
```bibtex
|
|
9
|
+
@misc{swinburne2024,
|
|
10
|
+
title={Parameter uncertainties for imperfect surrogate models in the low-noise regime},
|
|
11
|
+
author={Thomas D Swinburne and Danny Perez},
|
|
12
|
+
year={2024},
|
|
13
|
+
eprint={2402.01810},
|
|
14
|
+
archivePrefix={arXiv},
|
|
15
|
+
primaryClass={stat.ML},
|
|
16
|
+
url={https://arxiv.org/abs/2402.01810v3},
|
|
17
|
+
}
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
Bayesian regression for low-noise data (vanishing aleatoric uncertainty).
|
|
22
|
+
|
|
23
|
+
Fits the weights of a regression model using BayesianRidge, then estimates weight uncertainties accounting for model misspecification using the POPS (Pointwise Optimal Parameter Sets) algorithm.
|
|
24
|
+
|
|
25
|
+
The method can easily handle high-dimensional linear problems with minimal overhead compared to any linear regression scheme.
|
|
26
|
+
|
|
27
|
+
Bayesian regression is often used in computational science to fit the weights of a surrogate model which approximates some complex calculation.
|
|
28
|
+
In many important cases the target calculation is near-deterministic, or low-noise, meaning the true data has vanishing aleatoric uncertainty.
|
|
29
|
+
However, there can be large misspecification uncertainty, i.e. the model weights are intrinsically uncertain as the model is unable to exactly match training data.
|
|
30
|
+
Existing Bayesian regression schemes based on loss minimization can only estimate epistemic and aleatoric uncertainties.
|
|
31
|
+
|
|
32
|
+
## Example usage
|
|
33
|
+
Here, usage follows `sklearn.linear_model`, inheriting `BayesianRidge`
|
|
34
|
+
|
|
35
|
+
After running `BayesianRidge.fit(..)`, the `alpha_` attribute is fixed to `np.inf` as aleatoric uncertainty is assumed negligible.
|
|
36
|
+
|
|
37
|
+
The `sigma_` matrix still contains epistemic weight uncertainties, whilst `misspecification_sigma_` contains the POPS uncertainties.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
|
|
41
|
+
from POPSRegression import POPSRegression
|
|
42
|
+
|
|
43
|
+
X_train,X_test,y_train,y_test = ...
|
|
44
|
+
|
|
45
|
+
# Sobol resampling of hypercube with 1.0 samples / training point
|
|
46
|
+
model = POPSRegression(resampling_method='sobol',resample_density=1.)
|
|
47
|
+
|
|
48
|
+
# fit the model, sample POPS hypercube
|
|
49
|
+
model.fit(X_train,y_train)
|
|
50
|
+
|
|
51
|
+
# Return hypercube std, max/min and epistemic uncertainty from inference
|
|
52
|
+
y_pred, y_std, y_max, y_min, y_std_epistemic = \
|
|
53
|
+
model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# can also return max/min
|
|
59
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# returns std by default
|
|
63
|
+
y_pred, y_std = model.predict(X_test)
|
|
64
|
+
|
|
65
|
+
# can also return max/min
|
|
66
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True)
|
|
67
|
+
|
|
68
|
+
# can also resample the hypercube vectors
|
|
69
|
+
y_pred, y_std, y_max, y_min = model.predict(X_test,return_bounds=True,resample=True)
|
|
70
|
+
|
|
71
|
+
# can also return the epistemic uncertainty (decreases as 1/sqrt(n_samples))
|
|
72
|
+
y_pred, y_std, y_max, y_min, y_std_epistemic = model.predict(X_test,return_bounds=True,resample=True,return_epistemic_std=True)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
As can be seen, the final error bars give very good coverage of the test output
|
|
76
|
+
|
|
77
|
+
Extreme low-dimensional case: fitting a quartic polynomial (P=5 parameters) to N data points drawn from a complex oscillatory function
|
|
78
|
+
|
|
79
|
+
Green: two sigma of `sigma_` weight uncertainty from Bayesian Regression (i.e. without `alpha_` term for aleatoric error)
|
|
80
|
+
|
|
81
|
+
Orange: two sigma of `sigma_` and `misspecification_sigma_` posterior from POPS Regression
|
|
82
|
+
|
|
83
|
+

|
|
84
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=45", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "POPSRegression"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Bayesian regression for low-noise data using POPS algorithm"
|
|
9
|
+
authors = [
|
|
10
|
+
{name = "Thomas D Swinburne", email = "thomas.swinburne@cnrs.fr"},
|
|
11
|
+
{name = "Danny Perez", email = "danny_perez@lanl.gov"},
|
|
12
|
+
]
|
|
13
|
+
license = {text = "MIT"}
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
|
|
17
|
+
dependencies = [
|
|
18
|
+
"scikit-learn>=0.24.0",
|
|
19
|
+
"scipy>=1.6.0",
|
|
20
|
+
"numpy>=1.20.0",
|
|
21
|
+
]
|
|
22
|
+
[project.urls]
|
|
23
|
+
"Homepage" = "https://github.com/tomswinburne/POPS-Regression"
|
|
24
|
+
"Bug Tracker" = "https://github.com/tomswinburne/POPS-Regression/issues"
|
|
25
|
+
"Documentation" = "https://github.com/tomswinburne/POPS-Regression/blob/main/README.md"
|
|
26
|
+
|
|
27
|
+
[tool.setuptools]
|
|
28
|
+
packages = ["POPSRegression"]
|
|
29
|
+
|