mvBayes 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mvbayes-1.0.3/LICENSE +2 -0
- mvbayes-1.0.3/PKG-INFO +20 -0
- mvbayes-1.0.3/README.md +29 -0
- mvbayes-1.0.3/mvBayes/__init__.py +3 -0
- mvbayes-1.0.3/mvBayes/cov.py +84 -0
- mvbayes-1.0.3/mvBayes/cv.py +149 -0
- mvbayes-1.0.3/mvBayes/mvBayes.py +1979 -0
- mvbayes-1.0.3/mvBayes.egg-info/PKG-INFO +20 -0
- mvbayes-1.0.3/mvBayes.egg-info/SOURCES.txt +18 -0
- mvbayes-1.0.3/mvBayes.egg-info/dependency_links.txt +1 -0
- mvbayes-1.0.3/mvBayes.egg-info/not-zip-safe +1 -0
- mvbayes-1.0.3/mvBayes.egg-info/requires.txt +11 -0
- mvbayes-1.0.3/mvBayes.egg-info/top_level.txt +2 -0
- mvbayes-1.0.3/pyproject.toml +34 -0
- mvbayes-1.0.3/setup.cfg +4 -0
- mvbayes-1.0.3/tests/__init__.py +0 -0
- mvbayes-1.0.3/tests/test_basisSetup.py +59 -0
- mvbayes-1.0.3/tests/test_bass_fit.py +33 -0
- mvbayes-1.0.3/tests/test_mvBayes.py +62 -0
- mvbayes-1.0.3/tests/util.py +5 -0
mvbayes-1.0.3/LICENSE
ADDED
mvbayes-1.0.3/PKG-INFO
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mvBayes
|
|
3
|
+
Version: 1.0.3
|
|
4
|
+
Summary: Multivariate Bayesian Modeling
|
|
5
|
+
Author-email: Gavin Collins <gqcolli@sandia.gov>, "J. Derek Tucker" <jdtuck@sandia.gov>
|
|
6
|
+
License: BSD-3
|
|
7
|
+
Project-URL: Homepage, https://cee-gitlab.sandia.gov/gqcolli/mvBayes
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: scipy
|
|
12
|
+
Requires-Dist: matplotlib
|
|
13
|
+
Requires-Dist: scikit-learn
|
|
14
|
+
Requires-Dist: patsy
|
|
15
|
+
Provides-Extra: optional
|
|
16
|
+
Requires-Dist: pathos; extra == "optional"
|
|
17
|
+
Requires-Dist: fdasrsf; extra == "optional"
|
|
18
|
+
Requires-Dist: pyBASS; extra == "optional"
|
|
19
|
+
Requires-Dist: pyBayesPPR; extra == "optional"
|
|
20
|
+
Dynamic: license-file
|
mvbayes-1.0.3/README.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
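[![Python package](https://github.com/sandialabs/mvBayesPy/actions/workflows/python-package.yml/badge.svg)](https://github.com/sandialabs/mvBayesPy/actions/workflows/python-package.yml)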
|
|
2
|
+
# mvBayes
|
|
3
|
+
|
|
4
|
+
A Python implementation of the multivariate Bayesian regression (mvBayes) framework. Decomposes a multivariate/functional response using a user-specified orthogonal basis decomposition, and then models each basis component independently using an arbitrary user-specified (univariate) Bayesian regression model. Includes prediction and plotting methods.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
------------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
To install the most up-to-date version from GitHub, clone the repository and run the following from the repository root:
|
|
10
|
+
> `pip install -e .`
|
|
11
|
+
|
|
12
|
+
Please see [requirements](requirements.txt) for a list of packages that `mvBayes`
|
|
13
|
+
depends on
|
|
14
|
+
|
|
15
|
+
------------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
* [Friedman Example](examples/friedman_demo.py) - An extension of the "Friedman function" to a functional response. The Bayesian regression model used here is BASS (Bayesian Adaptive Smoothing Splines; see https://github.com/lanl/pyBASS).
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
## References
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
************
|
|
26
|
+
|
|
27
|
+
Author: Gavin Q. Collins and J. Derek Tucker
|
|
28
|
+
Sandia National Laboratories
|
|
29
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from scipy.optimize import minimize_scalar
|
|
3
|
+
from scipy.linalg import toeplitz
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def covDiag(resid):
    """
    Build a diagonal covariance matrix from residuals.

    Parameters:
    resid: A 2-D array of residuals; variances are taken down axis 0, i.e. one variance per column.

    Returns:
    A square matrix with the per-column variances (as computed by resid.var) on the diagonal and zeros elsewhere.
    """
    columnVariances = resid.var(axis=0)
    return np.diag(columnVariances)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def covMA1(resid, varEqual=True):
    """
    Estimate an MA(1)-structured covariance matrix from residuals.

    The fitted correlation matrix is symmetric Toeplitz with ones on the diagonal, a single coefficient on the first off-diagonals and zeros elsewhere. The coefficient is found by minimizing log|Sigma| + trace(Sigma^{-1} S) over (-0.5, 0.5), where S is the sample covariance of the columns of resid.

    Parameters:
    resid: A 2-D array of residuals. Columns are treated as the variables (nMV = resid.shape[1]).
    varEqual: If True, one pooled standard deviation (over all entries of resid) is used for every column; otherwise each column gets its own standard deviation.

    Returns:
    A tuple (cov, coef), where cov is the nMV x nMV fitted covariance matrix and coef is the fitted MA(1) correlation coefficient. When resid has a single column, the bare array [[1]] is returned instead of a tuple.
    """
    nMV = resid.shape[1]

    if nMV == 1:
        # NOTE(review): this path returns a bare 1x1 array rather than a
        # (cov, coef) tuple; kept as-is for backward compatibility -- confirm
        # against callers.
        return np.array([[1]])

    if varEqual is True:
        std = np.std(resid, ddof=1).reshape((1,))
    else:
        std = np.std(resid, ddof=1, axis=0)
    # Outer product of standard deviations; a 1x1 array (broadcast) when pooled.
    C = std[:, None] * std[None, :]
    covFull = np.cov(resid.T)

    def getMA1(coef):
        # First column of the Toeplitz correlation matrix: [1, coef, 0, ..., 0]
        theta = np.hstack([1, coef, np.repeat(0, nMV - 2)])
        return toeplitz(theta)

    def minLik(coef):
        covCandidate = C * getMA1(coef)
        D, Q = np.linalg.eigh(covCandidate)
        logDetCov = np.sum(np.log(D))
        # Inverse via the eigendecomposition: Q diag(1/D) Q^T
        covInv = (Q * (1.0 / D)) @ Q.T
        # Elementwise sum equals trace(covInv @ covFull) for symmetric matrices
        return logDetCov + np.sum(covInv * covFull)

    # Request the bounded solver explicitly: SciPy releases whose default
    # method is Brent do not honor `bounds` without it.
    coef = minimize_scalar(
        minLik,
        bounds=(-0.5, 0.5),
        method="bounded",
    ).x

    return C * getMA1(coef), coef
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def covAR1(resid, varEqual=True):
    """
    Estimate an AR(1)-structured covariance matrix from residuals.

    The fitted correlation matrix is symmetric Toeplitz with (i, j) entry coef**|i - j|. The coefficient is found by minimizing a log-determinant plus trace(Sigma^{-1} S) criterion over (-1 + 1e-5, 1 - 1e-5), where S is the sample covariance of the columns of resid. For nMV <= 3 the inverse is obtained from an eigendecomposition; otherwise the closed-form tridiagonal inverse of the AR(1) correlation matrix is used.

    Parameters:
    resid: A 2-D array of residuals. Columns are treated as the variables (nMV = resid.shape[1]).
    varEqual: If True, one pooled standard deviation (over all entries of resid) is used for every column; otherwise each column gets its own standard deviation.

    Returns:
    A tuple (cov, coef), where cov is the nMV x nMV fitted covariance matrix and coef is the fitted AR(1) correlation coefficient. When resid has a single column, the bare array [[1]] is returned instead of a tuple.
    """
    nMV = resid.shape[1]

    if nMV == 1:
        # NOTE(review): this path returns a bare 1x1 array rather than a
        # (cov, coef) tuple; kept as-is for backward compatibility -- confirm
        # against callers.
        return np.array([[1]])

    if varEqual is True:
        std = np.std(resid, ddof=1).reshape((1,))
    else:
        std = np.std(resid, ddof=1, axis=0)
    # Outer product of standard deviations; a 1x1 array (broadcast) when pooled.
    C = std[:, None] * std[None, :]
    covFull = np.cov(resid.T)

    def getAR1(coef):
        # First column of the Toeplitz correlation matrix: coef**0, coef**1, ...
        theta = coef ** np.arange(nMV)
        return toeplitz(theta)

    if nMV <= 3:
        def minLik(coef):
            covCandidate = C * getAR1(coef)
            D, Q = np.linalg.eigh(covCandidate)
            logDetCov = np.sum(np.log(D))
            # Inverse via the eigendecomposition: Q diag(1/D) Q^T
            covInv = (Q * (1.0 / D)) @ Q.T
            # Elementwise sum equals trace(covInv @ covFull) for symmetric matrices
            return logDetCov + np.sum(covInv * covFull)
    else:  # compute closed form solution for AR(1)
        interior = np.arange(1, nMV - 1)

        def minLik(coef):
            oneMinusSq = 1 - coef**2
            # Log-determinant of the AR(1) correlation matrix; the factor
            # contributed by C does not depend on coef and is omitted.
            logDetCov = (nMV - 1) * np.log(oneMinusSq)
            # Tridiagonal inverse: 1/(1-c^2) on the diagonal and -c/(1-c^2)
            # off it, with interior diagonal entries raised to (1+c^2)/(1-c^2).
            theta = np.hstack([1, -coef, np.repeat(0, nMV - 2)]) / oneMinusSq
            corInv = toeplitz(theta)
            corInv[interior, interior] += coef**2 / oneMinusSq
            # (C * R)^{-1} equals R^{-1} / C elementwise because C is an outer
            # product of the standard deviations.
            covInv = corInv / C
            return logDetCov + np.sum(covInv * covFull)

    # Request the bounded solver explicitly: SciPy releases whose default
    # method is Brent do not honor `bounds` without it.
    coef = minimize_scalar(
        minLik,
        bounds=(-1 + 1e-5, 1 - 1e-5),
        method="bounded",
    ).x

    return C * getAR1(coef), coef
|
|
84
|
+
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import time
|
|
3
|
+
import mvBayes as mb
|
|
4
|
+
from scipy.stats import multivariate_normal
|
|
5
|
+
|
|
6
|
+
def mvCV(bayesModel, X, Y, nTrain=None, nTest=None, nRep=1, seed=None, coverageTarget=0.95, idxSamples="default", uqTruncMethod="gaussian", **kwargs):
    """
    Cross-Validation (CV) of a Multivariate Bayesian Regression Model

    Parameters:
    bayesModel: A Bayesian regression model-fitting function, with first argument taking an nxp input matrix and second argument taking an n-vector of numeric responses.
    X: A matrix of predictors of dimension nxp, where n is the total number of examples (including training and test sets) and p is the number of inputs (features).
    Y: A response matrix of dimension nxq, where q is the number of multivariate/functional responses.
    nTrain: Number of examples to use in the training set. If None, nTrain = n - nTest; unless nTest is also None, in which case nTrain = ceil(n/2).
    nTest: Number of examples to use in the test set. If None, nTest = n - nTrain.
    nRep: Number of repetitions of CV process.
    seed: Randomization seed, for replication of the train/test split. If None, no seed is set. Only the split is seeded; the noise draws used for the UQ metrics come from the global numpy random state, which this function does not reseed.
    coverageTarget: Level of coverage desired (default: 0.95).
    idxSamples: Which samples to use in CV; passed through to the fitted model's predict method (default: "default").
    uqTruncMethod: Method to use for UQ truncation ("gaussian" or "empirical").
    **kwargs: Additional arguments to mvBayes, including arguments to bayesModel.

    Returns:
    A dictionary with per-replication arrays "rmse", "rSquared", "coverage", "intervalWidth", "intervalScore", "fitTime" and "predictTime", plus "coverageTarget" and an "effectiveArgs" dictionary recording the settings actually used.

    Raises:
    ValueError: If the requested train/test sizes do not fit in n, or if uqTruncMethod is not "gaussian" or "empirical".
    """
    # Setup
    n, _ = X.shape
    alpha = 1 - coverageTarget

    if nTest is None:
        if nTrain is None:
            nTest = n // 2  # half in test set
            nTrain = n - nTest
        elif nTrain >= n:
            raise ValueError("Must have nTrain < nrow(X)")
        else:
            nTest = n - nTrain
    else:
        if nTest >= n:
            raise ValueError("Must have nTest < nrow(X)")
        elif nTrain is None:
            nTrain = n - nTest
        elif nTrain + nTest > n:
            raise ValueError("Must have nTrain + nTest <= n")

    # Fail before any model fitting instead of hitting an undefined variable
    # deep inside the CV loop.
    if uqTruncMethod not in ("gaussian", "empirical"):
        raise ValueError('uqTruncMethod must be "gaussian" or "empirical"')

    # Get fold indices. A private RandomState draws the same split as seeding
    # the global generator would, without clobbering the caller's global state.
    splitRng = np.random.RandomState(seed)
    idxTest = [splitRng.choice(n, size=nTest, replace=False) for _ in range(nRep)]
    idxTrain = [
        splitRng.choice(np.setdiff1d(np.arange(n), idx), size=nTrain, replace=False)
        for idx in idxTest
    ]

    # Run CV
    rmse = np.zeros(nRep)
    rSquared = np.zeros(nRep)
    coverage = np.zeros(nRep)
    intervalWidth = np.zeros(nRep)
    intervalScore = np.zeros(nRep)
    fitTime = np.zeros(nRep)
    predictTime = np.zeros(nRep)

    for r in range(nRep):
        # Set up train/test split
        Xtrain, Ytrain = X[idxTrain[r], :], Y[idxTrain[r], :]
        Xtest, Ytest = X[idxTest[r], :], Y[idxTest[r], :]

        # Fit models
        startFit = time.perf_counter()
        fit = mb.mvBayes(bayesModel, Xtrain, Ytrain, **kwargs)
        fitTime[r] = time.perf_counter() - startFit

        # Predict
        startPred = time.perf_counter()
        preds = fit.predict(Xtest, idxSamples=idxSamples)
        predictTime[r] = time.perf_counter() - startPred

        # Point prediction: median over axis 0 of preds, taken before any
        # noise is added below.
        # NOTE(review): assumes preds is (nSamples, nTest, q) -- confirm
        # against mvBayes.predict.
        Yhat = np.median(preds, axis=0)

        # Calculate RMSE and R-squared
        mse = np.mean((Ytest - Yhat) ** 2)
        rmse[r] = np.sqrt(mse)
        rSquared[r] = 1 - mse / np.mean((Ytest - np.mean(Ytrain, axis=0)) ** 2)

        # Get truncation error for UQ
        nDraws = np.prod(preds.shape[:2])
        if uqTruncMethod == "gaussian":
            truncErrorVar = np.cov(fit.basisInfo.truncError, rowvar=False)
            truncError = multivariate_normal.rvs(
                mean=np.zeros(truncErrorVar.shape[0]),
                cov=truncErrorVar,
                size=nDraws
            ).reshape(preds.shape)
        else:  # "empirical": resample rows of the training truncation error
            idxResample = np.random.choice(nTrain, size=nDraws, replace=True)
            truncError = fit.basisInfo.truncError[idxResample, :].reshape(preds.shape)
        preds += truncError
        del truncError  # free the large temporary before the next allocation

        # Get regression error for UQ
        # NOTE(review): the reshape below requires residSD to hold exactly
        # preds.shape[0] values per basis component -- confirm this holds when
        # idxSamples selects a subset of samples.
        coefsResidError = np.zeros(preds.shape[:2] + (fit.basisInfo.nBasis, ))
        for k in range(fit.basisInfo.nBasis):
            residSD = np.repeat(
                fit.bmList[k].samples.residSD,
                preds.shape[1]
            ).reshape(preds.shape[:2])
            coefsResidError[:, :, k] = np.random.normal(
                0.0,
                residSD,
                preds.shape[:2]
            )
        residError = coefsResidError @ fit.basisInfo.basis
        del coefsResidError
        preds += residError
        del residError

        # Calculate distance from posterior mean (one test example at a time,
        # which avoids materializing another preds-sized temporary)
        distBound = np.zeros(nTest)
        for idx in range(nTest):
            distSamples = np.sqrt(np.mean((preds[:, idx, :] - Yhat[idx, :]) ** 2, axis=1))
            distBound[idx] = np.quantile(distSamples, coverageTarget)
        distTest = np.sqrt(np.mean((Ytest - Yhat) ** 2, axis=1))

        # Calculate UQ metrics
        distRatio = distTest / distBound
        coverage[r] = np.mean(distRatio <= 1)
        intervalWidth[r] = np.exp(np.mean(np.log(distBound)))
        # Only ratios above 1 are penalized. Clamping at 1 before the log gives
        # the same value as log(ratio) * (ratio > 1) for positive ratios, but
        # avoids -inf * 0 = NaN when a test prediction is exact (ratio == 0).
        excessLog = np.log(np.maximum(distRatio, 1.0))
        intervalScore[r] = intervalWidth[r] * np.exp(np.mean(excessLog) / alpha)

    # Output results
    out = {
        "rmse": rmse,
        "rSquared": rSquared,
        "coverageTarget": coverageTarget,
        "coverage": coverage,
        "intervalWidth": intervalWidth,
        "intervalScore": intervalScore,
        "fitTime": fitTime,
        "predictTime": predictTime,
        "effectiveArgs": {
            "nTrain": nTrain,
            "nTest": nTest,
            "nRep": nRep,
            "seed": seed,
            "coverageTarget": coverageTarget,
            "idxSamples": idxSamples,
            "uqTruncMethod": uqTruncMethod
        }
    }

    return out
|
|
148
|
+
|
|
149
|
+
|