pcntoolkit 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcntoolkit/__init__.py +4 -0
- pcntoolkit/configs.py +9 -0
- pcntoolkit/dataio/__init__.py +1 -0
- pcntoolkit/dataio/fileio.py +608 -0
- pcntoolkit/model/KnuOp.py +48 -0
- pcntoolkit/model/NP.py +88 -0
- pcntoolkit/model/NPR.py +86 -0
- pcntoolkit/model/SHASH.py +509 -0
- pcntoolkit/model/__init__.py +6 -0
- pcntoolkit/model/architecture.py +219 -0
- pcntoolkit/model/bayesreg.py +585 -0
- pcntoolkit/model/core.21290 +0 -0
- pcntoolkit/model/gp.py +489 -0
- pcntoolkit/model/hbr.py +1584 -0
- pcntoolkit/model/rfa.py +245 -0
- pcntoolkit/normative.py +1647 -0
- pcntoolkit/normative_NP.py +336 -0
- pcntoolkit/normative_model/__init__.py +6 -0
- pcntoolkit/normative_model/norm_base.py +62 -0
- pcntoolkit/normative_model/norm_blr.py +303 -0
- pcntoolkit/normative_model/norm_gpr.py +112 -0
- pcntoolkit/normative_model/norm_hbr.py +752 -0
- pcntoolkit/normative_model/norm_np.py +333 -0
- pcntoolkit/normative_model/norm_rfa.py +109 -0
- pcntoolkit/normative_model/norm_utils.py +29 -0
- pcntoolkit/normative_parallel.py +1420 -0
- pcntoolkit/regression_model/blr/warp.py +1 -0
- pcntoolkit/trendsurf.py +315 -0
- pcntoolkit/util/__init__.py +1 -0
- pcntoolkit/util/bspline.py +149 -0
- pcntoolkit/util/hbr_utils.py +242 -0
- pcntoolkit/util/utils.py +1698 -0
- pcntoolkit-0.32.0.dist-info/LICENSE +674 -0
- pcntoolkit-0.32.0.dist-info/METADATA +134 -0
- pcntoolkit-0.32.0.dist-info/RECORD +37 -0
- pcntoolkit-0.32.0.dist-info/WHEEL +4 -0
- pcntoolkit-0.32.0.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
from __future__ import print_function
|
|
2
|
+
from __future__ import division
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from scipy import optimize, linalg
|
|
6
|
+
from scipy.linalg import LinAlgError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BLR:
    """Bayesian linear regression

    Estimation and prediction of Bayesian linear regression models

    Basic usage::

        B = BLR()
        hyp = B.estimate(hyp0, X, y)
        ys,s2 = B.predict(hyp, X, y, Xs)

    where the variables are

    :param hyp: vector of hyperparameters.
    :param X: N x D data array
    :param y: 1D Array of targets (length N)
    :param Xs: Nte x D array of test cases
    :param hyp0: starting estimates for hyperparameter optimisation

    :returns: * ys - predictive mean
              * s2 - predictive variance

    The hyperparameters are::

        hyp = ( log(beta), log(alpha) )  # hyp is a list or numpy array

    The implementation and notation mostly follows Bishop (2006).
    The hyperparameter beta is the noise precision and alpha is the precision
    over lengthscale parameters. This can be either a scalar variable (a
    common lengthscale for all input variables), or a vector of length D (a
    different lengthscale for each input variable, derived using an automatic
    relevance determination formulation). These are estimated using conjugate
    gradient optimisation of the marginal likelihood.

    Reference:
    Bishop (2006) Pattern Recognition and Machine Learning, Springer

    Written by A. Marquand
    """

    def __init__(self, **kwargs):
        """Configure the model; all options are passed as keyword arguments.

        :param n_iter: maximum optimiser iterations (default 100)
        :param tol: optimiser gradient tolerance (default 1e-3)
        :param verbose: print progress information (default False)
        :param var_groups: per-observation group labels (e.g. sites) that
                           each receive their own noise precision
        :param var_covariates: covariates used to model heteroskedastic
                               noise (mutually exclusive with var_groups)
        :param warp: likelihood warping object (e.g. WarpSinArcsinh)
        :param warp_reparam: reparameterise the warp scale into beta
        """
        # parse arguments
        n_iter = kwargs.get('n_iter', 100)
        tol = kwargs.get('tol', 1e-3)
        verbose = kwargs.get('verbose', False)
        var_groups = kwargs.get('var_groups', None)
        var_covariates = kwargs.get('var_covariates', None)
        warp = kwargs.get('warp', None)
        warp_reparam = kwargs.get('warp_reparam', False)

        # the two noise models are mutually exclusive
        if var_groups is not None and var_covariates is not None:
            raise ValueError(
                "var_covariates and var_groups cannot both be used")

        # basic parameters
        self.hyp = np.nan    # hyperparameters of the current posterior
        self.nlZ = np.nan    # negative log marginal likelihood
        self.tol = tol  # not used at present
        self.n_iter = n_iter
        self.verbose = verbose
        self.var_groups = var_groups
        # hetero_var flags heteroskedastic (covariate-dependent) noise
        if var_covariates is not None:
            self.hetero_var = True
        else:
            self.hetero_var = False
        # sorted unique group ids give a stable ordering for the per-group
        # noise precisions in the hyperparameter vector
        if self.var_groups is not None:
            self.var_ids = set(self.var_groups)
            self.var_ids = sorted(list(self.var_ids))

        # set up warped likelihood
        if verbose:
            print('warp:', warp, 'warp_reparam:', warp_reparam)
        if warp is None:
            self.warp = None
            self.n_warp_param = 0
        else:
            self.warp = warp
            self.n_warp_param = warp.get_n_params()
        self.warp_reparam = warp_reparam

        # warp parameters (set once hyperparameters are parsed)
        self.gamma = None
def _parse_hyps(self, hyp, X, Xv=None):
|
|
92
|
+
"""
|
|
93
|
+
Parse hyperparameters into noise precision, lengthscale precision and
|
|
94
|
+
lengthscale parameters.
|
|
95
|
+
|
|
96
|
+
:param hyp: hyperparameter vector
|
|
97
|
+
:param X: covariates
|
|
98
|
+
:param Xv: covariates for heteroskedastic noise
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
N = X.shape[0]
|
|
102
|
+
|
|
103
|
+
# noise precision
|
|
104
|
+
if Xv is not None:
|
|
105
|
+
if len(Xv.shape) == 1:
|
|
106
|
+
Dv = 1
|
|
107
|
+
Xv = Xv[:, np.newaxis]
|
|
108
|
+
else:
|
|
109
|
+
Dv = Xv.shape[1]
|
|
110
|
+
w_d = np.asarray(hyp[0:Dv])
|
|
111
|
+
beta = np.exp(Xv.dot(w_d))
|
|
112
|
+
n_lik_param = len(w_d)
|
|
113
|
+
elif self.var_groups is not None:
|
|
114
|
+
beta = np.exp(hyp[0:len(self.var_ids)])
|
|
115
|
+
n_lik_param = len(beta)
|
|
116
|
+
else:
|
|
117
|
+
beta = np.asarray([np.exp(hyp[0])])
|
|
118
|
+
n_lik_param = len(beta)
|
|
119
|
+
|
|
120
|
+
# parameters for warping the likelihood function
|
|
121
|
+
if self.warp is not None:
|
|
122
|
+
gamma = hyp[n_lik_param:(n_lik_param + self.n_warp_param)]
|
|
123
|
+
n_lik_param += self.n_warp_param
|
|
124
|
+
else:
|
|
125
|
+
gamma = None
|
|
126
|
+
|
|
127
|
+
# precision for the coefficients
|
|
128
|
+
if isinstance(beta, list) or type(beta) is np.ndarray:
|
|
129
|
+
alpha = np.exp(hyp[n_lik_param:])
|
|
130
|
+
else:
|
|
131
|
+
alpha = np.exp(hyp[1:])
|
|
132
|
+
|
|
133
|
+
# reparameterise the warp (WarpSinArcsinh only)
|
|
134
|
+
if self.warp is not None and self.warp_reparam:
|
|
135
|
+
delta = np.exp(gamma[1])
|
|
136
|
+
beta = beta/(delta**2)
|
|
137
|
+
|
|
138
|
+
# Create precision matrix from noise precision
|
|
139
|
+
if Xv is not None:
|
|
140
|
+
self.lambda_n_vec = beta
|
|
141
|
+
elif self.var_groups is not None:
|
|
142
|
+
beta_all = np.ones(N)
|
|
143
|
+
for v in range(len(self.var_ids)):
|
|
144
|
+
beta_all[self.var_groups == self.var_ids[v]] = beta[v]
|
|
145
|
+
self.lambda_n_vec = beta_all
|
|
146
|
+
else:
|
|
147
|
+
self.lambda_n_vec = np.ones(N)*beta
|
|
148
|
+
|
|
149
|
+
return beta, alpha, gamma
|
|
150
|
+
|
|
151
|
+
def post(self, hyp, X, y, Xv=None):
|
|
152
|
+
""" Generic function to compute posterior distribution.
|
|
153
|
+
|
|
154
|
+
This function will save the posterior mean and precision matrix as
|
|
155
|
+
self.m and self.A and will also update internal parameters (e.g.
|
|
156
|
+
N, D and the prior covariance (Sigma_a) and precision (Lambda_a).
|
|
157
|
+
|
|
158
|
+
:param hyp: hyperparameter vector
|
|
159
|
+
:param X: covariates
|
|
160
|
+
:param y: responses
|
|
161
|
+
:param Xv: covariates for heteroskedastic noise
|
|
162
|
+
"""
|
|
163
|
+
|
|
164
|
+
N = X.shape[0]
|
|
165
|
+
if len(X.shape) == 1:
|
|
166
|
+
D = 1
|
|
167
|
+
else:
|
|
168
|
+
D = X.shape[1]
|
|
169
|
+
|
|
170
|
+
if (hyp == self.hyp).all() and hasattr(self, 'N'):
|
|
171
|
+
print("hyperparameters have not changed, exiting")
|
|
172
|
+
return
|
|
173
|
+
|
|
174
|
+
beta, alpha, gamma = self._parse_hyps(hyp, X, Xv)
|
|
175
|
+
|
|
176
|
+
if self.verbose:
|
|
177
|
+
print("estimating posterior ... | hyp=", hyp)
|
|
178
|
+
|
|
179
|
+
# prior variance
|
|
180
|
+
if len(alpha) == 1 or len(alpha) == D:
|
|
181
|
+
self.Sigma_a = np.diag(np.ones(D))/alpha
|
|
182
|
+
self.Lambda_a = np.diag(np.ones(D))*alpha
|
|
183
|
+
else:
|
|
184
|
+
raise ValueError("hyperparameter vector has invalid length")
|
|
185
|
+
|
|
186
|
+
# compute posterior precision and mean
|
|
187
|
+
# this is equivalent to the following operation but makes much more
|
|
188
|
+
# efficient use of memory by avoiding the need to store Lambda_n
|
|
189
|
+
#
|
|
190
|
+
# self.A = X.T.dot(self.Lambda_n).dot(X) + self.Lambda_a
|
|
191
|
+
# self.m = linalg.solve(self.A, X.T,
|
|
192
|
+
# check_finite=False).dot(self.Lambda_n).dot(y)
|
|
193
|
+
|
|
194
|
+
XtLambda_n = X.T*self.lambda_n_vec
|
|
195
|
+
self.A = XtLambda_n.dot(X) + self.Lambda_a
|
|
196
|
+
invAXt = linalg.solve(self.A, X.T, check_finite=False)
|
|
197
|
+
self.m = (invAXt*self.lambda_n_vec).dot(y)
|
|
198
|
+
|
|
199
|
+
# save stuff
|
|
200
|
+
self.N = N
|
|
201
|
+
self.D = D
|
|
202
|
+
self.hyp = hyp
|
|
203
|
+
|
|
204
|
+
def loglik(self, hyp, X, y, Xv=None):
|
|
205
|
+
""" Function to compute compute log (marginal) likelihood """
|
|
206
|
+
|
|
207
|
+
# hyperparameters (alpha not needed)
|
|
208
|
+
beta, alpha, gamma = self._parse_hyps(hyp, X, Xv)
|
|
209
|
+
|
|
210
|
+
# warp the likelihood?
|
|
211
|
+
if self.warp is not None:
|
|
212
|
+
if self.verbose:
|
|
213
|
+
print('warping input...')
|
|
214
|
+
y_unwarped = y
|
|
215
|
+
y = self.warp.f(y, gamma)
|
|
216
|
+
|
|
217
|
+
# load posterior and prior covariance
|
|
218
|
+
if (hyp != self.hyp).any() or not (hasattr(self, 'A')):
|
|
219
|
+
try:
|
|
220
|
+
self.post(hyp, X, y, Xv)
|
|
221
|
+
except ValueError:
|
|
222
|
+
print("Warning: Estimation of posterior distribution failed")
|
|
223
|
+
nlZ = 1/np.finfo(float).eps
|
|
224
|
+
return nlZ
|
|
225
|
+
|
|
226
|
+
try:
|
|
227
|
+
# compute the log determinants in a numerically stable way
|
|
228
|
+
logdetA = 2*sum(np.log(np.diag(np.linalg.cholesky(self.A))))
|
|
229
|
+
except (ValueError, LinAlgError):
|
|
230
|
+
print("Warning: Estimation of posterior distribution failed")
|
|
231
|
+
nlZ = 1/np.finfo(float).eps
|
|
232
|
+
return nlZ
|
|
233
|
+
|
|
234
|
+
logdetSigma_a = sum(np.log(np.diag(self.Sigma_a))) # diagonal
|
|
235
|
+
logdetSigma_n = sum(np.log(1/self.lambda_n_vec))
|
|
236
|
+
|
|
237
|
+
# compute negative marginal log likelihood
|
|
238
|
+
X_y_t_sLambda_n = (y-X.dot(self.m))*np.sqrt(self.lambda_n_vec)
|
|
239
|
+
nlZ = -0.5 * (-self.N*np.log(2*np.pi) -
|
|
240
|
+
logdetSigma_n -
|
|
241
|
+
logdetSigma_a -
|
|
242
|
+
X_y_t_sLambda_n.T.dot(X_y_t_sLambda_n) -
|
|
243
|
+
self.m.T.dot(self.Lambda_a).dot(self.m) -
|
|
244
|
+
logdetA
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
if self.warp is not None:
|
|
248
|
+
# add in the Jacobian
|
|
249
|
+
nlZ = nlZ - sum(np.log(self.warp.df(y_unwarped, gamma)))
|
|
250
|
+
|
|
251
|
+
# make sure the output is finite to stop the minimizer getting upset
|
|
252
|
+
if not np.isfinite(nlZ):
|
|
253
|
+
nlZ = 1/np.finfo(float).eps
|
|
254
|
+
|
|
255
|
+
if self.verbose:
|
|
256
|
+
print("nlZ= ", nlZ, " | hyp=", hyp)
|
|
257
|
+
|
|
258
|
+
self.nlZ = nlZ
|
|
259
|
+
return nlZ
|
|
260
|
+
|
|
261
|
+
def penalized_loglik(self, hyp, X, y, Xv=None, l=0.1, norm='L1'):
|
|
262
|
+
""" Function to compute the penalized log (marginal) likelihood
|
|
263
|
+
|
|
264
|
+
:param hyp: hyperparameter vector
|
|
265
|
+
:param X: covariates
|
|
266
|
+
:param y: responses
|
|
267
|
+
:param Xv: covariates for heteroskedastic noise
|
|
268
|
+
:param l: regularisation penalty
|
|
269
|
+
:param norm: type of regulariser (L1 or L2)
|
|
270
|
+
"""
|
|
271
|
+
|
|
272
|
+
if norm.lower() == 'l1':
|
|
273
|
+
L = self.loglik(hyp, X, y, Xv) + l * sum(abs(hyp))
|
|
274
|
+
elif norm.lower() == 'l2':
|
|
275
|
+
L = self.loglik(hyp, X, y, Xv) + l * sum(np.sqrt(hyp**2))
|
|
276
|
+
else:
|
|
277
|
+
print("Requested penalty not recognized, choose between 'L1' or 'L2'.")
|
|
278
|
+
return L
|
|
279
|
+
|
|
280
|
+
def dloglik(self, hyp, X, y, Xv=None):
|
|
281
|
+
""" Function to compute derivatives """
|
|
282
|
+
|
|
283
|
+
# hyperparameters
|
|
284
|
+
beta, alpha, gamma = self._parse_hyps(hyp, X, Xv)
|
|
285
|
+
|
|
286
|
+
if self.warp is not None:
|
|
287
|
+
raise ValueError('optimization with derivatives is not yet ' +
|
|
288
|
+
'supported for warped liklihood')
|
|
289
|
+
|
|
290
|
+
# load posterior and prior covariance
|
|
291
|
+
if (hyp != self.hyp).any() or not (hasattr(self, 'A')):
|
|
292
|
+
try:
|
|
293
|
+
self.post(hyp, X, y, Xv)
|
|
294
|
+
except ValueError:
|
|
295
|
+
print("Warning: Estimation of posterior distribution failed")
|
|
296
|
+
dnlZ = np.sign(self.dnlZ) / np.finfo(float).eps
|
|
297
|
+
return dnlZ
|
|
298
|
+
|
|
299
|
+
# precompute re-used quantities to maximise speed
|
|
300
|
+
# todo: revise implementation to use Cholesky throughout
|
|
301
|
+
# that would remove the need to explicitly compute the inverse
|
|
302
|
+
S = np.linalg.inv(self.A) # posterior covariance
|
|
303
|
+
SX = S.dot(X.T)
|
|
304
|
+
XLn = X.T*self.lambda_n_vec # = X.T.dot(self.Lambda_n)
|
|
305
|
+
XLny = XLn.dot(y)
|
|
306
|
+
SXLny = S.dot(XLny)
|
|
307
|
+
XLnXm = XLn.dot(X).dot(self.m)
|
|
308
|
+
|
|
309
|
+
# initialise derivatives
|
|
310
|
+
dnlZ = np.zeros(hyp.shape)
|
|
311
|
+
dnl2 = np.zeros(hyp.shape)
|
|
312
|
+
|
|
313
|
+
# noise precision parameter(s)
|
|
314
|
+
for i in range(0, len(beta)):
|
|
315
|
+
# first compute derivative of Lambda_n with respect to beta
|
|
316
|
+
dL_n_vec = np.zeros(self.N)
|
|
317
|
+
if self.var_groups is None:
|
|
318
|
+
dL_n_vec = np.ones(self.N)
|
|
319
|
+
else:
|
|
320
|
+
dL_n_vec[np.where(self.var_groups == self.var_ids[i])[0]] = 1
|
|
321
|
+
dLambda_n = np.diag(dL_n_vec)
|
|
322
|
+
|
|
323
|
+
# compute quantities used multiple times
|
|
324
|
+
XdLnX = X.T.dot(dLambda_n).dot(X)
|
|
325
|
+
dA = XdLnX
|
|
326
|
+
|
|
327
|
+
# derivative of posterior parameters with respect to beta
|
|
328
|
+
b = -S.dot(dA).dot(SXLny) + SX.dot(dLambda_n).dot(y)
|
|
329
|
+
|
|
330
|
+
# compute np.trace(self.Sigma_n.dot(dLambda_n)) efficiently
|
|
331
|
+
trSigma_ndLambda_n = sum((1/self.lambda_n_vec)*np.diag(dLambda_n))
|
|
332
|
+
|
|
333
|
+
# compute y.T.dot(Lambda_n) efficiently
|
|
334
|
+
ytLn = (y*self.lambda_n_vec).T
|
|
335
|
+
|
|
336
|
+
# compute derivatives
|
|
337
|
+
dnlZ[i] = - (0.5 * trSigma_ndLambda_n -
|
|
338
|
+
0.5 * y.dot(dLambda_n).dot(y) +
|
|
339
|
+
y.dot(dLambda_n).dot(X).dot(self.m) +
|
|
340
|
+
ytLn.dot(X).dot(b) -
|
|
341
|
+
0.5 * self.m.T.dot(XdLnX).dot(self.m) -
|
|
342
|
+
b.T.dot(XLnXm) -
|
|
343
|
+
b.T.dot(self.Lambda_a).dot(self.m) -
|
|
344
|
+
0.5 * np.trace(S.dot(dA))
|
|
345
|
+
) * beta[i]
|
|
346
|
+
|
|
347
|
+
# scaling parameter(s)
|
|
348
|
+
for i in range(0, len(alpha)):
|
|
349
|
+
# first compute derivatives with respect to alpha
|
|
350
|
+
if len(alpha) == self.D: # are we using ARD?
|
|
351
|
+
dLambda_a = np.zeros((self.D, self.D))
|
|
352
|
+
dLambda_a[i, i] = 1
|
|
353
|
+
else:
|
|
354
|
+
dLambda_a = np.eye(self.D)
|
|
355
|
+
|
|
356
|
+
F = dLambda_a
|
|
357
|
+
c = -S.dot(F).dot(SXLny)
|
|
358
|
+
|
|
359
|
+
# compute np.trace(self.Sigma_a.dot(dLambda_a)) efficiently
|
|
360
|
+
trSigma_adLambda_a = sum(np.diag(self.Sigma_a)*np.diag(dLambda_a))
|
|
361
|
+
|
|
362
|
+
dnlZ[i+len(beta)] = -(0.5 * trSigma_adLambda_a +
|
|
363
|
+
XLny.T.dot(c) -
|
|
364
|
+
c.T.dot(XLnXm) -
|
|
365
|
+
c.T.dot(self.Lambda_a).dot(self.m) -
|
|
366
|
+
0.5 * self.m.T.dot(F).dot(self.m) -
|
|
367
|
+
0.5*np.trace(linalg.solve(self.A, F))
|
|
368
|
+
) * alpha[i]
|
|
369
|
+
|
|
370
|
+
# make sure the gradient is finite to stop the minimizer getting upset
|
|
371
|
+
if not all(np.isfinite(dnlZ)):
|
|
372
|
+
bad = np.where(np.logical_not(np.isfinite(dnlZ)))
|
|
373
|
+
for b in bad:
|
|
374
|
+
dnlZ[b] = np.sign(self.dnlZ[b]) / np.finfo(float).eps
|
|
375
|
+
|
|
376
|
+
if self.verbose:
|
|
377
|
+
print("dnlZ= ", dnlZ, " | hyp=", hyp)
|
|
378
|
+
|
|
379
|
+
self.dnlZ = dnlZ
|
|
380
|
+
return dnlZ
|
|
381
|
+
|
|
382
|
+
# model estimation (optimization)
|
|
383
|
+
# model estimation (optimization)
def estimate(self, hyp0, X, y, **kwargs):
    """ Function to estimate the model

    Minimises the negative log marginal likelihood (or the penalized
    version for 'l-bfgs-b') and stores the result in ``self.hyp`` and
    ``self.nlZ``.

    :param hyp0: starting estimate for the hyperparameter vector
    :param X: covariates
    :param y: responses
    :param optimizer: optimisation algorithm ('cg','powell','nelder-mead','l-bfgs-b')
    """

    optimizer = kwargs.get('optimizer', 'cg')

    # covariates for heteroskedastic noise
    Xv = kwargs.get('var_covariates', None)

    # options for l-bfgs-b (ignored by the other optimizers)
    l = float(kwargs.get('l', 0.1))
    epsilon = float(kwargs.get('epsilon', 0.1))
    norm = kwargs.get('norm', 'l2')

    if optimizer.lower() == 'cg':  # conjugate gradients
        # uses analytic gradients from dloglik
        out = optimize.fmin_cg(self.loglik, hyp0, self.dloglik, (X, y, Xv),
                               disp=True, gtol=self.tol,
                               maxiter=self.n_iter, full_output=1)
    elif optimizer.lower() == 'powell':  # Powell's method
        out = optimize.fmin_powell(self.loglik, hyp0, (X, y, Xv),
                                   full_output=1)
    elif optimizer.lower() == 'nelder-mead':
        out = optimize.fmin(self.loglik, hyp0, (X, y, Xv),
                            full_output=1)
    elif optimizer.lower() == 'l-bfgs-b':
        # keep a trail of visited hyperparameters so the optimisation
        # can be restarted from the last good point on failure
        all_hyp_i = [hyp0]

        def store(X):
            # callback invoked by fmin_l_bfgs_b after each iteration;
            # the argument is the current hyperparameter vector (the
            # name X shadows the covariates parameter, but only locally)
            hyp = X
            all_hyp_i.append(hyp)
        try:
            out = optimize.fmin_l_bfgs_b(self.penalized_loglik, x0=hyp0,
                                         args=(X, y, Xv, l,
                                               norm), approx_grad=True,
                                         epsilon=epsilon, callback=store)
        # If the matrix becomes singular restart at last found hyp
        except np.linalg.LinAlgError:
            print(
                f'Restarting estimation at hyp = {all_hyp_i[-1]}, due to *** numpy.linalg.LinAlgError: Matrix is singular.')
            out = optimize.fmin_l_bfgs_b(self.penalized_loglik, x0=all_hyp_i[-1],
                                         args=(X, y, Xv, l,
                                               norm), approx_grad=True,
                                         epsilon=epsilon)
    else:
        raise ValueError("unknown optimizer")

    # all scipy 'fmin_*' interfaces return (xopt, fopt, ...) tuples
    self.hyp = out[0]
    self.nlZ = out[1]
    self.optimizer = optimizer

    return self.hyp
def predict(self, hyp, X, y, Xs,
|
|
441
|
+
var_groups_test=None,
|
|
442
|
+
var_covariates_test=None, **kwargs):
|
|
443
|
+
""" Function to make predictions from the model
|
|
444
|
+
|
|
445
|
+
:param hyp: hyperparameter vector
|
|
446
|
+
:param X: covariates for training data
|
|
447
|
+
:param y: responses for training data
|
|
448
|
+
:param Xs: covariates for test data
|
|
449
|
+
:param var_covariates_test: test covariates for heteroskedastic noise
|
|
450
|
+
|
|
451
|
+
This always returns Gaussian predictions, i.e.
|
|
452
|
+
|
|
453
|
+
:returns: * ys - predictive mean
|
|
454
|
+
* s2 - predictive variance
|
|
455
|
+
"""
|
|
456
|
+
|
|
457
|
+
Xvs = var_covariates_test
|
|
458
|
+
if Xvs is not None and len(Xvs.shape) == 1:
|
|
459
|
+
Xvs = Xvs[:, np.newaxis]
|
|
460
|
+
|
|
461
|
+
if X is None or y is None:
|
|
462
|
+
# set dummy hyperparameters
|
|
463
|
+
beta, alpha, gamma = self._parse_hyps(
|
|
464
|
+
hyp, np.zeros((self.N, self.D)), Xvs)
|
|
465
|
+
else:
|
|
466
|
+
|
|
467
|
+
# set hyperparameters
|
|
468
|
+
beta, alpha, gamma = self._parse_hyps(hyp, X, Xvs)
|
|
469
|
+
|
|
470
|
+
# do we need to re-estimate the posterior?
|
|
471
|
+
if (hyp != self.hyp).any() or not (hasattr(self, 'A')):
|
|
472
|
+
raise ValueError('posterior not properly estimated')
|
|
473
|
+
|
|
474
|
+
N_test = Xs.shape[0]
|
|
475
|
+
|
|
476
|
+
ys = Xs.dot(self.m)
|
|
477
|
+
|
|
478
|
+
if self.var_groups is not None:
|
|
479
|
+
if len(var_groups_test) != N_test:
|
|
480
|
+
raise ValueError('Invalid variance groups for test')
|
|
481
|
+
# separate variance groups
|
|
482
|
+
s2n = np.ones(N_test)
|
|
483
|
+
for v in range(len(self.var_ids)):
|
|
484
|
+
s2n[var_groups_test == self.var_ids[v]] = 1/beta[v]
|
|
485
|
+
else:
|
|
486
|
+
s2n = 1/beta
|
|
487
|
+
|
|
488
|
+
# compute xs.dot(S).dot(xs.T) avoiding computing off-diagonal entries
|
|
489
|
+
s2 = s2n + np.sum(Xs*linalg.solve(self.A, Xs.T).T, axis=1)
|
|
490
|
+
|
|
491
|
+
return ys, s2
|
|
492
|
+
|
|
493
|
+
def predict_and_adjust(self, hyp, X, y, Xs=None,
|
|
494
|
+
ys=None,
|
|
495
|
+
var_groups_test=None,
|
|
496
|
+
var_groups_adapt=None, **kwargs):
|
|
497
|
+
""" Function to transfer the model to a new site. This is done by
|
|
498
|
+
first making predictions on the adaptation data given by X,
|
|
499
|
+
adjusting by the residuals with respect to y.
|
|
500
|
+
|
|
501
|
+
:param hyp: hyperparameter vector
|
|
502
|
+
:param X: covariates for adaptation (i.e. calibration) data
|
|
503
|
+
:param y: responses for adaptation data
|
|
504
|
+
:param Xs: covariate data (for which predictions should be adjusted)
|
|
505
|
+
:param ys: true response variables (to be adjusted)
|
|
506
|
+
:param var_groups_test: variance groups (e.g. sites) for test data
|
|
507
|
+
:param var_groups_adapt: variance groups for adaptation data
|
|
508
|
+
|
|
509
|
+
There are two possible ways of using this function, depending on
|
|
510
|
+
whether ys or Xs is specified
|
|
511
|
+
|
|
512
|
+
If ys is specified, this is applied directly to the data, which is
|
|
513
|
+
assumed to be in the input space (i.e. not warped). In this case
|
|
514
|
+
the adjusted true data points are returned in the same space
|
|
515
|
+
|
|
516
|
+
Alternatively, Xs is specified, then the predictions are made and
|
|
517
|
+
adjusted. In this case the predictive variance are returned in the
|
|
518
|
+
warped (i.e. Gaussian) space.
|
|
519
|
+
|
|
520
|
+
This function needs to know which sites are associated with which
|
|
521
|
+
data points, which provided by var_groups_xxx, which is a list or
|
|
522
|
+
array of scalar ids .
|
|
523
|
+
"""
|
|
524
|
+
|
|
525
|
+
if ys is None:
|
|
526
|
+
if Xs is None:
|
|
527
|
+
raise ValueError('Either ys or Xs must be specified')
|
|
528
|
+
else:
|
|
529
|
+
N = Xs.shape[0]
|
|
530
|
+
else:
|
|
531
|
+
if len(ys.shape) < 1:
|
|
532
|
+
raise ValueError('ys is specified but has insufficent length')
|
|
533
|
+
N = ys.shape[0]
|
|
534
|
+
|
|
535
|
+
if var_groups_test is None:
|
|
536
|
+
var_groups_test = np.ones(N)
|
|
537
|
+
var_groups_adapt = np.ones(X.shape[0])
|
|
538
|
+
|
|
539
|
+
ys_out = np.zeros(N)
|
|
540
|
+
s2_out = np.zeros(N)
|
|
541
|
+
for g in np.unique(var_groups_test):
|
|
542
|
+
idx_s = var_groups_test == g
|
|
543
|
+
idx_a = var_groups_adapt == g
|
|
544
|
+
|
|
545
|
+
if sum(idx_a) < 2:
|
|
546
|
+
raise ValueError(
|
|
547
|
+
'Insufficient adaptation data to estimate variance')
|
|
548
|
+
|
|
549
|
+
# Get predictions from old model on new data X
|
|
550
|
+
ys_ref, s2_ref = self.predict(hyp, None, None, X[idx_a, :])
|
|
551
|
+
|
|
552
|
+
# Subtract the predictions from true data to get the residuals
|
|
553
|
+
if self.warp is None:
|
|
554
|
+
residuals = ys_ref-y[idx_a]
|
|
555
|
+
else:
|
|
556
|
+
# Calculate the residuals in warped space
|
|
557
|
+
y_ref_ws = self.warp.f(
|
|
558
|
+
y[idx_a], hyp[1:self.warp.get_n_params()+1])
|
|
559
|
+
residuals = ys_ref - y_ref_ws
|
|
560
|
+
|
|
561
|
+
residuals_mu = np.mean(residuals)
|
|
562
|
+
residuals_sd = np.std(residuals)
|
|
563
|
+
|
|
564
|
+
# Adjust the mean with the mean of the residuals
|
|
565
|
+
if ys is None:
|
|
566
|
+
# make and adjust predictions
|
|
567
|
+
ys_out[idx_s], s2_out[idx_s] = self.predict(
|
|
568
|
+
hyp, None, None, Xs[idx_s, :])
|
|
569
|
+
ys_out[idx_s] = ys_out[idx_s] - residuals_mu
|
|
570
|
+
|
|
571
|
+
# Set the deviation to the devations of the residuals
|
|
572
|
+
s2_out[idx_s] = np.ones(len(s2_out[idx_s]))*residuals_sd**2
|
|
573
|
+
else:
|
|
574
|
+
# adjust the data
|
|
575
|
+
if self.warp is not None:
|
|
576
|
+
y_ws = self.warp.f(
|
|
577
|
+
ys[idx_s], hyp[1:self.warp.get_n_params()+1])
|
|
578
|
+
ys_out[idx_s] = y_ws + residuals_mu
|
|
579
|
+
ys_out[idx_s] = self.warp.invf(
|
|
580
|
+
ys_out[idx_s], hyp[1:self.warp.get_n_params()+1])
|
|
581
|
+
else:
|
|
582
|
+
ys = ys - residuals_mu
|
|
583
|
+
s2_out = None
|
|
584
|
+
|
|
585
|
+
return ys_out, s2_out
|
|
Binary file
|