pcntoolkit 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcntoolkit/model/gp.py ADDED
@@ -0,0 +1,489 @@
+ from __future__ import print_function
+ from __future__ import division
+
+ import os
+ import sys
+ import numpy as np
+ from scipy import optimize
+ from numpy.linalg import solve, LinAlgError
+ from numpy.linalg import cholesky as chol
+ from six import with_metaclass
+ from abc import ABCMeta, abstractmethod
+
+ try:  # run as a package if installed
+     from pcntoolkit.util.utils import squared_dist
+ except ImportError:
+     # fall back to running as a standalone module: make the parent
+     # directory importable and load squared_dist from there
+     path = os.path.abspath(os.path.dirname(__file__))
+     path = os.path.dirname(path)  # parent directory
+     if path not in sys.path:
+         sys.path.append(path)
+     del path
+
+     from util.utils import squared_dist
+
+ # --------------------
+ # Covariance functions
+ # --------------------
+
+
+ class CovBase(with_metaclass(ABCMeta)):
+     """ Base class for covariance functions.
+
+     All covariance functions must define the following methods::
+
+         CovFunction.get_n_params()
+         CovFunction.cov()
+         CovFunction.dcov()
+     """
+
+     def __init__(self, x=None):
+         self.n_params = np.nan
+
+     def get_n_params(self):
+         """ Report the number of parameters required """
+
+         assert not np.isnan(self.n_params), \
+             "Covariance function not initialised"
+
+         return self.n_params
+
+     @abstractmethod
+     def cov(self, theta, x, z=None):
+         """ Return the full covariance (or cross-covariance if z is given) """
+
+     @abstractmethod
+     def dcov(self, theta, x, i):
+         """ Return the derivative of the covariance function with respect to
+             the i-th hyperparameter """
+
+
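+ # Example (a minimal sketch): a custom kernel only needs to subclass
+ # CovBase, set n_params, and implement cov() and dcov(). The constant
+ # kernel below is made up for illustration and is not part of this module.
+ #
+ #     class CovConst(CovBase):
+ #         def __init__(self, x=None):
+ #             self.n_params = 1
+ #
+ #         def cov(self, theta, x, z=None):
+ #             sf2 = np.exp(2*theta[0])          # signal variance
+ #             if z is None:
+ #                 z = x
+ #             return sf2 * np.ones((x.shape[0], z.shape[0]))
+ #
+ #         def dcov(self, theta, x, i):
+ #             if i == 0:                        # d/d log(sf) of exp(2*log(sf))
+ #                 return 2 * self.cov(theta, x)
+ #             raise ValueError("Invalid covariance function parameter")
+
+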
+ class CovLin(CovBase):
+     """ Linear covariance function (no hyperparameters)
+     """
+
+     def __init__(self, x=None):
+         self.n_params = 0
+         self.first_call = False
+
+     def cov(self, theta, x, z=None):
+         # warn (once) if hyperparameters are passed to this kernel,
+         # which has none
+         if not self.first_call and theta is not None and len(theta) > 0:
+             self.first_call = True
+             print("CovLin: ignoring unnecessary hyperparameter ...")
+
+         if z is None:
+             z = x
+
+         K = x.dot(z.T)
+         return K
+
+     def dcov(self, theta, x, i):
+         # there are no hyperparameters to differentiate with respect to
+         raise ValueError("Invalid covariance function parameter")
+
+
+ class CovSqExp(CovBase):
+     """ Ordinary squared exponential covariance function.
+         The hyperparameters are::
+
+             theta = ( log(ell), log(sf) )
+
+         where ell is a lengthscale parameter and sf2 = sf**2 is the
+         signal variance
+     """
+
+     def __init__(self, x=None):
+         self.n_params = 2
+
+     def cov(self, theta, x, z=None):
+         self.ell = np.exp(theta[0])
+         self.sf2 = np.exp(2*theta[1])
+
+         if z is None:
+             z = x
+
+         R = squared_dist(x/self.ell, z/self.ell)
+         K = self.sf2 * np.exp(-R/2)
+         return K
+
+     def dcov(self, theta, x, i):
+         self.ell = np.exp(theta[0])
+         self.sf2 = np.exp(2*theta[1])
+
+         R = squared_dist(x/self.ell, x/self.ell)
+
+         if i == 0:    # return derivative of lengthscale parameter
+             dK = self.sf2 * np.exp(-R/2) * R
+             return dK
+         elif i == 1:  # return derivative of signal variance parameter
+             dK = 2*self.sf2 * np.exp(-R/2)
+             return dK
+         else:
+             raise ValueError("Invalid covariance function parameter")
+
+
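+ # Example (a minimal sketch): building a squared exponential kernel. The
+ # data and hyperparameter values are made up for illustration; note that
+ # both hyperparameters are supplied on a log scale.
+ #
+ #     x = np.random.randn(20, 2)           # N x D data matrix
+ #     covfunc = CovSqExp(x)
+ #     theta = np.log([1.0, 0.5])           # (log(ell), log(sf))
+ #     K = covfunc.cov(theta, x)            # 20 x 20 kernel matrix
+ #     dK = covfunc.dcov(theta, x, i=0)     # derivative w.r.t. log(ell)
+
+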
+ class CovSqExpARD(CovBase):
+     """ Squared exponential covariance function with ARD
+         The hyperparameters are::
+
+             theta = ( log(ell_1), ..., log(ell_D), log(sf) )
+
+         where ell_i are lengthscale parameters and sf2 = sf**2 is the
+         signal variance
+     """
+
+     def __init__(self, x=None):
+         if x is None:
+             raise ValueError("N x D data matrix must be supplied as input")
+         if len(x.shape) == 1:
+             self.D = 1
+         else:
+             self.D = x.shape[1]
+         self.n_params = self.D + 1
+
+     def cov(self, theta, x, z=None):
+         self.ell = np.exp(theta[0:self.D])
+         self.sf2 = np.exp(2*theta[self.D])
+
+         if z is None:
+             z = x
+
+         R = squared_dist(x.dot(np.diag(1./self.ell)),
+                          z.dot(np.diag(1./self.ell)))
+         K = self.sf2*np.exp(-R/2)
+         return K
+
+     def dcov(self, theta, x, i):
+         K = self.cov(theta, x)
+         if i < self.D:     # return derivative of lengthscale parameter
+             dK = K * squared_dist(x[:, i]/self.ell[i], x[:, i]/self.ell[i])
+             return dK
+         elif i == self.D:  # return derivative of signal variance parameter
+             dK = 2*K
+             return dK
+         else:
+             raise ValueError("Invalid covariance function parameter")
+
+
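+ # Example (a minimal sketch): with ARD each input dimension gets its own
+ # lengthscale, so a D-column data matrix needs D + 1 hyperparameters. The
+ # values below are made up for illustration.
+ #
+ #     x = np.random.randn(20, 3)               # N x D data matrix (D = 3)
+ #     covfunc = CovSqExpARD(x)
+ #     theta = np.log([1.0, 2.0, 0.5, 1.0])     # (log(ell_1..ell_3), log(sf))
+ #     K = covfunc.cov(theta, x)                # 20 x 20 kernel matrix
+
+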
+ class CovSum(CovBase):
+     """ Sum of covariance functions. The component functions are passed in
+         as a list of class names and initialised automatically. For example::
+
+             C = CovSum(x, ('CovLin', 'CovSqExpARD'))
+             K = C.cov(theta, x)
+
+         The hyperparameter vector theta is the concatenation of the
+         hyperparameters of the component covariance functions, in the
+         order in which they were supplied.
+     """
+
+     def __init__(self, x=None, covfuncnames=None):
+         if x is None:
+             raise ValueError("N x D data matrix must be supplied as input")
+         if covfuncnames is None:
+             raise ValueError("A list of covariance functions is required")
+         self.covfuncs = []
+         self.n_params = 0
+         for cname in covfuncnames:
+             covfunc = eval(cname + '(x)')
+             self.n_params += covfunc.get_n_params()
+             self.covfuncs.append(covfunc)
+
+         if len(x.shape) == 1:
+             self.N = len(x)
+             self.D = 1
+         else:
+             self.N, self.D = x.shape
+
+     def cov(self, theta, x, z=None):
+         theta_offset = 0
+         for ci, covfunc in enumerate(self.covfuncs):
+             # extract this component's slice of the hyperparameter vector
+             n_params_c = covfunc.get_n_params()
+             theta_c = [theta[c] for c in
+                        range(theta_offset, theta_offset + n_params_c)]
+             theta_offset += n_params_c
+
+             if ci == 0:
+                 K = covfunc.cov(theta_c, x, z)
+             else:
+                 K += covfunc.cov(theta_c, x, z)
+         return K
+
+     def dcov(self, theta, x, i):
+         # the derivative of a sum of kernels with respect to hyperparameter
+         # i is the derivative of the component that owns that hyperparameter
+         theta_offset = 0
+         for covfunc in self.covfuncs:
+             n_params_c = covfunc.get_n_params()
+             if theta_offset <= i < theta_offset + n_params_c:
+                 theta_c = [theta[c] for c in
+                            range(theta_offset, theta_offset + n_params_c)]
+                 return covfunc.dcov(theta_c, x, i - theta_offset)
+             theta_offset += n_params_c
+         raise ValueError("Invalid covariance function parameter")
+
+
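+ # Example (a minimal sketch): summing a linear and a squared exponential
+ # kernel. The component names are passed as strings and the hyperparameters
+ # are concatenated (CovLin contributes none, CovSqExp contributes two).
+ # Data and values below are made up for illustration.
+ #
+ #     x = np.random.randn(20, 2)
+ #     C = CovSum(x, ('CovLin', 'CovSqExp'))
+ #     theta = np.log([1.0, 0.5])           # (log(ell), log(sf))
+ #     K = C.cov(theta, x)                  # 20 x 20 kernel matrix
+
+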
+ # -----------------------
+ # Gaussian process models
+ # -----------------------
+
+
+ class GPR:
+     """Gaussian process regression
+
+     Estimation and prediction of Gaussian process regression models
+
+     Basic usage (see also the illustrative sketch at the end of this
+     module)::
+
+         G = GPR()
+         hyp = G.estimate(hyp0, cov, X, y)
+         ys, ys2 = G.predict(hyp, X, y, Xs)
+
+     where the variables are
+
+     :param hyp: vector of hyperparameters
+     :param cov: covariance function
+     :param X: N x D data array
+     :param y: 1D Array of targets (length N)
+     :param Xs: Nte x D array of test cases
+     :param hyp0: starting estimates for hyperparameter optimisation
+
+     :returns: * ys - predictive mean
+               * ys2 - predictive variance
+
+     The hyperparameters are::
+
+         hyp = ( log(sn), (cov function params) )  # hyp is a list or array
+
+     where sn is the noise standard deviation. The implementation and
+     notation follows Rasmussen and Williams (2006). As in the gpml toolbox,
+     these parameters are estimated using conjugate gradient optimisation of
+     the marginal likelihood. Note that there is no explicit mean function,
+     thus the gpr routines are limited to modelling zero-mean processes.
+
+     Reference:
+     C. Rasmussen and C. Williams (2006) Gaussian Processes for Machine
+     Learning
+
+     Written by A. Marquand
+     """
+
+     def __init__(self, hyp=None, covfunc=None, X=None, y=None, n_iter=100,
+                  tol=1e-3, verbose=False, warp=None):
+
+         self.hyp = np.nan
+         self.nlZ = np.nan
+         self.tol = tol          # not used at present
+         self.n_iter = n_iter
+         self.verbose = verbose
+
+         # set up warped likelihood
+         if warp is None:
+             self.warp = None
+             self.n_warp_param = 0
+         else:
+             self.warp = warp
+             self.n_warp_param = warp.get_n_params()
+
+         self.gamma = None
+
+     def _updatepost(self, hyp, covfunc):
+         # recompute the posterior only if the hyperparameters or the
+         # covariance function have changed since the last call
+         hypeq = np.asarray(hyp == self.hyp)
+         if hypeq.all() and hasattr(self, 'alpha') and \
+            (hasattr(self, 'covfunc') and covfunc == self.covfunc):
+             return False
+         else:
+             return True
+
+     def post(self, hyp, covfunc, X, y):
+         """ Generic function to compute posterior distribution.
+         """
+
+         if len(hyp.shape) > 1:  # force 1d hyperparameter array
+             hyp = hyp.flatten()
+
+         if len(X.shape) == 1:
+             X = X[:, np.newaxis]
+         self.N, self.D = X.shape
+
+         # hyperparameters
+         sn2 = np.exp(2*hyp[0])  # noise variance
+         if self.warp is not None:  # parameters for warping the likelihood
+             n_lik_param = self.n_warp_param + 1
+         else:
+             n_lik_param = 1
+         theta = hyp[n_lik_param:]  # (generic) covariance hyperparameters
+
+         if self.verbose:
+             print("estimating posterior ... | hyp=", hyp)
+
+         # Cholesky factorisation of K + sn2*I, then solve for
+         # alpha = (K + sn2*I)^-1 y (Rasmussen and Williams, Algorithm 2.1)
+         self.K = covfunc.cov(theta, X)
+         self.L = chol(self.K + sn2*np.eye(self.N))
+         self.alpha = solve(self.L.T, solve(self.L, y))
+         self.hyp = hyp
+         self.covfunc = covfunc
+
+     def loglik(self, hyp, covfunc, X, y):
+         """ Function to compute the negative log (marginal) likelihood
+         """
+
+         if len(hyp.shape) > 1:  # force 1d hyperparameter array
+             hyp = hyp.flatten()
+
+         # load or recompute posterior
+         if self.verbose:
+             print("computing likelihood ... | hyp=", hyp)
+
+         # parameters for warping the likelihood function
+         if self.warp is not None:
+             gamma = hyp[1:(self.n_warp_param + 1)]
+             y_unwarped = y  # keep the raw targets for the Jacobian term
+             y = self.warp.f(y, gamma)
+
+         if self._updatepost(hyp, covfunc):
+             try:
+                 self.post(hyp, covfunc, X, y)
+             except (ValueError, LinAlgError):
+                 print("Warning: Estimation of posterior distribution failed")
+                 self.nlZ = 1/np.finfo(float).eps
+                 return self.nlZ
+
+         # negative log marginal likelihood (Rasmussen and Williams, eq. 2.30)
+         self.nlZ = 0.5*y.T.dot(self.alpha) + sum(np.log(np.diag(self.L))) + \
+             0.5*self.N*np.log(2*np.pi)
+
+         if self.warp is not None:
+             # add in the Jacobian of the warping function
+             self.nlZ = self.nlZ - sum(np.log(self.warp.df(y_unwarped, gamma)))
+
+         # make sure the output is finite to stop the minimizer getting upset
+         if not np.isfinite(self.nlZ):
+             self.nlZ = 1/np.finfo(float).eps
+
+         if self.verbose:
+             print("nlZ= ", self.nlZ, " | hyp=", hyp)
+
+         return self.nlZ
+
+     def dloglik(self, hyp, covfunc, X, y):
+         """ Function to compute derivatives of the negative log marginal
+             likelihood
+         """
+
+         if len(hyp.shape) > 1:  # force 1d hyperparameter array
+             hyp = hyp.flatten()
+
+         if self.warp is not None:
+             raise ValueError('optimization with derivatives is not yet ' +
+                              'supported for warped likelihood')
+
+         # hyperparameters
+         sn2 = np.exp(2*hyp[0])  # noise variance
+         theta = hyp[1:]         # (generic) covariance hyperparameters
+
+         # load posterior and prior covariance
+         if self._updatepost(hyp, covfunc):
+             try:
+                 self.post(hyp, covfunc, X, y)
+             except (ValueError, LinAlgError):
+                 print("Warning: Estimation of posterior distribution failed")
+                 dnlZ = np.sign(self.dnlZ) / np.finfo(float).eps
+                 return dnlZ
+
+         # compute Q = alpha*alpha' - inv(K)
+         Q = np.outer(self.alpha, self.alpha) - \
+             solve(self.L.T, solve(self.L, np.eye(self.N)))
+
+         # initialise derivatives
+         self.dnlZ = np.zeros(len(hyp))
+
+         # noise variance
+         self.dnlZ[0] = -sn2*np.trace(Q)
+
+         # covariance parameter(s)
+         for par in range(0, len(theta)):
+             # compute -0.5*trace(Q.dot(dK/d[theta_i])) efficiently
+             dK = covfunc.dcov(theta, X, i=par)
+             self.dnlZ[par+1] = -0.5*np.sum(np.sum(Q*dK.T))
+
+         # make sure the gradient is finite to stop the minimizer getting upset
+         if not all(np.isfinite(self.dnlZ)):
+             bad = np.where(np.logical_not(np.isfinite(self.dnlZ)))
+             for b in bad:
+                 self.dnlZ[b] = np.sign(self.dnlZ[b]) / np.finfo(float).eps
+
+         if self.verbose:
+             print("dnlZ= ", self.dnlZ, " | hyp=", hyp)
+
+         return self.dnlZ
+
+     # model estimation (optimization)
+     def estimate(self, hyp0, covfunc, X, y, optimizer='cg'):
+         """ Function to estimate the model
+         """
+         if len(X.shape) == 1:
+             X = X[:, np.newaxis]
+
+         self.hyp0 = hyp0
+
+         if optimizer.lower() == 'cg':  # conjugate gradients
+             out = optimize.fmin_cg(self.loglik, hyp0, self.dloglik,
+                                    (covfunc, X, y), disp=True, gtol=self.tol,
+                                    maxiter=self.n_iter, full_output=1)
+
+         elif optimizer.lower() == 'powell':  # Powell's method
+             out = optimize.fmin_powell(self.loglik, hyp0, (covfunc, X, y),
+                                        full_output=1)
+         else:
+             raise ValueError("unknown optimizer")
+
+         # always return a 1d array; the optimizer sometimes changes dimensions
+         if len(out[0].shape) > 1:
+             self.hyp = out[0].flatten()
+         else:
+             self.hyp = out[0]
+         self.nlZ = out[1]
+         self.optimizer = optimizer
+
+         return self.hyp
+
+     def predict(self, hyp, X, y, Xs):
+         """ Function to make predictions from the model
+         """
+         if len(hyp.shape) > 1:  # force 1d hyperparameter array
+             hyp = hyp.flatten()
+
+         # ensure X and Xs are multi-dimensional arrays
+         if len(Xs.shape) == 1:
+             Xs = Xs[:, np.newaxis]
+         if len(X.shape) == 1:
+             X = X[:, np.newaxis]
+
+         # parameters for warping the likelihood function
+         if self.warp is not None:
+             gamma = hyp[1:(self.n_warp_param + 1)]
+             y = self.warp.f(y, gamma)
+
+         # reestimate posterior (avoids numerical problems with optimizer)
+         self.post(hyp, self.covfunc, X, y)
+
+         # hyperparameters
+         sn2 = np.exp(2*hyp[0])  # noise variance
+         # (generic) covariance hyperparameters
+         theta = hyp[(self.n_warp_param + 1):]
+
+         Ks = self.covfunc.cov(theta, Xs, X)
+         kss = self.covfunc.cov(theta, Xs)
+
+         # predictive mean
+         ymu = Ks.dot(self.alpha)
+
+         # predictive variance for a noisy test input (returned as a full
+         # Nte x Nte predictive covariance matrix)
+         v = solve(self.L, Ks.T)
+         ys2 = kss - v.T.dot(v) + sn2
+
+         return ymu, ys2
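+
+
+ # Example (a minimal end-to-end sketch): fitting and predicting on made-up
+ # 1-D data, using only the classes defined above. hyp0 stacks the log noise
+ # standard deviation in front of the covariance hyperparameters.
+ #
+ #     X = np.linspace(-3, 3, 50)[:, np.newaxis]
+ #     y = np.sin(X).ravel() + 0.1*np.random.randn(50)
+ #     Xs = np.linspace(-3, 3, 100)[:, np.newaxis]
+ #
+ #     covfunc = CovSqExp(X)
+ #     hyp0 = np.zeros(1 + covfunc.get_n_params())  # (log(sn), log(ell), log(sf))
+ #     G = GPR()
+ #     hyp = G.estimate(hyp0, covfunc, X, y)
+ #     ymu, ys2 = G.predict(hyp, X, y, Xs)          # mean and covariance
+ #     s2 = np.diag(ys2)                            # pointwise variances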