sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgptools/__init__.py +3 -4
- sgptools/core/__init__.py +1 -0
- sgptools/{models/core → core}/augmented_gpr.py +11 -17
- sgptools/{models/core → core}/augmented_sgpr.py +27 -34
- sgptools/core/osgpr.py +417 -0
- sgptools/core/transformations.py +699 -0
- sgptools/kernels/__init__.py +0 -8
- sgptools/kernels/attentive_kernel.py +214 -69
- sgptools/kernels/neural_kernel.py +268 -92
- sgptools/kernels/neural_network.py +127 -28
- sgptools/methods.py +1047 -0
- sgptools/objectives.py +275 -0
- sgptools/utils/__init__.py +0 -9
- sgptools/utils/data.py +452 -149
- sgptools/utils/gpflow.py +335 -174
- sgptools/utils/metrics.py +375 -102
- sgptools/utils/misc.py +145 -111
- sgptools/utils/tsp.py +224 -84
- sgptools-2.0.0.dist-info/METADATA +216 -0
- sgptools-2.0.0.dist-info/RECORD +23 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/WHEEL +1 -1
- sgptools/models/__init__.py +0 -10
- sgptools/models/bo.py +0 -118
- sgptools/models/cma_es.py +0 -121
- sgptools/models/continuous_sgp.py +0 -68
- sgptools/models/core/__init__.py +0 -9
- sgptools/models/core/osgpr.py +0 -291
- sgptools/models/core/transformations.py +0 -434
- sgptools/models/greedy_mi.py +0 -115
- sgptools/models/greedy_sgp.py +0 -97
- sgptools-1.2.0.dist-info/METADATA +0 -39
- sgptools-1.2.0.dist-info/RECORD +0 -27
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info/licenses}/LICENSE.txt +0 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/top_level.txt +0 -0
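The file list shows a package-level reorganization: the 1.x `sgptools.models` package is gone, its core pieces (`augmented_gpr.py`, `augmented_sgpr.py`, `osgpr.py`, `transformations.py`) now live under `sgptools.core`, and the individual optimizer modules are consolidated into the new `sgptools/methods.py` and `sgptools/objectives.py`. For code written against 1.2.0, the most immediate consequence is the import-path change; a minimal, hedged sketch of the new locations (only the paths visible in the renames above are assumed here):

```python
# Hedged sketch of the path changes implied by the file list above.
# 1.x:  sgptools/models/core/transformations.py, sgptools/models/core/osgpr.py
# 2.0:  sgptools/core/transformations.py,        sgptools/core/osgpr.py
from sgptools.core.transformations import Transform  # was sgptools.models.core.transformations
from sgptools.core.osgpr import init_osgpr           # was sgptools.models.core.osgpr
```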
sgptools/__init__.py
CHANGED
@@ -1,5 +1,4 @@
 """
-
 SGP-Tools: SGP-based Optimization Tools
 
 Software Suite for Sensor Placement and Informative Path Planning.
@@ -12,9 +11,9 @@ The library includes python code for the following:
 
 """
 
-__version__ = "1.2.0"
+__version__ = "2.0.0"
 __author__ = 'Kalvik'
 
-from .
-from .
+from .core import *
+from .kernels import *
 from .utils import *
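Per the diff above, the top-level package now re-exports the `core`, `kernels`, and `utils` submodules and reports the new version string. A tiny hedged check (assumes sgptools 2.0.0 is installed):

```python
# Hedged sketch: the top-level package re-exports core, kernels, and utils.
import sgptools

assert sgptools.__version__ == "2.0.0"
```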
sgptools/core/__init__.py
ADDED
@@ -0,0 +1 @@
+# sgptools/core/__init__.py
sgptools/{models/core → core}/augmented_gpr.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Original GP code from GPflow library (https://github.com/GPflow/GPflow)
-
 """Provides a Gaussian process model with expand and aggregate functions
 """
 
@@ -25,6 +24,7 @@ from gpflow.base import InputData, MeanAndVariance
 from gpflow.utilities import add_likelihood_noise_cov, assert_params_false
 from .transformations import Transform
 
+
 class AugmentedGPR(GPR):
     """GPR model from the GPFlow library augmented to use a transform object's
     expand and aggregate functions on the data points where necessary.
@@ -39,24 +39,18 @@ class AugmentedGPR(GPR):
         noise_variance (float): data variance
         transform (Transform): Transform object
     """
-    def __init__(
-            self,
-            *args,
-            transform=None,
-            **kwargs
-    ):
-        super().__init__(
-            *args,
-            **kwargs
-        )
+
+    def __init__(self, *args, transform=None, **kwargs):
+        super().__init__(*args, **kwargs)
         if transform is None:
             self.transform = Transform()
         else:
             self.transform = transform
 
     def predict_f(
-        self,
-        Xnew: InputData, full_cov: bool = True,
+        self,
+        Xnew: InputData,
+        full_cov: bool = True,
         full_output_cov: bool = False,
         aggregate_train: bool = False,
     ) -> MeanAndVariance:
@@ -84,10 +78,10 @@ class AugmentedGPR(GPR):
         # which can when train and test data are the same size
         if kmn.shape[0] != kmn.shape[1]:
             kmn = self.transform.aggregate(kmn)
-
+
         conditional = gpflow.conditionals.base_conditional
         f_mean_zero, f_var = conditional(
-            kmn, kmm_plus_s, knn, err, full_cov=full_cov,
-            white=False)  # [N, P], [N, P] or [P, N, N]
+            kmn, kmm_plus_s, knn, err, full_cov=full_cov,
+            white=False)  # [N, P], [N, P] or [P, N, N]
         f_mean = f_mean_zero + self.mean_function(Xnew)
-        return f_mean, f_var
+        return f_mean, f_var
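The net effect of the `augmented_gpr.py` changes is formatting cleanup plus the relocated `Transform` import; the public behaviour (a GPflow `GPR` whose `predict_f` routes points through the transform's `expand`/`aggregate`) is unchanged. A minimal usage sketch, assuming the standard GPflow `GPR` constructor arguments and the default `Transform` (both inferred from this diff, not shown here in full):

```python
# Hedged sketch: AugmentedGPR is a gpflow GPR plus a `transform` keyword.
import numpy as np
import gpflow
from sgptools.core.augmented_gpr import AugmentedGPR
from sgptools.core.transformations import Transform

X = np.random.rand(50, 2)
y = np.sin(X[:, :1]) + 0.1 * np.random.randn(50, 1)

model = AugmentedGPR(
    data=(X, y),
    kernel=gpflow.kernels.SquaredExponential(),
    noise_variance=0.1,
    transform=Transform(),  # base transform; IPP-specific transforms plug in here
)
# predict_f defaults to full_cov=True in this class (see the diff above)
mean, var = model.predict_f(np.random.rand(5, 2), full_cov=True)
```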
sgptools/{models/core → core}/augmented_sgpr.py
CHANGED
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Original SGP code from GPflow library (https://github.com/GPflow/GPflow)
-
 """Provides a sparse Gaussian process model with update, expand, and aggregate functions
 """
 
@@ -49,37 +48,34 @@ class AugmentedSGPR(SGPR):
         inducing_variable_time (ndarray): (m, d); Temporal dimensions of the inducing points,
             used when modeling spatio-temporal IPP
     """
-    def __init__(
-            self,
-            *args,
-            transform=None,
-            inducing_variable_time=None,
-            **kwargs
-    ):
-        super().__init__(
-            *args,
-            **kwargs
-        )
+
+    def __init__(self,
+                 *args,
+                 transform=None,
+                 inducing_variable_time=None,
+                 **kwargs):
+        super().__init__(*args, **kwargs)
         if transform is None:
             self.transform = Transform()
         else:
             self.transform = transform
 
         if inducing_variable_time is not None:
-            self.inducing_variable_time = inducingpoint_wrapper(
+            self.inducing_variable_time = inducingpoint_wrapper(
+                inducing_variable_time)
             self.transform.inducing_variable_time = self.inducing_variable_time
         else:
             self.inducing_variable_time = None
 
-    def update(self,
+    def update(self, kernel, noise_variance):
         """Update SGP noise variance and kernel function parameters
 
         Args:
-            noise_variance (float): data variance
             kernel (gpflow.kernels.Kernel): gpflow kernel function
+            noise_variance (float): data variance
         """
         self.likelihood.variance.assign(noise_variance)
-        for self_var, var in zip(self.kernel.trainable_variables,
+        for self_var, var in zip(self.kernel.trainable_variables,
                                  kernel.trainable_variables):
             self_var.assign(var)
 
@@ -91,7 +87,7 @@ class AugmentedSGPR(SGPR):
         A is M x N, B is M x M, LB is M x M, AAT is M x M
         """
         x, _ = self.data
-
+
         iv = self.inducing_variable.Z  # [M]
         iv = self.transform.expand(iv)
 
@@ -130,9 +126,10 @@ class AugmentedSGPR(SGPR):
         constraints = self.transform.constraints(self.inducing_variable.Z)
         return const + logdet + quad + constraints
 
-    def predict_f(
-
-
+    def predict_f(self,
+                  Xnew: InputData,
+                  full_cov: bool = False,
+                  full_output_cov: bool = False) -> MeanAndVariance:
 
         # could copy into posterior into a fused version
         """
@@ -141,7 +138,7 @@ class AugmentedSGPR(SGPR):
         notebook.
         """
         X_data, Y_data = self.data
-
+
         iv = self.inducing_variable.Z
         iv = self.transform.expand(iv)
 
@@ -155,8 +152,7 @@ class AugmentedSGPR(SGPR):
         L = tf.linalg.cholesky(kuu)
         A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma
         B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye(
-            num_inducing, dtype=default_float()
-        )  # cache qinv
+            num_inducing, dtype=default_float())  # cache qinv
         LB = tf.linalg.cholesky(B)
         Aerr = tf.linalg.matmul(A, err)
         c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma
@@ -164,18 +160,15 @@ class AugmentedSGPR(SGPR):
         tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
         mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
         if full_cov:
-            var = (
-
-
-
-
-            var = tf.tile(var[None, ...], [self.num_latent_gps, 1, 1])  # [P, N, N]
+            var = (self.kernel(Xnew) +
+                   tf.linalg.matmul(tmp2, tmp2, transpose_a=True) -
+                   tf.linalg.matmul(tmp1, tmp1, transpose_a=True))
+            var = tf.tile(var[None, ...],
+                          [self.num_latent_gps, 1, 1])  # [P, N, N]
         else:
-            var = (
-
-
-                - tf.reduce_sum(tf.square(tmp1), 0)
-            )
+            var = (self.kernel(Xnew, full_cov=False) +
+                   tf.reduce_sum(tf.square(tmp2), 0) -
+                   tf.reduce_sum(tf.square(tmp1), 0))
         var = tf.tile(var[:, None], [1, self.num_latent_gps])
 
         return mean + self.mean_function(Xnew), var
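As with the GPR variant, the `augmented_sgpr.py` changes are mostly reformatting; the behavioural detail worth noting is the `update(kernel, noise_variance)` signature, with the kernel argument first. A minimal usage sketch, assuming the standard GPflow `SGPR` constructor arguments (an assumption; only the added keywords are visible in this diff):

```python
# Hedged sketch: AugmentedSGPR is a gpflow SGPR plus `transform` and
# `inducing_variable_time` keywords, and an `update` method that copies
# kernel/noise parameters from another (e.g. freshly trained) model.
import numpy as np
import gpflow
from sgptools.core.augmented_sgpr import AugmentedSGPR

X = np.random.rand(100, 2)
y = np.sin(X[:, :1]) + 0.1 * np.random.randn(100, 1)
Z = X[:10].copy()  # initial inducing points

sgp = AugmentedSGPR((X, y),
                    kernel=gpflow.kernels.SquaredExponential(),
                    inducing_variable=Z,
                    noise_variance=0.1)

# Later, sync hyperparameters from a separately trained kernel/likelihood:
trained_kernel = gpflow.kernels.SquaredExponential(lengthscales=0.5)
sgp.update(trained_kernel, noise_variance=0.05)  # kernel first, then noise
```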
sgptools/core/osgpr.py
ADDED
@@ -0,0 +1,417 @@
+# Copyright 2024 The streaming_sparse_gp Contributors. All Rights Reserved.
+# https://github.com/thangbui/streaming_sparse_gp/tree/master
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Provides a streaming sparse Gaussian process model along with initialization function
+"""
+
+import tensorflow as tf
+import numpy as np
+
+import gpflow
+from gpflow.inducing_variables import InducingPoints
+from gpflow.models import GPModel, InternalDataTrainingLossMixin
+from gpflow import covariances
+from ..utils.data import get_inducing_pts
+from typing import Union, Optional
+
+
+class OSGPR_VFE(GPModel, InternalDataTrainingLossMixin):
+    """Online Sparse Variational GP regression model from [streaming_sparse_gp](https://github.com/thangbui/streaming_sparse_gp/tree/master)
+
+    Refer to the following paper for more details:
+        - Streaming Gaussian process approximations [Bui et al., 2017]
+
+    Args:
+        data (tuple): (X, y) ndarrays with inputs (n, d) and labels (n, 1)
+        kernel (gpflow.kernels.Kernel): gpflow kernel function
+        mu_old (ndarray): mean of old `q(u)`; here `u` are the latents corresponding to the inducing points `Z_old`
+        Su_old (ndarray): posterior covariance of old `q(u)`
+        Kaa_old (ndarray): prior covariance of old `q(u)`
+        Z_old (ndarray): (m_old, d): Old initial inducing points
+        Z (ndarray): (m_new, d): New initial inducing points
+        mean_function (function): GP mean function
+    """
+
+    def __init__(self,
+                 data,
+                 kernel,
+                 mu_old,
+                 Su_old,
+                 Kaa_old,
+                 Z_old,
+                 Z,
+                 mean_function=None):
+        self.X, self.Y = self.data = gpflow.models.util.data_input_to_tensor(
+            data)
+        likelihood = gpflow.likelihoods.Gaussian()
+        num_latent_gps = GPModel.calc_num_latent_gps_from_data(
+            data, kernel, likelihood)
+        super().__init__(kernel, likelihood, mean_function, num_latent_gps)
+
+        self.inducing_variable = InducingPoints(Z)
+        self.num_data = self.X.shape[0]
+
+        self.mu_old = tf.Variable(mu_old,
+                                  shape=tf.TensorShape(None),
+                                  trainable=False)
+        self.M_old = Z_old.shape[0]
+        self.Su_old = tf.Variable(Su_old,
+                                  shape=tf.TensorShape(None),
+                                  trainable=False)
+        self.Kaa_old = tf.Variable(Kaa_old,
+                                   shape=tf.TensorShape(None),
+                                   trainable=False)
+        self.Z_old = tf.Variable(Z_old,
+                                 shape=tf.TensorShape(None),
+                                 trainable=False)
+
+    def init_Z(self) -> np.ndarray:
+        """
+        Initializes the new set of inducing points (Z) for the OSGPR model.
+        It combines a subset of the old inducing points (Z_old) with a subset
+        of the current training data (X).
+
+        Returns:
+            np.ndarray: (M, d); A NumPy array of the newly initialized inducing points,
+                combining old and new data-based points.
+        """
+        M = self.inducing_variable.Z.shape[0]
+        M_old = int(0.7 * M)  # Proportion of old inducing points to retain
+        M_new = M - M_old  # Proportion of new data points to select
+
+        # Randomly select M_old points from the old inducing points
+        old_Z = self.Z_old.numpy()[np.random.permutation(M)[0:M_old], :]
+
+        # Randomly select M_new points from the current training data
+        new_Z = self.X.numpy()[
+            np.random.permutation(self.X.shape[0])[0:M_new], :]
+
+        # Vertically stack the selected old and new points to form the new Z
+        Z = np.vstack((old_Z, new_Z))
+        return Z
+
+    def update(self, data, inducing_variable=None, update_inducing=True):
+        """
+        Configures the OSGPR model to adapt to a new batch of data.
+        This method updates the model's data, its inducing points (optionally),
+        and caches the posterior mean and covariance of the *old* inducing points
+        to facilitate the streaming update equations.
+
+        Note: After calling this update, the OSGPR model typically needs to be
+        trained further using gradient-based optimization to fully incorporate
+        the new data and optimize its parameters.
+
+        Args:
+            data (Tuple[np.ndarray, np.ndarray]): A tuple (X, y) representing the new batch
+                of input data `X` (n, d) and corresponding labels `y` (n, 1).
+            inducing_variable (Optional[np.ndarray]): (m_new, d); Optional NumPy array for the new
+                set of inducing points. If None and `update_inducing`
+                is True, `init_Z` will be called to determine them.
+                Defaults to None.
+            update_inducing (bool): If True, the inducing points will be updated. If False,
+                they will remain as they were before the update call.
+                Defaults to True.
+        """
+        self.X, self.Y = self.data = gpflow.models.util.data_input_to_tensor(
+            data)
+        self.num_data = self.X.shape[0]
+
+        # Store the current inducing points as 'old' for the next update step
+        self.Z_old.assign(self.inducing_variable.Z)
+
+        # Update the inducing points based on `update_inducing` flag
+        if update_inducing:
+            if inducing_variable is None:
+                # If no explicit inducing_variable is provided, initialize new ones
+                new_Z_init = self.init_Z()
+            else:
+                # Use the explicitly provided inducing_variable
+                new_Z_init = inducing_variable
+            self.inducing_variable.Z.assign(
+                tf.constant(new_Z_init, dtype=self.inducing_variable.Z.dtype))
+        # If update_inducing is False, inducing_variable.Z retains its current value.
+
+        # Get posterior mean and covariance for the *old* inducing points using the current model state
+        mu_old, Su_old = self.predict_f(self.Z_old, full_cov=True)
+        self.mu_old.assign(mu_old)
+        self.Su_old.assign(Su_old)
+
+        # Get the prior covariance matrix for the *old* inducing points using the current kernel
+        Kaa_old = self.kernel(self.Z_old)
+        self.Kaa_old.assign(Kaa_old)
+
+    def _common_terms(self):
+        Mb = self.inducing_variable.num_inducing
+        Ma = self.M_old
+        # jitter = gpflow.default_jitter()
+        jitter = gpflow.utilities.to_default_float(1e-4)
+        sigma2 = self.likelihood.variance
+        sigma = tf.sqrt(sigma2)
+
+        Saa = self.Su_old
+        ma = self.mu_old
+
+        # a is old inducing points, b is new
+        # f is training points
+        # s is test points
+        Kbf = covariances.Kuf(self.inducing_variable, self.kernel, self.X)
+        Kbb = covariances.Kuu(self.inducing_variable,
+                              self.kernel,
+                              jitter=jitter)
+        Kba = covariances.Kuf(self.inducing_variable, self.kernel, self.Z_old)
+        Kaa_cur = gpflow.utilities.add_noise_cov(self.kernel(self.Z_old),
+                                                 jitter)
+        Kaa = gpflow.utilities.add_noise_cov(self.Kaa_old, jitter)
+
+        err = self.Y - self.mean_function(self.X)
+
+        Sainv_ma = tf.linalg.solve(Saa, ma)
+        Sinv_y = self.Y / sigma2
+        c1 = tf.matmul(Kbf, Sinv_y)
+        c2 = tf.matmul(Kba, Sainv_ma)
+        c = c1 + c2
+
+        Lb = tf.linalg.cholesky(Kbb)
+        Lbinv_c = tf.linalg.triangular_solve(Lb, c, lower=True)
+        Lbinv_Kba = tf.linalg.triangular_solve(Lb, Kba, lower=True)
+        Lbinv_Kbf = tf.linalg.triangular_solve(Lb, Kbf, lower=True) / sigma
+        d1 = tf.matmul(Lbinv_Kbf, Lbinv_Kbf, transpose_b=True)
+
+        LSa = tf.linalg.cholesky(Saa)
+        Kab_Lbinv = tf.linalg.matrix_transpose(Lbinv_Kba)
+        LSainv_Kab_Lbinv = tf.linalg.triangular_solve(LSa,
+                                                      Kab_Lbinv,
+                                                      lower=True)
+        d2 = tf.matmul(LSainv_Kab_Lbinv, LSainv_Kab_Lbinv, transpose_a=True)
+
+        La = tf.linalg.cholesky(Kaa)
+        Lainv_Kab_Lbinv = tf.linalg.triangular_solve(La, Kab_Lbinv, lower=True)
+        d3 = tf.matmul(Lainv_Kab_Lbinv, Lainv_Kab_Lbinv, transpose_a=True)
+
+        D = tf.eye(Mb, dtype=gpflow.default_float()) + d1 + d2 - d3
+        D = gpflow.utilities.add_noise_cov(D, jitter)
+        LD = tf.linalg.cholesky(D)
+
+        LDinv_Lbinv_c = tf.linalg.triangular_solve(LD, Lbinv_c, lower=True)
+
+        return (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba,
+                LDinv_Lbinv_c, err, d1)
+
+    def maximum_log_likelihood_objective(self):
+        """
+        Construct a tensorflow function to compute the bound on the marginal
+        likelihood.
+        """
+
+        Mb = self.inducing_variable.num_inducing
+        Ma = self.M_old
+        jitter = gpflow.default_jitter()
+        # jitter = gpflow.utilities.to_default_float(1e-4)
+        sigma2 = self.likelihood.variance
+        sigma = tf.sqrt(sigma2)
+        N = self.num_data
+
+        Saa = self.Su_old
+        ma = self.mu_old
+
+        # a is old inducing points, b is new
+        # f is training points
+        Kfdiag = self.kernel(self.X, full_cov=False)
+        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba, LDinv_Lbinv_c,
+         err, Qff) = self._common_terms()
+
+        LSa = tf.linalg.cholesky(Saa)
+        Lainv_ma = tf.linalg.triangular_solve(LSa, ma, lower=True)
+
+        # constant term
+        bound = -0.5 * N * np.log(2 * np.pi)
+        # quadratic term
+        bound += -0.5 * tf.reduce_sum(tf.square(err)) / sigma2
+        # bound += -0.5 * tf.reduce_sum(ma * Sainv_ma)
+        bound += -0.5 * tf.reduce_sum(tf.square(Lainv_ma))
+        bound += 0.5 * tf.reduce_sum(tf.square(LDinv_Lbinv_c))
+        # log det term
+        bound += -0.5 * N * tf.reduce_sum(tf.math.log(sigma2))
+        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LD)))
+
+        # delta 1: trace term
+        bound += -0.5 * tf.reduce_sum(Kfdiag) / sigma2
+        bound += 0.5 * tf.reduce_sum(tf.linalg.diag_part(Qff))
+
+        # delta 2: a and b difference
+        bound += tf.reduce_sum(tf.math.log(tf.linalg.diag_part(La)))
+        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LSa)))
+
+        Kaadiff = Kaa_cur - tf.matmul(Lbinv_Kba, Lbinv_Kba, transpose_a=True)
+        Sainv_Kaadiff = tf.linalg.solve(Saa, Kaadiff)
+        Kainv_Kaadiff = tf.linalg.solve(Kaa, Kaadiff)
+
+        bound += -0.5 * tf.reduce_sum(
+            tf.linalg.diag_part(Sainv_Kaadiff) -
+            tf.linalg.diag_part(Kainv_Kaadiff))
+
+        return bound
+
+    def predict_f(self, Xnew, full_cov=False):
+        """
+        Compute the mean and variance of the latent function at some new points
+        Xnew.
+        """
+
+        # jitter = gpflow.default_jitter()
+        jitter = gpflow.utilities.to_default_float(1e-4)
+
+        # a is old inducing points, b is new
+        # f is training points
+        # s is test points
+        Kbs = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
+        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba, LDinv_Lbinv_c,
+         err, Qff) = self._common_terms()
+
+        Lbinv_Kbs = tf.linalg.triangular_solve(Lb, Kbs, lower=True)
+        LDinv_Lbinv_Kbs = tf.linalg.triangular_solve(LD, Lbinv_Kbs, lower=True)
+        mean = tf.matmul(LDinv_Lbinv_Kbs, LDinv_Lbinv_c, transpose_a=True)
+
+        if full_cov:
+            Kss = self.kernel(Xnew) + jitter * tf.eye(
+                tf.shape(Xnew)[0], dtype=gpflow.default_float())
+            var1 = Kss
+            var2 = -tf.matmul(Lbinv_Kbs, Lbinv_Kbs, transpose_a=True)
+            var3 = tf.matmul(LDinv_Lbinv_Kbs,
+                             LDinv_Lbinv_Kbs,
+                             transpose_a=True)
+            var = var1 + var2 + var3
+        else:
+            var1 = self.kernel(Xnew, full_cov=False)
+            var2 = -tf.reduce_sum(tf.square(Lbinv_Kbs), axis=0)
+            var3 = tf.reduce_sum(tf.square(LDinv_Lbinv_Kbs), axis=0)
+            var = var1 + var2 + var3
+
+        return mean + self.mean_function(Xnew), var
+
+
+def init_osgpr(X_train: np.ndarray,
+               num_inducing: int = 10,
+               lengthscales: Union[float, np.ndarray] = 1.0,
+               variance: float = 1.0,
+               noise_variance: float = 0.001,
+               kernel: Optional[gpflow.kernels.Kernel] = None,
+               ndim: int = 1) -> OSGPR_VFE:
+    """
+    Initializes an Online Sparse Variational Gaussian Process Regression (OSGPR_VFE) model.
+    This function first fits a standard Sparse Gaussian Process Regression (SGPR) model
+    to a dummy dataset (representing initial data/environment bounds) to obtain an
+    initial set of optimized inducing points and their corresponding posterior.
+    These are then used to set up the `OSGPR_VFE` model for streaming updates.
+
+    Args:
+        X_train (np.ndarray): (n, d); Unlabeled random sampled training points.
+            These points are primarily used to define the spatial bounds
+            and for initial selection of inducing points. Their labels are
+            set to zeros for the SGPR initialization.
+        num_inducing (int): The number of inducing points to use for the OSGPR model. Defaults to 10.
+        lengthscales (Union[float, np.ndarray]): Initial lengthscale(s) for the RBF kernel.
+            If a float, it's applied uniformly. If a NumPy array,
+            each element corresponds to a dimension. Defaults to 1.0.
+        variance (float): Initial variance (amplitude) for the RBF kernel. Defaults to 1.0.
+        noise_variance (float): Initial data noise variance for the Gaussian likelihood. Defaults to 0.001.
+        kernel (Optional[gpflow.kernels.Kernel]): A pre-defined GPflow kernel function. If None,
+            a `gpflow.kernels.SquaredExponential` (RBF) kernel is created
+            with the provided `lengthscales` and `variance`. Defaults to None.
+        ndim (int): Number of output dimensions for the dummy training labels `y_train`. Defaults to 1.
+
+    Returns:
+        OSGPR_VFE: An initialized `OSGPR_VFE` model instance, ready to accept
+                   new data batches via its `update` method.
+
+    Usage:
+        ```python
+        import numpy as np
+        # from sgptools.core.osgpr import init_osgpr
+
+        # Define some dummy training data to establish initial bounds
+        X_initial_env = np.random.rand(100, 2) * 10
+
+        # Initialize the OSGPR model
+        online_gp_model = init_osgpr(
+            X_initial_env,
+            num_inducing=50,
+            lengthscales=2.0,
+            variance=1.5,
+            noise_variance=0.01
+        )
+
+        # Example of updating the model with new data (typically in a loop)
+        # new_X_batch = np.random.rand(10, 2) * 10
+        # new_y_batch = np.sin(new_X_batch[:, 0:1]) + np.random.randn(10, 1) * 0.1
+        # online_gp_model.update(data=(new_X_batch, new_y_batch))
+        ```
+    """
+    if kernel is None:
+        # If no kernel is provided, initialize a SquaredExponential (RBF) kernel.
+        kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales,
+                                                   variance=variance)
+
+    # Create a dummy y_train: SGPR needs labels, but for initialization purposes here,
+    # we use zeros as the actual labels will come in through online updates.
+    y_train_dummy = np.zeros((len(X_train), ndim), dtype=X_train.dtype)
+
+    # Select initial inducing points from X_train using get_inducing_pts utility
+    Z_init = get_inducing_pts(X_train, num_inducing)
+
+    # Initialize a standard SGPR model. This model helps in getting an initial
+    # posterior (mu, Su) for the inducing points (Z_init) under the given kernel
+    # and noise variance. This posterior then becomes the 'old' posterior for OSGPR_VFE.
+    init_sgpr_model = gpflow.models.SGPR(data=(X_train, y_train_dummy),
+                                         kernel=kernel,
+                                         inducing_variable=Z_init,
+                                         noise_variance=noise_variance)
+
+    # Extract optimized (or initial) inducing points from the SGPR model
+    Zopt_np = init_sgpr_model.inducing_variable.Z.numpy()
+
+    # Predict the mean (mu) and full covariance (Su) of the latent function
+    # at these initial inducing points (Zopt). This represents the 'old' posterior.
+    mu_old_tf, Su_old_tf_full_cov = init_sgpr_model.predict_f(tf.constant(
+        Zopt_np, dtype=X_train.dtype),
+                                                              full_cov=True)
+
+    # Kaa_old: Prior covariance matrix of the old inducing points
+    Kaa_old_tf = init_sgpr_model.kernel(
+        tf.constant(Zopt_np, dtype=X_train.dtype))
+
+    # Prepare dummy initial data for OSGPR_VFE. This data will be overwritten
+    # by the first actual `update` call.
+    dummy_X_online = np.zeros([2, X_train.shape[-1]], dtype=X_train.dtype)
+    dummy_y_online = np.zeros([2, ndim], dtype=X_train.dtype)
+
+    # Initialize the OSGPR_VFE model with the extracted parameters.
+    # The `Su_old_tf_full_cov` is expected to be a (1, M, M) tensor for single latent GP,
+    # so we extract the (M, M) covariance matrix `Su_old_tf_full_cov[0]`.
+    online_osgpr_model = OSGPR_VFE(
+        data=(tf.constant(dummy_X_online), tf.constant(dummy_y_online)),
+        kernel=init_sgpr_model.
+        kernel,  # Pass the kernel (potentially optimized by SGPR init)
+        mu_old=mu_old_tf,
+        Su_old=Su_old_tf_full_cov[0],
+        Kaa_old=Kaa_old_tf,
+        Z_old=tf.constant(Zopt_np, dtype=X_train.dtype),
+        Z=tf.constant(Zopt_np,
+                      dtype=X_train.dtype))  # New Z is same as old Z initially
+
+    # Assign the noise variance from the initial SGPR model to the OSGPR model's likelihood
+    online_osgpr_model.likelihood.variance.assign(
+        init_sgpr_model.likelihood.variance)
+
+    return online_osgpr_model