sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgptools/__init__.py CHANGED
@@ -1,5 +1,4 @@
 """
-
 SGP-Tools: SGP-based Optimization Tools
 
 Software Suite for Sensor Placement and Informative Path Planning.
@@ -12,9 +11,9 @@ The library includes python code for the following:
 
 """
 
-__version__ = "1.2.0"
+__version__ = "2.0.0"
 __author__ = 'Kalvik'
 
-from .models.core import *
-from .models import *
+from .core import *
+from .kernels import *
 from .utils import *
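
The 2.0.0 release flattens the old `sgptools.models` package into top-level `core` and `kernels` packages, so downstream imports need updating. A minimal migration sketch; the `augmented_sgpr` module path under `core` is an assumption inferred from the file headers in this diff, not something the diff confirms:

```python
# sgptools 1.2.0 re-exported everything from models:
#   from sgptools.models.core.augmented_sgpr import AugmentedSGPR

# sgptools 2.0.0: core and kernels are top-level packages
# (the exact module name under core is assumed for illustration)
from sgptools.core.augmented_sgpr import AugmentedSGPR
from sgptools.kernels import *
from sgptools.utils import *
```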
sgptools/core/__init__.py ADDED
@@ -0,0 +1 @@
+# sgptools/core/__init__.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Original GP code from GPflow library (https://github.com/GPflow/GPflow)
-
 """Provides a Gaussian process model with expand and aggregate functions
 """
 
@@ -25,6 +24,7 @@ from gpflow.base import InputData, MeanAndVariance
 from gpflow.utilities import add_likelihood_noise_cov, assert_params_false
 from .transformations import Transform
 
+
 class AugmentedGPR(GPR):
     """GPR model from the GPFlow library augmented to use a transform object's
     expand and aggregate functions on the data points where necessary.
@@ -39,24 +39,18 @@ class AugmentedGPR(GPR):
         noise_variance (float): data variance
         transform (Transform): Transform object
     """
-    def __init__(
-        self,
-        *args,
-        transform=None,
-        **kwargs
-    ):
-        super().__init__(
-            *args,
-            **kwargs
-        )
+
+    def __init__(self, *args, transform=None, **kwargs):
+        super().__init__(*args, **kwargs)
         if transform is None:
             self.transform = Transform()
         else:
             self.transform = transform
 
     def predict_f(
-        self, Xnew: InputData,
-        full_cov: bool = True,
+        self,
+        Xnew: InputData,
+        full_cov: bool = True,
         full_output_cov: bool = False,
         aggregate_train: bool = False,
     ) -> MeanAndVariance:
@@ -84,10 +78,10 @@ class AugmentedGPR(GPR):
         # which can happen when train and test data are the same size
         if kmn.shape[0] != kmn.shape[1]:
             kmn = self.transform.aggregate(kmn)
-
+
         conditional = gpflow.conditionals.base_conditional
         f_mean_zero, f_var = conditional(
-            kmn, kmm_plus_s, knn, err, full_cov=full_cov, white=False
-        )  # [N, P], [N, P] or [P, N, N]
+            kmn, kmm_plus_s, knn, err, full_cov=full_cov,
+            white=False)  # [N, P], [N, P] or [P, N, N]
         f_mean = f_mean_zero + self.mean_function(Xnew)
-        return f_mean, f_var
+        return f_mean, f_var
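
For orientation, a minimal usage sketch of the augmented model, assuming the import path (not shown in this diff) and that the base `Transform()` fallback acts as a pass-through, as the constructor's default suggests:

```python
import numpy as np
import gpflow
# Import path is an assumption based on this diff's layout:
# from sgptools.core.augmented_gpr import AugmentedGPR

X = np.random.rand(50, 2)                             # (n, d) inputs
y = np.sin(X[:, :1]) + 0.1 * np.random.randn(50, 1)   # (n, 1) labels

model = AugmentedGPR(data=(X, y),
                     kernel=gpflow.kernels.SquaredExponential(),
                     noise_variance=0.01,
                     transform=None)  # falls back to the base Transform()

# Unlike stock GPflow GPR, full_cov defaults to True here;
# aggregate_train additionally aggregates the train-side covariances.
mean, cov = model.predict_f(np.random.rand(10, 2))
```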
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Original SGP code from GPflow library (https://github.com/GPflow/GPflow)
-
 """Provides a sparse Gaussian process model with update, expand, and aggregate functions
 """
 
@@ -49,37 +48,34 @@ class AugmentedSGPR(SGPR):
         inducing_variable_time (ndarray): (m, d); Temporal dimensions of the inducing points,
             used when modeling spatio-temporal IPP
     """
-    def __init__(
-        self,
-        *args,
-        transform=None,
-        inducing_variable_time=None,
-        **kwargs
-    ):
-        super().__init__(
-            *args,
-            **kwargs
-        )
+
+    def __init__(self,
+                 *args,
+                 transform=None,
+                 inducing_variable_time=None,
+                 **kwargs):
+        super().__init__(*args, **kwargs)
         if transform is None:
             self.transform = Transform()
         else:
             self.transform = transform
 
         if inducing_variable_time is not None:
-            self.inducing_variable_time = inducingpoint_wrapper(inducing_variable_time)
+            self.inducing_variable_time = inducingpoint_wrapper(
+                inducing_variable_time)
             self.transform.inducing_variable_time = self.inducing_variable_time
         else:
             self.inducing_variable_time = None
 
-    def update(self, noise_variance, kernel):
+    def update(self, kernel, noise_variance):
         """Update SGP noise variance and kernel function parameters
 
         Args:
-            noise_variance (float): data variance
             kernel (gpflow.kernels.Kernel): gpflow kernel function
+            noise_variance (float): data variance
         """
         self.likelihood.variance.assign(noise_variance)
-        for self_var, var in zip(self.kernel.trainable_variables, 
+        for self_var, var in zip(self.kernel.trainable_variables,
                                  kernel.trainable_variables):
             self_var.assign(var)
 
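
Note that `update` swapped its positional argument order in 2.0.0, which silently breaks existing positional callers. A minimal migration note (`model`, `kernel`, and `noise` are placeholders):

```python
# sgptools 1.2.0:
# model.update(noise, kernel)

# sgptools 2.0.0:
model.update(kernel, noise)

# Keyword arguments are robust to the swap in either version,
# since both parameter names exist in both releases:
model.update(kernel=kernel, noise_variance=noise)
```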
@@ -91,7 +87,7 @@ class AugmentedSGPR(SGPR):
         A is M x N, B is M x M, LB is M x M, AAT is M x M
         """
         x, _ = self.data
-
+
         iv = self.inducing_variable.Z  # [M]
         iv = self.transform.expand(iv)
 
@@ -130,9 +126,10 @@ class AugmentedSGPR(SGPR):
         constraints = self.transform.constraints(self.inducing_variable.Z)
         return const + logdet + quad + constraints
 
-    def predict_f(
-        self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
-    ) -> MeanAndVariance:
+    def predict_f(self,
+                  Xnew: InputData,
+                  full_cov: bool = False,
+                  full_output_cov: bool = False) -> MeanAndVariance:
 
         # could copy into posterior into a fused version
         """
@@ -141,7 +138,7 @@ class AugmentedSGPR(SGPR):
         notebook.
         """
         X_data, Y_data = self.data
-
+
         iv = self.inducing_variable.Z
         iv = self.transform.expand(iv)
 
@@ -155,8 +152,7 @@ class AugmentedSGPR(SGPR):
         L = tf.linalg.cholesky(kuu)
         A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma
         B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye(
-            num_inducing, dtype=default_float()
-        )  # cache qinv
+            num_inducing, dtype=default_float())  # cache qinv
         LB = tf.linalg.cholesky(B)
         Aerr = tf.linalg.matmul(A, err)
         c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma
@@ -164,18 +160,15 @@ class AugmentedSGPR(SGPR):
         tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
         mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
         if full_cov:
-            var = (
-                self.kernel(Xnew)
-                + tf.linalg.matmul(tmp2, tmp2, transpose_a=True)
-                - tf.linalg.matmul(tmp1, tmp1, transpose_a=True)
-            )
-            var = tf.tile(var[None, ...], [self.num_latent_gps, 1, 1])  # [P, N, N]
+            var = (self.kernel(Xnew) +
+                   tf.linalg.matmul(tmp2, tmp2, transpose_a=True) -
+                   tf.linalg.matmul(tmp1, tmp1, transpose_a=True))
+            var = tf.tile(var[None, ...],
+                          [self.num_latent_gps, 1, 1])  # [P, N, N]
         else:
-            var = (
-                self.kernel(Xnew, full_cov=False)
-                + tf.reduce_sum(tf.square(tmp2), 0)
-                - tf.reduce_sum(tf.square(tmp1), 0)
-            )
+            var = (self.kernel(Xnew, full_cov=False) +
+                   tf.reduce_sum(tf.square(tmp2), 0) -
+                   tf.reduce_sum(tf.square(tmp1), 0))
             var = tf.tile(var[:, None], [1, self.num_latent_gps])
 
         return mean + self.mean_function(Xnew), var
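
A minimal construction sketch for the sparse variant, again assuming the import path; the only differences from a stock GPflow SGPR visible in this diff are the `transform`/`inducing_variable_time` keyword arguments and the transform constraint term added to the bound:

```python
import numpy as np
import gpflow
# Import path is an assumption based on this diff's layout:
# from sgptools.core.augmented_sgpr import AugmentedSGPR

X = np.random.rand(100, 2)
y = np.random.randn(100, 1)

model = AugmentedSGPR((X, y),
                      kernel=gpflow.kernels.SquaredExponential(),
                      inducing_variable=X[:10],  # (m, d) initial inducing points
                      noise_variance=0.1,
                      transform=None)  # falls back to the base Transform()

# predict_f mirrors SGPR, but expands inducing points through the transform
mean, var = model.predict_f(np.random.rand(5, 2), full_cov=False)
```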
sgptools/core/osgpr.py ADDED
@@ -0,0 +1,417 @@
+# Copyright 2024 The streaming_sparse_gp Contributors. All Rights Reserved.
+# https://github.com/thangbui/streaming_sparse_gp/tree/master
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Provides a streaming sparse Gaussian process model along with initialization function
+"""
+
+import tensorflow as tf
+import numpy as np
+
+import gpflow
+from gpflow.inducing_variables import InducingPoints
+from gpflow.models import GPModel, InternalDataTrainingLossMixin
+from gpflow import covariances
+from ..utils.data import get_inducing_pts
+from typing import Union, Optional
+
+
+class OSGPR_VFE(GPModel, InternalDataTrainingLossMixin):
+    """Online Sparse Variational GP regression model from [streaming_sparse_gp](https://github.com/thangbui/streaming_sparse_gp/tree/master)
+
+    Refer to the following paper for more details:
+        - Streaming Gaussian process approximations [Bui et al., 2017]
+
+    Args:
+        data (tuple): (X, y) ndarrays with inputs (n, d) and labels (n, 1)
+        kernel (gpflow.kernels.Kernel): gpflow kernel function
+        mu_old (ndarray): mean of old `q(u)`; here `u` are the latents corresponding to the inducing points `Z_old`
+        Su_old (ndarray): posterior covariance of old `q(u)`
+        Kaa_old (ndarray): prior covariance of old `q(u)`
+        Z_old (ndarray): (m_old, d): Old initial inducing points
+        Z (ndarray): (m_new, d): New initial inducing points
+        mean_function (function): GP mean function
+    """
+
+    def __init__(self,
+                 data,
+                 kernel,
+                 mu_old,
+                 Su_old,
+                 Kaa_old,
+                 Z_old,
+                 Z,
+                 mean_function=None):
+        self.X, self.Y = self.data = gpflow.models.util.data_input_to_tensor(
+            data)
+        likelihood = gpflow.likelihoods.Gaussian()
+        num_latent_gps = GPModel.calc_num_latent_gps_from_data(
+            data, kernel, likelihood)
+        super().__init__(kernel, likelihood, mean_function, num_latent_gps)
+
+        self.inducing_variable = InducingPoints(Z)
+        self.num_data = self.X.shape[0]
+
+        self.mu_old = tf.Variable(mu_old,
+                                  shape=tf.TensorShape(None),
+                                  trainable=False)
+        self.M_old = Z_old.shape[0]
+        self.Su_old = tf.Variable(Su_old,
+                                  shape=tf.TensorShape(None),
+                                  trainable=False)
+        self.Kaa_old = tf.Variable(Kaa_old,
+                                   shape=tf.TensorShape(None),
+                                   trainable=False)
+        self.Z_old = tf.Variable(Z_old,
+                                 shape=tf.TensorShape(None),
+                                 trainable=False)
+
+    def init_Z(self) -> np.ndarray:
+        """
+        Initializes the new set of inducing points (Z) for the OSGPR model.
+        It combines a subset of the old inducing points (Z_old) with a subset
+        of the current training data (X).
+
+        Returns:
+            np.ndarray: (M, d); A NumPy array of the newly initialized inducing points,
+                        combining old and new data-based points.
+        """
+        M = self.inducing_variable.Z.shape[0]
+        M_old = int(0.7 * M)  # Proportion of old inducing points to retain
+        M_new = M - M_old  # Proportion of new data points to select
+
+        # Randomly select M_old points from the old inducing points
+        old_Z = self.Z_old.numpy()[np.random.permutation(M)[0:M_old], :]
+
+        # Randomly select M_new points from the current training data
+        new_Z = self.X.numpy()[
+            np.random.permutation(self.X.shape[0])[0:M_new], :]
+
+        # Vertically stack the selected old and new points to form the new Z
+        Z = np.vstack((old_Z, new_Z))
+        return Z
+
+    def update(self, data, inducing_variable=None, update_inducing=True):
+        """
+        Configures the OSGPR model to adapt to a new batch of data.
+        This method updates the model's data, its inducing points (optionally),
+        and caches the posterior mean and covariance of the *old* inducing points
+        to facilitate the streaming update equations.
+
+        Note: After calling this update, the OSGPR model typically needs to be
+        trained further using gradient-based optimization to fully incorporate
+        the new data and optimize its parameters.
+
+        Args:
+            data (Tuple[np.ndarray, np.ndarray]): A tuple (X, y) representing the new batch
+                        of input data `X` (n, d) and corresponding labels `y` (n, 1).
+            inducing_variable (Optional[np.ndarray]): (m_new, d); Optional NumPy array for the new
+                        set of inducing points. If None and `update_inducing`
+                        is True, `init_Z` will be called to determine them.
+                        Defaults to None.
+            update_inducing (bool): If True, the inducing points will be updated. If False,
+                        they will remain as they were before the update call.
+                        Defaults to True.
+        """
+        self.X, self.Y = self.data = gpflow.models.util.data_input_to_tensor(
+            data)
+        self.num_data = self.X.shape[0]
+
+        # Store the current inducing points as 'old' for the next update step
+        self.Z_old.assign(self.inducing_variable.Z)
+
+        # Update the inducing points based on `update_inducing` flag
+        if update_inducing:
+            if inducing_variable is None:
+                # If no explicit inducing_variable is provided, initialize new ones
+                new_Z_init = self.init_Z()
+            else:
+                # Use the explicitly provided inducing_variable
+                new_Z_init = inducing_variable
+            self.inducing_variable.Z.assign(
+                tf.constant(new_Z_init, dtype=self.inducing_variable.Z.dtype))
+        # If update_inducing is False, inducing_variable.Z retains its current value.
+
+        # Get posterior mean and covariance for the *old* inducing points using the current model state
+        mu_old, Su_old = self.predict_f(self.Z_old, full_cov=True)
+        self.mu_old.assign(mu_old)
+        self.Su_old.assign(Su_old)
+
+        # Get the prior covariance matrix for the *old* inducing points using the current kernel
+        Kaa_old = self.kernel(self.Z_old)
+        self.Kaa_old.assign(Kaa_old)
+
+    def _common_terms(self):
+        Mb = self.inducing_variable.num_inducing
+        Ma = self.M_old
+        # jitter = gpflow.default_jitter()
+        jitter = gpflow.utilities.to_default_float(1e-4)
+        sigma2 = self.likelihood.variance
+        sigma = tf.sqrt(sigma2)
+
+        Saa = self.Su_old
+        ma = self.mu_old
+
+        # a is old inducing points, b is new
+        # f is training points
+        # s is test points
+        Kbf = covariances.Kuf(self.inducing_variable, self.kernel, self.X)
+        Kbb = covariances.Kuu(self.inducing_variable,
+                              self.kernel,
+                              jitter=jitter)
+        Kba = covariances.Kuf(self.inducing_variable, self.kernel, self.Z_old)
+        Kaa_cur = gpflow.utilities.add_noise_cov(self.kernel(self.Z_old),
+                                                 jitter)
+        Kaa = gpflow.utilities.add_noise_cov(self.Kaa_old, jitter)
+
+        err = self.Y - self.mean_function(self.X)
+
+        Sainv_ma = tf.linalg.solve(Saa, ma)
+        Sinv_y = self.Y / sigma2
+        c1 = tf.matmul(Kbf, Sinv_y)
+        c2 = tf.matmul(Kba, Sainv_ma)
+        c = c1 + c2
+
+        Lb = tf.linalg.cholesky(Kbb)
+        Lbinv_c = tf.linalg.triangular_solve(Lb, c, lower=True)
+        Lbinv_Kba = tf.linalg.triangular_solve(Lb, Kba, lower=True)
+        Lbinv_Kbf = tf.linalg.triangular_solve(Lb, Kbf, lower=True) / sigma
+        d1 = tf.matmul(Lbinv_Kbf, Lbinv_Kbf, transpose_b=True)
+
+        LSa = tf.linalg.cholesky(Saa)
+        Kab_Lbinv = tf.linalg.matrix_transpose(Lbinv_Kba)
+        LSainv_Kab_Lbinv = tf.linalg.triangular_solve(LSa,
+                                                      Kab_Lbinv,
+                                                      lower=True)
+        d2 = tf.matmul(LSainv_Kab_Lbinv, LSainv_Kab_Lbinv, transpose_a=True)
+
+        La = tf.linalg.cholesky(Kaa)
+        Lainv_Kab_Lbinv = tf.linalg.triangular_solve(La, Kab_Lbinv, lower=True)
+        d3 = tf.matmul(Lainv_Kab_Lbinv, Lainv_Kab_Lbinv, transpose_a=True)
+
+        D = tf.eye(Mb, dtype=gpflow.default_float()) + d1 + d2 - d3
+        D = gpflow.utilities.add_noise_cov(D, jitter)
+        LD = tf.linalg.cholesky(D)
+
+        LDinv_Lbinv_c = tf.linalg.triangular_solve(LD, Lbinv_c, lower=True)
+
+        return (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba,
+                LDinv_Lbinv_c, err, d1)
+
+    def maximum_log_likelihood_objective(self):
+        """
+        Construct a tensorflow function to compute the bound on the marginal
+        likelihood.
+        """
+
+        Mb = self.inducing_variable.num_inducing
+        Ma = self.M_old
+        jitter = gpflow.default_jitter()
+        # jitter = gpflow.utilities.to_default_float(1e-4)
+        sigma2 = self.likelihood.variance
+        sigma = tf.sqrt(sigma2)
+        N = self.num_data
+
+        Saa = self.Su_old
+        ma = self.mu_old
+
+        # a is old inducing points, b is new
+        # f is training points
+        Kfdiag = self.kernel(self.X, full_cov=False)
+        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba, LDinv_Lbinv_c,
+         err, Qff) = self._common_terms()
+
+        LSa = tf.linalg.cholesky(Saa)
+        Lainv_ma = tf.linalg.triangular_solve(LSa, ma, lower=True)
+
+        # constant term
+        bound = -0.5 * N * np.log(2 * np.pi)
+        # quadratic term
+        bound += -0.5 * tf.reduce_sum(tf.square(err)) / sigma2
+        # bound += -0.5 * tf.reduce_sum(ma * Sainv_ma)
+        bound += -0.5 * tf.reduce_sum(tf.square(Lainv_ma))
+        bound += 0.5 * tf.reduce_sum(tf.square(LDinv_Lbinv_c))
+        # log det term
+        bound += -0.5 * N * tf.reduce_sum(tf.math.log(sigma2))
+        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LD)))
+
+        # delta 1: trace term
+        bound += -0.5 * tf.reduce_sum(Kfdiag) / sigma2
+        bound += 0.5 * tf.reduce_sum(tf.linalg.diag_part(Qff))
+
+        # delta 2: a and b difference
+        bound += tf.reduce_sum(tf.math.log(tf.linalg.diag_part(La)))
+        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LSa)))
+
+        Kaadiff = Kaa_cur - tf.matmul(Lbinv_Kba, Lbinv_Kba, transpose_a=True)
+        Sainv_Kaadiff = tf.linalg.solve(Saa, Kaadiff)
+        Kainv_Kaadiff = tf.linalg.solve(Kaa, Kaadiff)
+
+        bound += -0.5 * tf.reduce_sum(
+            tf.linalg.diag_part(Sainv_Kaadiff) -
+            tf.linalg.diag_part(Kainv_Kaadiff))
+
+        return bound
+
+    def predict_f(self, Xnew, full_cov=False):
+        """
+        Compute the mean and variance of the latent function at some new points
+        Xnew.
+        """
+
+        # jitter = gpflow.default_jitter()
+        jitter = gpflow.utilities.to_default_float(1e-4)
+
+        # a is old inducing points, b is new
+        # f is training points
+        # s is test points
+        Kbs = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
+        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba, LDinv_Lbinv_c,
+         err, Qff) = self._common_terms()
+
+        Lbinv_Kbs = tf.linalg.triangular_solve(Lb, Kbs, lower=True)
+        LDinv_Lbinv_Kbs = tf.linalg.triangular_solve(LD, Lbinv_Kbs, lower=True)
+        mean = tf.matmul(LDinv_Lbinv_Kbs, LDinv_Lbinv_c, transpose_a=True)
+
+        if full_cov:
+            Kss = self.kernel(Xnew) + jitter * tf.eye(
+                tf.shape(Xnew)[0], dtype=gpflow.default_float())
+            var1 = Kss
+            var2 = -tf.matmul(Lbinv_Kbs, Lbinv_Kbs, transpose_a=True)
+            var3 = tf.matmul(LDinv_Lbinv_Kbs,
+                             LDinv_Lbinv_Kbs,
+                             transpose_a=True)
+            var = var1 + var2 + var3
+        else:
+            var1 = self.kernel(Xnew, full_cov=False)
+            var2 = -tf.reduce_sum(tf.square(Lbinv_Kbs), axis=0)
+            var3 = tf.reduce_sum(tf.square(LDinv_Lbinv_Kbs), axis=0)
+            var = var1 + var2 + var3
+
+        return mean + self.mean_function(Xnew), var
+
+
+def init_osgpr(X_train: np.ndarray,
+               num_inducing: int = 10,
+               lengthscales: Union[float, np.ndarray] = 1.0,
+               variance: float = 1.0,
+               noise_variance: float = 0.001,
+               kernel: Optional[gpflow.kernels.Kernel] = None,
+               ndim: int = 1) -> OSGPR_VFE:
+    """
+    Initializes an Online Sparse Variational Gaussian Process Regression (OSGPR_VFE) model.
+    This function first fits a standard Sparse Gaussian Process Regression (SGPR) model
+    to a dummy dataset (representing initial data/environment bounds) to obtain an
+    initial set of optimized inducing points and their corresponding posterior.
+    These are then used to set up the `OSGPR_VFE` model for streaming updates.
+
+    Args:
+        X_train (np.ndarray): (n, d); Unlabeled random sampled training points.
+                    These points are primarily used to define the spatial bounds
+                    and for initial selection of inducing points. Their labels are
+                    set to zeros for the SGPR initialization.
+        num_inducing (int): The number of inducing points to use for the OSGPR model. Defaults to 10.
+        lengthscales (Union[float, np.ndarray]): Initial lengthscale(s) for the RBF kernel.
+                    If a float, it's applied uniformly. If a NumPy array,
+                    each element corresponds to a dimension. Defaults to 1.0.
+        variance (float): Initial variance (amplitude) for the RBF kernel. Defaults to 1.0.
+        noise_variance (float): Initial data noise variance for the Gaussian likelihood. Defaults to 0.001.
+        kernel (Optional[gpflow.kernels.Kernel]): A pre-defined GPflow kernel function. If None,
+                    a `gpflow.kernels.SquaredExponential` (RBF) kernel is created
+                    with the provided `lengthscales` and `variance`. Defaults to None.
+        ndim (int): Number of output dimensions for the dummy training labels `y_train`. Defaults to 1.
+
+    Returns:
+        OSGPR_VFE: An initialized `OSGPR_VFE` model instance, ready to accept
+                   new data batches via its `update` method.
+
+    Usage:
+        ```python
+        import numpy as np
+        # from sgptools.core.osgpr import init_osgpr
+
+        # Define some dummy training data to establish initial bounds
+        X_initial_env = np.random.rand(100, 2) * 10
+
+        # Initialize the OSGPR model
+        online_gp_model = init_osgpr(
+            X_initial_env,
+            num_inducing=50,
+            lengthscales=2.0,
+            variance=1.5,
+            noise_variance=0.01
+        )
+
+        # Example of updating the model with new data (typically in a loop)
+        # new_X_batch = np.random.rand(10, 2) * 10
+        # new_y_batch = np.sin(new_X_batch[:, 0:1]) + np.random.randn(10, 1) * 0.1
+        # online_gp_model.update(data=(new_X_batch, new_y_batch))
+        ```
+    """
+    if kernel is None:
+        # If no kernel is provided, initialize a SquaredExponential (RBF) kernel.
+        kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales,
+                                                   variance=variance)
+
+    # Create a dummy y_train: SGPR needs labels, but for initialization purposes here,
+    # we use zeros as the actual labels will come in through online updates.
+    y_train_dummy = np.zeros((len(X_train), ndim), dtype=X_train.dtype)
+
+    # Select initial inducing points from X_train using get_inducing_pts utility
+    Z_init = get_inducing_pts(X_train, num_inducing)
+
+    # Initialize a standard SGPR model. This model helps in getting an initial
+    # posterior (mu, Su) for the inducing points (Z_init) under the given kernel
+    # and noise variance. This posterior then becomes the 'old' posterior for OSGPR_VFE.
+    init_sgpr_model = gpflow.models.SGPR(data=(X_train, y_train_dummy),
+                                         kernel=kernel,
+                                         inducing_variable=Z_init,
+                                         noise_variance=noise_variance)
+
+    # Extract optimized (or initial) inducing points from the SGPR model
+    Zopt_np = init_sgpr_model.inducing_variable.Z.numpy()
+
+    # Predict the mean (mu) and full covariance (Su) of the latent function
+    # at these initial inducing points (Zopt). This represents the 'old' posterior.
+    mu_old_tf, Su_old_tf_full_cov = init_sgpr_model.predict_f(tf.constant(
+        Zopt_np, dtype=X_train.dtype),
+                                                              full_cov=True)
+
+    # Kaa_old: Prior covariance matrix of the old inducing points
+    Kaa_old_tf = init_sgpr_model.kernel(
+        tf.constant(Zopt_np, dtype=X_train.dtype))
+
+    # Prepare dummy initial data for OSGPR_VFE. This data will be overwritten
+    # by the first actual `update` call.
+    dummy_X_online = np.zeros([2, X_train.shape[-1]], dtype=X_train.dtype)
+    dummy_y_online = np.zeros([2, ndim], dtype=X_train.dtype)
+
+    # Initialize the OSGPR_VFE model with the extracted parameters.
+    # The `Su_old_tf_full_cov` is expected to be a (1, M, M) tensor for single latent GP,
+    # so we extract the (M, M) covariance matrix `Su_old_tf_full_cov[0]`.
+    online_osgpr_model = OSGPR_VFE(
+        data=(tf.constant(dummy_X_online), tf.constant(dummy_y_online)),
+        kernel=init_sgpr_model.
+        kernel,  # Pass the kernel (potentially optimized by SGPR init)
+        mu_old=mu_old_tf,
+        Su_old=Su_old_tf_full_cov[0],
+        Kaa_old=Kaa_old_tf,
+        Z_old=tf.constant(Zopt_np, dtype=X_train.dtype),
+        Z=tf.constant(Zopt_np,
+                      dtype=X_train.dtype))  # New Z is same as old Z initially
+
+    # Assign the noise variance from the initial SGPR model to the OSGPR model's likelihood
+    online_osgpr_model.likelihood.variance.assign(
+        init_sgpr_model.likelihood.variance)
+
+    return online_osgpr_model
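
As the `update` docstring above notes, updating only re-caches the old posterior and (optionally) re-seeds the inducing points; each new batch still needs a gradient pass. A minimal streaming-loop sketch under that assumption (batch shapes and optimizer settings are illustrative only):

```python
import numpy as np
import gpflow
# from sgptools.core.osgpr import init_osgpr

# Seed the model from unlabeled points that cover the environment bounds
model = init_osgpr(np.random.rand(100, 2), num_inducing=20)
opt = gpflow.optimizers.Scipy()

for _ in range(5):  # one iteration per incoming data batch
    X_new = np.random.rand(25, 2)
    y_new = np.sin(X_new[:, :1]) + 0.1 * np.random.randn(25, 1)

    # Re-cache the old posterior and re-seed the inducing points
    model.update(data=(X_new, y_new))

    # Gradient pass to actually absorb the new batch
    # (training_loss comes from InternalDataTrainingLossMixin)
    opt.minimize(model.training_loss, model.trainable_variables)

mean, var = model.predict_f(np.random.rand(10, 2))
```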