sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1 @@
  # sgptools/kernels/__init__.py
-
- """Special non-stationary kernel functions in this package:
-
- - `neural_kernel`: Provides the neural spectral kernel that uses a mixture of multilayer perceptrons
- - `attentive_kernel`: Provides the attentive kernel that uses a multilayer perceptron to get a mixture of RBF kernels
- - `neural_network`: Helper class that provides a multilayer perceptron compatible with GPFlow
-
- """
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
  """Attentive Kernel function
  """
 
@@ -20,100 +19,246 @@ import tensorflow as tf
 
  import gpflow
  from gpflow.config import default_float
+
  float_type = default_float()
 
  from .neural_network import NN
+ from typing import List, Union, Optional
 
 
  class AttentiveKernel(gpflow.kernels.Kernel):
- """Attentive Kernel function (non-stationary kernel function).
- Based on the implementation from this [repo](https://github.com/Weizhe-Chen/attentive_kernels)
+ """
+ Attentive Kernel function (non-stationary kernel function).
+
+ This kernel uses a Multi-Layer Perceptron (MLP) to learn attention weights
+ for a mixture of RBF kernel components, making it adapt to local data
+ characteristics. It is based on the implementation from
+ [Weizhe-Chen/attentive_kernels](https://github.com/Weizhe-Chen/attentive_kernels).
 
- Refer to the following papers for more details:
+ Refer to the following paper for more details:
  - AK: Attentive Kernel for Information Gathering [Chen et al., 2022]
 
- Args:
- lengthscales (List): List of lengthscales to use in the mixture components. The lengthscales are not trained.
- amplitude (int): Initial amplitude of the kernel function
- dim_hidden (int): Number of MLP hidden layer nodes (The NN will have two of these layers)
- num_dim (int): Number of dimensions of the data points
+ Attributes:
+ _free_amplitude (tf.Variable): The amplitude (variance) parameter of the kernel.
+ lengthscales (tf.Variable): Fixed lengthscales for each RBF mixture component.
+ num_lengthscales (int): Number of RBF mixture components.
+ nn (NN): The Neural Network (MLP) used to generate attention representations.
  """
- def __init__(self,
- lengthscales,
- dim_hidden=10,
- amplitude=1.0,
- num_dim=2):
+
+ def __init__(self,
+ lengthscales: Union[List[float], np.ndarray],
+ hidden_sizes: List[int] = None,
+ amplitude: float = 1.0,
+ num_dim: int = 2):
+ """
+ Initializes the Attentive Kernel.
+
+ Args:
+ lengthscales (Union[List[float], np.ndarray]): A list or NumPy array of
+ lengthscale values to be used in the
+ RBF mixture components. These lengthscales
+ are not trained by the optimizer.
+ hidden_sizes (List[int]): A list where each element specifies the number of hidden units
+ in a layer of the MLPs. The length of this list determines
+ the number of hidden layers. Defaults to [10, 10].
+ amplitude (float): Initial amplitude (variance) of the kernel function.
+ This parameter is trainable. Defaults to 1.0.
+ num_dim (int): The dimensionality of the input data points (e.g., 2 for 2D data).
+ Defaults to 2.
+
+ Usage:
+ ```python
+ import gpflow
+ import numpy as np
+ from sgptools.kernels.attentive_kernel import AttentiveKernel
+
+ # Example: 10 fixed lengthscales ranging from 0.01 to 2.0
+ l_scales = np.linspace(0.01, 2.0, 10).astype(np.float32)
+
+ # Initialize Attentive Kernel for 2D data
+ kernel = AttentiveKernel(lengthscales=l_scales, hidden_sizes=[10, 10], num_dim=2)
+
+ # You can then use this kernel in a GPflow model:
+ # model = gpflow.models.GPR(data=(X_train, Y_train), kernel=kernel, noise_variance=0.1)
+ # optimize_model(model)
+ ```
+ """
  super().__init__()
+
+ if hidden_sizes is None:
+ hidden_sizes = [10, 10] # Default if not provided
+ else:
+ hidden_sizes = list(hidden_sizes)
+
  with self.name_scope:
  self.num_lengthscales = len(lengthscales)
- self._free_amplitude = tf.Variable(amplitude,
+ self._free_amplitude = tf.Variable(amplitude,
  shape=[],
  trainable=True,
  dtype=float_type)
- self.lengthscales = tf.Variable(lengthscales,
- shape=[self.num_lengthscales],
- trainable=False,
- dtype=float_type)
-
- self.nn = NN([num_dim, dim_hidden, dim_hidden, self.num_lengthscales])
+ # Lengthscales are treated as fixed parameters in this implementation
+ self.lengthscales = tf.Variable(
+ tf.cast(lengthscales, float_type),
+ shape=[self.num_lengthscales],
+ trainable=False, # Not trainable
+ dtype=float_type)
+
+ # The neural network maps input dimensions to the number of lengthscales
+ # to produce attention weights for each RBF component.
+ # Structure: input_dim -> dim_hidden -> dim_hidden -> num_lengthscales
+ self.nn = NN([num_dim] + hidden_sizes + [self.num_lengthscales],
+ output_activation_fn='softplus')
+
+ @tf.autograph.experimental.do_not_convert
+ def get_representations(self, X: tf.Tensor) -> tf.Tensor:
+ """
+ Computes normalized latent representations for input data points `X` using the MLP.
+ These representations are used to calculate attention weights for the kernel mixture.
+
+ Args:
+ X (tf.Tensor): (N, D); Input data points. `N` is the number of points,
+ `D` is the input dimensionality (`num_dim`).
 
- def get_representations(self, X):
+ Returns:
+ tf.Tensor: (N, num_lengthscales); Normalized latent representations for each input point.
+ """
  Z = self.nn(X)
+ # Normalize the representations to have unit L2-norm along the last axis.
+ # This is common in attention mechanisms.
  representations = Z / tf.norm(Z, axis=1, keepdims=True)
  return representations
 
- def K(self, X, X2=None):
- """Computes the covariances between/amongst the input variables
+ @tf.autograph.experimental.do_not_convert
+ def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
+ """
+ Computes the covariance matrix between input data points `X` and `X2`.
+ If `X2` is None, it computes the covariance matrix `K(X, X)`.
+
+ The covariance is calculated as a weighted sum of RBF kernels, where
+ the weights are derived from the attention representations generated by the MLP.
+
+ Formula (simplified):
+ $K(X, X') = \text{amplitude} \times \text{attention}(X, X') \times \sum_{i=1}^{Q} \text{RBF}(||X-X'||, \text{lengthscale}_i) \times \text{attention_lengthscale}_i(X,X')$
+ where $\text{attention}(X, X') = \text{representation}(X) \cdot \text{representation}(X')^T$.
 
  Args:
- X (ndarray): Variables to compute the covariance matrix
- X2 (ndarray): If passed, the covariance between X and X2 is computed. Otherwise,
- the covariance between X and X is computed.
+ X (tf.Tensor): (N1, D); Input data points. `N1` is the number of points,
+ `D` is the input dimensionality.
+ X2 (Optional[tf.Tensor]): (N2, D); Optional second set of input data points.
+ If None, `X` is used as `X2`. `N2` is the number of points.
 
  Returns:
- cov (ndarray): covariance matrix
+ tf.Tensor: (N1, N2); The computed covariance matrix.
  """
  if X2 is None:
- X2 = X
-
- dist = cdist(X, X2)
- repre1 = self.get_representations(X)
- repre2 = self.get_representations(X2)
-
- def get_mixture_component(i):
- attention_lengthscales = tf.tensordot(repre1[:, i], repre2[:, i], axes=0)
- cov_mat = rbf(dist, self.lengthscales[i]) * attention_lengthscales
- return cov_mat
-
- cov_mat = tf.map_fn(fn=get_mixture_component,
- elems=tf.range(self.num_lengthscales, dtype=tf.int64),
- fn_output_signature=dist.dtype)
- cov_mat = tf.math.reduce_sum(cov_mat, axis=0)
- attention_inputs = repre1 @ tf.transpose(repre2)
- cov_mat *= self._free_amplitude * attention_inputs
-
- return cov_mat
-
- def K_diag(self, X):
- return self._free_amplitude * tf.ones((X.shape[0]), dtype=X.dtype)
-
- '''
- Helper functions
- '''
- def rbf(dist, lengthscale):
- '''
- RBF kernel function
- '''
+ X2_internal = X
+ else:
+ X2_internal = X2
+
+ # Compute pairwise Euclidean distances between X and X2
+ dist = cdist(X,
+ X2_internal) # This returns (N1, N2) Euclidean distances
+
+ # Get normalized latent representations for X and X2
+ repre1 = self.get_representations(X) # (N1, num_lengthscales)
+ repre2 = self.get_representations(
+ X2_internal) # (N2, num_lengthscales)
+
+ # Function to compute a single mixture component for the kernel
+ # This function is mapped over each lengthscale index 'i'
+ def get_mixture_component(i: tf.Tensor) -> tf.Tensor:
+ """
+ Computes a single RBF mixture component, incorporating attention
+ based on the i-th dimension of the representations.
+ """
+ # attention_lengthscales: (N1, N2) matrix
+ # This term scales the RBF based on similarity in the i-th latent dimension.
+ attention_lengthscales = tf.tensordot(repre1[:, i],
+ repre2[:, i],
+ axes=0)
+
+ # rbf(dist, self.lengthscales[i]) computes the RBF kernel for the current lengthscale
+ # Element-wise multiplication with attention_lengthscales applies the attention.
+ cov_mat_component = rbf(
+ dist, self.lengthscales[i]) * attention_lengthscales
+ return cov_mat_component
+
+ # tf.map_fn applies `get_mixture_component` to each lengthscale index.
+ # The result `cov_mat_per_ls` will be (num_lengthscales, N1, N2).
+ cov_mat_per_ls = tf.map_fn(fn=get_mixture_component,
+ elems=tf.range(self.num_lengthscales,
+ dtype=tf.int64),
+ fn_output_signature=dist.dtype)
+
+ # Sum all mixture components along the first axis to get (N1, N2)
+ cov_mat_summed_components = tf.math.reduce_sum(cov_mat_per_ls, axis=0)
+
+ # Overall attention term based on the dot product of representations
+ # (N1, num_lengthscales) @ (num_lengthscales, N2) -> (N1, N2)
+ attention_inputs = tf.matmul(repre1, repre2, transpose_b=True)
+
+ # Final covariance: Apply the learned amplitude and the overall attention
+ # Element-wise multiplication to scale the summed RBF components
+ final_cov_mat = self._free_amplitude * attention_inputs * cov_mat_summed_components
+
+ return final_cov_mat
+
+ @tf.autograph.experimental.do_not_convert
+ def K_diag(self, X: tf.Tensor) -> tf.Tensor:
+ """
+ Computes the diagonal of the covariance matrix `K(X, X)`.
+
+ Args:
+ X (tf.Tensor): (N, D); Input data points. `N` is the number of points.
+
+ Returns:
+ tf.Tensor: (N,); A 1D tensor representing the diagonal elements of the
+ covariance matrix.
+ """
+ return self._free_amplitude * tf.ones((X.shape[0], ), dtype=X.dtype)
+
+
+ # --- Helper functions for kernel computations ---
+ @tf.autograph.experimental.do_not_convert
+ def rbf(dist: tf.Tensor, lengthscale: tf.Tensor) -> tf.Tensor:
+ """
+ Computes the Radial Basis Function (RBF) kernel component.
+
+ Formula: $k(d, l) = \exp(-0.5 \times (d/l)^2)$
+
+ Args:
+ dist (tf.Tensor): Pairwise Euclidean distances (or other relevant distances).
+ Can be (N1, N2) or (N,).
+ lengthscale (tf.Tensor): The lengthscale parameter, typically a scalar tensor.
+
+ Returns:
+ tf.Tensor: The RBF kernel values.
+ """
  return tf.math.exp(-0.5 * tf.math.square(dist / lengthscale))
 
- def cdist(x, y):
- '''
- Calculate the pairwise euclidean distances
- '''
- # Calculate distance for a single row of x.
- per_x_dist = lambda i : tf.norm(x[i:(i+1),:] - y, axis=1)
- # Compute and stack distances for all rows of x.
- dist = tf.map_fn(fn=per_x_dist,
- elems=tf.range(tf.shape(x)[0], dtype=tf.int64),
- fn_output_signature=x.dtype)
- return dist
+
+ @tf.autograph.experimental.do_not_convert
+ def cdist(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor:
+ """
+ Calculates the pairwise Euclidean distances between two sets of points.
+
+ Args:
+ x (tf.Tensor): (N1, D); First set of points.
+ y (tf.Tensor): (N2, D); Second set of points.
+
+ Returns:
+ tf.Tensor: (N1, N2); A tensor where element (i, j) is the Euclidean distance
+ between `x[i, :]` and `y[j, :]`.
+ """
+ # Define a function to compute distances for a single row of `x` against all rows of `y`.
+ # The `axis=1` ensures the norm is taken over the last dimension (the coordinates),
+ # resulting in a scalar distance for each pair.
+ per_x_dist = lambda i: tf.norm(x[i:(i + 1), :] - y, axis=1)
+
+ # Use `tf.map_fn` to apply `per_x_dist` to each row of `x`.
+ # `elems=tf.range(tf.shape(x)[0], dtype=tf.int64)` creates a sequence of indices (0, 1, ..., N1-1).
+ distances = tf.map_fn(fn=per_x_dist,
+ elems=tf.range(tf.shape(x)[0], dtype=tf.int64),
+ fn_output_signature=x.dtype)
+
+ return distances
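
For reference, a minimal migration sketch of the constructor change visible in this diff: the single `dim_hidden` width in 1.2.0 (used for two MLP layers) is replaced by a `hidden_sizes` list in 2.0.0, with one entry per hidden layer. The snippet assumes sgptools 2.0.0 and its GPflow/TensorFlow dependencies are installed; the commented-out 1.2.0 call is shown only for comparison.

```python
import numpy as np
from sgptools.kernels.attentive_kernel import AttentiveKernel

# Fixed lengthscales for the RBF mixture components (not trained)
lengthscales = np.linspace(0.01, 2.0, 10)

# sgptools 1.2.0 (old signature): a single hidden-layer width, used for two layers
# kernel = AttentiveKernel(lengthscales, dim_hidden=10, num_dim=2)

# sgptools 2.0.0 (new signature): one hidden layer per list entry
kernel = AttentiveKernel(lengthscales, hidden_sizes=[10, 10], num_dim=2)
```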