sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgptools/__init__.py +3 -4
- sgptools/core/__init__.py +1 -0
- sgptools/{models/core → core}/augmented_gpr.py +11 -17
- sgptools/{models/core → core}/augmented_sgpr.py +27 -34
- sgptools/core/osgpr.py +417 -0
- sgptools/core/transformations.py +699 -0
- sgptools/kernels/__init__.py +0 -8
- sgptools/kernels/attentive_kernel.py +214 -69
- sgptools/kernels/neural_kernel.py +268 -92
- sgptools/kernels/neural_network.py +127 -28
- sgptools/methods.py +1047 -0
- sgptools/objectives.py +275 -0
- sgptools/utils/__init__.py +0 -9
- sgptools/utils/data.py +452 -149
- sgptools/utils/gpflow.py +335 -174
- sgptools/utils/metrics.py +375 -102
- sgptools/utils/misc.py +145 -111
- sgptools/utils/tsp.py +224 -84
- sgptools-2.0.0.dist-info/METADATA +216 -0
- sgptools-2.0.0.dist-info/RECORD +23 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/WHEEL +1 -1
- sgptools/models/__init__.py +0 -10
- sgptools/models/bo.py +0 -118
- sgptools/models/cma_es.py +0 -121
- sgptools/models/continuous_sgp.py +0 -68
- sgptools/models/core/__init__.py +0 -9
- sgptools/models/core/osgpr.py +0 -291
- sgptools/models/core/transformations.py +0 -434
- sgptools/models/greedy_mi.py +0 -115
- sgptools/models/greedy_sgp.py +0 -97
- sgptools-1.2.0.dist-info/METADATA +0 -39
- sgptools-1.2.0.dist-info/RECORD +0 -27
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info/licenses}/LICENSE.txt +0 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/top_level.txt +0 -0
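The listing above shows a structural reorganization: modules under `sgptools/models/core/` now live under `sgptools/core/`, the per-model files (`bo.py`, `cma_es.py`, `continuous_sgp.py`, `greedy_mi.py`, `greedy_sgp.py`) are removed, and new top-level `methods.py` and `objectives.py` modules appear. As a rough illustration only (inferred purely from the file moves listed above, assuming the module files map directly to importable submodules; check the sgptools 2.0.0 documentation for the actual public API), import paths for the moved core modules would shift like this:

```python
# Hypothetical import-path migration sketch based on the file moves listed above.
# The module names (osgpr, transformations) come from the listing; everything else
# should be verified against the sgptools 2.0.0 documentation.
try:
    # sgptools 2.0.0 layout: files moved to sgptools/core/
    from sgptools.core import osgpr, transformations
except ImportError:
    # sgptools 1.2.0 layout: files lived under sgptools/models/core/
    from sgptools.models.core import osgpr, transformations
```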
sgptools/kernels/__init__.py
CHANGED
@@ -1,9 +1 @@
 # sgptools/kernels/__init__.py
-
-"""Special non-stationary kernel functions in this package:
-
-- `neural_kernel`: Provides the neural spectral kernel that uses a mixture of multilayer perceptrons
-- `attentive_kernel`: Provides the attentive kernel that uses a multilayer perceptron to get a mixture of RBF kernels
-- `neural_network`: Helper class that provides a multilayer perceptron compatible with GPFlow
-
-"""
sgptools/kernels/attentive_kernel.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Attentive Kernel function
 """
 
@@ -20,100 +19,246 @@ import tensorflow as tf
 
 import gpflow
 from gpflow.config import default_float
+
 float_type = default_float()
 
 from .neural_network import NN
+from typing import List, Union, Optional
 
 
 class AttentiveKernel(gpflow.kernels.Kernel):
-    """
-
+    """
+    Attentive Kernel function (non-stationary kernel function).
+
+    This kernel uses a Multi-Layer Perceptron (MLP) to learn attention weights
+    for a mixture of RBF kernel components, making it adapt to local data
+    characteristics. It is based on the implementation from
+    [Weizhe-Chen/attentive_kernels](https://github.com/Weizhe-Chen/attentive_kernels).
 
-    Refer to the following
+    Refer to the following paper for more details:
     - AK: Attentive Kernel for Information Gathering [Chen et al., 2022]
 
-
-
-
-
-
+    Attributes:
+        _free_amplitude (tf.Variable): The amplitude (variance) parameter of the kernel.
+        lengthscales (tf.Variable): Fixed lengthscales for each RBF mixture component.
+        num_lengthscales (int): Number of RBF mixture components.
+        nn (NN): The Neural Network (MLP) used to generate attention representations.
     """
-
-
-
-
-
+
+    def __init__(self,
+                 lengthscales: Union[List[float], np.ndarray],
+                 hidden_sizes: List[int] = None,
+                 amplitude: float = 1.0,
+                 num_dim: int = 2):
+        """
+        Initializes the Attentive Kernel.
+
+        Args:
+            lengthscales (Union[List[float], np.ndarray]): A list or NumPy array of
+                lengthscale values to be used in the
+                RBF mixture components. These lengthscales
+                are not trained by the optimizer.
+            hidden_sizes (List[int]): A list where each element specifies the number of hidden units
+                in a layer of the MLPs. The length of this list determines
+                the number of hidden layers. Defaults to [10, 10].
+            amplitude (float): Initial amplitude (variance) of the kernel function.
+                This parameter is trainable. Defaults to 1.0.
+            num_dim (int): The dimensionality of the input data points (e.g., 2 for 2D data).
+                Defaults to 2.
+
+        Usage:
+            ```python
+            import gpflow
+            import numpy as np
+            from sgptools.kernels.attentive_kernel import AttentiveKernel
+
+            # Example: 10 fixed lengthscales ranging from 0.01 to 2.0
+            l_scales = np.linspace(0.01, 2.0, 10).astype(np.float32)
+
+            # Initialize Attentive Kernel for 2D data
+            kernel = AttentiveKernel(lengthscales=l_scales, hidden_sizes=[10, 10], num_dim=2)
+
+            # You can then use this kernel in a GPflow model:
+            # model = gpflow.models.GPR(data=(X_train, Y_train), kernel=kernel, noise_variance=0.1)
+            # optimize_model(model)
+            ```
+        """
         super().__init__()
+
+        if hidden_sizes is None:
+            hidden_sizes = [10, 10]  # Default if not provided
+        else:
+            hidden_sizes = list(hidden_sizes)
+
         with self.name_scope:
             self.num_lengthscales = len(lengthscales)
-            self._free_amplitude = tf.Variable(amplitude,
+            self._free_amplitude = tf.Variable(amplitude,
                                                shape=[],
                                                trainable=True,
                                                dtype=float_type)
-
-
-
-
-
-
+            # Lengthscales are treated as fixed parameters in this implementation
+            self.lengthscales = tf.Variable(
+                tf.cast(lengthscales, float_type),
+                shape=[self.num_lengthscales],
+                trainable=False,  # Not trainable
+                dtype=float_type)
+
+            # The neural network maps input dimensions to the number of lengthscales
+            # to produce attention weights for each RBF component.
+            # Structure: input_dim -> dim_hidden -> dim_hidden -> num_lengthscales
+            self.nn = NN([num_dim] + hidden_sizes + [self.num_lengthscales],
+                         output_activation_fn='softplus')
+
+    @tf.autograph.experimental.do_not_convert
+    def get_representations(self, X: tf.Tensor) -> tf.Tensor:
+        """
+        Computes normalized latent representations for input data points `X` using the MLP.
+        These representations are used to calculate attention weights for the kernel mixture.
+
+        Args:
+            X (tf.Tensor): (N, D); Input data points. `N` is the number of points,
+                           `D` is the input dimensionality (`num_dim`).
 
-
+        Returns:
+            tf.Tensor: (N, num_lengthscales); Normalized latent representations for each input point.
+        """
         Z = self.nn(X)
+        # Normalize the representations to have unit L2-norm along the last axis.
+        # This is common in attention mechanisms.
         representations = Z / tf.norm(Z, axis=1, keepdims=True)
         return representations
 
-
-
+    @tf.autograph.experimental.do_not_convert
+    def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
+        """
+        Computes the covariance matrix between input data points `X` and `X2`.
+        If `X2` is None, it computes the covariance matrix `K(X, X)`.
+
+        The covariance is calculated as a weighted sum of RBF kernels, where
+        the weights are derived from the attention representations generated by the MLP.
+
+        Formula (simplified):
+        $K(X, X') = \text{amplitude} \times \text{attention}(X, X') \times \sum_{i=1}^{Q} \text{RBF}(||X-X'||, \text{lengthscale}_i) \times \text{attention_lengthscale}_i(X,X')$
+        where $\text{attention}(X, X') = \text{representation}(X) \cdot \text{representation}(X')^T$.
 
         Args:
-            X (
-
-
+            X (tf.Tensor): (N1, D); Input data points. `N1` is the number of points,
+                           `D` is the input dimensionality.
+            X2 (Optional[tf.Tensor]): (N2, D); Optional second set of input data points.
+                                      If None, `X` is used as `X2`. `N2` is the number of points.
 
         Returns:
-
+            tf.Tensor: (N1, N2); The computed covariance matrix.
         """
         if X2 is None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            X2_internal = X
+        else:
+            X2_internal = X2
+
+        # Compute pairwise Euclidean distances between X and X2
+        dist = cdist(X,
+                     X2_internal)  # This returns (N1, N2) Euclidean distances
+
+        # Get normalized latent representations for X and X2
+        repre1 = self.get_representations(X)  # (N1, num_lengthscales)
+        repre2 = self.get_representations(
+            X2_internal)  # (N2, num_lengthscales)
+
+        # Function to compute a single mixture component for the kernel
+        # This function is mapped over each lengthscale index 'i'
+        def get_mixture_component(i: tf.Tensor) -> tf.Tensor:
+            """
+            Computes a single RBF mixture component, incorporating attention
+            based on the i-th dimension of the representations.
+            """
+            # attention_lengthscales: (N1, N2) matrix
+            # This term scales the RBF based on similarity in the i-th latent dimension.
+            attention_lengthscales = tf.tensordot(repre1[:, i],
+                                                  repre2[:, i],
+                                                  axes=0)
+
+            # rbf(dist, self.lengthscales[i]) computes the RBF kernel for the current lengthscale
+            # Element-wise multiplication with attention_lengthscales applies the attention.
+            cov_mat_component = rbf(
+                dist, self.lengthscales[i]) * attention_lengthscales
+            return cov_mat_component
+
+        # tf.map_fn applies `get_mixture_component` to each lengthscale index.
+        # The result `cov_mat_per_ls` will be (num_lengthscales, N1, N2).
+        cov_mat_per_ls = tf.map_fn(fn=get_mixture_component,
+                                   elems=tf.range(self.num_lengthscales,
+                                                  dtype=tf.int64),
+                                   fn_output_signature=dist.dtype)
+
+        # Sum all mixture components along the first axis to get (N1, N2)
+        cov_mat_summed_components = tf.math.reduce_sum(cov_mat_per_ls, axis=0)
+
+        # Overall attention term based on the dot product of representations
+        # (N1, num_lengthscales) @ (num_lengthscales, N2) -> (N1, N2)
+        attention_inputs = tf.matmul(repre1, repre2, transpose_b=True)
+
+        # Final covariance: Apply the learned amplitude and the overall attention
+        # Element-wise multiplication to scale the summed RBF components
+        final_cov_mat = self._free_amplitude * attention_inputs * cov_mat_summed_components
+
+        return final_cov_mat
+
+    @tf.autograph.experimental.do_not_convert
+    def K_diag(self, X: tf.Tensor) -> tf.Tensor:
+        """
+        Computes the diagonal of the covariance matrix `K(X, X)`.
+
+        Args:
+            X (tf.Tensor): (N, D); Input data points. `N` is the number of points.
+
+        Returns:
+            tf.Tensor: (N,); A 1D tensor representing the diagonal elements of the
+                       covariance matrix.
+        """
+        return self._free_amplitude * tf.ones((X.shape[0], ), dtype=X.dtype)
+
+
+# --- Helper functions for kernel computations ---
+@tf.autograph.experimental.do_not_convert
+def rbf(dist: tf.Tensor, lengthscale: tf.Tensor) -> tf.Tensor:
+    """
+    Computes the Radial Basis Function (RBF) kernel component.
+
+    Formula: $k(d, l) = \exp(-0.5 \times (d/l)^2)$
+
+    Args:
+        dist (tf.Tensor): Pairwise Euclidean distances (or other relevant distances).
+                          Can be (N1, N2) or (N,).
+        lengthscale (tf.Tensor): The lengthscale parameter, typically a scalar tensor.
+
+    Returns:
+        tf.Tensor: The RBF kernel values.
+    """
     return tf.math.exp(-0.5 * tf.math.square(dist / lengthscale))
 
-
-
-
-
-
-
-
-
-
-
-
+
+@tf.autograph.experimental.do_not_convert
+def cdist(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor:
+    """
+    Calculates the pairwise Euclidean distances between two sets of points.
+
+    Args:
+        x (tf.Tensor): (N1, D); First set of points.
+        y (tf.Tensor): (N2, D); Second set of points.
+
+    Returns:
+        tf.Tensor: (N1, N2); A tensor where element (i, j) is the Euclidean distance
+                   between `x[i, :]` and `y[j, :]`.
+    """
+    # Define a function to compute distances for a single row of `x` against all rows of `y`.
+    # The `axis=1` ensures the norm is taken over the last dimension (the coordinates),
+    # resulting in a scalar distance for each pair.
+    per_x_dist = lambda i: tf.norm(x[i:(i + 1), :] - y, axis=1)
+
+    # Use `tf.map_fn` to apply `per_x_dist` to each row of `x`.
+    # `elems=tf.range(tf.shape(x)[0], dtype=tf.int64)` creates a sequence of indices (0, 1, ..., N1-1).
+    distances = tf.map_fn(fn=per_x_dist,
+                          elems=tf.range(tf.shape(x)[0], dtype=tf.int64),
+                          fn_output_signature=x.dtype)
+
+    return distances