fvgp 4.2.0__tar.gz → 4.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fvgp-4.2/fvgp.egg-info → fvgp-4.2.2}/PKG-INFO +1 -1
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/_version.py +3 -3
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/deep_kernel_network.py +8 -3
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/fvgp.py +12 -75
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp.py +88 -97
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gpMCMC.py +20 -22
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_data.py +0 -1
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_kernels.py +6 -55
- fvgp-4.2.2/fvgp/gp_lin_alg.py +250 -0
- fvgp-4.2.2/fvgp/gp_marginal_density.py +363 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_posterior.py +28 -46
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_prior.py +1 -1
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_training.py +19 -27
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/mcmc.py +4 -3
- {fvgp-4.2 → fvgp-4.2.2/fvgp.egg-info}/PKG-INFO +1 -1
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/SOURCES.txt +1 -1
- {fvgp-4.2 → fvgp-4.2.2}/tests/test_fvgp.py +35 -1
- fvgp-4.2/fvgp/gp_marginal_density.py +0 -348
- fvgp-4.2/fvgp/misc.py +0 -100
- {fvgp-4.2 → fvgp-4.2.2}/AUTHORS.rst +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/CONTRIBUTING.rst +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/COPYING +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/HISTORY.rst +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/LICENSE +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/MANIFEST.in +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/README.md +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/docs/Makefile +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/docs/make.bat +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/docs/source/_static/landing.png +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/docs/source/conf.py +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/__init__.py +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp/gp_likelihood.py +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/dependency_links.txt +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/entry_points.txt +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/not-zip-safe +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/requires.txt +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/fvgp.egg-info/top_level.txt +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/setup.cfg +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/setup.py +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/tests/__init__.py +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/tests/latest_hps.npy +0 -0
- {fvgp-4.2 → fvgp-4.2.2}/versioneer.py +0 -0
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2024-
|
|
11
|
+
"date": "2024-05-31T14:47:35-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "4.2.
|
|
14
|
+
"full-revisionid": "c7d74e2e459b81c40049a2af64298d8d63111af3",
|
|
15
|
+
"version": "4.2.2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import torch
|
|
2
2
|
from torch import nn
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Network(nn.Module): # pragma: no cover
|
|
4
6
|
def __init__(self, dim, layer_width):
|
|
5
7
|
super().__init__()
|
|
6
8
|
# Inputs to hidden layer linear transformation
|
|
7
9
|
self.layer1 = nn.Linear(dim, layer_width)
|
|
8
10
|
self.layer2 = nn.Linear(layer_width, layer_width)
|
|
9
11
|
self.layer3 = nn.Linear(layer_width, dim)
|
|
12
|
+
self.number_of_hps = int(2. * dim * layer_width + layer_width ** 2 + 2. * layer_width + dim)
|
|
10
13
|
|
|
11
14
|
def forward(self, x):
|
|
12
15
|
x = torch.Tensor(x)
|
|
@@ -15,16 +18,18 @@ class Network(nn.Module): # pragma: no cover
|
|
|
15
18
|
x = torch.nn.functional.relu(self.layer3(x))
|
|
16
19
|
return x.detach().numpy()
|
|
17
20
|
|
|
18
|
-
def set_weights(self,w1,w2,w3):
|
|
21
|
+
def set_weights(self, w1, w2, w3):
|
|
19
22
|
with torch.no_grad(): self.layer1.weight = nn.Parameter(torch.from_numpy(w1).float())
|
|
20
23
|
with torch.no_grad(): self.layer2.weight = nn.Parameter(torch.from_numpy(w2).float())
|
|
21
24
|
with torch.no_grad(): self.layer3.weight = nn.Parameter(torch.from_numpy(w3).float())
|
|
22
|
-
|
|
25
|
+
|
|
26
|
+
def set_biases(self, b1, b2, b3):
|
|
23
27
|
with torch.no_grad(): self.layer1.bias = nn.Parameter(torch.from_numpy(b1).float())
|
|
24
28
|
with torch.no_grad(): self.layer2.bias = nn.Parameter(torch.from_numpy(b2).float())
|
|
25
29
|
with torch.no_grad(): self.layer3.bias = nn.Parameter(torch.from_numpy(b3).float())
|
|
26
30
|
|
|
27
31
|
def get_weights(self):
|
|
28
32
|
return self.layer1.weight, self.layer2.weight, self.layer3.weight
|
|
33
|
+
|
|
29
34
|
def get_biases(self):
|
|
30
35
|
return self.layer1.bias, self.layer2.bias, self.layer3.bias
|
|
@@ -56,10 +56,6 @@ class fvGP(GP):
|
|
|
56
56
|
fvgp.fvGP.gp_deep_kernel_layer_width. If you specify
|
|
57
57
|
another kernel, please provide
|
|
58
58
|
init_hyperparameters.
|
|
59
|
-
hyperparameter_bounds : np.ndarray, optional
|
|
60
|
-
A 2d numpy array of shape (N x 2), where N is the number of needed hyperparameters.
|
|
61
|
-
The default is None, in that case hyperparameter_bounds have to be specified
|
|
62
|
-
in the train calls or default bounds are used. Those only work for the default kernel.
|
|
63
59
|
output_positions : np.ndarray, optional
|
|
64
60
|
A 2-D numpy array of shape (U x output_number), so that for each measurement position, the outputs
|
|
65
61
|
are clearly defined by their positions in the output space. The default is
|
|
@@ -87,9 +83,6 @@ class fvGP(GP):
|
|
|
87
83
|
is a 1d array of length N depending on how many hyperparameters are initialized, and
|
|
88
84
|
obj is an `fvgp.GP` instance. The default is a deep kernel with 2 hidden layers and
|
|
89
85
|
a width of fvgp.fvGP.gp_deep_kernel_layer_width.
|
|
90
|
-
gp_deep_kernel_layer_width : int, optional
|
|
91
|
-
If no kernel is provided, fvGP will use a deep kernel of depth 2 and width gp_deep_kernel_layer_width.
|
|
92
|
-
If a user defined kernel is provided this parameter is irrelevant. The default is 5.
|
|
93
86
|
gp_kernel_function_grad : Callable, optional
|
|
94
87
|
A function that calculates the derivative of the `gp_kernel_function` with respect to the hyperparameters.
|
|
95
88
|
If provided, it will be used for local training (optimization) and can speed up the calculations.
|
|
@@ -210,12 +203,10 @@ class fvGP(GP):
|
|
|
210
203
|
x_data,
|
|
211
204
|
y_data,
|
|
212
205
|
init_hyperparameters=None,
|
|
213
|
-
hyperparameter_bounds=None,
|
|
214
206
|
output_positions=None,
|
|
215
207
|
noise_variances=None,
|
|
216
208
|
compute_device="cpu",
|
|
217
209
|
gp_kernel_function=None,
|
|
218
|
-
gp_deep_kernel_layer_width=5,
|
|
219
210
|
gp_kernel_function_grad=None,
|
|
220
211
|
gp_noise_function=None,
|
|
221
212
|
gp_noise_function_grad=None,
|
|
@@ -238,8 +229,6 @@ class fvGP(GP):
|
|
|
238
229
|
output_space_dim = 1
|
|
239
230
|
###check the output dims
|
|
240
231
|
|
|
241
|
-
|
|
242
|
-
|
|
243
232
|
if np.ndim(y_data) == 1:
|
|
244
233
|
raise ValueError("The output number is 1, you can use GP for single-task GPs")
|
|
245
234
|
if output_space_dim == 1 and isinstance(output_positions, np.ndarray) is False:
|
|
@@ -255,39 +244,12 @@ class fvGP(GP):
|
|
|
255
244
|
self.fvgp_noise_variances = noise_variances
|
|
256
245
|
x_data, y_data, noise_variances = self._transform_index_set(x_data, y_data, noise_variances,
|
|
257
246
|
self.output_positions)
|
|
258
|
-
init_hps = init_hyperparameters
|
|
259
|
-
|
|
260
|
-
if gp_kernel_function is None and isinstance(x_data, np.ndarray):
|
|
261
|
-
gp_kernel_function = self._default_multi_task_kernel
|
|
262
|
-
try:
|
|
263
|
-
from .deep_kernel_network import Network
|
|
264
|
-
except:
|
|
265
|
-
raise Exception("You have not specified a kernel and the default kernel will be used. \n \
|
|
266
|
-
The default kernel needs pytorch to be installed manually.")
|
|
267
|
-
self.gp_deep_kernel_layer_width = gp_deep_kernel_layer_width
|
|
268
|
-
self.n = Network(self.iset_dim, gp_deep_kernel_layer_width)
|
|
269
|
-
number_of_hps = int(2. * self.iset_dim * gp_deep_kernel_layer_width +
|
|
270
|
-
gp_deep_kernel_layer_width ** 2 + 2. * gp_deep_kernel_layer_width + self.iset_dim + 2.)
|
|
271
|
-
self.hps_bounds = np.zeros((number_of_hps, 2))
|
|
272
|
-
self.hps_bounds[0] = np.array([np.var(y_data) / 10., np.var(y_data) * 10.])
|
|
273
|
-
self.hps_bounds[1] = np.array([(np.max(x_data) - np.min(x_data)) / 100., (np.max(x_data) -
|
|
274
|
-
np.min(x_data)) * 100.])
|
|
275
|
-
self.hps_bounds[2:] = np.array([-1., 1.])
|
|
276
|
-
init_hps = np.random.uniform(low=self.hps_bounds[:, 0],
|
|
277
|
-
high=self.hps_bounds[:, 1], size=len(self.hps_bounds))
|
|
278
|
-
warnings.warn("Hyperparameter bounds have been initialized automatically \
|
|
279
|
-
\n for the default kernel in fvgp. They will automatically used for the training.\
|
|
280
|
-
\n However, you can also define and provide new bounds.")
|
|
281
|
-
hyperparameter_bounds = self.hps_bounds
|
|
282
|
-
else:
|
|
283
|
-
warnings.warn("Default kernel could not be defined.")
|
|
284
247
|
|
|
285
248
|
####init GP
|
|
286
249
|
super().__init__(
|
|
287
250
|
x_data,
|
|
288
251
|
y_data,
|
|
289
|
-
init_hyperparameters=
|
|
290
|
-
hyperparameter_bounds=hyperparameter_bounds,
|
|
252
|
+
init_hyperparameters=init_hyperparameters,
|
|
291
253
|
noise_variances=noise_variances,
|
|
292
254
|
compute_device=compute_device,
|
|
293
255
|
gp_kernel_function=gp_kernel_function,
|
|
@@ -300,7 +262,7 @@ class fvGP(GP):
|
|
|
300
262
|
gp2Scale_dask_client=gp2Scale_dask_client,
|
|
301
263
|
gp2Scale_batch_size=gp2Scale_batch_size,
|
|
302
264
|
calc_inv=calc_inv,
|
|
303
|
-
online
|
|
265
|
+
online=online,
|
|
304
266
|
ram_economy=ram_economy,
|
|
305
267
|
args=args,
|
|
306
268
|
info=info)
|
|
@@ -352,7 +314,7 @@ class fvGP(GP):
|
|
|
352
314
|
######################################
|
|
353
315
|
x_data, y_data, noise_variances = self._transform_index_set(x_new, y_new, noise_variances_new,
|
|
354
316
|
output_positions_new)
|
|
355
|
-
super().update_gp_data(
|
|
317
|
+
super().update_gp_data(x_data, y_data, noise_variances, append=append)
|
|
356
318
|
self.output_positions = np.row_stack([self.output_positions, output_positions_new])
|
|
357
319
|
|
|
358
320
|
################################################################################################
|
|
@@ -362,24 +324,25 @@ class fvGP(GP):
|
|
|
362
324
|
value_pos[:, j] = j
|
|
363
325
|
return value_pos
|
|
364
326
|
|
|
365
|
-
def get_fvgp_data(self, labels):
|
|
366
|
-
for i in range(len(self.output_num)):
|
|
367
|
-
pass
|
|
368
327
|
################################################################################################
|
|
369
328
|
def _transform_index_set(self, x_data, y_data, noise_variances, output_positions):
|
|
370
329
|
point_number = len(x_data)
|
|
371
330
|
assert isinstance(x_data, np.ndarray) or isinstance(x_data, list)
|
|
372
|
-
if isinstance(x_data, np.ndarray):
|
|
373
|
-
|
|
331
|
+
if isinstance(x_data, np.ndarray):
|
|
332
|
+
new_points = np.zeros((point_number * self.output_num, self.iset_dim))
|
|
333
|
+
else:
|
|
334
|
+
new_points = [0.] * point_number * self.output_num
|
|
374
335
|
new_values = np.zeros((point_number * self.output_num))
|
|
375
|
-
if noise_variances is not None:
|
|
376
|
-
|
|
336
|
+
if noise_variances is not None:
|
|
337
|
+
new_variances = np.zeros((point_number * self.output_num))
|
|
338
|
+
else:
|
|
339
|
+
new_variances = None
|
|
377
340
|
for i in range(self.output_num):
|
|
378
341
|
if isinstance(x_data, np.ndarray):
|
|
379
342
|
new_points[i * point_number: (i + 1) * point_number] = np.column_stack([x_data, output_positions[:, i]])
|
|
380
343
|
if isinstance(x_data, list):
|
|
381
344
|
for j in range(len(x_data)):
|
|
382
|
-
new_points[i*point_number+j] = [x_data[j], output_positions[j, i]]
|
|
345
|
+
new_points[i * point_number + j] = [x_data[j], output_positions[j, i]]
|
|
383
346
|
new_values[i * point_number: (i + 1) * point_number] = y_data[:, i]
|
|
384
347
|
if noise_variances is not None:
|
|
385
348
|
new_variances[i * point_number: (i + 1) * point_number] = noise_variances[:, i]
|
|
@@ -387,30 +350,4 @@ class fvGP(GP):
|
|
|
387
350
|
return new_points, new_values, new_variances
|
|
388
351
|
|
|
389
352
|
################################################################################################
|
|
390
|
-
def _default_multi_task_kernel(self, x1, x2, hps, obj): # pragma: no cover
|
|
391
|
-
signal_var = hps[0]
|
|
392
|
-
length_scale = hps[1]
|
|
393
|
-
hps_nn = hps[2:]
|
|
394
|
-
w1_indices = np.arange(0, self.gp_deep_kernel_layer_width * self.iset_dim)
|
|
395
|
-
last = self.gp_deep_kernel_layer_width * self.iset_dim
|
|
396
|
-
w2_indices = np.arange(last, last + self.gp_deep_kernel_layer_width ** 2)
|
|
397
|
-
last = last + self.gp_deep_kernel_layer_width ** 2
|
|
398
|
-
w3_indices = np.arange(last, last + self.gp_deep_kernel_layer_width * self.iset_dim)
|
|
399
|
-
last = last + self.gp_deep_kernel_layer_width * self.iset_dim
|
|
400
|
-
b1_indices = np.arange(last, last + self.gp_deep_kernel_layer_width)
|
|
401
|
-
last = last + self.gp_deep_kernel_layer_width
|
|
402
|
-
b2_indices = np.arange(last, last + self.gp_deep_kernel_layer_width)
|
|
403
|
-
last = last + self.gp_deep_kernel_layer_width
|
|
404
|
-
b3_indices = np.arange(last, last + self.iset_dim)
|
|
405
353
|
|
|
406
|
-
self.n.set_weights(hps_nn[w1_indices].reshape(self.gp_deep_kernel_layer_width, self.iset_dim),
|
|
407
|
-
hps_nn[w2_indices].reshape(self.gp_deep_kernel_layer_width, self.gp_deep_kernel_layer_width),
|
|
408
|
-
hps_nn[w3_indices].reshape(self.iset_dim, self.gp_deep_kernel_layer_width))
|
|
409
|
-
self.n.set_biases(hps_nn[b1_indices].reshape(self.gp_deep_kernel_layer_width),
|
|
410
|
-
hps_nn[b2_indices].reshape(self.gp_deep_kernel_layer_width),
|
|
411
|
-
hps_nn[b3_indices].reshape(self.iset_dim))
|
|
412
|
-
x1_nn = self.n.forward(x1)
|
|
413
|
-
x2_nn = self.n.forward(x2)
|
|
414
|
-
d = get_distance_matrix(x1_nn, x2_nn)
|
|
415
|
-
k = signal_var * matern_kernel_diff1(d, length_scale)
|
|
416
|
-
return k
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
-
import dask.distributed as distributed
|
|
5
4
|
import numpy as np
|
|
6
5
|
from loguru import logger
|
|
7
6
|
from dask.distributed import Client
|
|
@@ -12,17 +11,17 @@ from .gp_marginal_density import GPMarginalDensity
|
|
|
12
11
|
from .gp_likelihood import GPlikelihood
|
|
13
12
|
from .gp_training import GPtraining
|
|
14
13
|
from .gp_posterior import GPposterior
|
|
14
|
+
import sys
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
# TODO: search below "TODO"
|
|
18
|
-
# neither minres nor random logdet are doing a good job
|
|
19
|
-
#
|
|
20
|
-
# and self.dummy_data = True. This should be overwritten in the update_data and used for warning in train and posteriors.
|
|
21
|
-
# the noise will have to be either given as a function or itialized randomly too with a warning that noise will have to be communited in the update.
|
|
18
|
+
# neither minres nor random logdet are doing a good job in gp2Scale,
|
|
19
|
+
# cg is better but we might need a preconditioner , maybe a large LU?
|
|
22
20
|
# the mcmc in default mode should not need proposal distributions explicitly
|
|
23
21
|
# reshape posteriors if x_out
|
|
24
22
|
# when are we really using gpu vs cpu as compute_device
|
|
25
23
|
|
|
24
|
+
|
|
26
25
|
class GP:
|
|
27
26
|
"""
|
|
28
27
|
This class provides all the tools for a single-task Gaussian Process (GP).
|
|
@@ -47,17 +46,9 @@ class GP:
|
|
|
47
46
|
init_hyperparameters : np.ndarray, optional
|
|
48
47
|
Vector of hyperparameters used by the GP initially.
|
|
49
48
|
This class provides methods to train hyperparameters.
|
|
50
|
-
The default is
|
|
51
|
-
within hyperparameter_bounds, with a shape appropriate
|
|
52
|
-
for the default kernel (D + 1), which is an anisotropic Matern
|
|
49
|
+
The default is an array of ones with the right length for the anisotropic Matern
|
|
53
50
|
kernel with automatic relevance determination (ARD). If sparse_node or gp2Scale is
|
|
54
51
|
enabled, the default kernel changes to the anisotropic Wendland kernel.
|
|
55
|
-
hyperparameter_bounds : np.ndarray, optional
|
|
56
|
-
A 2d numpy array of shape (N x 2), where N is the number of needed hyperparameters.
|
|
57
|
-
The default is None, in which case the hyperparameter_bounds are estimated from the domain size
|
|
58
|
-
and the initial y_data. If the data set changes significantly,
|
|
59
|
-
the hyperparameters and the bounds should be changed/retrained. Initial hyperparameters and bounds
|
|
60
|
-
can also be set in the train calls. The default only works for the default kernels.
|
|
61
52
|
noise_variances : np.ndarray, optional
|
|
62
53
|
An numpy array defining the uncertainties/noise in the data
|
|
63
54
|
`y_data` in form of a point-wise variance. Shape (len(y_data), 1) or (len(y_data)).
|
|
@@ -154,9 +145,12 @@ class GP:
|
|
|
154
145
|
a good option when the dataset is not too large and the posterior covariance is heavily used.
|
|
155
146
|
online : bool, optional
|
|
156
147
|
A new setting that allows optimization for online applications. Default=False. If True,
|
|
157
|
-
|
|
148
|
+
the inverse (if calc_inv is True), or the Cholesky factors (if calc_inv is False) and the logdet()
|
|
149
|
+
will only be computed
|
|
158
150
|
once in the beginning and after that only updated. This leads to a significant speedup because
|
|
159
|
-
the most costly aspects of a GP are entirely avoided.
|
|
151
|
+
the most costly aspects of a GP are entirely avoided. A good indicator whether `online` is a good choice is
|
|
152
|
+
the `append` option in the gp update. You always append data, never overwrite, online should be True
|
|
153
|
+
to save some time.
|
|
160
154
|
ram_economy : bool, optional
|
|
161
155
|
Only of interest if the gradient and/or Hessian of the marginal log_likelihood is/are used for the training.
|
|
162
156
|
If True, components of the derivative of the marginal log-likelihood are
|
|
@@ -204,7 +198,6 @@ class GP:
|
|
|
204
198
|
x_data,
|
|
205
199
|
y_data,
|
|
206
200
|
init_hyperparameters=None,
|
|
207
|
-
hyperparameter_bounds=None,
|
|
208
201
|
noise_variances=None,
|
|
209
202
|
compute_device="cpu",
|
|
210
203
|
gp_kernel_function=None,
|
|
@@ -225,6 +218,10 @@ class GP:
|
|
|
225
218
|
self.compute_device = compute_device
|
|
226
219
|
self.args = args
|
|
227
220
|
self.info = info
|
|
221
|
+
if info:
|
|
222
|
+
logger.remove()
|
|
223
|
+
logger.enable("fvgp")
|
|
224
|
+
logger.add(sys.stdout, filter="fvgp", level="INFO")
|
|
228
225
|
self.calc_inv = calc_inv
|
|
229
226
|
self.gp2Scale = gp2Scale
|
|
230
227
|
self.gp2Scale_dask_client = gp2Scale_dask_client
|
|
@@ -238,30 +235,18 @@ class GP:
|
|
|
238
235
|
# prepare initial hyperparameters and bounds
|
|
239
236
|
if self.data.Euclidean:
|
|
240
237
|
if callable(gp_kernel_function) or callable(gp_mean_function) or callable(gp_noise_function):
|
|
241
|
-
if
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
"initial hyperparameters or hyperparameter bounds. Please provide"
|
|
245
|
-
"at least one of them at initialization.")
|
|
246
|
-
else:
|
|
247
|
-
if init_hyperparameters is None:
|
|
248
|
-
hyperparameters, hyperparameter_bounds = self._get_default_hyperparameters(
|
|
249
|
-
hyperparameter_bounds)
|
|
238
|
+
if init_hyperparameters is None: raise Exception(
|
|
239
|
+
"You have provided callables for kernel, mean, or noise functions but no"
|
|
240
|
+
"initial hyperparameters.")
|
|
250
241
|
else:
|
|
251
|
-
if init_hyperparameters is None: hyperparameters
|
|
252
|
-
self._get_default_hyperparameters(hyperparameter_bounds)
|
|
242
|
+
if init_hyperparameters is None: hyperparameters = np.ones((self.data.input_space_dim + 1))
|
|
253
243
|
else:
|
|
254
|
-
hyperparameters
|
|
244
|
+
hyperparameters = init_hyperparameters
|
|
255
245
|
|
|
256
246
|
# warn if they could not be prepared
|
|
257
247
|
if hyperparameters is None:
|
|
258
248
|
raise Exception("'init_hyperparameters' not provided and could not be calculated. Please provide them ")
|
|
259
249
|
|
|
260
|
-
if hyperparameter_bounds is None:
|
|
261
|
-
warnings.warn("hyperparameter_bounds not provided. "
|
|
262
|
-
"They will have to be provided in the training call.")
|
|
263
|
-
self.hyperparameter_bounds = hyperparameter_bounds
|
|
264
|
-
|
|
265
250
|
if gp2Scale:
|
|
266
251
|
try:
|
|
267
252
|
import imate
|
|
@@ -282,19 +267,19 @@ class GP:
|
|
|
282
267
|
###init prior instance##################
|
|
283
268
|
########################################
|
|
284
269
|
self.prior = GPprior(self.data.input_space_dim,
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
270
|
+
self.data.x_data,
|
|
271
|
+
self.data.Euclidean,
|
|
272
|
+
hyperparameters=hyperparameters,
|
|
273
|
+
gp_kernel_function=gp_kernel_function,
|
|
274
|
+
gp_mean_function=gp_mean_function,
|
|
275
|
+
gp_kernel_function_grad=gp_kernel_function_grad,
|
|
276
|
+
gp_mean_function_grad=gp_mean_function_grad,
|
|
277
|
+
constant_mean=np.mean(y_data),
|
|
278
|
+
gp2Scale=gp2Scale,
|
|
279
|
+
gp2Scale_dask_client=gp2Scale_dask_client,
|
|
280
|
+
gp2Scale_batch_size=gp2Scale_batch_size,
|
|
281
|
+
ram_economy=ram_economy
|
|
282
|
+
)
|
|
298
283
|
########################################
|
|
299
284
|
###init likelihood instance#############
|
|
300
285
|
########################################
|
|
@@ -389,22 +374,26 @@ class GP:
|
|
|
389
374
|
self.x_data = self.data.x_data
|
|
390
375
|
self.y_data = self.data.y_data
|
|
391
376
|
|
|
392
|
-
def
|
|
377
|
+
def _get_default_hyperparameter_bounds(self):
|
|
393
378
|
"""
|
|
394
|
-
This function will create hyperparameter bounds
|
|
395
|
-
|
|
396
|
-
"""
|
|
397
|
-
if hyperparameter_bounds is None:
|
|
398
|
-
hyperparameter_bounds = np.zeros((self.data.input_space_dim + 1, 2))
|
|
399
|
-
hyperparameter_bounds[0] = np.array([np.var(self.data.y_data) / 100., np.var(self.data.y_data) * 10.])
|
|
400
|
-
for i in range(self.data.input_space_dim):
|
|
401
|
-
range_xi = np.max(self.data.x_data[:, i]) - np.min(self.data.x_data[:, i])
|
|
402
|
-
hyperparameter_bounds[i + 1] = np.array([range_xi / 100., range_xi * 10.])
|
|
379
|
+
This function will create hyperparameter bounds for the default kernel based
|
|
380
|
+
on the data only.
|
|
403
381
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
382
|
+
Return:
|
|
383
|
+
--------
|
|
384
|
+
hyperparameter bounds for the default kernel : np.ndarray
|
|
385
|
+
"""
|
|
386
|
+
if not self.data.Euclidean: raise Exception("Please provide custom hyperparameter bounds to "
|
|
387
|
+
"the training in the non-Euclidean setting")
|
|
388
|
+
if len(self.prior.hyperparameters) != self.data.input_space_dim + 1:
|
|
389
|
+
raise Exception("Please provide custom hyperparameter_bounds when kernel, mean or noise"
|
|
390
|
+
" functions are customized")
|
|
391
|
+
hyperparameter_bounds = np.zeros((self.data.input_space_dim + 1, 2))
|
|
392
|
+
hyperparameter_bounds[0] = np.array([np.var(self.data.y_data) / 100., np.var(self.data.y_data) * 10.])
|
|
393
|
+
for i in range(self.data.input_space_dim):
|
|
394
|
+
range_xi = np.max(self.data.x_data[:, i]) - np.min(self.data.x_data[:, i])
|
|
395
|
+
hyperparameter_bounds[i + 1] = np.array([range_xi / 100., range_xi * 10.])
|
|
396
|
+
return hyperparameter_bounds
|
|
408
397
|
|
|
409
398
|
###################################################################################
|
|
410
399
|
###################################################################################
|
|
@@ -412,10 +401,10 @@ class GP:
|
|
|
412
401
|
#################TRAINING##########################################################
|
|
413
402
|
###################################################################################
|
|
414
403
|
def train(self,
|
|
404
|
+
hyperparameter_bounds=None,
|
|
415
405
|
objective_function=None,
|
|
416
406
|
objective_function_gradient=None,
|
|
417
407
|
objective_function_hessian=None,
|
|
418
|
-
hyperparameter_bounds=None,
|
|
419
408
|
init_hyperparameters=None,
|
|
420
409
|
method="global",
|
|
421
410
|
pop_size=20,
|
|
@@ -434,6 +423,11 @@ class GP:
|
|
|
434
423
|
|
|
435
424
|
Parameters
|
|
436
425
|
----------
|
|
426
|
+
hyperparameter_bounds : np.ndarray
|
|
427
|
+
A numpy array of shape (D x 2), defining the bounds for the optimization.
|
|
428
|
+
A 2d numpy array of shape (N x 2), where N is the number of hyperparameters.
|
|
429
|
+
If the data set changes significantly,
|
|
430
|
+
the hyperparameters and the bounds should be changed/retrained.
|
|
437
431
|
objective_function : callable, optional
|
|
438
432
|
The function that will be MINIMIZED for training the GP. The form of the function is f(hyperparameters=hps)
|
|
439
433
|
and returns a scalar. This function can be used to train via non-standard user-defined objectives.
|
|
@@ -450,13 +444,6 @@ class GP:
|
|
|
450
444
|
and returns a matrix of shape(len(hps),len(hps)). This function can be used to train
|
|
451
445
|
via non-standard user-defined objectives.
|
|
452
446
|
The default is the hessian of the negative log marginal likelihood.
|
|
453
|
-
hyperparameter_bounds : np.ndarray, optional
|
|
454
|
-
A numpy array of shape (D x 2), defining the bounds for the optimization.
|
|
455
|
-
A 2d numpy array of shape (N x 2), where N is the number of hyperparameters.
|
|
456
|
-
The default is None, in which case the hyperparameter_bounds are estimated from the domain size
|
|
457
|
-
and the y_data. If the data set changes significantly,
|
|
458
|
-
the hyperparameters and the bounds should be changed/retrained.
|
|
459
|
-
The default only works for the default kernels.
|
|
460
447
|
init_hyperparameters : np.ndarray, optional
|
|
461
448
|
Initial hyperparameters used as starting location for all optimizers with local component.
|
|
462
449
|
The default is a random draw from a uniform distribution within the bounds.
|
|
@@ -485,23 +472,29 @@ class GP:
|
|
|
485
472
|
A Dask Distributed Client instance for distributed training if HGDL is used. If None is provided, a new
|
|
486
473
|
`dask.distributed.Client` instance is constructed.
|
|
487
474
|
"""
|
|
475
|
+
if self.gp2Scale: method = 'mcmc'
|
|
488
476
|
if hyperparameter_bounds is None:
|
|
489
|
-
|
|
490
|
-
hyperparameter_bounds
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
477
|
+
hyperparameter_bounds = self._get_default_hyperparameter_bounds()
|
|
478
|
+
warnings.warn("Default hyperparameter_bounds initialized because none were provided. "
|
|
479
|
+
"This will fail for custom kernel,"
|
|
480
|
+
" mean, or noise functions")
|
|
481
|
+
if init_hyperparameters is None:
|
|
482
|
+
init_hyperparameters = np.random.uniform(low=hyperparameter_bounds[:, 0],
|
|
483
|
+
high=hyperparameter_bounds[:, 1],
|
|
484
|
+
size=len(hyperparameter_bounds))
|
|
495
485
|
if objective_function is not None and method == 'mcmc':
|
|
496
486
|
warnings.warn("MCMC will ignore the user-defined objective function")
|
|
497
487
|
if objective_function is not None and objective_function_gradient is None and (method == 'local' or 'hgdl'):
|
|
498
488
|
raise Exception("For user-defined objective functions and local or hybrid optimization, a gradient and\
|
|
499
489
|
Hessian function of the objective function have to be defined.")
|
|
490
|
+
if method == 'mcmc': objective_function = self.marginal_density.log_likelihood
|
|
500
491
|
if objective_function is None: objective_function = self.marginal_density.neg_log_likelihood
|
|
501
|
-
if objective_function is None and method == 'mcmc': objective_function = self.marginal_density.log_likelihood
|
|
502
492
|
if objective_function_gradient is None: objective_function_gradient = self.marginal_density.neg_log_likelihood_gradient
|
|
503
493
|
if objective_function_hessian is None: objective_function_hessian = self.marginal_density.neg_log_likelihood_hessian
|
|
504
494
|
|
|
495
|
+
logger.info("objective function: {}", objective_function)
|
|
496
|
+
logger.info("method: {}", method)
|
|
497
|
+
|
|
505
498
|
hyperparameters = self.trainer.train(
|
|
506
499
|
objective_function=objective_function,
|
|
507
500
|
objective_function_gradient=objective_function_gradient,
|
|
@@ -525,10 +518,10 @@ class GP:
|
|
|
525
518
|
|
|
526
519
|
##################################################################################
|
|
527
520
|
def train_async(self,
|
|
521
|
+
hyperparameter_bounds=None,
|
|
528
522
|
objective_function=None,
|
|
529
523
|
objective_function_gradient=None,
|
|
530
524
|
objective_function_hessian=None,
|
|
531
|
-
hyperparameter_bounds=None,
|
|
532
525
|
init_hyperparameters=None,
|
|
533
526
|
max_iter=10000,
|
|
534
527
|
local_optimizer="L-BFGS-B",
|
|
@@ -544,6 +537,11 @@ class GP:
|
|
|
544
537
|
|
|
545
538
|
Parameters
|
|
546
539
|
----------
|
|
540
|
+
hyperparameter_bounds : np.ndarray
|
|
541
|
+
A numpy array of shape (D x 2), defining the bounds for the optimization.
|
|
542
|
+
A 2d numpy array of shape (N x 2), where N is the number of hyperparameters.
|
|
543
|
+
If the data set changes significantly,
|
|
544
|
+
the hyperparameters and the bounds should be changed/retrained.
|
|
547
545
|
objective_function : callable, optional
|
|
548
546
|
The function that will be MINIMIZED for training the GP. The form of the function is f(hyperparameters=hps)
|
|
549
547
|
and returns a scalar. This function can be used to train via non-standard user-defined objectives.
|
|
@@ -560,13 +558,6 @@ class GP:
|
|
|
560
558
|
and returns a matrix of shape(len(hps),len(hps)). This function can be used to train
|
|
561
559
|
via non-standard user-defined objectives.
|
|
562
560
|
The default is the hessian of the negative log marginal likelihood.
|
|
563
|
-
hyperparameter_bounds : np.ndarray, optional
|
|
564
|
-
A numpy array of shape (D x 2), defining the bounds for the optimization.
|
|
565
|
-
A 2d numpy array of shape (N x 2), where N is the number of hyperparameters.
|
|
566
|
-
The default is None, in which case the hyperparameter_bounds are estimated from the domain size
|
|
567
|
-
and the y_data. If the data set changes significantly,
|
|
568
|
-
the hyperparameters and the bounds should be changed/retrained.
|
|
569
|
-
The default only works for the default kernels.
|
|
570
561
|
init_hyperparameters : np.ndarray, optional
|
|
571
562
|
Initial hyperparameters used as starting location for all optimizers with local component.
|
|
572
563
|
The default is a random draw from a uniform distribution within the bounds.
|
|
@@ -589,14 +580,15 @@ class GP:
|
|
|
589
580
|
to update the prior GP : object instance
|
|
590
581
|
"""
|
|
591
582
|
if self.gp2Scale: raise Exception("gp2Scale does not allow asynchronous training!")
|
|
592
|
-
if dask_client is None: dask_client = distributed.Client()
|
|
593
583
|
if hyperparameter_bounds is None:
|
|
594
|
-
|
|
595
|
-
hyperparameter_bounds
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
584
|
+
hyperparameter_bounds = self._get_default_hyperparameter_bounds()
|
|
585
|
+
warnings.warn("Default hyperparameter_bounds initialized because none were provided. "
|
|
586
|
+
"This will fail for custom kernel,"
|
|
587
|
+
" mean, or noise functions")
|
|
588
|
+
if init_hyperparameters is None:
|
|
589
|
+
init_hyperparameters = np.random.uniform(low=hyperparameter_bounds[:, 0],
|
|
590
|
+
high=hyperparameter_bounds[:, 1],
|
|
591
|
+
size=len(hyperparameter_bounds))
|
|
600
592
|
if objective_function is None: objective_function = self.marginal_density.neg_log_likelihood
|
|
601
593
|
if objective_function_gradient is None: objective_function_gradient = self.marginal_density.neg_log_likelihood_gradient
|
|
602
594
|
if objective_function_hessian is None: objective_function_hessian = self.marginal_density.neg_log_likelihood_hessian
|
|
@@ -725,8 +717,7 @@ class GP:
|
|
|
725
717
|
A dictionary containing information about the GP prior distribution : dict
|
|
726
718
|
"""
|
|
727
719
|
|
|
728
|
-
return {"prior covariance (K)": self.prior.K,
|
|
729
|
-
"inv(KV)": self.marginal_density.KVinv,
|
|
720
|
+
return {"prior covariance (K)": self.prior.K,
|
|
730
721
|
"prior mean": self.prior.m}
|
|
731
722
|
|
|
732
723
|
def log_likelihood(self, hyperparameters=None):
|
|
@@ -1296,8 +1287,8 @@ class GP:
|
|
|
1296
1287
|
####################################################################################
|
|
1297
1288
|
def _crps_s(self, x, mu, sigma):
|
|
1298
1289
|
res = abs(sigma * ((1. / np.sqrt(np.pi))
|
|
1299
|
-
|
|
1300
|
-
|
|
1290
|
+
- 2. * norm.pdf((x - mu) / sigma)
|
|
1291
|
+
- (((x - mu) / sigma) * (2. * norm.cdf((x - mu) / sigma) - 1.))))
|
|
1301
1292
|
return np.mean(res), np.sqrt(np.var(res))
|
|
1302
1293
|
|
|
1303
1294
|
def crps(self, x_test, y_test):
|
|
@@ -1341,8 +1332,8 @@ class GP:
|
|
|
1341
1332
|
RMSE : float
|
|
1342
1333
|
"""
|
|
1343
1334
|
|
|
1344
|
-
v1 = y_test
|
|
1345
|
-
v2 = self.posterior_mean(x_test)["f(x)"]
|
|
1335
|
+
v1 = y_test.reshape(len(y_test))
|
|
1336
|
+
v2 = self.posterior_mean(x_test)["f(x)"].reshape(len(v1))
|
|
1346
1337
|
return np.sqrt(np.sum((v1 - v2) ** 2) / len(v1))
|
|
1347
1338
|
|
|
1348
1339
|
def make_2d_x_pred(self, bx, by, resx=100, resy=100): # pragma: no cover
|