moospread 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moospread/__init__.py +3 -0
- moospread/core.py +1881 -0
- moospread/problem.py +193 -0
- moospread/tasks/__init__.py +4 -0
- moospread/tasks/dtlz_torch.py +139 -0
- moospread/tasks/mw_torch.py +274 -0
- moospread/tasks/re_torch.py +394 -0
- moospread/tasks/zdt_torch.py +112 -0
- moospread/utils/__init__.py +8 -0
- moospread/utils/constraint_utils/__init__.py +2 -0
- moospread/utils/constraint_utils/gradient.py +72 -0
- moospread/utils/constraint_utils/mgda_core.py +69 -0
- moospread/utils/constraint_utils/pmgda_solver.py +308 -0
- moospread/utils/constraint_utils/prefs.py +64 -0
- moospread/utils/ditmoo.py +127 -0
- moospread/utils/lhs.py +74 -0
- moospread/utils/misc.py +28 -0
- moospread/utils/mobo_utils/__init__.py +11 -0
- moospread/utils/mobo_utils/evolution/__init__.py +0 -0
- moospread/utils/mobo_utils/evolution/dom.py +60 -0
- moospread/utils/mobo_utils/evolution/norm.py +40 -0
- moospread/utils/mobo_utils/evolution/utils.py +97 -0
- moospread/utils/mobo_utils/learning/__init__.py +0 -0
- moospread/utils/mobo_utils/learning/model.py +40 -0
- moospread/utils/mobo_utils/learning/model_init.py +33 -0
- moospread/utils/mobo_utils/learning/model_update.py +51 -0
- moospread/utils/mobo_utils/learning/prediction.py +116 -0
- moospread/utils/mobo_utils/learning/utils.py +143 -0
- moospread/utils/mobo_utils/lhs_for_mobo.py +243 -0
- moospread/utils/mobo_utils/mobo/__init__.py +0 -0
- moospread/utils/mobo_utils/mobo/acquisition.py +209 -0
- moospread/utils/mobo_utils/mobo/algorithms.py +91 -0
- moospread/utils/mobo_utils/mobo/factory.py +86 -0
- moospread/utils/mobo_utils/mobo/mobo.py +132 -0
- moospread/utils/mobo_utils/mobo/selection.py +182 -0
- moospread/utils/mobo_utils/mobo/solver/__init__.py +5 -0
- moospread/utils/mobo_utils/mobo/solver/moead.py +17 -0
- moospread/utils/mobo_utils/mobo/solver/nsga2.py +10 -0
- moospread/utils/mobo_utils/mobo/solver/parego/__init__.py +1 -0
- moospread/utils/mobo_utils/mobo/solver/parego/parego.py +62 -0
- moospread/utils/mobo_utils/mobo/solver/parego/utils.py +34 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/__init__.py +1 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/buffer.py +364 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/pareto_discovery.py +571 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/utils.py +168 -0
- moospread/utils/mobo_utils/mobo/solver/solver.py +74 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/__init__.py +2 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/base.py +36 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/gaussian_process.py +177 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/thompson_sampling.py +79 -0
- moospread/utils/mobo_utils/mobo/surrogate_problem.py +44 -0
- moospread/utils/mobo_utils/mobo/transformation.py +106 -0
- moospread/utils/mobo_utils/mobo/utils.py +65 -0
- moospread/utils/mobo_utils/spread_mobo_utils.py +854 -0
- moospread/utils/offline_utils/__init__.py +10 -0
- moospread/utils/offline_utils/handle_task.py +203 -0
- moospread/utils/offline_utils/proxies.py +338 -0
- moospread/utils/spread_utils.py +91 -0
- moospread-0.1.0.dist-info/METADATA +75 -0
- moospread-0.1.0.dist-info/RECORD +63 -0
- moospread-0.1.0.dist-info/WHEEL +5 -0
- moospread-0.1.0.dist-info/licenses/LICENSE +10 -0
- moospread-0.1.0.dist-info/top_level.txt +1 -0
moospread/utils/mobo_utils/mobo/solver/pareto_discovery/utils.py
@@ -0,0 +1,168 @@
import numpy as np
from pymoo.factory import get_performance_indicator


def propose_next_batch(curr_pfront, ref_point, pred_pfront, pred_pset, batch_size, labels):
    '''
    Propose next batch of design variables to evaluate by maximizing hypervolume contribution.
    Greedily add samples with maximum hypervolume contribution from each family.
    Input:
        curr_pfront: current pareto front of evaluated design samples
        pred_pfront: predicted pareto front from sampled objective functions
        pred_pset: predicted pareto set from sampled objective functions
        batch_size: batch size of design samples to be proposed
        labels: family labels for pred_pset
    Output:
        X_next: next batch of design variables to evaluate
        Y_next: expected output of next batch of design variables to evaluate
        family_lbls: family labels of proposed batch samples
    '''
    # assert len(pred_pset) >= batch_size, "predicted pareto set is smaller than proposed batch size!"

    curr_pfront = curr_pfront.copy()
    hv = get_performance_indicator('hv', ref_point=ref_point)
    idx_choices = np.ma.array(np.arange(len(pred_pset)), mask=False) # mask array for index choices
    iter_idx_choices = np.ma.array(np.arange(len(pred_pset)), mask=False) # mask array for index choices of unvisited family samples
    next_batch_indices = []
    family_lbls_next = []
    num_families = len(np.unique(labels))
    print('Number of families is: ' + str(num_families))

    if len(pred_pset) < batch_size:
        print('Predicted pareto set is smaller than proposed batch size and has ' + str(len(pred_pset)) + ' points.')
        next_batch_indices = [0] * (batch_size - len(pred_pset))
        batch_size = len(pred_pset)

    # greedily select indices that maximize hypervolume contribution
    for _ in range(batch_size):
        # if all families were visited, start a new cycle
        if len(iter_idx_choices.compressed()) == 0:
            iter_idx_choices = idx_choices.copy()
        curr_hv = hv.calc(curr_pfront)
        max_hv_contrib = 0.
        max_hv_idx = -1
        for idx in iter_idx_choices.compressed():
            # calculate hypervolume contribution
            new_hv = hv.calc(np.vstack([curr_pfront, pred_pfront[idx]]))
            hv_contrib = new_hv - curr_hv
            if hv_contrib > max_hv_contrib:
                max_hv_contrib = hv_contrib
                max_hv_idx = idx
        if max_hv_idx == -1: # if no candidate has any hypervolume contribution, just randomly select one
            max_hv_idx = np.random.choice(iter_idx_choices.compressed())

        idx_choices.mask[max_hv_idx] = True # mask as selected
        curr_pfront = np.vstack([curr_pfront, pred_pfront[max_hv_idx]]) # add to current pareto front
        next_batch_indices.append(max_hv_idx)
        family_lbls_next.append(labels[max_hv_idx])
        # find the selected family and mask all its members as visited in this cycle
        family_ids = np.where(labels == labels[max_hv_idx])[0]
        for fid in family_ids:
            iter_idx_choices.mask[fid] = True

    X_next = pred_pset[next_batch_indices].copy()
    Y_next = pred_pfront[next_batch_indices].copy()
    return X_next, Y_next, family_lbls_next


def propose_next_batch_without_label(curr_pfront, ref_point, pred_pfront, pred_pset, batch_size):
    '''
    Propose next batch of design variables to evaluate by maximizing hypervolume contribution
    Input:
        curr_pfront: current pareto front of evaluated design samples
        pred_pfront: predicted pareto front from sampled objective functions
        pred_pset: predicted pareto set from sampled objective functions
        batch_size: batch size of design samples to be proposed
    Output:
        X_next: next batch of design variables to evaluate
    '''
    # assert len(pred_pset) >= batch_size, "predicted pareto set is smaller than proposed batch size!"

    curr_pfront = curr_pfront.copy()
    hv = get_performance_indicator('hv', ref_point=ref_point)
    idx_choices = np.ma.array(np.arange(len(pred_pset)), mask=False) # mask array for index choices
    next_batch_indices = []

    if len(pred_pset) < batch_size:
        print('Predicted pareto set is smaller than proposed batch size and has ' + str(len(pred_pset)) + ' points.')
        next_batch_indices = [0] * (batch_size - len(pred_pset))
        batch_size = len(pred_pset)

    # greedily select indices that maximize hypervolume contribution
    for _ in range(batch_size):
        curr_hv = hv.calc(curr_pfront)
        max_hv_contrib = 0.
        max_hv_idx = -1
        for idx in idx_choices.compressed():
            # calculate hypervolume contribution
            new_hv = hv.calc(np.vstack([curr_pfront, pred_pfront[idx]]))
            hv_contrib = new_hv - curr_hv
            if hv_contrib > max_hv_contrib:
                max_hv_contrib = hv_contrib
                max_hv_idx = idx
        if max_hv_idx == -1: # if no candidate has any hypervolume contribution, just randomly select one
            max_hv_idx = np.random.choice(idx_choices.compressed())

        idx_choices.mask[max_hv_idx] = True # mask as selected
        curr_pfront = np.vstack([curr_pfront, pred_pfront[max_hv_idx]]) # add to current pareto front
        next_batch_indices.append(max_hv_idx)

    X_next = pred_pset[next_batch_indices].copy()
    Y_next = pred_pfront[next_batch_indices].copy()
    return X_next, Y_next


def generate_weights_batch_dfs(i, n_dim, min_weight, max_weight, delta_weight, weight, weights_batch):
    if i == n_dim - 1:
        weight.append(1.0 - np.sum(weight[0:i]))
        weights_batch.append(weight.copy())
        weight = weight[0:i]
        return
    w = min_weight
    while w < max_weight + 0.5 * delta_weight and np.sum(weight[0:i]) + w < 1.0 + 0.5 * delta_weight:
        weight.append(w)
        generate_weights_batch_dfs(i + 1, n_dim, min_weight, max_weight, delta_weight, weight, weights_batch)
        weight = weight[0:i]
        w += delta_weight


def generate_weights_batch(n_dim, delta_weight):
    '''
    Generate n-dimensional uniformly distributed weights using depth-first search.
    e.g. generate_weights_batch(2, 0.5) returns [[0.0, 1.0], [0.5, 0.5], [1.0, 0.0]]
    '''
    weights_batch = []
    generate_weights_batch_dfs(0, n_dim, 0.0, 1.0, delta_weight, [], weights_batch)
    return np.array(weights_batch)


def get_sample_num_from_families(n_sample, family_sizes):
    '''
    Choose a certain number of samples from all families, as uniformly as possible.
    Input:
        n_sample: total number of samples to be chosen
        family_sizes: array containing size of each family, shape = (n_family,)
    Output:
        sample_nums: number of samples we choose from each family, shape = (n_family,)
    '''
    assert np.sum(family_sizes) >= n_sample

    family_sizes = np.array(family_sizes, dtype=np.int32)
    valid_idx = np.where(family_sizes > 0)[0]
    valid_family_sizes = family_sizes[valid_idx]
    n_family = len(valid_idx)
    sample_nums = np.zeros_like(family_sizes, dtype=np.int32)

    if n_sample > n_family:
        # distribute n_sample to n_family as uniformly as possible
        curr_n_sample_each_fam = min(n_sample // n_family, np.min(valid_family_sizes))
        remain_n_sample = n_sample - curr_n_sample_each_fam * n_family
        remain_family_sizes = valid_family_sizes - curr_n_sample_each_fam
        sample_nums[valid_idx] += curr_n_sample_each_fam
        sample_nums[valid_idx] += get_sample_num_from_families(remain_n_sample, remain_family_sizes)
    else:
        # randomly choose n_sample families to sample
        random_idx = np.random.choice(np.arange(n_family), n_sample, replace=False)
        sample_nums[valid_idx[random_idx]] = 1

    return sample_nums
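
For orientation, a minimal usage sketch of the helpers above. The import path follows the file listing, the arrays are purely illustrative stand-ins for real Pareto data, and running it requires the legacy pymoo.factory API that the module imports:

import numpy as np
from moospread.utils.mobo_utils.mobo.solver.pareto_discovery.utils import (
    propose_next_batch, generate_weights_batch,
)

# illustrative stand-ins: 2 objectives, 6 design variables, 4 families
curr_pfront = np.random.rand(10, 2)        # objectives of already-evaluated samples
pred_pfront = np.random.rand(50, 2)        # predicted Pareto front
pred_pset = np.random.rand(50, 6)          # predicted Pareto set
labels = np.random.randint(0, 4, size=50)  # family label per predicted point
ref_point = np.array([1.1, 1.1])           # hypervolume reference point

X_next, Y_next, fam = propose_next_batch(
    curr_pfront, ref_point, pred_pfront, pred_pset, batch_size=5, labels=labels)
print(X_next.shape, Y_next.shape, fam)     # (5, 6) (5, 2) and 5 family labels

# uniformly spaced 3-dimensional weight vectors with step 0.25
print(generate_weights_batch(3, 0.25))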
moospread/utils/mobo_utils/mobo/solver/solver.py
@@ -0,0 +1,74 @@
import numpy as np
from pymoo.optimize import minimize
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
from pymoo.operators.sampling.random_sampling import FloatRandomSampling
from pymoo.operators.sampling.latin_hypercube_sampling import LatinHypercubeSampling
from external import lhs


class Solver:
    '''
    Multi-objective solver
    '''
    def __init__(self, n_gen, pop_init_method, batch_size, algo, **kwargs):
        '''
        Input:
            n_gen: number of generations to solve
            pop_init_method: method to initialize population
            algo: class of multi-objective algorithm to use
            kwargs: other keyword arguments for algorithm to initialize
        '''
        self.n_gen = n_gen
        self.pop_init_method = pop_init_method
        self.batch_size = batch_size
        self.algo_type = algo
        self.algo_kwargs = kwargs
        self.solution = None

    def solve(self, problem, X, Y):
        '''
        Solve the multi-objective problem
        '''
        # initialize population
        sampling = self._get_sampling(X, Y)

        # setup algorithm
        algo = self.algo_type(sampling=sampling, **self.algo_kwargs)

        # optimization
        res = minimize(problem, algo, ('n_gen', self.n_gen))

        # construct solution
        self.solution = {'x': res.pop.get('X'), 'y': res.pop.get('F'), 'algo': res.algorithm}

        # fill the solution in case less than batch size
        pop_size = len(self.solution['x'])
        if pop_size < self.batch_size:
            indices = np.concatenate([np.arange(pop_size), np.random.choice(np.arange(pop_size), self.batch_size - pop_size)])
            self.solution['x'] = np.array(self.solution['x'])[indices]
            self.solution['y'] = np.array(self.solution['y'])[indices]

        return self.solution

    def _get_sampling(self, X, Y):
        '''
        Initialize population from data
        '''
        if self.pop_init_method == 'lhs':
            sampling = LatinHypercubeSampling()
        elif self.pop_init_method == 'nds':
            sorted_indices = NonDominatedSorting().do(Y)
            pop_size = self.algo_kwargs['pop_size']
            sampling = X[np.concatenate(sorted_indices)][:pop_size]
            # NOTE: use lhs if current samples are not enough
            if len(sampling) < pop_size:
                rest_sampling = lhs(X.shape[1], pop_size - len(sampling))
                sampling = np.vstack([sampling, rest_sampling])
        elif self.pop_init_method == 'random':
            sampling = FloatRandomSampling()
        else:
            raise NotImplementedError

        return sampling
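
A sketch of how this Solver wrapper could be driven, assuming the legacy (pymoo 0.3/0.4-era) API that the imports above target; the problem choice and the random initial data are illustrative only:

import numpy as np
from pymoo.algorithms.nsga2 import NSGA2        # legacy pymoo import path (assumption)
from pymoo.factory import get_problem
from moospread.utils.mobo_utils.mobo.solver.solver import Solver

problem = get_problem('zdt1')
X = np.random.rand(20, problem.n_var)           # illustrative already-evaluated designs
Y = problem.evaluate(X)                         # their objective values

# population initialized by non-dominated sorting of (X, Y)
solver = Solver(n_gen=50, pop_init_method='nds', batch_size=10,
                algo=NSGA2, pop_size=20)
solution = solver.solve(problem, X, Y)          # dict with 'x', 'y', 'algo'
print(solution['x'].shape, solution['y'].shape)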
moospread/utils/mobo_utils/mobo/surrogate_model/base.py
@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod

'''
Surrogate model that predicts the performance of given design variables
'''

class SurrogateModel(ABC):
    '''
    Base class of surrogate model
    '''
    def __init__(self, n_var, n_obj):
        self.n_var = n_var
        self.n_obj = n_obj

    @abstractmethod
    def fit(self, X, Y):
        '''
        Fit the surrogate model from data (X, Y)
        '''
        pass

    @abstractmethod
    def evaluate(self, X, std=False, calc_gradient=False, calc_hessian=False):
        '''
        Predict the performance given a set of design variables X
        Input:
            std / calc_gradient / calc_hessian: whether to calculate std / gradient / hessian of the prediction
        Output:
            val['F']: mean, shape (N, n_obj)
            val['dF']: gradient of mean, shape (N, n_obj, n_var)
            val['hF']: hessian of mean, shape (N, n_obj, n_var, n_var)
            val['S']: std, shape (N, n_obj)
            val['dS']: gradient of std, shape (N, n_obj, n_var)
            val['hS']: hessian of std, shape (N, n_obj, n_var, n_var)
        '''
        pass
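
The evaluate contract documented above can be illustrated with a trivial, hypothetical subclass that returns zeros of the expected shapes (for illustration only, not part of the package):

import numpy as np
from moospread.utils.mobo_utils.mobo.surrogate_model.base import SurrogateModel

class ZeroModel(SurrogateModel):
    # hypothetical subclass, used only to show the documented output shapes
    def fit(self, X, Y):
        pass

    def evaluate(self, X, std=False, calc_gradient=False, calc_hessian=False):
        N = X.shape[0]
        return {
            'F': np.zeros((N, self.n_obj)),
            'dF': np.zeros((N, self.n_obj, self.n_var)) if calc_gradient else None,
            'hF': np.zeros((N, self.n_obj, self.n_var, self.n_var)) if calc_hessian else None,
            'S': np.zeros((N, self.n_obj)) if std else None,
            'dS': np.zeros((N, self.n_obj, self.n_var)) if std and calc_gradient else None,
            'hS': np.zeros((N, self.n_obj, self.n_var, self.n_var)) if std and calc_hessian else None,
        }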
moospread/utils/mobo_utils/mobo/surrogate_model/gaussian_process.py
@@ -0,0 +1,177 @@
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, RBF, ConstantKernel
from sklearn.utils.optimize import _check_optimize_result
from scipy.optimize import minimize
from scipy.linalg import solve_triangular
from scipy.spatial.distance import cdist

from moospread.utils.mobo_utils.mobo.surrogate_model.base import SurrogateModel
from moospread.utils.mobo_utils.mobo.utils import safe_divide


class GaussianProcess(SurrogateModel):

    def warn(*args, **kwargs):
        pass
    import warnings
    warnings.warn = warn

    '''
    Gaussian process
    '''
    def __init__(self, n_var, n_obj, nu, **kwargs):
        super().__init__(n_var, n_obj)

        self.nu = nu
        self.gps = []

        def constrained_optimization(obj_func, initial_theta, bounds):
            opt_res = minimize(obj_func, initial_theta, method="L-BFGS-B", jac=True, bounds=bounds)
            '''
            NOTE: Temporarily disable the check below because this error sometimes occurs:
                ConvergenceWarning: lbfgs failed to converge (status=2):
                ABNORMAL_TERMINATION_IN_LNSRCH
            even though we already ran enough optimization iterations and scaled the data.
            The exact reason for this is still unknown.
            '''
            # _check_optimize_result("lbfgs", opt_res)
            return opt_res.x, opt_res.fun

        for _ in range(n_obj):
            if nu > 0:
                main_kernel = Matern(length_scale=np.ones(n_var), length_scale_bounds=(np.sqrt(1e-3), np.sqrt(1e3)), nu=0.5 * nu)
            else:
                main_kernel = RBF(length_scale=np.ones(n_var), length_scale_bounds=(np.sqrt(1e-3), np.sqrt(1e3)))

            kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(np.sqrt(1e-3), np.sqrt(1e3))) * \
                main_kernel + \
                ConstantKernel(constant_value=1e-2, constant_value_bounds=(np.exp(-6), np.exp(0)))

            gp = GaussianProcessRegressor(kernel=kernel, optimizer=constrained_optimization)
            self.gps.append(gp)

    def fit(self, X, Y):
        for i, gp in enumerate(self.gps):
            gp.fit(X, Y[:, i])

    def evaluate(self, X, std=False, calc_gradient=False, calc_hessian=False):
        F, dF, hF = [], [], [] # mean
        S, dS, hS = [], [], [] # std

        for gp in self.gps:

            # mean
            K = gp.kernel_(X, gp.X_train_) # K: shape (N, N_train)
            y_mean = K.dot(gp.alpha_)

            F.append(y_mean) # y_mean: shape (N,)

            if std:

                L_inv = solve_triangular(gp.L_.T,
                                         np.eye(gp.L_.shape[0]))
                K_inv = L_inv.dot(L_inv.T)

                y_var = gp.kernel_.diag(X)
                y_var -= np.einsum("ij,ij->i",
                                   np.dot(K, K_inv), K)

                y_var_negative = y_var < 0
                if np.any(y_var_negative):
                    y_var[y_var_negative] = 0.0

                y_std = np.sqrt(y_var)

                S.append(y_std) # y_std: shape (N,)

            if not (calc_gradient or calc_hessian): continue

            ell = np.exp(gp.kernel_.theta[1:-1]) # ell: shape (n_var,)
            sf2 = np.exp(gp.kernel_.theta[0]) # sf2: shape (1,)
            d = np.expand_dims(cdist(X / ell, gp.X_train_ / ell), 2) # d: shape (N, N_train, 1)
            X_, X_train_ = np.expand_dims(X, 1), np.expand_dims(gp.X_train_, 0)
            dd_N = X_ - X_train_ # numerator
            dd_D = d * ell ** 2 # denominator
            dd = safe_divide(dd_N, dd_D) # dd: shape (N, N_train, n_var)

            if calc_gradient or calc_hessian:
                if self.nu == 1:
                    dK = -sf2 * np.exp(-d) * dd

                elif self.nu == 3:
                    dK = -3 * sf2 * np.exp(-np.sqrt(3) * d) * d * dd

                elif self.nu == 5:
                    dK = -5. / 3 * sf2 * np.exp(-np.sqrt(5) * d) * (1 + np.sqrt(5) * d) * d * dd

                else: # RBF
                    dK = -sf2 * np.exp(-0.5 * d ** 2) * d * dd

                dK_T = dK.transpose(0, 2, 1) # dK: shape (N, N_train, n_var), dK_T: shape (N, n_var, N_train)

            if calc_gradient:
                dy_mean = dK_T @ gp.alpha_ # gp.alpha_: shape (N_train,)
                dF.append(dy_mean) # dy_mean: shape (N, n_var)

                # TODO: check
                if std:
                    K = np.expand_dims(K, 1) # K: shape (N, 1, N_train)
                    K_Ki = K @ K_inv # gp._K_inv: shape (N_train, N_train), K_Ki: shape (N, 1, N_train)
                    dK_Ki = dK_T @ K_inv # dK_Ki: shape (N, n_var, N_train)

                    dy_var = -np.sum(dK_Ki * K + K_Ki * dK_T, axis=2) # dy_var: shape (N, n_var)
                    #print(dy_var.shape)
                    #print(np.expand_dims(y_std,1).shape)
                    #dy_std = 0.5 * safe_divide(dy_var, y_std) # dy_std: shape (N, n_var)
                    if np.min(y_std) != 0:
                        dy_std = 0.5 * dy_var / np.expand_dims(y_std, 1) # dy_std: shape (N, n_var)
                    else:
                        dy_std = np.zeros(dy_var.shape)
                    dS.append(dy_std)

            if calc_hessian:
                d = np.expand_dims(d, 3) # d: shape (N, N_train, 1, 1)
                dd = np.expand_dims(dd, 2) # dd: shape (N, N_train, 1, n_var)
                hd_N = d * np.expand_dims(np.eye(len(ell)), (0, 1)) - np.expand_dims(X_ - X_train_, 3) * dd # numerator
                hd_D = d ** 2 * np.expand_dims(ell ** 2, (0, 1, 3)) # denominator
                hd = safe_divide(hd_N, hd_D) # hd: shape (N, N_train, n_var, n_var)

                if self.nu == 1:
                    hK = -sf2 * np.exp(-d) * (hd - dd ** 2)

                elif self.nu == 3:
                    hK = -3 * sf2 * np.exp(-np.sqrt(3) * d) * (d * hd + (1 - np.sqrt(3) * d) * dd ** 2)

                elif self.nu == 5:
                    hK = -5. / 3 * sf2 * np.exp(-np.sqrt(5) * d) * (-5 * d ** 2 * dd ** 2 + (1 + np.sqrt(5) * d) * (dd ** 2 + d * hd))

                else: # RBF
                    hK = -sf2 * np.exp(-0.5 * d ** 2) * ((1 - d ** 2) * dd ** 2 + d * hd)

                hK_T = hK.transpose(0, 2, 3, 1) # hK: shape (N, N_train, n_var, n_var), hK_T: shape (N, n_var, n_var, N_train)

                hy_mean = hK_T @ gp.alpha_ # hy_mean: shape (N, n_var, n_var)
                hF.append(hy_mean)

                # TODO: check
                if std:
                    K = np.expand_dims(K, 2) # K: shape (N, 1, 1, N_train)
                    dK = np.expand_dims(dK_T, 2) # dK: shape (N, n_var, 1, N_train)
                    dK_Ki = np.expand_dims(dK_Ki, 2) # dK_Ki: shape (N, n_var, 1, N_train)
                    hK_Ki = hK_T @ K_inv # hK_Ki: shape (N, n_var, n_var, N_train)

                    hy_var = -np.sum(hK_Ki * K + 2 * dK_Ki * dK + K_Ki * hK_T, axis=3) # hy_var: shape (N, n_var, n_var)
                    hy_std = 0.5 * safe_divide(hy_var * y_std - dy_var * dy_std, y_var) # hy_std: shape (N, n_var, n_var)
                    hS.append(hy_std)

        F = np.stack(F, axis=1)
        dF = np.stack(dF, axis=1) if calc_gradient else None
        hF = np.stack(hF, axis=1) if calc_hessian else None

        S = np.stack(S, axis=1) if std else None
        dS = np.stack(dS, axis=1) if std and calc_gradient else None
        hS = np.stack(hS, axis=1) if std and calc_hessian else None

        out = {'F': F, 'dF': dF, 'hF': hF, 'S': S, 'dS': dS, 'hS': hS}
        return out
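
A minimal fit/evaluate sketch for the Gaussian process surrogate above; the random data is illustrative, and nu=5 selects the Matern kernel branch in __init__ (nu=2.5 in scikit-learn terms):

import numpy as np
from moospread.utils.mobo_utils.mobo.surrogate_model.gaussian_process import GaussianProcess

n_var, n_obj = 4, 2
X = np.random.rand(30, n_var)    # illustrative training designs in [0, 1]
Y = np.random.rand(30, n_obj)    # illustrative objective values

gp = GaussianProcess(n_var=n_var, n_obj=n_obj, nu=5)
gp.fit(X, Y)

out = gp.evaluate(np.random.rand(8, n_var), std=True, calc_gradient=True)
print(out['F'].shape, out['S'].shape, out['dF'].shape)   # (8, 2) (8, 2) (8, 2, 4)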
moospread/utils/mobo_utils/mobo/surrogate_model/thompson_sampling.py
@@ -0,0 +1,79 @@
import numpy as np
from numpy import linalg as LA
from scipy.stats.distributions import chi2
from scipy.stats import norm

import moospread.utils.mobo_utils.lhs_for_mobo as lhs
from moospread.utils.mobo_utils.mobo.surrogate_model.gaussian_process import GaussianProcess


class ThompsonSampling(GaussianProcess):
    '''
    Sampled functions from Gaussian process using Thompson Sampling
    '''
    def __init__(self, n_var, n_obj, nu, n_spectral_pts, mean_sample, **kwargs):
        super().__init__(n_var, n_obj, nu)

        self.M = n_spectral_pts
        self.thetas, self.Ws, self.bs, self.sf2s = None, None, None, None
        self.mean_sample = mean_sample

    def fit(self, X, Y):
        self.thetas, self.Ws, self.bs, self.sf2s = [], [], [], []
        n_sample = X.shape[0]

        for i, gp in enumerate(self.gps):
            gp.fit(X, Y[:, i])

            ell = np.exp(gp.kernel_.theta[1:-1])
            sf2 = np.exp(2 * gp.kernel_.theta[0])
            sn2 = np.exp(2 * gp.kernel_.theta[-1])

            sw1, sw2 = lhs(self.n_var, self.M), lhs(self.n_var, self.M)
            if self.nu > 0:
                W = np.tile(1. / ell, (self.M, 1)) * norm.ppf(sw1) * np.sqrt(self.nu / chi2.ppf(sw2, df=self.nu))
            else:
                W = np.random.uniform(size=(self.M, self.n_var)) * np.tile(1. / ell, (self.M, 1))
            b = 2 * np.pi * lhs(1, self.M)
            phi = np.sqrt(2. * sf2 / self.M) * np.cos(W @ X.T + np.tile(b, (1, n_sample)))
            A = phi @ phi.T + sn2 * np.eye(self.M)
            invcholA = LA.inv(LA.cholesky(A))
            invA = invcholA.T @ invcholA
            mu_theta = invA @ phi @ Y[:, i]
            if self.mean_sample:
                theta = mu_theta
            else:
                cov_theta = sn2 * invA
                cov_theta = 0.5 * (cov_theta + cov_theta.T)
                theta = mu_theta + LA.cholesky(cov_theta) @ np.random.standard_normal(self.M)

            self.thetas.append(theta.copy())
            self.Ws.append(W.copy())
            self.bs.append(b.copy())
            self.sf2s.append(sf2)

    def evaluate(self, X, std=False, calc_gradient=False, calc_hessian=False):
        F, dF, hF = [], [], []
        n_sample = X.shape[0] if len(X.shape) > 1 else 1

        for theta, W, b, sf2 in zip(self.thetas, self.Ws, self.bs, self.sf2s):
            factor = np.sqrt(2. * sf2 / self.M)
            W_X_b = W @ X.T + np.tile(b, (1, n_sample))
            F.append(factor * (theta @ np.cos(W_X_b)))

            if calc_gradient:
                dF.append(-factor * np.expand_dims(theta, 0) * np.sin(W_X_b).T @ W)

            if calc_hessian:
                hF.append(-factor * np.einsum('ij,jk->ikj', np.expand_dims(theta, 0) * np.cos(W_X_b).T, W) @ W)

        F = np.stack(F, axis=1)
        dF = np.stack(dF, axis=1) if calc_gradient else None
        hF = np.stack(hF, axis=1) if calc_hessian else None

        S = np.zeros((n_sample, self.n_obj)) if std else None
        dS = np.zeros((n_sample, self.n_obj, self.n_var)) if std and calc_gradient else None
        hS = np.zeros((n_sample, self.n_obj, self.n_var, self.n_var)) if std and calc_hessian else None

        out = {'F': F, 'dF': dF, 'hF': hF, 'S': S, 'dS': dS, 'hS': hS}
        return out
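
For reference, each sampled objective returned by evaluate above is the spectral (random Fourier feature) approximation of a Gaussian process posterior draw; in the notation of the code (rows w_i of W, offsets b_i, weights theta_i fitted in fit):

    f(x) ~= sqrt(2 * sf2 / M) * sum_{i=1..M} theta_i * cos(w_i . x + b_i)

so each sampled objective is an explicit deterministic function whose gradient and Hessian (dF, hF) follow from differentiating the cosine features, and whose predictive std (S, dS, hS) is reported as zero.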
moospread/utils/mobo_utils/mobo/surrogate_problem.py
@@ -0,0 +1,44 @@
import numpy as np
from problems import Problem

'''
Surrogate problem that mimics the real problem based on surrogate model
'''

class SurrogateProblem(Problem):

    def __init__(self, real_problem, surrogate_model, acquisition, transformation):
        '''
        Input:
            real_problem: the original optimization problem which this surrogate is approximating
            surrogate_model: fitted surrogate model
            acquisition: the acquisition function to evaluate the fitness of samples
            transformation: data normalization for surrogate model fitting
        '''
        self.real_problem = real_problem
        self.surrogate_model = surrogate_model
        self.acquisition = acquisition
        self.transformation = transformation
        xl = transformation.do(x=real_problem.xl)
        xu = transformation.do(x=real_problem.xu)
        super().__init__(n_var=real_problem.n_var, n_obj=real_problem.n_obj, n_constr=real_problem.n_constr, xl=xl, xu=xu)

    def evaluate(self, *args, return_values_of="auto", **kwargs):
        assert self.surrogate_model is not None, 'surrogate model must be set first before evaluation'

        # handle hF (hessian) computation, which is not supported by Pymoo
        calc_hessian = (type(return_values_of) == list and 'hF' in return_values_of)

        return super().evaluate(*args, return_values_of=return_values_of, calc_hessian=calc_hessian, **kwargs)

    def _evaluate(self, x, out, *args, calc_gradient=False, calc_hessian=False, **kwargs):
        # evaluate value by surrogate model
        std = self.acquisition.requires_std
        val = self.surrogate_model.evaluate(x, std, calc_gradient, calc_hessian)

        # evaluate out['F/dF/hF'] by certain acquisition function
        out['F'], out['dF'], out['hF'] = self.acquisition.evaluate(val, calc_gradient, calc_hessian)

        # evaluate constraints by real problem
        x_ori = self.transformation.undo(x)
        out['G'], out['CV'], out['feasible'] = self.real_problem.evaluate(x_ori, return_values_of=['G', 'CV', 'feasible'], requires_F=False)