moospread 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moospread/__init__.py +3 -0
- moospread/core.py +1881 -0
- moospread/problem.py +193 -0
- moospread/tasks/__init__.py +4 -0
- moospread/tasks/dtlz_torch.py +139 -0
- moospread/tasks/mw_torch.py +274 -0
- moospread/tasks/re_torch.py +394 -0
- moospread/tasks/zdt_torch.py +112 -0
- moospread/utils/__init__.py +8 -0
- moospread/utils/constraint_utils/__init__.py +2 -0
- moospread/utils/constraint_utils/gradient.py +72 -0
- moospread/utils/constraint_utils/mgda_core.py +69 -0
- moospread/utils/constraint_utils/pmgda_solver.py +308 -0
- moospread/utils/constraint_utils/prefs.py +64 -0
- moospread/utils/ditmoo.py +127 -0
- moospread/utils/lhs.py +74 -0
- moospread/utils/misc.py +28 -0
- moospread/utils/mobo_utils/__init__.py +11 -0
- moospread/utils/mobo_utils/evolution/__init__.py +0 -0
- moospread/utils/mobo_utils/evolution/dom.py +60 -0
- moospread/utils/mobo_utils/evolution/norm.py +40 -0
- moospread/utils/mobo_utils/evolution/utils.py +97 -0
- moospread/utils/mobo_utils/learning/__init__.py +0 -0
- moospread/utils/mobo_utils/learning/model.py +40 -0
- moospread/utils/mobo_utils/learning/model_init.py +33 -0
- moospread/utils/mobo_utils/learning/model_update.py +51 -0
- moospread/utils/mobo_utils/learning/prediction.py +116 -0
- moospread/utils/mobo_utils/learning/utils.py +143 -0
- moospread/utils/mobo_utils/lhs_for_mobo.py +243 -0
- moospread/utils/mobo_utils/mobo/__init__.py +0 -0
- moospread/utils/mobo_utils/mobo/acquisition.py +209 -0
- moospread/utils/mobo_utils/mobo/algorithms.py +91 -0
- moospread/utils/mobo_utils/mobo/factory.py +86 -0
- moospread/utils/mobo_utils/mobo/mobo.py +132 -0
- moospread/utils/mobo_utils/mobo/selection.py +182 -0
- moospread/utils/mobo_utils/mobo/solver/__init__.py +5 -0
- moospread/utils/mobo_utils/mobo/solver/moead.py +17 -0
- moospread/utils/mobo_utils/mobo/solver/nsga2.py +10 -0
- moospread/utils/mobo_utils/mobo/solver/parego/__init__.py +1 -0
- moospread/utils/mobo_utils/mobo/solver/parego/parego.py +62 -0
- moospread/utils/mobo_utils/mobo/solver/parego/utils.py +34 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/__init__.py +1 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/buffer.py +364 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/pareto_discovery.py +571 -0
- moospread/utils/mobo_utils/mobo/solver/pareto_discovery/utils.py +168 -0
- moospread/utils/mobo_utils/mobo/solver/solver.py +74 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/__init__.py +2 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/base.py +36 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/gaussian_process.py +177 -0
- moospread/utils/mobo_utils/mobo/surrogate_model/thompson_sampling.py +79 -0
- moospread/utils/mobo_utils/mobo/surrogate_problem.py +44 -0
- moospread/utils/mobo_utils/mobo/transformation.py +106 -0
- moospread/utils/mobo_utils/mobo/utils.py +65 -0
- moospread/utils/mobo_utils/spread_mobo_utils.py +854 -0
- moospread/utils/offline_utils/__init__.py +10 -0
- moospread/utils/offline_utils/handle_task.py +203 -0
- moospread/utils/offline_utils/proxies.py +338 -0
- moospread/utils/spread_utils.py +91 -0
- moospread-0.1.0.dist-info/METADATA +75 -0
- moospread-0.1.0.dist-info/RECORD +63 -0
- moospread-0.1.0.dist-info/WHEEL +5 -0
- moospread-0.1.0.dist-info/licenses/LICENSE +10 -0
- moospread-0.1.0.dist-info/top_level.txt +1 -0

moospread/utils/constraint_utils/pmgda_solver.py
ADDED

@@ -0,0 +1,308 @@
+import os
+from torch.optim import SGD
+from tqdm import tqdm
+from pymoo.indicators.hv import HV
+import math
+import torch
+from cvxopt import matrix, solvers
+solvers.options['show_progress'] = False
+import numpy as np
+from torch import nn
+from torch.autograd import Variable
+from torch import Tensor
+from moospread.utils.constraint_utils.mgda_core import solve_mgda
+from moospread.utils.constraint_utils.gradient import get_moo_Jacobian_batch
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+import json
+
+
+
+class PMGDACore():
+    def __init__(self, n_var, prefs=None, n_prob=None, n_obj=None):
+        '''
+        Input:
+            n_var: int, number of variables.
+            prefs: (n_prob, n_obj).
+        '''
+        self.core_name = 'PMGDACore'
+        self.prefs = prefs
+        if self.prefs is not None:
+            self.n_prob, self.n_obj = prefs.shape[0], prefs.shape[1]
+        else:
+            assert n_prob is not None and n_obj is not None, "Please provide n_prob and n_obj if prefs is None."
+            self.n_prob = n_prob
+            self.n_obj = n_obj
+        self.n_var = n_var
+        # self.prefs_np = prefs.cpu().numpy() if type(prefs) == torch.Tensor else prefs
+        self.h_eps = 0.01
+        self.sigma = 0.95
+
+    def get_alpha(self, Jacobian, losses, idx,
+                  grad_h=None, h_val=None, constraint_mtd='pbi'):
+        '''
+        Input:
+            Jacobian: (n_obj, n_var), torch.Tensor
+            losses: (n_obj,), torch.Tensor
+            idx: int
+        '''
+        # (1) get the constraint value
+        losses_var = Variable(losses, requires_grad=True)
+        if grad_h is not None:
+            Jacobian_h_losses = None
+        else:
+            h_var = constraint(losses_var, pref=self.prefs[idx],
+                               constraint_mtd=constraint_mtd)
+            h_val = h_var.detach().cpu().clone().numpy()
+            h_var.backward()
+            Jacobian_h_losses = losses_var.grad.detach().clone()
+            # shape: (n_obj)
+        try:
+            alpha = solve_pmgda(Jacobian, Jacobian_h_losses, grad_h,
+                                h_val, self.h_eps, self.sigma)
+        except Exception:
+            alpha = [1 / self.n_obj] * self.n_obj
+        return torch.Tensor(alpha).to(Jacobian.device)
+
+def get_nn_pmgda_componets(loss_vec, pref,
+                           h_vals=None, constraint_mtd='pbi'):
+    '''
+    return: h_val, J_hf
+    '''
+    # Here, use a single small bp graph
+    loss_vec_var = Variable(loss_vec, requires_grad=True)
+    h = constraint(loss_vec_var, pref=pref,
+                   pre_h_vals=h_vals, constraint_mtd=constraint_mtd)
+    h.backward()
+    J_hf = loss_vec_var.grad
+    h_val = h.detach().clone().item()
+    # grad_h = J_hf @ Jacobian?
+    return h_val, J_hf
+
+def ts_to_np(grad_arr):
+    g_np_arr = [0] * len(grad_arr)
+    for idx, g_ts in enumerate(grad_arr):
+        g_np_arr[idx] = g_ts.detach().clone().cpu().numpy()[0]
+    return np.array(g_np_arr)
+
+def pbi(f, lamb):
+    lamb_ts = torch.Tensor(lamb)
+    lamb0 = lamb_ts / torch.norm(lamb_ts)
+    lamb0 = lamb0.double().to(f.device)
+    d1 = f.squeeze().double() @ lamb0
+    d2 = torch.norm(f.squeeze().double() - d1 * lamb0)
+    return d1, d2
+
+def constraint(loss_arr, pref=Tensor([0, 1]),
+               pre_h_vals=None,
+               constraint_mtd='pbi'):
+
+    if type(pref) == np.ndarray:
+        pref = Tensor(pref)
+
+    if constraint_mtd == 'pbi':
+        _, d2 = pbi(loss_arr, pref)
+        d2 = d2.unsqueeze(0)
+    elif constraint_mtd == 'ineq':
+        assert pre_h_vals is not None, "pre_h_vals instance is required for inequality constraint."
+        ineq_violation = torch.clamp(pre_h_vals, min=1e-6)
+        d2 = ineq_violation.unsqueeze(0)
+    elif constraint_mtd == 'eq':
+        assert pre_h_vals is not None, "pre_h_vals instance is required for equality constraint."
+        d2 = pre_h_vals.unsqueeze(0)
+    elif constraint_mtd == 'cel':
+        eps = 1e-3
+        loss_arr_0 = torch.clip(loss_arr / torch.sum(loss_arr), eps)
+        res = torch.sum(loss_arr_0 * torch.log(loss_arr_0 / pref)) + torch.sum(
+            pref * torch.log(pref / loss_arr_0))
+        d2 = res.unsqueeze(0)
+    elif constraint_mtd == 'cos':
+        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
+        pref_ts = pref.to(loss_arr.device)
+        d2 = (1 - cos(loss_arr.unsqueeze(0), pref_ts.unsqueeze(0)))
+    else:
+        raise ValueError("Unknown constraint method:", constraint_mtd)
+
+    return d2
+
+def cosmos(losses, pref, coeff=5.0):
+    if type(pref) == np.ndarray:
+        pref = Tensor(pref)
+    d1 = losses @ pref
+    d2 = losses @ pref / torch.norm(losses) / torch.norm(pref)
+    return d1 - coeff * d2
+
+def get_cosmos_Jhf(loss, pref):
+    loss = Variable(Tensor(loss), requires_grad=True)
+    cosmos_loss = cosmos(loss, pref)
+    cosmos_loss.backward()
+    Jhf = loss.grad.detach().clone().cpu().numpy()
+    return Jhf
+
+def solve_pmgda(Jacobian, Jacobian_h_losses,
+                grad_h, h_val, h_tol, sigma):
+    '''
+    Input:
+        Jacobian: (n_obj, n_var) : Tensor
+        grad_h: (1, n_var)
+        h_val: (1,) : float
+        Jhf: (m,)
+    Output:
+        alpha: (m,)
+    '''
+    if grad_h is None:
+        assert Jacobian_h_losses is not None, "Either grad_h or Jacobian_h_losses should be provided."
+        # Compute grad_h from Jacobian_h_losses via chain rule
+        grad_h = Jacobian_h_losses @ Jacobian
+    Jacobian_ts = Jacobian.detach().clone().to(device)
+    grad_h_np = grad_h.detach().clone().cpu().numpy()
+    G_ts = torch.cat((Jacobian, grad_h.unsqueeze(0)), dim=0).detach()
+    G_norm = torch.norm(G_ts, dim=1, keepdim=True)
+    G_n = G_ts / (G_norm + 1e-4)
+    GGn = (G_ts @ G_n.T).clone().cpu().numpy()
+    (m, n) = Jacobian_ts.shape
+    condition = h_val < h_tol
+    if condition:
+        mu_prime = solve_mgda(Jacobian_ts)
+    else:
+        # Do the correction step. Eq. (20) in the main paper.
+        # The total optimization number is m+2. A is the constraint matrix, and b is the constraint vector.
+        A1 = -GGn
+        A_tmp = -np.ones((m + 1, 1))
+        A_tmp[-1][0] = 0
+        A1 = np.c_[A1, A_tmp]
+        b1 = np.zeros(m + 1)
+        b1[-1] = -sigma * np.linalg.norm(grad_h_np)
+        # A2 plus A3 are the simplex constraint, A2 is for non-zero constraints.
+        A2 = np.c_[-np.eye(m + 1), np.zeros((m + 1, 1))]
+        b2 = -np.zeros(m + 1)
+        # A3, A4 are for the sum-equal-one constraint
+        A3 = np.ones((1, m + 2))
+        A3[0][-1] = 0.0
+        b3 = np.ones(1)
+
+        A4 = -np.ones((1, m + 2))
+        A4[0][-1] = 0.0
+        b4 = -np.ones(1)
+        A_all = np.concatenate((A1, A2, A3, A4), 0)
+        b_all = np.r_[b1, b2, b3, b4]
+        A_matrix = matrix(A_all)  # The constraint matrix.
+        b_matrix = matrix(b_all)  # The constraint vector.
+        c = np.zeros(m + 2)  # The objective function.
+        c[-1] = 1
+        c_matrix = matrix(c)
+        # print the type of the matrices: array or tensor or ...
+        # print("c_matrix:", type(c_matrix))
+        # print("A_matrix:", type(A_matrix))
+        # print("b_matrix:", type(b_matrix))
+        c_matrix = matrix(np.nan_to_num(c_matrix, nan=1e-6))
+        A_matrix = matrix(np.nan_to_num(A_matrix, nan=1e-6))
+        b_matrix = matrix(np.nan_to_num(b_matrix, nan=1e-6))
+        sol = solvers.lp(c_matrix, A_matrix, b_matrix)
+        res = np.array(sol['x']).squeeze()
+        # res = np.array(sol.get('x', [1e-6] * (m + 2))
+        #                ).squeeze()
+        # # print( len(res) )
+        # # print("PMGDA res:", res)
+
+        # if np.ndim(res) == 0:
+        #     # only one coefficient, no mu vector
+        #     mu = np.array([res.item()] * (m + 2))
+        #     coeff = res.item()
+        # else:
+        #     res = np.atleast_1d(res)
+        #     mu, coeff = res[:-1], res[-1]
+
+        mu, coeff = res[:-1], res[-1]
+        # gw = G_n.T @ torch.Tensor(mu).to(G_n.device)
+        # coeff, Eq. (18) in the main paper.
+        mu_prime = get_pmgda_DWA_coeff(mu, Jacobian_h_losses, G_norm, m)
+    return mu_prime
+
+def get_pmgda_DWA_coeff(mu, Jhf, G_norm, m):
+    '''
+    This function computes the coefficient of the dynamic weight adjustment.
+    Please refer to Eq. (18) in the main paper for the formulation.
+    '''
+    mu_prime = np.zeros(m)
+    for i in range(m):
+        mu_prime[i] = mu[i] / G_norm[i] + mu[m] / G_norm[m] * Jhf[i]
+    return mu_prime
+
+def solve_mgda_null(G_tilde):
+    # G_tilde.shape: (m+1, n)
+    GG = G_tilde @ G_tilde.T
+    # GG.shape : (m+1, m+1)
+    Q = matrix(GG.astype(np.double))
+    m, n = G_tilde.shape
+    p = matrix(np.zeros(m))
+    G = -np.eye(m)
+    G[-1][-1] = 0.0
+    G = matrix(G)
+
+    h = matrix(np.zeros(m))
+    A = np.ones(m)
+    A[-1] = 0
+    A = matrix(A, (1, m))
+    b = matrix(1.0)
+    sol = solvers.qp(Q, p, G, h, A, b)
+    res = np.array(sol['x']).squeeze()
+    gw = res @ G_tilde
+    return gw
+
+def get_Jhf(f_arr, pref, return_h=False,
+            h_vals=None, constraint_mtd='pbi'):
+    f = Variable(f_arr, requires_grad=True)
+    h = constraint(f, pref=pref, pre_h_vals=h_vals, constraint_mtd=constraint_mtd)
+    h.backward()
+    Jhf = f.grad.detach().clone().cpu().numpy()
+    if return_h:
+        return Jhf, float(h.detach().clone().cpu().numpy())
+    else:
+        return Jhf
+
+
+class PMGDASolver(object):
+    # The PMGDA paper: http://arxiv.org/abs/2402.09492.
+    def __init__(self, problem, prefs, n_prob, n_obj,
+                 step_size=1e-3, n_epoch=500, tol=1e-3,
+                 sigma=0.1, h_tol=1e-3,
+                 folder_name=None, verbose=True):
+        self.folder_name = folder_name
+        self.verbose = verbose
+        self.problem = problem
+        self.sigma = sigma
+        self.h_tol = h_tol
+        self.n_epoch = n_epoch
+        self.core_solver = PMGDACore(n_var=problem.n_var,
+                                     prefs=prefs,
+                                     n_prob=n_prob, n_obj=n_obj)
+        self.prefs = prefs
+        self.solver_name = 'PMGDA'
+        self.n_prob = n_prob
+        self.n_obj = n_obj
+        self.step_size = step_size
+
+    def compute_weights(self, x, y, pre_h_vals=None, constraint_mtd='pbi', as_list=False):
+        Jacobian_array = get_moo_Jacobian_batch(x, y, self.n_obj)
+        x.grad.zero_()
+        grad_h = None
+        if pre_h_vals is not None:
+            h_vars = constraint(y, pref=None,
+                                pre_h_vals=pre_h_vals, constraint_mtd=constraint_mtd)
+            h_vars.backward()
+            grad_h = x.grad.detach().clone()
+            print("grad_h:", grad_h)
+            h_vals = h_vars.detach().cpu().clone().numpy()
+        y_detach = y.detach().clone()
+        if grad_h is not None:
+            alpha_array = [self.core_solver.get_alpha(Jacobian_array[idx], y_detach[idx], idx,
+                                                      grad_h=grad_h[idx], h_val=h_vals[idx],
+                                                      constraint_mtd=constraint_mtd)
+                           for idx in range(self.n_prob)]
+        else:
+            alpha_array = [self.core_solver.get_alpha(Jacobian_array[idx], y_detach[idx], idx,
+                                                      constraint_mtd=constraint_mtd)
+                           for idx in range(self.n_prob)]
+        if as_list:
+            return alpha_array
+        else:
+            alpha_array = torch.stack(alpha_array)
+            return alpha_array
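
For orientation, a minimal usage sketch of the solver core above. This is not part of the package: the toy Jacobian, loss values, and sizes are made up, and `get_uniform_pref` is taken from the prefs module shown in the next hunk.

import torch
from moospread.utils.constraint_utils.pmgda_solver import PMGDACore
from moospread.utils.constraint_utils.prefs import get_uniform_pref

# Five evenly spread 2-objective preference vectors, shape (5, 2).
prefs = get_uniform_pref(n_prob=5, n_obj=2, clip_eps=0.01)
core = PMGDACore(n_var=10, prefs=prefs)

# Stand-in Jacobian (n_obj, n_var) and loss vector (n_obj,) for subproblem 0.
Jacobian = torch.randn(2, 10)
losses = torch.rand(2)

# Per-objective weights for the aggregated update; get_alpha falls back to
# uniform weights if the underlying MGDA/LP solve fails.
alpha = core.get_alpha(Jacobian, losses, idx=0)
direction = alpha @ Jacobian  # aggregated descent direction, shape (n_var,)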

moospread/utils/constraint_utils/prefs.py
ADDED

@@ -0,0 +1,64 @@
+import numpy as np
+import torch
+
+def das_dennis(n_partitions, n_dim):
+    if n_partitions == 0:
+        return np.full((1, n_dim), 1 / n_dim)
+    else:
+        ref_dirs = []
+        ref_dir = np.full(n_dim, np.nan)
+        das_dennis_recursion(ref_dirs, ref_dir, n_partitions, n_partitions, 0)
+        return np.concatenate(ref_dirs, axis=0)
+
+def das_dennis_recursion(ref_dirs, ref_dir, n_partitions, beta, depth):
+    if depth == len(ref_dir) - 1:
+        ref_dir[depth] = beta / (1.0 * n_partitions)
+        ref_dirs.append(ref_dir[None, :])
+    else:
+        for i in range(beta + 1):
+            ref_dir[depth] = 1.0 * i / (1.0 * n_partitions)
+            das_dennis_recursion(ref_dirs, np.copy(ref_dir), n_partitions, beta - i, depth + 1)
+
+def get_uniform_pref(n_prob, n_obj=2, clip_eps=0, mode='uniform', dtype='Tensor'):
+    if n_obj == 2:
+        # Just generate linear uniform preferences
+        pref_1 = np.linspace(clip_eps, 1 - clip_eps, n_prob)
+        pref_2 = 1 - pref_1
+        prefs = np.stack((pref_1, pref_2), axis=1)
+    else:
+        from pymoo.util.ref_dirs import get_reference_directions
+        prefs = get_reference_directions("energy", n_obj, n_prob, seed=1)
+        prefs = np.clip(prefs, clip_eps, 1 - clip_eps)
+        prefs = prefs / prefs.sum(axis=1)[:, None]
+    if dtype == 'Tensor':
+        return torch.Tensor(prefs)
+    else:
+        return prefs
+
+def get_x_init(n_prob, n_var, lbound=None, ubound=None):
+    if lbound is None:
+        x_init = torch.rand(n_prob, n_var)
+    else:
+        x_init = torch.rand(n_prob, n_var) * (ubound - lbound) + lbound
+    return x_init
+
+def get_random_prefs(batch_size, n_obj, type='Tensor'):
+    import torch
+    if type == 'Tensor':
+        return torch.distributions.dirichlet.Dirichlet(torch.ones(n_obj)).sample((batch_size,)).squeeze()
+    else:
+        return np.random.dirichlet(np.ones(n_obj), batch_size)
+
+def pref2angle(pref):
+    if type(pref) == torch.Tensor:
+        angle = torch.arctan2(pref[:, 1], pref[:, 0])
+        angle = angle.unsqueeze(1)
+    else:
+        angle = np.arctan2(pref[:, 0], pref[:, 1])
+    return angle
+
+def angle2pref(angle):
+    if type(angle) == torch.Tensor:
+        return torch.squeeze(torch.stack([torch.cos(angle), torch.sin(angle)], dim=1))
+    else:
+        return np.stack([np.cos(angle), np.sin(angle)], axis=1)
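
A short sanity check of the preference helpers above; the sizes are illustrative only.

import numpy as np
from moospread.utils.constraint_utils.prefs import (
    das_dennis, get_uniform_pref, pref2angle, angle2pref)

# Das-Dennis reference directions on the 3-objective simplex: rows sum to 1.
dirs = das_dennis(n_partitions=4, n_dim=3)      # shape (15, 3)
assert np.allclose(dirs.sum(axis=1), 1.0)

# Evenly spaced 2-objective preferences as a torch.Tensor of shape (4, 2).
prefs = get_uniform_pref(n_prob=4, n_obj=2, clip_eps=0.1)

# pref2angle / angle2pref convert between preference vectors and angles; note
# that angle2pref returns the L2-normalised direction, not the original
# sum-to-one weights.
angles = pref2angle(prefs)        # shape (4, 1)
directions = angle2pref(angles)   # shape (4, 2), unit-norm rows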

moospread/utils/ditmoo.py
ADDED

@@ -0,0 +1,127 @@
+"""
+DiTMOO: (Diffusion Transformer for Multi-Objective Optimization)
+"""
+
+import torch
+import torch.nn as nn
+
+
+class DiTBlock(nn.Module):
+    """Single DiT transformer block."""
+
+    def __init__(self, hidden_dim: int, num_heads: int, num_obj: int):
+        super().__init__()
+        # Timestep embedding
+        self.timestep_embedding = nn.Linear(1, hidden_dim)
+        # Normalisations
+        self.ln1 = nn.LayerNorm(hidden_dim)
+        self.ln2 = nn.LayerNorm(hidden_dim)
+        self.ln3 = nn.LayerNorm(hidden_dim)
+        # Self-attention
+        self.self_attn = nn.MultiheadAttention(
+            embed_dim=hidden_dim, num_heads=num_heads, batch_first=True
+        )
+        # Conditioning cross-attention
+        self.cond_proj = nn.Sequential(
+            nn.Linear(num_obj, hidden_dim),
+            nn.LayerNorm(hidden_dim),
+            nn.ReLU(),
+        )
+        self.cross_attn = nn.MultiheadAttention(
+            embed_dim=hidden_dim, num_heads=num_heads, batch_first=True
+        )
+        # Pointwise feed-forward
+        self.ffn = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim * 4),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(hidden_dim * 4, hidden_dim),
+        )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        t: torch.Tensor,
+        c: torch.Tensor,
+    ) -> torch.Tensor:
+        # Self-attention
+        x = x + self.self_attn(self.ln1(x), self.ln1(x), self.ln1(x))[0]
+
+        # Cross-attention
+        c_proj = self.cond_proj(c).unsqueeze(1)  # (B, 1, H)
+        t_emb = self.timestep_embedding(t.unsqueeze(-1)).unsqueeze(1)  # (B, 1, H)
+        c_proj = torch.cat([t_emb, c_proj], dim=1)  # (B, 2, H)
+        x = x + self.cross_attn(self.ln2(x), c_proj, c_proj)[0]  # (B, 1, H)
+
+        # Feed-forward
+        x = x + self.ffn(self.ln3(x))
+        return x
+
+
+class DiTMOO(nn.Module):
+    """
+    DiTMOO: Diffusion Transformer for Multi-Objective Optimization.
+
+    Parameters
+    ----------
+    input_dim : int
+        Dimensionality of the raw input vector.
+    num_obj : int
+        Dimensionality of the conditioning vector.
+    hidden_dim : int, optional
+        Transformer hidden dimension. Default is 128.
+    num_heads : int, optional
+        Number of attention heads. Default is 4.
+    num_blocks : int, optional
+        How many times to apply the DiT block. Default is 1.
+    """
+
+    def __init__(
+        self,
+        input_dim: int,
+        num_obj: int,
+        hidden_dim: int = 128,
+        num_heads: int = 4,
+        num_blocks: int = 1,
+    ):
+        super().__init__()
+
+        self.input_proj = nn.Linear(input_dim, hidden_dim)
+
+        # Stack of DiT blocks
+        self.blocks = nn.ModuleList(
+            [
+                DiTBlock(hidden_dim, num_heads, num_obj)
+                for _ in range(num_blocks)
+            ]
+        )
+
+        # Final normalisation and projection
+        self.ln_out = nn.LayerNorm(hidden_dim)
+        self.output_proj = nn.Linear(hidden_dim, input_dim)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        t: torch.Tensor,
+        c: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Parameters
+        ----------
+        x : (B, input_dim)
+        t : (B,) or (B, 1)
+            Scalar timestep per sample.
+        c : (B, num_obj)
+            Conditioning information.
+        """
+        # Project input to the hidden dimension
+        x = self.input_proj(x).unsqueeze(1)  # (B, 1, H)
+
+        # Apply DiT blocks
+        for block in self.blocks:
+            x = block(x, t, c)
+
+        # Project back to original dimension
+        x = self.output_proj(self.ln_out(x).squeeze(1))
+        return x
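
A quick shape check of the model above; the batch size and dimensions are arbitrary and only illustrate the expected tensor layout.

import torch
from moospread.utils.ditmoo import DiTMOO

model = DiTMOO(input_dim=12, num_obj=3, hidden_dim=64, num_heads=4, num_blocks=2)

x = torch.randn(8, 12)   # batch of (noised) decision vectors
t = torch.rand(8)        # one scalar diffusion timestep per sample
c = torch.rand(8, 3)     # objective-space conditioning, one row per sample
out = model(x, t, c)     # same shape as the input: (8, 12)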

moospread/utils/lhs.py
ADDED

@@ -0,0 +1,74 @@
+import numpy as np
+from pymoo.core.sampling import Sampling
+from pymoo.util.misc import cdist
+import torch
+
+
+def criterion_maxmin(X):
+    D = cdist(X, X)
+    np.fill_diagonal(D, np.inf)
+    return np.min(D)
+
+
+def criterion_corr(X):
+    M = np.corrcoef(X.T, rowvar=True)
+    return -np.sum(np.tril(M, -1) ** 2)
+
+
+def sampling_lhs(n_samples, n_var, xl=0, xu=1, smooth=True, criterion=criterion_maxmin, n_iter=50):
+
+    X = sampling_lhs_unit(n_samples, n_var, smooth=smooth)
+
+    # if a criterion is selected to further improve the sampling
+    if criterion is not None:
+
+        # current best score is stored here
+        score = criterion(X)
+
+        for j in range(1, n_iter):
+
+            # create new random sample and check the score again
+            _X = sampling_lhs_unit(n_samples, n_var, smooth=smooth)
+            _score = criterion(_X)
+
+            if _score > score:
+                X, score = _X, _score
+
+    if isinstance(xl, torch.Tensor) and isinstance(xu, torch.Tensor):
+        X = torch.from_numpy(X).float()
+    return xl + X * (xu - xl)
+
+
+def sampling_lhs_unit(n_samples, n_var, smooth=True):
+    X = np.random.random(size=(n_samples, n_var))
+    Xp = X.argsort(axis=0) + 1
+
+    if smooth:
+        Xp = Xp - np.random.random(Xp.shape)
+    else:
+        Xp = Xp - 0.5
+    Xp /= n_samples
+    return Xp
+
+
+class LatinHypercubeSampling(Sampling):
+
+    def __init__(self,
+                 smooth=True,
+                 iterations=20,
+                 criterion=criterion_maxmin) -> None:
+        super().__init__()
+        self.smooth = smooth
+        self.iterations = iterations
+        self.criterion = criterion
+
+    def _do(self, problem, n_samples, **kwargs):
+        xl, xu = problem.bounds()
+        X = sampling_lhs(n_samples, problem.n_var, xl=xl, xu=xu, smooth=self.smooth,
+                         criterion=self.criterion, n_iter=self.iterations)
+
+        return X
+
+
+class LHS(LatinHypercubeSampling):
+    pass
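
The sampler accepts plain float bounds or torch tensors; a brief sketch with made-up sizes:

import torch
from moospread.utils.lhs import sampling_lhs

# Unit-cube Latin hypercube samples as a NumPy array of shape (100, 5).
X_unit = sampling_lhs(100, 5)

# With tensor bounds, the result is converted to a float torch.Tensor and
# rescaled element-wise into [xl, xu].
xl = torch.zeros(5)
xu = 2.0 * torch.ones(5)
X = sampling_lhs(100, 5, xl=xl, xu=xu)   # torch.Tensor, shape (100, 5)

Since `LatinHypercubeSampling` implements pymoo's `Sampling` interface, the `LHS` alias can also be passed wherever pymoo expects a sampling operator.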

moospread/utils/misc.py
ADDED

@@ -0,0 +1,28 @@
+import numpy as np
+import random
+import torch
+
+
+def set_seed(seed: int = 0):
+    """
+    Set the random seed for reproducibility.
+    """
+    np.random.seed(seed)
+    random.seed(seed)
+
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+
+    torch.backends.cudnn.enabled = True
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+def convert_seconds(seconds):
+    # Calculate hours, minutes, and seconds
+    hours = seconds // 3600
+    minutes = (seconds % 3600) // 60
+    remaining_seconds = seconds % 60
+    # Format the result
+    print(f"Time: {hours} hours {minutes} minutes {remaining_seconds} seconds")
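
Typical use of these helpers (the seed and duration are arbitrary):

from moospread.utils.misc import set_seed, convert_seconds

set_seed(42)           # seeds NumPy, random, and torch (plus CUDA if available)
convert_seconds(3725)  # prints: Time: 1 hours 2 minutes 5 seconds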

moospread/utils/mobo_utils/__init__.py
ADDED

@@ -0,0 +1,11 @@
+from moospread.utils.mobo_utils.mobo.surrogate_model import GaussianProcess
+from moospread.utils.mobo_utils.mobo.transformation import StandardTransform
+from moospread.utils.mobo_utils.evolution.utils import *
+from moospread.utils.mobo_utils.learning.model_init import *
+from moospread.utils.mobo_utils.learning.model_update import *
+from moospread.utils.mobo_utils.learning.prediction import *
+from moospread.utils.mobo_utils.lhs_for_mobo import lhs_no_evaluation
+from moospread.utils.mobo_utils.spread_mobo_utils import (environment_selection,
+                                                          sort_population, sbx,
+                                                          pm_mutation,
+                                                          mobo_get_ddpm_dataloader)

moospread/utils/mobo_utils/evolution/__init__.py
File without changes

moospread/utils/mobo_utils/evolution/dom.py
ADDED

@@ -0,0 +1,60 @@
+
+# This file computes the dominance relation between two evaluated solutions
+# ind1 dominates ind2, output 1
+# ind2 dominates ind1, output 2
+# otherwise, output 0
+
+
+# judge the Pareto dominance relation between two evaluated solutions
+def pareto_dominance(ind1, ind2):
+    r = get_pareto_dom_rel(ind1, ind2)
+    return r
+
+
+
+
+def scalar_dominance(ind1, ind2):
+    if ind1.fitness.valid and ind2.fitness.valid:
+        if ind1.cluster_id != ind2.cluster_id:
+            return 0
+        else:
+            if ind1.scalar_dist < ind2.scalar_dist:
+                return 1
+            else:
+                return 2
+    else:
+        raise TypeError("Scalar dominance comparison cannot be done "
+                        "when either of two individuals has not been evaluated")
+
+
+def get_pareto_dom_rel(values1, values2):
+    n1, n2 = 0, 0
+    for v1, v2 in zip(values1, values2):
+        if v1 < v2:
+            n1 += 1
+        elif v2 < v1:
+            n2 += 1
+
+    if n1 > 0 and n2 > 0:
+        return 0
+
+    if n2 == 0 and n1 > 0:
+        return 2
+    elif n1 == 0 and n2 > 0:
+        return 0
+    else:
+        return 1
+
+
+def get_inverted_dom_rel(r):
+    return r if r == 0 else 3 - r
+
+
+def access_dom_rel(i, j, x, y, rel_map, dom):
+    if rel_map[i, j] != -1:
+        return rel_map[i, j]
+    else:
+        r = dom(y[i,:], y[j,:])
+        rel_map[i, j] = r
+        rel_map[j, i] = get_inverted_dom_rel(r)
+        return r
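
A small illustration of the cached dominance lookup above; the objective values and cache are made up for the example.

import numpy as np
from moospread.utils.mobo_utils.evolution.dom import access_dom_rel, get_pareto_dom_rel

# Three solutions with two objective values each.
y = np.array([[1.0, 2.0],
              [2.0, 3.0],
              [0.5, 4.0]])

# rel_map caches pairwise relations; -1 marks pairs not compared yet.
rel_map = -np.ones((3, 3), dtype=int)

# The x argument is not used by access_dom_rel, so None is passed here.
r = access_dom_rel(0, 1, None, y, rel_map, get_pareto_dom_rel)
# The relation is cached in rel_map[0, 1] and its inverse in rel_map[1, 0],
# so neither ordering of the pair is recomputed later.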