moospread 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. moospread/__init__.py +3 -0
  2. moospread/core.py +1881 -0
  3. moospread/problem.py +193 -0
  4. moospread/tasks/__init__.py +4 -0
  5. moospread/tasks/dtlz_torch.py +139 -0
  6. moospread/tasks/mw_torch.py +274 -0
  7. moospread/tasks/re_torch.py +394 -0
  8. moospread/tasks/zdt_torch.py +112 -0
  9. moospread/utils/__init__.py +8 -0
  10. moospread/utils/constraint_utils/__init__.py +2 -0
  11. moospread/utils/constraint_utils/gradient.py +72 -0
  12. moospread/utils/constraint_utils/mgda_core.py +69 -0
  13. moospread/utils/constraint_utils/pmgda_solver.py +308 -0
  14. moospread/utils/constraint_utils/prefs.py +64 -0
  15. moospread/utils/ditmoo.py +127 -0
  16. moospread/utils/lhs.py +74 -0
  17. moospread/utils/misc.py +28 -0
  18. moospread/utils/mobo_utils/__init__.py +11 -0
  19. moospread/utils/mobo_utils/evolution/__init__.py +0 -0
  20. moospread/utils/mobo_utils/evolution/dom.py +60 -0
  21. moospread/utils/mobo_utils/evolution/norm.py +40 -0
  22. moospread/utils/mobo_utils/evolution/utils.py +97 -0
  23. moospread/utils/mobo_utils/learning/__init__.py +0 -0
  24. moospread/utils/mobo_utils/learning/model.py +40 -0
  25. moospread/utils/mobo_utils/learning/model_init.py +33 -0
  26. moospread/utils/mobo_utils/learning/model_update.py +51 -0
  27. moospread/utils/mobo_utils/learning/prediction.py +116 -0
  28. moospread/utils/mobo_utils/learning/utils.py +143 -0
  29. moospread/utils/mobo_utils/lhs_for_mobo.py +243 -0
  30. moospread/utils/mobo_utils/mobo/__init__.py +0 -0
  31. moospread/utils/mobo_utils/mobo/acquisition.py +209 -0
  32. moospread/utils/mobo_utils/mobo/algorithms.py +91 -0
  33. moospread/utils/mobo_utils/mobo/factory.py +86 -0
  34. moospread/utils/mobo_utils/mobo/mobo.py +132 -0
  35. moospread/utils/mobo_utils/mobo/selection.py +182 -0
  36. moospread/utils/mobo_utils/mobo/solver/__init__.py +5 -0
  37. moospread/utils/mobo_utils/mobo/solver/moead.py +17 -0
  38. moospread/utils/mobo_utils/mobo/solver/nsga2.py +10 -0
  39. moospread/utils/mobo_utils/mobo/solver/parego/__init__.py +1 -0
  40. moospread/utils/mobo_utils/mobo/solver/parego/parego.py +62 -0
  41. moospread/utils/mobo_utils/mobo/solver/parego/utils.py +34 -0
  42. moospread/utils/mobo_utils/mobo/solver/pareto_discovery/__init__.py +1 -0
  43. moospread/utils/mobo_utils/mobo/solver/pareto_discovery/buffer.py +364 -0
  44. moospread/utils/mobo_utils/mobo/solver/pareto_discovery/pareto_discovery.py +571 -0
  45. moospread/utils/mobo_utils/mobo/solver/pareto_discovery/utils.py +168 -0
  46. moospread/utils/mobo_utils/mobo/solver/solver.py +74 -0
  47. moospread/utils/mobo_utils/mobo/surrogate_model/__init__.py +2 -0
  48. moospread/utils/mobo_utils/mobo/surrogate_model/base.py +36 -0
  49. moospread/utils/mobo_utils/mobo/surrogate_model/gaussian_process.py +177 -0
  50. moospread/utils/mobo_utils/mobo/surrogate_model/thompson_sampling.py +79 -0
  51. moospread/utils/mobo_utils/mobo/surrogate_problem.py +44 -0
  52. moospread/utils/mobo_utils/mobo/transformation.py +106 -0
  53. moospread/utils/mobo_utils/mobo/utils.py +65 -0
  54. moospread/utils/mobo_utils/spread_mobo_utils.py +854 -0
  55. moospread/utils/offline_utils/__init__.py +10 -0
  56. moospread/utils/offline_utils/handle_task.py +203 -0
  57. moospread/utils/offline_utils/proxies.py +338 -0
  58. moospread/utils/spread_utils.py +91 -0
  59. moospread-0.1.0.dist-info/METADATA +75 -0
  60. moospread-0.1.0.dist-info/RECORD +63 -0
  61. moospread-0.1.0.dist-info/WHEEL +5 -0
  62. moospread-0.1.0.dist-info/licenses/LICENSE +10 -0
  63. moospread-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,854 @@
1
+ import numpy as np
2
+ from scipy.spatial.distance import cdist
3
+ import torch
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import copy
8
+ import random
9
+ import math
10
+ from pymoo.indicators.hv import HV
11
+ from moospread.utils.mobo_utils.learning.prediction import *
12
+ import os
13
+
14
+ ###########################################################
15
+ ### DDPM training
16
def random_perturbation(data, perturb_scale=0.05):
    """Return `data` jittered by uniform noise drawn from [-perturb_scale, perturb_scale]."""
    uniform = np.random.rand(*data.shape)
    offset = (2.0 * uniform - 1.0) * perturb_scale
    return data + offset
19
+
20
def interpolation(data, num_samples):
    """Create `num_samples` random convex combinations of distinct row pairs of `data`."""
    n_rows = data.shape[0]

    def _one_mix():
        # pick two distinct rows, then blend them with a random convex weight
        i, j = np.random.choice(n_rows, 2, replace=False)
        w = np.random.rand()
        return w * data[i] + (1 - w) * data[j]

    return np.array([_one_mix() for _ in range(num_samples)])
29
+
30
def gaussian_noise(data, noise_scale=0.05):
    """Return `data` jittered with zero-mean Gaussian noise of std `noise_scale`."""
    jitter = np.random.normal(0, noise_scale, data.shape)
    return data + jitter
33
+
34
def data_enhancement(offspringA, augmentation_factor=2, max_generation=32):
    """
    Augment a population with perturbed / interpolated / noised copies.

    offspringA: np.array of shape [M, d].  M >= 2 is required whenever
        augmented samples are requested, because `interpolation` draws two
        distinct rows.
    augmentation_factor: desired final size = M * augmentation_factor
    max_generation: upper limit on number of NEW (=augmented) samples

    Returns an np.array of shape [M + k, d] with
    k <= min(M * (augmentation_factor - 1), max_generation).

    NOTE(review): only 2*M + augmented_needed // 3 candidates are generated,
    so when the cap exceeds that pool the result is smaller than requested —
    confirm whether this shortfall is intended.
    """
    M = offspringA.shape[0]

    # how many augmented samples we'd like
    want = M * (augmentation_factor - 1)
    # but never more than max_generation
    augmented_needed = min(want, max_generation)

    # -- now proceed exactly as before, using augmented_needed --
    # generate candidates (M perturbed + needed//3 interpolated + M noised)
    perturbed = random_perturbation(offspringA)
    interpolated = interpolation(offspringA, augmented_needed // 3)
    noised = gaussian_noise(offspringA)

    all_aug = np.vstack([perturbed, interpolated, noised])
    np.random.shuffle(all_aug)

    # slice down to the capped number
    all_aug = all_aug[:augmented_needed, :]

    # stack originals + capped augmented
    return np.vstack([offspringA, all_aug])
61
+
62
def mobo_get_ddpm_dataloader(Parent,
                             objective_functions,
                             device,
                             batch_size,
                             validation_split=0.1):
    """Build DDPM training (and optional validation) dataloaders from a parent population.

    Takes the first third of `Parent` (truncated to an even row count),
    augments it via `data_enhancement`, labels it with `objective_functions`,
    and wraps the result in torch DataLoaders.

    Args:
        Parent: array-like population of decision vectors, shape (N, d);
            only the first third of the rows is used.
        objective_functions: callable mapping an (n, d) float tensor to a
            tensor of target values for the dataset.
        device: torch device the dataset tensors are moved to.
        batch_size: mini-batch size for both loaders.
        validation_split: fraction in [0, 1) of the augmented data held out
            for evaluation; 0 disables the eval loader.

    Returns:
        (train_loader, eval_loader, dataset_size); eval_loader is None when
        validation_split == 0.
    """
    rows_to_take = int(1 / 3 * Parent.shape[0])
    pop = Parent[:rows_to_take, :]
    # keep an even number of rows for downstream pairing
    if len(pop) % 2 == 1:
        pop = pop[:-1]

    augmentation_factor = 10
    augmented_pop = data_enhancement(
        pop,
        augmentation_factor=augmentation_factor
    )

    dataset = torch.tensor(augmented_pop).float().to(device)
    dataset_size = dataset.shape[0]

    ##### TRAINING #####
    if validation_split > 0.0:
        # Hold out `validation_split` of the data for evaluation.
        # Bug fix: the split fraction was previously hard-coded to 0.10,
        # silently ignoring the validation_split argument; the default
        # value 0.1 reproduces the old behaviour.
        total_size = len(dataset)
        eval_size = int(validation_split * total_size)
        train_size = total_size - eval_size

        dataset = dataset[torch.randperm(dataset.size(0))]
        train_dataset = dataset[:train_size]
        eval_dataset = dataset[train_size:]

        y_train = objective_functions(train_dataset)
        y_val = objective_functions(eval_dataset)
        dataset_train = torch.utils.data.TensorDataset(train_dataset, y_train)
        dataset_val = torch.utils.data.TensorDataset(eval_dataset, y_val)
        train_loader = torch.utils.data.DataLoader(
            dataset_train,
            batch_size=batch_size,
            shuffle=True,
        )
        eval_loader = torch.utils.data.DataLoader(
            dataset_val,
            batch_size=batch_size,
            shuffle=False,
        )

    else:
        # Use the entire dataset for training
        y_train = objective_functions(dataset)
        dataset_train = torch.utils.data.TensorDataset(dataset, y_train)
        train_loader = torch.utils.data.DataLoader(
            dataset_train, batch_size=batch_size, shuffle=True
        )
        eval_loader = None
    return train_loader, eval_loader, dataset_size
117
+
118
+ ###########################################################
119
+
120
def get_max_hv_re(problem_name: str,
                  front_dir: str,
                  ref_point: np.ndarray = None,
                  n_pareto_points: int = None) -> float:
    """
    Compute the maximum hypervolume of the approximated Pareto front
    for a given RE problem (RE21 - RE27) from the RE-problems data.

    Args:
        problem_name: one of "re21", "re22", ..., "re27" (case-insensitive;
            it is upper-cased to build the file name).
        front_dir: path to the 'approximated_Pareto_fronts' folder cloned from
            https://github.com/ryojitanabe/reproblems.
        ref_point: optional array of shape (n_obj,) dominating the front.
            If None, it is set to 10% above the per-objective maximum.
        n_pareto_points: ignored (front files are precomputed).

    Returns:
        max_hv: the hypervolume (float) of the true front under `ref_point`.

    Raises:
        FileNotFoundError: if the expected reference_points_*.dat file is absent.
    """
    file_key = problem_name.upper()

    # 2. Build full path to data file
    fname = f"reference_points_{file_key}.dat"
    path = os.path.join(front_dir, fname)
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Pareto front file not found: {path}")

    # 3. Load the front (shape: [N, n_obj]); files are whitespace-delimited numbers
    front = np.loadtxt(path)

    # 4. Determine reference point if not provided
    if ref_point is None:
        # 10% margin beyond the worst-case nadir
        ref_point = np.max(front, axis=0) * 1.1

    # 5. Compute hypervolume via Pymoo's HV indicator
    # NOTE(review): HV.calc is deprecated in newer pymoo releases in favour of
    # calling the indicator directly — confirm against the pinned pymoo version.
    hv_indicator = HV(ref_point=ref_point)
    max_hv = hv_indicator.calc(front)  # yields a scalar

    return max_hv
160
+
161
+
162
def get_max_hv_pymoo(problem, n_pareto_points=100, ref_point=None):
    """Hypervolume of a Pymoo problem's true Pareto front.

    Args:
        problem: Pymoo Problem exposing `_calc_pareto_front`.
        n_pareto_points: number of points to sample on the front.
        ref_point: optional (n_obj,) reference point; defaults to 1.1 times
            the per-objective maximum of the sampled front.

    Returns:
        Hypervolume (float) of the sampled front w.r.t. `ref_point`.

    Raises:
        AttributeError: if the problem does not implement `_calc_pareto_front`.
    """
    # Guard clause instead of if/else: fail fast on unsupported problems.
    if not hasattr(problem, '_calc_pareto_front'):
        raise AttributeError("Problem does not implement _calc_pareto_front")

    front = problem._calc_pareto_front(n_pareto_points)

    # Default reference point: 10% beyond the nadir of the sampled front.
    if ref_point is None:
        ref_point = np.max(front, axis=0) * 1.1

    return HV(ref_point=ref_point).calc(front)
191
+
192
+
193
def objective_functions_GP(points, surrogate_model, coef_lcb, device, get_grad = False):
    """Lower-confidence-bound objectives (mean - coef_lcb * std) from a GP surrogate.

    Args:
        points: torch tensor of decision vectors, shape (N, d).
        surrogate_model: model exposing `evaluate(x, std=..., calc_gradient=...)`
            returning numpy arrays under keys "F", "S" (and "dF", "dS").
        coef_lcb: LCB exploration coefficient.
        device: torch device for the returned tensors.
        get_grad: when True, also return per-objective LCB gradients.

    Returns:
        List of (N, 1) tensors, one per objective; when `get_grad` is True,
        additionally a list of per-objective gradient tensors.
    """
    x_np = points.detach().cpu().numpy()
    result = surrogate_model.evaluate(x_np, std=True, calc_gradient=get_grad)

    def _tensor(key):
        # bridge the surrogate's numpy output back onto the torch device
        return torch.from_numpy(result[key]).float().to(device)

    lcb = _tensor("F") - coef_lcb * _tensor("S")
    per_objective = list(torch.split(lcb, 1, dim=1))

    if not get_grad:
        return per_objective

    lcb_grad = _tensor("dF") - coef_lcb * _tensor("dS")
    return per_objective, [lcb_grad[:, i, :] for i in range(lcb_grad.shape[1])]
212
+
213
def solve_min_norm_2_loss(grad_1, grad_2, return_gamma=False):
    """Closed-form min-norm convex combination of two gradient batches (2-task MGDA).

    For each row, finds gamma minimizing ||gamma*g1 + (1-gamma)*g2||, with the
    standard edge clipping of the two-objective MGDA solver.
    """
    g11 = (grad_1 * grad_1).sum(dim=1)
    g22 = (grad_2 * grad_2).sum(dim=1)
    g12 = (grad_1 * grad_2).sum(dim=1)

    # Unconstrained minimizer of the 1-D quadratic, then clip toward the
    # simplex edges (second clip intentionally overrides the first on overlap).
    gamma = -1.0 * ((g12 - g22) / (g11 + g22 - 2 * g12))
    gamma = torch.where(g12 >= g11, torch.full_like(gamma, 0.999), gamma)
    gamma = torch.where(g12 >= g22, torch.full_like(gamma, 0.001), gamma)

    gamma = gamma.view(-1, 1)
    combined = gamma * grad_1 + (1.0 - gamma) * grad_2

    if return_gamma:
        weights = torch.cat(
            (gamma.reshape(1, -1), (1.0 - gamma).reshape(1, -1)),
            dim=0,
        )
        return combined, weights

    return combined
233
+
234
def repair_bounds(x, lower, upper, args = None):
    """Project each coordinate of `x` back into the [lower, upper] box.

    Works on a detached copy of the data; `args` is accepted for interface
    compatibility and ignored.
    """
    clipped = x.data.clone()
    return clipped.clamp(min=lower, max=upper)
240
+
241
def get_mgd_grad(grads):
    """
    Compute the MGDA combined descent direction given a list of gradient tensors.
    All grads are assumed to have the same shape (parameters' shape).
    Returns a tensor of the same shape as each gradient, representing the direction g.

    The simplex-constrained QP min 0.5 * a^T (G G^T) a, sum(a)=1, a>=0 is solved
    approximately: the equality-constrained solution is obtained via a
    pseudo-inverse, then negative weights are clamped to zero and the vector
    renormalized (a one-shot correction, not an exact active-set QP solve).
    """
    m = len(grads)

    # Flatten gradients and stack into matrix of shape (m, p), where p is number of params
    flat_grads = [g.reshape(-1) for g in grads]
    G = torch.stack(flat_grads, dim=0)  # shape: (m, p)
    # Compute Gram matrix of size (m, m): entry (i,j) = g_i \cdot g_j
    gram_matrix = G @ G.t()  # shape: (m, m)

    # Solve quadratic problem: minimize 0.5 * alpha^T Gram * alpha s.t. sum(alpha)=1, alpha>=0
    # We use the closed-form solution via KKT for equality constraint, then adjust for alpha>=0.
    ones = torch.ones(m, device=gram_matrix.device, dtype=gram_matrix.dtype)
    # Solve Gram * alpha = mu * 1 (plus sum(alpha)=1). This is a linear system with Lagrange multiplier mu.
    # Use pseudo-inverse in case Gram is singular.
    inv_gram = torch.linalg.pinv(gram_matrix)
    alpha = inv_gram @ ones  # solution of Gram * alpha = 1 (unnormalized)
    if alpha.sum() == 0:
        # If all alpha became 0 (numerical issues), fall back to equal weights
        alpha = torch.ones(m, device=alpha.device) / m
    else:
        # normalize so the weights sum to one
        alpha = alpha / alpha.sum()

    # Clamp negative weights to 0 and renormalize if needed (active-set correction for constraints)
    if (alpha < 0).any():
        alpha = torch.clamp(alpha, min=0.0)
        if alpha.sum() == 0:
            # If all alpha became 0 (numerical issues), fall back to equal weights
            alpha = torch.ones(m, device=alpha.device) / m
        else:
            alpha = alpha / alpha.sum()

    # Compute the combined gradient direction g
    # Reshape each gradient to original shape and sum with weights
    g = torch.zeros_like(grads[0])
    for weight, grad in zip(alpha, grads):
        g += weight * grad
    return g
283
+
284
def median(tensor):
    """Median with NumPy semantics (average of the two middle values).

    torch.median returns the LOWER of the two middle elements for even-length
    input; the median of the sequence extended by its own maximum yields the
    UPPER middle, so averaging the two reproduces np.median.
    """
    flat = tensor.detach().flatten()
    lower_mid = flat.median()
    upper_mid = torch.cat((flat, flat.max()[None])).median()
    return (upper_mid + lower_mid) / 2.0
291
+
292
+
293
def repulsion_loss(F_, sigma=1.0, use_sigma=True):
    """
    Computes the repulsion loss over a batch of points in the objective space.

    F_: Tensor of shape (n, m), where n is the batch size.
    sigma: RBF bandwidth used when `use_sigma` is True.
    use_sigma: when False, a median-heuristic bandwidth is derived from the
        pairwise distances instead.

    Note: the sum runs over the full n x n kernel matrix — both (i, j)
    orderings AND the diagonal (each self-pair contributes exp(0) = 1) —
    not only unique pairs i < j.
    """
    n = F_.shape[0]
    # Pairwise squared Euclidean distances: shape [n, n]
    dist_sq = torch.norm(F_[:, None] - F_, dim=2).pow(2)
    # Compute RBF values for the distances
    if use_sigma:
        repulsion = torch.exp(-dist_sq / (2 * sigma**2))
    else:
        # median-heuristic bandwidth
        s = median(dist_sq) / math.log(n)
        # NOTE(review): this parses as (-dist_sq / 5e-6) * s, i.e. the kernel
        # exponent is MULTIPLIED by s / 5e-6 rather than divided by a
        # bandwidth — confirm whether -dist_sq / (5e-6 + s) (or similar)
        # was intended before changing it.
        repulsion = torch.exp(-dist_sq / 5e-6 * s)

    # Normalize by the number of points
    loss = repulsion.sum() / n
    return loss
312
+
313
def crowding_distance(points):
    """NSGA-II crowding distance for each row of `points` (shape (N, D)).

    Per objective, the extreme points get +inf and every interior point
    accumulates the normalized gap between its two sorted neighbours;
    contributions are summed over objectives.

    Returns a tensor of shape (N,).
    """
    n_points, n_obj = points.shape
    distances = torch.zeros(n_points, device=points.device)

    for obj in range(n_obj):
        values, order = torch.sort(points[:, obj])

        # boundary points are always maximally "uncrowded"
        distances[order[0]] = float("inf")
        distances[order[-1]] = float("inf")

        lo, hi = values[0], values[-1]
        span = hi - lo if hi > lo else 1.0

        # normalized gap between each interior point's neighbours
        distances[order[1:-1]] += (values[2:] - values[:-2]) / span

    return distances
335
+
336
+
337
def select_top_n_for_BaySpread(
    pop, # population to select from (tensor)
    net, # neural network for dominance prediction
    device,
    surrogate_model, # surrogate model with .evaluate()
    coef_lcb: float,
    n: int,
    top_frac: float = 0.9
):
    """
    1) Predict pairwise dominance via NN.
    2) Identify non-dominated indices.
       - If > n: compute crowding distance for those,
         pick the top n by distance, shuffle, and return.
       - Else: do the 'top_frac% by rank + crowding fill' to get up to n.

    Args:
        pop: (N, d) tensor of candidate solutions.
        net: dominance classifier consumed by `nn_predict_dom_intra`.
        device: torch device for surrogate evaluations.
        surrogate_model: GP surrogate passed to `objective_functions_GP`.
        coef_lcb: LCB coefficient for the surrogate objectives.
        n: number of solutions to return.
        top_frac: fraction of the ranked population eligible for crowding fill.

    Returns:
        (n, d) tensor of selected (shuffled) solutions.

    Notes:
        label_matrix[j, i] == 2 is treated here as "solution j dominates
        solution i"; conf_matrix holds the classifier confidence per pair.
        NOTE(review): in CASE B, `selected_from_top_frac` can overlap
        `nondom_inds` (top90 is drawn from the full ranking), so `final_inds`
        may contain duplicates — confirm whether that is intended.
    """

    N = pop.shape[0]

    # 1) Predict dominance
    label_matrix, conf_matrix = nn_predict_dom_intra(pop.detach().cpu().numpy(),
                                                     net,
                                                     device)

    # 2) Find non-dominated indices (no j predicted to dominate i)
    nondom_inds = [
        i for i in range(N)
        if not any(label_matrix[j, i] == 2 for j in range(N))
    ]

    # --- CASE A: too many non-dominated -> pick top-n by crowding ---
    if len(nondom_inds) > n:
        # Evaluate objectives on just the non-dominated set
        # pts_nd = torch.from_numpy(pop[nondom_inds]).float().to(device)
        pts_nd = pop[nondom_inds].to(device)
        Y_t = torch.cat(
            objective_functions_GP(pts_nd, surrogate_model, coef_lcb, device),
            dim=1
        )

        # Compute crowding distances and select top-n
        distances = crowding_distance(Y_t)
        topk = torch.topk(distances, n).indices.tolist()

        selected_nd = [nondom_inds[i] for i in topk]

        # Shuffle before returning
        perm = torch.randperm(n, device=pop.device)
        final_idx = torch.tensor(selected_nd, device=pop.device)[perm]
        return pop[final_idx] #.detach().cpu().numpy()

    # --- CASE B: nondom <= n -> fill up via rank + top_frac% + crowding ---
    # 3) Compute dom counts & avg confidence for all
    dom_counts = []
    avg_conf = []
    for i in range(N):
        dom_by = (label_matrix[:, i] == 2)
        cnt = int(dom_by.sum())
        dom_counts.append(cnt)
        avg_conf.append(
            float(conf_matrix[dom_by, i].sum()) / cnt
            if cnt > 0 else 0.0
        )

    # 4) Sort full pop by (dom_count asc, avg_conf desc)
    idxs = list(range(N))
    idxs.sort(key=lambda i: (dom_counts[i], -avg_conf[i]))

    # 5) Keep only top top_frac% of that ranking
    k90 = int(np.floor(top_frac * N))
    top90 = idxs[:k90]

    # 6) Evaluate surrogate objectives on the retained fraction
    pts90 = pop[top90]
    Y_t = torch.cat(
        objective_functions_GP(pts90, surrogate_model, coef_lcb, device),
        dim=1
    )

    # 7) Crowding distance & pick as many as needed to reach n
    distances = crowding_distance(Y_t)
    need = n - len(nondom_inds)
    need = max(need, 0)
    k_sel = min(need, len(top90))
    sel90 = torch.topk(distances, k_sel).indices.tolist()
    selected_from_top_frac = [ top90[i] for i in sel90 ]

    # 8) Build final list: all nondom + selected_from_top_frac
    final_inds = nondom_inds + selected_from_top_frac

    # 9) If still short (e.g. N<n), pad with best remaining in idxs
    if len(final_inds) < n:
        remaining = [i for i in idxs if i not in final_inds]
        to_add = n - len(final_inds)
        final_inds += remaining[:to_add]

    # 10) Shuffle final indices
    perm = torch.randperm(len(final_inds), device=pop.device)
    final_idx = torch.tensor(final_inds, device=pop.device)[perm]

    return pop[final_idx] #.detach().cpu().numpy()
438
+
439
def l_simple_loss(predicted_noise, actual_noise):
    """DDPM 'simple' objective: mean-squared error between predicted and true noise."""
    return nn.functional.mse_loss(predicted_noise, actual_noise)
441
+
442
def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Discretize a continuous alpha-bar curve into per-step betas.

    Each beta_i = 1 - alpha_bar(t_{i+1}) / alpha_bar(t_i), capped at
    `max_beta` to prevent singularities near t = 1.

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    steps = num_diffusion_timesteps
    betas = [
        min(1 - alpha_bar((i + 1) / steps) / alpha_bar(i / steps), max_beta)
        for i in range(steps)
    ]
    return torch.from_numpy(np.array(betas)).float()
460
+
461
+
462
+ # Noise Scheduler (Cosine Schedule)
463
def cosine_beta_schedule(timesteps, s=0.008):
    """
    Cosine schedule for beta values over timesteps (Nichol & Dhariwal style).

    :param timesteps: number of diffusion steps / betas to produce.
    :param s: small offset keeping betas well-behaved near t = 0.
        Bug fix: `s` was previously ignored — the lambda hard-coded
        0.008 / 1.008. The default value reproduces the old behaviour exactly.
    """
    return betas_for_alpha_bar(
        timesteps,
        lambda t: math.cos((t + s) / (1 + s) * math.pi / 2) ** 2,
    )
471
+
472
def mgd_armijo_step(
    x_t: torch.Tensor,
    d: torch.Tensor,
    f_old,
    grads, # (N, m, d)
    objective_functions_GP,
    surrogate_model,
    args,
    eta_init = 0.9,
    rho = 0.9,
    c1 = 1e-4,
    max_backtracks = 10,
):
    """
    Batched Armijo back-tracking line search for Multiple-Gradient-Descent (MGD).

    Parameters
    ----------
    x_t : torch.Tensor, shape (N, d)
        Current batch of points; the trial update is x + eta * d.
    d : torch.Tensor, shape (N, d)
        Per-point search direction.
    f_old : torch.Tensor, shape (N, m)
        Objective values at x_t used in the sufficient-decrease bound.
    grads : torch.Tensor, shape (N, m, d)
        Per-objective gradients at x_t.
    objective_functions_GP : callable
        Surrogate objective evaluator returning a list of (N, 1) tensors.
    surrogate_model, args
        Forwarded to `objective_functions_GP`; `args` must provide
        `coef_lcb` and `device`.
    eta_init : float or torch.Tensor
        Initial step size(s); a scalar is broadcast to all N points.
    rho : float
        Multiplicative shrink factor applied on Armijo failure.
    c1 : float
        Sufficient-decrease constant.
    max_backtracks : int
        Maximum number of shrink iterations.

    Returns
    -------
    eta : torch.Tensor, shape (N, 1)
        Final step sizes (column vector, ready for broadcasting).
    """

    x = x_t.clone().detach()

    # Broadcast a scalar initial step size to one entry per point.
    if not torch.is_tensor(eta_init):
        eta = torch.full((x.shape[0],), float(eta_init),
                         dtype=x.dtype, device=x.device)
    else:
        eta = eta_init.clone().to(x)

    # Directional derivatives g_j . d for every point/objective: shape (N, m)
    grad_dot = torch.einsum('nkd,nd->nk', grads, d)

    # Mask of points whose step size is still being shrunk.
    improve = torch.ones_like(eta, dtype=torch.bool)

    for _ in range(max_backtracks):
        if not improve.any():
            break

        # Evaluate objectives at trial points
        trial_x = x[improve] + eta[improve, None] * d[improve]
        f_new = torch.cat(objective_functions_GP(
            trial_x, surrogate_model, args.coef_lcb, args.device
        ), dim=1)

        # Armijo test (vectorised over objectives)
        # f_new <= f_old + c1 * eta * grad_dot   (element-wise, ALL objectives)
        ok = (f_new <= f_old[improve] + c1 * eta[improve, None] * grad_dot[improve]).all(dim=1)

        # Update masks and step sizes:
        # keep eta where the test passed, shrink by rho where it failed,
        # and drop the passing points from the active mask.
        eta[improve] = torch.where(ok, eta[improve], rho * eta[improve])
        improve_mask = improve.clone()
        improve[improve_mask] = ~ok

    return eta[:, None]
526
+
527
+
528
def adaptive_scale_delta_vect(
    g: torch.Tensor, delta_raw: torch.Tensor, grads: torch.Tensor, gamma: float = 0.9
) -> torch.Tensor:
    """
    Adaptive scaling to preserve *positivity*:

        ∇f_j(x_i)^T [ g_i + rho_i * delta_raw_i ] > 0   for all j.

    Args:
        g (torch.Tensor): [n_points, d], the multi-objective "gradient"
            (which we *subtract* in the update).
        delta_raw (torch.Tensor): [n_points, d], the unscaled diversity/repulsion direction.
        grads (torch.Tensor): [m, n_points, d], storing ∇f_j(x_i).
        gamma (float): Safety factor in (0,1).

    Returns:
        delta_scaled (torch.Tensor): [n_points, d], scaled directions s.t.
            for all j: grads[j,i]ᵀ [g[i] + delta_scaled[i]] > 0.
    """
    # n_points, d = g.shape
    # m = grads.shape[0]  # number of objectives

    # 1) Compute alpha_{i,j} = ∇f_j(x_i)^T g_i
    # shape of alpha: [n_points, m]
    alpha = torch.einsum("j i d, i d -> i j", grads, g)

    # 2) Compute beta_{i,j} = ∇f_j(x_i)^T delta_raw_i
    # shape of beta: [n_points, m]
    beta = torch.einsum("j i d, i d -> i j", grads, delta_raw)

    # 3) We only need to restrict rho_i if alpha_{i,j} > 0 and beta_{i,j} < 0.
    # Because for alpha + rho*beta to stay > 0, we need
    #     rho < alpha / -beta
    # when beta<0 and alpha>0.
    mask = (alpha > 0.0) & (beta < 0.0)

    # Prepare an array of ratios = alpha / -beta, default +∞
    # (+∞ means "objective j imposes no constraint on point i")
    ratio = torch.full_like(alpha, float("inf"))

    # Where mask is True, compute ratio_{i,j}
    ratio[mask] = alpha[mask] / (-beta[mask])  # must remain below this

    # 4) For each point i, we pick rho_i = gamma * min_j ratio[i,j].
    # If the min is +∞ => no constraints => set rho_i=1.0
    ratio_min, _ = ratio.min(dim=1)  # [n_points]
    rho = gamma * ratio_min
    # If ratio_min == +∞ => no constraint => set rho_i=1.
    inf_mask = torch.isinf(ratio_min)
    rho[inf_mask] = 1.0

    # 5) Scale delta_raw by rho_i (broadcast the per-point factor along d)
    delta_scaled = delta_raw * rho.unsqueeze(1)

    return delta_scaled
582
+
583
def solve_for_h(
    x_t_minus,
    f,
    surrogate_model,
    g_val,
    grads,
    args,
    eta,
    lambda_rep,
    sigma=1.0,
    use_sigma=True,
    num_inner_steps=10,
    lr_inner=1e-2,
):
    """
    For a given batch x_t and its corresponding g(x_t), solve for h by
    minimizing -L (i.e., maximizing L) via gradient descent.

    Args:
        x_t_minus: (N, d) tensor, current batch of points.
        f: objective evaluator with the `objective_functions_GP` signature.
        surrogate_model: surrogate passed through to `f`.
        g_val: (N, d) multi-objective gradient g(x_t) (treated as constant).
        grads: per-objective gradients consumed by `adaptive_scale_delta_vect`.
        args: must provide `gamma_scale_delta`, `bounds`, `coef_lcb`, `device`.
        eta: step size used when rolling the points forward inside the loop.
        lambda_rep: weight of the repulsion (diversity) term.
        sigma, use_sigma: forwarded to `repulsion_loss`.
        num_inner_steps: Adam iterations on h.
        lr_inner: Adam learning rate.

    Returns:
        (h*, [alignment, rep_loss]) — the optimized direction (detached) and
        the last inner-loop loss components.

    NOTE(review): `num_inner_steps` must be >= 1 — with 0 the names
    `gtarg_scaled`, `alignment` and `rep_loss` are never bound and the
    return raises NameError. Also, the returned h includes the
    `gtarg_scaled` from the LAST inner step only.
    """

    x_t_h = x_t_minus.clone().detach()
    g = g_val.clone().detach()
    # random target direction, shared across the batch
    g_targ = torch.randn((1, g.shape[1]), device=g.device)

    # Initialize h
    h = torch.zeros_like(g, requires_grad=False) + 1e-6  # as a free parameter
    h = h.requires_grad_()

    optimizer_inner = optim.Adam([h], lr=lr_inner)

    for step in range(num_inner_steps):
        # scale the random target so it does not destroy descent positivity
        gtarg_scaled = adaptive_scale_delta_vect(
            h, g_targ, grads, gamma=args.gamma_scale_delta
        )

        # Alignment term: maximize <g, h>
        # To maximize L, we minimize -L:
        alignment = -torch.mean(torch.sum(g * h, dim=-1))
        # Update points:
        x_t_h = x_t_h - eta * (h + gtarg_scaled)

        x_t_h.data = repair_bounds(
            x_t_h.data, args.bounds[0], args.bounds[1]
        )

        # Map the updated points to the objective space
        F_vals = f(
            x_t_h, surrogate_model, args.coef_lcb, args.device
        )
        F_ = torch.cat([f_i.unsqueeze(1) for f_i in F_vals], dim=1)

        # Compute repulsion loss to encourage diversity
        if use_sigma:
            rep_loss = repulsion_loss(F_, sigma)
        else:
            rep_loss = repulsion_loss(F_, use_sigma=False)

        # Our composite objective L is:
        loss = alignment + lambda_rep * rep_loss

        optimizer_inner.zero_grad()
        loss.backward(retain_graph=True)
        optimizer_inner.step()

    h = h + gtarg_scaled  # This is h*(x_t) in the paper

    return h.detach(), [alignment.detach(), rep_loss.detach()]
649
+
650
+
651
def select_top_n_candidates_for_bayMS(
    points: torch.Tensor,
    n,
    f,
    coef_lcb,
    style="crowding",
) -> torch.Tensor:
    """
    Selects the top `n` points from `points` based on a given style.

    Args:
        points (torch.Tensor): Candidate solutions (shape [N, D]).
        n: number of points to select.
        f: surrogate model exposing `evaluate(x, std=...)` that returns a dict
           with predicted means under "F" and standard deviations under "S".
        coef_lcb: coefficient of LCB; candidates are ranked on mean - coef_lcb * std.
        style: selection strategy; only 'crowding' (crowding distance in the
           predicted objective space) is supported.

    Returns:
        torch.Tensor: The best subset of points (shape [n, D]).

    Raises:
        ValueError: for an unknown `style`.
    """
    if style != "crowding":
        raise ValueError(f"Unknown style: {style}")

    # One surrogate call retrieves mean and std together.
    # (Bug fix: the surrogate was previously evaluated twice on the same
    # points — once for the mean and once for the std.)
    x_np = points.detach().cpu().numpy()
    eval_result = f.evaluate(x_np, std=True)
    y_mean = eval_result["F"]
    y_std = eval_result["S"]

    # Drop candidates whose predicted mean contains NaNs.
    rows_with_nan = np.any(np.isnan(y_mean), axis=1)
    valid = ~rows_with_nan

    # Lower confidence bound in the predicted objective space.
    y_lcb = y_mean[valid] - coef_lcb * y_std[valid]
    predicted_front = torch.from_numpy(y_lcb).float()

    # Index into the same NaN-filtered candidate set the distances are
    # computed on. (Bug fix: topk indices previously addressed the
    # UNFILTERED `points`, misaligning selection whenever NaN rows existed.)
    valid_points = points[torch.from_numpy(valid)]

    # Prefer sparsely populated regions of the predicted front.
    distances = crowding_distance(predicted_front)
    top_indices = torch.topk(distances, n).indices
    return valid_points[top_indices]
687
+
688
def sbx(sorted_pop, eta=15):
    """Simulated Binary Crossover over a population array.

    For every pair of offspring slots, two parents are drawn uniformly at
    random (repetition across pairs possible); the distribution index `eta`
    controls how close offspring stay to their parents.
    """
    n_pop, n_var = sorted_pop.shape
    offspring = np.empty_like(sorted_pop)

    for slot in range(0, n_pop, 2):
        p1 = sorted_pop[np.random.choice(n_pop)]
        p2 = sorted_pop[np.random.choice(n_pop)]

        u = np.random.random(n_var)
        # spread factor: contracting branch for u <= 0.5, expanding otherwise
        beta = np.where(
            u <= 0.5,
            (2 * u) ** (1 / (eta + 1)),
            (1 / (2 * (1 - u))) ** (1 / (eta + 1)),
        )

        offspring[slot] = 0.5 * ((1 + beta) * p1 + (1 - beta) * p2)
        if slot + 1 < n_pop:
            offspring[slot + 1] = 0.5 * ((1 - beta) * p1 + (1 + beta) * p2)

    return offspring
711
+
712
+
713
def environment_selection(population, n):
    """
    environmental selection in SPEA-2
    :param population: current population, an (N, m) array of objective values
        (rows are passed directly to `cal_fit` / `trunc`, which treat them as
        points in objective space)
    :param n: number of selected individuals
    :return: tuple (selected population rows, their indices into `population`)
    """
    # SPEA2 fitness: values < 1 mark non-dominated individuals
    fitness = cal_fit(population)
    index = np.nonzero(fitness < 1)[0]
    if len(index) < n:
        # not enough non-dominated points: take the n best by fitness
        rank = np.argsort(fitness)
        index = rank[:n]
    elif len(index) > n:
        # too many: drop the most crowded points (SPEA2 archive truncation)
        del_no = trunc(population[index, :], len(index) - n)
        index = np.setdiff1d(index, index[del_no])

    population = population[index, :]
    return population, index
731
+
732
+
733
+ def trunc(pop_obj, k):
734
+ n, m = np.shape(pop_obj)
735
+ distance = cdist(pop_obj, pop_obj)
736
+ distance[np.eye(n) > 0] = np.inf
737
+ del_no = np.ones(n) < 0
738
+ while np.sum(del_no) < k:
739
+ remain = np.nonzero(np.logical_not(del_no))[0]
740
+ temp = np.sort(distance[remain, :][:, remain], axis=1)
741
+ rank = np.argsort(temp[:, 0])
742
+ del_no[remain[rank[0]]] = True
743
+ return del_no
744
+
745
+
746
def cal_fit(pop_obj):
    """SPEA2 fitness: raw dominance strength plus a k-NN density term.

    dominance[i, j] is True when row i weakly dominates row j (at least one
    strictly smaller objective and none strictly larger).
    """
    n, m = pop_obj.shape

    dominance = np.zeros((n, n), dtype=bool)
    for i in range(n - 1):
        for j in range(i + 1, n):
            i_better = np.any(pop_obj[i, :] < pop_obj[j, :])
            j_better = np.any(pop_obj[i, :] > pop_obj[j, :])
            if i_better and not j_better:
                dominance[i, j] = True
            elif j_better and not i_better:
                dominance[j, i] = True

    # strength S(i): how many solutions i dominates
    strength = np.sum(dominance, axis=1, keepdims=True)

    # raw fitness R(i): total strength of everything that dominates i
    raw = np.zeros(n)
    for i in range(n):
        raw[i] = np.sum(strength[dominance[:, i]])

    # density D(i): inverse distance to the sqrt(n)-th nearest neighbour
    dist = cdist(pop_obj, pop_obj)
    dist[np.eye(n) > 0] = np.inf
    dist = np.sort(dist, axis=1)
    density = 1 / (dist[:, int(np.sqrt(n))] + 2)

    return raw + density
772
+
773
+
774
def pm_mutation(pop_dec, boundary):
    """Polynomial mutation (mutation prob 1/d per variable, eta_m = 20).

    The population is first truncated to an even row count and clipped into
    the [lower, upper] box given by `boundary`; on average one variable per
    individual is perturbed. Returns a new array (the input is not mutated).
    """
    prob_mut = 1
    eta_m = 20
    pop_dec = pop_dec[: (len(pop_dec) // 2) * 2, :]
    n, d = pop_dec.shape

    # which entries mutate, and the random draw shaping each perturbation
    site = np.random.random((n, d)) < prob_mut / d
    mu = np.random.random((n, d))

    lower = np.tile(boundary[0], (n, 1))
    upper = np.tile(boundary[1], (n, 1))
    pop_dec = np.minimum(np.maximum(pop_dec, lower), upper)

    # lower half of the polynomial distribution (mu <= 0.5): push toward lower bound
    pick = site & (mu <= 0.5)
    norm = (pop_dec[pick] - lower[pick]) / (upper[pick] - lower[pick])
    pop_dec[pick] += (upper[pick] - lower[pick]) * (
        np.power(
            2.0 * mu[pick] + (1.0 - 2.0 * mu[pick]) * np.power(1.0 - norm, eta_m + 1.0),
            1.0 / (eta_m + 1),
        )
        - 1.0
    )

    # upper half (mu > 0.5): push toward upper bound
    pick = site & (mu > 0.5)
    norm = (upper[pick] - pop_dec[pick]) / (upper[pick] - lower[pick])
    pop_dec[pick] += (upper[pick] - lower[pick]) * (
        1.0
        - np.power(
            2.0 * (1.0 - mu[pick])
            + 2.0 * (mu[pick] - 0.5) * np.power(1.0 - norm, eta_m + 1.0),
            1.0 / (eta_m + 1.0),
        )
    )

    # final safety clip into the box
    return np.maximum(np.minimum(pop_dec, upper), lower)
806
+
807
+
808
def sort_population(pop, label_matrix, conf_matrix):
    """Order `pop` by predicted dominance: fewest dominators first, ties
    broken by higher average confidence of the dominating predictions.

    label_matrix[j, i] == 2 encodes "solution j dominates solution i";
    conf_matrix holds the classifier confidence for each pairwise label.
    Returns the reordered population as an np.array.
    """
    size = len(pop)
    dom_counts = []
    mean_confs = []
    for i in range(size):
        dominators = [j for j in range(size) if label_matrix[j, i] == 2]
        dom_counts.append(len(dominators))
        total_conf = sum(conf_matrix[j, i] for j in dominators)
        mean_confs.append(total_conf / (len(dominators) if dominators else 1))

    ranked = sorted(
        zip(pop, dom_counts, mean_confs),
        key=lambda entry: (entry[1], -entry[2]),
    )
    return np.array([entry[0] for entry in ranked])
829
+
830
+
831
def convert_seconds(seconds):
    """Print `seconds` broken down into hours/minutes/seconds; returns None."""
    hours, leftover = divmod(seconds, 3600)
    minutes, remaining_seconds = divmod(leftover, 60)
    print(f"Time: {hours} hours {minutes} minutes {remaining_seconds} seconds")
838
+
839
def set_seed(seed):
    """Seed every RNG in use (NumPy, Python `random`, PyTorch CPU and all
    CUDA devices) and force deterministic cuDNN behaviour, for reproducibility.

    :param seed: integer seed applied to all generators.
    :return: None
    """
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # trade cuDNN autotuning for run-to-run determinism
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True