evograd-diff 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evograd/__init__.py +67 -0
- evograd/algorithms/__init__.py +138 -0
- evograd/algorithms/cmaes.py +1365 -0
- evograd/algorithms/de.py +895 -0
- evograd/algorithms/ga.py +532 -0
- evograd/algorithms/pso.py +648 -0
- evograd/algorithms/shade.py +1165 -0
- evograd/benchmarks/functions/__init__.py +229 -0
- evograd/benchmarks/functions/base.py +217 -0
- evograd/benchmarks/functions/cec2017/__init__.py +250 -0
- evograd/benchmarks/functions/cec2017/basic.py +413 -0
- evograd/benchmarks/functions/cec2017/composition.py +580 -0
- evograd/benchmarks/functions/cec2017/data.pkl +0 -0
- evograd/benchmarks/functions/cec2017/data.py +350 -0
- evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
- evograd/benchmarks/functions/cec2017/simple.py +326 -0
- evograd/benchmarks/functions/classical.py +649 -0
- evograd/benchmarks/functions/smoothed_funnel.py +476 -0
- evograd/benchmarks/functions/transforms.py +463 -0
- evograd/benchmarks/run_benchmark_functions.py +1208 -0
- evograd/core/__init__.py +73 -0
- evograd/core/algorithm.py +778 -0
- evograd/core/maximize.py +269 -0
- evograd/core/minimize.py +740 -0
- evograd/core/problem.py +444 -0
- evograd/core/result.py +571 -0
- evograd/core/termination.py +602 -0
- evograd/operators/__init__.py +178 -0
- evograd/operators/crossover.py +1117 -0
- evograd/operators/mutation.py +1098 -0
- evograd/operators/relaxations.py +175 -0
- evograd/operators/repair.py +601 -0
- evograd/operators/sampling.py +577 -0
- evograd/operators/selection.py +981 -0
- evograd/operators/survival.py +1000 -0
- evograd/tests/__init__.py +11 -0
- evograd/tests/run_all.py +78 -0
- evograd/tests/test_core.py +528 -0
- evograd/tests/test_ga.py +572 -0
- evograd/tests/test_operators.py +662 -0
- evograd/tests/test_per_individual.py +326 -0
- evograd/tests/test_utils.py +328 -0
- evograd/utils/__init__.py +97 -0
- evograd/utils/callbacks.py +926 -0
- evograd/utils/device.py +502 -0
- evograd/utils/duplicates.py +421 -0
- evograd_diff-0.1.0.dist-info/METADATA +439 -0
- evograd_diff-0.1.0.dist-info/RECORD +50 -0
- evograd_diff-0.1.0.dist-info/WHEEL +4 -0
- evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Smoothed Multi-Funnel Benchmark Functions
|
|
3
|
+
|
|
4
|
+
These benchmarks create landscapes with multiple attraction basins where:
|
|
5
|
+
- One basin is wide but suboptimal (gradient trap)
|
|
6
|
+
- One basin is narrow but optimal (requires exploration to find)
|
|
7
|
+
- The optimal basin contains an ill-conditioned valley (benefits from gradients)
|
|
8
|
+
|
|
9
|
+
This design specifically targets the scenario where:
|
|
10
|
+
- Pure gradient methods: Fall into the wide basin, converge to suboptimal
|
|
11
|
+
- Pure EAs: Can find the narrow basin, but waste evaluations on fine convergence
|
|
12
|
+
- Differentiable EAs: Population finds basins + gradients accelerate valley convergence
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Optional, Tuple
|
|
16
|
+
import math
|
|
17
|
+
|
|
18
|
+
import torch
|
|
19
|
+
from torch import Tensor
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from .base import BenchmarkFunction
|
|
23
|
+
except ImportError:
|
|
24
|
+
from base import BenchmarkFunction
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def log_sum_exp_min(f_values: Tensor, tau: float = 1.0) -> Tensor:
    """
    Smooth approximation to min using log-sum-exp.

        f(x) = -τ * log(Σ exp(-f_i/τ))

    As τ → 0, this approaches min(f_values).
    Larger τ gives smoother transitions between basins.
    For τ > 0 the result never exceeds the hard minimum, because
    log-sum-exp is an upper bound on the maximum of its arguments.

    Args:
        f_values: (..., K) tensor of K function values
        tau: Temperature parameter (smaller = sharper min). ``tau == 0`` is
            treated as the limiting case and returns the exact minimum
            over the last dimension.

    Returns:
        (...,) tensor of smoothed minimum values
    """
    if tau == 0:
        # Dividing by a zero temperature would produce NaN; return the
        # τ → 0 limit (the hard minimum) instead.
        return f_values.amin(dim=-1)

    # torch.logsumexp is numerically stabilised, so large |f_i / τ| values
    # do not overflow the way a naive exp/sum/log chain would.
    return -tau * torch.logsumexp(-f_values / tau, dim=-1)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def random_orthogonal_matrix(n: int, seed: int = 0) -> Tensor:
    """
    Build a reproducible n x n orthogonal matrix.

    A dedicated generator seeded with ``seed`` draws an n x n standard-normal
    matrix, and the Q factor of its QR decomposition is returned. Only the
    local generator is used, so torch's global RNG state is left alone.

    Args:
        n: Size of the square matrix.
        seed: Seed for the private generator.

    Returns:
        (n, n) tensor holding the Q factor.
    """
    rng = torch.Generator()
    rng.manual_seed(seed)
    gaussian = torch.randn(n, n, generator=rng)
    # torch.linalg.qr returns (Q, R); only the orthogonal factor is needed.
    return torch.linalg.qr(gaussian)[0]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class MultiBasinRastrigin(BenchmarkFunction):
    """
    Multi-basin Rastrigin: multiple attraction basins combined with log-sum-exp.

    Creates K Rastrigin basins at different centers with different offsets.
    The basins are combined using log-sum-exp smoothing. The design intent is
    a landscape where gradient descent gets trapped in the nearest basin,
    while population-based search can discover the global optimum.

    Key properties:
    - Each basin has Rastrigin's characteristic local minima structure
    - Basins are smoothly connected (differentiable everywhere)
    - Global basin has offset=0, distractor basins have offset>0

    f(x) = -τ·log(Σₖ exp(-fₖ(x)/τ))

    where fₖ(x) = A·n + Σᵢ[(xᵢ - cₖᵢ)² - A·cos(2π(xᵢ - cₖᵢ))] + δₖ

    Note that the log-sum-exp is a lower bound on min_k fₖ(x), so with more
    than one basin the value at ``optimal_x`` is slightly below the nominal
    ``optimal_value`` of 0.0.

    Parameters
    ----------
    n_var : int
        Dimensionality. Default 10.
    n_basins : int
        Number of basins (must be >= 1). Default 3.
    amplitude : float
        Rastrigin amplitude A. Default 10.0.
    tau : float
        Smoothing temperature. Smaller = sharper basin boundaries. Default 1.0.
    basin_separation : float
        Placement scale for distractor centers. The first two distractors
        sit at -basin_separation/2 and +basin_separation/2 in every
        coordinate; further distractors lie at Euclidean distance
        basin_separation from the origin in a random direction. Default 4.0.
    distractor_offset : float
        Minimum offset for distractor basins; offsets are drawn uniformly
        from [distractor_offset, 2·distractor_offset). Default 5.0.
    seed : int
        Random seed for basin placement and offsets. Default 0.
    xl : float
        Lower bound. Default -5.0.
    xu : float
        Upper bound. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_basins`` is smaller than 1.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for MultiBasinRastrigin."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        n_basins: int = 3,
        amplitude: float = 10.0,
        tau: float = 1.0,
        basin_separation: float = 4.0,
        distractor_offset: float = 5.0,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_basins < 1:
            raise ValueError("n_basins must be >= 1")

        self.name = "MultiBasinRastrigin"
        self.n_basins = n_basins
        self.amplitude = amplitude
        self.tau = tau
        self.seed = seed

        # Private generator: building a benchmark must not reseed torch's
        # global RNG, which would silently affect the caller's own sampling.
        g = torch.Generator()
        g.manual_seed(seed)

        # Basin centers: row 0 (the global basin) stays at the origin.
        centers = torch.zeros(n_basins, n_var)

        if n_basins >= 2:
            # First distractor: every coordinate negative.
            centers[1] = -basin_separation / 2

        if n_basins >= 3:
            # Second distractor: every coordinate positive.
            centers[2] = basin_separation / 2

        # Additional distractors: random unit direction scaled to
        # basin_separation.
        for k in range(3, n_basins):
            direction = torch.randn(n_var, generator=g)
            direction = direction / direction.norm()
            centers[k] = direction * basin_separation

        self._centers = centers

        # Basin offsets: global=0, distractors in
        # [distractor_offset, 2 * distractor_offset).
        self._offsets = torch.zeros(n_basins)
        self._offsets[1:] = (
            distractor_offset
            + torch.rand(n_basins - 1, generator=g) * distractor_offset
        )

        # Nominal global optimum (center and offset of basin 0).
        self._optimal_x = self._centers[0].clone()
        self._optimal_value = 0.0

    @property
    def optimal_x(self) -> Tensor:
        """Center of the global basin (the origin)."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Nominal optimum value (offset of the global basin, 0.0)."""
        return self._optimal_value

    def _rastrigin(self, x: Tensor, center: Tensor) -> Tensor:
        """
        Rastrigin function centered at given point.

        ``x`` and ``center`` are broadcast against each other, so ``center``
        may be a single (n_var,) point or a (K, n_var) stack of centers when
        ``x`` carries a matching singleton axis. The last axis is reduced.
        """
        A = self.amplitude
        diff = x - center.to(x.device)
        n = diff.shape[-1]

        quad = (diff ** 2).sum(dim=-1)
        cosine = (A * torch.cos(2 * math.pi * diff)).sum(dim=-1)

        return A * n + quad - cosine

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate multi-basin Rastrigin with log-sum-exp smoothing.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        centers = self._centers.to(x.device)
        offsets = self._offsets.to(x.device)

        # Broadcast all basins at once:
        # (..., 1, n_var) against (n_basins, n_var) -> (..., n_basins)
        f_all = self._rastrigin(x.unsqueeze(-2), centers) + offsets

        # Smooth min via log-sum-exp
        return log_sum_exp_min(f_all, self.tau)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class MultiBasinRosenbrock(BenchmarkFunction):
    """
    Multiple Rosenbrock basins with smooth transitions.

    Creates K funnels, each a shifted/rotated Rosenbrock with different
    biases. Uses log-sum-exp for smooth differentiable combination.

    Funnel k evaluates Rosenbrock at y = (x - c_k) @ R_k and adds bias b_k;
    funnel 0 has bias 0 and is therefore the nominal global funnel. Its
    minimiser (y = 1) corresponds to x* = c_0 + R_0 @ 1.

    Unlike a hard-min combination, this maintains gradients across basin
    boundaries. Because log-sum-exp is a lower bound on the hard minimum,
    the value at ``optimal_x`` is slightly below ``optimal_value`` when
    several funnels are present.

    Parameters
    ----------
    n_var : int
        Dimensionality (must be >= 2). Default 10.
    n_funnels : int
        Number of funnels (must be >= 1). Default 4.
    tau : float
        Temperature for smoothing. Default 1.0.
    shift_scale : float
        Scale of random shifts. Default 2.0.
    bias_scale : float
        Scale of random biases (funnel depth differences). Default 50.0.
    rotate_funnels : bool
        Whether each funnel gets a random rotation. Default True.
    seed : int
        Random seed. Default 0.
    xl : float
        Lower bound. Default -5.0.
    xu : float
        Upper bound. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_var < 2`` or ``n_funnels < 1``.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for MultiBasinRosenbrock."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        n_funnels: int = 4,
        tau: float = 1.0,
        shift_scale: float = 2.0,
        bias_scale: float = 50.0,
        rotate_funnels: bool = True,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_var < 2:
            raise ValueError("MultiBasinRosenbrock requires n_var >= 2")
        if n_funnels < 1:
            raise ValueError("n_funnels must be >= 1")

        self.name = "MultiBasinRosenbrock"
        self.n_funnels = n_funnels
        self.tau = tau

        g = torch.Generator()
        g.manual_seed(seed)

        # Funnel centers. The clamp keeps center + 1 (the optimum of an
        # unrotated shifted Rosenbrock) inside [xl, xu].
        # NOTE(review): with rotate_funnels=True the optimum is
        # center + R @ 1, which this clamp does not bound - confirm whether
        # out-of-bounds optima are acceptable for rotated funnels.
        centers = torch.randn(n_funnels, n_var, generator=g) * shift_scale
        self._centers = centers.clamp(xl - 1.0, xu - 1.0)

        # Biases: funnel 0 keeps bias 0 (global best), the others get a
        # uniform draw in [0, bias_scale).
        biases = torch.zeros(n_funnels)
        if n_funnels > 1:
            biases[1:] = torch.rand(n_funnels - 1, generator=g) * bias_scale
        self._biases = biases

        # Per-funnel rotations, stacked as (n_funnels, n_var, n_var).
        if rotate_funnels:
            self._rotations = torch.stack([
                random_orthogonal_matrix(n_var, seed + k)
                for k in range(n_funnels)
            ])
        else:
            self._rotations = torch.eye(n_var).unsqueeze(0).expand(n_funnels, -1, -1)

        # Global optimum of funnel 0. __call__ evaluates Rosenbrock at
        # y = (x - c) @ R, so y = 1 gives x - c = 1 @ R.T, i.e. x = c + R @ 1.
        self._optimal_x = self._centers[0] + self._rotations[0] @ torch.ones(n_var)
        self._optimal_value = float(self._biases[0].item())

    @property
    def optimal_x(self) -> Tensor:
        """Global optimum location (minimiser of funnel 0)."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Nominal global optimum value (bias of funnel 0)."""
        return self._optimal_value

    def _rosenbrock(self, x: Tensor) -> Tensor:
        """Standard Rosenbrock function, reduced over the last axis."""
        x_i = x[..., :-1]
        x_ip1 = x[..., 1:]
        return (100.0 * (x_ip1 - x_i ** 2) ** 2 + (1.0 - x_i) ** 2).sum(dim=-1)

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate the multi-basin function.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        centers = self._centers.to(x.device)
        biases = self._biases.to(x.device)

        # x: (..., D), centers: (K, D) -> shifted: (..., K, D)
        shifted = x.unsqueeze(-2) - centers

        # einsum needs both operands in one dtype; follow the (possibly
        # promoted) dtype of the shifted input so float64 inputs work too.
        rotations = self._rotations.to(device=x.device, dtype=shifted.dtype)

        # Apply per-funnel rotations: (..., K, D) @ (K, D, D) -> (..., K, D)
        rotated = torch.einsum('...kd,kde->...ke', shifted, rotations)

        # Rosenbrock on each funnel plus its bias: (..., K)
        f_all = self._rosenbrock(rotated) + biases

        # Smooth min over funnels
        return log_sum_exp_min(f_all, self.tau)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class DeceptiveLandscape(BenchmarkFunction):
    """
    Highly deceptive landscape with controllable difficulty.

    Combines elements designed to challenge different optimization
    approaches:

    1. Smooth spherical distractor basins (intended to trap gradient methods)
    2. A global basin with ill-conditioning (needs exploration + exploitation)
    3. Non-separable structure via rotation of the global basin

    No dedicated saddle-point or ridge term is implemented in this class.

    The basins are merged with a log-sum-exp smooth minimum. Since that is a
    lower bound on the hard minimum, the value at ``optimal_x`` is slightly
    below the nominal ``optimal_value`` of 0.0 when distractors are present.

    Parameters
    ----------
    n_var : int
        Dimensionality (must be >= 2). Default 10.
    tau : float
        Smoothing temperature. Default 0.5 (fairly sharp).
    n_distractors : int
        Number of distractor basins (must be >= 0). Default 2.
    distractor_depth : float
        Function value at the bottom of every distractor basin; the global
        basin bottoms out at 0, so smaller values make distractors more
        competitive. Default 5.0.
    global_conditioning : float
        Condition number of global basin valley (must be > 0). Default 100.0.
    rotate : bool
        Apply random rotation. Default True.
    seed : int
        Random seed. Default 0.
    xl : float
        Lower bound. Default -5.0.
    xu : float
        Upper bound. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_var < 2``, ``n_distractors < 0`` or
        ``global_conditioning <= 0``.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for DeceptiveLandscape."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        tau: float = 0.5,
        n_distractors: int = 2,
        distractor_depth: float = 5.0,
        global_conditioning: float = 100.0,
        rotate: bool = True,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_var < 2:
            raise ValueError("DeceptiveLandscape requires n_var >= 2")
        if n_distractors < 0:
            raise ValueError("n_distractors must be >= 0")
        if global_conditioning <= 0:
            # log10 of a non-positive number would poison the scales with
            # NaN/inf and only surface later as NaN function values.
            raise ValueError("global_conditioning must be > 0")

        self.name = "DeceptiveLandscape"
        self.tau = tau
        self.n_distractors = n_distractors
        self.distractor_depth = distractor_depth

        g = torch.Generator()
        g.manual_seed(seed)

        # Rotation for non-separability
        if rotate:
            self._Q = random_orthogonal_matrix(n_var, seed)
        else:
            self._Q = torch.eye(n_var)

        # Distractor centers: standard-normal draws scaled by 2.
        self._distractor_centers = torch.randn(n_distractors, n_var, generator=g) * 2.0

        # Per-distractor curvature coefficients in [0.5, 1.0). They multiply
        # the squared distance, so a SMALLER value gives a wider basin.
        self._distractor_widths = torch.rand(n_distractors, generator=g) * 0.5 + 0.5

        # Axis scales of the global basin, log-spaced from 1 up to
        # global_conditioning (ill-conditioned valley).
        self._global_scales = torch.logspace(
            0.0, math.log10(global_conditioning), n_var
        )

        # Global optimum at the origin (unchanged by the rotation).
        self._optimal_x = torch.zeros(n_var)
        self._optimal_value = 0.0

    @property
    def optimal_x(self) -> Tensor:
        """Global optimum location."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Nominal global optimum value."""
        return self._optimal_value

    def _global_basin(self, x: Tensor) -> Tensor:
        """
        Global optimum: Ill-conditioned ellipsoid in rotated coordinates.

        Computes sum_i scale_i * y_i**2 with y = x @ Q.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) basin values
        """
        # matmul requires matching dtypes; promote both operands so inputs
        # that are not float32 (e.g. float64) are accepted.
        dtype = torch.promote_types(x.dtype, self._Q.dtype)
        Q = self._Q.to(device=x.device, dtype=dtype)
        scales = self._global_scales.to(x.device)

        y = x.to(dtype) @ Q  # Rotated
        return (scales * y ** 2).sum(dim=-1)

    def _distractor_basins(self, x: Tensor) -> Tensor:
        """
        Distractor basins: spheres offset from origin.

        Returns tensor of shape (..., n_distractors) with value for each distractor.
        """
        centers = self._distractor_centers.to(x.device)
        widths = self._distractor_widths.to(x.device)

        # (..., D) - (K, D) -> (..., K, D)
        diff = x.unsqueeze(-2) - centers

        # Scaled squared distance + offset
        dist_sq = (diff ** 2).sum(dim=-1)  # (..., K)
        return dist_sq * widths + self.distractor_depth

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate the deceptive landscape.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        f_global = self._global_basin(x)  # (...,)
        f_distractors = self._distractor_basins(x)  # (..., K)

        # Combine all basins along a trailing basin axis: (..., K + 1)
        f_all = torch.cat([f_global.unsqueeze(-1), f_distractors], dim=-1)

        return log_sum_exp_min(f_all, self.tau)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
# Registry
|
|
472
|
+
# Lookup table from lowercase class name to benchmark class.
SMOOTHED_FUNNEL_FUNCTIONS = {
    benchmark_cls.__name__.lower(): benchmark_cls
    for benchmark_cls in (
        MultiBasinRastrigin,
        MultiBasinRosenbrock,
        DeceptiveLandscape,
    )
}
|