evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,476 @@
1
+ """
2
+ Smoothed Multi-Funnel Benchmark Functions
3
+
4
+ These benchmarks create landscapes with multiple attraction basins where:
5
+ - One basin is wide but suboptimal (gradient trap)
6
+ - One basin is narrow but optimal (requires exploration to find)
7
+ - The optimal basin contains an ill-conditioned valley (benefits from gradients)
8
+
9
+ This design specifically targets the scenario where:
10
+ - Pure gradient methods: Fall into the wide basin, converge to suboptimal
11
+ - Pure EAs: Can find the narrow basin, but waste evaluations on fine convergence
12
+ - Differentiable EAs: Population finds basins + gradients accelerate valley convergence
13
+ """
14
+
15
+ from typing import Optional, Tuple
16
+ import math
17
+
18
+ import torch
19
+ from torch import Tensor
20
+
21
+ try:
22
+ from .base import BenchmarkFunction
23
+ except ImportError:
24
+ from base import BenchmarkFunction
25
+
26
+
27
def log_sum_exp_min(f_values: Tensor, tau: float = 1.0) -> Tensor:
    """
    Differentiable soft-minimum over the last axis via log-sum-exp.

        f(x) = -τ * log(Σ exp(-f_i/τ))

    For τ → 0 the result tends to ``min(f_values)``; a larger τ blends the
    candidates more smoothly.

    Args:
        f_values: (..., K) tensor holding K candidate values per point
        tau: Temperature parameter (smaller = sharper min)

    Returns:
        (...,) tensor with the last axis reduced to its smoothed minimum
    """
    # Negate and rescale first so the reduction is a plain logsumexp,
    # which torch evaluates in a numerically stable way.
    scaled_negated = f_values.neg() / tau
    soft_max_of_negated = torch.logsumexp(scaled_negated, dim=-1)
    return -tau * soft_max_of_negated
45
+
46
+
47
def random_orthogonal_matrix(n: int, seed: int = 0) -> Tensor:
    """
    Build an (n, n) orthogonal matrix from a seeded Gaussian sample.

    A private ``torch.Generator`` seeded with ``seed`` draws an n x n standard
    normal matrix; the Q factor of its QR decomposition is returned, so the
    same ``(n, seed)`` pair always yields the same matrix.
    """
    rng = torch.Generator()
    rng.manual_seed(seed)
    gaussian = torch.randn(n, n, generator=rng)
    # torch.linalg.qr returns a (Q, R) named tuple; only Q is needed.
    return torch.linalg.qr(gaussian).Q
54
+
55
+
56
class MultiBasinRastrigin(BenchmarkFunction):
    """
    Multi-basin Rastrigin: multiple attraction basins combined with log-sum-exp.

    Creates K Rastrigin basins at different centers with different offsets.
    The basins are combined using log-sum-exp smoothing, creating a landscape
    where gradient descent gets trapped in the nearest basin, but population-based
    search can discover the global optimum.

    Key properties:
    - Each basin has Rastrigin's characteristic local minima structure
    - Basins are smoothly connected (differentiable everywhere)
    - Global basin has offset=0, distractor basins have offset>0
    - From most starting points, gradients lead to a distractor

        f(x) = -τ·log(Σₖ exp(-fₖ(x)/τ))

    where fₖ(x) = A·n + Σᵢ[(xᵢ - cₖᵢ)² - A·cos(2π(xᵢ - cₖᵢ))] + δₖ

    Parameters
    ----------
    n_var : int
        Dimensionality. Default 10.
    n_basins : int
        Number of basins (must be >= 1). Default 3.
    amplitude : float
        Rastrigin amplitude A. Default 10.0.
    tau : float
        Smoothing temperature. Smaller = sharper basin boundaries. Default 1.0.
    basin_separation : float
        Controls distractor placement. The first two distractors sit at
        ``-basin_separation / 2`` and ``+basin_separation / 2`` in every
        coordinate; any further distractors sit at distance
        ``basin_separation`` from the origin along a random direction.
        Default 4.0.
    distractor_offset : float
        Minimum offset for distractor basins; each distractor's offset is
        drawn uniformly from ``[distractor_offset, 2 * distractor_offset)``.
        Default 5.0.
    seed : int
        Random seed for basin placement. Default 0.
    xl : float
        Lower bound forwarded to the base class. Default -5.0.
    xu : float
        Upper bound forwarded to the base class. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_basins < 1``.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for MultiBasinRastrigin."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        n_basins: int = 3,
        amplitude: float = 10.0,
        tau: float = 1.0,
        basin_separation: float = 4.0,
        distractor_offset: float = 5.0,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_basins < 1:
            raise ValueError("n_basins must be >= 1")

        self.name = "MultiBasinRastrigin"
        self.n_basins = n_basins
        self.amplitude = amplitude
        self.tau = tau
        self.seed = seed

        # Use a private generator so constructing a benchmark does not reseed
        # the process-wide torch RNG (which would silently perturb the
        # optimizer under test).
        # NOTE(review): a fresh CPU generator seeded with `seed` is expected to
        # reproduce the draws obtained after torch.manual_seed(seed) - verify
        # if bit-exact parity with earlier releases matters.
        g = torch.Generator()
        g.manual_seed(seed)

        # Basin centers: row 0 is the global basin and stays at the origin.
        self._centers = torch.zeros(n_basins, n_var)

        # Distractor basins: placed towards the corners of the search space
        # so that they capture random initializations.
        if n_basins >= 2:
            # First distractor: all-negative corner direction
            self._centers[1] = torch.full((n_var,), -basin_separation / 2)

        if n_basins >= 3:
            # Second distractor: all-positive corner direction
            self._centers[2] = torch.full((n_var,), basin_separation / 2)

        # Additional distractors on a sphere of radius basin_separation
        for k in range(3, n_basins):
            direction = torch.randn(n_var, generator=g)
            direction = direction / direction.norm()
            self._centers[k] = direction * basin_separation

        # Basin offsets: global=0, distractors in [offset, 2*offset)
        self._offsets = torch.zeros(n_basins)
        self._offsets[1:] = (
            distractor_offset
            + torch.rand(n_basins - 1, generator=g) * distractor_offset
        )

        # Reported optimum: center and un-smoothed value of basin 0.
        # NOTE(review): the log-sum-exp soft-min is never larger than the
        # smallest basin value, so the smoothed function at this point can be
        # marginally below 0.0 when distractors are close or tau is large.
        self._optimal_x = self._centers[0].clone()
        self._optimal_value = 0.0

    @property
    def optimal_x(self) -> Tensor:
        """Global optimum location (center of basin 0)."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Global optimum value (offset of basin 0)."""
        return self._optimal_value

    def _rastrigin(self, x: Tensor, center: Tensor) -> Tensor:
        """Rastrigin function centered at given point, reduced over the last axis."""
        A = self.amplitude
        diff = x - center.to(x.device)
        n = diff.shape[-1]

        quad = (diff ** 2).sum(dim=-1)
        cosine = (A * torch.cos(2 * math.pi * diff)).sum(dim=-1)

        return A * n + quad - cosine

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate multi-basin Rastrigin with log-sum-exp smoothing.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        centers = self._centers.to(x.device)
        offsets = self._offsets.to(x.device)

        # One shifted Rastrigin per basin, stacked on a new last axis:
        # (..., n_basins)
        f_all = torch.stack(
            [
                self._rastrigin(x, centers[k]) + offsets[k]
                for k in range(self.n_basins)
            ],
            dim=-1,
        )

        # Smooth min via log-sum-exp
        return log_sum_exp_min(f_all, self.tau)
185
+
186
+
187
class MultiBasinRosenbrock(BenchmarkFunction):
    """
    Multiple Rosenbrock basins with smooth transitions.

    Creates K funnels, each a shifted/rotated Rosenbrock with different
    biases. Uses log-sum-exp for smooth differentiable combination.

    Unlike the hard-min version, this maintains gradients across basin
    boundaries, enabling gradient-based methods to potentially escape
    suboptimal basins.

    Funnel k evaluates ``rosenbrock((x - c_k) @ R_k) + b_k`` where ``c_k`` is
    its center, ``R_k`` its rotation and ``b_k`` its bias (``b_0 = 0``).

    Parameters
    ----------
    n_var : int
        Dimensionality (must be >= 2). Default 10.
    n_funnels : int
        Number of funnels (must be >= 1). Default 4.
    tau : float
        Temperature for smoothing. Default 1.0.
    shift_scale : float
        Scale of random shifts. Default 2.0.
    bias_scale : float
        Scale of random biases (funnel depth differences). Default 50.0.
    rotate_funnels : bool
        Whether each funnel gets a random rotation. Default True.
    seed : int
        Random seed. Default 0.
    xl : float
        Lower bound. Default -5.0.
    xu : float
        Upper bound. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_var < 2`` or ``n_funnels < 1``.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for MultiBasinRosenbrock."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        n_funnels: int = 4,
        tau: float = 1.0,
        shift_scale: float = 2.0,
        bias_scale: float = 50.0,
        rotate_funnels: bool = True,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_var < 2:
            raise ValueError("MultiBasinRosenbrock requires n_var >= 2")
        if n_funnels < 1:
            raise ValueError("n_funnels must be >= 1")

        self.name = "MultiBasinRosenbrock"
        self.n_funnels = n_funnels
        self.tau = tau

        g = torch.Generator()
        g.manual_seed(seed)

        # Generate shifts (funnel centers).
        # Without rotation the optimum of a shifted Rosenbrock is at
        # center + 1, which the clamp below keeps inside [xl, xu].
        # NOTE(review): with rotate_funnels=True the optimum is at
        # center + R @ 1 instead, so this clamp alone does not guarantee
        # that it lies within the bounds.
        centers = torch.randn(n_funnels, n_var, generator=g) * shift_scale
        centers = centers.clamp(xl - 1.0, xu - 1.0)
        self._centers = centers

        # Generate biases (funnel 0 keeps bias 0 and is the intended best)
        biases = torch.zeros(n_funnels)
        if n_funnels > 1:
            biases[1:] = torch.rand(n_funnels - 1, generator=g) * bias_scale
        self._biases = biases

        # Generate per-funnel rotations
        if rotate_funnels:
            self._rotations = torch.stack([
                random_orthogonal_matrix(n_var, seed + k)
                for k in range(n_funnels)
            ])
        else:
            self._rotations = torch.eye(n_var).unsqueeze(0).expand(n_funnels, -1, -1)

        # Global optimum of funnel 0.
        # __call__ evaluates rosenbrock((x - c) @ R), which is minimal when
        # (x - c) @ R equals the all-ones vector. R is orthogonal, so
        # x - c = ones @ R.T = R @ ones, i.e. x* = c + R @ ones.
        ones = torch.ones(n_var)
        self._optimal_x = self._centers[0] + self._rotations[0] @ ones
        # NOTE(review): this is the un-smoothed value of funnel 0; the
        # soft-min over funnels can only be lower than or equal to it.
        self._optimal_value = float(self._biases[0].item())

    @property
    def optimal_x(self) -> Tensor:
        """Global optimum location."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Global optimum value."""
        return self._optimal_value

    def _rosenbrock(self, x: Tensor) -> Tensor:
        """Standard Rosenbrock function, reduced over the last axis."""
        x_i = x[..., :-1]
        x_ip1 = x[..., 1:]
        return (100.0 * (x_ip1 - x_i ** 2) ** 2 + (1.0 - x_i) ** 2).sum(dim=-1)

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate the multi-basin function.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        # Match the input's floating dtype as well as its device; einsum
        # needs both operands in the same dtype, so a float64 `x` would
        # otherwise clash with the float32 parameters built in __init__.
        dtype = x.dtype if x.is_floating_point() else self._centers.dtype
        centers = self._centers.to(device=x.device, dtype=dtype)
        biases = self._biases.to(device=x.device, dtype=dtype)
        rotations = self._rotations.to(device=x.device, dtype=dtype)

        # x: (..., D), centers: (K, D) -> shifted: (..., K, D)
        shifted = x.unsqueeze(-2) - centers

        # Apply per-funnel rotations: (..., K, D) @ (K, D, D) -> (..., K, D)
        rotated = torch.einsum('...kd,kde->...ke', shifted, rotations)

        # Rosenbrock on each funnel plus its bias: (..., K)
        f_all = self._rosenbrock(rotated) + biases

        # Smooth min over funnels
        return log_sum_exp_min(f_all, self.tau)
326
+
327
+
328
class DeceptiveLandscape(BenchmarkFunction):
    """
    Highly deceptive landscape with controllable difficulty.

    Combines multiple elements designed to challenge different optimization
    approaches:

    1. Smooth spherical distractor basins (intended to trap gradient methods)
    2. A global basin with ill-conditioning (needs exploration + exploitation)
    3. Non-separable structure via rotation

    The global basin and all distractors are merged with a log-sum-exp
    soft-min at temperature ``tau``.

    Parameters
    ----------
    n_var : int
        Dimensionality (must be >= 2). Default 10.
    tau : float
        Smoothing temperature. Default 0.5 (fairly sharp).
    n_distractors : int
        Number of distractor basins. Default 2.
    distractor_depth : float
        Constant added to every distractor basin, i.e. the value at a
        distractor's center before smoothing. Default 5.0.
    global_conditioning : float
        Condition number of global basin valley (must be > 0). Default 100.0.
    rotate : bool
        Apply random rotation. Default True.
    seed : int
        Random seed. Default 0.
    xl : float
        Lower bound. Default -5.0.
    xu : float
        Upper bound. Default 5.0.

    Raises
    ------
    ValueError
        If ``n_var < 2`` or ``global_conditioning <= 0``.
    """

    @staticmethod
    def default_bounds() -> Tuple[float, float]:
        """Default bounds for DeceptiveLandscape."""
        return (-5.0, 5.0)

    def __init__(
        self,
        n_var: int = 10,
        tau: float = 0.5,
        n_distractors: int = 2,
        distractor_depth: float = 5.0,
        global_conditioning: float = 100.0,
        rotate: bool = True,
        seed: int = 0,
        xl: float = -5.0,
        xu: float = 5.0,
    ):
        super().__init__(n_var=n_var, xl=xl, xu=xu)

        if n_var < 2:
            raise ValueError("DeceptiveLandscape requires n_var >= 2")
        if global_conditioning <= 0:
            # log10 of a non-positive number would yield NaN/inf scales.
            raise ValueError("global_conditioning must be > 0")

        self.name = "DeceptiveLandscape"
        self.tau = tau
        self.n_distractors = n_distractors
        self.distractor_depth = distractor_depth

        g = torch.Generator()
        g.manual_seed(seed)

        # Rotation for non-separability
        if rotate:
            self._Q = random_orthogonal_matrix(n_var, seed)
        else:
            self._Q = torch.eye(n_var)

        # Generate distractor centers (spread around the space)
        self._distractor_centers = torch.randn(n_distractors, n_var, generator=g) * 2.0

        # Per-distractor curvature in [0.5, 1.0). It multiplies the squared
        # distance, so a smaller value gives a flatter, wider bowl.
        self._distractor_widths = torch.rand(n_distractors, generator=g) * 0.5 + 0.5

        # Per-axis scales for the global basin, log-spaced from 1 up to
        # global_conditioning. The exponent is passed as a plain float so this
        # also works on PyTorch builds whose logspace does not accept a
        # tensor for `end`.
        self._global_scales = torch.logspace(
            0, math.log10(global_conditioning), n_var
        )

        # The global basin is a positive-definite quadratic in x @ Q, so its
        # minimum is at the origin with value 0.
        # NOTE(review): this is the un-smoothed value; the soft-min over all
        # basins can only be lower than or equal to it.
        self._optimal_x = torch.zeros(n_var)
        self._optimal_value = 0.0

    @property
    def optimal_x(self) -> Tensor:
        """Global optimum location."""
        return self._optimal_x

    @property
    def optimal_value(self) -> float:
        """Global optimum value."""
        return self._optimal_value

    def _global_basin(self, x: Tensor) -> Tensor:
        """
        Global optimum: Ill-conditioned ellipsoid in rotated coordinates.

        Returns ``sum(scales * (x @ Q) ** 2)`` over the last axis, shape (...,).
        """
        # Cast to x's floating dtype too: matmul needs matching dtypes, so a
        # float64 `x` would otherwise clash with the float32 rotation.
        dtype = x.dtype if x.is_floating_point() else self._Q.dtype
        Q = self._Q.to(device=x.device, dtype=dtype)
        scales = self._global_scales.to(device=x.device, dtype=dtype)

        y = x @ Q  # Rotated
        return (scales * y ** 2).sum(dim=-1)

    def _distractor_basins(self, x: Tensor) -> Tensor:
        """
        Distractor basins: scaled spheres offset from origin.

        Returns tensor of shape (..., n_distractors) with value for each distractor.
        """
        dtype = x.dtype if x.is_floating_point() else self._distractor_centers.dtype
        centers = self._distractor_centers.to(device=x.device, dtype=dtype)
        widths = self._distractor_widths.to(device=x.device, dtype=dtype)

        # (..., D) - (K, D) -> (..., K, D)
        diff = x.unsqueeze(-2) - centers

        # Scaled squared distance + offset
        dist_sq = (diff ** 2).sum(dim=-1)  # (..., K)
        return dist_sq * widths + self.distractor_depth

    def __call__(self, x: Tensor) -> Tensor:
        """
        Evaluate the deceptive landscape.

        Args:
            x: (..., n_var) input tensor

        Returns:
            (...,) function values
        """
        f_global = self._global_basin(x)  # (...,)
        f_distractors = self._distractor_basins(x)  # (..., K)

        # Combine all basins: global basin first, then the K distractors
        f_all = torch.cat([f_global.unsqueeze(-1), f_distractors], dim=-1)

        return log_sum_exp_min(f_all, self.tau)
469
+
470
+
471
# Registry: lower-case lookup key -> benchmark class defined in this module.
SMOOTHED_FUNNEL_FUNCTIONS = dict(
    multibasinrastrigin=MultiBasinRastrigin,
    multibasinrosenbrock=MultiBasinRosenbrock,
    deceptivelandscape=DeceptiveLandscape,
)