evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1365 @@
1
+ """
2
+ Covariance Matrix Adaptation Evolution Strategy (CMA-ES) for EvoGrad.
3
+
4
+ This module provides a fully differentiable CMA-ES implementation that
5
+ supports both classical and gradient-enabled optimisation modes.
6
+
7
+ CMA-ES evolves a multivariate Gaussian distribution N(μ, σ²C) through:
8
+ 1. Sampling: Generate offspring from the distribution
9
+ 2. Selection: Rank solutions by fitness
10
+ 3. Recombination: Update mean using weighted average of best solutions
11
+ 4. Adaptation: Update covariance matrix and step-size
12
+
13
+ The key components are:
14
+ - μ (mean): Center of the search distribution
15
+ - σ (sigma): Overall step-size (scale)
16
+ - C (covariance): Shape of the distribution (via Cholesky factor L)
17
+ - Evolution paths: p_σ and p_c for adaptation
18
+
19
+ Restart Strategies:
20
+ - IPOP-CMA-ES: Restart with increasing population size
21
+ - BIPOP-CMA-ES: Alternate between small (focused) and large (broad) populations
22
+
23
+ Modes:
24
+ - adaptive=False, differentiable=False: Classical CMA-ES
25
+ - adaptive=True, differentiable=False: Adaptation coefficients
26
+ (cc, cs, c1, cmu, damps), the log step-size and the Cholesky factor L are stored as learnable parameters (nn.Parameter)
27
+ - adaptive=False, differentiable=True: Mean μ is learnable
28
+ via backpropagation
29
+ - adaptive=True, differentiable=True: Both adaptation coefficients
30
+ and mean are learnable
31
+
32
+ Numerical note:
33
+ The covariance matrix is stored via its Cholesky factor ``L``. In
34
+ differentiable mode, gradient-based updates to ``L`` may occasionally
35
+ produce a non-positive-definite covariance; ``_safe_cholesky`` handles
36
+ this with cascading fallbacks (eigenvalue correction → regularisation
37
+ → identity reset). See its docstring for details.
38
+
39
+ Example:
40
+ >>> from evograd.algorithms import CMAES
41
+ >>> from evograd.core import Problem, minimize
42
+ >>>
43
+ >>> problem = Problem(
44
+ ... objective=lambda x: (x**2).sum(dim=-1),
45
+ ... n_var=30,
46
+ ... xl=-100.0,
47
+ ... xu=100.0,
48
+ ... )
49
+ >>>
50
+ >>> # Classical CMA-ES
51
+ >>> cmaes = CMAES(pop_size=50, sigma=0.5)
52
+ >>> result = minimize(problem, cmaes, max_evals=10000)
53
+ >>>
54
+ >>> # Adaptive CMA-ES with learnable coefficients
55
+ >>> cmaes = CMAES(pop_size=50, adaptive=True)
56
+ >>> result = minimize(problem, cmaes, max_evals=10000)
57
+ >>>
58
+ >>> # IPOP-CMA-ES with restarts
59
+ >>> cmaes = CMAES(pop_size=50, restarts=9, incpopsize=2)
60
+ >>> result = minimize(problem, cmaes, max_evals=100000)
61
+ >>>
62
+ >>> # BIPOP-CMA-ES
63
+ >>> cmaes = CMAES(pop_size=50, restarts=9, bipop=True)
64
+ >>> result = minimize(problem, cmaes, max_evals=100000)
65
+
66
+ Reference:
67
+ Hansen, N. & Ostermeier, A. (2001). Completely Derandomized
68
+ Self-Adaptation in Evolution Strategies. Evolutionary Computation.
69
+
70
+ Hansen, N. (2009). Benchmarking a BI-Population CMA-ES on the
71
+ BBOB-2009 Function Testbed. GECCO Workshop.
72
+ """
73
+
74
+ from __future__ import annotations
75
+
76
+ import math
77
+ from dataclasses import dataclass, field
78
+ from enum import Enum
79
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
80
+
81
+ import torch
82
+ import torch.nn as nn
83
+ from torch import Tensor
84
+
85
+ from evograd.core.algorithm import Algorithm
86
+
87
+ if TYPE_CHECKING:
88
+ from evograd.core.problem import Problem
89
+
90
+ __all__ = [
91
+ "CMAES",
92
+ "cmaes_default",
93
+ "cmaes_small",
94
+ "cmaes_large",
95
+ "cmaes_adaptive",
96
+ "cmaes_ipop",
97
+ "cmaes_bipop",
98
+ ]
99
+
100
+
101
+ def _expected_norm(dim: int) -> float:
102
+ """
103
+ Expected norm of a standard normal vector E[||N(0, I)||].
104
+
105
+ Accurate to O(1/dim).
106
+ """
107
+ d = float(dim)
108
+ return math.sqrt(d) * (1.0 - 1.0 / (4.0 * d) + 1.0 / (21.0 * d * d))
109
+
110
+
111
class RestartRegime(Enum):
    """Population regime of a BIPOP run: large (IPOP-like) or small."""
    LARGE = "large"  # IPOP-like increasing population
    SMALL = "small"  # Small focused population
115
+
116
+
117
@dataclass
class RestartState:
    """
    Tracks restart-related state for IPOP/BIPOP strategies.

    A fresh instance represents "no restart performed yet": all counters
    are zero, no best solution is stored and the best fitness is ``+inf``
    so that any finite fitness improves on it.

    Attributes:
        n_restarts: Number of restarts performed so far.
        initial_pop_size: Original population size before any restarts.
        current_pop_size: Current population size after restarts.
        best_ever_x: Best solution found across all restarts.
        best_ever_f: Best fitness found across all restarts.
        regime: Current regime for BIPOP (LARGE or SMALL).
        large_evals: Total evaluations used by large populations (BIPOP).
        small_evals: Total evaluations used by small populations (BIPOP).
        small_n_restarts: Number of small-population restarts (BIPOP).
        run_history: History of best fitness per run.
    """
    n_restarts: int = 0
    initial_pop_size: int = 0
    current_pop_size: int = 0
    best_ever_x: Optional[Tensor] = None
    best_ever_f: float = float('inf')
    regime: RestartRegime = RestartRegime.LARGE
    large_evals: int = 0
    small_evals: int = 0
    small_n_restarts: int = 0
    # default_factory gives every instance its own list.
    run_history: List[float] = field(default_factory=list)
144
+
145
+
146
+ class CMAES(Algorithm):
147
+ """
148
+ Covariance Matrix Adaptation Evolution Strategy (CMA-ES).
149
+
150
+ CMA-ES is a state-of-the-art evolutionary algorithm for continuous
151
+ optimisation. It adapts a full covariance matrix to learn the
152
+ structure of the objective function landscape.
153
+
154
+ The algorithm samples from N(μ, σ²C) and adapts:
155
+ - μ: Distribution mean (search center)
156
+ - σ: Step-size (overall scale)
157
+ - C: Covariance matrix (search shape)
158
+
159
+ Args:
160
+ pop_size: Population size (lambda). If None, uses 4 + floor(3*ln(n)).
161
+ sigma: Initial step-size. Default: 0.5.
162
+ x0: Initial mean. If None, uses center of bounds.
163
+ cc: Cumulation constant for rank-one update. If None, uses default.
164
+ cs: Cumulation constant for step-size control. If None, uses default.
165
+ c1: Learning rate for rank-one update. If None, uses default.
166
+ cmu: Learning rate for rank-mu update. If None, uses default.
167
+ damps: Damping for step-size update. If None, uses default.
168
+ restarts: Number of restarts with increasing population size (IPOP).
169
+ Set to 0 for no restarts. Default: 0.
170
+ restart_from_best: If True, restart from best-ever solution.
171
+ If False, restart from random point. Default: False.
172
+ incpopsize: Multiplier for population size increase after restart.
173
+ Default: 2.
174
+ bipop: If True, use BIPOP strategy alternating between small and
175
+ large populations. Requires restarts > 0. Default: False.
176
+ tolfun: Tolerance on function value for restart detection.
177
+ Default: 1e-11.
178
+ tolx: Tolerance on x change for restart detection. Default: 1e-11.
179
+ sampling: Operator for initial population generation.
180
+ repair: Repair operator for constraint handling.
181
+ adaptive: If True, the adaptation coefficients, the log step-size and the Cholesky factor are stored as learnable parameters.
182
+ differentiable: If True, mean μ is learnable.
183
+ dtype: Tensor dtype.
184
+
185
+ Attributes:
186
+ mean: Current distribution mean μ.
187
+ sigma: Current step-size σ.
188
+ C: Current covariance matrix.
189
+ L: Cholesky factor of C.
190
+ p_sigma: Evolution path for step-size.
191
+ p_c: Evolution path for covariance.
192
+ restart_state: State tracking for IPOP/BIPOP restarts.
193
+
194
+ Example:
195
+ >>> # Classical CMA-ES
196
+ >>> cmaes = CMAES(pop_size=50, sigma=0.3)
197
+ >>>
198
+ >>> # Adaptive CMA-ES
199
+ >>> cmaes = CMAES(adaptive=True)
200
+ >>>
201
+ >>> # IPOP-CMA-ES with 9 restarts
202
+ >>> cmaes = CMAES(restarts=9, incpopsize=2)
203
+ >>>
204
+ >>> # BIPOP-CMA-ES
205
+ >>> cmaes = CMAES(restarts=9, bipop=True)
206
+ >>>
207
+ >>> # Differentiable mean
208
+ >>> cmaes = CMAES(differentiable=True)
209
+ >>>
210
+ >>> # Fully differentiable
211
+ >>> cmaes = CMAES(adaptive=True, differentiable=True)
212
+ """
213
+
214
def __init__(
    self,
    pop_size: Optional[int] = None,
    sigma: float = 0.5,
    x0: Optional[Tensor] = None,
    cc: Optional[float] = None,
    cs: Optional[float] = None,
    c1: Optional[float] = None,
    cmu: Optional[float] = None,
    damps: Optional[float] = None,
    # Restart parameters
    restarts: int = 0,
    restart_from_best: bool = False,
    incpopsize: int = 2,
    bipop: bool = False,
    tolfun: float = 1e-11,
    tolx: float = 1e-11,
    # Standard parameters
    sampling: Optional[nn.Module] = None,
    repair: Optional[nn.Module] = None,
    adaptive: bool = False,
    differentiable: bool = False,
    dtype: torch.dtype = torch.float32,
) -> None:
    """
    Store the configuration and initialise the base algorithm.

    Only records the requested settings; the distribution state (mean,
    step-size, covariance, paths, coefficients) is created in ``_setup``.

    Raises:
        ValueError: If ``sigma`` is not strictly positive, or if an
            explicit ``pop_size`` is smaller than 2.
    """
    # Fail early: a non-positive step-size cannot be log-transformed in
    # adaptive mode and yields a degenerate distribution otherwise.
    if not sigma > 0:
        raise ValueError(f"sigma must be strictly positive, got {sigma!r}")
    # pop_size < 2 gives mu = pop_size // 2 = 0, i.e. no parents and an
    # empty weight vector.
    if pop_size is not None and pop_size < 2:
        raise ValueError(f"pop_size must be at least 2, got {pop_size!r}")

    self.adaptive = adaptive
    self._init_sigma = sigma
    self._init_x0 = x0
    self._init_cc = cc
    self._init_cs = cs
    self._init_c1 = c1
    self._init_cmu = cmu
    self._init_damps = damps

    # Restart parameters
    self._restarts = restarts
    self._restart_from_best = restart_from_best
    self._incpopsize = incpopsize
    self._bipop = bipop
    self._tolfun = tolfun
    self._tolx = tolx

    # Remember whether the caller chose a size; None means "derive the
    # default from the problem dimension in _setup".
    self._requested_pop_size = pop_size

    # The base class needs a concrete size now; 10 is a placeholder that
    # _setup overwrites when no size was requested.
    effective_pop_size = pop_size if pop_size is not None else 10

    # CMA-ES doesn't use standard EA operators
    super().__init__(
        pop_size=effective_pop_size,
        sampling=sampling,
        selection=None,
        crossover=None,
        mutation=None,
        survival=None,
        repair=repair,
        eliminate_duplicates=False,
        n_offsprings=effective_pop_size,
        differentiable=differentiable,
        adaptive=adaptive,
        dtype=dtype,
    )

    # Initialize restart state
    self.restart_state = RestartState()
279
+
280
+ # =========================================================================
281
+ # Setup
282
+ # =========================================================================
283
+
284
def _setup(self) -> None:
    """Create all problem-dependent CMA-ES state once a problem is attached."""
    dim = self.problem.n_var

    # No explicit size requested: use lambda = 4 + floor(3 ln n).
    if self._requested_pop_size is None:
        self.pop_size = 4 + int(3 * math.log(dim))
        self.n_offsprings = self.pop_size

    # Both restart size counters start from the (possibly defaulted) size.
    tracker = self.restart_state
    tracker.initial_pop_size = self.pop_size
    tracker.current_pop_size = self.pop_size

    # Half of the population acts as parents for recombination.
    self._mu = self.pop_size // 2

    # Order matters: the coefficients depend on mu_eff, which
    # _setup_weights computes.
    self._setup_weights()
    self._setup_mean(dim)
    self._setup_sigma()
    self._setup_covariance(dim)
    self._setup_evolution_paths(dim)
    self._setup_coefficients(dim)

    # E[||N(0, I)||], the reference length for the step-size path.
    self._chi_n = _expected_norm(dim)

    # Diagonal jitter added to the covariance; grows with n^2, capped at 1e-4.
    self._eps = min(1e-14 * (dim ** 2), 1e-4)

    # Bookkeeping used by restart detection.
    self._fitness_history: List[float] = []
    self._generation_count = 0
328
+
329
+ def _setup_weights(self) -> None:
330
+ """Setup recombination weights."""
331
+ mu = self._mu
332
+
333
+ # Log weights: w_i = log(mu + 0.5) - log(i)
334
+ raw_weights = torch.log(
335
+ torch.tensor(mu + 0.5, device=self.device, dtype=self.dtype)
336
+ ) - torch.log(
337
+ torch.arange(1, mu + 1, device=self.device, dtype=self.dtype)
338
+ )
339
+
340
+ # Normalize
341
+ weights = raw_weights / raw_weights.sum()
342
+ self.register_buffer("_weights", weights)
343
+
344
+ # Variance effective selection mass
345
+ self._mu_eff = float(1.0 / (weights ** 2).sum())
346
+
347
+ def _setup_mean(self, n_var: int, restart: bool = False) -> None:
348
+ """Setup distribution mean."""
349
+ if restart and self._restart_from_best and self.restart_state.best_ever_x is not None:
350
+ # Restart from best-ever solution
351
+ mean = self.restart_state.best_ever_x.clone()
352
+ elif self._init_x0 is not None and not restart:
353
+ mean = self._init_x0.to(device=self.device, dtype=self.dtype)
354
+ else:
355
+ # Random point within bounds or center
356
+ if restart:
357
+ # Random initialization for restart
358
+ mean = self.xl + torch.rand(n_var, device=self.device, dtype=self.dtype) * (self.xu - self.xl)
359
+ else:
360
+ # Center of bounds
361
+ mean = 0.5 * (self.xl + self.xu)
362
+
363
+ # Mean is always a parameter (for differentiable mode)
364
+ # But gradients only flow when differentiable=True
365
+ if self.differentiable:
366
+ if hasattr(self, '_mean') and isinstance(self._mean, nn.Parameter):
367
+ with torch.no_grad():
368
+ self._mean.copy_(mean)
369
+ else:
370
+ self._mean = nn.Parameter(mean.clone())
371
+ else:
372
+ if hasattr(self, '_mean'):
373
+ self._mean.copy_(mean)
374
+ else:
375
+ self.register_buffer("_mean", mean.clone())
376
+
377
+ def _setup_sigma(self, restart: bool = False) -> None:
378
+ """Setup step-size."""
379
+ sigma_val = self._init_sigma
380
+
381
+ if self.adaptive:
382
+ if hasattr(self, '_log_sigma') and isinstance(self._log_sigma, nn.Parameter):
383
+ with torch.no_grad():
384
+ self._log_sigma.fill_(math.log(sigma_val))
385
+ else:
386
+ self._log_sigma = nn.Parameter(
387
+ torch.tensor(sigma_val, device=self.device, dtype=self.dtype).log()
388
+ )
389
+ else:
390
+ if hasattr(self, '_sigma_buffer'):
391
+ self._sigma_buffer.fill_(sigma_val)
392
+ else:
393
+ self.register_buffer(
394
+ "_sigma_buffer",
395
+ torch.tensor(sigma_val, device=self.device, dtype=self.dtype)
396
+ )
397
+
398
+ def _setup_covariance(self, n_var: int, restart: bool = False) -> None:
399
+ """Setup covariance matrix via Cholesky factor."""
400
+ # Initialize as identity (C = I, L = I)
401
+ L_init = torch.eye(n_var, device=self.device, dtype=self.dtype)
402
+
403
+ if self.adaptive:
404
+ if hasattr(self, '_L') and isinstance(self._L, nn.Parameter):
405
+ with torch.no_grad():
406
+ self._L.copy_(L_init)
407
+ else:
408
+ self._L = nn.Parameter(L_init)
409
+ else:
410
+ if hasattr(self, '_L'):
411
+ self._L.copy_(L_init)
412
+ else:
413
+ self.register_buffer("_L", L_init)
414
+
415
+ def _setup_evolution_paths(self, n_var: int, restart: bool = False) -> None:
416
+ """Setup evolution paths."""
417
+ zeros = torch.zeros(n_var, device=self.device, dtype=self.dtype)
418
+
419
+ if hasattr(self, '_p_sigma'):
420
+ self._p_sigma.copy_(zeros)
421
+ else:
422
+ self.register_buffer("_p_sigma", zeros.clone())
423
+
424
+ if hasattr(self, '_p_c'):
425
+ self._p_c.copy_(zeros)
426
+ else:
427
+ self.register_buffer("_p_c", zeros.clone())
428
+
429
+ def _setup_coefficients(self, n_var: int) -> None:
430
+ """Setup adaptation coefficients."""
431
+ mu_eff = self._mu_eff
432
+ d = float(n_var)
433
+
434
+ # Default values following Hansen's recommendations
435
+ cc_default = (4 + mu_eff / d) / (d + 4 + 2 * mu_eff / d)
436
+ cs_default = (mu_eff + 2) / (d + mu_eff + 5)
437
+ c1_default = 2 / ((d + 1.3) ** 2 + mu_eff)
438
+ cmu_default = min(
439
+ 1 - c1_default,
440
+ 2 * (mu_eff - 2 + 1 / mu_eff) / ((d + 2) ** 2 + mu_eff)
441
+ )
442
+ damps_default = 1 + 2 * max(0, math.sqrt((mu_eff - 1) / (d + 1)) - 1) + cs_default
443
+
444
+ # Use provided values or defaults
445
+ cc = self._init_cc if self._init_cc is not None else cc_default
446
+ cs = self._init_cs if self._init_cs is not None else cs_default
447
+ c1 = self._init_c1 if self._init_c1 is not None else c1_default
448
+ cmu = self._init_cmu if self._init_cmu is not None else cmu_default
449
+ damps = self._init_damps if self._init_damps is not None else damps_default
450
+
451
+ if self.adaptive:
452
+ # Store as logits for bounded optimization
453
+ if not hasattr(self, '_cc_logit'):
454
+ self._cc_logit = nn.Parameter(
455
+ self._to_logit(cc).to(device=self.device, dtype=self.dtype)
456
+ )
457
+ self._cs_logit = nn.Parameter(
458
+ self._to_logit(cs).to(device=self.device, dtype=self.dtype)
459
+ )
460
+ self._c1_logit = nn.Parameter(
461
+ self._to_logit(c1).to(device=self.device, dtype=self.dtype)
462
+ )
463
+ self._cmu_logit = nn.Parameter(
464
+ self._to_logit(cmu).to(device=self.device, dtype=self.dtype)
465
+ )
466
+ # damps is positive, store as log
467
+ self._log_damps = nn.Parameter(
468
+ torch.tensor(damps, device=self.device, dtype=self.dtype).log()
469
+ )
470
+ else:
471
+ if not hasattr(self, '_cc'):
472
+ self.register_buffer("_cc", torch.tensor(cc, device=self.device, dtype=self.dtype))
473
+ self.register_buffer("_cs", torch.tensor(cs, device=self.device, dtype=self.dtype))
474
+ self.register_buffer("_c1", torch.tensor(c1, device=self.device, dtype=self.dtype))
475
+ self.register_buffer("_cmu", torch.tensor(cmu, device=self.device, dtype=self.dtype))
476
+ self.register_buffer("_damps", torch.tensor(damps, device=self.device, dtype=self.dtype))
477
+
478
+ @staticmethod
479
+ def _to_logit(p: float, eps: float = 1e-7) -> Tensor:
480
+ """Convert probability/rate to logit."""
481
+ p = max(min(p, 1 - eps), eps)
482
+ return torch.tensor(math.log(p / (1 - p)))
483
+
484
+ # =========================================================================
485
+ # Properties
486
+ # =========================================================================
487
+
488
@property
def population(self) -> Tensor:
    """Current population (sampled points), as stored in ``self._population``."""
    return self._population
492
+
493
@property
def fitness(self) -> Tensor:
    """Current fitness values, read from ``self.state.fitness``."""
    return self.state.fitness
497
+
498
@property
def mean(self) -> Tensor:
    """Current distribution mean μ (the stored ``_mean`` tensor itself, not a copy)."""
    return self._mean
502
+
503
@property
def sigma(self) -> Tensor:
    """Current step-size σ: ``exp(_log_sigma)`` in adaptive mode, else the buffer."""
    return self._log_sigma.exp() if self.adaptive else self._sigma_buffer
509
+
510
@property
def L(self) -> Tensor:
    """Cholesky factor of covariance matrix.

    Only the lower triangle of the stored ``_L`` is returned, so any
    entries above the diagonal in the raw storage are ignored.
    """
    return torch.tril(self._L)
514
+
515
@property
def C(self) -> Tensor:
    """Covariance matrix rebuilt from its Cholesky factor: ``C = L @ L.T``."""
    factor = self.L
    return torch.matmul(factor, factor.T)
520
+
521
@property
def p_sigma(self) -> Tensor:
    """Evolution path for step-size control (the stored ``_p_sigma`` buffer)."""
    return self._p_sigma
525
+
526
@property
def p_c(self) -> Tensor:
    """Evolution path for covariance update (the stored ``_p_c`` buffer)."""
    return self._p_c
530
+
531
@property
def cc(self) -> Tensor:
    """Cumulation constant for the covariance path; sigmoid of the logit when adaptive."""
    return torch.sigmoid(self._cc_logit) if self.adaptive else self._cc
537
+
538
@property
def cs(self) -> Tensor:
    """Cumulation constant for the step-size path; sigmoid of the logit when adaptive."""
    return torch.sigmoid(self._cs_logit) if self.adaptive else self._cs
544
+
545
@property
def c1(self) -> Tensor:
    """Learning rate of the rank-one update; sigmoid of the logit when adaptive."""
    return torch.sigmoid(self._c1_logit) if self.adaptive else self._c1
551
+
552
@property
def cmu(self) -> Tensor:
    """Learning rate of the rank-mu update; sigmoid of the logit when adaptive."""
    return torch.sigmoid(self._cmu_logit) if self.adaptive else self._cmu
558
+
559
@property
def damps(self) -> Tensor:
    """Damping of the step-size update; ``exp(_log_damps)`` when adaptive."""
    return self._log_damps.exp() if self.adaptive else self._damps
565
+
566
@property
def n_restarts_remaining(self) -> int:
    """Restarts still available: the configured budget minus those used, never negative."""
    used = self.restart_state.n_restarts
    return max(0, self._restarts - used)
570
+
571
+ # =========================================================================
572
+ # Core CMA-ES Methods
573
+ # =========================================================================
574
+
575
+ def _sample(self) -> tuple:
576
+ """
577
+ Sample offspring from N(μ, σ²C) using reparameterization.
578
+
579
+ x = μ + σ * L @ z, where z ~ N(0, I)
580
+
581
+ Returns:
582
+ Tuple of (offspring, z_vectors) where z are the standard normal samples.
583
+ """
584
+ N, D = self.pop_size, self.n_var
585
+
586
+ # Sample standard normal
587
+ z = torch.randn(N, D, device=self.device, dtype=self.dtype)
588
+
589
+ # Transform: y = L @ z
590
+ L = self.L
591
+ y = (L @ z.T).T # [N, D]
592
+
593
+ # Scale and shift: x = μ + σ * y
594
+ offspring = self.mean + self.sigma * y
595
+
596
+ return offspring, z, y
597
+
598
+ def _infill(self) -> Tensor:
599
+ """
600
+ Generate offspring through sampling.
601
+
602
+ Returns:
603
+ Offspring population [pop_size, n_var].
604
+ """
605
+ # Sample from distribution
606
+ offspring, z, y = self._sample()
607
+
608
+ # Store for later use in adaptation
609
+ self._pending_z = z
610
+ self._pending_y = y
611
+
612
+ # Repair bounds
613
+ if self.repair is not None:
614
+ offspring = self.repair(offspring, self.xl, self.xu)
615
+ else:
616
+ offspring = torch.clamp(offspring, self.xl, self.xu)
617
+
618
+ return offspring
619
+
620
+ def _advance(self, offspring: Tensor, offspring_fitness: Tensor) -> None:
621
+ """
622
+ Update CMA-ES state based on offspring evaluation.
623
+
624
+ Args:
625
+ offspring: Offspring population [pop_size, n_var].
626
+ offspring_fitness: Fitness values [pop_size].
627
+ """
628
+ N, D = self.pop_size, self.n_var
629
+ mu = self._mu
630
+
631
+ # Sort by fitness (ascending for minimization)
632
+ sorted_indices = torch.argsort(offspring_fitness)
633
+ selected_indices = sorted_indices[:mu]
634
+
635
+ # Get selected y vectors (in transformed space)
636
+ y_selected = self._pending_y[selected_indices] # [mu, D]
637
+
638
+ # Weighted recombination in y-space
639
+ y_w = (self._weights.unsqueeze(-1) * y_selected).sum(dim=0) # [D]
640
+
641
+ # Update mean
642
+ new_mean = self.mean + self.sigma * y_w
643
+
644
+ # Update evolution paths
645
+ new_p_sigma, new_p_c, h_sigma = self._update_evolution_paths(y_w)
646
+
647
+ # Update covariance
648
+ new_L = self._update_covariance(y_selected, new_p_c, h_sigma)
649
+
650
+ # Update step-size
651
+ new_sigma = self._update_sigma(new_p_sigma)
652
+
653
+ # Commit updates
654
+ self._commit_updates(
655
+ new_mean=new_mean,
656
+ new_sigma=new_sigma,
657
+ new_L=new_L,
658
+ new_p_sigma=new_p_sigma,
659
+ new_p_c=new_p_c,
660
+ offspring=offspring,
661
+ offspring_fitness=offspring_fitness,
662
+ )
663
+
664
+ # Update generation count
665
+ self._generation_count += 1
666
+
667
+ # Track fitness history for restart detection
668
+ best_fitness = float(offspring_fitness.min())
669
+ self._fitness_history.append(best_fitness)
670
+
671
+ # Update best-ever solution
672
+ if best_fitness < self.restart_state.best_ever_f:
673
+ best_idx = offspring_fitness.argmin()
674
+ self.restart_state.best_ever_f = best_fitness
675
+ self.restart_state.best_ever_x = offspring[best_idx].clone().detach()
676
+
677
+ # Check for restart (only if not in differentiable mode)
678
+ if self._restarts > 0 and not self.differentiable:
679
+ self._check_and_perform_restart()
680
+
681
+ # Cleanup
682
+ del self._pending_z
683
+ del self._pending_y
684
+
685
+ def _update_evolution_paths(self, y_w: Tensor) -> tuple:
686
+ """
687
+ Update evolution paths p_σ and p_c.
688
+
689
+ Args:
690
+ y_w: Weighted mean of selected y vectors [n_var].
691
+
692
+ Returns:
693
+ Tuple of (new_p_sigma, new_p_c, h_sigma).
694
+ """
695
+ D = self.n_var
696
+ mu_eff = self._mu_eff
697
+
698
+ # Compute C^(-1/2) @ y_w using L^(-1) @ y_w
699
+ L = self.L
700
+ # Solve L @ z_w = y_w for z_w (equivalent to L^(-1) @ y_w)
701
+ z_w = torch.linalg.solve_triangular(L, y_w.unsqueeze(-1), upper=False).squeeze(-1)
702
+
703
+ # Update p_sigma (conjugate evolution path)
704
+ cs = self.cs
705
+ new_p_sigma = (1 - cs) * self.p_sigma + math.sqrt(cs * (2 - cs) * mu_eff) * z_w
706
+
707
+ # Heaviside function h_sigma (smooth approximation)
708
+ norm_p_sigma = new_p_sigma.norm()
709
+ threshold = 1.4 + 2.0 / (D + 1)
710
+ h_sigma = torch.sigmoid(10 * (threshold - norm_p_sigma / self._chi_n))
711
+
712
+ # Update p_c (evolution path for covariance)
713
+ cc = self.cc
714
+ new_p_c = (1 - cc) * self.p_c + h_sigma * math.sqrt(cc * (2 - cc) * mu_eff) * y_w
715
+
716
+ return new_p_sigma, new_p_c, h_sigma
717
+
718
+ def _update_covariance(
719
+ self,
720
+ y_selected: Tensor,
721
+ new_p_c: Tensor,
722
+ h_sigma: Tensor,
723
+ ) -> Tensor:
724
+ """
725
+ Update covariance matrix C.
726
+
727
+ Args:
728
+ y_selected: Selected y vectors [mu, n_var].
729
+ new_p_c: New evolution path [n_var].
730
+ h_sigma: Heaviside indicator.
731
+
732
+ Returns:
733
+ New Cholesky factor L.
734
+ """
735
+ D = self.n_var
736
+ c1 = self.c1
737
+ cmu = self.cmu
738
+ cc = self.cc
739
+
740
+ # Current covariance
741
+ L = self.L
742
+ C = L @ L.T
743
+
744
+ # Rank-one update
745
+ rank_one = torch.outer(new_p_c, new_p_c)
746
+
747
+ # Rank-mu update
748
+ rank_mu = (
749
+ self._weights.unsqueeze(-1).unsqueeze(-1) *
750
+ y_selected.unsqueeze(-1) * y_selected.unsqueeze(-2)
751
+ ).sum(dim=0)
752
+
753
+ # Old C decay correction for h_sigma < 1
754
+ c1_correction = c1 * (1 - h_sigma ** 2) * cc * (2 - cc)
755
+
756
+ # New covariance
757
+ C_new = (
758
+ (1 - c1 - cmu + c1_correction) * C +
759
+ c1 * rank_one +
760
+ cmu * rank_mu
761
+ )
762
+
763
+ # Ensure symmetry
764
+ C_new = 0.5 * (C_new + C_new.T)
765
+
766
+ # Add small diagonal for numerical stability
767
+ C_new = C_new + self._eps * torch.eye(D, device=self.device, dtype=self.dtype)
768
+
769
+ # Compute new Cholesky factor with robust fallback
770
+ L_new = self._safe_cholesky(C_new)
771
+
772
+ return L_new
773
+
774
+ def _safe_cholesky(self, C: Tensor) -> Tensor:
775
+ """
776
+ Compute Cholesky decomposition with robust fallback.
777
+
778
+ If standard Cholesky fails, applies eigenvalue correction
779
+ and regularization to ensure positive definiteness.
780
+
781
+ **Numerical risk in differentiable mode:** When the Cholesky factor
782
+ ``L`` is updated via gradient descent (``adaptive=True`` or
783
+ ``differentiable=True``), the optimizer step may push the
784
+ reconstructed covariance matrix ``C = L @ L.T`` towards a
785
+ non-positive-definite region. The cascading fallbacks below handle
786
+ this gracefully, but gradient-based updates can still introduce
787
+ noise through the eigendecomposition path. For an alternative that
788
+ guarantees PD by construction, consider a log-Cholesky
789
+ parameterisation (``L_diag = exp(l_diag)``). The current approach
790
+ is retained for simplicity and compatibility with the classical
791
+ CMA-ES update path.
792
+
793
+ Args:
794
+ C: Covariance matrix [n_var, n_var].
795
+
796
+ Returns:
797
+ Lower triangular Cholesky factor L where C ≈ L @ L.T.
798
+ """
799
+ D = C.shape[0]
800
+
801
+ # Attempt 1: Direct Cholesky
802
+ try:
803
+ return torch.linalg.cholesky(C)
804
+ except RuntimeError:
805
+ pass
806
+
807
+ # Attempt 2: Eigendecomposition with correction
808
+ try:
809
+ # Use eigh for symmetric matrices (more stable than eig)
810
+ eigval, eigvec = torch.linalg.eigh(C)
811
+
812
+ # Clamp eigenvalues to be positive
813
+ min_eigval = max(1e-10, float(eigval.max()) * 1e-12)
814
+ eigval_fixed = torch.clamp(eigval, min=min_eigval)
815
+
816
+ # Reconstruct covariance
817
+ C_fixed = eigvec @ torch.diag(eigval_fixed) @ eigvec.T
818
+
819
+ # Force symmetry (numerical errors can break it)
820
+ C_fixed = 0.5 * (C_fixed + C_fixed.T)
821
+
822
+ # Add small diagonal regularization
823
+ reg = 1e-8 * eigval_fixed.max() * torch.eye(D, device=C.device, dtype=C.dtype)
824
+ C_fixed = C_fixed + reg
825
+
826
+ return torch.linalg.cholesky(C_fixed)
827
+ except RuntimeError:
828
+ pass
829
+
830
+ # Attempt 3: More aggressive regularization
831
+ try:
832
+ eigval, eigvec = torch.linalg.eigh(C)
833
+ eigval_fixed = torch.clamp(eigval, min=1e-6)
834
+ C_fixed = eigvec @ torch.diag(eigval_fixed) @ eigvec.T
835
+ C_fixed = 0.5 * (C_fixed + C_fixed.T)
836
+
837
+ # Stronger regularization
838
+ reg = 1e-4 * torch.eye(D, device=C.device, dtype=C.dtype)
839
+ C_fixed = C_fixed + reg
840
+
841
+ return torch.linalg.cholesky(C_fixed)
842
+ except RuntimeError:
843
+ pass
844
+
845
+ # Attempt 4: Last resort - reset to scaled identity
846
+ # Preserve the trace (total variance) from original matrix
847
+ trace = torch.trace(C).clamp(min=1e-6)
848
+ scale = torch.sqrt(trace / D)
849
+ L_identity = scale * torch.eye(D, device=C.device, dtype=C.dtype)
850
+
851
+ return L_identity
852
+
853
+ def _update_sigma(self, new_p_sigma: Tensor) -> Tensor:
854
+ """
855
+ Update step-size using CSA (Cumulative Step-size Adaptation).
856
+
857
+ Args:
858
+ new_p_sigma: New evolution path for step-size.
859
+
860
+ Returns:
861
+ New step-size.
862
+ """
863
+ cs = self.cs
864
+ damps = self.damps
865
+
866
+ # Step-size update factor
867
+ norm_p_sigma = new_p_sigma.norm()
868
+ factor = torch.exp((cs / damps) * (norm_p_sigma / self._chi_n - 1))
869
+
870
+ new_sigma = self.sigma * factor
871
+
872
+ return new_sigma
873
+
874
+ def _commit_updates(
875
+ self,
876
+ new_mean: Tensor,
877
+ new_sigma: Tensor,
878
+ new_L: Tensor,
879
+ new_p_sigma: Tensor,
880
+ new_p_c: Tensor,
881
+ offspring: Tensor,
882
+ offspring_fitness: Tensor,
883
+ ) -> None:
884
+ """Commit all updates to state."""
885
+ with torch.no_grad():
886
+ # Update mean
887
+ if isinstance(self._mean, nn.Parameter):
888
+ self._mean.copy_(new_mean)
889
+ else:
890
+ self._mean.copy_(new_mean)
891
+
892
+ # Update sigma
893
+ if self.adaptive:
894
+ self._log_sigma.copy_(new_sigma.log())
895
+ else:
896
+ self._sigma_buffer.copy_(new_sigma)
897
+
898
+ # Update Cholesky factor
899
+ if isinstance(self._L, nn.Parameter):
900
+ self._L.copy_(new_L)
901
+ else:
902
+ self._L.copy_(new_L)
903
+
904
+ # Update evolution paths
905
+ self._p_sigma.copy_(new_p_sigma)
906
+ self._p_c.copy_(new_p_c)
907
+
908
+ # Update population
909
+ self._population.copy_(offspring)
910
+
911
+ # Update fitness
912
+ self.state.fitness = offspring_fitness
913
+ self.state.population = self._population
914
+
915
+ # Update best solution
916
+ self.state.update_best(offspring, offspring_fitness)
917
+
918
+ # =========================================================================
919
+ # Restart Methods (IPOP/BIPOP)
920
+ # =========================================================================
921
+
922
+ def _should_restart(self) -> bool:
923
+ """
924
+ Check if restart conditions are met.
925
+
926
+ Returns:
927
+ True if algorithm should restart.
928
+ """
929
+ # Need enough history
930
+ if len(self._fitness_history) < 10:
931
+ return False
932
+
933
+ # Check tolerance on function values (stagnation)
934
+ recent = self._fitness_history[-10:]
935
+ if max(recent) - min(recent) < self._tolfun:
936
+ return True
937
+
938
+ # Check tolerance on sigma (step-size too small)
939
+ sigma_val = float(self.sigma)
940
+ if sigma_val < self._tolx:
941
+ return True
942
+
943
+ # Check condition number of C (degenerate distribution)
944
+ try:
945
+ L = self.L
946
+ C = L @ L.T
947
+ eigvals = torch.linalg.eigvalsh(C)
948
+ cond = eigvals.max() / eigvals.min().clamp(min=1e-30)
949
+ if cond > 1e14:
950
+ return True
951
+ except RuntimeError:
952
+ return True
953
+
954
+ return False
955
+
956
+ def _check_and_perform_restart(self) -> None:
957
+ """Check restart conditions and perform restart if needed."""
958
+ if not self._should_restart():
959
+ return
960
+
961
+ # Check if we have restarts remaining
962
+ if self.restart_state.n_restarts >= self._restarts:
963
+ return
964
+
965
+ # Record this run's result
966
+ if self._fitness_history:
967
+ self.restart_state.run_history.append(min(self._fitness_history))
968
+
969
+ # Determine new population size and regime
970
+ if self._bipop:
971
+ self._bipop_restart()
972
+ else:
973
+ self._ipop_restart()
974
+
975
+ def _ipop_restart(self) -> None:
976
+ """Perform IPOP restart (increasing population)."""
977
+ n_var = self.n_var
978
+
979
+ # Increase population size
980
+ new_pop_size = self.restart_state.current_pop_size * self._incpopsize
981
+ self.restart_state.current_pop_size = new_pop_size
982
+ self.restart_state.n_restarts += 1
983
+
984
+ # Update population size
985
+ self.pop_size = new_pop_size
986
+ self.n_offsprings = new_pop_size
987
+ self._mu = new_pop_size // 2
988
+
989
+ # Re-setup weights for new population size
990
+ self._setup_weights()
991
+
992
+ # Reset CMA-ES state
993
+ self._setup_mean(n_var, restart=True)
994
+ self._setup_sigma(restart=True)
995
+ self._setup_covariance(n_var, restart=True)
996
+ self._setup_evolution_paths(n_var, restart=True)
997
+
998
+ # Re-initialize population buffer + state tensors
999
+ new_pop = torch.zeros(self.pop_size, n_var, device=self.device, dtype=self.dtype)
1000
+ if hasattr(self, "_population") and self._population.shape == new_pop.shape:
1001
+ self._population.copy_(new_pop)
1002
+ else:
1003
+ self._population = new_pop
1004
+
1005
+ self.state.population = self._population
1006
+ self.state.fitness = torch.full(
1007
+ (self.pop_size,), float("inf"), device=self.device, dtype=self.dtype
1008
+ )
1009
+
1010
+ # Clear fitness history
1011
+ self._fitness_history = []
1012
+ self._generation_count = 0
1013
+
1014
def _bipop_restart(self) -> None:
    """
    Perform a BIPOP restart (alternating small/large populations).

    The evaluations of the run that just ended
    (``generation_count * pop_size``) are charged to the regime it ran in.
    The next regime is the one that has used fewer evaluations so far,
    with ties going to the small regime:

    - Small regime: a random small population and a reduced random
      step-size; the step-size is written directly and ``_setup_sigma`` is
      not called afterwards so that it is kept.
    - Large regime: the tracked population size is multiplied by
      ``self._incpopsize`` (IPOP-style) and the step-size is re-initialised
      through ``_setup_sigma(restart=True)``.

    Only large-regime restarts increment ``restart_state.n_restarts``;
    small-regime restarts are counted in ``small_n_restarts``.
    """
    n_var = self.n_var
    restart_state = self.restart_state

    # Update evaluation counts for the regime that just finished
    evals_this_run = self._generation_count * self.pop_size
    if restart_state.regime == RestartRegime.LARGE:
        restart_state.large_evals += evals_this_run
    else:
        restart_state.small_evals += evals_this_run

    # Decide next regime based on evaluation budget balance
    if restart_state.small_evals <= restart_state.large_evals:
        # Small-population restart
        restart_state.regime = RestartRegime.SMALL
        restart_state.small_n_restarts += 1

        # torch.rand samples from [0, 1), so the truncated product lies in
        # [0, default_size - 1]; the floor of 2 then gives a population
        # size in [2, default_size - 1].
        default_size = 4 + int(3 * math.log(n_var))
        new_pop_size = max(2, int(torch.rand(1).item() * default_size))

        # Smaller initial step-size for a focused search: the initial
        # sigma scaled by a random factor in [0.01, 0.5).
        small_sigma = self._init_sigma * (0.01 + 0.49 * torch.rand(1).item())
        if self.adaptive:
            with torch.no_grad():
                self._log_sigma.fill_(math.log(small_sigma))
        else:
            self._sigma_buffer.fill_(small_sigma)
    else:
        # Large-population restart (IPOP-style)
        restart_state.regime = RestartRegime.LARGE
        restart_state.n_restarts += 1

        # ``int(...)`` keeps the size usable as a tensor dimension when the
        # multiplier is fractional; integer multipliers are unaffected.
        new_pop_size = int(restart_state.current_pop_size * self._incpopsize)
        restart_state.current_pop_size = new_pop_size

    # Update population size
    self.pop_size = new_pop_size
    self.n_offsprings = new_pop_size
    self._mu = new_pop_size // 2

    # Re-setup weights for new population size
    self._setup_weights()

    # Reset CMA-ES state. Sigma is only re-initialised in the large regime;
    # the small regime keeps the value written above.
    self._setup_mean(n_var, restart=True)
    if restart_state.regime == RestartRegime.LARGE:
        self._setup_sigma(restart=True)
    self._setup_covariance(n_var, restart=True)
    self._setup_evolution_paths(n_var, restart=True)

    # Re-initialize population buffer + state tensors. The existing buffer
    # is reused (zeroed in place) when its shape still fits.
    new_pop = torch.zeros(self.pop_size, n_var, device=self.device, dtype=self.dtype)
    if hasattr(self, "_population") and self._population.shape == new_pop.shape:
        self._population.copy_(new_pop)
    else:
        self._population = new_pop

    self.state.population = self._population
    self.state.fitness = torch.full(
        (self.pop_size,), float("inf"), device=self.device, dtype=self.dtype
    )

    # Clear per-run bookkeeping
    self._fitness_history = []
    self._generation_count = 0
1090
+
1091
+ # =========================================================================
1092
+ # Hyperparameter Management
1093
+ # =========================================================================
1094
+
1095
+ @torch.no_grad()
1096
+ def _clamp_hyperparams(self) -> None:
1097
+ """Clamp learnable hyperparameters to valid ranges."""
1098
+ if self.adaptive:
1099
+ # Sigma in (1e-10, 1e10)
1100
+ self._log_sigma.clamp_(min=-23, max=23)
1101
+
1102
+ # Ensure c1 + cmu <= 1
1103
+ c1 = torch.sigmoid(self._c1_logit)
1104
+ cmu = torch.sigmoid(self._cmu_logit)
1105
+ total = c1 + cmu
1106
+ if total > 0.99:
1107
+ scale = 0.99 / total
1108
+ self._c1_logit.fill_(self._to_logit(float(c1 * scale)))
1109
+ self._cmu_logit.fill_(self._to_logit(float(cmu * scale)))
1110
+
1111
def update_state(self) -> None:
    """
    Run the parent class's ``update_state`` and then re-clamp the
    hyperparameters via ``_clamp_hyperparams``.
    """
    # Parent update first, so the clamp sees whatever it has just written.
    super().update_state()
    self._clamp_hyperparams()
1115
+
1116
+ def _get_hyperparams(self) -> Dict[str, Any]:
1117
+ """Return current hyperparameter values."""
1118
+ return {
1119
+ 'pop_size': self.pop_size,
1120
+ 'mu': self._mu,
1121
+ 'mu_eff': self._mu_eff,
1122
+ 'sigma': float(self.sigma.item()),
1123
+ 'cc': float(self.cc.item()),
1124
+ 'cs': float(self.cs.item()),
1125
+ 'c1': float(self.c1.item()),
1126
+ 'cmu': float(self.cmu.item()),
1127
+ 'damps': float(self.damps.item()),
1128
+ 'adaptive': self.adaptive,
1129
+ 'differentiable': self.differentiable,
1130
+ 'restarts': self._restarts,
1131
+ 'restart_from_best': self._restart_from_best,
1132
+ 'incpopsize': self._incpopsize,
1133
+ 'bipop': self._bipop,
1134
+ 'n_restarts_done': self.restart_state.n_restarts,
1135
+ 'current_pop_size': self.restart_state.current_pop_size,
1136
+ 'best_ever_f': self.restart_state.best_ever_f,
1137
+ }
1138
+
1139
+ # =========================================================================
1140
+ # String Representation
1141
+ # =========================================================================
1142
+
1143
+ def __repr__(self) -> str:
1144
+ parts = [
1145
+ f"CMAES(pop_size={self.pop_size}",
1146
+ f"sigma={float(self._init_sigma):.4f}",
1147
+ ]
1148
+ if self._restarts > 0:
1149
+ if self._bipop:
1150
+ parts.append(f"bipop=True")
1151
+ else:
1152
+ parts.append(f"restarts={self._restarts}")
1153
+ parts.append(f"incpopsize={self._incpopsize}")
1154
+ parts.append(f"adaptive={self.adaptive}")
1155
+ parts.append(f"differentiable={self.differentiable})")
1156
+ return ", ".join(parts)
1157
+
1158
+
1159
+ # =============================================================================
1160
+ # Convenience Factory Functions
1161
+ # =============================================================================
1162
+
1163
def cmaes_default(
    pop_size: Optional[int] = None,
    sigma: float = 0.5,
    adaptive: bool = False,
    differentiable: bool = False,
    **kwargs,
) -> CMAES:
    """
    Build a CMAES instance from the standard settings.

    All arguments are forwarded to the ``CMAES`` constructor unchanged.
    With ``pop_size=None`` the population size is left for ``CMAES`` to
    derive from the number of variables.

    Args:
        pop_size: Population size, or None to let CMAES choose it.
        sigma: Initial step-size.
        adaptive: Forwarded as the ``adaptive`` flag of CMAES.
        differentiable: Forwarded as the ``differentiable`` flag of CMAES.
        **kwargs: Additional arguments passed to CMAES.

    Returns:
        Configured CMAES instance.
    """
    # Named options first, then the caller's extras, in the same order the
    # constructor would receive them from an explicit call.
    options = {
        "pop_size": pop_size,
        "sigma": sigma,
        "adaptive": adaptive,
        "differentiable": differentiable,
        **kwargs,
    }
    return CMAES(**options)
1193
+
1194
+
1195
def cmaes_small(
    sigma: float = 0.3,
    adaptive: bool = False,
    differentiable: bool = False,
    **kwargs,
) -> CMAES:
    """
    Build a CMAES instance intended for small-population, fast runs.

    The population size is always passed as ``None`` so that ``CMAES``
    derives it itself; compared with ``cmaes_default`` only the default
    initial step-size (0.3) differs.

    Args:
        sigma: Initial step-size.
        adaptive: Forwarded as the ``adaptive`` flag of CMAES.
        differentiable: Forwarded as the ``differentiable`` flag of CMAES.
        **kwargs: Additional arguments passed to CMAES. Must not contain
            ``pop_size``, which is fixed by this factory.

    Returns:
        Configured CMAES instance.
    """
    # pop_size=None: the size is derived inside CMAES once n_var is known.
    optimizer = CMAES(
        differentiable=differentiable,
        adaptive=adaptive,
        sigma=sigma,
        pop_size=None,
        **kwargs,
    )
    return optimizer
1223
+
1224
+
1225
def cmaes_large(
    pop_size_factor: float = 2.0,
    sigma: float = 0.5,
    adaptive: bool = False,
    differentiable: bool = False,
    **kwargs,
) -> CMAES:
    """
    Build a CMAES instance tagged with a population-size multiplier.

    The instance is created with ``pop_size=None`` and the requested
    factor is then stored on it as ``_pop_size_factor``.

    NOTE(review): this factory only records the factor; whether CMAES
    reads ``_pop_size_factor`` when it derives the population size is not
    visible here and should be confirmed.

    Args:
        pop_size_factor: Multiplier for the default population size.
        sigma: Initial step-size.
        adaptive: Forwarded as the ``adaptive`` flag of CMAES.
        differentiable: Forwarded as the ``differentiable`` flag of CMAES.
        **kwargs: Additional arguments passed to CMAES. Must not contain
            ``pop_size``, which is fixed by this factory.

    Returns:
        Configured CMAES instance carrying ``_pop_size_factor``.
    """
    optimizer = CMAES(
        differentiable=differentiable,
        adaptive=adaptive,
        sigma=sigma,
        pop_size=None,
        **kwargs,
    )
    # Recorded after construction because the constructor takes no such
    # argument in this call.
    optimizer._pop_size_factor = pop_size_factor
    return optimizer
1260
+
1261
+
1262
def cmaes_adaptive(
    pop_size: Optional[int] = None,
    sigma: float = 0.5,
    differentiable: bool = False,
    **kwargs,
) -> CMAES:
    """
    Build a CMAES instance with ``adaptive=True``.

    Identical to calling ``CMAES`` directly except that the ``adaptive``
    flag is forced on, which puts the hyperparameters in their learnable
    (adaptive) mode.

    Args:
        pop_size: Population size, or None to let CMAES choose it.
        sigma: Initial step-size.
        differentiable: Forwarded as the ``differentiable`` flag of CMAES.
        **kwargs: Additional arguments passed to CMAES. Must not contain
            ``adaptive``, which is fixed by this factory.

    Returns:
        Configured CMAES instance with adaptive=True.
    """
    optimizer = CMAES(
        adaptive=True,
        pop_size=pop_size,
        sigma=sigma,
        differentiable=differentiable,
        **kwargs,
    )
    return optimizer
1290
+
1291
+
1292
def cmaes_ipop(
    restarts: int = 9,
    incpopsize: int = 2,
    restart_from_best: bool = False,
    sigma: float = 0.5,
    **kwargs,
) -> CMAES:
    """
    Build an IPOP-CMA-ES: CMA-ES restarted with a growing population.

    After each restart the population size is multiplied by
    ``incpopsize``; the instance is created with ``bipop=False``.

    Args:
        restarts: Number of restarts to perform. Default: 9.
        incpopsize: Population size multiplier after restart. Default: 2.
        restart_from_best: Forwarded to CMAES; if True, restart from the
            best solution found, otherwise from a random point.
            Default: False.
        sigma: Initial step-size.
        **kwargs: Additional arguments passed to CMAES. Must not contain
            ``bipop``, which is fixed by this factory.

    Returns:
        Configured CMAES instance with IPOP restart strategy.

    Reference:
        Auger, A. & Hansen, N. (2005). A Restart CMA Evolution Strategy
        With Increasing Population Size. CEC 2005.
    """
    optimizer = CMAES(
        bipop=False,
        restarts=restarts,
        incpopsize=incpopsize,
        restart_from_best=restart_from_best,
        sigma=sigma,
        **kwargs,
    )
    return optimizer
1328
+
1329
+
1330
def cmaes_bipop(
    restarts: int = 9,
    incpopsize: int = 2,
    sigma: float = 0.5,
    **kwargs,
) -> CMAES:
    """
    Build a BIPOP-CMA-ES: CMA-ES restarted with alternating regimes.

    The instance is created with ``bipop=True``, so restarts alternate
    between:
    - Small populations: focused search exploiting local structure
    - Large populations: broad, IPOP-style search for exploration

    Args:
        restarts: Number of large-population restarts. Default: 9.
        incpopsize: Population size multiplier for large regime. Default: 2.
        sigma: Initial step-size.
        **kwargs: Additional arguments passed to CMAES. Must not contain
            ``bipop``, which is fixed by this factory.

    Returns:
        Configured CMAES instance with BIPOP restart strategy.

    Reference:
        Hansen, N. (2009). Benchmarking a BI-Population CMA-ES on the
        BBOB-2009 Function Testbed. GECCO Workshop.
    """
    optimizer = CMAES(
        bipop=True,
        restarts=restarts,
        incpopsize=incpopsize,
        sigma=sigma,
        **kwargs,
    )
    return optimizer