evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1165 @@
1
+ """
2
+ SHADE (Success-History based Adaptive Differential Evolution) for EvoGrad.
3
+
4
+ This module provides SHADE and L-SHADE (with Linear Population Size Reduction),
5
+ which are self-adaptive DE variants that use historical memory of successful
6
+ F and CR parameters to guide adaptation.
7
+
8
+ Key Features:
9
+ - Uses "current-to-pbest/1" mutation strategy
10
+ - F values sampled from Cauchy distribution centered at memory values
11
+ - CR values sampled from Normal distribution centered at memory values
12
+ - Successful parameters are stored using weighted Lehmer mean
13
+ - External archive stores replaced inferior solutions for diversity
14
+
15
+ Variants:
16
+ - SHADE: Standard success-history based adaptation
17
+ - L-SHADE: SHADE with Linear Population Size Reduction (LPSR)
18
+
19
+ Modes:
20
+ - adaptive=False, differentiable=False: Classical SHADE
21
+ - adaptive=True, differentiable=False: Learnable memory/operators via backprop
22
+ - adaptive=False, differentiable=True: Learnable population via backprop
23
+ - adaptive=True, differentiable=True: Full end-to-end differentiable
24
+
25
+ Note on adaptive vs differentiable:
26
+ - `adaptive=True`: Enables backpropagation for OPERATORS (memory M_F, M_CR,
27
+ selection temperature, crossover parameters become learnable)
28
+ - `differentiable=True`: Enables backpropagation for POPULATION (the
29
+ population tensor becomes an nn.Parameter, selection uses Gumbel-Softmax)
30
+
31
+ References:
32
+ Tanabe, R. & Fukunaga, A. (2013). Success-History Based Parameter Adaptation
33
+ for Differential Evolution. CEC 2013.
34
+
35
+ Tanabe, R. & Fukunaga, A. (2014). Improving the Search Performance of SHADE
36
+ Using Linear Population Size Reduction. CEC 2014.
37
+
38
+ Example:
39
+ >>> from evograd.algorithms import SHADE, LSHADE
40
+ >>> from evograd.core import Problem, minimize
41
+ >>>
42
+ >>> problem = Problem(
43
+ ... objective=lambda x: (x**2).sum(dim=-1),
44
+ ... n_var=30,
45
+ ... xl=-100.0,
46
+ ... xu=100.0,
47
+ ... )
48
+ >>>
49
+ >>> # Standard SHADE
50
+ >>> shade = SHADE(pop_size=100, memory_size=100)
51
+ >>> result = minimize(problem, shade, max_evals=100000)
52
+ >>>
53
+ >>> # L-SHADE with population reduction
54
+ >>> lshade = LSHADE(pop_size_init=18*30, pop_size_min=4)
55
+ >>> result = minimize(problem, lshade, max_evals=100000)
56
+ """
57
+
58
+ from __future__ import annotations
59
+
60
+ from dataclasses import dataclass, field
61
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
62
+
63
+ import torch
64
+ import torch.nn as nn
65
+ from torch import Tensor
66
+
67
+ from evograd.core.algorithm import Algorithm
68
+
69
+ if TYPE_CHECKING:
70
+ from evograd.core.problem import Problem
71
+
72
+ __all__ = [
73
+ "SHADE",
74
+ "LSHADE",
75
+ "SHADEMemory",
76
+ "shade_default",
77
+ "shade_adaptive",
78
+ "lshade_default",
79
+ "lshade_adaptive",
80
+ ]
81
+
82
+
83
+ # =============================================================================
84
+ # SHADE Memory Container
85
+ # =============================================================================
86
+
87
@dataclass
class SHADEMemory:
    """
    Success-history memory for SHADE parameter adaptation.

    Holds the historical memory of successful F and CR values, which act as
    the centres of the Cauchy (F) and Normal (CR) sampling distributions,
    together with the external archive of replaced solutions.

    Attributes:
        M_F: Memory of successful mutation scale factors [H].
        M_CR: Memory of successful crossover rates [H].
        k: Current memory index for update (circular).
        H: Memory size.
        archive: External archive of replaced inferior solutions.
        archive_size: Current archive size.
        max_archive_size: Maximum archive size.
    """
    M_F: Tensor
    M_CR: Tensor
    k: int = 0
    H: int = 100
    archive: Optional[Tensor] = None
    archive_size: int = 0
    max_archive_size: int = 100

    @classmethod
    def create(
        cls,
        H: int = 100,
        max_archive_size: int = 100,
        init_F: float = 0.5,
        init_CR: float = 0.5,
        device: Optional[torch.device] = None,
        dtype: torch.dtype = torch.float32,
    ) -> "SHADEMemory":
        """
        Create initial SHADE memory.

        Args:
            H: Memory size.
            max_archive_size: Maximum external archive size.
            init_F: Initial F memory values.
            init_CR: Initial CR memory values.
            device: Computation device.
            dtype: Tensor dtype.

        Returns:
            Initialized SHADEMemory with every cell set to init_F / init_CR
            and an empty archive.
        """
        return cls(
            M_F=torch.full((H,), init_F, device=device, dtype=dtype),
            M_CR=torch.full((H,), init_CR, device=device, dtype=dtype),
            k=0,
            H=H,
            archive=None,
            archive_size=0,
            max_archive_size=max_archive_size,
        )

    def sample_F(self, n: int, device: torch.device, dtype: torch.dtype) -> Tensor:
        """
        Sample F values from Cauchy distribution centered at random memory cells.

        F_i = cauchy(M_F[r_i], 0.1), truncated to (0, 1]

        Args:
            n: Number of samples.
            device: Computation device.
            dtype: Tensor dtype of the returned samples.

        Returns:
            Sampled F values [n], each in [1e-4, 1].
        """
        # Select random memory indices
        r_idx = torch.randint(0, self.H, (n,), device=device)
        # Cast as well as move, so the result honours the requested dtype
        # even when the memory was created with a different one.
        mu_F = self.M_F.to(device=device, dtype=dtype)[r_idx]

        # Sample from Cauchy distribution (using inverse CDF)
        # Cauchy(mu, gamma) = mu + gamma * tan(pi * (u - 0.5))
        u = torch.rand(n, device=device, dtype=dtype)
        gamma = 0.1  # Standard SHADE scale parameter
        F = mu_F + gamma * torch.tan(torch.pi * (u - 0.5))

        # Values <= 0 are regenerated in original SHADE; here they are
        # approximated by reflecting them to the positive side.
        F = torch.where(F <= 0, torch.abs(F) + 1e-4, F)

        # Values > 1 are set to 1; the lower bound keeps F strictly positive.
        return torch.clamp(F, min=1e-4, max=1.0)

    def sample_CR(self, n: int, device: torch.device, dtype: torch.dtype) -> Tensor:
        """
        Sample CR values from Normal distribution centered at random memory cells.

        CR_i = N(M_CR[r_i], 0.1), truncated to [0, 1]

        Args:
            n: Number of samples.
            device: Computation device.
            dtype: Tensor dtype of the returned samples.

        Returns:
            Sampled CR values [n], each in [0, 1].
        """
        # Select random memory indices
        r_idx = torch.randint(0, self.H, (n,), device=device)
        mu_CR = self.M_CR.to(device=device, dtype=dtype)[r_idx]

        # Sample from Normal distribution
        sigma = 0.1  # Standard SHADE scale parameter
        CR = mu_CR + sigma * torch.randn(n, device=device, dtype=dtype)

        # Truncate to [0, 1]
        return torch.clamp(CR, min=0.0, max=1.0)

    @staticmethod
    def _normalise_weights(weights: Tensor) -> Tensor:
        """Return non-negative weights that sum to exactly 1."""
        # NaN and negative improvements carry no information.
        w = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
        w = w.clamp_min(0.0)

        # An infinite improvement (e.g. parent fitness was inf) dominates
        # every finite one; dividing inf by inf would otherwise yield NaN.
        inf_mask = torch.isinf(w)
        if bool(inf_mask.any()):
            w = inf_mask.to(w.dtype)

        peak = w.max()
        if not bool(peak > 0):
            # No usable improvement information: fall back to a plain mean.
            return torch.full_like(w, 1.0 / w.numel())

        # Rescale by the maximum first so that very small improvements do
        # not underflow, then normalise exactly (no additive epsilon, which
        # would shrink the means towards zero when the sum is tiny).
        w = w / peak
        return w / w.sum()

    def update(
        self,
        S_F: Tensor,
        S_CR: Tensor,
        weights: Tensor,
    ) -> None:
        """
        Update memory with successful F and CR values.

        Uses weighted Lehmer mean for F and weighted arithmetic mean for CR,
        writes both into cell ``k`` and advances ``k`` circularly. Does
        nothing when there are no successful values.

        Args:
            S_F: Successful F values [n_success].
            S_CR: Successful CR values [n_success].
            weights: Improvement weights [n_success].
        """
        if S_F.numel() == 0:
            return

        # The memory is plain state, not part of any autograd graph.
        S_F = S_F.detach()
        S_CR = S_CR.detach()
        w = self._normalise_weights(
            weights.detach().to(device=S_F.device, dtype=S_F.dtype)
        )

        # Weighted Lehmer mean for F (reduces bias towards small values)
        # mean_WL = sum(w * F^2) / sum(w * F)
        numerator = (w * S_F * S_F).sum()
        denominator = (w * S_F).sum()
        mean_F = numerator / denominator.clamp_min(torch.finfo(denominator.dtype).tiny)

        # Weighted arithmetic mean for CR
        mean_CR = (w * S_CR).sum()

        # Update memory at position k
        device = self.M_F.device
        self.M_F[self.k] = mean_F.to(device)
        self.M_CR[self.k] = mean_CR.to(device)

        # Circular increment
        self.k = (self.k + 1) % self.H

    def add_to_archive(self, solutions: Tensor) -> None:
        """
        Add replaced solutions to external archive.

        The archive maintains diversity by storing inferior solutions
        that were replaced during selection. When the archive would exceed
        ``max_archive_size``, a uniformly random subset of that size is kept.

        Args:
            solutions: Solutions to add [n, n_var].
        """
        if solutions.numel() == 0:
            return

        incoming = solutions.detach()
        if self.archive is None:
            merged = incoming.clone()
        else:
            # Keep the archive on a single device/dtype.
            incoming = incoming.to(device=self.archive.device, dtype=self.archive.dtype)
            merged = torch.cat([self.archive, incoming], dim=0)

        # Enforce the cap on every insertion, including the very first one.
        capacity = max(self.max_archive_size, 0)
        if merged.shape[0] > capacity:
            keep = torch.randperm(merged.shape[0], device=merged.device)[:capacity]
            merged = merged[keep]

        self.archive = merged
        self.archive_size = merged.shape[0]
270
+
271
+
272
+ # =============================================================================
273
+ # SHADE Algorithm
274
+ # =============================================================================
275
+
276
class SHADE(Algorithm):
    """
    Success-History based Adaptive Differential Evolution (SHADE).

    SHADE adapts F and CR parameters using a success-history mechanism.
    F values are sampled from Cauchy distributions and CR values from
    Normal distributions, both centered at memory values updated with
    successful parameters.

    Args:
        pop_size: Population size.
        memory_size: Size of success-history memory (H). Default: 100.
        p_best_rate: Fraction of top individuals for pbest selection.
            Default: 0.1 (10%).
        archive_rate: Archive size as fraction of pop_size. Default: 1.0.
        init_F: Initial F memory values. Default: 0.5.
        init_CR: Initial CR memory values. Default: 0.5.
        sampling: Operator for initial population generation.
        repair: Repair operator for constraint handling.
        adaptive: If True, memory and operator parameters become learnable
            via backpropagation (learnable hyperparameters).
        differentiable: If True, population becomes an nn.Parameter and
            selection uses Gumbel-Softmax (learnable population).
        selection_temperature: Temperature for Gumbel-Softmax selection.
        dtype: Tensor dtype.

    Attributes:
        memory: SHADEMemory containing M_F, M_CR, and archive.
        p_best_rate: Rate for pbest selection.

    Example:
        >>> # Standard SHADE
        >>> shade = SHADE(pop_size=100, memory_size=100)
        >>>
        >>> # SHADE with larger archive
        >>> shade = SHADE(pop_size=100, archive_rate=2.0)
        >>>
        >>> # Differentiable SHADE for meta-learning
        >>> shade = SHADE(pop_size=100, adaptive=True, differentiable=True)
    """

    def __init__(
        self,
        pop_size: int = 100,
        memory_size: int = 100,
        p_best_rate: float = 0.1,
        archive_rate: float = 1.0,
        init_F: float = 0.5,
        init_CR: float = 0.5,
        sampling: Optional[nn.Module] = None,
        repair: Optional[nn.Module] = None,
        adaptive: bool = False,
        differentiable: bool = False,
        selection_temperature: float = 1.0,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        # These plain attributes are assigned before the base-class
        # constructor runs, exactly as in the original ordering.
        self.memory_size = memory_size
        self.p_best_rate = p_best_rate
        self.archive_rate = archive_rate
        self._init_F = init_F
        self._init_CR = init_CR
        self.adaptive = adaptive
        self._selection_temperature = selection_temperature

        # Create pbest selection operator using TruncationSelection
        # Selects from top p_best_rate fraction of population
        pbest_selection = self._create_pbest_selection(p_best_rate, adaptive, selection_temperature)

        # Create random selection for r1 and r2
        random_selection = self._create_random_selection(adaptive, selection_temperature)

        # Create BinomialCrossover that supports per-individual CR via forward(cr=...)
        # - If adaptive=True: crossover is differentiable
        crossover = self._create_crossover(adaptive)

        # Call base class
        super().__init__(
            pop_size=pop_size,
            sampling=sampling,
            selection=random_selection,
            crossover=crossover,
            mutation=None,  # SHADE mutation is handled internally
            survival=None,  # SHADE uses greedy selection
            repair=repair,
            eliminate_duplicates=False,
            n_offsprings=pop_size,
            differentiable=differentiable,  # Controls whether population is learnable
            adaptive=adaptive,
            dtype=dtype,
        )

        # Register additional selection operators as submodules
        self._register_operator("pbest_selection", pbest_selection)
        self.pbest_selection = pbest_selection

    def _create_pbest_selection(
        self,
        p_best_rate: float,
        adaptive: bool,
        temperature: float,
    ) -> nn.Module:
        """
        Create pbest selection operator using TruncationSelection.

        Selects from top p_best_rate fraction of population.

        Args:
            p_best_rate: Fraction of top individuals to consider.
            adaptive: If True, temperature is learnable and use Gumbel-Softmax selection.
            temperature: Temperature for soft selection.

        Returns:
            TruncationSelection operator.
        """
        from evograd.operators.selection import TruncationSelection

        return TruncationSelection(
            truncation_ratio=p_best_rate,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=adaptive,  # Only learn if adaptive
            minimize=True,
        )

    def _create_random_selection(
        self,
        adaptive: bool,
        temperature: float,
    ) -> nn.Module:
        """
        Create random selection operator.

        Args:
            adaptive: If True, use differentiable selection and use Gumbel-Softmax selection.
            temperature: Temperature for soft selection.

        Returns:
            RandomSelection operator.
        """
        from evograd.operators.selection import RandomSelection

        return RandomSelection(
            replacement=True,
            adaptive=adaptive,
            temperature=temperature,
        )

    def _create_crossover(self, adaptive: bool) -> nn.Module:
        """
        Create binomial crossover operator.

        Uses BinomialCrossover from evograd.operators which supports
        per-individual CR via the `cr` parameter in forward().

        Args:
            adaptive: If True, crossover is differentiable.

        Returns:
            BinomialCrossover operator.
        """
        from evograd.operators.crossover import BinomialCrossover

        return BinomialCrossover(
            cr=0.5,  # Default CR, will be overridden per-individual
            adaptive=adaptive,  # Differentiable if adaptive
            learn_cr=False,  # CR comes from memory sampling, not learned directly
        )

    # =========================================================================
    # Setup
    # =========================================================================

    def _setup(self) -> None:
        """SHADE-specific setup after initialization."""
        # Create success-history memory
        max_archive_size = int(self.archive_rate * self.pop_size)
        self.memory = SHADEMemory.create(
            H=self.memory_size,
            max_archive_size=max_archive_size,
            init_F=self._init_F,
            init_CR=self._init_CR,
            device=self.device,
            dtype=self.dtype,
        )

        # Make memory learnable if adaptive mode
        if self.adaptive:
            self._M_F_param = nn.Parameter(self.memory.M_F.clone())
            self._M_CR_param = nn.Parameter(self.memory.M_CR.clone())

        # Store per-individual F and CR for current generation
        self._current_F: Optional[Tensor] = None
        self._current_CR: Optional[Tensor] = None

    @property
    def M_F(self) -> Tensor:
        """Current F memory (the learnable parameter when adaptive)."""
        if self.adaptive:
            return self._M_F_param
        return self.memory.M_F

    @property
    def M_CR(self) -> Tensor:
        """Current CR memory (the learnable parameter when adaptive)."""
        if self.adaptive:
            return self._M_CR_param
        return self.memory.M_CR

    @property
    def population(self) -> Tensor:
        """Current population."""
        return self._population

    @property
    def fitness(self) -> Tensor:
        """Current fitness values."""
        return self.state.fitness

    # =========================================================================
    # Core SHADE Methods
    # =========================================================================

    def _sample_parameters(self) -> Tuple[Tensor, Tensor]:
        """
        Sample F and CR values from memory distributions.

        If adaptive=True, uses reparameterization trick for gradient flow.

        Returns:
            Tuple of (F_values, CR_values), each [pop_size].
        """
        N = self.pop_size

        if self.adaptive:
            # Use learnable memory with reparameterization
            # Select random memory indices
            r_idx = torch.randint(0, self.memory_size, (N,), device=self.device)
            mu_F = self.M_F[r_idx]
            mu_CR = self.M_CR[r_idx]

            # Reparameterized Cauchy for F (using inverse CDF)
            u = torch.rand(N, device=self.device, dtype=self.dtype)
            gamma = 0.1
            F = mu_F + gamma * torch.tan(torch.pi * (u - 0.5))
            F = torch.clamp(F, min=1e-4, max=1.0)

            # Reparameterized Normal for CR
            sigma = 0.1
            eps = torch.randn(N, device=self.device, dtype=self.dtype)
            CR = mu_CR + sigma * eps
            CR = torch.clamp(CR, min=0.0, max=1.0)
        else:
            # Standard sampling from memory
            F = self.memory.sample_F(N, self.device, self.dtype)
            CR = self.memory.sample_CR(N, self.device, self.dtype)

        return F, CR

    def _select_pbest(self) -> Tensor:
        """
        Select random pbest individuals from top p% of population.

        Delegates to the ``pbest_selection`` operator created in
        ``__init__`` (a TruncationSelection configured with p_best_rate).

        Returns:
            Selected pbest individuals [pop_size, n_var].
        """
        return self.pbest_selection(self.population, self.fitness, n_select=self.pop_size)

    def _select_random_from_union(self) -> Tensor:
        """
        Select random individuals from population ∪ archive.

        Delegates to the ``selection`` operator (the RandomSelection passed
        to the base class in ``__init__``).

        Returns:
            Selected individuals [pop_size, n_var].
        """
        N = self.pop_size

        # Combine population and archive
        if self.memory.archive is not None and self.memory.archive_size > 0:
            union = torch.cat([self.population, self.memory.archive], dim=0)
        else:
            union = self.population

        # Create dummy fitness for random selection (RandomSelection ignores fitness)
        union_fitness = torch.zeros(union.shape[0], device=self.device, dtype=self.dtype)

        # Use RandomSelection operator for uniform selection from union
        return self.selection(union, union_fitness, n_select=N)

    def _top_p_indices(self) -> Tensor:
        """
        Return indices of the best ``ceil(p_best_rate * pop_size)`` individuals.

        At least two indices are requested so that a pbest different from
        the current individual always exists (when pop_size >= 2). Lower
        fitness is treated as better (ascending argsort).
        """
        N = self.pop_size
        p = max(2, int(torch.ceil(torch.tensor(self.p_best_rate * N)).item()))
        return torch.argsort(self.fitness)[:p]

    def _rand_indices_excluding(self, n: int, exclude: Tensor, high: int) -> Tensor:
        """
        Draw ``n`` indices in [0, high) with ``idx[j] != exclude[j]``.

        Uses a bounded number of vectorised rejection rounds, then a
        deterministic shift for any remaining collisions.

        Args:
            n: Number of indices to draw.
            exclude: Per-position forbidden index [n], values in [0, high).
            high: Exclusive upper bound of the index range.

        Returns:
            Index tensor [n].
        """
        idx = torch.randint(0, high, (n,), device=self.device)
        for _ in range(5):
            bad = idx.eq(exclude)
            if not bad.any():
                break
            idx[bad] = torch.randint(0, high, (int(bad.sum().item()),), device=self.device)

        # Final fallback: the neighbouring index differs whenever high >= 2.
        bad = idx.eq(exclude)
        if bad.any():
            idx[bad] = (idx[bad] + 1) % high
        return idx

    def _rand_indices_excluding_two(self, n: int, exclude1: Tensor, exclude2: Tensor, high: int) -> Tensor:
        """
        Draw ``n`` indices in [0, high) avoiding two per-position exclusions.

        Args:
            n: Number of indices to draw.
            exclude1: First forbidden index per position [n].
            exclude2: Second forbidden index per position [n].
            high: Exclusive upper bound of the index range.

        Returns:
            Index tensor [n].
        """
        idx = torch.randint(0, high, (n,), device=self.device)
        for _ in range(7):
            bad = idx.eq(exclude1) | idx.eq(exclude2)
            if not bad.any():
                break
            idx[bad] = torch.randint(0, high, (int(bad.sum().item()),), device=self.device)

        # Final fallback: a single +1 shift may land on the *other* excluded
        # index, so shift up to twice. With high >= 3 the three consecutive
        # candidates are distinct and at most two of them are excluded.
        for _ in range(2):
            bad = idx.eq(exclude1) | idx.eq(exclude2)
            if not bad.any():
                break
            idx[bad] = (idx[bad] + 1) % high
        return idx

    def _mutate(self) -> Tensor:
        """
        Generate donor vectors using current-to-pbest/1 mutation.

        v_i = x_i + F_i * (x_pbest - x_i) + F_i * (x_r1 - x_r2)

        Side effect: stores the freshly sampled per-individual parameters in
        ``_current_F`` / ``_current_CR`` for use by crossover and the
        memory update.

        Returns:
            Donor vectors [pop_size, n_var].
        """
        N = self.pop_size

        # Sample F and CR for this generation
        self._current_F, self._current_CR = self._sample_parameters()

        i_idx = torch.arange(N, device=self.device)

        # --- pbest index: random from top p%, excluding i ---
        top_idx = self._top_p_indices()  # [p]
        p = top_idx.numel()
        pos = torch.randint(0, p, (N,), device=self.device)
        pbest_idx = top_idx[pos]
        same = pbest_idx.eq(i_idx)
        if p > 1 and same.any():
            # Resample once where pbest coincides with the target.
            pos[same] = torch.randint(0, p, (int(same.sum().item()),), device=self.device)
            pbest_idx = top_idx[pos]
            same = pbest_idx.eq(i_idx)
            if same.any():
                # Final fallback: step to the neighbouring entry of the
                # top-p list. Its entries are distinct (argsort output), so
                # the result differs from i and is still a top-p member;
                # shifting the population index instead could select an
                # arbitrary, non-elite individual.
                pos[same] = (pos[same] + 1) % p
                pbest_idx = top_idx[pos]

        # --- r1 index: from population excluding i ---
        r1_idx = self._rand_indices_excluding(N, i_idx, high=N)

        # --- r2 index: from union (pop + archive) excluding i and r1 ---
        if self.memory.archive is not None and self.memory.archive_size > 0:
            # Population rows come first in the union, so population indices
            # i and r1 are valid union indices without remapping.
            union = torch.cat([self.population, self.memory.archive], dim=0)
            r2_idx = self._rand_indices_excluding_two(N, i_idx, r1_idx, high=union.shape[0])
            x_r2 = union[r2_idx]
        else:
            # No archive yet: select from population excluding i and r1
            r2_idx = self._rand_indices_excluding_two(N, i_idx, r1_idx, high=N)
            x_r2 = self.population[r2_idx]

        x_pbest = self.population[pbest_idx]
        x_r1 = self.population[r1_idx]

        # Broadcast per-individual F over the variable dimension: [N, 1]
        F = self._current_F.unsqueeze(-1)
        donor = self.population + F * (x_pbest - self.population) + F * (x_r1 - x_r2)

        return donor

    def _infill(self) -> Tensor:
        """
        Generate trial vectors through mutation and crossover.

        Returns:
            Trial vectors [pop_size, n_var].
        """
        # 1. Mutation: create donor vectors
        donor = self._mutate()

        # 2. Crossover: binomial with per-individual CR override
        trial = self.crossover(self.population, donor, cr=self._current_CR)

        # 3. Repair bounds (plain clamping when no repair operator is set)
        if self.repair is not None:
            trial = self.repair(trial, self.xl, self.xu)
        else:
            trial = torch.clamp(trial, self.xl, self.xu)

        return trial

    def _advance(self, offspring: Tensor, offspring_fitness: Tensor) -> None:
        """
        Apply greedy selection and update memory.

        Args:
            offspring: Trial vectors [pop_size, n_var].
            offspring_fitness: Fitness of trials [pop_size].
        """
        # Identify successful trials (trial strictly better than target)
        improved = offspring_fitness < self.fitness

        if improved.any():
            # Collect successful F and CR values
            S_F = self._current_F[improved]
            S_CR = self._current_CR[improved]

            # Weights based on fitness improvement (delta f)
            weights = self.fitness[improved] - offspring_fitness[improved]

            # Update memory with successful parameters
            # In adaptive mode, gradients update memory directly, so skip
            if not self.adaptive:
                self.memory.update(S_F, S_CR, weights)

            # Add replaced solutions to archive
            replaced_solutions = self.population[improved].detach()
            self.memory.add_to_archive(replaced_solutions)

        # Greedy selection: keep trial if better, else keep target
        new_pop = torch.where(
            improved.unsqueeze(-1),
            offspring,
            self.population,
        )
        new_fitness = torch.where(improved, offspring_fitness, self.fitness)

        # Update internal state
        self._update_population(new_pop, new_fitness)

        # Update best solution tracking
        self.state.update_best(self.population, self.state.fitness)

    def _update_population(self, new_pop: Tensor, new_fitness: Tensor) -> None:
        """
        Update population and fitness tensors.

        The population tensor is overwritten in place (under no_grad) so the
        existing tensor object is kept.

        Args:
            new_pop: New population tensor [pop_size, n_var].
            new_fitness: New fitness tensor [pop_size].
        """
        with torch.no_grad():
            self._population.copy_(new_pop)
        self.state.fitness = new_fitness
        self.state.population = self._population

    # =========================================================================
    # Hyperparameter Access
    # =========================================================================

    def _get_hyperparams(self) -> Dict[str, Any]:
        """Return current hyperparameter values."""
        params = {
            'pop_size': self.pop_size,
            'memory_size': self.memory_size,
            'p_best_rate': self.p_best_rate,
            'archive_rate': self.archive_rate,
            'adaptive': self.adaptive,
            'differentiable': self.differentiable,
            'M_F_mean': float(self.M_F.mean().item()),
            'M_CR_mean': float(self.M_CR.mean().item()),
            'archive_size': self.memory.archive_size,
        }
        return params

    # =========================================================================
    # State Management
    # =========================================================================

    @torch.no_grad()
    def _clamp_hyperparams(self) -> None:
        """Clamp learnable hyperparameters to valid ranges."""
        if self.adaptive:
            # F memory in (0, 1]
            self._M_F_param.clamp_(min=1e-4, max=1.0)
            # CR memory in [0, 1]
            self._M_CR_param.clamp_(min=0.0, max=1.0)

    def update_state(self) -> None:
        """Commit pending changes and clamp hyperparameters."""
        super().update_state()
        self._clamp_hyperparams()

        # Sync learnable memory back to SHADEMemory structure
        if self.adaptive:
            self.memory.M_F = self._M_F_param.detach().clone()
            self.memory.M_CR = self._M_CR_param.detach().clone()

    # =========================================================================
    # String Representation
    # =========================================================================

    def __repr__(self) -> str:
        return (
            f"SHADE(pop_size={self.pop_size}, "
            f"memory_size={self.memory_size}, "
            f"p_best_rate={self.p_best_rate:.2f}, "
            f"adaptive={self.adaptive}, "
            f"differentiable={self.differentiable})"
        )
829
+
830
+
831
+ # =============================================================================
832
+ # L-SHADE Algorithm (SHADE with Linear Population Size Reduction)
833
+ # =============================================================================
834
+
835
class LSHADE(SHADE):
    """
    L-SHADE: SHADE with Linear Population Size Reduction (LPSR).

    L-SHADE extends SHADE by linearly reducing the population size during
    optimisation. This allows early exploration with a large population
    and later exploitation with a focused small population.

    Population size after ``n_evals`` fitness evaluations:
        N_g = round((N_min - N_init) / max_evals * n_evals + N_init)

    The schedule is only active once a positive budget has been supplied
    through :meth:`set_max_evals`; otherwise the population size is left
    unchanged.

    Args:
        pop_size_init: Initial population size. Default: 18 * n_var
            (resolved in ``_setup``; 100 is passed to the parent
            constructor as a placeholder until then).
        pop_size_min: Minimum population size. Default: 4.
        memory_size: Size of success-history memory (H). Default: 100.
        p_best_rate: Fraction of top individuals for pbest selection.
            Default: 0.1.
        archive_rate: Archive size as fraction of pop_size. Default: 2.6.
        init_F: Initial F memory values.
        init_CR: Initial CR memory values.
        sampling: Operator for initial population generation.
        repair: Repair operator for constraint handling.
        adaptive: If True, memory becomes learnable.
        differentiable: If True, population becomes learnable.
        selection_temperature: Temperature for differentiable selection.
        seed: Random seed.
        device: Computation device.
        dtype: Tensor dtype.

    Attributes:
        pop_size_init: Initial population size.
        pop_size_min: Minimum population size.
        max_evals: Maximum evaluations (set via set_max_evals).

    Example:
        >>> # Standard L-SHADE for 30D problem
        >>> lshade = LSHADE(pop_size_init=18*30, pop_size_min=4)
        >>> lshade.set_max_evals(100000)
        >>> result = minimize(problem, lshade, max_evals=100000)
    """

    def __init__(
        self,
        pop_size_init: Optional[int] = None,
        pop_size_min: int = 4,
        memory_size: int = 100,
        p_best_rate: float = 0.1,
        archive_rate: float = 2.6,  # L-SHADE default
        init_F: float = 0.5,
        init_CR: float = 0.5,
        sampling: Optional[nn.Module] = None,
        repair: Optional[nn.Module] = None,
        adaptive: bool = False,
        differentiable: bool = False,
        selection_temperature: float = 1.0,
        seed: Optional[int] = None,
        device: Optional[Union[str, torch.device]] = None,
        dtype: torch.dtype = torch.float32,
    ) -> None:
        self.pop_size_init = pop_size_init  # Will be set in _setup if None
        self.pop_size_min = pop_size_min
        self._max_evals: Optional[int] = None

        # Use pop_size_init as initial pop_size, or a placeholder of 100
        # until _setup can derive the default from the problem dimension.
        init_pop_size = pop_size_init if pop_size_init is not None else 100

        super().__init__(
            pop_size=init_pop_size,
            memory_size=memory_size,
            p_best_rate=p_best_rate,
            archive_rate=archive_rate,
            init_F=init_F,
            init_CR=init_CR,
            sampling=sampling,
            repair=repair,
            differentiable=differentiable,
            adaptive=adaptive,
            selection_temperature=selection_temperature,
            seed=seed,
            device=device,
            dtype=dtype,
        )

    def _setup(self) -> None:
        """L-SHADE-specific setup after initialization."""
        # Set default pop_size_init based on problem dimension
        if self.pop_size_init is None:
            self.pop_size_init = 18 * self.problem.n_var
            self._pop_size = self.pop_size_init

        # Create success-history memory; the archive capacity is sized from
        # the initial population and shrinks later in _reduce_population.
        self.memory = SHADEMemory.create(
            H=self.memory_size,
            max_archive_size=int(self.archive_rate * self.pop_size_init),
            init_F=self._init_F,
            init_CR=self._init_CR,
            device=self.device,
            dtype=self.dtype,
        )

        # Make memory learnable if adaptive mode
        if self.adaptive:
            self._M_F_param = nn.Parameter(self.memory.M_F.clone())
            self._M_CR_param = nn.Parameter(self.memory.M_CR.clone())

        # Store per-individual F and CR
        self._current_F = None
        self._current_CR = None

    def set_max_evals(self, max_evals: int) -> None:
        """
        Set maximum evaluations for population size reduction.

        Must be called before running the algorithm; without a positive
        budget the population size is never reduced.

        Args:
            max_evals: Maximum fitness evaluations.
        """
        self._max_evals = max_evals

    @property
    def target_pop_size(self) -> int:
        """
        Calculate target population size based on current evaluations.

        Returns:
            Target population size for current generation. Falls back to
            the current population size when no positive evaluation budget
            is set or ``pop_size_init`` has not been resolved yet.
        """
        budget = self._max_evals
        N_init = self.pop_size_init

        # Without a usable budget (None / non-positive) or a resolved initial
        # size the schedule is undefined; report the current size instead of
        # failing with ZeroDivisionError / TypeError.
        if budget is None or budget <= 0 or N_init is None:
            return self._pop_size

        N_min = self.pop_size_min

        # Linear reduction formula, floored at N_min once the budget is spent
        N_g = round((N_min - N_init) / budget * self.n_evals + N_init)
        return max(N_g, N_min)

    def _reduce_population(self) -> None:
        """
        Reduce population size according to LPSR schedule.

        Keeps the ``target_pop_size`` individuals with the lowest fitness
        values, then shrinks the archive capacity to match.
        """
        target_size = self.target_pop_size

        if target_size >= self._pop_size:
            return

        # Ascending sort: the first target_size indices are the ones to keep
        keep_idx = torch.argsort(self.fitness)[:target_size]

        # Keep only best individuals
        with torch.no_grad():
            new_pop = self.population[keep_idx].clone()
            new_fitness = self.fitness[keep_idx].clone()

            # Resize population tensor
            if self.differentiable:
                self._population = nn.Parameter(new_pop)
            else:
                # Re-register buffer with new size
                delattr(self, '_population')
                self.register_buffer('_population', new_pop)

            self.state.fitness = new_fitness
            self.state.population = self._population
            self._pop_size = target_size

        # Also reduce archive if needed: keep a random subset of entries
        self.memory.max_archive_size = int(self.archive_rate * target_size)
        if self.memory.archive_size > self.memory.max_archive_size:
            perm = torch.randperm(self.memory.archive_size, device=self.memory.archive.device)
            self.memory.archive = self.memory.archive[perm[:self.memory.max_archive_size]]
            self.memory.archive_size = self.memory.max_archive_size

    def _advance(self, offspring: Tensor, offspring_fitness: Tensor) -> None:
        """
        Run the parent advance step, then reduce the population.

        Args:
            offspring: Trial vectors.
            offspring_fitness: Fitness of trials.
        """
        # Standard SHADE advance
        super()._advance(offspring, offspring_fitness)

        # Apply population size reduction
        self._reduce_population()

    def _get_hyperparams(self) -> Dict[str, Any]:
        """Return current hyperparameter values, including the LPSR settings."""
        params = super()._get_hyperparams()
        params.update({
            'pop_size_init': self.pop_size_init,
            'pop_size_min': self.pop_size_min,
            'target_pop_size': self.target_pop_size,
            'max_evals': self._max_evals,
        })
        return params

    def __repr__(self) -> str:
        return (
            f"LSHADE(pop_size={self.pop_size}, "
            f"pop_size_init={self.pop_size_init}, "
            f"pop_size_min={self.pop_size_min}, "
            f"memory_size={self.memory_size}, "
            f"adaptive={self.adaptive}, "
            f"differentiable={self.differentiable})"
        )
1050
+
1051
+
1052
+ # =============================================================================
1053
+ # Convenience Factory Functions
1054
+ # =============================================================================
1055
+
1056
def shade_default(
    pop_size: int = 100,
    memory_size: int = 100,
    p_best_rate: float = 0.1,
    **kwargs,
) -> SHADE:
    """
    Build a SHADE instance using the standard default settings.

    Args:
        pop_size: Population size.
        memory_size: Size of success-history memory.
        p_best_rate: Fraction of top individuals for pbest.
        **kwargs: Additional arguments forwarded unchanged to SHADE.

    Returns:
        Configured SHADE instance.
    """
    options = dict(
        pop_size=pop_size,
        memory_size=memory_size,
        p_best_rate=p_best_rate,
        **kwargs,
    )
    return SHADE(**options)
1080
+
1081
+
1082
def shade_adaptive(
    pop_size: int = 100,
    memory_size: int = 100,
    adaptive: bool = True,
    differentiable: bool = True,
    **kwargs,
) -> SHADE:
    """
    Build a SHADE instance with learnable memory and differentiable population.

    Args:
        pop_size: Population size.
        memory_size: Size of success-history memory.
        adaptive: If True, memory is learnable.
        differentiable: If True, population is learnable.
        **kwargs: Additional arguments forwarded unchanged to SHADE.

    Returns:
        Configured SHADE instance.
    """
    options = dict(
        pop_size=pop_size,
        memory_size=memory_size,
        adaptive=adaptive,
        differentiable=differentiable,
        **kwargs,
    )
    return SHADE(**options)
1109
+
1110
+
1111
def lshade_default(
    pop_size_init: Optional[int] = None,
    pop_size_min: int = 4,
    memory_size: int = 100,
    **kwargs,
) -> LSHADE:
    """
    Build an L-SHADE instance using the standard default settings.

    When pop_size_init is None, LSHADE resolves it to 18 * n_var during setup.

    Args:
        pop_size_init: Initial population size (None = 18*n_var).
        pop_size_min: Minimum population size.
        memory_size: Size of success-history memory.
        **kwargs: Additional arguments forwarded unchanged to LSHADE.

    Returns:
        Configured LSHADE instance.
    """
    options = dict(
        pop_size_init=pop_size_init,
        pop_size_min=pop_size_min,
        memory_size=memory_size,
        **kwargs,
    )
    return LSHADE(**options)
1137
+
1138
+
1139
def lshade_adaptive(
    pop_size_init: Optional[int] = None,
    pop_size_min: int = 4,
    adaptive: bool = True,
    differentiable: bool = True,
    **kwargs,
) -> LSHADE:
    """
    Build an L-SHADE instance with learnable memory and differentiable population.

    Args:
        pop_size_init: Initial population size (None = 18*n_var).
        pop_size_min: Minimum population size.
        adaptive: If True, memory is learnable.
        differentiable: If True, population is learnable.
        **kwargs: Additional arguments forwarded unchanged to LSHADE.

    Returns:
        Configured LSHADE instance.
    """
    options = dict(
        pop_size_init=pop_size_init,
        pop_size_min=pop_size_min,
        adaptive=adaptive,
        differentiable=differentiable,
        **kwargs,
    )
    return LSHADE(**options)