evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1117 @@
1
+ """
2
+ Crossover operators for recombination.
3
+
4
+ This module provides crossover (recombination) operators that
5
+ combine genetic information from parent individuals to create
6
+ offspring. All operators support both classical and differentiable
7
+ (i.e., adaptive) modes.
8
+
9
+ Available crossovers:
10
+ - SBXCrossover: Simulated Binary Crossover (GA)
11
+ - BlendCrossover: BLX-alpha crossover (GA)
12
+ - BinomialCrossover: DE-style binomial crossover
13
+ - ExponentialCrossover: DE-style exponential crossover
14
+ - UniformCrossover: Simple uniform crossover
15
+ - ArithmeticCrossover: Weighted average of parents
16
+ - NPointCrossover: N-point crossover
17
+
18
+ Differentiable Mode:
19
+ When `adaptive=True`, crossover masks use Binary-Concrete
20
+ (Gumbel-Sigmoid) relaxation with straight-through estimator,
21
+ allowing gradients to flow through crossover decisions.
22
+
23
+ Per-Individual/Per-Gene Parameters:
24
+ All crossover operators support four parameter configurations via
25
+ optional runtime overrides in forward(). This is essential for
26
+ self-adaptive algorithms like SHADE, jDE, or self-adaptive GAs.
27
+
28
+ Configurations:
29
+ - Fixed (scalar): Same value for all individuals and genes
30
+ - Per-gene [D]: Different value per gene, same across individuals
31
+ - Per-individual [N]: Different value per individual, same across genes
32
+ - Per-gene + Per-individual [N, D]: Full matrix, different for each
33
+
34
+ Example:
35
+ >>> # SHADE-style per-individual CR
36
+ >>> cr_per_ind = torch.rand(pop_size) # [N]
37
+ >>> trial = crossover(target, donor, cr=cr_per_ind)
38
+ >>>
39
+ >>> # Per-gene CR
40
+ >>> cr_per_gene = torch.rand(n_var) # [D]
41
+ >>> trial = crossover(target, donor, cr=cr_per_gene)
42
+ >>>
43
+ >>> # Full matrix
44
+ >>> cr_matrix = torch.rand(pop_size, n_var) # [N, D]
45
+ >>> trial = crossover(target, donor, cr=cr_matrix)
46
+
47
+ Example:
48
+ >>> from evograd.operators import SBXCrossover
49
+ >>>
50
+ >>> # Classical mode
51
+ >>> crossover = SBXCrossover(eta=15, prob=0.9)
52
+ >>> offspring = crossover(parent1, parent2)
53
+ >>>
54
+ >>> # Differentiable mode
55
+ >>> crossover = SBXCrossover(
56
+ ... eta=15,
57
+ ... prob=0.9,
58
+ ... adaptive=True,
59
+ ... learn_eta=True,
60
+ ... )
61
+ >>> offspring = crossover(parent1, parent2)
62
+ """
63
+
64
+ from __future__ import annotations
65
+
66
+ from abc import ABC, abstractmethod
67
+ from typing import Optional, Tuple, Union
68
+
69
+ import math
70
+ import torch
71
+ import torch.nn as nn
72
+ from torch import Tensor
73
+
74
+ from evograd.operators.relaxations import binary_concrete, expand_param
75
+
76
+ __all__ = [
77
+ "Crossover",
78
+ "SBXCrossover",
79
+ "BlendCrossover",
80
+ "BinomialCrossover",
81
+ "ExponentialCrossover",
82
+ "UniformCrossover",
83
+ "ArithmeticCrossover",
84
+ "NPointCrossover",
85
+ ]
86
+
87
+
88
+ # =============================================================================
89
+ # Base Crossover Class
90
+ # =============================================================================
91
+
92
class Crossover(nn.Module, ABC):
    """
    Abstract base class for crossover operators.

    Stores the two quantities shared by every operator in this module:
    a temperature (kept as its logarithm so it stays positive) and a
    crossover probability (kept as a logit so it stays in (0, 1)).
    Each is registered either as a learnable ``nn.Parameter`` or as a
    plain buffer.

    Subclasses must implement:
        - _crossover(): Perform crossover between parents

    Args:
        prob: Crossover probability (per individual or per gene).
            Values are clamped to [1e-7, 1 - 1e-7] before the logit
            is taken.
        adaptive: If True, use Binary-Concrete for soft masks.
        temperature: Temperature for Binary-Concrete. Must be
            non-negative; it is clamped to [0.05, 10.0] at the start of
            every forward pass.
        learn_temperature: If True (and ``adaptive`` is True), the
            temperature is a learnable parameter; otherwise a buffer.
        learn_prob: If True (and ``adaptive`` is True), the crossover
            probability is a learnable parameter; otherwise a buffer.
        n_var: Number of variables. When given, the probability is
            stored per gene with shape [n_var]; otherwise as a scalar.

    Raises:
        ValueError: If ``temperature`` is negative or NaN.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional parameter overrides that
        support four configurations:

        - scalar: Fixed value for all individuals and genes
        - [D] tensor: Per-gene values (same across individuals)
        - [N] tensor: Per-individual values (same across genes)
        - [N, D] tensor: Full matrix (different for each individual and gene)

        When an override is provided, it takes precedence over the stored
        parameter. This enables self-adaptive algorithms like SHADE.
    """

    def __init__(
        self,
        prob: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_temperature: bool = True,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__()

        # A negative (or NaN) temperature has no logarithm: the stored value
        # would be NaN, and clamp_() leaves NaN untouched, so every mask built
        # from it would silently become NaN. Zero is still accepted because
        # log(0) = -inf is pulled up to the minimum by _clamp_temperature().
        if not temperature >= 0:
            raise ValueError(
                f"temperature must be non-negative, got {temperature!r}"
            )

        self._MIN_TEMPERATURE = 0.05
        self._MAX_TEMPERATURE = 10.0

        self.adaptive = adaptive
        self.n_var = n_var

        # Temperature parameter (log for positivity)
        log_temperature = torch.tensor(temperature).log()
        if learn_temperature and adaptive:
            self._log_temperature = nn.Parameter(log_temperature)
        else:
            self.register_buffer("_log_temperature", log_temperature)

        # Crossover probability as logits: per-gene vector when n_var is
        # known, otherwise a scalar that is expanded on demand.
        prob_logit = self._prob_to_logit(prob)
        if n_var is not None:
            initial_logits = torch.full((n_var,), prob_logit)
        else:
            initial_logits = torch.tensor(prob_logit)

        if learn_prob and adaptive:
            self.prob_logits = nn.Parameter(initial_logits)
        else:
            self.register_buffer("prob_logits", initial_logits)

    @staticmethod
    def _prob_to_logit(p: float, eps: float = 1e-7) -> float:
        """Convert a probability to a logit, clamping p into [eps, 1 - eps]."""
        p = max(min(p, 1 - eps), eps)
        return torch.logit(torch.tensor(p)).item()

    @property
    def temperature(self) -> Tensor:
        """Current temperature value (exp of the stored log-temperature)."""
        return self._log_temperature.exp()

    @property
    def prob(self) -> Tensor:
        """Current crossover probability (sigmoid of the stored logits)."""
        return torch.sigmoid(self.prob_logits)

    def _get_prob_logits(self, n_var: int, device: torch.device) -> Tensor:
        """Return the probability logits on ``device``; a scalar is expanded to [n_var]."""
        logits = self.prob_logits.to(device)
        if logits.dim() == 0:
            # Scalar -> expand to n_var
            return logits.expand(n_var)
        return logits

    def _clamp_temperature(self) -> None:
        """Clamp the stored log-temperature in place to the allowed range."""
        if hasattr(self, "_log_temperature") and self._log_temperature is not None:
            # no_grad: the in-place clamp must not be recorded by autograd.
            with torch.no_grad():
                self._log_temperature.clamp_(
                    math.log(self._MIN_TEMPERATURE),
                    math.log(self._MAX_TEMPERATURE),
                )

    @abstractmethod
    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        **kwargs,
    ) -> Tensor:
        """
        Perform crossover between parent pairs.

        Args:
            parent1: First parents [n_pairs, n_var].
            parent2: Second parents [n_pairs, n_var].
            **kwargs: Optional per-individual/per-gene parameter overrides.

        Returns:
            Offspring [n_pairs, n_var].
        """
        pass

    def forward(
        self,
        parent1: Tensor,
        parent2: Tensor,
        **kwargs,
    ) -> Tensor:
        """
        Apply crossover to parent pairs.

        The temperature is clamped to its allowed range first, then the
        call is delegated to the subclass's ``_crossover``.

        Args:
            parent1: First parents [n_pairs, n_var].
            parent2: Second parents [n_pairs, n_var].
            **kwargs: Optional parameter overrides for per-individual or
                per-gene operation. Supported kwargs depend on the specific
                crossover operator (e.g., cr, eta, prob, alpha).

                Each parameter can be:
                - scalar: Fixed value for all
                - [D] tensor: Per-gene values
                - [N] tensor: Per-individual values
                - [N, D] tensor: Full matrix

        Returns:
            Offspring [n_pairs, n_var].

        Example:
            >>> # Standard call (uses stored parameters)
            >>> offspring = crossover(parent1, parent2)
            >>>
            >>> # Per-individual CR override (for SHADE)
            >>> offspring = crossover(parent1, parent2, cr=cr_per_individual)
            >>>
            >>> # Per-gene eta override
            >>> offspring = crossover(parent1, parent2, eta=eta_per_gene)
        """
        self._clamp_temperature()
        return self._crossover(parent1, parent2, **kwargs)

    # Note: Do NOT override __call__. nn.Module.__call__ dispatches to
    # forward() and fires registered hooks (forward_pre_hooks, forward_hooks,
    # and the autograd profiler). Overriding __call__ would bypass all of these.
265
+
266
+
267
+ # =============================================================================
268
+ # Simulated Binary Crossover (SBX)
269
+ # =============================================================================
270
+
271
class SBXCrossover(Crossover):
    """
    Simulated Binary Crossover (SBX).

    SBX mimics single-point crossover for real-valued variables: the
    offspring is placed around the parents, with a spread governed by
    the distribution index eta. A larger eta keeps offspring close to
    the parents (exploitation); a smaller eta spreads them out
    (exploration).

    Args:
        eta: Distribution index (higher = tighter spread). Stored as
            its logarithm so the value stays positive.
        prob: Crossover probability per gene.
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.
        learn_eta: If True (and ``adaptive`` is True), eta is learnable.
        learn_prob: If True, crossover probability is learnable.
        n_var: Number of variables (for per-gene probability).

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - eta: Distribution index [scalar, D, N, or N×D]
        - prob: Crossover probability [scalar, D, N, or N×D]

    Example:
        >>> crossover = SBXCrossover(eta=15, prob=0.9)
        >>> offspring = crossover(parent1, parent2)
        >>>
        >>> # Per-individual eta (for self-adaptive GA)
        >>> eta_per_ind = torch.rand(pop_size) * 20 + 5  # [N]
        >>> offspring = crossover(parent1, parent2, eta=eta_per_ind)

    Reference:
        Deb & Agrawal (1995). Simulated Binary Crossover for
        Continuous Search Space.
    """

    def __init__(
        self,
        eta: float = 15.0,
        prob: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_eta: bool = True,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_prob,
            n_var=n_var,
        )

        # Keep log(eta) so that exp() always yields a positive index.
        log_eta = torch.tensor(eta).log()
        if adaptive and learn_eta:
            self._log_eta = nn.Parameter(log_eta)
        else:
            self.register_buffer("_log_eta", log_eta)

    @property
    def eta(self) -> Tensor:
        """Current distribution index (exp of the stored log value)."""
        return self._log_eta.exp()

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        eta: Optional[Tensor] = None,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Perform SBX crossover.

        Args:
            parent1: First parents [N, D].
            parent2: Second parents [N, D].
            eta: Optional distribution index override [scalar, D, N, or N×D].
            prob: Optional crossover probability override [scalar, D, N, or N×D].

        Returns:
            Offspring [N, D].
        """
        rows, cols = parent1.shape
        device, dtype = parent1.device, parent1.dtype

        # Overrides win over the stored values; both end up as [N, D].
        eta_full = expand_param(eta, self.eta, rows, cols, device, dtype)
        prob_full = expand_param(prob, self.prob, rows, cols, device, dtype)

        # Gate deciding which genes are actually crossed.
        if not self.adaptive:
            # Hard Bernoulli gate
            gate = (torch.rand(rows, cols, device=device) < prob_full).float()
        else:
            # Relaxed gate: probabilities are turned into logits first.
            gate_logits = torch.logit(prob_full.clamp(1e-7, 1 - 1e-7))
            gate = binary_concrete(gate_logits, temperature=self.temperature)

        # Spread factor: contracting branch for u <= 0.5, expanding otherwise.
        u = torch.rand(rows, cols, device=device, dtype=dtype)
        exponent = 1.0 / (eta_full + 1)
        contracting = (2 * u).pow(exponent)
        expanding = (2 * (1 - u)).pow(-exponent)
        spread = torch.where(u <= 0.5, contracting, expanding)

        # Where the gate is closed the spread falls back to 1, which makes
        # the formula below return parent1 unchanged.
        spread = gate * spread + (1.0 - gate)

        return 0.5 * ((1 + spread) * parent1 + (1 - spread) * parent2)

    def __repr__(self) -> str:
        eta_text = f"eta={self.eta.item():.2f}"
        prob_text = f"prob={self.prob.mean().item():.3f}"
        return f"SBXCrossover({eta_text}, {prob_text}, adaptive={self.adaptive})"
406
+
407
+
408
+ # =============================================================================
409
+ # Blend Crossover (BLX-alpha)
410
+ # =============================================================================
411
+
412
class BlendCrossover(Crossover):
    """
    Blend Crossover (BLX-alpha).

    Offspring are drawn uniformly from an interval around the two
    parents. The interval reaches alpha * (parent_max - parent_min)
    beyond the parents on each side.

    With alpha=0, offspring are sampled between parents.
    With alpha=0.5 (default), the interval is extended by 50%.

    Args:
        alpha: Extension factor for the interval. Stored as a logit so
            that the effective value is 2 * sigmoid(logit), i.e. in [0, 2].
        prob: Crossover probability (per individual).
        adaptive: If True, use soft interpolation. In this mode the
            crossover probability is not applied.
        learn_alpha: If True (and ``adaptive`` is True), alpha is learnable.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - alpha: Extension factor [scalar, D, N, or N×D]
        - prob: Crossover probability [scalar, D, N, or N×D]

    Example:
        >>> crossover = BlendCrossover(alpha=0.5)
        >>> offspring = crossover(parent1, parent2)
        >>>
        >>> # Per-individual alpha
        >>> alpha_per_ind = torch.rand(pop_size)  # [N]
        >>> offspring = crossover(parent1, parent2, alpha=alpha_per_ind)

    Reference:
        Eshelman & Schaffer (1993). Real-Coded Genetic Algorithms
        and Interval-Schemata.
    """

    def __init__(
        self,
        alpha: float = 0.5,
        prob: float = 0.9,
        adaptive: bool = False,
        learn_alpha: bool = True,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=1.0,
            learn_temperature=False,
            learn_prob=False,
            n_var=None,
        )

        # alpha = 2 * sigmoid(logit)  <=>  logit = logit(alpha / 2)
        half_alpha = torch.tensor(alpha / 2.0).clamp(1e-7, 1 - 1e-7)
        alpha_logit = torch.logit(half_alpha)

        if adaptive and learn_alpha:
            self._alpha_logit = nn.Parameter(alpha_logit)
        else:
            self.register_buffer("_alpha_logit", alpha_logit)

    @property
    def alpha(self) -> Tensor:
        """Current alpha value in [0, 2]."""
        return 2.0 * torch.sigmoid(self._alpha_logit)

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        alpha: Optional[Tensor] = None,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Perform blend crossover.

        Args:
            parent1: First parents [N, D].
            parent2: Second parents [N, D].
            alpha: Optional extension factor override [scalar, D, N, or N×D].
            prob: Optional crossover probability override [scalar, D, N, or N×D].
                Only its first column is used, as a per-individual
                probability, and only when ``adaptive`` is False.

        Returns:
            Offspring [N, D].
        """
        rows, cols = parent1.shape
        device, dtype = parent1.device, parent1.dtype

        alpha_full = expand_param(alpha, self.alpha, rows, cols, device, dtype)
        prob_full = expand_param(prob, self.prob, rows, cols, device, dtype)

        # Gene-wise envelope of the two parents, widened by alpha * span.
        low = torch.minimum(parent1, parent2)
        high = torch.maximum(parent1, parent2)
        span = high - low
        lower = low - alpha_full * span
        upper = high + alpha_full * span

        # Uniform draw inside the widened interval.
        u = torch.rand(rows, cols, device=device, dtype=dtype)
        offspring = lower + u * (upper - lower)

        if self.adaptive:
            return offspring

        # Per-individual coin flip: rows that lose keep parent1 as-is.
        coin = torch.rand(rows, 1, device=device)
        do_cross = (coin < prob_full[:, :1]).float()
        return do_cross * offspring + (1 - do_cross) * parent1

    def __repr__(self) -> str:
        alpha_text = f"alpha={self.alpha.item():.3f}"
        prob_text = f"prob={self.prob.item():.3f}"
        return f"BlendCrossover({alpha_text}, {prob_text})"
533
+
534
+
535
+ # =============================================================================
536
+ # Binomial Crossover (DE-style)
537
+ # =============================================================================
538
+
539
class BinomialCrossover(Crossover):
    """
    Binomial (uniform) crossover for Differential Evolution.

    Each gene is independently selected from either the target
    or donor vector based on the crossover rate. At least one
    gene is always taken from the donor (j_rand).

    Args:
        cr: Crossover rate (probability of taking donor gene).
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.
        learn_cr: If True, crossover rate is learnable.
        n_var: Number of variables (for per-gene CR).

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - cr: Crossover rate [scalar, D, N, or N×D]

        This is essential for SHADE/L-SHADE where each individual
        has its own CR value sampled from the historical memory.

    Example:
        >>> # target = current individual, donor = mutant vector
        >>> crossover = BinomialCrossover(cr=0.9)
        >>> trial = crossover(target, donor)
        >>>
        >>> # SHADE-style per-individual CR
        >>> cr_per_ind = torch.rand(pop_size)  # [N]
        >>> trial = crossover(target, donor, cr=cr_per_ind)

    Note:
        In DE terminology:
        - parent1 = target vector (current individual)
        - parent2 = donor vector (mutant)
        - output = trial vector
    """

    def __init__(
        self,
        cr: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_cr: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__(
            prob=cr,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_cr,
            n_var=n_var,
        )

    @property
    def cr(self) -> Tensor:
        """Current crossover rate (alias of the base-class ``prob``)."""
        return self.prob

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        cr: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Binomial crossover.

        Args:
            parent1: Target vectors [N, D].
            parent2: Donor vectors [N, D].
            cr: Optional crossover rate override [scalar, D, N, or N×D].

        Returns:
            Trial vectors [N, D].
        """
        n_pairs, n_var = parent1.shape
        device = parent1.device
        dtype = parent1.dtype

        # Expand CR to [N, D]
        cr_expanded = expand_param(cr, self.cr, n_pairs, n_var, device, dtype)

        if self.adaptive:
            # Convert CR to logits for Binary-Concrete
            cr_logits = torch.logit(cr_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(cr_logits, temperature=self.temperature)
        else:
            # Hard Bernoulli mask
            mask = (torch.rand(n_pairs, n_var, device=device) < cr_expanded).float()

        # Ensure at least one gene from donor (j_rand). The forced entry is
        # written with an out-of-place scatter: in adaptive mode the mask may
        # be part of an autograd graph, and mutating it in place could
        # invalidate values that graph has saved for the backward pass.
        j_rand = torch.randint(0, n_var, (n_pairs,), device=device)
        mask = mask.scatter(1, j_rand.unsqueeze(1), 1.0)

        # Trial vector: mask=1 -> donor, mask=0 -> target
        trial = mask * parent2 + (1.0 - mask) * parent1

        return trial

    def __repr__(self) -> str:
        return (
            f"BinomialCrossover("
            f"cr={self.cr.mean().item():.3f}, "
            f"adaptive={self.adaptive})"
        )
650
+
651
+
652
+ # =============================================================================
653
+ # Exponential Crossover (DE-style)
654
+ # =============================================================================
655
+
656
class ExponentialCrossover(Crossover):
    """
    Exponential crossover for Differential Evolution.

    Copies a contiguous segment of genes from the donor vector,
    starting at a random position. The segment length follows
    a geometric distribution with parameter CR.

    Args:
        cr: Crossover rate (probability of extending segment).
        adaptive: If True, the per-gene continuation decisions are drawn
            through ``binary_concrete`` instead of a hard comparison.
        temperature: Temperature passed to ``binary_concrete`` in
            adaptive mode.
        learn_cr: If True, crossover rate is learnable.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - cr: Crossover rate [scalar or N] (per-gene not supported
              for exponential due to contiguous segment nature)

    Example:
        >>> crossover = ExponentialCrossover(cr=0.9)
        >>> trial = crossover(target, donor)
        >>>
        >>> # Per-individual CR
        >>> cr_per_ind = torch.rand(pop_size)  # [N]
        >>> trial = crossover(target, donor, cr=cr_per_ind)

    Note:
        Exponential crossover tends to preserve more structure
        from the target vector compared to binomial crossover.

    .. warning:: Differentiability caveat

        The contiguous-segment mask is a ``torch.cumprod`` over the
        continuation decisions. With ``adaptive=False`` those decisions
        are hard comparisons and carry no gradient. With
        ``adaptive=True`` they come from ``binary_concrete``, so a
        gradient path to CR exists, but the signal through ``cumprod``
        of near-binary values is noisy: a small change in an early
        continuation decision can flip the entire segment length. For
        reliable gradient-based hyperparameter learning, prefer
        :class:`BinomialCrossover` with ``adaptive=True``.
    """

    def __init__(
        self,
        cr: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_cr: bool = True,
    ) -> None:
        super().__init__(
            prob=cr,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_cr,
            n_var=None,
        )

    @property
    def cr(self) -> Tensor:
        """Current crossover rate (alias of the base-class ``prob``)."""
        return self.prob

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        cr: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Exponential crossover.

        Args:
            parent1: Target vectors [N, D].
            parent2: Donor vectors [N, D].
            cr: Optional crossover rate override [scalar or N].
                Note: Per-gene CR not supported for exponential crossover;
                a 2-D override is reduced to its mean over dim 1.

        Returns:
            Trial vectors [N, D].

        Raises:
            ValueError: If a 1-D ``cr`` does not have length N, or if
                ``cr`` has more than two dimensions.
        """
        n_pairs, n_var = parent1.shape
        device = parent1.device
        dtype = parent1.dtype

        # Get CR value (scalar or per-individual [N])
        if cr is None:
            cr_val = self.cr
        else:
            cr_val = cr if isinstance(cr, Tensor) else torch.tensor(cr)

        # Ensure on correct device
        cr_val = cr_val.to(device=device, dtype=dtype)

        # Normalise to one rate per individual, shape [N].
        if cr_val.dim() == 0:
            cr_val = cr_val.expand(n_pairs)
        elif cr_val.dim() == 1:
            if cr_val.shape[0] != n_pairs:
                raise ValueError(f"CR must be scalar or [N={n_pairs}], got [{cr_val.shape[0]}]")
        elif cr_val.dim() == 2:
            # For exponential, use mean across genes if [N, D] provided
            cr_val = cr_val.mean(dim=1)
        else:
            raise ValueError(
                f"CR must be scalar, [N] or [N, D], got {cr_val.dim()} dimensions"
            )

        # Random offset for each individual; it fixes where the segment starts.
        j_rand = torch.randint(0, n_var, (n_pairs,), device=device)

        # indices[i, k] is the gene that position k of the rolled frame maps to.
        cols = torch.arange(n_var, device=device).unsqueeze(0)
        indices = (cols - j_rand.unsqueeze(1)) % n_var

        cr_expanded = cr_val.unsqueeze(1)  # [N, 1]

        if self.adaptive:
            # Continuation gates via binary_concrete, called the same way
            # BinomialCrossover calls it, so the gates stay connected to CR
            # and to the temperature instead of being cut off by a hard
            # comparison.
            cont_logits = torch.logit(
                cr_expanded.expand(n_pairs, n_var).clamp(1e-7, 1 - 1e-7)
            )
            cont = binary_concrete(cont_logits, temperature=self.temperature)
        else:
            # Hard continuation: 1 while u < CR (per-individual CR).
            u = torch.rand(n_pairs, n_var, device=device)
            u_rolled = u.gather(1, indices)
            cont = (u_rolled < cr_expanded).float()

        # Always take at least one gene: force the first rolled position to 1.
        # Built with cat rather than an in-place write so an autograd-tracked
        # `cont` is never mutated.
        cont = torch.cat([torch.ones_like(cont[:, :1]), cont[:, 1:]], dim=1)

        # Segment mask: 1 until first 0
        segment = torch.cumprod(cont, dim=1)

        # Roll back to original gene order (out-of-place scatter).
        mask = torch.zeros_like(segment).scatter(1, indices, segment)

        if not self.adaptive:
            # Hard mask
            mask = mask.detach()

        # Trial vector: mask=1 -> donor, mask=0 -> target
        trial = mask * parent2 + (1.0 - mask) * parent1

        return trial

    def __repr__(self) -> str:
        return (
            f"ExponentialCrossover("
            f"cr={self.cr.item():.3f}, "
            f"adaptive={self.adaptive})"
        )
798
+
799
+
800
+ # =============================================================================
801
+ # Uniform Crossover
802
+ # =============================================================================
803
+
804
class UniformCrossover(Crossover):
    """
    Uniform crossover.

    Every gene is taken from one parent or the other with equal
    probability (0.5). Simpler than binomial crossover as there's
    no CR parameter.

    Args:
        prob: Probability of crossover occurring per individual.
            Only applied when ``adaptive`` is False.
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - prob: Crossover probability [scalar, D, N, or N×D]

    Example:
        >>> crossover = UniformCrossover()
        >>> offspring = crossover(parent1, parent2)
    """

    def __init__(
        self,
        prob: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=False,
            n_var=None,
        )

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Uniform crossover.

        Args:
            parent1: First parents [N, D].
            parent2: Second parents [N, D].
            prob: Optional crossover probability override [scalar, D, N, or N×D].
                Only its first column is used, as a per-individual
                probability, and only when ``adaptive`` is False.

        Returns:
            Offspring [N, D].
        """
        rows, cols = parent1.shape
        device, dtype = parent1.device, parent1.dtype

        if self.adaptive:
            # Relaxed 50-50 gate: zero logits correspond to p = 0.5.
            even_logits = torch.zeros(rows, cols, device=device)
            pick_first = binary_concrete(even_logits, temperature=self.temperature)
            return pick_first * parent1 + (1.0 - pick_first) * parent2

        # Hard 50-50 gate for each gene.
        pick_first = (torch.rand(rows, cols, device=device) < 0.5).float()
        mixed = pick_first * parent1 + (1.0 - pick_first) * parent2

        # Per-individual coin flip: rows that lose keep parent1 as-is.
        prob_full = expand_param(prob, self.prob, rows, cols, device, dtype)
        coin = torch.rand(rows, 1, device=device)
        do_cross = (coin < prob_full[:, :1]).float()
        return do_cross * mixed + (1 - do_cross) * parent1

    def __repr__(self) -> str:
        prob_value = self.prob.item()
        return f"UniformCrossover(prob={prob_value:.3f})"
888
+
889
+
890
+ # =============================================================================
891
+ # Arithmetic Crossover
892
+ # =============================================================================
893
+
894
class ArithmeticCrossover(Crossover):
    """
    Arithmetic (intermediate) crossover.

    Every offspring is a convex blend of its two parents:
        offspring = alpha * parent1 + (1 - alpha) * parent2

    Args:
        alpha: Blend weight given to parent1. Pass None to draw a fresh
            weight uniformly from [0, 1) on every call instead of using
            a stored value.
        whole: Only consulted when alpha is random. True draws one
            weight per parent pair (shared by all genes); False draws
            an independent weight per gene.
        adaptive: Forwarded to the base class. Together with
            learn_alpha it decides whether alpha is trainable.
        learn_alpha: The stored alpha becomes an nn.Parameter only when
            both learn_alpha and adaptive are True; otherwise it is
            kept as a buffer.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - alpha: Weighting factor [scalar, D, N, or N×D]

    Example:
        >>> # Fixed alpha
        >>> crossover = ArithmeticCrossover(alpha=0.5, whole=True)
        >>> offspring = crossover(parent1, parent2)
        >>>
        >>> # Per-individual alpha
        >>> alpha_per_ind = torch.rand(pop_size)  # [N]
        >>> offspring = crossover(parent1, parent2, alpha=alpha_per_ind)
    """

    def __init__(
        self,
        alpha: Optional[float] = 0.5,
        whole: bool = True,
        adaptive: bool = False,
        learn_alpha: bool = True,
    ) -> None:
        super().__init__(
            prob=1.0,
            adaptive=adaptive,
            temperature=1.0,
            learn_temperature=False,
            learn_prob=False,
            n_var=None,
        )

        self.whole = whole
        self._random_alpha = alpha is None

        if self._random_alpha:
            # Placeholder so the attribute always exists; it is never read
            # while alpha is sampled randomly.
            self.register_buffer("_alpha_logit", torch.tensor(0.0))
            return

        # Store alpha in logit space so sigmoid() always maps it back into
        # (0, 1); clamping first keeps the logit finite at exactly 0 or 1.
        bounded_alpha = torch.tensor(alpha).clamp(1e-7, 1 - 1e-7)
        alpha_logit = torch.logit(bounded_alpha)

        trainable = learn_alpha and adaptive
        if trainable:
            self._alpha_logit = nn.Parameter(alpha_logit)
        else:
            self.register_buffer("_alpha_logit", alpha_logit)

    @property
    def alpha(self) -> Optional[Tensor]:
        """Stored blend weight as a tensor, or None when alpha is random."""
        return None if self._random_alpha else torch.sigmoid(self._alpha_logit)

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        alpha: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Blend two parent populations gene by gene.

        Args:
            parent1: First parents [N, D].
            parent2: Second parents [N, D].
            alpha: Optional weighting factor override [scalar, D, N, or N×D].
                When given it takes precedence over both the stored and
                the randomly sampled alpha.

        Returns:
            Offspring [N, D].
        """
        n_pairs, n_var = parent1.shape
        device, dtype = parent1.device, parent1.dtype

        # Precedence: explicit override > stored alpha > random draw.
        if alpha is not None:
            weight = alpha
        elif not self._random_alpha:
            weight = self.alpha
        else:
            # One column broadcasts a single weight across all genes.
            n_cols = 1 if self.whole else n_var
            weight = torch.rand(n_pairs, n_cols, device=device, dtype=dtype)

        # Bring the weight to the full [N, D] layout before blending.
        weight = expand_param(weight, torch.tensor(0.5), n_pairs, n_var, device, dtype)

        return weight * parent1 + (1.0 - weight) * parent2

    def __repr__(self) -> str:
        if not self._random_alpha:
            return f"ArithmeticCrossover(alpha={self.alpha.item():.3f}, whole={self.whole})"
        return f"ArithmeticCrossover(alpha=random, whole={self.whole})"
1008
+
1009
+
1010
+ # =============================================================================
1011
+ # N-Point Crossover
1012
+ # =============================================================================
1013
+
1014
class NPointCrossover(Crossover):
    """
    N-point crossover.

    Selects N random crossover points and alternates between
    parents at each point. Classic crossover operator for
    binary and real-coded GAs.

    The crossover points of one parent pair are distinct, so every
    point really switches the source parent (coinciding points would
    cancel each other out).

    Args:
        n_points: Number of crossover points (1 for single-point,
            2 for two-point, etc.). Must be >= 1. At call time it is
            capped at ``n_var - 1``, the number of interior positions.
        prob: Crossover probability per individual.
        adaptive: Forwarded to the base class. Inside ``_crossover`` it
            only disables the per-pair probability gate; the cut mask
            built here is always hard (0/1).
        temperature: Forwarded to the base class; ``_crossover`` itself
            does not read it.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - prob: Crossover probability [scalar, D, N, or N×D]

        Note: n_points cannot be overridden per-individual.

    Raises:
        ValueError: If ``n_points`` is smaller than 1.

    Example:
        >>> # Single-point crossover
        >>> crossover = NPointCrossover(n_points=1)
        >>>
        >>> # Two-point crossover
        >>> crossover = NPointCrossover(n_points=2)
    """

    def __init__(
        self,
        n_points: int = 1,
        prob: float = 0.9,
        adaptive: bool = False,
        temperature: float = 1.0,
    ) -> None:
        # Reject bad configuration before any module state is created.
        if n_points < 1:
            raise ValueError(f"n_points must be >= 1, got {n_points}")

        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=False,
            n_var=None,
        )

        self.n_points = n_points

    def _crossover(
        self,
        parent1: Tensor,
        parent2: Tensor,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        N-point crossover.

        Args:
            parent1: First parents [N, D].
            parent2: Second parents [N, D].
            prob: Optional crossover probability override [scalar, D, N, or N×D].
                Only the first column of the expanded value is used, i.e.
                the decision is made once per parent pair.

        Returns:
            Offspring [N, D] with the dtype of the parents. With fewer than
            two genes there is no interior crossover point, so a copy of
            ``parent1`` is returned.
        """
        n_pairs, n_var = parent1.shape
        device = parent1.device
        dtype = parent1.dtype

        # Valid cut positions are 1 .. n_var - 1. Cap the number of cuts at
        # what is available and bail out when there is none (n_var <= 1),
        # where torch.randint(1, n_var) used to raise on an empty range.
        n_cuts = min(self.n_points, n_var - 1)
        if n_cuts < 1:
            return parent1.clone()

        # Draw n_cuts *distinct* positions per pair: the top-k indices of
        # i.i.d. uniform scores form a uniformly random subset. Sampling
        # with replacement let duplicate points cancel in the parity count
        # below, silently reducing the effective number of cuts.
        scores = torch.rand(n_pairs, n_var - 1, device=device)
        points = torch.topk(scores, n_cuts, dim=1).indices + 1  # [N, n_cuts]

        # Count how many crossover points lie at or before each position.
        # Even count -> parent1, odd count -> parent2.
        positions = torch.arange(n_var, device=device).view(1, n_var, 1)
        count_before = (positions >= points.unsqueeze(1)).sum(dim=-1)  # [N, D]
        from_parent1 = count_before % 2 == 0

        # torch.where keeps the parents' dtype and cannot leak inf/NaN from
        # the parent that was not selected (0 * inf would yield NaN).
        offspring = torch.where(from_parent1, parent1, parent2)

        # Apply per-individual crossover probability
        if not self.adaptive:
            # Expand prob to [N, D]; one decision per pair uses column 0.
            prob_expanded = expand_param(prob, self.prob, n_pairs, n_var, device, dtype)
            do_cross = torch.rand(n_pairs, 1, device=device) < prob_expanded[:, :1]
            # Pairs that do not cross keep parent1 unchanged.
            offspring = torch.where(do_cross, offspring, parent1)

        return offspring

    def __repr__(self) -> str:
        return (
            f"NPointCrossover("
            f"n_points={self.n_points}, "
            f"prob={self.prob.item():.3f})"
        )