evograd-diff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. evograd/__init__.py +67 -0
  2. evograd/algorithms/__init__.py +138 -0
  3. evograd/algorithms/cmaes.py +1365 -0
  4. evograd/algorithms/de.py +895 -0
  5. evograd/algorithms/ga.py +532 -0
  6. evograd/algorithms/pso.py +648 -0
  7. evograd/algorithms/shade.py +1165 -0
  8. evograd/benchmarks/functions/__init__.py +229 -0
  9. evograd/benchmarks/functions/base.py +217 -0
  10. evograd/benchmarks/functions/cec2017/__init__.py +250 -0
  11. evograd/benchmarks/functions/cec2017/basic.py +413 -0
  12. evograd/benchmarks/functions/cec2017/composition.py +580 -0
  13. evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  14. evograd/benchmarks/functions/cec2017/data.py +350 -0
  15. evograd/benchmarks/functions/cec2017/hybrid.py +406 -0
  16. evograd/benchmarks/functions/cec2017/simple.py +326 -0
  17. evograd/benchmarks/functions/classical.py +649 -0
  18. evograd/benchmarks/functions/smoothed_funnel.py +476 -0
  19. evograd/benchmarks/functions/transforms.py +463 -0
  20. evograd/benchmarks/run_benchmark_functions.py +1208 -0
  21. evograd/core/__init__.py +73 -0
  22. evograd/core/algorithm.py +778 -0
  23. evograd/core/maximize.py +269 -0
  24. evograd/core/minimize.py +740 -0
  25. evograd/core/problem.py +444 -0
  26. evograd/core/result.py +571 -0
  27. evograd/core/termination.py +602 -0
  28. evograd/operators/__init__.py +178 -0
  29. evograd/operators/crossover.py +1117 -0
  30. evograd/operators/mutation.py +1098 -0
  31. evograd/operators/relaxations.py +175 -0
  32. evograd/operators/repair.py +601 -0
  33. evograd/operators/sampling.py +577 -0
  34. evograd/operators/selection.py +981 -0
  35. evograd/operators/survival.py +1000 -0
  36. evograd/tests/__init__.py +11 -0
  37. evograd/tests/run_all.py +78 -0
  38. evograd/tests/test_core.py +528 -0
  39. evograd/tests/test_ga.py +572 -0
  40. evograd/tests/test_operators.py +662 -0
  41. evograd/tests/test_per_individual.py +326 -0
  42. evograd/tests/test_utils.py +328 -0
  43. evograd/utils/__init__.py +97 -0
  44. evograd/utils/callbacks.py +926 -0
  45. evograd/utils/device.py +502 -0
  46. evograd/utils/duplicates.py +421 -0
  47. evograd_diff-0.1.0.dist-info/METADATA +439 -0
  48. evograd_diff-0.1.0.dist-info/RECORD +50 -0
  49. evograd_diff-0.1.0.dist-info/WHEEL +4 -0
  50. evograd_diff-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1098 @@
1
+ """
2
+ Mutation operators for introducing variation.
3
+
4
+ This module provides mutation operators that introduce random
5
+ perturbations to individuals, promoting exploration of the search
6
+ space. All operators support both classical and differentiable
7
+ (i.e., adaptive) modes.
8
+
9
+ Available mutations:
10
+ - PolynomialMutation: Bounded polynomial mutation (GA)
11
+ - GaussianMutation: Gaussian/normal perturbation
12
+ - UniformMutation: Uniform random perturbation
13
+ - NonUniformMutation: Decreasing perturbation over time
14
+ - BoundaryMutation: Reset genes to boundary values
15
+ - NoMutation: Identity (no mutation)
16
+ - CombinedMutation: Chain multiple mutations
17
+
18
+ Differentiable Mode:
19
+ When `adaptive=True`, mutation masks use Binary-Concrete
20
+ (Gumbel-Sigmoid) relaxation, and perturbations use the
21
+ reparameterisation trick for gradient flow.
22
+
23
+ Per-Individual/Per-Gene Parameters:
24
+ All mutation operators support four parameter configurations via
25
+ optional runtime overrides in forward(). This is essential for
26
+ self-adaptive algorithms like SHADE, jDE, or self-adaptive GAs.
27
+
28
+ Configurations:
29
+ - Fixed (scalar): Same value for all individuals and genes
30
+ - Per-gene [D]: Different value per gene, same across individuals
31
+ - Per-individual [N]: Different value per individual, same across genes
32
+ - Per-gene + Per-individual [N, D]: Full matrix, different for each
33
+
34
+ Example:
35
+ >>> # SHADE-style per-individual sigma/F
36
+ >>> sigma_per_ind = torch.rand(pop_size) * 0.5 # [N]
37
+ >>> mutated = mutation(population, xl, xu, sigma=sigma_per_ind)
38
+ >>>
39
+ >>> # Per-gene mutation probability
40
+ >>> prob_per_gene = torch.rand(n_var) * 0.2 # [D]
41
+ >>> mutated = mutation(population, xl, xu, prob=prob_per_gene)
42
+
43
+ Example:
44
+ >>> from evograd.operators import PolynomialMutation
45
+ >>>
46
+ >>> # Classical mode
47
+ >>> mutation = PolynomialMutation(eta=20, prob=None) # prob=1/n_var
48
+ >>> offspring = mutation(population, xl, xu)
49
+ >>>
50
+ >>> # Differentiable mode
51
+ >>> mutation = PolynomialMutation(
52
+ ... eta=20,
53
+ ... prob=0.1,
54
+ ... adaptive=True,
55
+ ... learn_eta=True,
56
+ ... )
57
+ >>> offspring = mutation(population, xl, xu)
58
+ """
59
+
60
+ from __future__ import annotations
61
+
62
+ from abc import ABC, abstractmethod
63
+ from typing import TYPE_CHECKING, List, Optional, Union
64
+
65
+ import math
66
+ import torch
67
+ import torch.nn as nn
68
+ from torch import Tensor
69
+
70
+ from evograd.operators.relaxations import binary_concrete, expand_param
71
+
72
+ if TYPE_CHECKING:
73
+ from evograd.core.problem import Problem
74
+
75
+ __all__ = [
76
+ "Mutation",
77
+ "PolynomialMutation",
78
+ "GaussianMutation",
79
+ "UniformMutation",
80
+ "NonUniformMutation",
81
+ "BoundaryMutation",
82
+ "NoMutation",
83
+ "CombinedMutation",
84
+ ]
85
+
86
+
87
+ # =============================================================================
88
+ # Base Mutation Class
89
+ # =============================================================================
90
+
91
class Mutation(nn.Module, ABC):
    """
    Abstract base class for mutation operators.

    Subclasses must implement:
        - _mutate(): Apply mutation to individuals

    Args:
        prob: Mutation probability per gene. If None, defaults to 1/n_var
            (computed at call time from the population width).
        adaptive: If True, use Binary-Concrete for soft masks.
        temperature: Temperature for Binary-Concrete.
        learn_temperature: If True (and ``adaptive``), temperature is an
            ``nn.Parameter``; otherwise it is stored as a buffer.
        learn_prob: If True (and ``adaptive``), the probability logits are
            an ``nn.Parameter``; otherwise they are stored as a buffer.
        n_var: Number of variables. When given together with ``prob``, one
            logit per gene is stored; otherwise a single scalar logit.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional parameter overrides that
        support four configurations:

        - scalar: Fixed value for all individuals and genes
        - [D] tensor: Per-gene values (same across individuals)
        - [N] tensor: Per-individual values (same across genes)
        - [N, D] tensor: Full matrix (different for each individual and gene)

        When an override is provided, it takes precedence over the stored
        parameter. This enables self-adaptive algorithms like SHADE.
    """

    def __init__(
        self,
        prob: Optional[float] = None,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_temperature: bool = True,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__()

        # Range the temperature is clamped to before every forward pass.
        self._MIN_TEMPERATURE = 0.05
        self._MAX_TEMPERATURE = 10.0

        self.adaptive = adaptive
        self.n_var = n_var
        self._default_prob = prob is None

        # Temperature is stored in log-space so that exp() keeps it positive.
        log_temperature = torch.tensor(temperature).log()
        if learn_temperature and adaptive:
            self._log_temperature = nn.Parameter(log_temperature)
        else:
            self.register_buffer("_log_temperature", log_temperature)

        # Mutation probability is stored as logits (sigmoid maps back to a
        # probability). With prob=None nothing is stored and 1/n_var is
        # computed at call time.
        if prob is not None:
            prob_logit = self._prob_to_logit(prob)
            if n_var is not None:
                logits = torch.full((n_var,), prob_logit)
            else:
                logits = torch.tensor(prob_logit)

            if learn_prob and adaptive:
                self.prob_logits = nn.Parameter(logits)
            else:
                self.register_buffer("prob_logits", logits)
        else:
            # Will be derived dynamically from n_var
            self.prob_logits = None

    @staticmethod
    def _prob_to_logit(p: float, eps: float = 1e-7) -> float:
        """Convert a probability to a logit, clamping p into [eps, 1 - eps]."""
        p = max(min(p, 1 - eps), eps)
        return torch.logit(torch.tensor(p)).item()

    @property
    def temperature(self) -> Tensor:
        """Current temperature value (exp of the stored log-temperature)."""
        return self._log_temperature.exp()

    @property
    def prob(self) -> Optional[Tensor]:
        """Current mutation probability, or None when the 1/n_var default is in use."""
        if self.prob_logits is not None:
            return torch.sigmoid(self.prob_logits)
        return None

    def _get_prob(self, n_var: int, device: torch.device) -> Tensor:
        """Return per-gene mutation probabilities of shape [n_var].

        Uses the stored logits when present (a scalar logit is expanded to
        ``n_var``); otherwise falls back to the 1/n_var default.
        """
        if self.prob_logits is not None:
            prob = torch.sigmoid(self.prob_logits.to(device))
            if prob.dim() == 0:
                return prob.expand(n_var)
            return prob
        # Default: 1/n_var
        return torch.full((n_var,), 1.0 / n_var, device=device)

    def _get_prob_logits(self, n_var: int, device: torch.device) -> Tensor:
        """Return per-gene probability logits of shape [n_var].

        Uses the stored logits when present (a scalar logit is expanded to
        ``n_var``); otherwise returns the logit of the 1/n_var default.
        """
        if self.prob_logits is not None:
            logits = self.prob_logits.to(device)
            if logits.dim() == 0:
                return logits.expand(n_var)
            return logits
        # Default: 1/n_var
        default_prob = 1.0 / n_var
        return torch.full(
            (n_var,),
            self._prob_to_logit(default_prob),
            device=device,
        )

    def _clamp_temperature(self) -> None:
        """Clamp the log-temperature in place to [log(MIN), log(MAX)]."""
        if hasattr(self, "_log_temperature") and self._log_temperature is not None:
            # no_grad: the in-place clamp must not be recorded by autograd.
            with torch.no_grad():
                self._log_temperature.clamp_(
                    math.log(self._MIN_TEMPERATURE),
                    math.log(self._MAX_TEMPERATURE),
                )

    @abstractmethod
    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        **kwargs,
    ) -> Tensor:
        """
        Apply mutation to individuals.

        Args:
            x: Individuals to mutate [n_pop, n_var].
            xl: Lower bounds [n_var] or scalar.
            xu: Upper bounds [n_var] or scalar.
            **kwargs: Optional per-individual/per-gene parameter overrides.

        Returns:
            Mutated individuals [n_pop, n_var].
        """
        pass

    def forward(
        self,
        x: Tensor,
        xl: Optional[Tensor] = None,
        xu: Optional[Tensor] = None,
        problem: Optional["Problem"] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply mutation.

        Args:
            x: Individuals to mutate [n_pop, n_var].
            xl: Lower bounds (or provide problem). Defaults to zeros.
                Non-tensor values (Python numbers, sequences) are converted
                to tensors on ``x``'s device and dtype.
            xu: Upper bounds (or provide problem). Defaults to ones.
                Converted like ``xl``.
            problem: Problem instance with bounds. When given, its
                ``xl``/``xu`` take precedence over the explicit arguments.
            **kwargs: Optional parameter overrides for per-individual or
                per-gene operation. Supported kwargs depend on the specific
                mutation operator (e.g., eta, sigma, prob).

                Each parameter can be:
                - scalar: Fixed value for all
                - [D] tensor: Per-gene values
                - [N] tensor: Per-individual values
                - [N, D] tensor: Full matrix

        Returns:
            Mutated individuals [n_pop, n_var].

        Example:
            >>> # Standard call (uses stored parameters)
            >>> mutated = mutation(population, xl, xu)
            >>>
            >>> # Per-individual sigma override (for SHADE)
            >>> mutated = mutation(population, xl, xu, sigma=sigma_per_individual)
            >>>
            >>> # Per-gene eta override
            >>> mutated = mutation(population, xl, xu, eta=eta_per_gene)
        """
        # Get bounds from problem if provided
        if problem is not None:
            xl = problem.xl
            xu = problem.xu

        # Default bounds if not provided
        if xl is None:
            xl = torch.zeros(x.shape[-1], device=x.device, dtype=x.dtype)
        if xu is None:
            xu = torch.ones(x.shape[-1], device=x.device, dtype=x.dtype)

        # Subclasses call tensor methods (e.g. ``xl.dim()``) on the bounds, so
        # plain Python scalars/sequences are converted here. Tensors are
        # passed through untouched.
        if not isinstance(xl, Tensor):
            xl = torch.as_tensor(xl, device=x.device, dtype=x.dtype)
        if not isinstance(xu, Tensor):
            xu = torch.as_tensor(xu, device=x.device, dtype=x.dtype)

        self._clamp_temperature()
        return self._mutate(x, xl, xu, **kwargs)

    # Note: Do NOT override __call__. nn.Module.__call__ dispatches to
    # forward() and fires registered hooks (forward_pre_hooks, forward_hooks,
    # and the autograd profiler). Overriding __call__ would bypass all of these.
303
+
304
+
305
+ # =============================================================================
306
+ # Polynomial Mutation
307
+ # =============================================================================
308
+
309
class PolynomialMutation(Mutation):
    """
    Polynomial mutation for real-coded GAs.

    Applies a polynomial perturbation to selected genes. The perturbation
    is a factor in [-1, 1] scaled by the variable range (xu - xl); the
    result is NOT clipped here, so offspring may leave [xl, xu] and a
    repair step may be needed by the caller. The distribution index eta
    controls the spread of mutations.

    Higher eta values produce mutations closer to the original
    value (more exploitation), while lower values produce more
    spread (more exploration).

    Args:
        eta: Distribution index (higher = smaller perturbations).
        prob: Mutation probability per gene. If None, defaults to 1/n_var.
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.
        learn_eta: If True (and ``adaptive``), eta is learnable.
        learn_prob: If True, mutation probability is learnable.
        n_var: Number of variables.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - eta: Distribution index [scalar, D, N, or N×D]
        - prob: Mutation probability [scalar, D, N, or N×D]

    Example:
        >>> mutation = PolynomialMutation(eta=20)
        >>> mutated = mutation(population, xl, xu)
        >>>
        >>> # Per-individual eta (for self-adaptive GA)
        >>> eta_per_ind = torch.rand(pop_size) * 20 + 5  # [N]
        >>> mutated = mutation(population, xl, xu, eta=eta_per_ind)

    Reference:
        Deb & Deb (2014). Analysing Mutation Schemes for
        Real-Parameter Genetic Algorithms.
    """

    def __init__(
        self,
        eta: float = 20.0,
        prob: Optional[float] = None,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_eta: bool = True,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_prob,
            n_var=n_var,
        )

        # Eta is stored in log-space so that exp() keeps it positive.
        log_eta = torch.tensor(eta).log()
        if learn_eta and adaptive:
            self._log_eta = nn.Parameter(log_eta)
        else:
            self.register_buffer("_log_eta", log_eta)

    @property
    def eta(self) -> Tensor:
        """Current eta value (exp of the stored log-eta)."""
        return self._log_eta.exp()

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        eta: Optional[Tensor] = None,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply polynomial mutation.

        Args:
            x: Individuals to mutate [N, D].
            xl: Lower bounds [D] or scalar (0-dim tensor).
            xu: Upper bounds [D] or scalar (0-dim tensor).
            eta: Optional distribution index override [scalar, D, N, or N×D].
            prob: Optional mutation probability override [scalar, D, N, or N×D].

        Returns:
            Mutated individuals [N, D].
        """
        n_pop, n_var = x.shape
        device = x.device
        dtype = x.dtype

        # 0-dim bounds are expanded to one value per gene
        if xl.dim() == 0:
            xl = xl.expand(n_var)
        if xu.dim() == 0:
            xu = xu.expand(n_var)

        # Expand eta to [N, D]
        eta_expanded = expand_param(eta, self.eta, n_pop, n_var, device, dtype)

        # Expand prob to [N, D]
        default_prob = self._get_prob(n_var, device)
        prob_expanded = expand_param(prob, default_prob, n_pop, n_var, device, dtype)

        # Get mutation mask
        if self.adaptive:
            # Soft mask; the clamp keeps logit() finite at p = 0 or 1.
            prob_logits = torch.logit(prob_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(
                prob_logits,
                temperature=self.temperature,
            )
        else:
            # Hard Bernoulli mask, cast to x's dtype so the output keeps the
            # population dtype (a float32 mask would promote half-precision
            # populations to float32).
            mask = (torch.rand(n_pop, n_var, device=device) < prob_expanded).to(dtype)

        # Uniform sample driving the polynomial distribution
        u = torch.rand(n_pop, n_var, device=device, dtype=dtype)

        mut_pow = 1.0 / (eta_expanded + 1.0)

        # delta lies in [-1, 0) for u < 0.5 and in [0, 1) otherwise
        delta = torch.where(
            u < 0.5,
            (2.0 * u).pow(mut_pow) - 1.0,
            1.0 - (2.0 * (1.0 - u)).pow(mut_pow),
        )

        # Scale by bounds range
        perturbation = delta * (xu - xl)

        # Apply mutation with mask (no clipping to [xl, xu] is performed)
        return x + mask * perturbation

    def __repr__(self) -> str:
        prob_str = f"{self.prob.mean().item():.3f}" if self.prob is not None else "1/n_var"
        return (
            f"PolynomialMutation("
            f"eta={self.eta.item():.2f}, "
            f"prob={prob_str}, "
            f"adaptive={self.adaptive})"
        )
456
+
457
+
458
+ # =============================================================================
459
+ # Gaussian Mutation
460
+ # =============================================================================
461
+
462
class GaussianMutation(Mutation):
    """
    Gaussian (normal) mutation.

    Adds Gaussian noise to selected genes. The standard deviation
    can be specified as a fixed value or as a fraction of the
    variable range. The result is not clipped to the bounds here.

    Args:
        sigma: Standard deviation of Gaussian noise.
        sigma_frac: Sigma as fraction of range (alternative to sigma).
            If both provided, sigma takes precedence.
        prob: Mutation probability per gene. If None, defaults to 1/n_var.
        adaptive: If True, use reparameterisation trick.
        temperature: Temperature for Binary-Concrete mask.
        learn_sigma: If True (and ``adaptive``), sigma is learnable.
        learn_prob: If True, mutation probability is learnable.
        n_var: Number of variables.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - sigma: Standard deviation [scalar, D, N, or N×D]
        - prob: Mutation probability [scalar, D, N, or N×D]

        This is essential for SHADE where each individual has its
        own F (scale factor) that can be used as sigma.

        Note: when the operator was constructed without ``sigma`` (i.e. in
        fraction mode), a ``sigma`` override is also interpreted as a
        fraction and multiplied by (xu - xl).

    Example:
        >>> # Fixed sigma
        >>> mutation = GaussianMutation(sigma=0.1)
        >>>
        >>> # Sigma as fraction of range
        >>> mutation = GaussianMutation(sigma_frac=0.1)  # sigma = 0.1 * (xu - xl)
        >>>
        >>> # Per-individual sigma (for SHADE/DE)
        >>> F_per_ind = torch.rand(pop_size) * 0.5 + 0.5  # [N]
        >>> mutated = mutation(population, xl, xu, sigma=F_per_ind)
    """

    def __init__(
        self,
        sigma: Optional[float] = None,
        sigma_frac: float = 0.1,
        prob: Optional[float] = None,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_sigma: bool = True,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_prob,
            n_var=n_var,
        )

        # Fraction mode is active whenever no absolute sigma was given.
        self._use_frac = sigma is None

        # Sigma is stored in log-space so that exp() keeps it positive.
        sigma_val = sigma if sigma is not None else sigma_frac
        log_sigma = torch.tensor(sigma_val).log()
        if learn_sigma and adaptive:
            self._log_sigma = nn.Parameter(log_sigma)
        else:
            self.register_buffer("_log_sigma", log_sigma)

    @property
    def sigma(self) -> Tensor:
        """Current sigma value (exp of the stored log-sigma)."""
        return self._log_sigma.exp()

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        sigma: Optional[Tensor] = None,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply Gaussian mutation.

        Args:
            x: Individuals to mutate [N, D].
            xl: Lower bounds [D] or scalar (0-dim tensor).
            xu: Upper bounds [D] or scalar (0-dim tensor).
            sigma: Optional standard deviation override [scalar, D, N, or N×D].
            prob: Optional mutation probability override [scalar, D, N, or N×D].

        Returns:
            Mutated individuals [N, D].
        """
        n_pop, n_var = x.shape
        device = x.device
        dtype = x.dtype

        # 0-dim bounds are expanded to one value per gene
        if xl.dim() == 0:
            xl = xl.expand(n_var)
        if xu.dim() == 0:
            xu = xu.expand(n_var)

        # Expand prob to [N, D]
        default_prob = self._get_prob(n_var, device)
        prob_expanded = expand_param(prob, default_prob, n_pop, n_var, device, dtype)

        # Get mutation mask
        if self.adaptive:
            # Soft mask; the clamp keeps logit() finite at p = 0 or 1.
            prob_logits = torch.logit(prob_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(
                prob_logits,
                temperature=self.temperature,
            )
        else:
            # Hard Bernoulli mask, cast to x's dtype so the output keeps the
            # population dtype (a float32 mask would promote half-precision
            # populations to float32).
            mask = (torch.rand(n_pop, n_var, device=device) < prob_expanded).to(dtype)

        # Compute sigma (possibly scaled by range)
        if sigma is not None:
            # Runtime override
            sigma_expanded = expand_param(sigma, self.sigma, n_pop, n_var, device, dtype)
            if self._use_frac:
                sigma_expanded = sigma_expanded * (xu - xl)
        elif self._use_frac:
            # Stored fraction: scale per gene, then repeat for each individual
            sigma_expanded = (self.sigma * (xu - xl)).unsqueeze(0).expand(n_pop, -1)
        else:
            # Stored absolute sigma
            sigma_expanded = expand_param(None, self.sigma, n_pop, n_var, device, dtype)

        # Gaussian noise (reparameterised: standard normal scaled by sigma)
        noise = torch.randn(n_pop, n_var, device=device, dtype=dtype) * sigma_expanded

        # Apply mutation with mask
        return x + mask * noise

    def __repr__(self) -> str:
        prob_str = f"{self.prob.mean().item():.3f}" if self.prob is not None else "1/n_var"
        sigma_type = "frac" if self._use_frac else "fixed"
        return (
            f"GaussianMutation("
            f"sigma={self.sigma.item():.4f} ({sigma_type}), "
            f"prob={prob_str}, "
            f"adaptive={self.adaptive})"
        )
612
+
613
+
614
+ # =============================================================================
615
+ # Uniform Mutation
616
+ # =============================================================================
617
+
618
class UniformMutation(Mutation):
    """
    Uniform mutation.

    Replaces selected genes with uniformly random values within
    the variable bounds. This is a more disruptive mutation than
    Gaussian or polynomial.

    Args:
        prob: Mutation probability per gene. If None, defaults to 1/n_var.
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.
        learn_prob: If True, mutation probability is learnable.
        n_var: Number of variables.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - prob: Mutation probability [scalar, D, N, or N×D]

    Example:
        >>> mutation = UniformMutation(prob=0.05)
        >>> mutated = mutation(population, xl, xu)
    """

    def __init__(
        self,
        prob: Optional[float] = None,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_prob: bool = True,
        n_var: Optional[int] = None,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_prob,
            n_var=n_var,
        )

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply uniform mutation.

        Args:
            x: Individuals to mutate [N, D].
            xl: Lower bounds [D] or scalar (0-dim tensor).
            xu: Upper bounds [D] or scalar (0-dim tensor).
            prob: Optional mutation probability override [scalar, D, N, or N×D].

        Returns:
            Mutated individuals [N, D].
        """
        n_pop, n_var = x.shape
        device = x.device
        dtype = x.dtype

        # 0-dim bounds are expanded to one value per gene
        if xl.dim() == 0:
            xl = xl.expand(n_var)
        if xu.dim() == 0:
            xu = xu.expand(n_var)

        # Expand prob to [N, D]
        default_prob = self._get_prob(n_var, device)
        prob_expanded = expand_param(prob, default_prob, n_pop, n_var, device, dtype)

        # Get mutation mask
        if self.adaptive:
            # Soft mask; the clamp keeps logit() finite at p = 0 or 1.
            prob_logits = torch.logit(prob_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(
                prob_logits,
                temperature=self.temperature,
            )
        else:
            # Hard Bernoulli mask, cast to x's dtype so the output keeps the
            # population dtype (a float32 mask would promote half-precision
            # populations to float32).
            mask = (torch.rand(n_pop, n_var, device=device) < prob_expanded).to(dtype)

        # Replacement values drawn uniformly from [xl, xu)
        random_vals = xl + (xu - xl) * torch.rand(n_pop, n_var, device=device, dtype=dtype)

        # Blend: masked genes take the random value, the rest keep x
        return mask * random_vals + (1.0 - mask) * x

    def __repr__(self) -> str:
        prob_str = f"{self.prob.mean().item():.3f}" if self.prob is not None else "1/n_var"
        return f"UniformMutation(prob={prob_str})"
714
+
715
+
716
+ # =============================================================================
717
+ # Non-Uniform Mutation
718
+ # =============================================================================
719
+
720
class NonUniformMutation(Mutation):
    """
    Non-uniform mutation with decreasing perturbation.

    The perturbation magnitude decreases over generations, allowing
    large exploration early and fine-tuning later. Uses the formula:
        delta = +(xu - x) * (1 - r^((1 - t/T)^b))   if coin flip
        delta = -(x - xl) * (1 - r^((1 - t/T)^b))   otherwise

    where t is current generation, T is max generations, r is random,
    and b controls the decay rate. The progress ratio t/T is clamped to
    [0, 1], so running past ``max_generations`` yields zero perturbation
    instead of NaNs.

    Args:
        max_generations: Maximum number of generations (T).
        b: Shape parameter controlling decay (higher = faster decay).
        prob: Mutation probability per gene. If None, defaults to 1/n_var.
        adaptive: If True, use differentiable operations.
        learn_b: If True (and ``adaptive``), b is learnable.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - prob: Mutation probability [scalar, D, N, or N×D]
        - progress: Progress ratio t/T override [scalar or N]

    Example:
        >>> mutation = NonUniformMutation(max_generations=500, b=5.0)
        >>> mutation.set_generation(100)
        >>> mutated = mutation(population, xl, xu)
        >>>
        >>> # Per-individual progress (for heterogeneous adaptation)
        >>> progress_per_ind = torch.rand(pop_size)  # [N]
        >>> mutated = mutation(population, xl, xu, progress=progress_per_ind)

    Reference:
        Michalewicz (1996). Genetic Algorithms + Data Structures =
        Evolution Programs.
    """

    def __init__(
        self,
        max_generations: int = 500,
        b: float = 5.0,
        prob: Optional[float] = None,
        adaptive: bool = False,
        learn_b: bool = True,
    ) -> None:
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=1.0,
            learn_temperature=False,
            learn_prob=False,
            n_var=None,
        )

        self.max_generations = max_generations

        # Current generation (updated externally via set_generation)
        self.register_buffer("_generation", torch.tensor(0))

        # b is stored in log-space so that exp() keeps it positive.
        log_b = torch.tensor(b).log()
        if learn_b and adaptive:
            self._log_b = nn.Parameter(log_b)
        else:
            self.register_buffer("_log_b", log_b)

    @property
    def b(self) -> Tensor:
        """Current b value (exp of the stored log-b)."""
        return self._log_b.exp()

    @property
    def generation(self) -> int:
        """Current generation."""
        return self._generation.item()

    def set_generation(self, gen: int) -> None:
        """Set current generation."""
        self._generation.fill_(gen)

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        prob: Optional[Tensor] = None,
        progress: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply non-uniform mutation.

        Args:
            x: Individuals to mutate [N, D].
            xl: Lower bounds [D] or scalar (0-dim tensor).
            xu: Upper bounds [D] or scalar (0-dim tensor).
            prob: Optional mutation probability override [scalar, D, N, or N×D].
            progress: Optional progress ratio (t/T) override [scalar or N].
                Values are clamped to [0, 1].

        Returns:
            Mutated individuals [N, D].
        """
        n_pop, n_var = x.shape
        device = x.device
        dtype = x.dtype

        # 0-dim bounds are expanded to one value per gene
        if xl.dim() == 0:
            xl = xl.expand(n_var)
        if xu.dim() == 0:
            xu = xu.expand(n_var)

        # Expand prob to [N, D]
        default_prob = self._get_prob(n_var, device)
        prob_expanded = expand_param(prob, default_prob, n_pop, n_var, device, dtype)

        # Get mutation mask
        if self.adaptive:
            # Soft mask; the clamp keeps logit() finite at p = 0 or 1.
            prob_logits = torch.logit(prob_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(
                prob_logits,
                temperature=self.temperature,
            )
        else:
            # Hard Bernoulli mask, cast to x's dtype so the output keeps the
            # population dtype (a float32 mask would promote half-precision
            # populations to float32).
            mask = (torch.rand(n_pop, n_var, device=device) < prob_expanded).to(dtype)

        # Compute progress ratio t/T as an [N] tensor
        if progress is not None:
            if isinstance(progress, Tensor):
                t_ratio = progress.to(device=device, dtype=dtype)
                if t_ratio.dim() == 0:
                    t_ratio = t_ratio.expand(n_pop)
            else:
                t_ratio = torch.full((n_pop,), progress, device=device, dtype=dtype)
        else:
            t_ratio = torch.full(
                (n_pop,),
                self.generation / max(self.max_generations, 1),
                device=device,
                dtype=dtype,
            )

        # Keep the ratio in [0, 1]. Beyond 1 the base (1 - t/T) turns negative
        # and pow() with a non-integer b produces NaN; since the update below
        # multiplies delta by the mask, NaN would leak into every gene.
        t_ratio = t_ratio.clamp(0.0, 1.0)

        # Expand t_ratio to [N, D]
        t_ratio = t_ratio.unsqueeze(1).expand(-1, n_var)

        # Compute non-uniform delta
        r = torch.rand(n_pop, n_var, device=device, dtype=dtype)

        # decay = (1 - t/T)^b
        decay = (1.0 - t_ratio).pow(self.b)

        # delta factor = 1 - r^decay (shrinks to 0 as t/T approaches 1)
        delta_factor = 1.0 - r.pow(decay)

        # Direction (coin flip per gene): 1 -> move towards xu, 0 -> towards xl
        direction = (torch.rand(n_pop, n_var, device=device) < 0.5).to(dtype)

        # Compute perturbation
        delta_up = (xu - x) * delta_factor
        delta_down = (x - xl) * delta_factor
        delta = direction * delta_up - (1.0 - direction) * delta_down

        # Apply mutation with mask
        return x + mask * delta

    def __repr__(self) -> str:
        prob_str = f"{self.prob.mean().item():.3f}" if self.prob is not None else "1/n_var"
        return (
            f"NonUniformMutation("
            f"b={self.b.item():.2f}, "
            f"prob={prob_str}, "
            f"max_gen={self.max_generations})"
        )
895
+
896
+
897
+ # =============================================================================
898
+ # Boundary Mutation
899
+ # =============================================================================
900
+
901
class BoundaryMutation(Mutation):
    """
    Boundary mutation.

    Resets selected genes to either the lower or upper bound
    (chosen by a fair coin flip per gene). Useful for exploring
    boundary regions of the search space.

    Args:
        prob: Mutation probability per gene. If None, defaults to 1/n_var.
        adaptive: If True, use Binary-Concrete masks.
        temperature: Temperature for Binary-Concrete.
        learn_prob: If True, mutation probability is learnable.

    Per-Individual/Per-Gene Parameters:
        The forward() method accepts optional overrides:
        - prob: Mutation probability [scalar, D, N, or N×D]

    Example:
        >>> mutation = BoundaryMutation(prob=0.01)
        >>> mutated = mutation(population, xl, xu)
    """

    def __init__(
        self,
        prob: Optional[float] = None,
        adaptive: bool = False,
        temperature: float = 1.0,
        learn_prob: bool = True,
    ) -> None:
        # This operator always passes learn_temperature=True to the base
        # class; only the probability learnability is configurable.
        super().__init__(
            prob=prob,
            adaptive=adaptive,
            temperature=temperature,
            learn_temperature=True,
            learn_prob=learn_prob,
            n_var=None,
        )

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        prob: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """
        Apply boundary mutation.

        Args:
            x: Individuals to mutate [N, D].
            xl: Lower bounds [D] or scalar.
            xu: Upper bounds [D] or scalar.
            prob: Optional mutation probability override [scalar, D, N, or N×D].

        Returns:
            Mutated individuals [N, D]. For floating-point populations the
            hard (non-adaptive) path keeps the dtype of ``x``.
        """
        n_pop, n_var = x.shape
        device = x.device
        dtype = x.dtype

        # Ensure bounds are per-gene tensors
        if xl.dim() == 0:
            xl = xl.expand(n_var)
        if xu.dim() == 0:
            xu = xu.expand(n_var)

        # Expand prob to [N, D]
        default_prob = self._get_prob(n_var, device)
        prob_expanded = expand_param(prob, default_prob, n_pop, n_var, device, dtype)

        # Build 0/1 masks in the population's own floating dtype. A float32
        # mask would promote float16/bfloat16 populations to float32 in the
        # blends below. Non-floating inputs keep the float32 masks.
        mask_dtype = dtype if dtype.is_floating_point else torch.float32

        # Get mutation mask
        if self.adaptive:
            # Clamp away from 0 and 1 so the logit stays finite.
            prob_logits = torch.logit(prob_expanded.clamp(1e-7, 1 - 1e-7))
            mask = binary_concrete(
                prob_logits,
                temperature=self.temperature,
            )
        else:
            hit = torch.rand(n_pop, n_var, device=device) < prob_expanded
            mask = hit.to(mask_dtype)

        # Choose lower or upper bound randomly (fair coin per gene)
        pick_upper = torch.rand(n_pop, n_var, device=device) < 0.5
        use_upper = pick_upper.to(mask_dtype)
        boundary_vals = use_upper * xu + (1.0 - use_upper) * xl

        # Blend: masked genes take the boundary value, the rest keep x
        y = mask * boundary_vals + (1.0 - mask) * x

        return y

    def __repr__(self) -> str:
        """Return ``BoundaryMutation(prob=...)`` with the mean prob or ``1/n_var``."""
        prob_str = f"{self.prob.mean().item():.3f}" if self.prob is not None else "1/n_var"
        return f"BoundaryMutation(prob={prob_str})"
996
+
997
+
998
+ # =============================================================================
999
+ # No Mutation (Identity)
1000
+ # =============================================================================
1001
+
1002
class NoMutation(Mutation):
    """
    Identity operator: performs no mutation.

    ``_mutate`` hands back the population it receives, so this class
    can stand in wherever a mutation operator is required but no
    variation is wanted.

    Example:
        >>> mutation = NoMutation()
        >>> mutated = mutation(population, xl, xu)  # Returns unchanged
    """

    def __init__(self) -> None:
        # Fixed configuration: zero probability, hard masks, nothing learnable.
        super().__init__(
            n_var=None,
            prob=0.0,
            learn_prob=False,
            adaptive=False,
            temperature=1.0,
            learn_temperature=False,
        )

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        **kwargs,
    ) -> Tensor:
        """
        Return ``x`` as-is.

        Args:
            x: Individuals passed in by the caller.
            xl: Lower bounds (ignored).
            xu: Upper bounds (ignored).

        Returns:
            The same tensor object ``x``.
        """
        return x

    def __repr__(self) -> str:
        return "NoMutation()"
1035
+
1036
+
1037
+ # =============================================================================
1038
+ # Combined Mutation
1039
+ # =============================================================================
1040
+
1041
class CombinedMutation(Mutation):
    """
    Pipeline of mutation operators applied one after another.

    The output of each operator becomes the input of the next, in the
    order given at construction time.

    Args:
        mutations: List of mutation operators to chain.

    Example:
        >>> combined = CombinedMutation([
        ...     GaussianMutation(sigma=0.1, prob=0.5),
        ...     PolynomialMutation(eta=20, prob=0.1),
        ... ])
        >>> mutated = combined(population, xl, xu)

    Note:
        Per-individual parameters are NOT propagated to child
        operators. Use individual operators directly for per-
        individual control.
    """

    def __init__(
        self,
        mutations: List[Mutation],
    ) -> None:
        # The wrapper has no learnable settings of its own.
        super().__init__(
            n_var=None,
            prob=1.0,
            learn_prob=False,
            adaptive=False,
            temperature=1.0,
            learn_temperature=False,
        )

        # Hold the children in an nn.ModuleList.
        self.mutations = nn.ModuleList(mutations)

    def _mutate(
        self,
        x: Tensor,
        xl: Tensor,
        xu: Tensor,
        **kwargs,
    ) -> Tensor:
        """
        Run every child operator in order.

        Args:
            x: Individuals to mutate.
            xl: Lower bounds, forwarded unchanged to each child.
            xu: Upper bounds, forwarded unchanged to each child.

        Returns:
            Output of the last child operator, or ``x`` itself when the
            chain is empty.

        Note: kwargs are NOT passed to child operators.
        """
        current = x
        for operator in self.mutations:
            current = operator(current, xl, xu)
        return current

    def __repr__(self) -> str:
        """Return ``CombinedMutation([...])`` listing each child's repr."""
        child_reprs = [repr(operator) for operator in self.mutations]
        joined = ", ".join(child_reprs)
        return f"CombinedMutation([{joined}])"