evograd-diff 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/PKG-INFO +99 -18
  2. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/README.md +98 -17
  3. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/__init__.py +1 -1
  4. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/algorithm.py +53 -9
  5. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/maximize.py +4 -0
  6. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/minimize.py +19 -3
  7. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/pyproject.toml +12 -1
  8. evograd_diff-0.1.1/.claude/settings.local.json +0 -16
  9. evograd_diff-0.1.1/.python-version +0 -1
  10. evograd_diff-0.1.1/Test_new_evograd.ipynb +0 -31427
  11. evograd_diff-0.1.1/plot_benchmarks.py +0 -896
  12. evograd_diff-0.1.1/uv.lock +0 -1014
  13. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/.gitignore +0 -0
  14. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/LICENSE +0 -0
  15. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/__init__.py +0 -0
  16. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/cmaes.py +0 -0
  17. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/de.py +0 -0
  18. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/ga.py +0 -0
  19. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/pso.py +0 -0
  20. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/algorithms/shade.py +0 -0
  21. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/__init__.py +0 -0
  22. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/base.py +0 -0
  23. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/__init__.py +0 -0
  24. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/basic.py +0 -0
  25. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/composition.py +0 -0
  26. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  27. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.py +0 -0
  28. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/hybrid.py +0 -0
  29. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/simple.py +0 -0
  30. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/classical.py +0 -0
  31. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/smoothed_funnel.py +0 -0
  32. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/functions/transforms.py +0 -0
  33. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/benchmarks/run_benchmark_functions.py +0 -0
  34. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/__init__.py +0 -0
  35. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/problem.py +0 -0
  36. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/result.py +0 -0
  37. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/core/termination.py +0 -0
  38. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/__init__.py +0 -0
  39. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/crossover.py +0 -0
  40. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/mutation.py +0 -0
  41. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/relaxations.py +0 -0
  42. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/repair.py +0 -0
  43. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/sampling.py +0 -0
  44. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/selection.py +0 -0
  45. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/operators/survival.py +0 -0
  46. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/__init__.py +0 -0
  47. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/run_all.py +0 -0
  48. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_cmaes.py +0 -0
  49. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_core.py +0 -0
  50. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_ga.py +0 -0
  51. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_operators.py +0 -0
  52. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_per_individual.py +0 -0
  53. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/tests/test_utils.py +0 -0
  54. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/utils/__init__.py +0 -0
  55. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/utils/callbacks.py +0 -0
  56. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/utils/device.py +0 -0
  57. {evograd_diff-0.1.1 → evograd_diff-0.2.0}/evograd/utils/duplicates.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evograd-diff
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: PyTorch-based framework for differentiable evolutionary computation and swarm intelligence
5
5
  Project-URL: Homepage, https://github.com/andreatangherloni/EvoGrad
6
6
  Project-URL: Repository, https://github.com/andreatangherloni/EvoGrad
@@ -25,15 +25,18 @@ Requires-Dist: seaborn>=0.13.2
25
25
  Requires-Dist: torch>=2.11.0
26
26
  Description-Content-Type: text/markdown
27
27
 
28
- # EvoGrad: Metaheuristics in a Differentiable Wonderland
28
+ # EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
29
29
 
30
30
  <p align="center">
31
31
  <img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
32
32
  <img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
33
33
  <img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
34
+ <img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
34
35
 
35
36
  </p>
36
37
 
38
+ > 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
39
+
37
40
  **EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
38
41
 
39
42
  ## 🌟 Key Features
@@ -122,10 +125,10 @@ ga = GA(pop_size=100, differentiable=False)
122
125
  # Fully differentiable GA with custom operators
123
126
  ga = GA(
124
127
  pop_size=100,
125
- selection=RouletteSelection(differentiable=True, learn_temperature=True),
126
- crossover=SBXCrossover(differentiable=True, learn_eta=True, learn_prob=True),
127
- mutation=PolynomialMutation(differentiable=True, learn_eta=True, learn_prob=True),
128
- survival=MergeSurvival(selection=RouletteSelection(differentiable=True)),
128
+ selection=RouletteSelection(adaptive=True, learn_temperature=True),
129
+ crossover=SBXCrossover(adaptive=True, learn_eta=True, learn_prob=True),
130
+ mutation=PolynomialMutation(adaptive=True, learn_eta=True, learn_prob=True),
131
+ survival=MergeSurvival(elitism=True, adaptive=True),
129
132
  differentiable=True, # Makes population learnable
130
133
  )
131
134
  ```
@@ -134,7 +137,7 @@ ga = GA(
134
137
  |-----------|--------|
135
138
  | `differentiable=False` | Classical GA with discrete operators |
136
139
  | `differentiable=True` | Population is an `nn.Parameter` (learnable via backprop) |
137
- | Operator `differentiable=True` | Operator uses Gumbel-Softmax/Binary-Concrete for gradient flow |
140
+ | Operator `adaptive=True` | Operator uses Gumbel-Softmax/Binary-Concrete for gradient flow |
138
141
  | Operator `learn_*=True` | Operator hyperparameters become learnable `nn.Parameter` |
139
142
 
140
143
  ### Differential Evolution (DE)
@@ -175,10 +178,10 @@ de = DE(pop_size=100, variant="DE/current-to-best/1/bin", adaptive=True, differe
175
178
  PSO uses the same **algorithm-level flags** as DE:
176
179
 
177
180
  ```python
178
- from evograd.algorithms import PSO, pso_constriction, pso_adaptive
181
+ from evograd.algorithms import PSO, pso_constriction, pso_default
179
182
 
180
183
  # Classical PSO
181
- pso = PSO(pop_size=100, inertia=0.7, c1=1.5, c2=1.5)
184
+ pso = PSO(pop_size=100, w=0.7, c1=1.5, c2=1.5)
182
185
 
183
186
  # Adaptive PSO (learnable inertia, c1, c2)
184
187
  pso = PSO(pop_size=100, adaptive=True)
@@ -256,7 +259,7 @@ EvoGrad provides a comprehensive library of evolutionary operators:
256
259
  | `BlendCrossover` | BLX-α crossover | ✓ |
257
260
  | `ArithmeticCrossover` | Weighted average | ✓ |
258
261
  | `UniformCrossover` | Gene-wise uniform swap | ✓ |
259
- | `SimulatedBinaryCrossover` | Alias for SBX | ✓ |
262
+ | `NPointCrossover` | N-point crossover | ✓ |
260
263
 
261
264
  ### Mutation
262
265
  | Operator | Description | Differentiable |
@@ -264,21 +267,22 @@ EvoGrad provides a comprehensive library of evolutionary operators:
264
267
  | `PolynomialMutation` | Polynomial bounded mutation | ✓ |
265
268
  | `GaussianMutation` | Additive Gaussian noise | ✓ |
266
269
  | `UniformMutation` | Uniform random replacement | ✓ |
267
- | `AdaptiveMutation` | Self-adaptive mutation rates | ✓ |
270
+ | `NonUniformMutation` | Annealed mutation strength | ✓ |
268
271
 
269
272
  ### Survival
270
273
  | Operator | Description |
271
274
  |----------|-------------|
272
275
  | `MergeSurvival` | (μ+λ) with optional elitism |
273
- | `ReplacementSurvival` | (μ,λ) generational replacement |
274
- | `AgingSurvival` | Age-based replacement |
276
+ | `CommaSurvival` | (μ,λ) generational replacement |
277
+ | `ReplaceWorstSurvival` | Steady-state worst replacement |
278
+ | `AgeSurvival` | Age-based replacement |
275
279
  | `FitnessSurvival` | Pure fitness-based truncation |
276
280
 
277
281
  ### Repair
278
282
  | Operator | Description |
279
283
  |----------|-------------|
280
284
  | `BoundsRepair` | Clamp to bounds |
281
- | `ReflectionRepair` | Bounce off boundaries |
285
+ | `ReflectRepair` | Bounce off boundaries |
282
286
  | `WrapRepair` | Toroidal wrap-around |
283
287
  | `RandomRepair` | Random resampling |
284
288
 
@@ -290,7 +294,7 @@ EvoGrad provides a comprehensive library of evolutionary operators:
290
294
  import torch
291
295
  import torch.nn as nn
292
296
  from evograd.algorithms import CMAES
293
- from evograd.core import Problem
297
+ from evograd.core import Problem, minimize
294
298
  from evograd.core.termination import MaxEvaluations
295
299
 
296
300
 
@@ -344,7 +348,7 @@ print(f"Final loss: {result.best_fitness:.6f}")
344
348
  ### Callbacks for Logging
345
349
 
346
350
  ```python
347
- from evograd.core import minimize
351
+ from evograd.core import minimize, MaxEvaluations
348
352
  from evograd.utils import HistoryCallback, PrintCallback
349
353
 
350
354
  callbacks = [
@@ -420,11 +424,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
420
424
 
421
425
  ## 📊 Benchmarks
422
426
 
423
- TODO
427
+ EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
428
+
429
+ ### Function library
430
+
431
+ All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
432
+
433
+ | Category | Functions |
434
+ |----------|-----------|
435
+ | **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
436
+ | **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
437
+ | **CEC 2017** (`F1`–`F30`) | Unimodal and simple multimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
438
+ | **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
439
+ | **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
440
+
441
+ ```python
442
+ import torch
443
+ from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
444
+
445
+ f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
446
+ y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
447
+ ```
448
+
449
+ The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
450
+
451
+ ### Running the benchmarks
452
+
453
+ The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
454
+
455
+ ```bash
456
+ # 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
457
+ python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
458
+
459
+ # CMA-ES on the multi-basin functions, on GPU
460
+ python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
461
+
462
+ # List every available function and suite
463
+ python -m evograd.benchmarks.run_benchmark_functions --list_functions
464
+ ```
465
+
466
+ Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in the standalone `plot_benchmarks.py` script, which is not part of the installed `evograd` package.
467
+
468
+ ### Results
469
+
470
+ The three differentiable variants are compared against the **Classical** baseline and pymoo:
471
+
472
+ - **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
473
+ - **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
474
+ - **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
475
+
476
+ **CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
477
+
478
+ - Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
479
+ - Gains concentrate where local refinement helps most: **GA (70.1%)** and **DE (46.0%)** show the largest share of improved comparisons, versus **PSO (6.9%)** and **CMA-ES (1.1%)**, which already include strong built-in adaptation.
480
+ - Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
481
+ - CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
482
+
483
+ **Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
484
+
485
+ | Configuration | Best | Mean | Std | Time (s) |
486
+ |---|---|---|---|---|
487
+ | CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
488
+ | CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
489
+ | CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
490
+ | CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
491
+ | Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
492
+
493
+ The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** comes close at the **fastest** runtime among the CMA-ES variants — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness, confirming that pure gradient descent cannot escape distractor basins.
494
+
495
+ > Full experimental details are in the paper (see [Citation](#-citation)).
424
496
 
425
497
  ## 📖 Citation
426
498
 
427
- TBA
499
+ EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
500
+
501
+ ```bibtex
502
+ @inproceedings{citterio2026evograd,
503
+ title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
504
+ author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
505
+ booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
506
+ year = {2026},
507
+ }
508
+ ```
428
509
 
429
510
  ## 📄 License
430
511
 
@@ -1,12 +1,15 @@
1
- # EvoGrad: Metaheuristics in a Differentiable Wonderland
1
+ # EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
2
2
 
3
3
  <p align="center">
4
4
  <img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
5
5
  <img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
6
6
  <img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
7
+ <img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
7
8
 
8
9
  </p>
9
10
 
11
+ > 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
12
+
10
13
  **EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
11
14
 
12
15
  ## 🌟 Key Features
@@ -95,10 +98,10 @@ ga = GA(pop_size=100, differentiable=False)
95
98
  # Fully differentiable GA with custom operators
96
99
  ga = GA(
97
100
  pop_size=100,
98
- selection=RouletteSelection(differentiable=True, learn_temperature=True),
99
- crossover=SBXCrossover(differentiable=True, learn_eta=True, learn_prob=True),
100
- mutation=PolynomialMutation(differentiable=True, learn_eta=True, learn_prob=True),
101
- survival=MergeSurvival(selection=RouletteSelection(differentiable=True)),
101
+ selection=RouletteSelection(adaptive=True, learn_temperature=True),
102
+ crossover=SBXCrossover(adaptive=True, learn_eta=True, learn_prob=True),
103
+ mutation=PolynomialMutation(adaptive=True, learn_eta=True, learn_prob=True),
104
+ survival=MergeSurvival(elitism=True, adaptive=True),
102
105
  differentiable=True, # Makes population learnable
103
106
  )
104
107
  ```
@@ -107,7 +110,7 @@ ga = GA(
107
110
  |-----------|--------|
108
111
  | `differentiable=False` | Classical GA with discrete operators |
109
112
  | `differentiable=True` | Population is an `nn.Parameter` (learnable via backprop) |
110
- | Operator `differentiable=True` | Operator uses Gumbel-Softmax/Binary-Concrete for gradient flow |
113
+ | Operator `adaptive=True` | Operator uses Gumbel-Softmax/Binary-Concrete for gradient flow |
111
114
  | Operator `learn_*=True` | Operator hyperparameters become learnable `nn.Parameter` |
112
115
 
113
116
  ### Differential Evolution (DE)
@@ -148,10 +151,10 @@ de = DE(pop_size=100, variant="DE/current-to-best/1/bin", adaptive=True, differe
148
151
  PSO uses the same **algorithm-level flags** as DE:
149
152
 
150
153
  ```python
151
- from evograd.algorithms import PSO, pso_constriction, pso_adaptive
154
+ from evograd.algorithms import PSO, pso_constriction, pso_default
152
155
 
153
156
  # Classical PSO
154
- pso = PSO(pop_size=100, inertia=0.7, c1=1.5, c2=1.5)
157
+ pso = PSO(pop_size=100, w=0.7, c1=1.5, c2=1.5)
155
158
 
156
159
  # Adaptive PSO (learnable inertia, c1, c2)
157
160
  pso = PSO(pop_size=100, adaptive=True)
@@ -229,7 +232,7 @@ EvoGrad provides a comprehensive library of evolutionary operators:
229
232
  | `BlendCrossover` | BLX-α crossover | ✓ |
230
233
  | `ArithmeticCrossover` | Weighted average | ✓ |
231
234
  | `UniformCrossover` | Gene-wise uniform swap | ✓ |
232
- | `SimulatedBinaryCrossover` | Alias for SBX | ✓ |
235
+ | `NPointCrossover` | N-point crossover | ✓ |
233
236
 
234
237
  ### Mutation
235
238
  | Operator | Description | Differentiable |
@@ -237,21 +240,22 @@ EvoGrad provides a comprehensive library of evolutionary operators:
237
240
  | `PolynomialMutation` | Polynomial bounded mutation | ✓ |
238
241
  | `GaussianMutation` | Additive Gaussian noise | ✓ |
239
242
  | `UniformMutation` | Uniform random replacement | ✓ |
240
- | `AdaptiveMutation` | Self-adaptive mutation rates | ✓ |
243
+ | `NonUniformMutation` | Annealed mutation strength | ✓ |
241
244
 
242
245
  ### Survival
243
246
  | Operator | Description |
244
247
  |----------|-------------|
245
248
  | `MergeSurvival` | (μ+λ) with optional elitism |
246
- | `ReplacementSurvival` | (μ,λ) generational replacement |
247
- | `AgingSurvival` | Age-based replacement |
249
+ | `CommaSurvival` | (μ,λ) generational replacement |
250
+ | `ReplaceWorstSurvival` | Steady-state worst replacement |
251
+ | `AgeSurvival` | Age-based replacement |
248
252
  | `FitnessSurvival` | Pure fitness-based truncation |
249
253
 
250
254
  ### Repair
251
255
  | Operator | Description |
252
256
  |----------|-------------|
253
257
  | `BoundsRepair` | Clamp to bounds |
254
- | `ReflectionRepair` | Bounce off boundaries |
258
+ | `ReflectRepair` | Bounce off boundaries |
255
259
  | `WrapRepair` | Toroidal wrap-around |
256
260
  | `RandomRepair` | Random resampling |
257
261
 
@@ -263,7 +267,7 @@ EvoGrad provides a comprehensive library of evolutionary operators:
263
267
  import torch
264
268
  import torch.nn as nn
265
269
  from evograd.algorithms import CMAES
266
- from evograd.core import Problem
270
+ from evograd.core import Problem, minimize
267
271
  from evograd.core.termination import MaxEvaluations
268
272
 
269
273
 
@@ -317,7 +321,7 @@ print(f"Final loss: {result.best_fitness:.6f}")
317
321
  ### Callbacks for Logging
318
322
 
319
323
  ```python
320
- from evograd.core import minimize
324
+ from evograd.core import minimize, MaxEvaluations
321
325
  from evograd.utils import HistoryCallback, PrintCallback
322
326
 
323
327
  callbacks = [
@@ -393,11 +397,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
393
397
 
394
398
  ## 📊 Benchmarks
395
399
 
396
- TODO
400
+ EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
401
+
402
+ ### Function library
403
+
404
+ All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
405
+
406
+ | Category | Functions |
407
+ |----------|-----------|
408
+ | **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
409
+ | **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
410
+ | **CEC 2017** (`F1`–`F30`) | Unimodal and simple multimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
411
+ | **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
412
+ | **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
413
+
414
+ ```python
415
+ import torch
416
+ from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
417
+
418
+ f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
419
+ y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
420
+ ```
421
+
422
+ The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
423
+
424
+ ### Running the benchmarks
425
+
426
+ The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
427
+
428
+ ```bash
429
+ # 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
430
+ python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
431
+
432
+ # CMA-ES on the multi-basin functions, on GPU
433
+ python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
434
+
435
+ # List every available function and suite
436
+ python -m evograd.benchmarks.run_benchmark_functions --list_functions
437
+ ```
438
+
439
+ Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in the standalone `plot_benchmarks.py` script, which is not part of the installed `evograd` package.
440
+
441
+ ### Results
442
+
443
+ The three differentiable variants are compared against the **Classical** baseline and pymoo:
444
+
445
+ - **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
446
+ - **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
447
+ - **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
448
+
449
+ **CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
450
+
451
+ - Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
452
+ - Gains concentrate where local refinement helps most: **GA (70.1%)** and **DE (46.0%)** show the largest share of improved comparisons, versus **PSO (6.9%)** and **CMA-ES (1.1%)**, which already include strong built-in adaptation.
453
+ - Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
454
+ - CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
455
+
456
+ **Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
457
+
458
+ | Configuration | Best | Mean | Std | Time (s) |
459
+ |---|---|---|---|---|
460
+ | CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
461
+ | CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
462
+ | CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
463
+ | CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
464
+ | Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
465
+
466
+ The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** comes close at the **fastest** runtime among the CMA-ES variants — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness, confirming that pure gradient descent cannot escape distractor basins.
467
+
468
+ > Full experimental details are in the paper (see [Citation](#-citation)).
397
469
 
398
470
  ## 📖 Citation
399
471
 
400
- TBA
472
+ EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
473
+
474
+ ```bibtex
475
+ @inproceedings{citterio2026evograd,
476
+ title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
477
+ author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
478
+ booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
479
+ year = {2026},
480
+ }
481
+ ```
401
482
 
402
483
  ## 📄 License
403
484
 
@@ -57,7 +57,7 @@ Authors
57
57
  Andrea Tangherloni <andrea.tangherloni@unibocconi.it>
58
58
  """
59
59
 
60
- __version__ = "0.1.0"
60
+ __version__ = "0.2.0"
61
61
  __author__ = "Andrea Tangherloni"
62
62
 
63
63
  __all__ = [
@@ -523,26 +523,65 @@ class Algorithm(nn.Module, ABC):
523
523
 
524
524
  return self.state.best_fitness
525
525
 
526
- def forward(self) -> Tensor:
526
+ def forward(self, reduction: str = "mean", live_selection: bool = True) -> Tensor:
527
527
  """
528
528
  PyTorch forward pass for differentiable optimisation.
529
-
529
+
530
530
  In differentiable mode, this builds a computation graph
531
- through the entire generation, returning the best fitness
532
- as a differentiable scalar loss. Call update_state() after
531
+ through the entire generation and reduces the per-offspring
532
+ fitness to a scalar loss. Call update_state() after
533
533
  loss.backward() and optimizer.step() to commit changes.
534
-
534
+
535
+ Args:
536
+ reduction: How to reduce the (n_offsprings,) offspring fitness
537
+ into the scalar loss that is backpropagated:
538
+ - 'mean' (default): average fitness — gradient reaches the
539
+ whole population, driving every member downhill.
540
+ - 'sum': total fitness — same per-member gradient direction
541
+ as 'mean', scaled by n_offsprings.
542
+ - 'min': best offspring only — gradient flows solely through
543
+ the single best offspring's ancestry (sparse signal).
544
+ live_selection: Whether selection routing carries gradient back to
545
+ the population.
546
+ - True (default, "live"): re-evaluate the current population so
547
+ the selection logits depend on the live parameter — the
548
+ Gumbel-Softmax selection gradient then reaches the population
549
+ (fully end-to-end differentiable generation). For a
550
+ deterministic objective this re-evaluation reproduces the
551
+ committed fitness values exactly, so it is graph
552
+ reconstruction, not new sampling: it is intentionally NOT
553
+ counted in the evaluation budget (n_evals). It is, however, a
554
+ real extra objective pass (wall-clock/FLOPs). For a stochastic
555
+ objective the values may differ from the committed fitness.
556
+ - False ("detached", memetic): selection uses the cached,
557
+ detached committed fitness as fixed routing weights; gradient
558
+ only refines positions. Cheaper (no extra pass), lower
559
+ variance, and the correct choice for stochastic objectives.
560
+
535
561
  Returns:
536
- Best fitness as a scalar tensor (for backprop).
537
-
562
+ Reduced offspring fitness as a scalar tensor (for backprop).
563
+
538
564
  Raises:
539
565
  RuntimeError: If algorithm not initialized.
566
+ ValueError: If reduction is not one of 'mean', 'sum', 'min'.
540
567
  """
541
568
  if not self._is_initialized:
542
569
  raise RuntimeError(
543
570
  "Algorithm not initialized. Call initialize(problem) first."
544
571
  )
545
-
572
+
573
+ if reduction not in ("mean", "sum", "min"):
574
+ raise ValueError(
575
+ f"reduction must be one of 'mean', 'sum', 'min'; got {reduction!r}"
576
+ )
577
+
578
+ # Live selection: attach a fresh autograd graph to the parent fitness so
579
+ # selection gradients flow into the population. Deliberately does NOT
580
+ # increment n_evals (values match the committed fitness for a
581
+ # deterministic objective — this only rebuilds the graph).
582
+ if live_selection:
583
+ self.state.fitness = self._evaluate(self.population)
584
+
546
585
  # Generate offspring (differentiable)
547
586
  offspring = self._infill()
548
587
 
@@ -561,7 +600,12 @@ class Algorithm(nn.Module, ABC):
561
600
  self._pending_offspring = offspring
562
601
  self._pending_fitness = offspring_fitness
563
602
 
564
- # Return best fitness as loss
603
+ # Reduce per-offspring fitness to the scalar loss for backprop.
604
+ if reduction == "mean":
605
+ return offspring_fitness.mean()
606
+ if reduction == "sum":
607
+ return offspring_fitness.sum()
608
+ # reduction == "min"
565
609
  return offspring_fitness.min()
566
610
 
567
611
  @torch.no_grad()
@@ -168,6 +168,8 @@ def maximize(
168
168
  scheduler_patience: int = 50,
169
169
  scheduler_factor: float = 0.5,
170
170
  min_lr: float = 1e-6,
171
+ reduction: str = "mean",
172
+ live_selection: bool = True,
171
173
  ) -> Result:
172
174
  """
173
175
  Maximise an objective function using a population-based algorithm.
@@ -260,6 +262,8 @@ def maximize(
260
262
  scheduler_patience=scheduler_patience,
261
263
  scheduler_factor=scheduler_factor,
262
264
  min_lr=min_lr,
265
+ reduction=reduction,
266
+ live_selection=live_selection,
263
267
  )
264
268
 
265
269
  # Fix problem name in result
@@ -93,7 +93,9 @@ def minimize(
93
93
  scheduler_patience: int = 50,
94
94
  scheduler_factor: float = 0.5,
95
95
  min_lr: float = 1e-6,
96
- ) -> Result:
96
+ reduction: str = "mean",
97
+ live_selection: bool = True,
98
+ ) -> Result:
97
99
 
98
100
  """
99
101
  Minimise an objective function using a population-based algorithm.
@@ -169,7 +171,17 @@ def minimize(
169
171
  reducing LR (for 'plateau' scheduler).
170
172
  scheduler_factor: Factor to multiply LR when reducing.
171
173
  min_lr: Minimum learning rate.
172
-
174
+ reduction: Reduction used to turn the (n_offsprings,) offspring
175
+ fitness into the scalar loss in differentiable mode:
176
+ 'mean' (default), 'sum', or 'min'. Only used when backprop is
177
+ active; ignored in classical mode.
178
+ live_selection: If True (default), selection routing carries gradient
179
+ to the population via a per-generation re-evaluation of the current
180
+ population (not counted in n_evals; intended for deterministic objectives —
181
+ see Algorithm.forward). If False, selection uses the cached,
182
+ detached fitness (memetic; cheaper; for stochastic objectives).
183
+ Only used when backprop is active.
184
+
173
185
  Returns:
174
186
  Result object containing:
175
187
  - best_solution: Best solution found
@@ -381,6 +393,8 @@ def minimize(
381
393
  hyper_params,
382
394
  grad_clip_pop,
383
395
  grad_clip_hyper,
396
+ reduction,
397
+ live_selection,
384
398
  )
385
399
  else:
386
400
  algorithm.step()
@@ -680,6 +694,8 @@ def _step_differentiable(
680
694
  hyper_params: Optional[List],
681
695
  grad_clip_pop: Optional[float],
682
696
  grad_clip_hyper: Optional[float],
697
+ reduction: str = "mean",
698
+ live_selection: bool = True,
683
699
  ) -> float:
684
700
  """
685
701
  Perform one generation step with gradient-based updates.
@@ -709,7 +725,7 @@ def _step_differentiable(
709
725
  opt.zero_grad(set_to_none=True)
710
726
 
711
727
  # Forward pass (builds computation graph)
712
- loss = algorithm.forward()
728
+ loss = algorithm.forward(reduction=reduction, live_selection=live_selection)
713
729
 
714
730
  # Backward pass
715
731
  loss.backward()
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "evograd-diff"
7
- version = "0.1.1"
7
+ version = "0.2.0"
8
8
  description = "PyTorch-based framework for differentiable evolutionary computation and swarm intelligence"
9
9
  readme = "README.md"
10
10
  license = { text = "Apache-2.0" }
@@ -51,3 +51,14 @@ packages = ["evograd"]
51
51
 
52
52
  [tool.hatch.build.targets.wheel.force-include]
53
53
  "evograd/benchmarks/functions/cec2017/data.pkl" = "evograd/benchmarks/functions/cec2017/data.pkl"
54
+
55
+ # Restrict the source distribution to the package and standard metadata so that
56
+ # repo-only files (paper source, notebooks, .claude/, scratch scripts, lockfile)
57
+ # are not shipped to PyPI.
58
+ [tool.hatch.build.targets.sdist]
59
+ include = [
60
+ "/evograd",
61
+ "/README.md",
62
+ "/LICENSE",
63
+ "/pyproject.toml",
64
+ ]
@@ -1,16 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(git add *)",
5
- "Bash(git commit -m ' *)",
6
- "Bash(git push *)",
7
- "Bash(uv build *)",
8
- "Bash(python -m zipfile -l dist/evograd-0.1.0-py3-none-any.whl)",
9
- "WebFetch(domain:pypi.org)",
10
- "Bash(uv run *)",
11
- "Bash(git check-ignore *)",
12
- "Bash(uv lock *)",
13
- "Bash(git stash *)"
14
- ]
15
- }
16
- }
@@ -1 +0,0 @@
1
- 3.12