evograd-diff 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/PKG-INFO +84 -4
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/README.md +83 -3
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/__init__.py +1 -1
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/algorithm.py +53 -9
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/maximize.py +4 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/minimize.py +19 -3
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/pyproject.toml +12 -1
- evograd_diff-0.1.2/.claude/settings.local.json +0 -17
- evograd_diff-0.1.2/.python-version +0 -1
- evograd_diff-0.1.2/Test_new_evograd.ipynb +0 -31427
- evograd_diff-0.1.2/plot_benchmarks.py +0 -896
- evograd_diff-0.1.2/test.py +0 -32
- evograd_diff-0.1.2/test2.py +0 -20
- evograd_diff-0.1.2/uv.lock +0 -1014
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/.gitignore +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/LICENSE +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/cmaes.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/de.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/ga.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/pso.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/shade.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/base.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/basic.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/composition.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.pkl +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/hybrid.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/simple.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/classical.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/smoothed_funnel.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/transforms.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/run_benchmark_functions.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/problem.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/result.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/termination.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/crossover.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/mutation.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/relaxations.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/repair.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/sampling.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/selection.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/survival.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/run_all.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_cmaes.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_core.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_ga.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_operators.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_per_individual.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_utils.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/__init__.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/callbacks.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/device.py +0 -0
- {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/duplicates.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evograd-diff
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: PyTorch-based framework for differentiable evolutionary computation and swarm intelligence
|
|
5
5
|
Project-URL: Homepage, https://github.com/andreatangherloni/EvoGrad
|
|
6
6
|
Project-URL: Repository, https://github.com/andreatangherloni/EvoGrad
|
|
@@ -25,15 +25,18 @@ Requires-Dist: seaborn>=0.13.2
|
|
|
25
25
|
Requires-Dist: torch>=2.11.0
|
|
26
26
|
Description-Content-Type: text/markdown
|
|
27
27
|
|
|
28
|
-
# EvoGrad: Metaheuristics in a Differentiable Wonderland
|
|
28
|
+
# EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
|
|
29
29
|
|
|
30
30
|
<p align="center">
|
|
31
31
|
<img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
|
|
32
32
|
<img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
|
|
33
33
|
<img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
|
|
34
|
+
<img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
|
|
34
35
|
|
|
35
36
|
</p>
|
|
36
37
|
|
|
38
|
+
> 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
|
|
39
|
+
|
|
37
40
|
**EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
|
|
38
41
|
|
|
39
42
|
## 🌟 Key Features
|
|
@@ -421,11 +424,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
|
|
|
421
424
|
|
|
422
425
|
## 📊 Benchmarks
|
|
423
426
|
|
|
424
|
-
|
|
427
|
+
EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
|
|
428
|
+
|
|
429
|
+
### Function library
|
|
430
|
+
|
|
431
|
+
All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
|
|
432
|
+
|
|
433
|
+
| Category | Functions |
|
|
434
|
+
|----------|-----------|
|
|
435
|
+
| **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
|
|
436
|
+
| **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
|
|
437
|
+
| **CEC 2017** (`F1`–`F30`) | Simple/unimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
|
|
438
|
+
| **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
|
|
439
|
+
| **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
|
|
440
|
+
|
|
441
|
+
```python
|
|
442
|
+
import torch
|
|
443
|
+
from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
|
|
444
|
+
|
|
445
|
+
f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
|
|
446
|
+
y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
|
|
450
|
+
|
|
451
|
+
### Running the benchmarks
|
|
452
|
+
|
|
453
|
+
The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
# 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
|
|
457
|
+
python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
|
|
458
|
+
|
|
459
|
+
# CMA-ES on the multi-basin functions, on GPU
|
|
460
|
+
python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
|
|
461
|
+
|
|
462
|
+
# List every available function and suite
|
|
463
|
+
python -m evograd.benchmarks.run_benchmark_functions --list_functions
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in `plot_benchmarks.py` in the source repository (the script is not included in the PyPI distribution).
|
|
467
|
+
|
|
468
|
+
### Results
|
|
469
|
+
|
|
470
|
+
The three differentiable variants are compared against the **Classical** baseline and pymoo:
|
|
471
|
+
|
|
472
|
+
- **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
|
|
473
|
+
- **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
|
|
474
|
+
- **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
|
|
475
|
+
|
|
476
|
+
**CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
|
|
477
|
+
|
|
478
|
+
- Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
|
|
479
|
+
- Gains concentrate where local refinement helps most: **GA** and **DE** improve in **70.1%** and **46.0%** of comparisons, respectively, versus only **6.9%** for **PSO** and **1.1%** for **CMA-ES**, which already include strong built-in adaptation.
|
|
480
|
+
- Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
|
|
481
|
+
- CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
|
|
482
|
+
|
|
483
|
+
**Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
|
|
484
|
+
|
|
485
|
+
| Configuration | Best | Mean | Std | Time (s) |
|
|
486
|
+
|---|---|---|---|---|
|
|
487
|
+
| CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
|
|
488
|
+
| CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
|
|
489
|
+
| CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
|
|
490
|
+
| CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
|
|
491
|
+
| Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
|
|
492
|
+
|
|
493
|
+
The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** matches it closely at the **fastest** runtime — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness (153.77 vs. 0.99–2.22), confirming that pure gradient descent cannot escape distractor basins.
|
|
494
|
+
|
|
495
|
+
> Full experimental details are in the paper (see [Citation](#-citation)).
|
|
425
496
|
|
|
426
497
|
## 📖 Citation
|
|
427
498
|
|
|
428
|
-
|
|
499
|
+
EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
|
|
500
|
+
|
|
501
|
+
```bibtex
|
|
502
|
+
@inproceedings{citterio2026evograd,
|
|
503
|
+
title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
|
|
504
|
+
author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
|
|
505
|
+
booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
|
|
506
|
+
year = {2026},
|
|
507
|
+
}
|
|
508
|
+
```
|
|
429
509
|
|
|
430
510
|
## 📄 License
|
|
431
511
|
|
|
@@ -1,12 +1,15 @@
|
|
|
1
|
-
# EvoGrad: Metaheuristics in a Differentiable Wonderland
|
|
1
|
+
# EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
|
|
2
2
|
|
|
3
3
|
<p align="center">
|
|
4
4
|
<img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
|
|
5
5
|
<img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
|
|
6
6
|
<img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
|
|
7
|
+
<img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
|
|
7
8
|
|
|
8
9
|
</p>
|
|
9
10
|
|
|
11
|
+
> 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
|
|
12
|
+
|
|
10
13
|
**EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
|
|
11
14
|
|
|
12
15
|
## 🌟 Key Features
|
|
@@ -394,11 +397,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
|
|
|
394
397
|
|
|
395
398
|
## 📊 Benchmarks
|
|
396
399
|
|
|
397
|
-
|
|
400
|
+
EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
|
|
401
|
+
|
|
402
|
+
### Function library
|
|
403
|
+
|
|
404
|
+
All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
|
|
405
|
+
|
|
406
|
+
| Category | Functions |
|
|
407
|
+
|----------|-----------|
|
|
408
|
+
| **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
|
|
409
|
+
| **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
|
|
410
|
+
| **CEC 2017** (`F1`–`F30`) | Simple/unimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
|
|
411
|
+
| **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
|
|
412
|
+
| **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
|
|
413
|
+
|
|
414
|
+
```python
|
|
415
|
+
import torch
|
|
416
|
+
from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
|
|
417
|
+
|
|
418
|
+
f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
|
|
419
|
+
y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
|
|
423
|
+
|
|
424
|
+
### Running the benchmarks
|
|
425
|
+
|
|
426
|
+
The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
|
|
427
|
+
|
|
428
|
+
```bash
|
|
429
|
+
# 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
|
|
430
|
+
python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
|
|
431
|
+
|
|
432
|
+
# CMA-ES on the multi-basin functions, on GPU
|
|
433
|
+
python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
|
|
434
|
+
|
|
435
|
+
# List every available function and suite
|
|
436
|
+
python -m evograd.benchmarks.run_benchmark_functions --list_functions
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in `plot_benchmarks.py` in the source repository (the script is not included in the PyPI distribution).
|
|
440
|
+
|
|
441
|
+
### Results
|
|
442
|
+
|
|
443
|
+
The three differentiable variants are compared against the **Classical** baseline and pymoo:
|
|
444
|
+
|
|
445
|
+
- **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
|
|
446
|
+
- **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
|
|
447
|
+
- **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
|
|
448
|
+
|
|
449
|
+
**CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
|
|
450
|
+
|
|
451
|
+
- Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
|
|
452
|
+
- Gains concentrate where local refinement helps most: **GA** and **DE** improve in **70.1%** and **46.0%** of comparisons, respectively, versus only **6.9%** for **PSO** and **1.1%** for **CMA-ES**, which already include strong built-in adaptation.
|
|
453
|
+
- Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
|
|
454
|
+
- CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
|
|
455
|
+
|
|
456
|
+
**Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
|
|
457
|
+
|
|
458
|
+
| Configuration | Best | Mean | Std | Time (s) |
|
|
459
|
+
|---|---|---|---|---|
|
|
460
|
+
| CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
|
|
461
|
+
| CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
|
|
462
|
+
| CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
|
|
463
|
+
| CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
|
|
464
|
+
| Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
|
|
465
|
+
|
|
466
|
+
The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** matches it closely at the **fastest** runtime — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness (153.77 vs. 0.99–2.22), confirming that pure gradient descent cannot escape distractor basins.
|
|
467
|
+
|
|
468
|
+
> Full experimental details are in the paper (see [Citation](#-citation)).
|
|
398
469
|
|
|
399
470
|
## 📖 Citation
|
|
400
471
|
|
|
401
|
-
|
|
472
|
+
EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
|
|
473
|
+
|
|
474
|
+
```bibtex
|
|
475
|
+
@inproceedings{citterio2026evograd,
|
|
476
|
+
title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
|
|
477
|
+
author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
|
|
478
|
+
booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
|
|
479
|
+
year = {2026},
|
|
480
|
+
}
|
|
481
|
+
```
|
|
402
482
|
|
|
403
483
|
## 📄 License
|
|
404
484
|
|
|
@@ -523,26 +523,65 @@ class Algorithm(nn.Module, ABC):
|
|
|
523
523
|
|
|
524
524
|
return self.state.best_fitness
|
|
525
525
|
|
|
526
|
-
def forward(self) -> Tensor:
|
|
526
|
+
def forward(self, reduction: str = "mean", live_selection: bool = True) -> Tensor:
|
|
527
527
|
"""
|
|
528
528
|
PyTorch forward pass for differentiable optimisation.
|
|
529
|
-
|
|
529
|
+
|
|
530
530
|
In differentiable mode, this builds a computation graph
|
|
531
|
-
through the entire generation
|
|
532
|
-
|
|
531
|
+
through the entire generation and reduces the per-offspring
|
|
532
|
+
fitness to a scalar loss. Call update_state() after
|
|
533
533
|
loss.backward() and optimizer.step() to commit changes.
|
|
534
|
-
|
|
534
|
+
|
|
535
|
+
Args:
|
|
536
|
+
reduction: How to reduce the (n_offsprings,) offspring fitness
|
|
537
|
+
into the scalar loss that is backpropagated:
|
|
538
|
+
- 'mean' (default): average fitness — gradient reaches the
|
|
539
|
+
whole population, driving every member downhill.
|
|
540
|
+
- 'sum': total fitness — same per-member gradient direction
|
|
541
|
+
as 'mean', scaled by n_offsprings.
|
|
542
|
+
- 'min': best offspring only — gradient flows solely through
|
|
543
|
+
the single best offspring's ancestry (sparse signal).
|
|
544
|
+
live_selection: Whether selection routing carries gradient back to
|
|
545
|
+
the population.
|
|
546
|
+
- True (default, "live"): re-evaluate the current population so
|
|
547
|
+
the selection logits depend on the live parameter — the
|
|
548
|
+
Gumbel-Softmax selection gradient then reaches the population
|
|
549
|
+
(fully end-to-end differentiable generation). For a
|
|
550
|
+
deterministic objective this re-evaluation reproduces the
|
|
551
|
+
committed fitness values exactly, so it is graph
|
|
552
|
+
reconstruction, not new sampling: it is intentionally NOT
|
|
553
|
+
counted in the evaluation budget (n_evals). It is, however, a
|
|
554
|
+
real extra objective pass (wall-clock/FLOPs). For a stochastic
|
|
555
|
+
objective the values may differ from the committed fitness.
|
|
556
|
+
- False ("detached", memetic): selection uses the cached,
|
|
557
|
+
detached committed fitness as fixed routing weights; gradient
|
|
558
|
+
only refines positions. Cheaper (no extra pass), lower
|
|
559
|
+
variance, and the correct choice for stochastic objectives.
|
|
560
|
+
|
|
535
561
|
Returns:
|
|
536
|
-
|
|
537
|
-
|
|
562
|
+
Reduced offspring fitness as a scalar tensor (for backprop).
|
|
563
|
+
|
|
538
564
|
Raises:
|
|
539
565
|
RuntimeError: If algorithm not initialized.
|
|
566
|
+
ValueError: If reduction is not one of 'mean', 'sum', 'min'.
|
|
540
567
|
"""
|
|
541
568
|
if not self._is_initialized:
|
|
542
569
|
raise RuntimeError(
|
|
543
570
|
"Algorithm not initialized. Call initialize(problem) first."
|
|
544
571
|
)
|
|
545
|
-
|
|
572
|
+
|
|
573
|
+
if reduction not in ("mean", "sum", "min"):
|
|
574
|
+
raise ValueError(
|
|
575
|
+
f"reduction must be one of 'mean', 'sum', 'min'; got {reduction!r}"
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
# Live selection: attach a fresh autograd graph to the parent fitness so
|
|
579
|
+
# selection gradients flow into the population. Deliberately does NOT
|
|
580
|
+
# increment n_evals (values match the committed fitness for a
|
|
581
|
+
# deterministic objective — this only rebuilds the graph).
|
|
582
|
+
if live_selection:
|
|
583
|
+
self.state.fitness = self._evaluate(self.population)
|
|
584
|
+
|
|
546
585
|
# Generate offspring (differentiable)
|
|
547
586
|
offspring = self._infill()
|
|
548
587
|
|
|
@@ -561,7 +600,12 @@ class Algorithm(nn.Module, ABC):
|
|
|
561
600
|
self._pending_offspring = offspring
|
|
562
601
|
self._pending_fitness = offspring_fitness
|
|
563
602
|
|
|
564
|
-
#
|
|
603
|
+
# Reduce per-offspring fitness to the scalar loss for backprop.
|
|
604
|
+
if reduction == "mean":
|
|
605
|
+
return offspring_fitness.mean()
|
|
606
|
+
if reduction == "sum":
|
|
607
|
+
return offspring_fitness.sum()
|
|
608
|
+
# reduction == "min"
|
|
565
609
|
return offspring_fitness.min()
|
|
566
610
|
|
|
567
611
|
@torch.no_grad()
|
|
@@ -168,6 +168,8 @@ def maximize(
|
|
|
168
168
|
scheduler_patience: int = 50,
|
|
169
169
|
scheduler_factor: float = 0.5,
|
|
170
170
|
min_lr: float = 1e-6,
|
|
171
|
+
reduction: str = "mean",
|
|
172
|
+
live_selection: bool = True,
|
|
171
173
|
) -> Result:
|
|
172
174
|
"""
|
|
173
175
|
Maximise an objective function using a population-based algorithm.
|
|
@@ -260,6 +262,8 @@ def maximize(
|
|
|
260
262
|
scheduler_patience=scheduler_patience,
|
|
261
263
|
scheduler_factor=scheduler_factor,
|
|
262
264
|
min_lr=min_lr,
|
|
265
|
+
reduction=reduction,
|
|
266
|
+
live_selection=live_selection,
|
|
263
267
|
)
|
|
264
268
|
|
|
265
269
|
# Fix problem name in result
|
|
@@ -93,7 +93,9 @@ def minimize(
|
|
|
93
93
|
scheduler_patience: int = 50,
|
|
94
94
|
scheduler_factor: float = 0.5,
|
|
95
95
|
min_lr: float = 1e-6,
|
|
96
|
-
|
|
96
|
+
reduction: str = "mean",
|
|
97
|
+
live_selection: bool = True,
|
|
98
|
+
) -> Result:
|
|
97
99
|
|
|
98
100
|
"""
|
|
99
101
|
Minimise an objective function using a population-based algorithm.
|
|
@@ -169,7 +171,17 @@ def minimize(
|
|
|
169
171
|
reducing LR (for 'plateau' scheduler).
|
|
170
172
|
scheduler_factor: Factor to multiply LR when reducing.
|
|
171
173
|
min_lr: Minimum learning rate.
|
|
172
|
-
|
|
174
|
+
reduction: Reduction used to turn the (n_offsprings,) offspring
|
|
175
|
+
fitness into the scalar loss in differentiable mode:
|
|
176
|
+
'mean' (default), 'sum', or 'min'. Only used when backprop is
|
|
177
|
+
active; ignored in classical mode.
|
|
178
|
+
live_selection: If True (default), selection routing carries gradient
|
|
179
|
+
to the population via a per-generation re-evaluation of the current
|
|
180
|
+
population (not counted in n_evals; deterministic objectives only —
|
|
181
|
+
see Algorithm.forward). If False, selection uses the cached,
|
|
182
|
+
detached fitness (memetic; cheaper; for stochastic objectives).
|
|
183
|
+
Only used when backprop is active.
|
|
184
|
+
|
|
173
185
|
Returns:
|
|
174
186
|
Result object containing:
|
|
175
187
|
- best_solution: Best solution found
|
|
@@ -381,6 +393,8 @@ def minimize(
|
|
|
381
393
|
hyper_params,
|
|
382
394
|
grad_clip_pop,
|
|
383
395
|
grad_clip_hyper,
|
|
396
|
+
reduction,
|
|
397
|
+
live_selection,
|
|
384
398
|
)
|
|
385
399
|
else:
|
|
386
400
|
algorithm.step()
|
|
@@ -680,6 +694,8 @@ def _step_differentiable(
|
|
|
680
694
|
hyper_params: Optional[List],
|
|
681
695
|
grad_clip_pop: Optional[float],
|
|
682
696
|
grad_clip_hyper: Optional[float],
|
|
697
|
+
reduction: str = "mean",
|
|
698
|
+
live_selection: bool = True,
|
|
683
699
|
) -> float:
|
|
684
700
|
"""
|
|
685
701
|
Perform one generation step with gradient-based updates.
|
|
@@ -709,7 +725,7 @@ def _step_differentiable(
|
|
|
709
725
|
opt.zero_grad(set_to_none=True)
|
|
710
726
|
|
|
711
727
|
# Forward pass (builds computation graph)
|
|
712
|
-
loss = algorithm.forward()
|
|
728
|
+
loss = algorithm.forward(reduction=reduction, live_selection=live_selection)
|
|
713
729
|
|
|
714
730
|
# Backward pass
|
|
715
731
|
loss.backward()
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "evograd-diff"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "PyTorch-based framework for differentiable evolutionary computation and swarm intelligence"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "Apache-2.0" }
|
|
@@ -51,3 +51,14 @@ packages = ["evograd"]
|
|
|
51
51
|
|
|
52
52
|
[tool.hatch.build.targets.wheel.force-include]
|
|
53
53
|
"evograd/benchmarks/functions/cec2017/data.pkl" = "evograd/benchmarks/functions/cec2017/data.pkl"
|
|
54
|
+
|
|
55
|
+
# Restrict the source distribution to the package and standard metadata so that
|
|
56
|
+
# repo-only files (paper source, notebooks, .claude/, scratch scripts, lockfile)
|
|
57
|
+
# are not shipped to PyPI.
|
|
58
|
+
[tool.hatch.build.targets.sdist]
|
|
59
|
+
include = [
|
|
60
|
+
"/evograd",
|
|
61
|
+
"/README.md",
|
|
62
|
+
"/LICENSE",
|
|
63
|
+
"/pyproject.toml",
|
|
64
|
+
]
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"permissions": {
|
|
3
|
-
"allow": [
|
|
4
|
-
"Bash(git add *)",
|
|
5
|
-
"Bash(git commit -m ' *)",
|
|
6
|
-
"Bash(git push *)",
|
|
7
|
-
"Bash(uv build *)",
|
|
8
|
-
"Bash(python -m zipfile -l dist/evograd-0.1.0-py3-none-any.whl)",
|
|
9
|
-
"WebFetch(domain:pypi.org)",
|
|
10
|
-
"Bash(uv run *)",
|
|
11
|
-
"Bash(git check-ignore *)",
|
|
12
|
-
"Bash(uv lock *)",
|
|
13
|
-
"Bash(git stash *)",
|
|
14
|
-
"Bash(grep -n '```' README.md)"
|
|
15
|
-
]
|
|
16
|
-
}
|
|
17
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
3.12
|