evograd-diff 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/PKG-INFO +84 -4
  2. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/README.md +83 -3
  3. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/__init__.py +1 -1
  4. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/algorithm.py +53 -9
  5. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/maximize.py +4 -0
  6. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/minimize.py +19 -3
  7. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/pyproject.toml +12 -1
  8. evograd_diff-0.1.2/.claude/settings.local.json +0 -17
  9. evograd_diff-0.1.2/.python-version +0 -1
  10. evograd_diff-0.1.2/Test_new_evograd.ipynb +0 -31427
  11. evograd_diff-0.1.2/plot_benchmarks.py +0 -896
  12. evograd_diff-0.1.2/test.py +0 -32
  13. evograd_diff-0.1.2/test2.py +0 -20
  14. evograd_diff-0.1.2/uv.lock +0 -1014
  15. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/.gitignore +0 -0
  16. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/LICENSE +0 -0
  17. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/__init__.py +0 -0
  18. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/cmaes.py +0 -0
  19. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/de.py +0 -0
  20. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/ga.py +0 -0
  21. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/pso.py +0 -0
  22. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/algorithms/shade.py +0 -0
  23. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/__init__.py +0 -0
  24. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/base.py +0 -0
  25. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/__init__.py +0 -0
  26. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/basic.py +0 -0
  27. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/composition.py +0 -0
  28. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.pkl +0 -0
  29. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/data.py +0 -0
  30. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/hybrid.py +0 -0
  31. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/cec2017/simple.py +0 -0
  32. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/classical.py +0 -0
  33. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/smoothed_funnel.py +0 -0
  34. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/functions/transforms.py +0 -0
  35. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/benchmarks/run_benchmark_functions.py +0 -0
  36. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/__init__.py +0 -0
  37. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/problem.py +0 -0
  38. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/result.py +0 -0
  39. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/core/termination.py +0 -0
  40. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/__init__.py +0 -0
  41. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/crossover.py +0 -0
  42. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/mutation.py +0 -0
  43. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/relaxations.py +0 -0
  44. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/repair.py +0 -0
  45. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/sampling.py +0 -0
  46. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/selection.py +0 -0
  47. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/operators/survival.py +0 -0
  48. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/__init__.py +0 -0
  49. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/run_all.py +0 -0
  50. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_cmaes.py +0 -0
  51. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_core.py +0 -0
  52. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_ga.py +0 -0
  53. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_operators.py +0 -0
  54. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_per_individual.py +0 -0
  55. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/tests/test_utils.py +0 -0
  56. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/__init__.py +0 -0
  57. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/callbacks.py +0 -0
  58. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/device.py +0 -0
  59. {evograd_diff-0.1.2 → evograd_diff-0.2.0}/evograd/utils/duplicates.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evograd-diff
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: PyTorch-based framework for differentiable evolutionary computation and swarm intelligence
5
5
  Project-URL: Homepage, https://github.com/andreatangherloni/EvoGrad
6
6
  Project-URL: Repository, https://github.com/andreatangherloni/EvoGrad
@@ -25,15 +25,18 @@ Requires-Dist: seaborn>=0.13.2
25
25
  Requires-Dist: torch>=2.11.0
26
26
  Description-Content-Type: text/markdown
27
27
 
28
- # EvoGrad: Metaheuristics in a Differentiable Wonderland
28
+ # EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
29
29
 
30
30
  <p align="center">
31
31
  <img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
32
32
  <img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
33
33
  <img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
34
+ <img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
34
35
 
35
36
  </p>
36
37
 
38
+ > 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
39
+
37
40
  **EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
38
41
 
39
42
  ## 🌟 Key Features
@@ -421,11 +424,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
421
424
 
422
425
  ## 📊 Benchmarks
423
426
 
424
- TODO
427
+ EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
428
+
429
+ ### Function library
430
+
431
+ All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
432
+
433
+ | Category | Functions |
434
+ |----------|-----------|
435
+ | **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
436
+ | **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
437
+ | **CEC 2017** (`F1`–`F30`) | Simple/unimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
438
+ | **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
439
+ | **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
440
+
441
+ ```python
442
+ import torch
443
+ from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
444
+
445
+ f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
446
+ y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
447
+ ```
448
+
449
+ The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
450
+
451
+ ### Running the benchmarks
452
+
453
+ The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
454
+
455
+ ```bash
456
+ # 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
457
+ python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
458
+
459
+ # CMA-ES on the multi-basin functions, on GPU
460
+ python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
461
+
462
+ # List every available function and suite
463
+ python -m evograd.benchmarks.run_benchmark_functions --list_functions
464
+ ```
465
+
466
+ Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in `plot_benchmarks.py` at the root of the GitHub repository (this script is not included in the PyPI distribution).
467
+
468
+ ### Results
469
+
470
+ The three differentiable variants are compared against the **Classical** baseline and pymoo:
471
+
472
+ - **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
473
+ - **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
474
+ - **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
475
+
476
+ **CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
477
+
478
+ - Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
479
+ - Gains concentrate where local refinement helps most: **GA** improves in **70.1%** of comparisons and **DE** in **46.0%**, versus **PSO (6.9%)** and **CMA-ES (1.1%)**, which already include strong built-in adaptation.
480
+ - Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
481
+ - CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
482
+
483
+ **Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
484
+
485
+ | Configuration | Best | Mean | Std | Time (s) |
486
+ |---|---|---|---|---|
487
+ | CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
488
+ | CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
489
+ | CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
490
+ | CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
491
+ | Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
492
+
493
+ The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** matches it closely at the **fastest** runtime — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness (153.77 vs. 0.99–2.22), confirming that pure gradient descent cannot escape distractor basins.
494
+
495
+ > Full experimental details are in the paper (see [Citation](#-citation)).
425
496
 
426
497
  ## 📖 Citation
427
498
 
428
- TBA
499
+ EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
500
+
501
+ ```bibtex
502
+ @inproceedings{citterio2026evograd,
503
+ title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
504
+ author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
505
+ booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
506
+ year = {2026},
507
+ }
508
+ ```
429
509
 
430
510
  ## 📄 License
431
511
 
@@ -1,12 +1,15 @@
1
- # EvoGrad: Metaheuristics in a Differentiable Wonderland
1
+ # EvoGrad: Accelerated Metaheuristics in a Differentiable Wonderland
2
2
 
3
3
  <p align="center">
4
4
  <img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+">
5
5
  <img src="https://img.shields.io/badge/pytorch-2.0+-orange.svg" alt="PyTorch 2.0+">
6
6
  <img src="https://img.shields.io/badge/license-Apache%20License%202.0-blue.svg" alt="License: Apache-2.0">
7
+ <img src="https://img.shields.io/badge/IEEE%20CEC-2026-success.svg" alt="IEEE CEC 2026">
7
8
 
8
9
  </p>
9
10
 
11
+ > 🎉 **EvoGrad has been accepted at [IEEE CEC 2026](#-citation)!**
12
+
10
13
  **EvoGrad** is a PyTorch-based framework for differentiable Evolutionary Computation and Swarm Intelligence. It bridges classical population-based optimisation with modern differentiable programming by enabling gradient flow through evolutionary operators.
11
14
 
12
15
  ## 🌟 Key Features
@@ -394,11 +397,88 @@ EvoGrad makes evolutionary algorithms differentiable through:
394
397
 
395
398
  ## 📊 Benchmarks
396
399
 
397
- TODO
400
+ EvoGrad ships a self-contained, **PyTorch-native benchmark suite** (`evograd.benchmarks`) together with a parallel runner that evaluates every algorithm in its four operating modes against two reference baselines.
401
+
402
+ ### Function library
403
+
404
+ All functions share a common `BenchmarkFunction` interface (`f(x)` on an `(N, n_var)` batch, plus `.bounds` and the known optimum) and run on CPU/GPU/MPS.
405
+
406
+ | Category | Functions |
407
+ |----------|-----------|
408
+ | **Classical — unimodal** | Sphere, Ellipsoid, SumOfDifferentPowers, Schwefel 2.22, Cigar, Discus, BentCigar, Rosenbrock, DixonPrice, Powell, Trid |
409
+ | **Classical — multimodal** | Rastrigin, Ackley, Griewank, Schwefel, Levy, Michalewicz, Zakharov, Weierstrass, Alpine, Salomon, Styblinski–Tang |
410
+ | **CEC 2017** (`F1`–`F30`) | Simple/unimodal (F1–F10), Hybrid (F11–F20), Composition (F21–F30) — the full competition suite, **rewritten from scratch in PyTorch** |
411
+ | **Multi-Basin / Smoothed-Funnel** | `MultiBasinRastrigin`, `MultiBasinRosenbrock`, `DeceptiveLandscape` — designed for differentiable EAs |
412
+ | **Transforms** | Shifted / Rotated / Scaled / Asymmetric / Oscillated / Biased wrappers for building custom variants |
413
+
414
+ ```python
415
+ import torch
416
+ from evograd.benchmarks.functions import Sphere, Rastrigin, get_cec2017_function, MultiBasinRastrigin
417
+
418
+ f = get_cec2017_function(14, n_var=30) # CEC 2017 F14 in 30D
419
+ y = f(torch.randn(100, 30)) # batch evaluation -> shape [100]
420
+ ```
421
+
422
+ The **Multi-Basin** functions aggregate `K` basins (each a full Rastrigin/Rosenbrock landscape) with a smooth *log-sum-exp* minimum, so the surface stays differentiable everywhere while still trapping pure gradient descent in distractor basins — exactly the setting where population search combined with gradient refinement pays off.
423
+
424
+ ### Running the benchmarks
425
+
426
+ The runner evaluates the four EvoGrad modes — **Classical**, **Differentiable**, **Adaptive**, **Full** — and, by default, the **pymoo** and **Adam** (multi-start) baselines:
427
+
428
+ ```bash
429
+ # 30 runs of DE on the full CEC 2017 suite in 30D (vs pymoo + Adam)
430
+ python -m evograd.benchmarks.run_benchmark_functions -a DE -s cec2017 -D 30 -r 30
431
+
432
+ # CMA-ES on the multi-basin functions, on GPU
433
+ python -m evograd.benchmarks.run_benchmark_functions -a CMAES -s funnel -D 30 --device cuda
434
+
435
+ # List every available function and suite
436
+ python -m evograd.benchmarks.run_benchmark_functions --list_functions
437
+ ```
438
+
439
+ Key flags: `-a {DE,SHADE,PSO,GA,CMAES,ADAM}`, `-s` suite (`classical`, `standard`, `cec2017[_simple|_hybrid|_composition]`, `funnel`, …), `-D` dimensionality, `-r` runs, `-p` population size, `--no_pymoo` / `--no_adam` to drop baselines. Plotting utilities live in `plot_benchmarks.py` at the root of the GitHub repository (this script is not included in the PyPI distribution).
440
+
441
+ ### Results
442
+
443
+ The three differentiable variants are compared against the **Classical** baseline and pymoo:
444
+
445
+ - **Adaptive** — learnable hyperparameters, purely stochastic variation (no gradient through the population).
446
+ - **Diff** (Differentiable) — fixed hyperparameters, gradients refine the population.
447
+ - **Full** — both: learnable hyperparameters *and* gradient-based population refinement.
448
+
449
+ **CEC 2017 (30D & 100D).** 29 functions (F2 excluded, per the competition), search space `[-100, 100]^D`, 100 individuals, `10000·D` evaluations, 30 independent paired runs, one-sided Wilcoxon signed-rank test with Benjamini–Hochberg correction. Highlights:
450
+
451
+ - Differentiable variants are **statistically significantly better than the classical baseline in ~31% of all comparisons**, and **never substantially worse** — gradient refinement can be added to EAs safely.
452
+ - Gains concentrate where local refinement helps most: **GA** improves in **70.1%** of comparisons and **DE** in **46.0%**, versus **PSO (6.9%)** and **CMA-ES (1.1%)**, which already include strong built-in adaptation.
453
+ - Across variants, **Full (41.4%) > Adaptive (35.3%) > Diff (16.4%)** — combining hyperparameter learning with population refinement helps the most, increasingly so at 100D.
454
+ - CMA-ES is the strongest method overall (especially on hybrid/composition functions), and EvoGrad runs ~**3× faster** than the pymoo baselines on CPU *despite* the added gradient computation.
455
+
456
+ **Multi-Basin Rastrigin** (`D=30`, bounds `[-5, 5]^D`, 150,000 evaluations, 30 runs). Every CMA-ES variant locates the global basin (best fitness `0.00`); a multi-start **Adam** baseline (100 parallel solutions) stays trapped in distractor basins:
457
+
458
+ | Configuration | Best | Mean | Std | Time (s) |
459
+ |---|---|---|---|---|
460
+ | CMA-ES Classical | 0.00 | 2.22 | 3.04 | 25.66 |
461
+ | CMA-ES Differentiable | 0.00 | 1.49 | 2.16 | 9.77 |
462
+ | CMA-ES Adaptive | 0.00 | **0.99** | **1.36** | 45.24 |
463
+ | CMA-ES Full | 0.00 | 1.29 | 2.12 | **7.94** |
464
+ | Adam (multi-start, pop-based) | 116.41 | 153.77 | 13.98 | 3.88 |
465
+
466
+ The **Adaptive** variant reaches the lowest mean and standard deviation, while **Full** matches it closely at the **fastest** runtime — gradient flow yields large speed-ups while population search secures the global basin. Adam alone is **roughly two orders of magnitude worse** in mean fitness (153.77 vs. 0.99–2.22), confirming that pure gradient descent cannot escape distractor basins.
467
+
468
+ > Full experimental details are in the paper (see [Citation](#-citation)).
398
469
 
399
470
  ## 📖 Citation
400
471
 
401
- TBA
472
+ EvoGrad was accepted at the **IEEE Congress on Evolutionary Computation (CEC) 2026**. If you use EvoGrad in your research, please cite:
473
+
474
+ ```bibtex
475
+ @inproceedings{citterio2026evograd,
476
+ title = {{EvoGrad}: Accelerated Metaheuristics in a Differentiable Wonderland},
477
+ author = {Citterio, Beatrice F. R. and Papetti, Daniele M. and Dimitri, Giovanna Maria and Tangherloni, Andrea},
478
+ booktitle = {Proceedings of the IEEE Congress on Evolutionary Computation (CEC)},
479
+ year = {2026},
480
+ }
481
+ ```
402
482
 
403
483
  ## 📄 License
404
484
 
@@ -57,7 +57,7 @@ Authors
57
57
  Andrea Tangherloni <andrea.tangherloni@unibocconi.it>
58
58
  """
59
59
 
60
- __version__ = "0.1.0"
60
+ __version__ = "0.2.0"
61
61
  __author__ = "Andrea Tangherloni"
62
62
 
63
63
  __all__ = [
@@ -523,26 +523,65 @@ class Algorithm(nn.Module, ABC):
523
523
 
524
524
  return self.state.best_fitness
525
525
 
526
- def forward(self) -> Tensor:
526
+ def forward(self, reduction: str = "mean", live_selection: bool = True) -> Tensor:
527
527
  """
528
528
  PyTorch forward pass for differentiable optimisation.
529
-
529
+
530
530
  In differentiable mode, this builds a computation graph
531
- through the entire generation, returning the best fitness
532
- as a differentiable scalar loss. Call update_state() after
531
+ through the entire generation and reduces the per-offspring
532
+ fitness to a scalar loss. Call update_state() after
533
533
  loss.backward() and optimizer.step() to commit changes.
534
-
534
+
535
+ Args:
536
+ reduction: How to reduce the (n_offsprings,) offspring fitness
537
+ into the scalar loss that is backpropagated:
538
+ - 'mean' (default): average fitness — gradient reaches the
539
+ whole population, driving every member downhill.
540
+ - 'sum': total fitness — same per-member gradient direction
541
+ as 'mean', scaled by n_offsprings.
542
+ - 'min': best offspring only — gradient flows solely through
543
+ the single best offspring's ancestry (sparse signal).
544
+ live_selection: Whether selection routing carries gradient back to
545
+ the population.
546
+ - True (default, "live"): re-evaluate the current population so
547
+ the selection logits depend on the live parameter — the
548
+ Gumbel-Softmax selection gradient then reaches the population
549
+ (fully end-to-end differentiable generation). For a
550
+ deterministic objective this re-evaluation reproduces the
551
+ committed fitness values exactly, so it is graph
552
+ reconstruction, not new sampling: it is intentionally NOT
553
+ counted in the evaluation budget (n_evals). It is, however, a
554
+ real extra objective pass (wall-clock/FLOPs). For a stochastic
555
+ objective the values may differ from the committed fitness.
556
+ - False ("detached", memetic): selection uses the cached,
557
+ detached committed fitness as fixed routing weights; gradient
558
+ only refines positions. Cheaper (no extra pass), lower
559
+ variance, and the correct choice for stochastic objectives.
560
+
535
561
  Returns:
536
- Best fitness as a scalar tensor (for backprop).
537
-
562
+ Reduced offspring fitness as a scalar tensor (for backprop).
563
+
538
564
  Raises:
539
565
  RuntimeError: If algorithm not initialized.
566
+ ValueError: If reduction is not one of 'mean', 'sum', 'min'.
540
567
  """
541
568
  if not self._is_initialized:
542
569
  raise RuntimeError(
543
570
  "Algorithm not initialized. Call initialize(problem) first."
544
571
  )
545
-
572
+
573
+ if reduction not in ("mean", "sum", "min"):
574
+ raise ValueError(
575
+ f"reduction must be one of 'mean', 'sum', 'min'; got {reduction!r}"
576
+ )
577
+
578
+ # Live selection: attach a fresh autograd graph to the parent fitness so
579
+ # selection gradients flow into the population. Deliberately does NOT
580
+ # increment n_evals (values match the committed fitness for a
581
+ # deterministic objective — this only rebuilds the graph).
582
+ if live_selection:
583
+ self.state.fitness = self._evaluate(self.population)
584
+
546
585
  # Generate offspring (differentiable)
547
586
  offspring = self._infill()
548
587
 
@@ -561,7 +600,12 @@ class Algorithm(nn.Module, ABC):
561
600
  self._pending_offspring = offspring
562
601
  self._pending_fitness = offspring_fitness
563
602
 
564
- # Return best fitness as loss
603
+ # Reduce per-offspring fitness to the scalar loss for backprop.
604
+ if reduction == "mean":
605
+ return offspring_fitness.mean()
606
+ if reduction == "sum":
607
+ return offspring_fitness.sum()
608
+ # reduction == "min"
565
609
  return offspring_fitness.min()
566
610
 
567
611
  @torch.no_grad()
@@ -168,6 +168,8 @@ def maximize(
168
168
  scheduler_patience: int = 50,
169
169
  scheduler_factor: float = 0.5,
170
170
  min_lr: float = 1e-6,
171
+ reduction: str = "mean",
172
+ live_selection: bool = True,
171
173
  ) -> Result:
172
174
  """
173
175
  Maximise an objective function using a population-based algorithm.
@@ -260,6 +262,8 @@ def maximize(
260
262
  scheduler_patience=scheduler_patience,
261
263
  scheduler_factor=scheduler_factor,
262
264
  min_lr=min_lr,
265
+ reduction=reduction,
266
+ live_selection=live_selection,
263
267
  )
264
268
 
265
269
  # Fix problem name in result
@@ -93,7 +93,9 @@ def minimize(
93
93
  scheduler_patience: int = 50,
94
94
  scheduler_factor: float = 0.5,
95
95
  min_lr: float = 1e-6,
96
- ) -> Result:
96
+ reduction: str = "mean",
97
+ live_selection: bool = True,
98
+ ) -> Result:
97
99
 
98
100
  """
99
101
  Minimise an objective function using a population-based algorithm.
@@ -169,7 +171,17 @@ def minimize(
169
171
  reducing LR (for 'plateau' scheduler).
170
172
  scheduler_factor: Factor to multiply LR when reducing.
171
173
  min_lr: Minimum learning rate.
172
-
174
+ reduction: Reduction used to turn the (n_offsprings,) offspring
175
+ fitness into the scalar loss in differentiable mode:
176
+ 'mean' (default), 'sum', or 'min'. Only used when backprop is
177
+ active; ignored in classical mode.
178
+ live_selection: If True (default), selection routing carries gradient
179
+ to the population via a per-generation re-evaluation of the current
180
+ population (not counted in n_evals; deterministic objectives only —
181
+ see Algorithm.forward). If False, selection uses the cached,
182
+ detached fitness (memetic; cheaper; for stochastic objectives).
183
+ Only used when backprop is active.
184
+
173
185
  Returns:
174
186
  Result object containing:
175
187
  - best_solution: Best solution found
@@ -381,6 +393,8 @@ def minimize(
381
393
  hyper_params,
382
394
  grad_clip_pop,
383
395
  grad_clip_hyper,
396
+ reduction,
397
+ live_selection,
384
398
  )
385
399
  else:
386
400
  algorithm.step()
@@ -680,6 +694,8 @@ def _step_differentiable(
680
694
  hyper_params: Optional[List],
681
695
  grad_clip_pop: Optional[float],
682
696
  grad_clip_hyper: Optional[float],
697
+ reduction: str = "mean",
698
+ live_selection: bool = True,
683
699
  ) -> float:
684
700
  """
685
701
  Perform one generation step with gradient-based updates.
@@ -709,7 +725,7 @@ def _step_differentiable(
709
725
  opt.zero_grad(set_to_none=True)
710
726
 
711
727
  # Forward pass (builds computation graph)
712
- loss = algorithm.forward()
728
+ loss = algorithm.forward(reduction=reduction, live_selection=live_selection)
713
729
 
714
730
  # Backward pass
715
731
  loss.backward()
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "evograd-diff"
7
- version = "0.1.2"
7
+ version = "0.2.0"
8
8
  description = "PyTorch-based framework for differentiable evolutionary computation and swarm intelligence"
9
9
  readme = "README.md"
10
10
  license = { text = "Apache-2.0" }
@@ -51,3 +51,14 @@ packages = ["evograd"]
51
51
 
52
52
  [tool.hatch.build.targets.wheel.force-include]
53
53
  "evograd/benchmarks/functions/cec2017/data.pkl" = "evograd/benchmarks/functions/cec2017/data.pkl"
54
+
55
+ # Restrict the source distribution to the package and standard metadata so that
56
+ # repo-only files (paper source, notebooks, .claude/, scratch scripts, lockfile)
57
+ # are not shipped to PyPI.
58
+ [tool.hatch.build.targets.sdist]
59
+ include = [
60
+ "/evograd",
61
+ "/README.md",
62
+ "/LICENSE",
63
+ "/pyproject.toml",
64
+ ]
@@ -1,17 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(git add *)",
5
- "Bash(git commit -m ' *)",
6
- "Bash(git push *)",
7
- "Bash(uv build *)",
8
- "Bash(python -m zipfile -l dist/evograd-0.1.0-py3-none-any.whl)",
9
- "WebFetch(domain:pypi.org)",
10
- "Bash(uv run *)",
11
- "Bash(git check-ignore *)",
12
- "Bash(uv lock *)",
13
- "Bash(git stash *)",
14
- "Bash(grep -n '```' README.md)"
15
- ]
16
- }
17
- }
@@ -1 +0,0 @@
1
- 3.12