evo-gafs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evo_gafs-0.1.0/PKG-INFO +137 -0
- evo_gafs-0.1.0/README.md +101 -0
- evo_gafs-0.1.0/pyproject.toml +97 -0
- evo_gafs-0.1.0/setup.cfg +4 -0
- evo_gafs-0.1.0/src/evo_gafs/__init__.py +34 -0
- evo_gafs-0.1.0/src/evo_gafs/algorithms/__init__.py +6 -0
- evo_gafs-0.1.0/src/evo_gafs/algorithms/nsga2.py +125 -0
- evo_gafs-0.1.0/src/evo_gafs/algorithms/single.py +133 -0
- evo_gafs-0.1.0/src/evo_gafs/benchmark/__init__.py +5 -0
- evo_gafs-0.1.0/src/evo_gafs/benchmark/runner.py +230 -0
- evo_gafs-0.1.0/src/evo_gafs/core/__init__.py +13 -0
- evo_gafs-0.1.0/src/evo_gafs/core/config.py +250 -0
- evo_gafs-0.1.0/src/evo_gafs/core/evaluator.py +136 -0
- evo_gafs-0.1.0/src/evo_gafs/core/selector.py +326 -0
- evo_gafs-0.1.0/src/evo_gafs/operators/__init__.py +12 -0
- evo_gafs-0.1.0/src/evo_gafs/operators/crossover.py +39 -0
- evo_gafs-0.1.0/src/evo_gafs/operators/mutation.py +33 -0
- evo_gafs-0.1.0/src/evo_gafs/operators/repair.py +68 -0
- evo_gafs-0.1.0/src/evo_gafs/py.typed +0 -0
- evo_gafs-0.1.0/src/evo_gafs/utils/__init__.py +12 -0
- evo_gafs-0.1.0/src/evo_gafs/utils/deap_utils.py +75 -0
- evo_gafs-0.1.0/src/evo_gafs/utils/validation.py +51 -0
- evo_gafs-0.1.0/src/evo_gafs/visualization/__init__.py +5 -0
- evo_gafs-0.1.0/src/evo_gafs/visualization/plots.py +166 -0
- evo_gafs-0.1.0/src/evo_gafs.egg-info/PKG-INFO +137 -0
- evo_gafs-0.1.0/src/evo_gafs.egg-info/SOURCES.txt +27 -0
- evo_gafs-0.1.0/src/evo_gafs.egg-info/dependency_links.txt +1 -0
- evo_gafs-0.1.0/src/evo_gafs.egg-info/requires.txt +14 -0
- evo_gafs-0.1.0/src/evo_gafs.egg-info/top_level.txt +1 -0
evo_gafs-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: evo-gafs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Genetic Algorithm Feature Selector — a scikit-learn-compatible wrapper feature selector for tabular data (evo-suite)
|
|
5
|
+
Author: Axel Skrauba
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/AxelSkrauba/evo-suite
|
|
8
|
+
Project-URL: Documentation, https://github.com/AxelSkrauba/evo-suite/tree/main/packages/evo-gafs
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/AxelSkrauba/evo-suite/issues
|
|
10
|
+
Keywords: feature selection,genetic algorithm,DEAP,NSGA-II,machine learning,scikit-learn,wrapper method,evolutionary computation
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: numpy>=1.24
|
|
25
|
+
Requires-Dist: pandas>=1.5
|
|
26
|
+
Requires-Dist: scikit-learn>=1.6
|
|
27
|
+
Requires-Dist: deap>=1.4
|
|
28
|
+
Provides-Extra: viz
|
|
29
|
+
Requires-Dist: matplotlib>=3.6; extra == "viz"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
33
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
34
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
35
|
+
Requires-Dist: matplotlib>=3.6; extra == "dev"
|
|
36
|
+
|
|
37
|
+
# evo-gafs — Genetic Algorithm Feature Selector
|
|
38
|
+
|
|
39
|
+
[License: MIT](../../LICENSE)
|
|
40
|
+
|
|
41
|
+
A **scikit-learn-compatible** wrapper feature selector for tabular data, powered
|
|
42
|
+
by [DEAP](https://github.com/DEAP/deap). `evo-gafs` searches for the subset of
|
|
43
|
+
features that maximises a cross-validated score of your model, and lets you
|
|
44
|
+
explicitly trade raw performance for a smaller feature set — useful for edge
|
|
45
|
+
deployment.
|
|
46
|
+
|
|
47
|
+
Part of the [`evo-suite`](../../README.md) family (import name: `evo_gafs`).
|
|
48
|
+
|
|
49
|
+
## Why evo-gafs?
|
|
50
|
+
|
|
51
|
+
| Capability | evo-gafs |
|
|
52
|
+
|------------|----------|
|
|
53
|
+
| Single-objective **weighted** fitness with a configurable `alpha` (performance ↔ compression) | ✓ |
|
|
54
|
+
| **Multi-objective** NSGA-II with an accessible Pareto front | ✓ |
|
|
55
|
+
| **Repair operator** guaranteeing a minimum number of features | ✓ |
|
|
56
|
+
| Evaluation **cache** to skip repeated genomes | ✓ |
|
|
57
|
+
| Native scikit-learn `fit`/`transform`/`get_support`, usable in a `Pipeline` | ✓ |
|
|
58
|
+
| Built-in multi-dataset `BenchmarkRunner` | ✓ |
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install evo-gafs # core
|
|
64
|
+
pip install evo-gafs[viz] # + matplotlib for the plotting helpers
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quickstart
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from sklearn.datasets import load_breast_cancer
|
|
71
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
72
|
+
from evo_gafs import GAFeatureSelector, GAConfig
|
|
73
|
+
|
|
74
|
+
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
|
|
75
|
+
|
|
76
|
+
selector = GAFeatureSelector(
|
|
77
|
+
estimator=DecisionTreeClassifier(random_state=42),
|
|
78
|
+
config=GAConfig(population_size=30, n_generations=20, alpha=0.8, verbose=False),
|
|
79
|
+
)
|
|
80
|
+
selector.fit(X, y)
|
|
81
|
+
|
|
82
|
+
print(selector.summary())
|
|
83
|
+
X_reduced = selector.transform(X)
|
|
84
|
+
print("Selected:", selector.get_support(indices=True))
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Multi-objective (Pareto front)
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
config = GAConfig(mode="multiobjective", population_size=40, n_generations=30, verbose=False)
|
|
91
|
+
selector = GAFeatureSelector(estimator=DecisionTreeClassifier(random_state=42), config=config)
|
|
92
|
+
selector.fit(X, y)
|
|
93
|
+
|
|
94
|
+
for point in selector.result_.pareto_front:
|
|
95
|
+
print(point["n_features"], point["cv_score"])
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### In a scikit-learn pipeline
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from sklearn.pipeline import Pipeline
|
|
102
|
+
from sklearn.preprocessing import StandardScaler
|
|
103
|
+
from sklearn.svm import SVC
|
|
104
|
+
|
|
105
|
+
pipe = Pipeline([
|
|
106
|
+
("scaler", StandardScaler()),
|
|
107
|
+
("selector", GAFeatureSelector(estimator=DecisionTreeClassifier(), config=config)),
|
|
108
|
+
("clf", SVC()),
|
|
109
|
+
])
|
|
110
|
+
pipe.fit(X, y)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## The `alpha` trade-off (single-objective)
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
fitness = alpha * cv_score + (1 - alpha) * compression
|
|
117
|
+
compression = 1 - n_selected / n_total
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
- `alpha = 1.0` → pure wrapper (performance only)
|
|
121
|
+
- `alpha ≈ 0.7` → balanced, good default for edge deployment
|
|
122
|
+
|
|
123
|
+
## Citation
|
|
124
|
+
|
|
125
|
+
```bibtex
|
|
126
|
+
@software{evo_gafs,
|
|
127
|
+
author = {Skrauba, Axel},
|
|
128
|
+
title = {evo-gafs: Genetic Algorithm Feature Selector for tabular data},
|
|
129
|
+
year = {2026},
|
|
130
|
+
version = {0.1.0},
|
|
131
|
+
url = {https://github.com/AxelSkrauba/evo-suite}
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
[MIT](../../LICENSE)
|
evo_gafs-0.1.0/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# evo-gafs — Genetic Algorithm Feature Selector
|
|
2
|
+
|
|
3
|
+
[License: MIT](../../LICENSE)
|
|
4
|
+
|
|
5
|
+
A **scikit-learn-compatible** wrapper feature selector for tabular data, powered
|
|
6
|
+
by [DEAP](https://github.com/DEAP/deap). `evo-gafs` searches for the subset of
|
|
7
|
+
features that maximises a cross-validated score of your model, and lets you
|
|
8
|
+
explicitly trade raw performance for a smaller feature set — useful for edge
|
|
9
|
+
deployment.
|
|
10
|
+
|
|
11
|
+
Part of the [`evo-suite`](../../README.md) family (import name: `evo_gafs`).
|
|
12
|
+
|
|
13
|
+
## Why evo-gafs?
|
|
14
|
+
|
|
15
|
+
| Capability | evo-gafs |
|
|
16
|
+
|------------|----------|
|
|
17
|
+
| Single-objective **weighted** fitness with a configurable `alpha` (performance ↔ compression) | ✓ |
|
|
18
|
+
| **Multi-objective** NSGA-II with an accessible Pareto front | ✓ |
|
|
19
|
+
| **Repair operator** guaranteeing a minimum number of features | ✓ |
|
|
20
|
+
| Evaluation **cache** to skip repeated genomes | ✓ |
|
|
21
|
+
| Native scikit-learn `fit`/`transform`/`get_support`, usable in a `Pipeline` | ✓ |
|
|
22
|
+
| Built-in multi-dataset `BenchmarkRunner` | ✓ |
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install evo-gafs # core
|
|
28
|
+
pip install evo-gafs[viz] # + matplotlib for the plotting helpers
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Quickstart
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from sklearn.datasets import load_breast_cancer
|
|
35
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
36
|
+
from evo_gafs import GAFeatureSelector, GAConfig
|
|
37
|
+
|
|
38
|
+
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
|
|
39
|
+
|
|
40
|
+
selector = GAFeatureSelector(
|
|
41
|
+
estimator=DecisionTreeClassifier(random_state=42),
|
|
42
|
+
config=GAConfig(population_size=30, n_generations=20, alpha=0.8, verbose=False),
|
|
43
|
+
)
|
|
44
|
+
selector.fit(X, y)
|
|
45
|
+
|
|
46
|
+
print(selector.summary())
|
|
47
|
+
X_reduced = selector.transform(X)
|
|
48
|
+
print("Selected:", selector.get_support(indices=True))
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Multi-objective (Pareto front)
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
config = GAConfig(mode="multiobjective", population_size=40, n_generations=30, verbose=False)
|
|
55
|
+
selector = GAFeatureSelector(estimator=DecisionTreeClassifier(random_state=42), config=config)
|
|
56
|
+
selector.fit(X, y)
|
|
57
|
+
|
|
58
|
+
for point in selector.result_.pareto_front:
|
|
59
|
+
print(point["n_features"], point["cv_score"])
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### In a scikit-learn pipeline
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from sklearn.pipeline import Pipeline
|
|
66
|
+
from sklearn.preprocessing import StandardScaler
|
|
67
|
+
from sklearn.svm import SVC
|
|
68
|
+
|
|
69
|
+
pipe = Pipeline([
|
|
70
|
+
("scaler", StandardScaler()),
|
|
71
|
+
("selector", GAFeatureSelector(estimator=DecisionTreeClassifier(), config=config)),
|
|
72
|
+
("clf", SVC()),
|
|
73
|
+
])
|
|
74
|
+
pipe.fit(X, y)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## The `alpha` trade-off (single-objective)
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
fitness = alpha * cv_score + (1 - alpha) * compression
|
|
81
|
+
compression = 1 - n_selected / n_total
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
- `alpha = 1.0` → pure wrapper (performance only)
|
|
85
|
+
- `alpha ≈ 0.7` → balanced, good default for edge deployment
|
|
86
|
+
|
|
87
|
+
## Citation
|
|
88
|
+
|
|
89
|
+
```bibtex
|
|
90
|
+
@software{evo_gafs,
|
|
91
|
+
author = {Skrauba, Axel},
|
|
92
|
+
title = {evo-gafs: Genetic Algorithm Feature Selector for tabular data},
|
|
93
|
+
year = {2026},
|
|
94
|
+
version = {0.1.0},
|
|
95
|
+
url = {https://github.com/AxelSkrauba/evo-suite}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## License
|
|
100
|
+
|
|
101
|
+
[MIT](../../LICENSE)
|
evo_gafs-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "evo-gafs"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Genetic Algorithm Feature Selector — a scikit-learn-compatible wrapper feature selector for tabular data (evo-suite)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Axel Skrauba" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"feature selection",
|
|
17
|
+
"genetic algorithm",
|
|
18
|
+
"DEAP",
|
|
19
|
+
"NSGA-II",
|
|
20
|
+
"machine learning",
|
|
21
|
+
"scikit-learn",
|
|
22
|
+
"wrapper method",
|
|
23
|
+
"evolutionary computation",
|
|
24
|
+
]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 3 - Alpha",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"License :: OSI Approved :: MIT License",
|
|
30
|
+
"Programming Language :: Python :: 3",
|
|
31
|
+
"Programming Language :: Python :: 3.9",
|
|
32
|
+
"Programming Language :: Python :: 3.10",
|
|
33
|
+
"Programming Language :: Python :: 3.11",
|
|
34
|
+
"Programming Language :: Python :: 3.12",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
36
|
+
"Operating System :: OS Independent",
|
|
37
|
+
]
|
|
38
|
+
dependencies = [
|
|
39
|
+
"numpy>=1.24",
|
|
40
|
+
"pandas>=1.5",
|
|
41
|
+
"scikit-learn>=1.6",
|
|
42
|
+
"deap>=1.4",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
viz = ["matplotlib>=3.6"]
|
|
47
|
+
dev = [
|
|
48
|
+
"pytest>=7.4",
|
|
49
|
+
"pytest-cov>=4.1",
|
|
50
|
+
"ruff>=0.6",
|
|
51
|
+
"mypy>=1.8",
|
|
52
|
+
"matplotlib>=3.6",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
Repository = "https://github.com/AxelSkrauba/evo-suite"
|
|
57
|
+
Documentation = "https://github.com/AxelSkrauba/evo-suite/tree/main/packages/evo-gafs"
|
|
58
|
+
"Bug Tracker" = "https://github.com/AxelSkrauba/evo-suite/issues"
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["src"]
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.package-data]
|
|
64
|
+
evo_gafs = ["py.typed"]
|
|
65
|
+
|
|
66
|
+
# ── Tooling ──────────────────────────────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
[tool.pytest.ini_options]
|
|
69
|
+
testpaths = ["tests"]
|
|
70
|
+
addopts = "-ra --strict-markers"
|
|
71
|
+
markers = [
|
|
72
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[tool.coverage.run]
|
|
76
|
+
source = ["evo_gafs"]
|
|
77
|
+
branch = true
|
|
78
|
+
|
|
79
|
+
[tool.coverage.report]
|
|
80
|
+
show_missing = true
|
|
81
|
+
|
|
82
|
+
[tool.ruff]
|
|
83
|
+
line-length = 100
|
|
84
|
+
target-version = "py39"
|
|
85
|
+
src = ["src", "tests"]
|
|
86
|
+
|
|
87
|
+
[tool.ruff.lint]
|
|
88
|
+
select = ["E", "F", "I", "UP", "B", "C4", "SIM"]
|
|
89
|
+
ignore = [
|
|
90
|
+
"E501", # line length handled by formatter
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
[tool.mypy]
|
|
94
|
+
python_version = "3.10"
|
|
95
|
+
warn_unused_configs = true
|
|
96
|
+
ignore_missing_imports = true
|
|
97
|
+
check_untyped_defs = true
|
evo_gafs-0.1.0/setup.cfg
ADDED
|
evo_gafs-0.1.0/src/evo_gafs/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""evo_gafs — Genetic Algorithm Feature Selector.
|
|
2
|
+
|
|
3
|
+
A scikit-learn-compatible wrapper feature selector for tabular data, built on
|
|
4
|
+
DEAP. Supports single-objective (weighted) and multi-objective (NSGA-II,
|
|
5
|
+
Pareto front) selection for both classification and regression.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
>>> from sklearn.tree import DecisionTreeClassifier
|
|
10
|
+
>>> from evo_gafs import GAFeatureSelector, GAConfig
|
|
11
|
+
>>> selector = GAFeatureSelector(
|
|
12
|
+
... estimator=DecisionTreeClassifier(random_state=0),
|
|
13
|
+
... config=GAConfig(population_size=20, n_generations=10, verbose=False),
|
|
14
|
+
... )
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from evo_gafs.benchmark.runner import BenchmarkRunner
|
|
18
|
+
from evo_gafs.core.config import EvolutionStats, GAConfig, SelectionResult
|
|
19
|
+
from evo_gafs.core.evaluator import FitnessEvaluator
|
|
20
|
+
from evo_gafs.core.selector import GAFeatureSelector
|
|
21
|
+
from evo_gafs.visualization.plots import GAPlotter
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"BenchmarkRunner",
|
|
27
|
+
"EvolutionStats",
|
|
28
|
+
"FitnessEvaluator",
|
|
29
|
+
"GAConfig",
|
|
30
|
+
"GAFeatureSelector",
|
|
31
|
+
"GAPlotter",
|
|
32
|
+
"SelectionResult",
|
|
33
|
+
"__version__",
|
|
34
|
+
]
|
evo_gafs-0.1.0/src/evo_gafs/algorithms/nsga2.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Multi-objective genetic algorithm: NSGA-II."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import time
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from deap import base, tools
|
|
11
|
+
|
|
12
|
+
from evo_gafs.core.config import EvolutionStats, GAConfig
|
|
13
|
+
|
|
14
|
+
Callback = Callable[[int, EvolutionStats, list], bool]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _n_features_of(individual: list[int]) -> int:
|
|
18
|
+
return int(sum(individual))
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_nsga2(
    toolbox: base.Toolbox,
    config: GAConfig,
    callbacks: list[Callback] | None = None,
) -> tuple[list, tools.Logbook, list[EvolutionStats]]:
    """Run NSGA-II for two maximised objectives: ``cv_score`` and ``compression``.

    NSGA-II is the canonical multi-objective evolutionary algorithm: it ranks
    the population by Pareto dominance and crowding distance to preserve a
    diverse front.

    Notes
    -----
    ``selTournamentDCD`` requires the selection size to be a multiple of four.
    We round the population size up to the next multiple of four and pad with
    clones, then trim the offspring back to ``population_size``.

    Every callback is invoked on every generation, even when an earlier
    callback in the list has already requested a stop, so that purely
    observational callbacks (loggers, recorders) never miss the final
    generation.

    Parameters
    ----------
    toolbox : deap.base.Toolbox
        Pre-configured toolbox (``select`` must be ``selNSGA2``).
    config : GAConfig
        Algorithm configuration. Reads ``population_size``, ``n_generations``,
        ``crossover_prob``, ``mutation_prob`` and ``verbose``.
    callbacks : list of callable, optional
        Functions ``f(gen, stats, population) -> bool``. Evolution halts after
        the current generation if any of them returns a truthy value.

    Returns
    -------
    population : list
        Final population (carries the Pareto front).
    logbook : deap.tools.Logbook
        DEAP logbook of compiled statistics (first objective only).
    history : list of EvolutionStats
        Per-generation statistics.
    """
    pop_size = config.population_size
    population = toolbox.population(n=pop_size)
    logbook = tools.Logbook()
    history: list[EvolutionStats] = []

    # Logbook statistics track the first objective (cv_score) only.
    stats = tools.Statistics(lambda ind: ind.fitness.values[0])
    stats.register("mean", np.mean)
    stats.register("std", np.std)
    stats.register("max", np.max)

    for ind, fit in zip(population, map(toolbox.evaluate, population)):
        ind.fitness.values = fit

    # Assign initial crowding distance.
    population = toolbox.select(population, len(population))

    # Tournament size rounded up to the next multiple of four; it does not
    # change between generations, so compute it once.
    k_dcd = pop_size + (-pop_size) % 4

    for gen in range(config.n_generations):
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments.
        t_gen_start = time.perf_counter()

        # Pad with clones so selTournamentDCD receives a multiple of four.
        pop_for_dcd = population[:]
        while len(pop_for_dcd) < k_dcd:
            pop_for_dcd.append(toolbox.clone(random.choice(population)))

        offspring = tools.selTournamentDCD(pop_for_dcd, k_dcd)[:pop_size]
        offspring = [toolbox.clone(ind) for ind in offspring]

        # Crossover on consecutive pairs; mated children need re-evaluation.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < config.crossover_prob:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < config.mutation_prob:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Only individuals whose fitness was invalidated are re-evaluated.
        invalid = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid, map(toolbox.evaluate, invalid)):
            ind.fitness.values = fit

        # Environmental selection over parents + offspring.
        population[:] = toolbox.select(population + offspring, pop_size)

        fits_cv = [ind.fitness.values[0] for ind in population]
        n_feats = [_n_features_of(ind) for ind in population]
        best_ind = max(population, key=lambda ind: ind.fitness.values[0])

        gen_stats = EvolutionStats(
            generation=gen,
            best_fitness=float(best_ind.fitness.values[0]),
            mean_fitness=float(np.mean(fits_cv)),
            std_fitness=float(np.std(fits_cv)),
            best_n_features=_n_features_of(best_ind),
            mean_n_features=float(np.mean(n_feats)),
            elapsed_time=time.perf_counter() - t_gen_start,
        )
        history.append(gen_stats)
        logbook.record(gen=gen, **stats.compile(population))

        if config.verbose and (gen % 10 == 0 or gen == config.n_generations - 1):
            print(repr(gen_stats))

        # Run every callback (no short-circuit) and stop if any asked to.
        stop_requested = False
        for callback in callbacks or ():
            if callback(gen, gen_stats, population):
                stop_requested = True
        if stop_requested:
            if config.verbose:
                print(f" Stopped by callback at generation {gen}.")
            break

    return population, logbook, history
|
evo_gafs-0.1.0/src/evo_gafs/algorithms/single.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Single-objective genetic algorithm with elitism and early stopping."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import time
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from deap import base, tools
|
|
11
|
+
|
|
12
|
+
from evo_gafs.core.config import EvolutionStats, GAConfig
|
|
13
|
+
|
|
14
|
+
Callback = Callable[[int, EvolutionStats, list], bool]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _n_features_of(individual: list[int]) -> int:
|
|
18
|
+
return int(sum(individual))
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_single_objective(
    toolbox: base.Toolbox,
    config: GAConfig,
    callbacks: list[Callback] | None = None,
) -> tuple[list, tools.Logbook, list[EvolutionStats]]:
    """Run the single-objective GA.

    The loop applies the toolbox's selection, crossover and mutation operators
    plus elitism (the best ``elite_size`` individuals survive unchanged). It
    supports both configuration-driven early stopping and callback-driven
    stopping (a callback returning ``True`` halts evolution).

    Notes
    -----
    Every callback is invoked on every generation, even when an earlier
    callback in the list has already requested a stop, so that purely
    observational callbacks (loggers, recorders) never miss the final
    generation.

    Parameters
    ----------
    toolbox : deap.base.Toolbox
        Pre-configured toolbox with ``population``, ``evaluate``, ``select``,
        ``mate``, ``mutate`` and ``clone`` registered.
    config : GAConfig
        Algorithm configuration. Reads ``population_size``, ``n_generations``,
        ``elite_size``, ``crossover_prob``, ``mutation_prob``, ``verbose``,
        ``early_stopping_rounds`` and ``early_stopping_tol``.
    callbacks : list of callable, optional
        Functions ``f(gen, stats, population) -> bool``. Evolution halts after
        the current generation if any of them returns a truthy value.

    Returns
    -------
    population : list
        Final population.
    logbook : deap.tools.Logbook
        DEAP logbook of compiled statistics.
    history : list of EvolutionStats
        Per-generation statistics.
    """
    population = toolbox.population(n=config.population_size)
    logbook = tools.Logbook()
    history: list[EvolutionStats] = []

    stats = tools.Statistics(lambda ind: ind.fitness.values[0])
    stats.register("mean", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    for ind, fit in zip(population, map(toolbox.evaluate, population)):
        ind.fitness.values = fit

    best_fitness_history: list[float] = []
    no_improve_count = 0

    for gen in range(config.n_generations):
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments.
        t_gen_start = time.perf_counter()

        # Snapshot the elites before variation so they survive unchanged.
        elites = [toolbox.clone(e) for e in tools.selBest(population, config.elite_size)]

        offspring = [toolbox.clone(ind) for ind in toolbox.select(population, len(population))]

        # Crossover on consecutive pairs; mated children need re-evaluation.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < config.crossover_prob:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < config.mutation_prob:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Only individuals whose fitness was invalidated are re-evaluated.
        invalid = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid, map(toolbox.evaluate, invalid)):
            ind.fitness.values = fit

        # Re-insert elites in place of the worst offspring (ascending sort
        # puts the worst individuals first).
        offspring.sort(key=lambda ind: ind.fitness.values[0])
        offspring[: len(elites)] = elites
        population[:] = offspring

        fits = [ind.fitness.values[0] for ind in population]
        n_feats = [_n_features_of(ind) for ind in population]
        best_ind = tools.selBest(population, 1)[0]
        best_fit = float(max(fits))

        gen_stats = EvolutionStats(
            generation=gen,
            best_fitness=best_fit,
            mean_fitness=float(np.mean(fits)),
            std_fitness=float(np.std(fits)),
            best_n_features=_n_features_of(best_ind),
            mean_n_features=float(np.mean(n_feats)),
            elapsed_time=time.perf_counter() - t_gen_start,
        )
        history.append(gen_stats)
        logbook.record(gen=gen, **stats.compile(population))

        if config.verbose and (gen % 10 == 0 or gen == config.n_generations - 1):
            print(repr(gen_stats))

        # Run every callback (no short-circuit) and stop if any asked to.
        stop_requested = False
        for callback in callbacks or ():
            if callback(gen, gen_stats, population):
                stop_requested = True
        if stop_requested:
            if config.verbose:
                print(f" Stopped by callback at generation {gen}.")
            break

        if config.early_stopping_rounds is not None:
            # NOTE(review): the flat-window test and the no_improve_count
            # threshold both use early_stopping_rounds, so stopping takes
            # roughly twice that many stagnant generations; with
            # early_stopping_rounds == 1 the window holds a single value and
            # is always "flat" when the tolerance is positive. Logic kept
            # as-is -- confirm the intended patience against GAConfig.
            best_fitness_history.append(best_fit)
            if len(best_fitness_history) > config.early_stopping_rounds:
                window = best_fitness_history[-config.early_stopping_rounds :]
                if max(window) - min(window) < config.early_stopping_tol:
                    no_improve_count += 1
                    if no_improve_count >= config.early_stopping_rounds:
                        if config.verbose:
                            print(f" Early stopping at generation {gen}.")
                        break
                else:
                    no_improve_count = 0

    return population, logbook, history
|