asf 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asf-0.1.1/LICENSE +21 -0
- asf-0.1.1/PKG-INFO +179 -0
- asf-0.1.1/README.md +109 -0
- asf-0.1.1/asf/__init__.py +0 -0
- asf-0.1.1/asf/cli/__init__.py +0 -0
- asf-0.1.1/asf/cli/cli_train.py +161 -0
- asf-0.1.1/asf/epm/__init__.py +4 -0
- asf-0.1.1/asf/epm/epm.py +147 -0
- asf-0.1.1/asf/epm/epm_tuner.py +141 -0
- asf-0.1.1/asf/metrics/__init__.py +14 -0
- asf-0.1.1/asf/metrics/baselines.py +127 -0
- asf-0.1.1/asf/pre_selector/__init__.py +18 -0
- asf-0.1.1/asf/pre_selector/abstract_pre_selector.py +32 -0
- asf-0.1.1/asf/pre_selector/beam_search_pre_selection.py +84 -0
- asf-0.1.1/asf/pre_selector/brute_force_pre_selection.py +83 -0
- asf-0.1.1/asf/pre_selector/knee_of_the_curve_pre_selector.py +140 -0
- asf-0.1.1/asf/pre_selector/marginal_contribution_based.py +97 -0
- asf-0.1.1/asf/pre_selector/optimize_pre_selection.py +146 -0
- asf-0.1.1/asf/pre_selector/sbs_pre_selection.py +75 -0
- asf-0.1.1/asf/predictors/__init__.py +33 -0
- asf-0.1.1/asf/predictors/abstract_predictor.py +155 -0
- asf-0.1.1/asf/predictors/epm_extra_trees.py +137 -0
- asf-0.1.1/asf/predictors/epm_random_forest.py +215 -0
- asf-0.1.1/asf/predictors/linear_model.py +245 -0
- asf-0.1.1/asf/predictors/mlp.py +329 -0
- asf-0.1.1/asf/predictors/random_forest.py +293 -0
- asf-0.1.1/asf/predictors/ranking_mlp.py +207 -0
- asf-0.1.1/asf/predictors/regression_mlp.py +165 -0
- asf-0.1.1/asf/predictors/sklearn_wrapper.py +108 -0
- asf-0.1.1/asf/predictors/svm.py +301 -0
- asf-0.1.1/asf/predictors/utils/datasets.py +90 -0
- asf-0.1.1/asf/predictors/utils/losses.py +40 -0
- asf-0.1.1/asf/predictors/utils/mlp.py +26 -0
- asf-0.1.1/asf/predictors/xgboost.py +553 -0
- asf-0.1.1/asf/preprocessing/__init__.py +27 -0
- asf-0.1.1/asf/preprocessing/performace_scaling.py +500 -0
- asf-0.1.1/asf/preprocessing/sklearn_preprocessor.py +49 -0
- asf-0.1.1/asf/presolving/__init__.py +7 -0
- asf-0.1.1/asf/presolving/asap_v2.py +277 -0
- asf-0.1.1/asf/presolving/aspeed.py +189 -0
- asf-0.1.1/asf/presolving/presolver.py +22 -0
- asf-0.1.1/asf/scenario/__init__.py +3 -0
- asf-0.1.1/asf/scenario/aslib_reader.py +207 -0
- asf-0.1.1/asf/scenario/epmbench_reader.py +178 -0
- asf-0.1.1/asf/selectors/__init__.py +28 -0
- asf-0.1.1/asf/selectors/abstract_model_based_selector.py +70 -0
- asf-0.1.1/asf/selectors/abstract_selector.py +249 -0
- asf-0.1.1/asf/selectors/feature_generator.py +38 -0
- asf-0.1.1/asf/selectors/joint_ranking.py +107 -0
- asf-0.1.1/asf/selectors/mutli_class.py +60 -0
- asf-0.1.1/asf/selectors/pairwise_classifier.py +252 -0
- asf-0.1.1/asf/selectors/pairwise_regressor.py +235 -0
- asf-0.1.1/asf/selectors/performance_model.py +156 -0
- asf-0.1.1/asf/selectors/selector_pipeline.py +128 -0
- asf-0.1.1/asf/selectors/selector_tuner.py +196 -0
- asf-0.1.1/asf/selectors/simple_ranking.py +133 -0
- asf-0.1.1/asf/selectors/survival_analysis.py +146 -0
- asf-0.1.1/asf/utils/groupkfoldshuffle.py +29 -0
- asf-0.1.1/asf.egg-info/PKG-INFO +179 -0
- asf-0.1.1/asf.egg-info/SOURCES.txt +63 -0
- asf-0.1.1/asf.egg-info/dependency_links.txt +1 -0
- asf-0.1.1/asf.egg-info/requires.txt +31 -0
- asf-0.1.1/asf.egg-info/top_level.txt +1 -0
- asf-0.1.1/pyproject.toml +53 -0
- asf-0.1.1/setup.cfg +4 -0
asf-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Hadar Shavit
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
asf-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: asf
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Algorithm selection framework
|
|
5
|
+
Author-email: Hadar Shavit <shavit@aim.rwth-aachen.de>
|
|
6
|
+
Maintainer-email: Hadar Shavit <shavit@aim.rwth-aachen.de>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2025 Hadar Shavit
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: documentation, https://hadarshavit.github.io/asf/latest/
|
|
30
|
+
Project-URL: repository, https://github.com/hadarshavit/asf
|
|
31
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: Intended Audience :: Science/Research
|
|
34
|
+
Classifier: Natural Language :: English
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: scikit-learn
|
|
46
|
+
Requires-Dist: pandas
|
|
47
|
+
Requires-Dist: numpy
|
|
48
|
+
Provides-Extra: aslib
|
|
49
|
+
Requires-Dist: pyyaml; extra == "aslib"
|
|
50
|
+
Requires-Dist: liac-arff; extra == "aslib"
|
|
51
|
+
Provides-Extra: nn
|
|
52
|
+
Requires-Dist: torch; extra == "nn"
|
|
53
|
+
Provides-Extra: xgb
|
|
54
|
+
Requires-Dist: xgboost; extra == "xgb"
|
|
55
|
+
Provides-Extra: tune
|
|
56
|
+
Requires-Dist: smac; extra == "tune"
|
|
57
|
+
Provides-Extra: aspeed
|
|
58
|
+
Requires-Dist: clingo; extra == "aspeed"
|
|
59
|
+
Provides-Extra: survival
|
|
60
|
+
Requires-Dist: scikit-survival; extra == "survival"
|
|
61
|
+
Provides-Extra: all
|
|
62
|
+
Requires-Dist: pyyaml; extra == "all"
|
|
63
|
+
Requires-Dist: liac-arff; extra == "all"
|
|
64
|
+
Requires-Dist: torch; extra == "all"
|
|
65
|
+
Requires-Dist: xgboost; extra == "all"
|
|
66
|
+
Requires-Dist: smac; extra == "all"
|
|
67
|
+
Requires-Dist: clingo; extra == "all"
|
|
68
|
+
Requires-Dist: scikit-survival; extra == "all"
|
|
69
|
+
Dynamic: license-file
|
|
70
|
+
|
|
71
|
+
[](https://pypi.org/project/asf-lib/)
|
|
72
|
+
[](https://pypi.org/project/asf-lib/)
|
|
73
|
+
[](LICENSE)
|
|
74
|
+
[](https://github.com/hadarshavit/asf/actions/workflows/tests.yml)
|
|
75
|
+
[](https://doi.org/10.5281/zenodo.14957286)
|
|
76
|
+
|
|
77
|
+
# Algorithm Selection Framework (ASF)
|
|
78
|
+
|
|
79
|
+
ASF is a lightweight yet powerful Python library for algorithm selection and empirical performance prediction.
|
|
80
|
+
It implements various algorithm selection methods, along with algorithm pre-selection, pre-solving schedules and more features to easily create algorithm selection pipeline.
|
|
81
|
+
ASF is a modular framework that allows easy extensions to tailor made an algorithm selector for every use-case.
|
|
82
|
+
While ASF includes several built-in machine learning models through scikit-learn and XGBoost, it supports every model that complies with the scikit-learn API.
|
|
83
|
+
ASF also implements empirical performance prediction, allowing to use different performance scalings.
|
|
84
|
+
|
|
85
|
+
ASF is written in Python 3 and is intended for use with Python 3.10+. It requires only scikit-learn, NumPy and Pandas as basic requirements. More advanced features (such as hyperparameter optimisation) require additional dependencies.
|
|
86
|
+
|
|
87
|
+
You can find full documentation in: https://hadarshavit.github.io/asf/
|
|
88
|
+
## Installation
|
|
89
|
+
|
|
90
|
+
ASF is written in Python3 and requires Python version 3.10+.
|
|
91
|
+
The basic installation is lightweight and requires only NumPy, Pandas and scikit-learn.
|
|
92
|
+
|
|
93
|
+
ASF is currently tested on Linux machines. Mac and Windows (official) support will be released in the near future.
|
|
94
|
+
|
|
95
|
+
To install the base version run
|
|
96
|
+
```bash
|
|
97
|
+
pip install asf-lib
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Additional options
|
|
101
|
+
|
|
102
|
+
Additional options include:
|
|
103
|
+
|
|
104
|
+
- XGBoost model support `pip install asf-lib[xgb]`
|
|
105
|
+
- PyTorch-based models `pip install asf-lib[nn]`
|
|
106
|
+
- ASlib scenarios reading `pip install asf-lib[aslib]`
|
|
107
|
+
|
|
108
|
+
## Quick start
|
|
109
|
+
|
|
110
|
+
The first step is to define the data. It can be either a NumPy array or a Pandas DataFrame.
|
|
111
|
+
The data contains of (at least) two matrices. The first defines the instance features with a row for every instance and each column defines one feature.
|
|
112
|
+
The second is the performance data, which for which every row describes an instance and each column the performance of a single algorithm.
|
|
113
|
+
|
|
114
|
+
Here, we define some toy data on three instances, three features and three algorithms.
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
data = np.array(
|
|
118
|
+
[
|
|
119
|
+
[10, 5, 1],
|
|
120
|
+
[20, 10, 2],
|
|
121
|
+
[15, 8, 1.5],
|
|
122
|
+
]
|
|
123
|
+
)
|
|
124
|
+
features = pd.DataFrame(data, columns=["feature1", "feature2", "feature3"])
|
|
125
|
+
performance = np.array(
|
|
126
|
+
[
|
|
127
|
+
[120, 100, 110],
|
|
128
|
+
[140, 150, 130],
|
|
129
|
+
[180, 170, 190],
|
|
130
|
+
]
|
|
131
|
+
)
|
|
132
|
+
performance = pd.DataFrame(performance, columns=["algo1", "algo2", "algo3"])
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
We can then define a selector:
|
|
137
|
+
```python
|
|
138
|
+
from asf.selectors import PairwiseClassifier
|
|
139
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
140
|
+
|
|
141
|
+
selector = PairwiseClassifier(model_class=RandomForestClassifier)
|
|
142
|
+
|
|
143
|
+
selector.fit(features, performance)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Next, we can use the selector to predict on unseen data:
|
|
147
|
+
```
|
|
148
|
+
selector.predict(features)
|
|
149
|
+
```
|
|
150
|
+
Currently, ASF always returns the prediction in the ASlib format: a dictionary which has the instance id (row index, in case of a numpy array or the index of the row for a pandas dataframe) as keys and an array of tuples (predicted algorithm, budget).
|
|
151
|
+
The selectors has only one tuple in the array, which is the selected algorithm.
|
|
152
|
+
An example output is:
|
|
153
|
+
```
|
|
154
|
+
{
|
|
155
|
+
0: [('algo2', None)],
|
|
156
|
+
1: [('algo3', None)],
|
|
157
|
+
2: [('algo2', None)]
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
The budget is set by default to None. To change the budget, you can pass it as an argument for the selector initialisation.
|
|
162
|
+
Similarly, ASF minimises the performance by default. To change it, pass `maximize=True` to the selector.
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
## Cite Us
|
|
168
|
+
|
|
169
|
+
If you use ASF, please cite the Zenodo DOI. We are currently working on publishing a paper on ASF, but by then a Zenodo citation will do it.
|
|
170
|
+
|
|
171
|
+
```bibtex
|
|
172
|
+
@software{ASF,
|
|
173
|
+
author = {Hadar Shavit and Holger Hoos},
|
|
174
|
+
doi = {10.5281/zenodo.15288151},
|
|
175
|
+
title = {ASF: Algorithm Selection Framework},
|
|
176
|
+
url = {https://doi.org/10.5281/zenodo.15288151},
|
|
177
|
+
year = {in progress},
|
|
178
|
+
}
|
|
179
|
+
```
|
asf-0.1.1/README.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
[](https://pypi.org/project/asf-lib/)
|
|
2
|
+
[](https://pypi.org/project/asf-lib/)
|
|
3
|
+
[](LICENSE)
|
|
4
|
+
[](https://github.com/hadarshavit/asf/actions/workflows/tests.yml)
|
|
5
|
+
[](https://doi.org/10.5281/zenodo.14957286)
|
|
6
|
+
|
|
7
|
+
# Algorithm Selection Framework (ASF)
|
|
8
|
+
|
|
9
|
+
ASF is a lightweight yet powerful Python library for algorithm selection and empirical performance prediction.
|
|
10
|
+
It implements various algorithm selection methods, along with algorithm pre-selection, pre-solving schedules and more features to easily create algorithm selection pipeline.
|
|
11
|
+
ASF is a modular framework that allows easy extensions to tailor made an algorithm selector for every use-case.
|
|
12
|
+
While ASF includes several built-in machine learning models through scikit-learn and XGBoost, it supports every model that complies with the scikit-learn API.
|
|
13
|
+
ASF also implements empirical performance prediction, allowing to use different performance scalings.
|
|
14
|
+
|
|
15
|
+
ASF is written in Python 3 and is intended for use with Python 3.10+. It requires only scikit-learn, NumPy and Pandas as basic requirements. More advanced features (such as hyperparameter optimisation) require additional dependencies.
|
|
16
|
+
|
|
17
|
+
You can find full documentation in: https://hadarshavit.github.io/asf/
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
ASF is written in Python3 and requires Python version 3.10+.
|
|
21
|
+
The basic installation is lightweight and requires only NumPy, Pandas and scikit-learn.
|
|
22
|
+
|
|
23
|
+
ASF is currently tested on Linux machines. Mac and Windows (official) support will be released in the near future.
|
|
24
|
+
|
|
25
|
+
To install the base version run
|
|
26
|
+
```bash
|
|
27
|
+
pip install asf-lib
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Additional options
|
|
31
|
+
|
|
32
|
+
Additional options include:
|
|
33
|
+
|
|
34
|
+
- XGBoost model support `pip install asf-lib[xgb]`
|
|
35
|
+
- PyTorch-based models `pip install asf-lib[nn]`
|
|
36
|
+
- ASlib scenarios reading `pip install asf-lib[aslib]`
|
|
37
|
+
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
The first step is to define the data. It can be either a NumPy array or a Pandas DataFrame.
|
|
41
|
+
The data contains of (at least) two matrices. The first defines the instance features with a row for every instance and each column defines one feature.
|
|
42
|
+
The second is the performance data, which for which every row describes an instance and each column the performance of a single algorithm.
|
|
43
|
+
|
|
44
|
+
Here, we define some toy data on three instances, three features and three algorithms.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
data = np.array(
|
|
48
|
+
[
|
|
49
|
+
[10, 5, 1],
|
|
50
|
+
[20, 10, 2],
|
|
51
|
+
[15, 8, 1.5],
|
|
52
|
+
]
|
|
53
|
+
)
|
|
54
|
+
features = pd.DataFrame(data, columns=["feature1", "feature2", "feature3"])
|
|
55
|
+
performance = np.array(
|
|
56
|
+
[
|
|
57
|
+
[120, 100, 110],
|
|
58
|
+
[140, 150, 130],
|
|
59
|
+
[180, 170, 190],
|
|
60
|
+
]
|
|
61
|
+
)
|
|
62
|
+
performance = pd.DataFrame(performance, columns=["algo1", "algo2", "algo3"])
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
We can then define a selector:
|
|
67
|
+
```python
|
|
68
|
+
from asf.selectors import PairwiseClassifier
|
|
69
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
70
|
+
|
|
71
|
+
selector = PairwiseClassifier(model_class=RandomForestClassifier)
|
|
72
|
+
|
|
73
|
+
selector.fit(features, performance)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Next, we can use the selector to predict on unseen data:
|
|
77
|
+
```
|
|
78
|
+
selector.predict(features)
|
|
79
|
+
```
|
|
80
|
+
Currently, ASF always returns the prediction in the ASlib format: a dictionary which has the instance id (row index, in case of a numpy array or the index of the row for a pandas dataframe) as keys and an array of tuples (predicted algorithm, budget).
|
|
81
|
+
The selectors has only one tuple in the array, which is the selected algorithm.
|
|
82
|
+
An example output is:
|
|
83
|
+
```
|
|
84
|
+
{
|
|
85
|
+
0: [('algo2', None)],
|
|
86
|
+
1: [('algo3', None)],
|
|
87
|
+
2: [('algo2', None)]
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The budget is set by default to None. To change the budget, you can pass it as an argument for the selector initialisation.
|
|
92
|
+
Similarly, ASF minimises the performance by default. To change it, pass `maximize=True` to the selector.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
## Cite Us
|
|
98
|
+
|
|
99
|
+
If you use ASF, please cite the Zenodo DOI. We are currently working on publishing a paper on ASF, but by then a Zenodo citation will do it.
|
|
100
|
+
|
|
101
|
+
```bibtex
|
|
102
|
+
@software{ASF,
|
|
103
|
+
author = {Hadar Shavit and Holger Hoos},
|
|
104
|
+
doi = {10.5281/zenodo.15288151},
|
|
105
|
+
title = {ASF: Algorithm Selection Framework},
|
|
106
|
+
url = {https://doi.org/10.5281/zenodo.15288151},
|
|
107
|
+
year = {in progress},
|
|
108
|
+
}
|
|
109
|
+
```
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CLI entry point for training selectors.
|
|
3
|
+
|
|
4
|
+
This script provides a command-line interface for training model-based selectors.
|
|
5
|
+
It allows users to specify the selector type, model, budget, and other parameters
|
|
6
|
+
to train and save the selector model.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from functools import partial
|
|
12
|
+
from typing import Dict, Callable, List
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
from asf import selectors
|
|
17
|
+
|
|
18
|
+
import sklearn
|
|
19
|
+
|
|
20
|
+
# Mapping of file extensions to the pandas reader used to load them.
# NOTE(review): the __main__ block below passes ``index_col=0`` to the
# selected reader; that keyword is only accepted by some of these readers
# (e.g. read_csv) — confirm before feeding non-CSV inputs through the CLI.
pandas_read_map: Dict[str, Callable] = {
    ".csv": pd.read_csv,
    ".parquet": pd.read_parquet,
    ".json": pd.read_json,
    ".feather": pd.read_feather,
    ".hdf": pd.read_hdf,
    ".html": pd.read_html,
    ".xml": pd.read_xml,
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parser_function() -> argparse.ArgumentParser:
|
|
33
|
+
"""Define command line arguments for the CLI.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
argparse.ArgumentParser: The argument parser with defined arguments.
|
|
37
|
+
"""
|
|
38
|
+
parser = argparse.ArgumentParser()
|
|
39
|
+
parser.add_argument(
|
|
40
|
+
"--selector",
|
|
41
|
+
choices=selectors.__implemented__,
|
|
42
|
+
required=True,
|
|
43
|
+
help="Selector to train",
|
|
44
|
+
)
|
|
45
|
+
parser.add_argument(
|
|
46
|
+
"--model",
|
|
47
|
+
default="RandomForestClassifier",
|
|
48
|
+
help="Model to use for the selector. "
|
|
49
|
+
"Make sure to specify as an attribute of sklearn.ensemble.",
|
|
50
|
+
)
|
|
51
|
+
parser.add_argument(
|
|
52
|
+
"--budget",
|
|
53
|
+
type=int,
|
|
54
|
+
default=None,
|
|
55
|
+
required=False,
|
|
56
|
+
help="Budget for the solvers",
|
|
57
|
+
)
|
|
58
|
+
parser.add_argument(
|
|
59
|
+
"--maximize",
|
|
60
|
+
type=bool,
|
|
61
|
+
default=False,
|
|
62
|
+
required=False,
|
|
63
|
+
help="Maximize the objective",
|
|
64
|
+
)
|
|
65
|
+
parser.add_argument(
|
|
66
|
+
"--performance-metric",
|
|
67
|
+
type=str,
|
|
68
|
+
default="",
|
|
69
|
+
required=False,
|
|
70
|
+
help="Performance metric to optimize",
|
|
71
|
+
)
|
|
72
|
+
parser.add_argument(
|
|
73
|
+
"--feature-data",
|
|
74
|
+
type=Path,
|
|
75
|
+
required=True,
|
|
76
|
+
help="Path to feature data",
|
|
77
|
+
)
|
|
78
|
+
parser.add_argument(
|
|
79
|
+
"--performance-data",
|
|
80
|
+
type=Path,
|
|
81
|
+
required=True,
|
|
82
|
+
help="Path to performance data",
|
|
83
|
+
)
|
|
84
|
+
parser.add_argument(
|
|
85
|
+
"--model-path",
|
|
86
|
+
type=Path,
|
|
87
|
+
required=True,
|
|
88
|
+
help="Path to save model",
|
|
89
|
+
)
|
|
90
|
+
return parser
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def build_cli_command(
    selector: selectors.AbstractModelBasedSelector,
    feature_data: Path,
    performance_data: Path,
    destination: Path,
) -> List[str]:
    """Assemble the command line that retrains ``selector`` in a subprocess.

    Args:
        selector (selectors.AbstractModelBasedSelector): Selector to train.
        feature_data (Path): Path to feature data DataFrame.
        performance_data (Path): Path to performance data DataFrame.
        destination (Path): Path to save the trained model.

    Returns:
        List[str]: A list of command-line arguments to execute the training job.
    """
    # When the model class was wrapped in functools.partial, the underlying
    # class is stored as the partial's first positional argument.
    if isinstance(selector.model_class, partial):
        model_cls = selector.model_class.args[0]
    else:
        model_cls = selector.model_class

    command = ["python", str(Path(__file__).absolute())]
    command.extend(["--selector", type(selector).__name__])
    command.extend(["--model", f"{model_cls.__name__}"])
    command.extend(["--budget", str(selector.budget)])
    command.extend(["--maximize", str(selector.maximize)])
    command.extend(["--performance-metric", str(selector.performance_metric)])
    command.extend(["--feature-data", str(feature_data)])
    command.extend(["--performance-data", str(performance_data)])
    command.extend(["--model-path", str(destination)])
    return command
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
if __name__ == "__main__":
    parser = parser_function()
    args = parser.parse_args()

    # Resolve the selector and model classes from their CLI names.
    selector_class = getattr(selectors, args.selector)
    # BUG FIX: ``import sklearn`` alone does not make the ``ensemble``
    # submodule available as an attribute of the package; it must be
    # imported explicitly before ``getattr(sklearn.ensemble, ...)``.
    import sklearn.ensemble

    model_class = getattr(sklearn.ensemble, args.model)

    # Load the training data; the reader is picked by file extension.
    # NOTE(review): ``index_col=0`` is only accepted by some readers
    # (e.g. read_csv) — non-CSV formats may need different handling.
    features: pd.DataFrame = pandas_read_map[args.feature_data.suffix](
        args.feature_data, index_col=0
    )
    performance_data: pd.DataFrame = pandas_read_map[args.performance_data.suffix](
        args.performance_data, index_col=0
    )

    selector = selector_class(
        model_class,
        maximize=args.maximize,
        budget=args.budget,
    )
    selector.fit(features, performance_data)

    # Save the trained model to the specified path.
    selector.save(args.model_path)
|
asf-0.1.1/asf/epm/epm.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from functools import partial
|
|
2
|
+
from typing import Type, Union, Optional
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import numpy as np
|
|
6
|
+
from sklearn.base import RegressorMixin
|
|
7
|
+
|
|
8
|
+
from asf.preprocessing.performace_scaling import AbstractNormalization, LogNormalization
|
|
9
|
+
from asf.predictors import SklearnWrapper
|
|
10
|
+
from asf.preprocessing.sklearn_preprocessor import get_default_preprocessor
|
|
11
|
+
from sklearn.base import TransformerMixin
|
|
12
|
+
from asf.predictors.abstract_predictor import AbstractPredictor
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EPM:
    """
    The EPM (Empirical Performance Model) class is a wrapper for machine learning models
    that includes preprocessing, normalization, and optional inverse transformation of predictions.

    Attributes:
        predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
        normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
        transform_back (bool): Whether to apply inverse transformation to predictions.
        features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
        predictor_config (Optional[dict]): Configuration for the predictor.
        predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
    """

    def __init__(
        self,
        predictor_class: Union[Type[AbstractPredictor], Type[RegressorMixin]],
        normalization_class: Type[AbstractNormalization] = LogNormalization,
        transform_back: bool = True,
        features_preprocessing: Union[str, TransformerMixin] = "default",
        categorical_features: Optional[list] = None,
        numerical_features: Optional[list] = None,
        predictor_config: Optional[dict] = None,
        predictor_kwargs: Optional[dict] = None,
    ):
        """
        Initialize the EPM model.

        Parameters:
            predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
            normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
            transform_back (bool): Whether to apply inverse transformation to predictions.
            features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
            categorical_features (Optional[list]): List of categorical feature names.
            numerical_features (Optional[list]): List of numerical feature names.
            predictor_config (Optional[dict]): Configuration for the predictor.
            predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
        """
        # Plain sklearn regressors are wrapped so they expose the
        # AbstractPredictor-style interface used by the rest of the package.
        if isinstance(predictor_class, type) and issubclass(
            predictor_class, RegressorMixin
        ):
            self.model_class = partial(SklearnWrapper, predictor_class)
        else:
            self.model_class = predictor_class

        self.predictor_class = predictor_class
        self.normalization_class = normalization_class
        self.transform_back = transform_back
        self.predictor_config = predictor_config
        self.predictor_kwargs = predictor_kwargs or {}
        # Tracks whether fit() received NumPy input so predict() converts too.
        self.numpy = False

        if features_preprocessing == "default":
            self.features_preprocessing = get_default_preprocessor(
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
        else:
            self.features_preprocessing = features_preprocessing

    def fit(
        self,
        X: Union[pd.DataFrame, pd.Series, list],
        y: Union[pd.Series, list],
        sample_weight: Optional[list] = None,
    ) -> "EPM":
        """
        Fit the EPM model to the data.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.
            y (Union[pd.Series, list]): Target variable.
            sample_weight (Optional[list]): Sample weights (optional).

        Returns:
            EPM: The fitted EPM model.
        """
        # Convert NumPy inputs to pandas independently. Previously BOTH X
        # and y had to be arrays for either to be converted, so mixed
        # inputs (e.g. ndarray X with a list y) were silently left alone.
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(
                X,
                index=range(len(X)),
                columns=[f"f_{i}" for i in range(X.shape[1])],
            )
            self.numpy = True
        if isinstance(y, np.ndarray):
            y = pd.Series(
                y,
                index=range(len(y)),
            )
            self.numpy = True

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.fit_transform(X)

        # Fit the target normalization and transform y before training.
        self.normalization = self.normalization_class()
        self.normalization.fit(y)
        y = self.normalization.transform(y)

        if self.predictor_config is None:
            # BUG FIX: instantiate model_class (which wraps sklearn
            # regressors in SklearnWrapper) instead of the raw
            # predictor_class, so the wrapper built in __init__ is
            # actually used.
            self.predictor = self.model_class()
        else:
            # NOTE(review): get_from_configuration is assumed to exist on
            # AbstractPredictor subclasses; plain sklearn classes do not
            # provide it — confirm callers only combine a config with
            # AbstractPredictor-based predictor classes.
            self.predictor = self.predictor_class.get_from_configuration(
                self.predictor_config, **self.predictor_kwargs
            )()

        self.predictor.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X: Union[pd.DataFrame, pd.Series, list]) -> list:
        """
        Predict using the fitted EPM model.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.

        Returns:
            list: Predicted values.
        """
        # Mirror the fit-time NumPy -> DataFrame conversion so column
        # names match what the preprocessor was fitted on.
        if self.numpy:
            if isinstance(X, np.ndarray):
                X = pd.DataFrame(
                    X,
                    index=range(len(X)),
                    columns=[f"f_{i}" for i in range(X.shape[1])],
                )

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.transform(X)

        y_pred = self.predictor.predict(X)

        # Undo the target normalization so predictions are on the
        # original performance scale.
        if self.transform_back:
            y_pred = self.normalization.inverse_transform(y_pred)

        return y_pred
|