asf 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. asf-0.1.1/LICENSE +21 -0
  2. asf-0.1.1/PKG-INFO +179 -0
  3. asf-0.1.1/README.md +109 -0
  4. asf-0.1.1/asf/__init__.py +0 -0
  5. asf-0.1.1/asf/cli/__init__.py +0 -0
  6. asf-0.1.1/asf/cli/cli_train.py +161 -0
  7. asf-0.1.1/asf/epm/__init__.py +4 -0
  8. asf-0.1.1/asf/epm/epm.py +147 -0
  9. asf-0.1.1/asf/epm/epm_tuner.py +141 -0
  10. asf-0.1.1/asf/metrics/__init__.py +14 -0
  11. asf-0.1.1/asf/metrics/baselines.py +127 -0
  12. asf-0.1.1/asf/pre_selector/__init__.py +18 -0
  13. asf-0.1.1/asf/pre_selector/abstract_pre_selector.py +32 -0
  14. asf-0.1.1/asf/pre_selector/beam_search_pre_selection.py +84 -0
  15. asf-0.1.1/asf/pre_selector/brute_force_pre_selection.py +83 -0
  16. asf-0.1.1/asf/pre_selector/knee_of_the_curve_pre_selector.py +140 -0
  17. asf-0.1.1/asf/pre_selector/marginal_contribution_based.py +97 -0
  18. asf-0.1.1/asf/pre_selector/optimize_pre_selection.py +146 -0
  19. asf-0.1.1/asf/pre_selector/sbs_pre_selection.py +75 -0
  20. asf-0.1.1/asf/predictors/__init__.py +33 -0
  21. asf-0.1.1/asf/predictors/abstract_predictor.py +155 -0
  22. asf-0.1.1/asf/predictors/epm_extra_trees.py +137 -0
  23. asf-0.1.1/asf/predictors/epm_random_forest.py +215 -0
  24. asf-0.1.1/asf/predictors/linear_model.py +245 -0
  25. asf-0.1.1/asf/predictors/mlp.py +329 -0
  26. asf-0.1.1/asf/predictors/random_forest.py +293 -0
  27. asf-0.1.1/asf/predictors/ranking_mlp.py +207 -0
  28. asf-0.1.1/asf/predictors/regression_mlp.py +165 -0
  29. asf-0.1.1/asf/predictors/sklearn_wrapper.py +108 -0
  30. asf-0.1.1/asf/predictors/svm.py +301 -0
  31. asf-0.1.1/asf/predictors/utils/datasets.py +90 -0
  32. asf-0.1.1/asf/predictors/utils/losses.py +40 -0
  33. asf-0.1.1/asf/predictors/utils/mlp.py +26 -0
  34. asf-0.1.1/asf/predictors/xgboost.py +553 -0
  35. asf-0.1.1/asf/preprocessing/__init__.py +27 -0
  36. asf-0.1.1/asf/preprocessing/performace_scaling.py +500 -0
  37. asf-0.1.1/asf/preprocessing/sklearn_preprocessor.py +49 -0
  38. asf-0.1.1/asf/presolving/__init__.py +7 -0
  39. asf-0.1.1/asf/presolving/asap_v2.py +277 -0
  40. asf-0.1.1/asf/presolving/aspeed.py +189 -0
  41. asf-0.1.1/asf/presolving/presolver.py +22 -0
  42. asf-0.1.1/asf/scenario/__init__.py +3 -0
  43. asf-0.1.1/asf/scenario/aslib_reader.py +207 -0
  44. asf-0.1.1/asf/scenario/epmbench_reader.py +178 -0
  45. asf-0.1.1/asf/selectors/__init__.py +28 -0
  46. asf-0.1.1/asf/selectors/abstract_model_based_selector.py +70 -0
  47. asf-0.1.1/asf/selectors/abstract_selector.py +249 -0
  48. asf-0.1.1/asf/selectors/feature_generator.py +38 -0
  49. asf-0.1.1/asf/selectors/joint_ranking.py +107 -0
  50. asf-0.1.1/asf/selectors/mutli_class.py +60 -0
  51. asf-0.1.1/asf/selectors/pairwise_classifier.py +252 -0
  52. asf-0.1.1/asf/selectors/pairwise_regressor.py +235 -0
  53. asf-0.1.1/asf/selectors/performance_model.py +156 -0
  54. asf-0.1.1/asf/selectors/selector_pipeline.py +128 -0
  55. asf-0.1.1/asf/selectors/selector_tuner.py +196 -0
  56. asf-0.1.1/asf/selectors/simple_ranking.py +133 -0
  57. asf-0.1.1/asf/selectors/survival_analysis.py +146 -0
  58. asf-0.1.1/asf/utils/groupkfoldshuffle.py +29 -0
  59. asf-0.1.1/asf.egg-info/PKG-INFO +179 -0
  60. asf-0.1.1/asf.egg-info/SOURCES.txt +63 -0
  61. asf-0.1.1/asf.egg-info/dependency_links.txt +1 -0
  62. asf-0.1.1/asf.egg-info/requires.txt +31 -0
  63. asf-0.1.1/asf.egg-info/top_level.txt +1 -0
  64. asf-0.1.1/pyproject.toml +53 -0
  65. asf-0.1.1/setup.cfg +4 -0
asf-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Hadar Shavit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
asf-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: asf
3
+ Version: 0.1.1
4
+ Summary: Algorithm selection framework
5
+ Author-email: Hadar Shavit <shavit@aim.rwth-aachen.de>
6
+ Maintainer-email: Hadar Shavit <shavit@aim.rwth-aachen.de>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2025 Hadar Shavit
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+
29
+ Project-URL: documentation, https://hadarshavit.github.io/asf/latest/
30
+ Project-URL: repository, https://github.com/hadarshavit/asf
31
+ Classifier: Development Status :: 2 - Pre-Alpha
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Intended Audience :: Science/Research
34
+ Classifier: Natural Language :: English
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: POSIX :: Linux
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: scikit-learn
46
+ Requires-Dist: pandas
47
+ Requires-Dist: numpy
48
+ Provides-Extra: aslib
49
+ Requires-Dist: pyyaml; extra == "aslib"
50
+ Requires-Dist: liac-arff; extra == "aslib"
51
+ Provides-Extra: nn
52
+ Requires-Dist: torch; extra == "nn"
53
+ Provides-Extra: xgb
54
+ Requires-Dist: xgboost; extra == "xgb"
55
+ Provides-Extra: tune
56
+ Requires-Dist: smac; extra == "tune"
57
+ Provides-Extra: aspeed
58
+ Requires-Dist: clingo; extra == "aspeed"
59
+ Provides-Extra: survival
60
+ Requires-Dist: scikit-survival; extra == "survival"
61
+ Provides-Extra: all
62
+ Requires-Dist: pyyaml; extra == "all"
63
+ Requires-Dist: liac-arff; extra == "all"
64
+ Requires-Dist: torch; extra == "all"
65
+ Requires-Dist: xgboost; extra == "all"
66
+ Requires-Dist: smac; extra == "all"
67
+ Requires-Dist: clingo; extra == "all"
68
+ Requires-Dist: scikit-survival; extra == "all"
69
+ Dynamic: license-file
70
+
71
+ [![PyPI - Version](https://img.shields.io/pypi/v/asf-lib)](https://pypi.org/project/asf-lib/)
72
+ [![Python versions](https://img.shields.io/pypi/pyversions/asf-lib)](https://pypi.org/project/asf-lib/)
73
+ [![License](https://img.shields.io/pypi/l/asf-lib?color=informational)](LICENSE)
74
+ [![Python application](https://github.com/hadarshavit/asf/actions/workflows/tests.yml/badge.svg)](https://github.com/hadarshavit/asf/actions/workflows/tests.yml)
75
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14957286.svg)](https://doi.org/10.5281/zenodo.14957286)
76
+
77
+ # Algorithm Selection Framework (ASF)
78
+
79
+ ASF is a lightweight yet powerful Python library for algorithm selection and empirical performance prediction.
80
+ It implements various algorithm selection methods, along with algorithm pre-selection, pre-solving schedules and more features to easily create algorithm selection pipeline.
81
+ ASF is a modular framework that allows easy extensions to tailor made an algorithm selector for every use-case.
82
+ While ASF includes several built-in machine learning models through scikit-learn and XGBoost, it supports every model that complies with the scikit-learn API.
83
+ ASF also implements empirical performance prediction, allowing to use different performance scalings.
84
+
85
+ ASF is written in Python 3 and is intended for use with Python 3.10+. It requires only scikit-learn, NumPy and Pandas as basic requirements. More advanced features (such as hyperparameter optimisation) require additional dependencies.
86
+
87
+ You can find full documentation in: https://hadarshavit.github.io/asf/
88
+ ## Installation
89
+
90
+ ASF is written in Python3 and requires Python version 3.10+.
91
+ The basic installation is lightweight and requires only NumPy, Pandas and scikit-learn.
92
+
93
+ ASF is currently tested on Linux machines. Mac and Windows (official) support will be released in the near future.
94
+
95
+ To install the base version run
96
+ ```bash
97
+ pip install asf-lib
98
+ ```
99
+
100
+ ### Additional options
101
+
102
+ Additional options include:
103
+
104
+ - XGBoost model support `pip install asf-lib[xgb]`
105
+ - PyTorch-based models `pip install asf-lib[nn]`
106
+ - ASlib scenarios reading `pip install asf-lib[aslib]`
107
+
108
+ ## Quick start
109
+
110
+ The first step is to define the data. It can be either a NumPy array or a Pandas DataFrame.
111
+ The data contains of (at least) two matrices. The first defines the instance features with a row for every instance and each column defines one feature.
112
+ The second is the performance data, which for which every row describes an instance and each column the performance of a single algorithm.
113
+
114
+ Here, we define some toy data on three instances, three features and three algorithms.
115
+
116
+ ```python
117
+ data = np.array(
118
+ [
119
+ [10, 5, 1],
120
+ [20, 10, 2],
121
+ [15, 8, 1.5],
122
+ ]
123
+ )
124
+ features = pd.DataFrame(data, columns=["feature1", "feature2", "feature3"])
125
+ performance = np.array(
126
+ [
127
+ [120, 100, 110],
128
+ [140, 150, 130],
129
+ [180, 170, 190],
130
+ ]
131
+ )
132
+ performance = pd.DataFrame(performance, columns=["algo1", "algo2", "algo3"])
133
+
134
+ ```
135
+
136
+ We can then define a selector:
137
+ ```python
138
+ from asf.selectors import PairwiseClassifier
139
+ from sklearn.ensemble import RandomForestClassifier
140
+
141
+ selector = PairwiseClassifier(model_class=RandomForestClassifier)
142
+
143
+ selector.fit(features, performance)
144
+ ```
145
+
146
+ Next, we can use the selector to predict on unseen data:
147
+ ```
148
+ selector.predict(features)
149
+ ```
150
+ Currently, ASF always returns the prediction in the ASlib format: a dictionary which has the instance id (row index, in case of a numpy array or the index of the row for a pandas dataframe) as keys and an array of tuples (predicted algorithm, budget).
151
+ The selectors has only one tuple in the array, which is the selected algorithm.
152
+ An example output is:
153
+ ```
154
+ {
155
+ 0: [('algo2', None)],
156
+ 1: [('algo3', None)],
157
+ 2: [('algo2', None)]
158
+ }
159
+ ```
160
+
161
+ The budget is set by default to None. To change the budget, you can pass it as an argument for the selector initialisation.
162
+ Similarly, ASF minimises the performance by default. To change it, pass `maximize=True` to the selector.
163
+
164
+
165
+
166
+
167
+ ## Cite Us
168
+
169
+ If you use ASF, please cite the Zenodo DOI. We are currently working on publishing a paper on ASF, but by then a Zenodo citation will do it.
170
+
171
+ ```bibtex
172
+ @software{ASF,
173
+ author = {Hadar Shavit and Holger Hoos},
174
+ doi = {10.5281/zenodo.15288151},
175
+ title = {ASF: Algorithm Selection Framework},
176
+ url = {https://doi.org/10.5281/zenodo.15288151},
177
+ year = {in progress},
178
+ }
179
+ ```
asf-0.1.1/README.md ADDED
@@ -0,0 +1,109 @@
1
+ [![PyPI - Version](https://img.shields.io/pypi/v/asf-lib)](https://pypi.org/project/asf-lib/)
2
+ [![Python versions](https://img.shields.io/pypi/pyversions/asf-lib)](https://pypi.org/project/asf-lib/)
3
+ [![License](https://img.shields.io/pypi/l/asf-lib?color=informational)](LICENSE)
4
+ [![Python application](https://github.com/hadarshavit/asf/actions/workflows/tests.yml/badge.svg)](https://github.com/hadarshavit/asf/actions/workflows/tests.yml)
5
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14957286.svg)](https://doi.org/10.5281/zenodo.14957286)
6
+
7
+ # Algorithm Selection Framework (ASF)
8
+
9
+ ASF is a lightweight yet powerful Python library for algorithm selection and empirical performance prediction.
10
+ It implements various algorithm selection methods, along with algorithm pre-selection, pre-solving schedules and more features to easily create algorithm selection pipeline.
11
+ ASF is a modular framework that allows easy extensions to tailor made an algorithm selector for every use-case.
12
+ While ASF includes several built-in machine learning models through scikit-learn and XGBoost, it supports every model that complies with the scikit-learn API.
13
+ ASF also implements empirical performance prediction, allowing to use different performance scalings.
14
+
15
+ ASF is written in Python 3 and is intended for use with Python 3.10+. It requires only scikit-learn, NumPy and Pandas as basic requirements. More advanced features (such as hyperparameter optimisation) require additional dependencies.
16
+
17
+ You can find full documentation in: https://hadarshavit.github.io/asf/
18
+ ## Installation
19
+
20
+ ASF is written in Python3 and requires Python version 3.10+.
21
+ The basic installation is lightweight and requires only NumPy, Pandas and scikit-learn.
22
+
23
+ ASF is currently tested on Linux machines. Mac and Windows (official) support will be released in the near future.
24
+
25
+ To install the base version run
26
+ ```bash
27
+ pip install asf-lib
28
+ ```
29
+
30
+ ### Additional options
31
+
32
+ Additional options include:
33
+
34
+ - XGBoost model support `pip install asf-lib[xgb]`
35
+ - PyTorch-based models `pip install asf-lib[nn]`
36
+ - ASlib scenarios reading `pip install asf-lib[aslib]`
37
+
38
+ ## Quick start
39
+
40
+ The first step is to define the data. It can be either a NumPy array or a Pandas DataFrame.
41
+ The data contains of (at least) two matrices. The first defines the instance features with a row for every instance and each column defines one feature.
42
+ The second is the performance data, which for which every row describes an instance and each column the performance of a single algorithm.
43
+
44
+ Here, we define some toy data on three instances, three features and three algorithms.
45
+
46
+ ```python
47
+ data = np.array(
48
+ [
49
+ [10, 5, 1],
50
+ [20, 10, 2],
51
+ [15, 8, 1.5],
52
+ ]
53
+ )
54
+ features = pd.DataFrame(data, columns=["feature1", "feature2", "feature3"])
55
+ performance = np.array(
56
+ [
57
+ [120, 100, 110],
58
+ [140, 150, 130],
59
+ [180, 170, 190],
60
+ ]
61
+ )
62
+ performance = pd.DataFrame(performance, columns=["algo1", "algo2", "algo3"])
63
+
64
+ ```
65
+
66
+ We can then define a selector:
67
+ ```python
68
+ from asf.selectors import PairwiseClassifier
69
+ from sklearn.ensemble import RandomForestClassifier
70
+
71
+ selector = PairwiseClassifier(model_class=RandomForestClassifier)
72
+
73
+ selector.fit(features, performance)
74
+ ```
75
+
76
+ Next, we can use the selector to predict on unseen data:
77
+ ```
78
+ selector.predict(features)
79
+ ```
80
+ Currently, ASF always returns the prediction in the ASlib format: a dictionary which has the instance id (row index, in case of a numpy array or the index of the row for a pandas dataframe) as keys and an array of tuples (predicted algorithm, budget).
81
+ The selectors has only one tuple in the array, which is the selected algorithm.
82
+ An example output is:
83
+ ```
84
+ {
85
+ 0: [('algo2', None)],
86
+ 1: [('algo3', None)],
87
+ 2: [('algo2', None)]
88
+ }
89
+ ```
90
+
91
+ The budget is set by default to None. To change the budget, you can pass it as an argument for the selector initialisation.
92
+ Similarly, ASF minimises the performance by default. To change it, pass `maximize=True` to the selector.
93
+
94
+
95
+
96
+
97
+ ## Cite Us
98
+
99
+ If you use ASF, please cite the Zenodo DOI. We are currently working on publishing a paper on ASF, but by then a Zenodo citation will do it.
100
+
101
+ ```bibtex
102
+ @software{ASF,
103
+ author = {Hadar Shavit and Holger Hoos},
104
+ doi = {10.5281/zenodo.15288151},
105
+ title = {ASF: Algorithm Selection Framework},
106
+ url = {https://doi.org/10.5281/zenodo.15288151},
107
+ year = {in progress},
108
+ }
109
+ ```
File without changes
File without changes
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """CLI entry point for training selectors.
3
+
4
+ This script provides a command-line interface for training model-based selectors.
5
+ It allows users to specify the selector type, model, budget, and other parameters
6
+ to train and save the selector model.
7
+ """
8
+
9
+ import argparse
10
+ from pathlib import Path
11
+ from functools import partial
12
+ from typing import Dict, Callable, List
13
+
14
+ import pandas as pd
15
+
16
+ from asf import selectors
17
+
18
+ import sklearn
19
+
20
# Dispatch table: file suffix -> matching pandas reader function.
# Built programmatically so suffix and reader name stay in lockstep.
pandas_read_map: Dict[str, Callable] = {
    f".{fmt}": getattr(pd, f"read_{fmt}")
    for fmt in ("csv", "parquet", "json", "feather", "hdf", "html", "xml")
}
30
+
31
+
32
+ def parser_function() -> argparse.ArgumentParser:
33
+ """Define command line arguments for the CLI.
34
+
35
+ Returns:
36
+ argparse.ArgumentParser: The argument parser with defined arguments.
37
+ """
38
+ parser = argparse.ArgumentParser()
39
+ parser.add_argument(
40
+ "--selector",
41
+ choices=selectors.__implemented__,
42
+ required=True,
43
+ help="Selector to train",
44
+ )
45
+ parser.add_argument(
46
+ "--model",
47
+ default="RandomForestClassifier",
48
+ help="Model to use for the selector. "
49
+ "Make sure to specify as an attribute of sklearn.ensemble.",
50
+ )
51
+ parser.add_argument(
52
+ "--budget",
53
+ type=int,
54
+ default=None,
55
+ required=False,
56
+ help="Budget for the solvers",
57
+ )
58
+ parser.add_argument(
59
+ "--maximize",
60
+ type=bool,
61
+ default=False,
62
+ required=False,
63
+ help="Maximize the objective",
64
+ )
65
+ parser.add_argument(
66
+ "--performance-metric",
67
+ type=str,
68
+ default="",
69
+ required=False,
70
+ help="Performance metric to optimize",
71
+ )
72
+ parser.add_argument(
73
+ "--feature-data",
74
+ type=Path,
75
+ required=True,
76
+ help="Path to feature data",
77
+ )
78
+ parser.add_argument(
79
+ "--performance-data",
80
+ type=Path,
81
+ required=True,
82
+ help="Path to performance data",
83
+ )
84
+ parser.add_argument(
85
+ "--model-path",
86
+ type=Path,
87
+ required=True,
88
+ help="Path to save model",
89
+ )
90
+ return parser
91
+
92
+
93
def build_cli_command(
    selector: selectors.AbstractModelBasedSelector,
    feature_data: Path,
    performance_data: Path,
    destination: Path,
) -> List[str]:
    """Assemble the argv list that re-trains ``selector`` via this script.

    Args:
        selector (selectors.AbstractModelBasedSelector): Selector to train.
        feature_data (Path): Path to feature data DataFrame.
        performance_data (Path): Path to performance data DataFrame.
        destination (Path): Path to save the trained model.

    Returns:
        List[str]: A list of command-line arguments to execute the training job.
    """
    # When the model class was wrapped in a functools.partial, the real
    # class is its first positional argument.
    model_class = selector.model_class
    if isinstance(model_class, partial):
        model_class = model_class.args[0]

    flag_values = [
        ("--selector", type(selector).__name__),
        ("--model", model_class.__name__),
        ("--budget", str(selector.budget)),
        ("--maximize", str(selector.maximize)),
        ("--performance-metric", str(selector.performance_metric)),
        ("--feature-data", str(feature_data)),
        ("--performance-data", str(performance_data)),
        ("--model-path", str(destination)),
    ]
    command = ["python", str(Path(__file__).absolute())]
    for flag, value in flag_values:
        command.extend((flag, value))
    return command
135
+
136
+
137
+ if __name__ == "__main__":
138
+ parser = parser_function()
139
+ args = parser.parse_args()
140
+
141
+ # Parse selector into variable
142
+ selector_class = getattr(selectors, args.selector)
143
+ model_class = getattr(sklearn.ensemble, args.model)
144
+
145
+ # Parse training data into variables
146
+ features: pd.DataFrame = pandas_read_map[args.feature_data.suffix](
147
+ args.feature_data, index_col=0
148
+ )
149
+ performance_data: pd.DataFrame = pandas_read_map[args.performance_data.suffix](
150
+ args.performance_data, index_col=0
151
+ )
152
+
153
+ selector = selector_class(
154
+ model_class,
155
+ maximize=args.maximize,
156
+ budget=args.budget,
157
+ )
158
+ selector.fit(features, performance_data)
159
+
160
+ # Save the model to the specified path
161
+ selector.save(args.model_path)
@@ -0,0 +1,4 @@
1
+ from asf.epm.epm import EPM
2
+ from asf.epm.epm_tuner import tune_epm
3
+
4
+ __all__ = ["EPM", "tune_epm"]
@@ -0,0 +1,147 @@
1
+ from functools import partial
2
+ from typing import Type, Union, Optional
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.base import RegressorMixin
7
+
8
+ from asf.preprocessing.performace_scaling import AbstractNormalization, LogNormalization
9
+ from asf.predictors import SklearnWrapper
10
+ from asf.preprocessing.sklearn_preprocessor import get_default_preprocessor
11
+ from sklearn.base import TransformerMixin
12
+ from asf.predictors.abstract_predictor import AbstractPredictor
13
+
14
+
15
class EPM:
    """
    Empirical Performance Model (EPM): wraps a regression model with feature
    preprocessing, target normalization, and optional inverse transformation
    of predictions back to the original performance scale.

    Attributes:
        predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
        model_class: ``predictor_class``, wrapped in ``SklearnWrapper`` when a
            plain scikit-learn regressor class was supplied.
        normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
        transform_back (bool): Whether to apply inverse transformation to predictions.
        features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
        predictor_config (Optional[dict]): Configuration for the predictor.
        predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
    """

    def __init__(
        self,
        predictor_class: Union[Type[AbstractPredictor], Type[RegressorMixin]],
        normalization_class: Type[AbstractNormalization] = LogNormalization,
        transform_back: bool = True,
        features_preprocessing: Union[str, TransformerMixin] = "default",
        categorical_features: Optional[list] = None,
        numerical_features: Optional[list] = None,
        predictor_config: Optional[dict] = None,
        predictor_kwargs: Optional[dict] = None,
    ):
        """
        Initialize the EPM model.

        Parameters:
            predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
            normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
            transform_back (bool): Whether to apply inverse transformation to predictions.
            features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features,
                or "default" to use the package's default preprocessor.
            categorical_features (Optional[list]): List of categorical feature names.
            numerical_features (Optional[list]): List of numerical feature names.
            predictor_config (Optional[dict]): Configuration for the predictor.
            predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
        """
        # Plain scikit-learn regressors are adapted to the AbstractPredictor
        # interface via SklearnWrapper; native predictors are used as-is.
        if isinstance(predictor_class, type) and issubclass(
            predictor_class, (RegressorMixin)
        ):
            self.model_class = partial(SklearnWrapper, predictor_class)
        else:
            self.model_class = predictor_class

        self.predictor_class = predictor_class
        self.normalization_class = normalization_class
        self.transform_back = transform_back
        self.predictor_config = predictor_config
        self.predictor_kwargs = predictor_kwargs or {}
        # Remember whether fit() received NumPy input so predict() applies the
        # same DataFrame conversion (generated column names must match).
        self.numpy = False

        if features_preprocessing == "default":
            self.features_preprocessing = get_default_preprocessor(
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
        else:
            self.features_preprocessing = features_preprocessing

    def fit(
        self,
        X: Union[pd.DataFrame, pd.Series, list],
        y: Union[pd.Series, list],
        sample_weight: Optional[list] = None,
    ) -> "EPM":
        """
        Fit the EPM model to the data.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.
            y (Union[pd.Series, list]): Target variable.
            sample_weight (Optional[list]): Sample weights (optional).

        Returns:
            EPM: The fitted EPM model.
        """
        # NOTE(review): conversion only happens when BOTH inputs are ndarrays;
        # a mixed ndarray/DataFrame pair falls through unchanged — confirm
        # that this is intended.
        if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
            X = pd.DataFrame(
                X,
                index=range(len(X)),
                columns=[f"f_{i}" for i in range(X.shape[1])],
            )
            y = pd.Series(
                y,
                index=range(len(y)),
            )
            self.numpy = True

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.fit_transform(X)

        # Normalize the target (e.g. log-scale runtimes) before training.
        self.normalization = self.normalization_class()
        self.normalization.fit(y)
        y = self.normalization.transform(y)

        if self.predictor_config is None:
            # Fix: instantiate via model_class so plain scikit-learn regressors
            # receive the SklearnWrapper adapter selected in __init__
            # (previously predictor_class was instantiated directly, leaving
            # the wrapping computed in __init__ unused).
            self.predictor = self.model_class()
        else:
            # NOTE(review): the configuration path still uses predictor_class,
            # assuming get_from_configuration handles any wrapping itself —
            # confirm against the AbstractPredictor API.
            self.predictor = self.predictor_class.get_from_configuration(
                self.predictor_config, **self.predictor_kwargs
            )()

        self.predictor.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X: Union[pd.DataFrame, pd.Series, list]) -> list:
        """
        Predict using the fitted EPM model.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.

        Returns:
            list: Predicted values (on the original scale when
            ``transform_back`` is True).
        """
        # Mirror the NumPy -> DataFrame conversion done in fit() so the
        # preprocessor sees identical column names.
        if self.numpy:
            if isinstance(X, np.ndarray):
                X = pd.DataFrame(
                    X,
                    index=range(len(X)),
                    columns=[f"f_{i}" for i in range(X.shape[1])],
                )

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.transform(X)

        y_pred = self.predictor.predict(X)

        # Undo the target normalization so predictions are on the original scale.
        if self.transform_back:
            y_pred = self.normalization.inverse_transform(y_pred)

        return y_pred