asf 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. asf/__init__.py +0 -0
  2. asf/cli/__init__.py +0 -0
  3. asf/cli/cli_train.py +161 -0
  4. asf/epm/__init__.py +4 -0
  5. asf/epm/epm.py +147 -0
  6. asf/epm/epm_tuner.py +141 -0
  7. asf/metrics/__init__.py +14 -0
  8. asf/metrics/baselines.py +127 -0
  9. asf/pre_selector/__init__.py +18 -0
  10. asf/pre_selector/abstract_pre_selector.py +32 -0
  11. asf/pre_selector/beam_search_pre_selection.py +84 -0
  12. asf/pre_selector/brute_force_pre_selection.py +83 -0
  13. asf/pre_selector/knee_of_the_curve_pre_selector.py +140 -0
  14. asf/pre_selector/marginal_contribution_based.py +97 -0
  15. asf/pre_selector/optimize_pre_selection.py +146 -0
  16. asf/pre_selector/sbs_pre_selection.py +75 -0
  17. asf/predictors/__init__.py +33 -0
  18. asf/predictors/abstract_predictor.py +155 -0
  19. asf/predictors/epm_extra_trees.py +137 -0
  20. asf/predictors/epm_random_forest.py +215 -0
  21. asf/predictors/linear_model.py +245 -0
  22. asf/predictors/mlp.py +329 -0
  23. asf/predictors/random_forest.py +293 -0
  24. asf/predictors/ranking_mlp.py +207 -0
  25. asf/predictors/regression_mlp.py +165 -0
  26. asf/predictors/sklearn_wrapper.py +108 -0
  27. asf/predictors/svm.py +301 -0
  28. asf/predictors/utils/datasets.py +90 -0
  29. asf/predictors/utils/losses.py +40 -0
  30. asf/predictors/utils/mlp.py +26 -0
  31. asf/predictors/xgboost.py +553 -0
  32. asf/preprocessing/__init__.py +27 -0
  33. asf/preprocessing/performace_scaling.py +500 -0
  34. asf/preprocessing/sklearn_preprocessor.py +49 -0
  35. asf/presolving/__init__.py +7 -0
  36. asf/presolving/asap_v2.py +277 -0
  37. asf/presolving/aspeed.py +189 -0
  38. asf/presolving/presolver.py +22 -0
  39. asf/scenario/__init__.py +3 -0
  40. asf/scenario/aslib_reader.py +207 -0
  41. asf/scenario/epmbench_reader.py +178 -0
  42. asf/selectors/__init__.py +28 -0
  43. asf/selectors/abstract_model_based_selector.py +70 -0
  44. asf/selectors/abstract_selector.py +249 -0
  45. asf/selectors/feature_generator.py +38 -0
  46. asf/selectors/joint_ranking.py +107 -0
  47. asf/selectors/mutli_class.py +60 -0
  48. asf/selectors/pairwise_classifier.py +252 -0
  49. asf/selectors/pairwise_regressor.py +235 -0
  50. asf/selectors/performance_model.py +156 -0
  51. asf/selectors/selector_pipeline.py +128 -0
  52. asf/selectors/selector_tuner.py +196 -0
  53. asf/selectors/simple_ranking.py +133 -0
  54. asf/selectors/survival_analysis.py +146 -0
  55. asf/utils/groupkfoldshuffle.py +29 -0
  56. asf-0.1.1.dist-info/METADATA +179 -0
  57. asf-0.1.1.dist-info/RECORD +60 -0
  58. asf-0.1.1.dist-info/WHEEL +5 -0
  59. asf-0.1.1.dist-info/licenses/LICENSE +21 -0
  60. asf-0.1.1.dist-info/top_level.txt +1 -0
asf/__init__.py ADDED
File without changes
asf/cli/__init__.py ADDED
File without changes
asf/cli/cli_train.py ADDED
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """CLI entry point for training selectors.
3
+
4
+ This script provides a command-line interface for training model-based selectors.
5
+ It allows users to specify the selector type, model, budget, and other parameters
6
+ to train and save the selector model.
7
+ """
8
+
9
+ import argparse
10
+ from pathlib import Path
11
+ from functools import partial
12
+ from typing import Dict, Callable, List
13
+
14
+ import pandas as pd
15
+
16
+ from asf import selectors
17
+
18
+ import sklearn
19
+
20
# Mapping of file extensions to pandas read functions.
# Used by the __main__ block to pick the loader for --feature-data /
# --performance-data based on the file suffix.
# NOTE(review): pd.read_html returns a *list* of DataFrames, not a DataFrame —
# confirm downstream code handles the .html case.
pandas_read_map: Dict[str, Callable] = {
    ".csv": pd.read_csv,
    ".parquet": pd.read_parquet,
    ".json": pd.read_json,
    ".feather": pd.read_feather,
    ".hdf": pd.read_hdf,
    ".html": pd.read_html,
    ".xml": pd.read_xml,
}
30
+
31
+
32
def str2bool(value: str) -> bool:
    """Parse a command-line string into a boolean.

    argparse's ``type=bool`` is broken for string arguments: ``bool("False")``
    is ``True`` because every non-empty string is truthy. This helper accepts
    the usual spellings of "true" and treats everything else as ``False``.

    Args:
        value (str): The raw command-line value.

    Returns:
        bool: True for "true"/"1"/"yes"/"y"/"t" (case-insensitive), else False.
    """
    return str(value).strip().lower() in ("true", "1", "yes", "y", "t")


def parser_function() -> argparse.ArgumentParser:
    """Define command line arguments for the CLI.

    Returns:
        argparse.ArgumentParser: The argument parser with defined arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--selector",
        choices=selectors.__implemented__,
        required=True,
        help="Selector to train",
    )
    parser.add_argument(
        "--model",
        default="RandomForestClassifier",
        help="Model to use for the selector. "
        "Make sure to specify as an attribute of sklearn.ensemble.",
    )
    parser.add_argument(
        "--budget",
        type=int,
        default=None,
        required=False,
        help="Budget for the solvers",
    )
    parser.add_argument(
        "--maximize",
        # type=bool would turn the string "False" into True; parse explicitly.
        type=str2bool,
        default=False,
        required=False,
        help="Maximize the objective",
    )
    parser.add_argument(
        "--performance-metric",
        type=str,
        default="",
        required=False,
        help="Performance metric to optimize",
    )
    parser.add_argument(
        "--feature-data",
        type=Path,
        required=True,
        help="Path to feature data",
    )
    parser.add_argument(
        "--performance-data",
        type=Path,
        required=True,
        help="Path to performance data",
    )
    parser.add_argument(
        "--model-path",
        type=Path,
        required=True,
        help="Path to save model",
    )
    return parser
91
+
92
+
93
def build_cli_command(
    selector: selectors.AbstractModelBasedSelector,
    feature_data: Path,
    performance_data: Path,
    destination: Path,
) -> List[str]:
    """Assemble the argv list that re-runs this script as a training job.

    Args:
        selector (selectors.AbstractModelBasedSelector): Selector to train.
        feature_data (Path): Path to feature data DataFrame.
        performance_data (Path): Path to performance data DataFrame.
        destination (Path): Path to save the trained model.

    Returns:
        List[str]: A list of command-line arguments to execute the training job.
    """
    # Unwrap the sklearn class when the selector stores a SklearnWrapper partial.
    if isinstance(selector.model_class, partial):
        model_class = selector.model_class.args[0]
    else:
        model_class = selector.model_class

    option_pairs = [
        ("--selector", type(selector).__name__),
        ("--model", model_class.__name__),
        ("--budget", str(selector.budget)),
        ("--maximize", str(selector.maximize)),
        ("--performance-metric", str(selector.performance_metric)),
        ("--feature-data", str(feature_data)),
        ("--performance-data", str(performance_data)),
        ("--model-path", str(destination)),
    ]

    command = ["python", str(Path(__file__).absolute())]
    for flag, value in option_pairs:
        command.extend((flag, value))
    return command
135
+
136
+
137
if __name__ == "__main__":
    parser = parser_function()
    args = parser.parse_args()

    # A bare `import sklearn` does not guarantee the `ensemble` submodule is
    # loaded; import it explicitly before looking up the model class on it.
    import sklearn.ensemble

    # Resolve the selector and model classes from their CLI names.
    selector_class = getattr(selectors, args.selector)
    model_class = getattr(sklearn.ensemble, args.model)

    # Load training data; the reader is chosen by file suffix and the first
    # column is used as the instance index.
    features: pd.DataFrame = pandas_read_map[args.feature_data.suffix](
        args.feature_data, index_col=0
    )
    performance_data: pd.DataFrame = pandas_read_map[args.performance_data.suffix](
        args.performance_data, index_col=0
    )

    selector = selector_class(
        model_class,
        maximize=args.maximize,
        budget=args.budget,
    )
    selector.fit(features, performance_data)

    # Save the model to the specified path
    selector.save(args.model_path)
asf/epm/__init__.py ADDED
@@ -0,0 +1,4 @@
1
"""Public API of the :mod:`asf.epm` subpackage."""

from asf.epm.epm import EPM
from asf.epm.epm_tuner import tune_epm

__all__ = ["EPM", "tune_epm"]
asf/epm/epm.py ADDED
@@ -0,0 +1,147 @@
1
+ from functools import partial
2
+ from typing import Type, Union, Optional
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.base import RegressorMixin
7
+
8
+ from asf.preprocessing.performace_scaling import AbstractNormalization, LogNormalization
9
+ from asf.predictors import SklearnWrapper
10
+ from asf.preprocessing.sklearn_preprocessor import get_default_preprocessor
11
+ from sklearn.base import TransformerMixin
12
+ from asf.predictors.abstract_predictor import AbstractPredictor
13
+
14
+
15
class EPM:
    """
    The EPM (Empirical Performance Model) class is a wrapper for machine learning models
    that includes preprocessing, normalization, and optional inverse transformation of predictions.

    Attributes:
        predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class given by the caller.
        model_class: ``predictor_class``, wrapped in ``SklearnWrapper`` when a plain
            sklearn regressor class was given; used to instantiate the predictor in fit().
        normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
        transform_back (bool): Whether to apply inverse transformation to predictions.
        features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features.
        predictor_config (Optional[dict]): Configuration for the predictor.
        predictor_kwargs (dict): Additional keyword arguments for the predictor.
        numpy (bool): True when fit() received numpy arrays; predict() then converts
            numpy inputs to DataFrames using the same ``f_<i>`` column naming.
    """

    def __init__(
        self,
        predictor_class: Union[Type[AbstractPredictor], Type[RegressorMixin]],
        normalization_class: Type[AbstractNormalization] = LogNormalization,
        transform_back: bool = True,
        features_preprocessing: Union[str, TransformerMixin] = "default",
        categorical_features: Optional[list] = None,
        numerical_features: Optional[list] = None,
        predictor_config: Optional[dict] = None,
        predictor_kwargs: Optional[dict] = None,
    ):
        """
        Initialize the EPM model.

        Parameters:
            predictor_class (Type[AbstractPredictor] | Type[RegressorMixin]): The class of the predictor to use.
            normalization_class (Type[AbstractNormalization]): The normalization class to apply to the target variable.
            transform_back (bool): Whether to apply inverse transformation to predictions.
            features_preprocessing (Union[str, TransformerMixin]): Preprocessing pipeline for features,
                or "default" to build the package's default preprocessor.
            categorical_features (Optional[list]): List of categorical feature names.
            numerical_features (Optional[list]): List of numerical feature names.
            predictor_config (Optional[dict]): Configuration for the predictor.
            predictor_kwargs (Optional[dict]): Additional keyword arguments for the predictor.
        """
        # Plain sklearn regressors are wrapped so they expose the
        # AbstractPredictor-style interface used in fit()/predict().
        if isinstance(predictor_class, type) and issubclass(
            predictor_class, RegressorMixin
        ):
            self.model_class = partial(SklearnWrapper, predictor_class)
        else:
            self.model_class = predictor_class

        self.predictor_class = predictor_class
        self.normalization_class = normalization_class
        self.transform_back = transform_back
        self.predictor_config = predictor_config
        self.predictor_kwargs = predictor_kwargs or {}
        self.numpy = False  # flipped in fit() when numpy input is detected

        if features_preprocessing == "default":
            self.features_preprocessing = get_default_preprocessor(
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
        else:
            self.features_preprocessing = features_preprocessing

    def fit(
        self,
        X: Union[pd.DataFrame, pd.Series, list],
        y: Union[pd.Series, list],
        sample_weight: Optional[list] = None,
    ) -> "EPM":
        """
        Fit the EPM model to the data.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.
            y (Union[pd.Series, list]): Target variable.
            sample_weight (Optional[list]): Sample weights (optional).

        Returns:
            EPM: The fitted EPM model.
        """
        # Numpy inputs are converted to pandas with synthetic f_<i> column
        # names so the preprocessor can address columns; remember this so
        # predict() applies the same conversion.
        if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
            X = pd.DataFrame(
                X,
                index=range(len(X)),
                columns=[f"f_{i}" for i in range(X.shape[1])],
            )
            y = pd.Series(
                y,
                index=range(len(y)),
            )
            self.numpy = True

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.fit_transform(X)

        # Normalize the target; the inverse transform is applied in predict()
        # when transform_back is set.
        self.normalization = self.normalization_class()
        self.normalization.fit(y)
        y = self.normalization.transform(y)

        # Instantiate via model_class (not predictor_class) so that plain
        # sklearn regressors actually get the SklearnWrapper built in
        # __init__; previously the wrapper was created but never used, and
        # predictor_kwargs were dropped in the no-config branch.
        if self.predictor_config is None:
            self.predictor = self.model_class(**self.predictor_kwargs)
        else:
            self.predictor = self.model_class.get_from_configuration(
                self.predictor_config, **self.predictor_kwargs
            )()

        self.predictor.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X: Union[pd.DataFrame, pd.Series, list]) -> list:
        """
        Predict using the fitted EPM model.

        Parameters:
            X (Union[pd.DataFrame, pd.Series, list]): Features.

        Returns:
            list: Predicted values (inverse-transformed when transform_back is set).
        """
        # Mirror the numpy -> DataFrame conversion done in fit().
        if self.numpy:
            if isinstance(X, np.ndarray):
                X = pd.DataFrame(
                    X,
                    index=range(len(X)),
                    columns=[f"f_{i}" for i in range(X.shape[1])],
                )

        if self.features_preprocessing is not None:
            X = self.features_preprocessing.transform(X)

        y_pred = self.predictor.predict(X)

        if self.transform_back:
            y_pred = self.normalization.inverse_transform(y_pred)

        return y_pred
asf/epm/epm_tuner.py ADDED
@@ -0,0 +1,141 @@
1
+ from typing import Type, Union, Optional
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.base import TransformerMixin
6
+ from sklearn.metrics import mean_squared_error # Fixed incorrect import
7
+ from sklearn.model_selection import KFold
8
+ from smac import HyperparameterOptimizationFacade, Scenario
9
+ from asf.utils.groupkfoldshuffle import GroupKFoldShuffle
10
+
11
+ from asf.epm.epm import EPM
12
+ from asf.preprocessing.performace_scaling import AbstractNormalization, LogNormalization
13
+ from asf.predictors.abstract_predictor import AbstractPredictor
14
+
15
+
16
def tune_epm(
    X: np.ndarray,
    y: np.ndarray,
    model_class: Type[AbstractPredictor],
    normalization_class: Type[AbstractNormalization] = LogNormalization,
    features_preprocessing: Union[str, TransformerMixin] = "default",
    categorical_features: Optional[list] = None,
    numerical_features: Optional[list] = None,
    groups: Optional[np.ndarray] = None,
    cv: int = 5,
    timeout: int = 3600,
    runcount_limit: int = 100,
    output_dir: str = "./smac_output",
    seed: int = 0,
    smac_metric: callable = mean_squared_error,
    smac_scenario_kwargs: Optional[dict] = None,
    smac_kwargs: Optional[dict] = None,
    predictor_kwargs: Optional[dict] = None,
) -> EPM:
    """
    Tune the Empirical Performance Model (EPM) using SMAC (Sequential Model-based Algorithm Configuration).

    Parameters:
    ----------
    X : np.ndarray
        Feature matrix for training and validation.
    y : np.ndarray
        Target values corresponding to the feature matrix.
    model_class : Type[AbstractPredictor]
        The predictor class to be tuned.
    normalization_class : Type[AbstractNormalization], optional
        The normalization class to be applied to the data. Defaults to LogNormalization.
    features_preprocessing : Union[str, TransformerMixin], optional
        Preprocessing method for features. Defaults to "default".
    categorical_features : Optional[list], optional
        List of categorical feature names. Defaults to None.
    numerical_features : Optional[list], optional
        List of numerical feature names. Defaults to None.
    groups : Optional[np.ndarray], optional
        Group labels for cross-validation. Defaults to None.
    cv : int, optional
        Number of cross-validation folds. Defaults to 5.
    timeout : int, optional
        Time limit for the tuning process in seconds. Defaults to 3600.
    runcount_limit : int, optional
        Maximum number of configurations to evaluate. Defaults to 100.
    output_dir : str, optional
        Directory to store SMAC output. Defaults to "./smac_output".
    seed : int, optional
        Random seed for reproducibility. Defaults to 0.
    smac_metric : callable, optional
        Metric function to evaluate model performance (lower is better for SMAC).
        Defaults to mean_squared_error.
    smac_scenario_kwargs : Optional[dict], optional
        Additional keyword arguments for the SMAC scenario. Defaults to None.
    smac_kwargs : Optional[dict], optional
        Additional keyword arguments for SMAC optimization. Defaults to None.
    predictor_kwargs : Optional[dict], optional
        Additional keyword arguments for the predictor. Defaults to None.

    Returns:
    -------
    EPM
        The tuned Empirical Performance Model instance, configured with the
        incumbent configuration.
        NOTE(review): the returned EPM is not yet fitted — callers must call
        ``fit`` on it; confirm this matches the intended API.
    """
    # Dict parameters default to None instead of a shared mutable `{}`
    # (mutable default arguments persist across calls); normalize here.
    smac_scenario_kwargs = smac_scenario_kwargs or {}
    smac_kwargs = smac_kwargs or {}
    predictor_kwargs = predictor_kwargs or {}

    if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
        X = pd.DataFrame(
            X,
            index=range(len(X)),
            columns=[f"f_{i}" for i in range(X.shape[1])],
        )
        y = pd.Series(
            y,
            index=range(len(y)),
        )

    scenario = Scenario(
        configspace=model_class.get_configuration_space(),
        n_trials=runcount_limit,
        walltime_limit=timeout,
        deterministic=True,
        output_directory=output_dir,
        seed=seed,
        **smac_scenario_kwargs,
    )

    def target_function(config, seed):
        """Mean CV score of one configuration (SMAC minimizes this)."""
        # Group-aware splitting when instance groups are provided.
        if groups is not None:
            kfold = GroupKFoldShuffle(n_splits=cv, shuffle=True, random_state=seed)
        else:
            kfold = KFold(n_splits=cv, shuffle=True, random_state=seed)

        scores = []
        for train_idx, test_idx in kfold.split(X, y, groups):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            epm = EPM(
                predictor_class=model_class,
                normalization_class=normalization_class,
                transform_back=True,
                predictor_config=config,
                predictor_kwargs=predictor_kwargs,
                features_preprocessing=features_preprocessing,
                categorical_features=categorical_features,
                numerical_features=numerical_features,
            )
            epm.fit(X_train, y_train)

            y_pred = epm.predict(X_test)
            score = smac_metric(y_test, y_pred)
            scores.append(score)

        return np.mean(scores)

    smac = HyperparameterOptimizationFacade(scenario, target_function, **smac_kwargs)
    best_config = smac.optimize()

    # Build the final model with the incumbent configuration. Pass
    # predictor_kwargs here too, so the returned EPM matches the models
    # evaluated during tuning (previously they were silently dropped).
    return EPM(
        predictor_class=model_class,
        normalization_class=normalization_class,
        transform_back=True,
        predictor_config=best_config,
        predictor_kwargs=predictor_kwargs,
        features_preprocessing=features_preprocessing,
        categorical_features=categorical_features,
        numerical_features=numerical_features,
    )
@@ -0,0 +1,14 @@
1
"""Public API of the :mod:`asf.metrics` subpackage."""

from asf.metrics.baselines import (
    single_best_solver,
    virtual_best_solver,
    running_time_selector_performance,
    running_time_closed_gap,
)


__all__ = [
    "single_best_solver",
    "virtual_best_solver",
    "running_time_selector_performance",
    "running_time_closed_gap",
]
@@ -0,0 +1,127 @@
1
+ import pandas as pd
2
+ from typing import Dict, List, Tuple, Optional, Union
3
+
4
+
5
def single_best_solver(performance: pd.DataFrame, maximize: bool = False) -> float:
    """
    Return the aggregated performance of the single best solver (SBS).

    The SBS is the one algorithm whose performance, summed over all
    instances, is best.

    Args:
        performance (pd.DataFrame): The performance data for the algorithms
            (instances as rows, algorithms as columns).
        maximize (bool): Whether to maximize or minimize the performance.

    Returns:
        float: The best aggregated performance value across all instances.
    """
    per_algorithm_total = performance.sum(axis=0)
    return per_algorithm_total.max() if maximize else per_algorithm_total.min()
22
+
23
+
24
def virtual_best_solver(performance: pd.DataFrame, maximize: bool = False) -> float:
    """
    Return the aggregated performance of the virtual best solver (VBS).

    The VBS picks, independently for every instance, the best-performing
    algorithm; its score is the sum of those per-instance optima.

    Args:
        performance (pd.DataFrame): The performance data for the algorithms
            (instances as rows, algorithms as columns).
        maximize (bool): Whether to maximize or minimize the performance.

    Returns:
        float: The sum of the best performance values for each instance.
    """
    if maximize:
        best_per_instance = performance.max(axis=1)
    else:
        best_per_instance = performance.min(axis=1)
    return best_per_instance.sum()
40
+
41
+
42
def running_time_selector_performance(
    schedules: Dict[str, List[Tuple[str, float]]],
    performance: pd.DataFrame,
    budget: float = 5000,
    par: float = 10,
    feature_time: Optional[pd.DataFrame] = None,
) -> float:
    """
    Calculates the total (PAR-penalized) running time of a selector over all
    instances, given per-instance schedules and per-algorithm running times.

    Args:
        schedules (Dict[str, List[Tuple[str, float]]]): The schedules to evaluate, where each key is an instance
            and the value is a list of tuples (algorithm, allocated budget).
        performance (pd.DataFrame): The performance data for the algorithms
            (instances as rows, algorithms as columns).
        budget (float): The budget for the scenario.
        par (float): The penalization factor for unsolved instances (PAR score).
        feature_time (Optional[pd.DataFrame]): The feature time data for each instance. Defaults to zero if not provided.

    Returns:
        float: Sum over all instances of the instance running time (including
        feature time); unsolved instances count as ``budget * par``.
    """
    if feature_time is None:
        # No feature costs provided: assume feature computation is free.
        feature_time = pd.DataFrame(
            0, index=performance.index, columns=["feature_time"]
        )
    total_time = {}
    for instance, schedule in schedules.items():
        # Time already charged to each algorithm for this instance.
        allocated_times = {algorithm: 0 for algorithm in performance.columns}
        solved = False
        for algorithm, algo_budget in schedule:
            # Budget still available for this instance after all previous
            # slices and the (fixed) feature computation time.
            remaining_budget = (
                budget
                - sum(allocated_times.values())
                - feature_time.loc[instance].sum().item()
            )
            # Time the algorithm still needs beyond its schedule budget plus
            # what it was already allocated in earlier slices.
            remaining_time_to_solve = performance.loc[instance, algorithm] - (
                algo_budget + allocated_times[algorithm]
            )
            if remaining_time_to_solve < 0:
                # Algorithm finishes within this slice: charge its true runtime.
                allocated_times[algorithm] = performance.loc[instance, algorithm]
                solved = True
                break
            elif remaining_time_to_solve <= remaining_budget:
                # NOTE(review): the solve time fits into the remaining budget,
                # yet `solved` is not set and the loop continues — the instance
                # only counts as solved via the `< 0` branch on a later schedule
                # entry. Confirm this is the intended semantics.
                allocated_times[algorithm] += remaining_time_to_solve
            else:
                # Budget exhausted before the algorithm can finish.
                allocated_times[algorithm] += remaining_budget
                break
        if solved:
            total_time[instance] = (
                sum(allocated_times.values()) + feature_time.loc[instance].sum().item()
            )
        else:
            # Unsolved instances are penalized with PAR-<par> scoring.
            total_time[instance] = budget * par
    total_time = sum(list(total_time.values()))
    return total_time
98
+
99
+
100
def running_time_closed_gap(
    schedules: Dict[str, List[Tuple[str, float]]],
    performance: pd.DataFrame,
    budget: float,
    feature_time: pd.DataFrame,
    par: float = 10,
) -> float:
    """
    Calculates the closed gap metric for a given selector.

    The closed gap measures how much of the gap between the single best
    solver (SBS) and the virtual best solver (VBS) the selector closes.

    Args:
        schedules (Dict[str, List[Tuple[str, float]]]): The schedules to evaluate.
        performance (pd.DataFrame): The performance data for the algorithms.
        budget (float): The budget for the scenario.
        feature_time (pd.DataFrame): The feature time data for each instance.
        par (float): The penalization factor for unsolved instances.

    Returns:
        float: The closed gap value; 0 means no better than the SBS, 1 means
        the selector matches the VBS.
    """
    sbs = single_best_solver(performance, False)
    vbs = virtual_best_solver(performance, False)
    selector_time = running_time_selector_performance(
        schedules, performance, budget, par, feature_time
    )

    return (sbs - selector_time) / (sbs - vbs)
@@ -0,0 +1,18 @@
1
"""Public API of the :mod:`asf.pre_selector` subpackage."""

from asf.pre_selector.marginal_contribution_based import (
    MarginalContributionBasedPreSelector,
)
from asf.pre_selector.optimize_pre_selection import OptimizePreSelection
from asf.pre_selector.sbs_pre_selection import SBSPreSelector
from asf.pre_selector.brute_force_pre_selection import BruteForcePreSelector
from asf.pre_selector.beam_search_pre_selection import BeamSearchPreSelector
from asf.pre_selector.knee_of_the_curve_pre_selector import KneeOfCurvePreSelector


__all__ = [
    "MarginalContributionBasedPreSelector",
    "OptimizePreSelection",
    "SBSPreSelector",
    "BruteForcePreSelector",
    "BeamSearchPreSelector",
    "KneeOfCurvePreSelector",
]
@@ -0,0 +1,32 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import Union, Optional
4
+
5
+
6
class AbstractPreSelector:
    """
    Abstract base class for pre-selectors.

    A pre-selector reduces a performance matrix to a subset of algorithms
    before a selector is trained. Subclasses must implement
    :meth:`fit_transform`.
    """

    def __init__(self, n_algorithms: Optional[int] = None):
        """
        Initialize the pre-selector.

        Args:
            n_algorithms (Optional[int]): Number of algorithms the
                pre-selector should keep; None leaves the choice to the
                subclass.
        """
        self.n_algorithms = n_algorithms

    def fit_transform(
        self, performance: Union[pd.DataFrame, np.ndarray]
    ) -> Union[pd.DataFrame, np.ndarray]:
        """
        Fit the pre-selector to the performance data and transform it.

        Args:
            performance (Union[pd.DataFrame, np.ndarray]): Performance data to fit and transform.

        Returns:
            Union[pd.DataFrame, np.ndarray]: Transformed performance data.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError(
            "fit_transform method must be implemented in subclasses."
        )