moose_fs-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. LICENSE +21 -0
  2. README.md +190 -0
  3. moose_fs-0.1.0.dist-info/METADATA +232 -0
  4. moose_fs-0.1.0.dist-info/RECORD +40 -0
  5. moose_fs-0.1.0.dist-info/WHEEL +4 -0
  6. moose_fs-0.1.0.dist-info/entry_points.txt +2 -0
  7. moose_fs-0.1.0.dist-info/licenses/LICENSE +21 -0
  8. moosefs/__init__.py +6 -0
  9. moosefs/core/__init__.py +6 -0
  10. moosefs/core/data_processor.py +319 -0
  11. moosefs/core/feature.py +44 -0
  12. moosefs/core/novovicova.py +60 -0
  13. moosefs/core/pareto.py +90 -0
  14. moosefs/feature_selection_pipeline.py +548 -0
  15. moosefs/feature_selectors/__init__.py +26 -0
  16. moosefs/feature_selectors/base_selector.py +38 -0
  17. moosefs/feature_selectors/default_variance.py +21 -0
  18. moosefs/feature_selectors/elastic_net_selector.py +73 -0
  19. moosefs/feature_selectors/f_statistic_selector.py +42 -0
  20. moosefs/feature_selectors/lasso_selector.py +46 -0
  21. moosefs/feature_selectors/mrmr_selector.py +57 -0
  22. moosefs/feature_selectors/mutual_info_selector.py +45 -0
  23. moosefs/feature_selectors/random_forest_selector.py +48 -0
  24. moosefs/feature_selectors/svm_selector.py +50 -0
  25. moosefs/feature_selectors/variance_selectors.py +16 -0
  26. moosefs/feature_selectors/xgboost_selector.py +44 -0
  27. moosefs/merging_strategies/__init__.py +17 -0
  28. moosefs/merging_strategies/arithmetic_mean_merger.py +46 -0
  29. moosefs/merging_strategies/base_merger.py +64 -0
  30. moosefs/merging_strategies/borda_merger.py +46 -0
  31. moosefs/merging_strategies/consensus_merger.py +80 -0
  32. moosefs/merging_strategies/l2_norm_merger.py +42 -0
  33. moosefs/merging_strategies/union_of_intersections_merger.py +89 -0
  34. moosefs/metrics/__init__.py +23 -0
  35. moosefs/metrics/performance_metrics.py +239 -0
  36. moosefs/metrics/stability_metrics.py +49 -0
  37. moosefs/utils.py +161 -0
  38. scripts/config.yml +92 -0
  39. scripts/main.py +163 -0
  40. scripts/utils.py +186 -0
moosefs/feature_selectors/elastic_net_selector.py
@@ -0,0 +1,73 @@
+ from __future__ import annotations
+
+ from typing import Any
+ import warnings
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.exceptions import ConvergenceWarning
+ from sklearn.linear_model import ElasticNet, LogisticRegression
+ from sklearn.pipeline import make_pipeline
+ from sklearn.preprocessing import StandardScaler
+
+ from .base_selector import FeatureSelector
+
+
+ class ElasticNetSelector(FeatureSelector):
+     """Elastic-net based selector.
+
+     - regression -> sklearn.linear_model.ElasticNet (L1+L2 on y ∈ ℝ)
+     - classification -> sklearn.linear_model.LogisticRegression with penalty='elasticnet' (solver='saga')
+
+     Scores are |coef| (mean over classes if multiclass).
+     """
+
+     name = "ElasticNet"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         # Ensure tabular objects for column-safe slicing later in the pipeline
+         if isinstance(X, np.ndarray):
+             X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
+         if isinstance(y, (pd.DataFrame, np.ndarray)) and getattr(y, "ndim", 1) == 2:
+             y = np.ravel(y)
+
+         if self.task == "regression":
+             params = {
+                 "alpha": self.kwargs.pop("alpha", 1.0),
+                 "l1_ratio": self.kwargs.pop("l1_ratio", 0.5),
+                 "max_iter": self.kwargs.pop("max_iter", 100_000),
+                 **self.kwargs,
+             }
+             model = make_pipeline(StandardScaler(with_mean=True, with_std=True), ElasticNet(**params))
+             # Fit once, silencing only ConvergenceWarning
+             with warnings.catch_warnings():
+                 warnings.filterwarnings("ignore", category=ConvergenceWarning)
+                 model.fit(X, y)
+             coef = model[-1].coef_
+
+         elif self.task == "classification":
+             # LogisticRegression uses C instead of alpha; keep both if the user passes them.
+             l1_ratio = self.kwargs.pop("l1_ratio", 0.5)
+             C = self.kwargs.pop("C", 1.0)
+             params = {
+                 "penalty": "elasticnet",
+                 "solver": "saga",
+                 "l1_ratio": l1_ratio,
+                 "C": C,
+                 "max_iter": self.kwargs.pop("max_iter", 100_000),
+                 **self.kwargs,
+             }
+             model = LogisticRegression(**params)
+             model.fit(X, y)
+             coef = model.coef_  # shape (n_classes, n_features) or (1, n_features)
+             if coef.ndim > 1:
+                 coef = np.mean(np.abs(coef), axis=0)
+         else:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+
+         scores = np.abs(coef) if isinstance(coef, np.ndarray) else np.abs(np.asarray(coef))
+         return scores
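For orientation, here is a minimal sketch of exercising this selector's `compute_scores` on toy regression data. It assumes only what the hunk above shows (the constructor and `compute_scores`); the import path is inferred from the package layout in the file list, and any top-k ranking lives in the `FeatureSelector` base class, not here.

```python
import numpy as np
from moosefs.feature_selectors import ElasticNetSelector  # export path assumed from the package layout

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
# y depends only on features 0 and 2, so their |coef| scores should dominate
y = 3.0 * X[:, 0] - 2.0 * X[:, 2] + rng.normal(scale=0.1, size=200)

selector = ElasticNetSelector(task="regression", num_features_to_select=2, alpha=0.01)
scores = selector.compute_scores(X, y)
print(scores)  # one non-negative score per feature; largest at indices 0 and 2
```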
moosefs/feature_selectors/f_statistic_selector.py
@@ -0,0 +1,42 @@
+ from typing import Any
+
+ import numpy as np
+ from sklearn.feature_selection import f_classif, f_regression
+
+ from .base_selector import FeatureSelector
+
+
+ class FStatisticSelector(FeatureSelector):
+     """Feature selector using F-statistic scores."""
+
+     name = "FStatistic"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for the scoring function.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes F-statistic scores.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             F-statistic scores for each feature.
+
+         Raises:
+             ValueError: If task is not 'classification' or 'regression'.
+         """
+         score_func = {"classification": f_classif, "regression": f_regression}.get(self.task)
+         if score_func is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+         scores, _ = score_func(X, y, **self.kwargs)
+         return scores
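Both `f_classif` and `f_regression` return an `(F, p_values)` pair; the selector keeps only the F scores. A short usage sketch (the import path is assumed from the package layout):

```python
import numpy as np
from sklearn.datasets import make_classification
from moosefs.feature_selectors import FStatisticSelector  # export path assumed

X, y = make_classification(n_samples=300, n_features=6, n_informative=2, random_state=0)
scores = FStatisticSelector(task="classification", num_features_to_select=2).compute_scores(X, y)
print(np.argsort(scores)[::-1][:2])  # informative features carry the largest ANOVA F-values
```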
moosefs/feature_selectors/lasso_selector.py
@@ -0,0 +1,46 @@
+ from typing import Any
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.linear_model import Lasso
+
+ from .base_selector import FeatureSelector
+
+
+ class LassoSelector(FeatureSelector):
+     """Feature selector using Lasso regression."""
+
+     name = "Lasso"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for Lasso.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes feature scores using Lasso regression.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             Feature scores based on absolute Lasso coefficients.
+         """
+         if isinstance(X, np.ndarray):
+             X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
+
+         if isinstance(y, np.ndarray) and y.ndim == 2:
+             y = y.ravel()
+
+         # Default alpha is 0.05 (sparser than sklearn's 1.0); forward any remaining kwargs
+         model = Lasso(alpha=self.kwargs.pop("alpha", 0.05), **self.kwargs)
+         model.fit(X, y)
+         scores = np.abs(model.coef_)
+         return scores
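Because the selector scores features by |coef| from an L1-penalized fit, the `alpha` passed through the kwargs directly controls how many features receive a non-zero score. A small sketch (import path assumed):

```python
import numpy as np
from moosefs.feature_selectors import LassoSelector  # export path assumed

rng = np.random.default_rng(1)
X = rng.normal(size=(150, 8))
y = X[:, 0] + 0.5 * X[:, 3] + rng.normal(scale=0.1, size=150)

for alpha in (0.01, 0.05, 0.5):
    scores = LassoSelector("regression", 2, alpha=alpha).compute_scores(X, y)
    print(alpha, int((scores > 0).sum()))  # larger alpha -> fewer non-zero scores
```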
moosefs/feature_selectors/mrmr_selector.py
@@ -0,0 +1,57 @@
+ from typing import Any
+
+ from mrmr import mrmr_classif, mrmr_regression
+ import numpy as np
+ import pandas as pd
+
+ from .base_selector import FeatureSelector
+
+
+ class MRMRSelector(FeatureSelector):
+     """Feature selector using Minimum Redundancy Maximum Relevance (MRMR)."""
+
+     name = "MRMR"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for mRMR functions.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes feature scores using the MRMR algorithm.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             MRMR scores for each feature.
+         """
+         if isinstance(X, np.ndarray):
+             X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
+         elif not isinstance(X, pd.DataFrame):
+             raise TypeError("X must be a pandas DataFrame or a NumPy array.")
+
+         if isinstance(y, np.ndarray):
+             y = pd.Series(y)
+
+         score_func = {
+             "classification": mrmr_classif,
+             "regression": mrmr_regression,
+         }.get(self.task)
+         if score_func is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+
+         _, relevance, redundancy = score_func(X, y, K=self.num_features_to_select, return_scores=True, **self.kwargs)
+
+         # Compute MRMR scores (relevance / mean redundancy), handling division by zero
+         mrmr_scores = relevance / redundancy.mean(axis=1).replace(0, np.nan)
+         mrmr_scores = mrmr_scores.fillna(0)
+         scores = np.array([mrmr_scores.get(feature, 0) for feature in X.columns])
+         return scores
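With `return_scores=True`, the mrmr functions return the selected feature list plus a per-feature relevance Series and a pairwise redundancy DataFrame; the selector collapses these into relevance divided by mean redundancy. A self-contained sketch of that aggregation step, with invented values that only mimic the shapes involved:

```python
import numpy as np
import pandas as pd

# Hypothetical outputs shaped like mrmr's relevance/redundancy returns
relevance = pd.Series({"f0": 2.0, "f1": 0.4, "f2": 1.2})
redundancy = pd.DataFrame(
    {"f0": [1.0, 0.2, 0.5], "f1": [0.2, 1.0, 0.0], "f2": [0.5, 0.0, 1.0]},
    index=["f0", "f1", "f2"],
)

mean_red = redundancy.mean(axis=1).replace(0, np.nan)  # same division-by-zero guard as above
scores = (relevance / mean_red).fillna(0)
print(scores)  # higher = relevant and not redundant with the other features
```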
moosefs/feature_selectors/mutual_info_selector.py
@@ -0,0 +1,45 @@
+ from typing import Any
+
+ import numpy as np
+ from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
+
+ from .base_selector import FeatureSelector
+
+
+ class MutualInfoSelector(FeatureSelector):
+     """Feature selector using mutual information scores."""
+
+     name = "MutualInfo"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for the mutual information function.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes mutual information scores.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             Mutual information scores for each feature.
+
+         Raises:
+             ValueError: If task is not 'classification' or 'regression'.
+         """
+         mutual_info_func = {
+             "classification": mutual_info_classif,
+             "regression": mutual_info_regression,
+         }.get(self.task)
+         if mutual_info_func is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+         scores = mutual_info_func(X, y, **self.kwargs)
+         return scores
moosefs/feature_selectors/random_forest_selector.py
@@ -0,0 +1,48 @@
+ from typing import Any
+
+ import numpy as np
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+
+ from .base_selector import FeatureSelector
+
+
+ class RandomForestSelector(FeatureSelector):
+     """Feature selector using RandomForest feature importance."""
+
+     name = "RandomForest"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for the RandomForest model.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes feature importances using a RandomForest model.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             Feature importances from the trained RandomForest model.
+
+         Raises:
+             ValueError: If task is not 'classification' or 'regression'.
+         """
+         model_cls = {
+             "classification": RandomForestClassifier,
+             "regression": RandomForestRegressor,
+         }.get(self.task)
+         if model_cls is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+
+         model = model_cls(**self.kwargs)
+         model.fit(X, y)
+         scores = model.feature_importances_
+         return scores
moosefs/feature_selectors/svm_selector.py
@@ -0,0 +1,50 @@
+ from typing import Any
+
+ import numpy as np
+ from sklearn.svm import SVC, SVR
+
+ from .base_selector import FeatureSelector
+
+
+ class SVMSelector(FeatureSelector):
+     """Feature selector using SVM coefficients."""
+
+     name = "SVM"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for the SVM model.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes feature importances using a linear SVM model.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             Feature importances derived from SVM model coefficients.
+
+         Raises:
+             ValueError: If task is not 'classification' or 'regression'.
+         """
+         model_cls = {"classification": SVC, "regression": SVR}.get(self.task)
+         if model_cls is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+
+         # Only remove `random_state` for SVR, which does not accept it
+         filtered_kwargs = (
+             {k: v for k, v in self.kwargs.items() if k != "random_state"} if self.task == "regression" else self.kwargs
+         )
+
+         model = model_cls(kernel="linear", **filtered_kwargs)
+         model.fit(X, y)
+         scores = np.abs(model.coef_[0])  # for multiclass SVC, coef_[0] covers only the first class pair
+         return scores
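A usage sketch on a binary problem (import path assumed). For multiclass SVC, `coef_` holds one row per class pair, so `coef_[0]` above reflects only the first pairwise boundary; binary targets are the straightforward case:

```python
from sklearn.datasets import make_classification
from moosefs.feature_selectors import SVMSelector  # export path assumed

X, y = make_classification(n_samples=200, n_features=5, n_informative=2,
                           n_classes=2, random_state=0)
selector = SVMSelector(task="classification", num_features_to_select=2, C=1.0)
scores = selector.compute_scores(X, y)  # |w| from the linear decision boundary
print(scores)
```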
moosefs/feature_selectors/variance_selectors.py
@@ -0,0 +1,16 @@
+ import numpy as np
+ import pandas as pd
+
+ from .base_selector import FeatureSelector
+
+
+ class VarianceSelector(FeatureSelector):
+     name = "Variance"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs):
+         super().__init__(task, num_features_to_select)
+
+     def compute_scores(self, X, y):
+         if isinstance(X, np.ndarray):
+             X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
+         return X.var(ddof=0).values  # base class will keep the highest variances
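The `ddof=0` matters here: pandas defaults to the sample variance (`ddof=1`), while `ddof=0` gives the population variance that sklearn's `VarianceThreshold` also reports. A quick check:

```python
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

X = pd.DataFrame({"a": [0.0, 0.0, 0.0, 1.0], "b": [1.0, 2.0, 3.0, 4.0]})
print(X.var(ddof=0).values)                   # population variance, as used above
print(VarianceThreshold().fit(X).variances_)  # identical values
print(X.var().values)                         # pandas default (ddof=1) differs
```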
moosefs/feature_selectors/xgboost_selector.py
@@ -0,0 +1,44 @@
+ from typing import Any
+
+ import numpy as np
+ from xgboost import XGBClassifier, XGBRegressor
+
+ from .base_selector import FeatureSelector
+
+
+ class XGBoostSelector(FeatureSelector):
+     """Feature selector using XGBoost feature importance."""
+
+     name = "XGBoost"
+
+     def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
+         """
+         Args:
+             task: ML task ('classification' or 'regression').
+             num_features_to_select: Number of features to select.
+             **kwargs: Additional arguments for the XGBoost model.
+         """
+         super().__init__(task, num_features_to_select)
+         self.kwargs = kwargs
+
+     def compute_scores(self, X: Any, y: Any) -> np.ndarray:
+         """
+         Computes feature importances using an XGBoost model.
+
+         Args:
+             X: Training samples.
+             y: Target values.
+
+         Returns:
+             Feature importances from the trained XGBoost model.
+
+         Raises:
+             ValueError: If task is not 'classification' or 'regression'.
+         """
+         model_cls = {"classification": XGBClassifier, "regression": XGBRegressor}.get(self.task)
+         if model_cls is None:
+             raise ValueError("Task must be 'classification' or 'regression'.")
+         model = model_cls(**self.kwargs)
+         model.fit(X, y)
+         scores = model.feature_importances_
+         return scores
moosefs/merging_strategies/__init__.py
@@ -0,0 +1,17 @@
+ # merging_strategies/__init__.py
+
+ from .arithmetic_mean_merger import ArithmeticMeanMerger
+ from .base_merger import MergingStrategy
+ from .borda_merger import BordaMerger
+ from .consensus_merger import ConsensusMerger
+ from .l2_norm_merger import L2NormMerger
+ from .union_of_intersections_merger import UnionOfIntersectionsMerger
+
+ __all__ = [
+     "MergingStrategy",
+     "BordaMerger",
+     "UnionOfIntersectionsMerger",
+     "ArithmeticMeanMerger",
+     "L2NormMerger",
+     "ConsensusMerger",
+ ]
moosefs/merging_strategies/arithmetic_mean_merger.py
@@ -0,0 +1,46 @@
+ import numpy as np
+
+ from .base_merger import MergingStrategy
+
+
+ class ArithmeticMeanMerger(MergingStrategy):
+     """Rank-based merging using the arithmetic mean of scores."""
+
+     name = "ArithmeticMean"
+
+     def __init__(self, **kwargs) -> None:
+         # Keep taxonomy consistent with the existing mergers
+         super().__init__("rank-based")
+         self.kwargs = kwargs
+
+     def merge(
+         self,
+         subsets: list,
+         num_features_to_select: int,
+         **kwargs,
+     ) -> list:
+         """Return the top-k feature names after arithmetic-mean aggregation.
+
+         Args:
+             subsets: Feature lists (one list per selector).
+             num_features_to_select: Number of names to return.
+
+         Returns:
+             Feature names sorted by mean score.
+         """
+         self._validate_input(subsets)
+
+         # Shortcut if only one selector supplied
+         if len(subsets) == 1:
+             return [f.name for f in subsets[0]][:num_features_to_select]
+
+         feature_names = [f.name for f in subsets[0]]
+         # shape: (n_features, n_selectors)
+         scores = np.array([[f.score for f in s] for s in subsets]).T
+
+         # Arithmetic mean across selectors
+         scores_merged = scores.mean(axis=1)
+
+         # Higher mean score ⇒ higher rank (argsort on negated scores, unlike Borda's ascending sort)
+         sorted_names = [feature_names[i] for i in np.argsort(-scores_merged, kind="stable")]
+         return sorted_names[:num_features_to_select]
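A worked sketch of the mean aggregation. It assumes a `Feature(name, score)` constructor matching the `.name`/`.score` attributes used above (the actual signature lives in `moosefs/core/feature.py`, outside this hunk). Note that the merge reads feature names from the first subset, so every selector must list features in the same order:

```python
from moosefs.core.feature import Feature  # constructor signature assumed
from moosefs.merging_strategies import ArithmeticMeanMerger

# Two selectors scored the same three features (same order in both lists)
s1 = [Feature("a", 0.9), Feature("b", 0.4), Feature("c", 0.1)]
s2 = [Feature("a", 0.2), Feature("b", 0.8), Feature("c", 0.3)]

merger = ArithmeticMeanMerger()
print(merger.merge([s1, s2], num_features_to_select=2))
# mean scores: a=0.55, b=0.60, c=0.20 -> ['b', 'a']
```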
moosefs/merging_strategies/base_merger.py
@@ -0,0 +1,64 @@
+ from ..core.feature import Feature
+
+
+ class MergingStrategy:
+     """Abstract base for merging strategies.
+
+     Strategies are either "set-based" or "rank-based", depending on how they
+     merge the per-selector outputs.
+     """
+
+     def __init__(self, strategy_type: str) -> None:
+         """Initialize the strategy.
+
+         Args:
+             strategy_type: Either "set-based" or "rank-based".
+         """
+         self.strategy_type = strategy_type
+
+     def merge(self, data: list, num_features_to_select: int, **kwargs) -> list:
+         """Merge input data according to the strategy.
+
+         Subclasses must implement this method.
+
+         Args:
+             data: List of Feature lists (one list per selector) or a single list.
+             num_features_to_select: Number of top features to return.
+             **kwargs: Strategy-specific options.
+
+         Returns:
+             A list of merged features (or names, depending on the strategy).
+
+         Raises:
+             NotImplementedError: If not implemented in a subclass.
+         """
+         raise NotImplementedError("Subclasses must implement this method")
+
+     def is_set_based(self) -> bool:
+         """Return True if the strategy is set-based."""
+         return self.strategy_type == "set-based"
+
+     def is_rank_based(self) -> bool:
+         """Return True if the strategy is rank-based."""
+         return self.strategy_type == "rank-based"
+
+     def _validate_input(self, subsets: list) -> None:
+         """Validate that ``subsets`` contains Feature objects.
+
+         Args:
+             subsets: A list of Feature objects or a list of Feature lists.
+
+         Raises:
+             ValueError: If empty or containing invalid types.
+         """
+         if not subsets:
+             raise ValueError("Subsets cannot be empty.")
+
+         if isinstance(subsets[0], list):  # List-of-lists case
+             if not all(isinstance(sub, list) and sub for sub in subsets):
+                 raise ValueError("Subsets cannot contain empty lists.")
+             if not all(isinstance(feature, Feature) for sub in subsets for feature in sub):
+                 raise ValueError("Subsets must contain Feature objects.")
+         else:  # Single-list case
+             if not all(isinstance(feature, Feature) for feature in subsets):
+                 raise ValueError("Subsets must contain Feature objects.")
moosefs/merging_strategies/borda_merger.py
@@ -0,0 +1,46 @@
+ import numpy as np
+ from ranky import borda
+
+ from .base_merger import MergingStrategy
+
+
+ class BordaMerger(MergingStrategy):
+     """Rank-based merging using the Borda count method."""
+
+     name = "Borda"
+
+     def __init__(self, **kwargs) -> None:
+         """Initialize a rank-based merger.
+
+         Args:
+             **kwargs: Forwarded to the Borda routine (if applicable).
+         """
+         super().__init__("rank-based")
+         self.kwargs = kwargs
+
+     def merge(self, subsets: list, num_features_to_select: int, **kwargs) -> list:
+         """Merge by Borda count and return the top-k names.
+
+         Args:
+             subsets: Feature lists (one list per selector).
+             num_features_to_select: Number of names to return.
+
+         Returns:
+             Feature names sorted by merged Borda scores.
+         """
+         self._validate_input(subsets)
+
+         if len(subsets) == 1:
+             return [feature.name for feature in subsets[0]][:num_features_to_select]
+
+         # Extract feature names (from the first subset) and per-selector scores
+         feature_names = [feature.name for feature in subsets[0]]
+         scores = np.array([[feature.score for feature in subset] for subset in subsets]).T
+
+         # Apply the Borda count method
+         scores_merged = borda(m=scores, **self.kwargs)
+
+         # Sort by Borda score (lower score = higher rank)
+         sorted_names = [feature_names[i] for i in np.argsort(scores_merged, kind="stable")]
+
+         return list(sorted_names[:num_features_to_select])
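For intuition about what `ranky.borda` is doing here, a self-contained numpy/scipy sketch of the usual Borda convention (each selector contributes a ranking, rankings are averaged, and the best mean rank wins); the exact scoring and tie-breaking inside `ranky` may differ:

```python
import numpy as np
from scipy.stats import rankdata

# scores: rows = features, columns = selectors (same layout as `scores` above)
scores = np.array([
    [0.9, 0.2],  # feature a
    [0.4, 0.8],  # feature b
    [0.1, 0.3],  # feature c
])

# Rank within each selector (rank 1 = highest score), then average across selectors
ranks = np.column_stack([rankdata(-scores[:, j]) for j in range(scores.shape[1])])
mean_rank = ranks.mean(axis=1)
names = ["a", "b", "c"]
print([names[i] for i in np.argsort(mean_rank, kind="stable")])  # ['b', 'a', 'c']
```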