featcopilot 0.1.0__py3-none-any.whl

@@ -0,0 +1,161 @@
+ """Model-based feature importance selection."""
+
+ from typing import Optional, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from featcopilot.core.base import BaseSelector
+
+
+ class ImportanceSelector(BaseSelector):
+     """
+     Feature selector based on model importance scores.
+
+     Uses tree-based models to evaluate feature importance.
+
+     Parameters
+     ----------
+     model : str, default='random_forest'
+         Model to use ('random_forest', 'gradient_boosting', 'xgboost')
+     max_features : int, optional
+         Maximum features to select
+     threshold : float, optional
+         Minimum importance threshold
+
+     Examples
+     --------
+     >>> selector = ImportanceSelector(model='random_forest', max_features=50)
+     >>> X_selected = selector.fit_transform(X, y)
+     """
+
+     def __init__(
+         self,
+         model: str = "random_forest",
+         max_features: Optional[int] = None,
+         threshold: Optional[float] = None,
+         n_estimators: int = 100,
+         verbose: bool = False,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.model_type = model
+         self.max_features = max_features
+         self.threshold = threshold
+         self.n_estimators = n_estimators
+         self.verbose = verbose
+         self._model = None
+
+     def fit(
+         self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray], **kwargs
+     ) -> "ImportanceSelector":
+         """
+         Fit selector using a tree model.
+
+         Parameters
+         ----------
+         X : DataFrame or ndarray
+             Input features
+         y : Series or ndarray
+             Target variable
+
+         Returns
+         -------
+         self : ImportanceSelector
+         """
+         X = self._validate_input(X)
+         y = np.array(y)
+
+         # Determine task type
+         unique_y = len(np.unique(y))
+         is_classification = unique_y < 20 and not np.issubdtype(y.dtype, np.floating)
+
+         # Create model
+         self._model = self._create_model(is_classification)
+
+         # Fit model
+         X_array = X.fillna(0).values
+         self._model.fit(X_array, y)
+
+         # Get importances
+         importances = self._model.feature_importances_
+         self._feature_scores = dict(zip(X.columns, importances))
+
+         # Select features
+         self._select_features()
+
+         self._is_fitted = True
+         return self
+
+     def _create_model(self, is_classification: bool):
+         """Create the appropriate model."""
+         if self.model_type == "random_forest":
+             if is_classification:
+                 from sklearn.ensemble import RandomForestClassifier
+
+                 return RandomForestClassifier(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
+             else:
+                 from sklearn.ensemble import RandomForestRegressor
+
+                 return RandomForestRegressor(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
+
+         elif self.model_type == "gradient_boosting":
+             if is_classification:
+                 from sklearn.ensemble import GradientBoostingClassifier
+
+                 return GradientBoostingClassifier(n_estimators=self.n_estimators, random_state=42)
+             else:
+                 from sklearn.ensemble import GradientBoostingRegressor
+
+                 return GradientBoostingRegressor(n_estimators=self.n_estimators, random_state=42)
+
+         elif self.model_type == "xgboost":
+             try:
+                 import xgboost as xgb
+
+                 if is_classification:
+                     return xgb.XGBClassifier(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
+                 else:
+                     return xgb.XGBRegressor(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
+             except ImportError:
+                 if self.verbose:
+                     print("XGBoost not available, falling back to RandomForest")
+                 return self._create_model_fallback(is_classification)
+
+         else:
+             raise ValueError(f"Unknown model type: {self.model_type}")
+
+     def _create_model_fallback(self, is_classification: bool):
+         """Fallback to RandomForest."""
+         if is_classification:
+             from sklearn.ensemble import RandomForestClassifier
+
+             return RandomForestClassifier(n_estimators=self.n_estimators, random_state=42)
+         else:
+             from sklearn.ensemble import RandomForestRegressor
+
+             return RandomForestRegressor(n_estimators=self.n_estimators, random_state=42)
+
+     def _select_features(self) -> None:
+         """Select features based on importance."""
+         sorted_features = sorted(self._feature_scores.items(), key=lambda x: x[1], reverse=True)
+
+         if self.threshold is not None:
+             sorted_features = [(name, score) for name, score in sorted_features if score >= self.threshold]
+
+         if self.max_features is not None:
+             sorted_features = sorted_features[: self.max_features]
+
+         self._selected_features = [name for name, _ in sorted_features]
+
+         if self.verbose:
+             print(f"ImportanceSelector: Selected {len(self._selected_features)} features")
+
+     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
+         """Select features from data."""
+         if not self._is_fitted:
+             raise RuntimeError("Selector must be fitted before transform")
+
+         X = self._validate_input(X)
+         available = [f for f in self._selected_features if f in X.columns]
+         return X[available]
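
For orientation, here is a minimal usage sketch of ImportanceSelector as defined above. The synthetic DataFrame, target, and feature names are illustrative, not part of the package; the import path and the fit_transform call follow the module path and docstring example shown in this diff.

    import numpy as np
    import pandas as pd

    from featcopilot.selection.importance import ImportanceSelector

    # Illustrative data: 10 numeric features, binary target driven by f0 and f1
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(200, 10)), columns=[f"f{i}" for i in range(10)])
    y = (X["f0"] + 0.5 * X["f1"] > 0).astype(int)

    # Fit a random forest and keep the 5 highest-importance features
    selector = ImportanceSelector(model="random_forest", max_features=5)
    X_selected = selector.fit_transform(X, y)
    print(X_selected.columns.tolist())
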
@@ -0,0 +1,156 @@
+ """Redundancy elimination through correlation analysis."""
+
+ from typing import Optional, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from featcopilot.core.base import BaseSelector
+
+
+ class RedundancyEliminator(BaseSelector):
+     """
+     Eliminate redundant features based on correlation.
+
+     Removes highly correlated features, keeping the one with
+     higher importance (if provided) or the first one.
+
+     Parameters
+     ----------
+     correlation_threshold : float, default=0.95
+         Correlation threshold for redundancy
+     method : str, default='pearson'
+         Correlation method ('pearson', 'spearman', 'kendall')
+
+     Examples
+     --------
+     >>> eliminator = RedundancyEliminator(correlation_threshold=0.95)
+     >>> X_reduced = eliminator.fit_transform(X, y)
+     """
+
+     def __init__(
+         self,
+         correlation_threshold: float = 0.95,
+         method: str = "pearson",
+         importance_scores: Optional[dict[str, float]] = None,
+         verbose: bool = False,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.correlation_threshold = correlation_threshold
+         self.method = method
+         self.importance_scores = importance_scores or {}
+         self.verbose = verbose
+         self._correlation_matrix: Optional[pd.DataFrame] = None
+
+     def fit_transform(
+         self,
+         X: Union[pd.DataFrame, np.ndarray],
+         y: Optional[Union[pd.Series, np.ndarray]] = None,
+         **kwargs,
+     ) -> pd.DataFrame:
+         """Fit and transform in one step (y is optional for this selector)."""
+         return self.fit(X, y, **kwargs).transform(X, **kwargs)
+
+     def fit(
+         self,
+         X: Union[pd.DataFrame, np.ndarray],
+         y: Optional[Union[pd.Series, np.ndarray]] = None,
+         importance_scores: Optional[dict[str, float]] = None,
+         **kwargs,
+     ) -> "RedundancyEliminator":
+         """
+         Fit eliminator by computing correlations.
+
+         Parameters
+         ----------
+         X : DataFrame or ndarray
+             Input features
+         y : Series or ndarray, optional
+             Target variable (unused)
+         importance_scores : dict, optional
+             Pre-computed importance scores
+
+         Returns
+         -------
+         self : RedundancyEliminator
+         """
+         X = self._validate_input(X)
+
+         if importance_scores:
+             self.importance_scores = importance_scores
+
+         # Compute correlation matrix
+         numeric_cols = X.select_dtypes(include=[np.number]).columns
+         self._correlation_matrix = X[numeric_cols].corr(method=self.method)
+
+         # Find redundant features
+         self._find_redundant_features(numeric_cols)
+
+         self._is_fitted = True
+         return self
+
+     def _find_redundant_features(self, columns: list[str]) -> None:
+         """Identify and mark redundant features for removal."""
+         to_remove: set[str] = set()
+         checked_pairs: set[tuple] = set()
+
+         for i, col1 in enumerate(columns):
+             if col1 in to_remove:
+                 continue
+
+             for col2 in columns[i + 1 :]:
+                 if col2 in to_remove:
+                     continue
+
+                 pair = tuple(sorted([col1, col2]))
+                 if pair in checked_pairs:
+                     continue
+                 checked_pairs.add(pair)
+
+                 # Get correlation
+                 corr = abs(self._correlation_matrix.loc[col1, col2])
+
+                 if corr >= self.correlation_threshold:
+                     # Decide which to remove based on importance
+                     imp1 = self.importance_scores.get(col1, 0)
+                     imp2 = self.importance_scores.get(col2, 0)
+
+                     if imp1 >= imp2:
+                         to_remove.add(col2)
+                         if self.verbose:
+                             print(f"Removing {col2} (corr={corr:.3f} with {col1})")
+                     else:
+                         to_remove.add(col1)
+                         if self.verbose:
+                             print(f"Removing {col1} (corr={corr:.3f} with {col2})")
+                         break  # col1 is removed, move to next
+
+         # Selected features are those not removed
+         self._selected_features = [c for c in columns if c not in to_remove]
+         self._removed_features = list(to_remove)
+
+         if self.verbose:
+             print(f"RedundancyEliminator: Removed {len(to_remove)} redundant features")
+
+     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
+         """Remove redundant features."""
+         if not self._is_fitted:
+             raise RuntimeError("Eliminator must be fitted before transform")
+
+         X = self._validate_input(X)
+
+         # Keep selected features plus any non-numeric columns
+         non_numeric = X.select_dtypes(exclude=[np.number]).columns.tolist()
+         keep_cols = [c for c in self._selected_features if c in X.columns]
+         keep_cols.extend([c for c in non_numeric if c not in keep_cols])
+
+         return X[keep_cols]
+
+     def get_removed_features(self) -> list[str]:
+         """Get list of removed redundant features."""
+         return getattr(self, "_removed_features", [])
+
+     def get_correlation_matrix(self) -> Optional[pd.DataFrame]:
+         """Get the computed correlation matrix."""
+         return self._correlation_matrix
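
A short sketch of RedundancyEliminator in isolation. The near-duplicate column below is fabricated to trip the threshold, and y is omitted since this class accepts it as optional; only methods defined in the file above are used.

    import numpy as np
    import pandas as pd

    from featcopilot.selection.redundancy import RedundancyEliminator

    rng = np.random.default_rng(0)
    a = rng.normal(size=300)
    X = pd.DataFrame({
        "a": a,
        "a_noisy": a + rng.normal(scale=0.01, size=300),  # correlation with "a" is ~1.0
        "b": rng.normal(size=300),
    })

    eliminator = RedundancyEliminator(correlation_threshold=0.95)
    X_reduced = eliminator.fit_transform(X)  # keeps "a" (first of the pair) and "b"
    print(eliminator.get_removed_features())  # ['a_noisy']
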
@@ -0,0 +1,199 @@
+ """Statistical feature selection methods."""
+
+ from typing import Optional, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from featcopilot.core.base import BaseSelector
+
+
+ class StatisticalSelector(BaseSelector):
+     """
+     Feature selector based on statistical tests.
+
+     Uses statistical tests to evaluate feature relevance:
+     - Mutual information
+     - Chi-square test (categorical)
+     - F-test (ANOVA)
+     - Correlation with target
+
+     Parameters
+     ----------
+     method : str, default='mutual_info'
+         Selection method ('mutual_info', 'f_test', 'chi2', 'correlation')
+     max_features : int, optional
+         Maximum features to select
+     threshold : float, optional
+         Minimum score threshold
+
+     Examples
+     --------
+     >>> selector = StatisticalSelector(method='mutual_info', max_features=50)
+     >>> X_selected = selector.fit_transform(X, y)
+     """
+
+     METHODS = ["mutual_info", "f_test", "chi2", "correlation"]
+
+     def __init__(
+         self,
+         method: str = "mutual_info",
+         max_features: Optional[int] = None,
+         threshold: Optional[float] = None,
+         verbose: bool = False,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         if method not in self.METHODS:
+             raise ValueError(f"Method must be one of {self.METHODS}")
+
+         self.method = method
+         self.max_features = max_features
+         self.threshold = threshold
+         self.verbose = verbose
+
+     def fit(
+         self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray], **kwargs
+     ) -> "StatisticalSelector":
+         """
+         Fit selector to compute feature scores.
+
+         Parameters
+         ----------
+         X : DataFrame or ndarray
+             Input features
+         y : Series or ndarray
+             Target variable
+
+         Returns
+         -------
+         self : StatisticalSelector
+         """
+         X = self._validate_input(X)
+         y = np.array(y)
+
+         # Compute scores based on method
+         if self.method == "mutual_info":
+             scores = self._compute_mutual_info(X, y)
+         elif self.method == "f_test":
+             scores = self._compute_f_test(X, y)
+         elif self.method == "chi2":
+             scores = self._compute_chi2(X, y)
+         elif self.method == "correlation":
+             scores = self._compute_correlation(X, y)
+         else:
+             raise ValueError(f"Unknown method: {self.method}")
+
+         self._feature_scores = dict(zip(X.columns, scores))
+
+         # Select features
+         self._select_features()
+
+         self._is_fitted = True
+         return self
+
+     def _compute_mutual_info(self, X: pd.DataFrame, y: np.ndarray) -> np.ndarray:
+         """Compute mutual information scores."""
+         from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
+
+         # Determine if classification or regression
+         unique_y = len(np.unique(y))
+         is_classification = unique_y < 20 and (np.issubdtype(y.dtype, np.integer) or y.dtype == object)
+
+         X_array = X.fillna(0).values
+
+         if is_classification:
+             scores = mutual_info_classif(X_array, y, random_state=42)
+         else:
+             scores = mutual_info_regression(X_array, y, random_state=42)
+
+         return scores
+
+     def _compute_f_test(self, X: pd.DataFrame, y: np.ndarray) -> np.ndarray:
+         """Compute F-test scores."""
+         from sklearn.feature_selection import f_classif, f_regression
+
+         unique_y = len(np.unique(y))
+         is_classification = unique_y < 20
+
+         X_array = X.fillna(0).values
+
+         if is_classification:
+             scores, _ = f_classif(X_array, y)
+         else:
+             scores, _ = f_regression(X_array, y)
+
+         # Handle NaN scores
+         scores = np.nan_to_num(scores, nan=0.0)
+         return scores
+
+     def _compute_chi2(self, X: pd.DataFrame, y: np.ndarray) -> np.ndarray:
+         """Compute chi-square scores (for non-negative features)."""
+         from sklearn.feature_selection import chi2
+
+         X_array = X.fillna(0).values
+
+         # Chi2 requires non-negative values
+         X_positive = X_array - X_array.min(axis=0) + 1e-8
+
+         try:
+             scores, _ = chi2(X_positive, y)
+             scores = np.nan_to_num(scores, nan=0.0)
+         except Exception:
+             # Fallback to mutual information
+             scores = self._compute_mutual_info(X, y)
+
+         return scores
+
+     def _compute_correlation(self, X: pd.DataFrame, y: np.ndarray) -> np.ndarray:
+         """Compute absolute correlation with target."""
+         scores = []
+         for col in X.columns:
+             try:
+                 corr = np.abs(np.corrcoef(X[col].fillna(0).values, y)[0, 1])
+                 scores.append(corr if not np.isnan(corr) else 0)
+             except Exception:
+                 scores.append(0)
+
+         return np.array(scores)
+
+     def _select_features(self) -> None:
+         """Select features based on scores."""
+         # Sort features by score
+         sorted_features = sorted(self._feature_scores.items(), key=lambda x: x[1], reverse=True)
+
+         # Apply threshold
+         if self.threshold is not None:
+             sorted_features = [(name, score) for name, score in sorted_features if score >= self.threshold]
+
+         # Apply max_features limit
+         if self.max_features is not None:
+             sorted_features = sorted_features[: self.max_features]
+
+         self._selected_features = [name for name, _ in sorted_features]
+
+         if self.verbose:
+             print(f"StatisticalSelector: Selected {len(self._selected_features)} features")
+
+     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
+         """
+         Select features from data.
+
+         Parameters
+         ----------
+         X : DataFrame or ndarray
+             Input features
+
+         Returns
+         -------
+         X_selected : DataFrame
+             Data with only selected features
+         """
+         if not self._is_fitted:
+             raise RuntimeError("Selector must be fitted before transform")
+
+         X = self._validate_input(X)
+
+         # Keep only selected features that exist in X
+         available = [f for f in self._selected_features if f in X.columns]
+         return X[available]
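
A similar sketch for StatisticalSelector, using a continuous target so the mutual-information path dispatches to regression (the data is again illustrative):

    import numpy as np
    import pandas as pd

    from featcopilot.selection.statistical import StatisticalSelector

    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(300, 8)), columns=[f"f{i}" for i in range(8)])
    y = 2.0 * X["f2"] + rng.normal(scale=0.1, size=300)  # float target -> regression path

    selector = StatisticalSelector(method="mutual_info", max_features=3)
    X_selected = selector.fit_transform(X, y)
    print(X_selected.columns.tolist())  # "f2" should rank at or near the top
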
@@ -0,0 +1,172 @@
+ """Unified feature selector combining multiple methods."""
+
+ from typing import Optional, Union
+
+ import numpy as np
+ import pandas as pd
+
+ from featcopilot.core.base import BaseSelector
+ from featcopilot.selection.importance import ImportanceSelector
+ from featcopilot.selection.redundancy import RedundancyEliminator
+ from featcopilot.selection.statistical import StatisticalSelector
+
+
+ class FeatureSelector(BaseSelector):
+     """
+     Unified feature selector combining multiple selection methods.
+
+     Combines statistical tests, model importance, and redundancy
+     elimination for comprehensive feature selection.
+
+     Parameters
+     ----------
+     methods : list, default=['mutual_info', 'importance']
+         Selection methods to use
+     max_features : int, optional
+         Maximum features to select
+     correlation_threshold : float, default=0.95
+         Threshold for redundancy elimination
+
+     Examples
+     --------
+     >>> selector = FeatureSelector(
+     ...     methods=['mutual_info', 'importance', 'correlation'],
+     ...     max_features=50,
+     ...     correlation_threshold=0.95
+     ... )
+     >>> X_selected = selector.fit_transform(X, y)
+     """
+
+     def __init__(
+         self,
+         methods: Optional[list[str]] = None,
+         max_features: Optional[int] = None,
+         correlation_threshold: float = 0.95,
+         combination: str = "union",
+         verbose: bool = False,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.methods = methods or ["mutual_info", "importance"]
+         self.max_features = max_features
+         self.correlation_threshold = correlation_threshold
+         self.combination = combination  # 'union' or 'intersection'
+         self.verbose = verbose
+         self._selectors: dict[str, BaseSelector] = {}
+         self._method_scores: dict[str, dict[str, float]] = {}
+
+     def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray], **kwargs) -> "FeatureSelector":
+         """
+         Fit all selection methods.
+
+         Parameters
+         ----------
+         X : DataFrame or ndarray
+             Input features
+         y : Series or ndarray
+             Target variable
+
+         Returns
+         -------
+         self : FeatureSelector
+         """
+         X = self._validate_input(X)
+         y = np.array(y)
+
+         # Initialize and fit each selector
+         for method in self.methods:
+             selector = self._create_selector(method)
+             selector.fit(X, y)
+             self._selectors[method] = selector
+             self._method_scores[method] = selector.get_feature_scores()
+
+         # Combine scores from all methods
+         self._combine_scores(X.columns.tolist())
+
+         # Apply redundancy elimination
+         if self.correlation_threshold < 1.0:
+             eliminator = RedundancyEliminator(
+                 correlation_threshold=self.correlation_threshold,
+                 importance_scores=self._feature_scores,
+                 verbose=self.verbose,
+             )
+             eliminator.fit(X)
+             non_redundant = set(eliminator.get_selected_features())
+             self._feature_scores = {k: v for k, v in self._feature_scores.items() if k in non_redundant}
+
+         # Final selection
+         self._final_selection()
+
+         self._is_fitted = True
+         return self
+
+     def _create_selector(self, method: str) -> BaseSelector:
+         """Create selector for a given method."""
+         if method == "mutual_info":
+             return StatisticalSelector(method="mutual_info", verbose=self.verbose)
+         elif method == "f_test":
+             return StatisticalSelector(method="f_test", verbose=self.verbose)
+         elif method == "chi2":
+             return StatisticalSelector(method="chi2", verbose=self.verbose)
+         elif method == "correlation":
+             return StatisticalSelector(method="correlation", verbose=self.verbose)
+         elif method == "importance":
+             return ImportanceSelector(model="random_forest", verbose=self.verbose)
+         elif method == "xgboost":
+             return ImportanceSelector(model="xgboost", verbose=self.verbose)
+         else:
+             raise ValueError(f"Unknown selection method: {method}")
+
+     def _combine_scores(self, columns: list[str]) -> None:
+         """Combine scores from multiple methods."""
+         combined = {}
+
+         for col in columns:
+             scores = []
+             for _, method_scores in self._method_scores.items():
+                 if col in method_scores:
+                     # Normalize score to 0-1 range
+                     all_scores = list(method_scores.values())
+                     max_score = max(all_scores) if all_scores else 1
+                     if max_score > 0:
+                         normalized = method_scores[col] / max_score
+                     else:
+                         normalized = 0
+                     scores.append(normalized)
+
+             # Average normalized scores
+             if scores:
+                 combined[col] = np.mean(scores)
+             else:
+                 combined[col] = 0
+
+         self._feature_scores = combined
+
+     def _final_selection(self) -> None:
+         """Make final feature selection."""
+         sorted_features = sorted(self._feature_scores.items(), key=lambda x: x[1], reverse=True)
+
+         if self.max_features is not None:
+             sorted_features = sorted_features[: self.max_features]
+
+         self._selected_features = [name for name, _ in sorted_features]
+
+         if self.verbose:
+             print(f"FeatureSelector: Selected {len(self._selected_features)} features")
+
+     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
+         """Select features from data."""
+         if not self._is_fitted:
+             raise RuntimeError("Selector must be fitted before transform")
+
+         X = self._validate_input(X)
+         available = [f for f in self._selected_features if f in X.columns]
+         return X[available]
+
+     def get_method_scores(self) -> dict[str, dict[str, float]]:
+         """Get scores from each individual method."""
+         return self._method_scores
+
+     def get_ranking(self) -> list[tuple]:
+         """Get feature ranking as list of (name, score) tuples."""
+         return sorted(self._feature_scores.items(), key=lambda x: x[1], reverse=True)
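
Finally, a sketch of the combined FeatureSelector. Note the import below is an assumption: this diff does not show the module path of the file defining FeatureSelector, only that it sits alongside the featcopilot.selection modules it imports.

    import numpy as np
    import pandas as pd

    # Hypothetical import path; the defining module's path is not shown in this diff
    from featcopilot.selection import FeatureSelector

    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(400, 20)), columns=[f"f{i}" for i in range(20)])
    y = (X["f0"] - X["f3"] > 0).astype(int)

    selector = FeatureSelector(
        methods=["mutual_info", "importance"],
        max_features=10,
        correlation_threshold=0.95,
    )
    X_selected = selector.fit_transform(X, y)
    for name, score in selector.get_ranking()[:5]:
        print(f"{name}: {score:.3f}")  # top combined (normalized, averaged) scores
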
@@ -0,0 +1,11 @@
+ """Scikit-learn compatible transformers."""
+
+ from featcopilot.transformers.sklearn_compat import (
+     AutoFeatureEngineer,
+     FeatureEngineerTransformer,
+ )
+
+ __all__ = [
+     "AutoFeatureEngineer",
+     "FeatureEngineerTransformer",
+ ]