sdufseval 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py
CHANGED
@@ -17,16 +17,16 @@ class FSEVAL:
 unsupervised_iter=10,
 eval_type="both",
 metrics=None,
-experiments=None
-
-
-"""
+experiments=None,
+save_all=False):
+
 self.output_dir = output_dir
 self.cv = cv
 self.avg_steps = avg_steps
 self.supervised_iter = supervised_iter
 self.unsupervised_iter = unsupervised_iter
 self.eval_type = eval_type
+self.save_all = save_all

 # Metric configuration
 all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
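The new `save_all` flag is the main API change in this release. Below is a minimal usage sketch of the updated constructor, assuming the class is imported from `sdufseval.fseval`; the argument values are illustrative choices, not defaults taken from the package.

```python
# Hypothetical usage sketch for the 1.0.5 constructor; values are illustrative.
from sdufseval.fseval import FSEVAL

bench = FSEVAL(
    output_dir="fseval_results",   # directory created by the constructor if missing
    eval_type="both",              # run both supervised and unsupervised evaluators
    metrics=["NMI", "ACC"],        # subset of ["CLSACC", "NMI", "ACC", "AUC"]
    save_all=True,                 # new in 1.0.5: keep every repeat of stochastic methods
)
```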
@@ -44,21 +44,11 @@ class FSEVAL:
 os.makedirs(self.output_dir)

 def random_baseline(self, X, **kwargs):
-"""
-Randomly assigns importance scores to features.
-Internal method for lower-bound baseline.
-"""
+"""Randomly assigns importance scores to features."""
 return np.random.rand(X.shape[1])

 def run(self, datasets, methods, classifier=None):
-"""
-Executes the benchmark for given datasets and FS methods.
-
-Args:
-datasets: List of dataset names.
-methods: List of dicts {'name': str, 'func': callable, 'stochastic': bool}.
-classifier: Optional sklearn classifier instance to pass to supervised_eval.
-"""
+"""Executes the benchmark for given datasets and FS methods."""
 warnings.filterwarnings("ignore")
 for ds_name in datasets:
 print(f"\n>>> Benchmarking Dataset: {ds_name}")
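The docstring trimmed above described the expected `methods` format: a list of dicts with `'name'`, `'func'`, and `'stochastic'` keys, where `'func'` takes a feature matrix and returns one importance score per feature. A hedged sketch of a `run` call built on that description, reusing the `bench` object from the sketch above; the dataset name and the variance selector are placeholders, not names shipped with the package.

```python
import numpy as np

def variance_selector(X, **kwargs):
    # Toy deterministic selector: score each feature by its variance.
    return X.var(axis=0)

methods = [
    {"name": "Random", "func": bench.random_baseline, "stochastic": True},
    {"name": "Variance", "func": variance_selector, "stochastic": False},
]

# "my_dataset" is a placeholder for a dataset name known to the loader.
bench.run(datasets=["my_dataset"], methods=methods, classifier=None)
```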
@@ -71,7 +61,6 @@ class FSEVAL:
 for m_info in methods:
 name = m_info['name']
 fs_func = m_info['func']
-# Stochastic methods run 10 times and average
 repeats = self.avg_steps if m_info.get('stochastic', False) else 1

 # Internal storage for current dataset results
@@ -80,7 +69,6 @@ class FSEVAL:
 for r in range(repeats):
 print(f" [{name}] Progress: {r+1}/{repeats}")

-# Get feature ranking
 scores = fs_func(X)
 indices = np.argsort(scores)[::-1]

@@ -91,17 +79,14 @@ class FSEVAL:
 k = max(1, min(math.ceil(p * n_features), n_features))
 X_subset = X[:, indices[:k]]

-# Run evaluators
 c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan

 if self.eval_type in ["unsupervised", "both"]:
 c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)

 if self.eval_type in ["supervised", "both"]:
-# Passes classifier (None or instance) to eval.py
 acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)

-# Map metrics to columns
 mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
 for met in self.selected_metrics:
 row[met][p] = mapping[met]
@@ -109,21 +94,34 @@ class FSEVAL:
 for met in self.selected_metrics:
 ds_results[scale_name][met].append(row[met])

-# Save/Update results for this method/dataset
 self._save_results(name, ds_results)

-
+def _save_results(self, method_name, ds_results):
+for scale, metrics in ds_results.items():
+for met_name, rows in metrics.items():
+df_new = pd.DataFrame(rows)
+
+if not self.save_all:
+df_new = df_new.groupby('Dataset').mean().reset_index()
+
+df_new.columns = df_new.columns.astype(str)
+
+fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
+
+if os.path.exists(fname):
+df_old = pd.read_csv(fname)
+df_old.columns = df_old.columns.astype(str)
+
+if self.save_all:
+df_final = pd.concat([df_old, df_new], ignore_index=True)
+else:
+df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
+else:
+df_final = df_new
+
+df_final.to_csv(fname, index=False)
+
 def timer(self, methods, vary_param='both', time_limit=3600):
-"""
-Runs a standalone runtime analysis experiment with a time cap.
-
-Args:
-methods: List of dicts {'name': str, 'func': callable}.
-vary_param: 'features', 'instances', or 'both'.
-time_limit: Max seconds per method before it is skipped.
-"""
-
-# Determine which experiments to run
 experiments = []
 if vary_param in ['features', 'both']:
 experiments.append({
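The rewritten `_save_results` writes one CSV per method, metric, and scale, named `{method}_{metric}_{scale}.csv`. With `save_all=False` the repeat rows are averaged per dataset and an existing row for the same dataset is replaced; with `save_all=True` every repeat row is appended to the file. A small self-contained sketch of the two aggregation branches on toy data; the non-`Dataset` column names are invented for illustration.

```python
import pandas as pd

rows = [
    {"Dataset": "toy", "0.1": 0.70, "1.0": 0.80},  # repeat 1
    {"Dataset": "toy", "0.1": 0.74, "1.0": 0.82},  # repeat 2
]
df_new = pd.DataFrame(rows)

# save_all=False branch: one averaged row per dataset.
averaged = df_new.groupby("Dataset").mean().reset_index()

# save_all=True branch: keep every repeat row when appending to an existing file.
df_old = pd.DataFrame([{"Dataset": "toy", "0.1": 0.60, "1.0": 0.75}])
appended = pd.concat([df_old, df_new], ignore_index=True)

print(averaged)
print(appended)
```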
@@ -145,15 +143,11 @@ class FSEVAL:
 val_range = exp['range']
 filename = os.path.join(self.output_dir, exp['file'])

-# Tracking for this specific experiment
 timed_out_methods = set()
 results = {m['name']: [] for m in methods}

 print(f"\n--- Starting Experiment: Varying {vary_type} ---")
-print(f"Time limit: {time_limit}s | Output: {filename}")
-
 for val in val_range:
-# 1. Generate synthetic data based on vary_param
 if vary_type == 'features':
 n_samples, n_features = exp['fixed_val'], val
 else:
@@ -162,62 +156,32 @@ class FSEVAL:
 try:
 X = np.random.rand(n_samples, n_features)
 except MemoryError:
-print(f" FATAL: MemoryError: Failed to allocate {n_samples}x{n_features} data.")
 for m in methods:
 results[m['name']].append(-1 if m['name'] in timed_out_methods else np.nan)
 continue

-# 2. Run each method
 for m_info in methods:
 name = m_info['name']
 func = m_info['func']

-# Check if method has already timed out in this experiment
 if name in timed_out_methods:
 results[name].append(-1)
 continue

 try:
 start_time = time.time()
-
-# Execute the method (assuming benchmark format)
 func(X)
-
 duration = time.time() - start_time

 if duration > time_limit:
-print(f" - {name:<18}: {duration:.4f}s (TIMEOUT
+print(f" - {name:<18}: {duration:.4f}s (TIMEOUT)")
 timed_out_methods.add(name)
 else:
 print(f" - {name:<18}: {duration:.4f}s")
-
 results[name].append(duration)
-
 except Exception as e:
-print(f" - {name:<18}: FAILED ({type(e).__name__})")
 results[name].append(np.nan)

-
-
-
-df_results.index.name = 'Method'
-df_results.to_csv(filename)
-print(f"\n--- Results saved to {filename} ---")
-except Exception as e:
-print(f"\n--- FAILED to save results: {e} ---")
-
-
-def _save_results(self, method_name, ds_results):
-"""Aggregates repeats and saves to disk after each dataset."""
-for scale, metrics in ds_results.items():
-for met_name, rows in metrics.items():
-df_new = pd.DataFrame(rows).groupby('Dataset').mean().reset_index()
-fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
-
-if os.path.exists(fname):
-df_old = pd.read_csv(fname)
-df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
-else:
-df_final = df_new
-
-df_final.to_csv(fname, index=False)
+df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
+df_results.index.name = 'Method'
+df_results.to_csv(filename)
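The removed `timer` docstring documented `methods` as a list of `{'name', 'func'}` dicts, `vary_param` as `'features'`, `'instances'`, or `'both'`, and `time_limit` as a per-method cap in seconds. A hedged sketch of a call, reusing the `bench` object from the earlier sketches; the limit value is arbitrary.

```python
# Runtime scaling experiment on synthetic data; methods that exceed the cap
# are recorded as -1 (timed out) for the remaining problem sizes.
bench.timer(
    methods=[{"name": "Random", "func": bench.random_baseline}],
    vary_param="features",
    time_limit=600,  # seconds per method per run (illustrative value)
)
```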
{sdufseval-1.0.3.dist-info → sdufseval-1.0.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdufseval
-Version: 1.0.3
+Version: 1.0.5
 Summary: Evaluation and Benchmark Tool for Feature Selection
 Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
 Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -124,6 +124,7 @@ Initializes the evalutation and benchmark object.
 | **`eval_type`** | both | "supervised", "unsupervised", or "both". |
 | **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | Evaluation metrics to calculate. |
 | **`experiments`** | ["10Percent", "100Percent"] | Which feature ratio grids to evaluate. |
+| **`save_all`** | False | Save the results of all runs of the stochastic methods separately. |

 ### ⚙️ `run(datasets, methods, classifier=None)`

sdufseval-1.0.5.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
+sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
+sdufseval/fseval.py,sha256=MsirV6niLvyQzz95qtPBJ3zaI9XRFdGj1pYIuTw4F68,7864
+sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
+sdufseval-1.0.5.dist-info/METADATA,sha256=YhZY5IpbbWaUkKZWMCJ_bbbR5VtXG8iKXfjOFWAGMAs,5892
+sdufseval-1.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sdufseval-1.0.5.dist-info/RECORD,,
sdufseval-1.0.3.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
-sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
-sdufseval/fseval.py,sha256=kEsOlZ9B6yPZ0-SK6l_9LY08eTOyz4sd26TfXAANGAg,9511
-sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
-sdufseval-1.0.3.dist-info/METADATA,sha256=-NnHEAUQmru6sdBDuDQJhX9znIkF_jvtLxel7NYNIfw,5796
-sdufseval-1.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sdufseval-1.0.3.dist-info/RECORD,,