sdufseval 1.0.5.tar.gz → 1.0.7.tar.gz
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- {sdufseval-1.0.5 → sdufseval-1.0.7}/PKG-INFO +1 -1
- {sdufseval-1.0.5 → sdufseval-1.0.7}/pyproject.toml +1 -1
- {sdufseval-1.0.5 → sdufseval-1.0.7}/sdufseval/fseval.py +29 -42
- {sdufseval-1.0.5 → sdufseval-1.0.7}/README.md +0 -0
- {sdufseval-1.0.5 → sdufseval-1.0.7}/sdufseval/__init__.py +0 -0
- {sdufseval-1.0.5 → sdufseval-1.0.7}/sdufseval/eval.py +0 -0
- {sdufseval-1.0.5 → sdufseval-1.0.7}/sdufseval/loader.py +0 -0
{sdufseval-1.0.5 → sdufseval-1.0.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdufseval
-Version: 1.0.5
+Version: 1.0.7
 Summary: Evaluation and Benchmark Tool for Feature Selection
 Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
 Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
{sdufseval-1.0.5 → sdufseval-1.0.7}/sdufseval/fseval.py

@@ -4,9 +4,7 @@ import time
 import warnings
 import numpy as np
 import pandas as pd
-from eval import supervised_eval, unsupervised_eval
-from loader import load_dataset
-
+from sdufseval import supervised_eval, unsupervised_eval, load_dataset
 
 class FSEVAL:
     def __init__(self,
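With the bare module imports replaced by package-level ones, the evaluation helpers can be used directly. A minimal sketch of the new import surface follows; the dataset name and argument values are illustrative, and only the function names and keyword arguments visible in this diff are taken as given:

from sdufseval import supervised_eval, unsupervised_eval, load_dataset

# Load a dataset by name (name is hypothetical) into features and labels.
X, y = load_dataset("my_dataset")

# Clustering-based evaluation: returns clustering accuracy and NMI.
clsacc, nmi = unsupervised_eval(X, y, avg_steps=10)

# Classifier-based evaluation: returns accuracy and AUC.
acc, auc = supervised_eval(X, y, classifier=None, cv=5, avg_steps=10)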
@@ -19,7 +17,6 @@ class FSEVAL:
                  metrics=None,
                  experiments=None,
                  save_all=False):
-
         self.output_dir = output_dir
         self.cv = cv
         self.avg_steps = avg_steps
@@ -28,11 +25,9 @@ class FSEVAL:
         self.eval_type = eval_type
         self.save_all = save_all
 
-        # Metric configuration
         all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
         self.selected_metrics = metrics if metrics else all_metrics
 
-        # Experiment/Scale configuration
         self.scales = {}
         target_exps = experiments if experiments else ["10Percent", "100Percent"]
         if "10Percent" in target_exps:
@@ -44,14 +39,30 @@ class FSEVAL:
             os.makedirs(self.output_dir)
 
     def random_baseline(self, X, **kwargs):
-        """Randomly assigns importance scores to features."""
         return np.random.rand(X.shape[1])
 
+    def _should_skip(self, ds_name, methods):
+        for m_info in methods:
+            for scale_name in self.scales.keys():
+                last_met = self.selected_metrics[-1]
+                fname = os.path.join(self.output_dir, f"{m_info['name']}_{last_met}_{scale_name}.csv")
+
+                if not os.path.exists(fname):
+                    return False
+
+                df = pd.read_csv(fname)
+                if 'Dataset' not in df.columns or ds_name not in df['Dataset'].values:
+                    return False
+        return True
+
     def run(self, datasets, methods, classifier=None):
-        """Executes the benchmark for given datasets and FS methods."""
         warnings.filterwarnings("ignore")
+
         for ds_name in datasets:
-
+            if self._should_skip(ds_name, methods):
+                print(f">>> Skipping {ds_name}")
+                continue
+
             X, y_raw = load_dataset(ds_name)
             if X is None: continue
 
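The new _should_skip helper makes run resumable: a dataset is skipped only when every method's result CSV (checked via the last selected metric per scale) already contains a row for it. A hypothetical sketch of the effect; the constructor arguments and dataset name are illustrative, not from the package:

from sdufseval.fseval import FSEVAL

bench = FSEVAL(output_dir="results", eval_type="both")
methods = [{"name": "Random", "func": bench.random_baseline, "stochastic": True}]

bench.run(["my_dataset"], methods)  # evaluates and writes the result CSVs
bench.run(["my_dataset"], methods)  # prints ">>> Skipping my_dataset" and moves on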
@@ -63,33 +74,27 @@ class FSEVAL:
                 fs_func = m_info['func']
                 repeats = self.avg_steps if m_info.get('stochastic', False) else 1
 
-                # Internal storage for current dataset results
                 ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}
 
                 for r in range(repeats):
-                    print(f"  [{name}]
-
+                    print(f"  [{name}] {ds_name} - Run {r+1}/{repeats}")
                     scores = fs_func(X)
                     indices = np.argsort(scores)[::-1]
 
                     for scale_name, percentages in self.scales.items():
                         row = {met: {'Dataset': ds_name} for met in self.selected_metrics}
-
                         for p in percentages:
                             k = max(1, min(math.ceil(p * n_features), n_features))
                             X_subset = X[:, indices[:k]]
 
-                            c_acc, nmi = np.nan, np.nan
-                            acc, auc = np.nan, np.nan
+                            res = {"CLSACC": np.nan, "NMI": np.nan, "ACC": np.nan, "AUC": np.nan}
                             if self.eval_type in ["unsupervised", "both"]:
-                                c_acc, nmi = unsupervised_eval(X_subset, y,
-                                                               avg_steps=self.unsupervised_iter)
+                                res["CLSACC"], res["NMI"] = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
                             if self.eval_type in ["supervised", "both"]:
-                                acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
+                                res["ACC"], res["AUC"] = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
 
-                            mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
                             for met in self.selected_metrics:
-                                row[met][p] = mapping[met]
+                                row[met][p] = res[met]
 
                         for met in self.selected_metrics:
                             ds_results[scale_name][met].append(row[met])
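For intuition on the res/row bookkeeping: each metric accumulates one dict per run, keyed by 'Dataset' plus one feature percentage per column, and repeats are later averaged per dataset. A toy example with invented values:

# One ACC row for one run (values invented): one column per percentage p.
row_acc = {'Dataset': 'my_dataset', 0.1: 0.71, 0.5: 0.78, 1.0: 0.80}
# ds_results collects one such dict per repeat; the save step below turns
# them into a DataFrame, groups by 'Dataset', and averages across repeats.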
@@ -105,12 +110,11 @@ class FSEVAL:
                     df_new = df_new.groupby('Dataset').mean().reset_index()
 
                     df_new.columns = df_new.columns.astype(str)
-
                     fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
 
                     if os.path.exists(fname):
                         df_old = pd.read_csv(fname)
-                        df_old.columns = df_old.columns.astype(str)
+                        df_old.columns = df_old.columns.astype(str)
 
                         if self.save_all:
                             df_final = pd.concat([df_old, df_new], ignore_index=True)
@@ -124,29 +128,17 @@ class FSEVAL:
     def timer(self, methods, vary_param='both', time_limit=3600):
         experiments = []
         if vary_param in ['features', 'both']:
-            experiments.append({
-                'name': 'features',
-                'fixed_val': 100,
-                'range': range(1000, 20001, 500),
-                'file': 'time_analysis_features.csv'
-            })
+            experiments.append({'name': 'features', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_features.csv'})
         if vary_param in ['instances', 'both']:
-            experiments.append({
-                'name': 'instances',
-                'fixed_val': 100,
-                'range': range(1000, 20001, 500),
-                'file': 'time_analysis_instances.csv'
-            })
+            experiments.append({'name': 'instances', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_instances.csv'})
 
         for exp in experiments:
             vary_type = exp['name']
             val_range = exp['range']
             filename = os.path.join(self.output_dir, exp['file'])
-
             timed_out_methods = set()
             results = {m['name']: [] for m in methods}
 
-            print(f"\n--- Starting Experiment: Varying {vary_type} ---")
             for val in val_range:
                 if vary_type == 'features':
                     n_samples, n_features = exp['fixed_val'], val
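The timer changes are mostly cosmetic (the experiment dicts are collapsed onto single lines and per-run printing is dropped). A hypothetical invocation, reusing the bench and methods objects from the earlier sketch:

# Scale only the feature count, with a 10-minute per-call budget.
bench.timer(methods, vary_param='features', time_limit=600)
# Writes results/time_analysis_features.csv: one row per method, one column
# per size in range(1000, 20001, 500); once a method exceeds time_limit,
# the remaining sizes are recorded as -1.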
@@ -163,7 +155,6 @@ class FSEVAL:
                 for m_info in methods:
                     name = m_info['name']
                     func = m_info['func']
-
                     if name in timed_out_methods:
                         results[name].append(-1)
                         continue
@@ -172,14 +163,10 @@ class FSEVAL:
                         start_time = time.time()
                         func(X)
                         duration = time.time() - start_time
-
                         if duration > time_limit:
-                            print(f"  - {name:<18}: {duration:.4f}s (TIMEOUT)")
                             timed_out_methods.add(name)
-                        else:
-                            print(f"  - {name:<18}: {duration:.4f}s")
                         results[name].append(duration)
-                    except Exception as e:
+                    except Exception:
                         results[name].append(np.nan)
 
         df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
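Downstream, the timing table can be read back as an ordinary CSV. This post-processing sketch is illustrative, not part of the package, and assumes df_results is written to the filename built above with method names as the index:

import pandas as pd

df = pd.read_csv("results/time_analysis_features.csv", index_col=0)
# -1 marks sizes skipped after a timeout; NaN marks runs that raised.
runtimes = df.replace(-1, pd.NA)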