sdufseval 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py
CHANGED
|
@@ -7,7 +7,6 @@ import pandas as pd
|
|
|
7
7
|
from eval import unsupervised_eval, supervised_eval
|
|
8
8
|
from loader import load_dataset
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
class FSEVAL:
|
|
12
11
|
def __init__(self,
|
|
13
12
|
output_dir="results",
|
|
@@ -19,7 +18,6 @@ class FSEVAL:
|
|
|
19
18
|
metrics=None,
|
|
20
19
|
experiments=None,
|
|
21
20
|
save_all=False):
|
|
22
|
-
|
|
23
21
|
self.output_dir = output_dir
|
|
24
22
|
self.cv = cv
|
|
25
23
|
self.avg_steps = avg_steps
|
|
@@ -28,11 +26,9 @@ class FSEVAL:
|
|
|
28
26
|
self.eval_type = eval_type
|
|
29
27
|
self.save_all = save_all
|
|
30
28
|
|
|
31
|
-
# Metric configuration
|
|
32
29
|
all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
|
|
33
30
|
self.selected_metrics = metrics if metrics else all_metrics
|
|
34
31
|
|
|
35
|
-
# Experiment/Scale configuration
|
|
36
32
|
self.scales = {}
|
|
37
33
|
target_exps = experiments if experiments else ["10Percent", "100Percent"]
|
|
38
34
|
if "10Percent" in target_exps:
|
|
@@ -44,14 +40,30 @@ class FSEVAL:
|
|
|
44
40
|
os.makedirs(self.output_dir)
|
|
45
41
|
|
|
46
42
|
def random_baseline(self, X, **kwargs):
|
|
47
|
-
"""Randomly assigns importance scores to features."""
|
|
48
43
|
return np.random.rand(X.shape[1])
|
|
49
44
|
|
|
45
|
+
def _should_skip(self, ds_name, methods):
|
|
46
|
+
for m_info in methods:
|
|
47
|
+
for scale_name in self.scales.keys():
|
|
48
|
+
last_met = self.selected_metrics[-1]
|
|
49
|
+
fname = os.path.join(self.output_dir, f"{m_info['name']}_{last_met}_{scale_name}.csv")
|
|
50
|
+
|
|
51
|
+
if not os.path.exists(fname):
|
|
52
|
+
return False
|
|
53
|
+
|
|
54
|
+
df = pd.read_csv(fname)
|
|
55
|
+
if 'Dataset' not in df.columns or ds_name not in df['Dataset'].values:
|
|
56
|
+
return False
|
|
57
|
+
return True
|
|
58
|
+
|
|
50
59
|
def run(self, datasets, methods, classifier=None):
|
|
51
|
-
"""Executes the benchmark for given datasets and FS methods."""
|
|
52
60
|
warnings.filterwarnings("ignore")
|
|
61
|
+
|
|
53
62
|
for ds_name in datasets:
|
|
54
|
-
|
|
63
|
+
if self._should_skip(ds_name, methods):
|
|
64
|
+
print(f">>> Skipping {ds_name}")
|
|
65
|
+
continue
|
|
66
|
+
|
|
55
67
|
X, y_raw = load_dataset(ds_name)
|
|
56
68
|
if X is None: continue
|
|
57
69
|
|
|
@@ -63,33 +75,27 @@ class FSEVAL:
|
|
|
63
75
|
fs_func = m_info['func']
|
|
64
76
|
repeats = self.avg_steps if m_info.get('stochastic', False) else 1
|
|
65
77
|
|
|
66
|
-
# Internal storage for current dataset results
|
|
67
78
|
ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}
|
|
68
79
|
|
|
69
80
|
for r in range(repeats):
|
|
70
|
-
print(f" [{name}]
|
|
71
|
-
|
|
81
|
+
print(f" [{name}] {ds_name} - Run {r+1}/{repeats}")
|
|
72
82
|
scores = fs_func(X)
|
|
73
83
|
indices = np.argsort(scores)[::-1]
|
|
74
84
|
|
|
75
85
|
for scale_name, percentages in self.scales.items():
|
|
76
86
|
row = {met: {'Dataset': ds_name} for met in self.selected_metrics}
|
|
77
|
-
|
|
78
87
|
for p in percentages:
|
|
79
88
|
k = max(1, min(math.ceil(p * n_features), n_features))
|
|
80
89
|
X_subset = X[:, indices[:k]]
|
|
81
90
|
|
|
82
|
-
|
|
83
|
-
|
|
91
|
+
res = {"CLSACC": np.nan, "NMI": np.nan, "ACC": np.nan, "AUC": np.nan}
|
|
84
92
|
if self.eval_type in ["unsupervised", "both"]:
|
|
85
|
-
|
|
86
|
-
|
|
93
|
+
res["CLSACC"], res["NMI"] = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
|
|
87
94
|
if self.eval_type in ["supervised", "both"]:
|
|
88
|
-
|
|
95
|
+
res["ACC"], res["AUC"] = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
|
|
89
96
|
|
|
90
|
-
mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
|
|
91
97
|
for met in self.selected_metrics:
|
|
92
|
-
row[met][p] = mapping[met]
|
|
98
|
+
row[met][p] = res[met]
|
|
93
99
|
|
|
94
100
|
for met in self.selected_metrics:
|
|
95
101
|
ds_results[scale_name][met].append(row[met])
|
|
@@ -105,12 +111,11 @@ class FSEVAL:
|
|
|
105
111
|
df_new = df_new.groupby('Dataset').mean().reset_index()
|
|
106
112
|
|
|
107
113
|
df_new.columns = df_new.columns.astype(str)
|
|
108
|
-
|
|
109
114
|
fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
|
|
110
115
|
|
|
111
116
|
if os.path.exists(fname):
|
|
112
117
|
df_old = pd.read_csv(fname)
|
|
113
|
-
df_old.columns = df_old.columns.astype(str)
|
|
118
|
+
df_old.columns = df_old.columns.astype(str)
|
|
114
119
|
|
|
115
120
|
if self.save_all:
|
|
116
121
|
df_final = pd.concat([df_old, df_new], ignore_index=True)
|
|
@@ -124,29 +129,17 @@ class FSEVAL:
|
|
|
124
129
|
def timer(self, methods, vary_param='both', time_limit=3600):
|
|
125
130
|
experiments = []
|
|
126
131
|
if vary_param in ['features', 'both']:
|
|
127
|
-
experiments.append({
|
|
128
|
-
'name': 'features',
|
|
129
|
-
'fixed_val': 100,
|
|
130
|
-
'range': range(1000, 20001, 500),
|
|
131
|
-
'file': 'time_analysis_features.csv'
|
|
132
|
-
})
|
|
132
|
+
experiments.append({'name': 'features', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_features.csv'})
|
|
133
133
|
if vary_param in ['instances', 'both']:
|
|
134
|
-
experiments.append({
|
|
135
|
-
'name': 'instances',
|
|
136
|
-
'fixed_val': 100,
|
|
137
|
-
'range': range(1000, 20001, 500),
|
|
138
|
-
'file': 'time_analysis_instances.csv'
|
|
139
|
-
})
|
|
134
|
+
experiments.append({'name': 'instances', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_instances.csv'})
|
|
140
135
|
|
|
141
136
|
for exp in experiments:
|
|
142
137
|
vary_type = exp['name']
|
|
143
138
|
val_range = exp['range']
|
|
144
139
|
filename = os.path.join(self.output_dir, exp['file'])
|
|
145
|
-
|
|
146
140
|
timed_out_methods = set()
|
|
147
141
|
results = {m['name']: [] for m in methods}
|
|
148
142
|
|
|
149
|
-
print(f"\n--- Starting Experiment: Varying {vary_type} ---")
|
|
150
143
|
for val in val_range:
|
|
151
144
|
if vary_type == 'features':
|
|
152
145
|
n_samples, n_features = exp['fixed_val'], val
|
|
@@ -163,7 +156,6 @@ class FSEVAL:
|
|
|
163
156
|
for m_info in methods:
|
|
164
157
|
name = m_info['name']
|
|
165
158
|
func = m_info['func']
|
|
166
|
-
|
|
167
159
|
if name in timed_out_methods:
|
|
168
160
|
results[name].append(-1)
|
|
169
161
|
continue
|
|
@@ -172,14 +164,10 @@ class FSEVAL:
|
|
|
172
164
|
start_time = time.time()
|
|
173
165
|
func(X)
|
|
174
166
|
duration = time.time() - start_time
|
|
175
|
-
|
|
176
167
|
if duration > time_limit:
|
|
177
|
-
print(f" - {name:<18}: {duration:.4f}s (TIMEOUT)")
|
|
178
168
|
timed_out_methods.add(name)
|
|
179
|
-
else:
|
|
180
|
-
print(f" - {name:<18}: {duration:.4f}s")
|
|
181
169
|
results[name].append(duration)
|
|
182
|
-
except Exception
|
|
170
|
+
except Exception:
|
|
183
171
|
results[name].append(np.nan)
|
|
184
172
|
|
|
185
173
|
df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sdufseval
|
|
3
|
-
Version: 1.0.5
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: Evaluation and Benchmark Tool for Feature Selection
|
|
5
5
|
Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
|
|
2
|
+
sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
|
|
3
|
+
sdufseval/fseval.py,sha256=VNFFYB72GCesInKeFJftGDg9pd2eI75gkArIPQrf5sY,7594
|
|
4
|
+
sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
|
|
5
|
+
sdufseval-1.0.6.dist-info/METADATA,sha256=Ja5jQbLYP9Fkb5BnZxQ6S1YiM4BkiD4SEYVIIGtgDwI,5892
|
|
6
|
+
sdufseval-1.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
sdufseval-1.0.6.dist-info/RECORD,,
|
sdufseval-1.0.5.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
|
|
2
|
-
sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
|
|
3
|
-
sdufseval/fseval.py,sha256=MsirV6niLvyQzz95qtPBJ3zaI9XRFdGj1pYIuTw4F68,7864
|
|
4
|
-
sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
|
|
5
|
-
sdufseval-1.0.5.dist-info/METADATA,sha256=YhZY5IpbbWaUkKZWMCJ_bbbR5VtXG8iKXfjOFWAGMAs,5892
|
|
6
|
-
sdufseval-1.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
-
sdufseval-1.0.5.dist-info/RECORD,,
|
|
File without changes
|