sdufseval 1.0.5.tar.gz → 1.0.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdufseval
-Version: 1.0.5
+Version: 1.0.7
 Summary: Evaluation and Benchmark Tool for Feature Selection
 Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
 Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "sdufseval"
-version = "1.0.5"
+version = "1.0.7"
 authors = [
   { name="Muhammad Rajabinasab", email="muhammad.rajabinasab@outlook.com" },
 ]
@@ -4,9 +4,7 @@ import time
 import warnings
 import numpy as np
 import pandas as pd
-from eval import unsupervised_eval, supervised_eval
-from loader import load_dataset
-
+from sdufseval import supervised_eval, unsupervised_eval, load_dataset
 
 class FSEVAL:
     def __init__(self,
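In 1.0.5 this module imported its helpers from the bare top-level modules eval and loader, which generally resolve only when the package's source directory is on sys.path; 1.0.7 imports them from the installed sdufseval package instead. A minimal sketch of the resulting user-facing imports, using only names and keyword arguments visible in this diff (the dataset name and numeric values are illustrative):

from sdufseval import load_dataset, supervised_eval, unsupervised_eval

X, y = load_dataset("example")                                        # "example" is a hypothetical dataset name
clsacc, nmi = unsupervised_eval(X, y, avg_steps=5)                    # values stored as CLSACC and NMI in run()
acc, auc = supervised_eval(X, y, classifier=None, cv=5, avg_steps=5)  # values stored as ACC and AUC in run()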
@@ -19,7 +17,6 @@ class FSEVAL:
                  metrics=None,
                  experiments=None,
                  save_all=False):
-
         self.output_dir = output_dir
         self.cv = cv
         self.avg_steps = avg_steps
@@ -28,11 +25,9 @@ class FSEVAL:
         self.eval_type = eval_type
         self.save_all = save_all
 
-        # Metric configuration
         all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
         self.selected_metrics = metrics if metrics else all_metrics
 
-        # Experiment/Scale configuration
         self.scales = {}
         target_exps = experiments if experiments else ["10Percent", "100Percent"]
         if "10Percent" in target_exps:
@@ -44,14 +39,30 @@ class FSEVAL:
             os.makedirs(self.output_dir)
 
     def random_baseline(self, X, **kwargs):
-        """Randomly assigns importance scores to features."""
        return np.random.rand(X.shape[1])
 
+    def _should_skip(self, ds_name, methods):
+        for m_info in methods:
+            for scale_name in self.scales.keys():
+                last_met = self.selected_metrics[-1]
+                fname = os.path.join(self.output_dir, f"{m_info['name']}_{last_met}_{scale_name}.csv")
+
+                if not os.path.exists(fname):
+                    return False
+
+                df = pd.read_csv(fname)
+                if 'Dataset' not in df.columns or ds_name not in df['Dataset'].values:
+                    return False
+        return True
+
     def run(self, datasets, methods, classifier=None):
-        """Executes the benchmark for given datasets and FS methods."""
         warnings.filterwarnings("ignore")
+
         for ds_name in datasets:
-            print(f"\n>>> Benchmarking Dataset: {ds_name}")
+            if self._should_skip(ds_name, methods):
+                print(f">>> Skipping {ds_name}")
+                continue
+
             X, y_raw = load_dataset(ds_name)
             if X is None: continue
 
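The _should_skip helper added in 1.0.7 gives run() a resume capability: a dataset is skipped only if, for every method and every configured scale, the CSV of the last selected metric already exists and contains a row for that dataset. A hedged usage sketch, assuming FSEVAL is importable from the package top level (the diff only shows the class body) and using a hypothetical dataset name:

from sdufseval import FSEVAL   # assumed top-level export

fs = FSEVAL(output_dir="results")   # remaining constructor arguments left at their defaults
methods = [{"name": "random", "func": fs.random_baseline, "stochastic": True}]

fs.run(datasets=["example"], methods=methods)   # writes <method>_<metric>_<scale>.csv files
fs.run(datasets=["example"], methods=methods)   # finds them and prints ">>> Skipping example"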
@@ -63,33 +74,27 @@ class FSEVAL:
                 fs_func = m_info['func']
                 repeats = self.avg_steps if m_info.get('stochastic', False) else 1
 
-                # Internal storage for current dataset results
                 ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}
 
                 for r in range(repeats):
-                    print(f" [{name}] Progress: {r+1}/{repeats}")
-
+                    print(f" [{name}] {ds_name} - Run {r+1}/{repeats}")
                     scores = fs_func(X)
                     indices = np.argsort(scores)[::-1]
 
                     for scale_name, percentages in self.scales.items():
                         row = {met: {'Dataset': ds_name} for met in self.selected_metrics}
-
                         for p in percentages:
                             k = max(1, min(math.ceil(p * n_features), n_features))
                             X_subset = X[:, indices[:k]]
 
-                            c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan
-
+                            res = {"CLSACC": np.nan, "NMI": np.nan, "ACC": np.nan, "AUC": np.nan}
                             if self.eval_type in ["unsupervised", "both"]:
-                                c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
-
+                                res["CLSACC"], res["NMI"] = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
                             if self.eval_type in ["supervised", "both"]:
-                                acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
+                                res["ACC"], res["AUC"] = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
 
-                            mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
                             for met in self.selected_metrics:
-                                row[met][p] = mapping[met]
+                                row[met][p] = res[met]
 
                         for met in self.selected_metrics:
                             ds_results[scale_name][met].append(row[met])
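Each percentage p is converted to a feature count with k = max(1, min(ceil(p * n_features), n_features)), so every subset keeps at least one feature and never more than all of them. A small illustrative check (the feature count is arbitrary, not taken from the diff):

import math

n_features = 500                      # illustrative value
for p in (0.001, 0.1, 1.0):           # illustrative percentages
    k = max(1, min(math.ceil(p * n_features), n_features))
    print(p, k)                       # 0.001 -> 1, 0.1 -> 50, 1.0 -> 500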
@@ -105,12 +110,11 @@ class FSEVAL:
                 df_new = df_new.groupby('Dataset').mean().reset_index()
 
                 df_new.columns = df_new.columns.astype(str)
-
                 fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
 
                 if os.path.exists(fname):
                     df_old = pd.read_csv(fname)
-                    df_old.columns = df_old.columns.astype(str)
+                    df_old.columns = df_old.columns.astype(str)
 
                     if self.save_all:
                         df_final = pd.concat([df_old, df_new], ignore_index=True)
@@ -124,29 +128,17 @@ class FSEVAL:
     def timer(self, methods, vary_param='both', time_limit=3600):
         experiments = []
         if vary_param in ['features', 'both']:
-            experiments.append({
-                'name': 'features',
-                'fixed_val': 100,
-                'range': range(1000, 20001, 500),
-                'file': 'time_analysis_features.csv'
-            })
+            experiments.append({'name': 'features', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_features.csv'})
         if vary_param in ['instances', 'both']:
-            experiments.append({
-                'name': 'instances',
-                'fixed_val': 100,
-                'range': range(1000, 20001, 500),
-                'file': 'time_analysis_instances.csv'
-            })
+            experiments.append({'name': 'instances', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_instances.csv'})
 
         for exp in experiments:
             vary_type = exp['name']
             val_range = exp['range']
             filename = os.path.join(self.output_dir, exp['file'])
-
             timed_out_methods = set()
             results = {m['name']: [] for m in methods}
 
-            print(f"\n--- Starting Experiment: Varying {vary_type} ---")
            for val in val_range:
                if vary_type == 'features':
                    n_samples, n_features = exp['fixed_val'], val
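Both experiments keep the fixed dimension at 100 and sweep the varied one over range(1000, 20001, 500), i.e. 39 sizes from 1000 to 20000:

sizes = list(range(1000, 20001, 500))
print(len(sizes), sizes[0], sizes[-1])   # 39 1000 20000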
@@ -163,7 +155,6 @@ class FSEVAL:
                for m_info in methods:
                    name = m_info['name']
                    func = m_info['func']
-
                    if name in timed_out_methods:
                        results[name].append(-1)
                        continue
@@ -172,14 +163,10 @@ class FSEVAL:
                        start_time = time.time()
                        func(X)
                        duration = time.time() - start_time
-
                        if duration > time_limit:
-                            print(f" - {name:<18}: {duration:.4f}s (TIMEOUT)")
                            timed_out_methods.add(name)
-                        else:
-                            print(f" - {name:<18}: {duration:.4f}s")
                        results[name].append(duration)
-                    except Exception as e:
+                    except Exception:
                        results[name].append(np.nan)
 
            df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
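In the timing loop above, a method that exceeds time_limit still has that duration recorded and is padded with -1 for all remaining sizes, while any exception records NaN for that size; the per-method progress prints from 1.0.5 are gone. A hedged usage sketch, assuming FSEVAL is importable from the package top level; the scorer below is a stand-in, not part of the package:

import numpy as np
from sdufseval import FSEVAL   # assumed top-level export

def variance_scores(X, **kwargs):
    # toy feature scorer: rank features by their variance
    return np.asarray(X).var(axis=0)

fs = FSEVAL(output_dir="results")
fs.timer(methods=[{"name": "variance", "func": variance_scores}],
         vary_param="features", time_limit=600)
# df_results holds one row per method and one column per swept size; the
# target path built above is results/time_analysis_features.csv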
3 files without changes