sdufseval 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py CHANGED
@@ -4,9 +4,8 @@ import time
  import warnings
  import numpy as np
  import pandas as pd
- from .eval import unsupervised_eval, supervised_eval
- from .loader import load_dataset
-
+ from eval import unsupervised_eval, supervised_eval
+ from loader import load_dataset

  class FSEVAL:
      def __init__(self,
@@ -17,22 +16,19 @@ class FSEVAL:
                   unsupervised_iter=10,
                   eval_type="both",
                   metrics=None,
-                  experiments=None):
-         """
-         Feature Selection Evaluation Suite.
-         """
+                  experiments=None,
+                  save_all=False):
          self.output_dir = output_dir
          self.cv = cv
          self.avg_steps = avg_steps
          self.supervised_iter = supervised_iter
          self.unsupervised_iter = unsupervised_iter
          self.eval_type = eval_type
+         self.save_all = save_all

-         # Metric configuration
          all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
          self.selected_metrics = metrics if metrics else all_metrics

-         # Experiment/Scale configuration
          self.scales = {}
          target_exps = experiments if experiments else ["10Percent", "100Percent"]
          if "10Percent" in target_exps:
@@ -44,24 +40,30 @@ class FSEVAL:
              os.makedirs(self.output_dir)

      def random_baseline(self, X, **kwargs):
-         """
-         Randomly assigns importance scores to features.
-         Internal method for lower-bound baseline.
-         """
          return np.random.rand(X.shape[1])

+     def _should_skip(self, ds_name, methods):
+         for m_info in methods:
+             for scale_name in self.scales.keys():
+                 last_met = self.selected_metrics[-1]
+                 fname = os.path.join(self.output_dir, f"{m_info['name']}_{last_met}_{scale_name}.csv")
+
+                 if not os.path.exists(fname):
+                     return False
+
+                 df = pd.read_csv(fname)
+                 if 'Dataset' not in df.columns or ds_name not in df['Dataset'].values:
+                     return False
+         return True
+
      def run(self, datasets, methods, classifier=None):
-         """
-         Executes the benchmark for given datasets and FS methods.
-
-         Args:
-             datasets: List of dataset names.
-             methods: List of dicts {'name': str, 'func': callable, 'stochastic': bool}.
-             classifier: Optional sklearn classifier instance to pass to supervised_eval.
-         """
          warnings.filterwarnings("ignore")
+
          for ds_name in datasets:
-             print(f"\n>>> Benchmarking Dataset: {ds_name}")
+             if self._should_skip(ds_name, methods):
+                 print(f">>> Skipping {ds_name}")
+                 continue
+
              X, y_raw = load_dataset(ds_name)
              if X is None: continue

@@ -71,89 +73,74 @@ class FSEVAL:
              for m_info in methods:
                  name = m_info['name']
                  fs_func = m_info['func']
-                 # Stochastic methods run 10 times and average
                  repeats = self.avg_steps if m_info.get('stochastic', False) else 1

-                 # Internal storage for current dataset results
                  ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}

                  for r in range(repeats):
-                     print(f" [{name}] Progress: {r+1}/{repeats}")
-
-                     # Get feature ranking
+                     print(f" [{name}] {ds_name} - Run {r+1}/{repeats}")
                      scores = fs_func(X)
                      indices = np.argsort(scores)[::-1]

                      for scale_name, percentages in self.scales.items():
                          row = {met: {'Dataset': ds_name} for met in self.selected_metrics}
-
                          for p in percentages:
                              k = max(1, min(math.ceil(p * n_features), n_features))
                              X_subset = X[:, indices[:k]]

-                             # Run evaluators
-                             c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan
-
+                             res = {"CLSACC": np.nan, "NMI": np.nan, "ACC": np.nan, "AUC": np.nan}
                              if self.eval_type in ["unsupervised", "both"]:
-                                 c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
-
+                                 res["CLSACC"], res["NMI"] = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
                              if self.eval_type in ["supervised", "both"]:
-                                 # Passes classifier (None or instance) to eval.py
-                                 acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
+                                 res["ACC"], res["AUC"] = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)

-                             # Map metrics to columns
-                             mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
                              for met in self.selected_metrics:
-                                 row[met][p] = mapping[met]
+                                 row[met][p] = res[met]

                          for met in self.selected_metrics:
                              ds_results[scale_name][met].append(row[met])

-                 # Save/Update results for this method/dataset
                  self._save_results(name, ds_results)

-
+     def _save_results(self, method_name, ds_results):
+         for scale, metrics in ds_results.items():
+             for met_name, rows in metrics.items():
+                 df_new = pd.DataFrame(rows)
+
+                 if not self.save_all:
+                     df_new = df_new.groupby('Dataset').mean().reset_index()
+
+                 df_new.columns = df_new.columns.astype(str)
+                 fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
+
+                 if os.path.exists(fname):
+                     df_old = pd.read_csv(fname)
+                     df_old.columns = df_old.columns.astype(str)
+
+                     if self.save_all:
+                         df_final = pd.concat([df_old, df_new], ignore_index=True)
+                     else:
+                         df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
+                 else:
+                     df_final = df_new
+
+                 df_final.to_csv(fname, index=False)
+
      def timer(self, methods, vary_param='both', time_limit=3600):
-         """
-         Runs a standalone runtime analysis experiment with a time cap.
-
-         Args:
-             methods: List of dicts {'name': str, 'func': callable}.
-             vary_param: 'features', 'instances', or 'both'.
-             time_limit: Max seconds per method before it is skipped.
-         """
-
-         # Determine which experiments to run
          experiments = []
          if vary_param in ['features', 'both']:
-             experiments.append({
-                 'name': 'features',
-                 'fixed_val': 100,
-                 'range': range(1000, 20001, 500),
-                 'file': 'time_analysis_features.csv'
-             })
+             experiments.append({'name': 'features', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_features.csv'})
          if vary_param in ['instances', 'both']:
-             experiments.append({
-                 'name': 'instances',
-                 'fixed_val': 100,
-                 'range': range(1000, 20001, 500),
-                 'file': 'time_analysis_instances.csv'
-             })
+             experiments.append({'name': 'instances', 'fixed_val': 100, 'range': range(1000, 20001, 500), 'file': 'time_analysis_instances.csv'})

          for exp in experiments:
              vary_type = exp['name']
              val_range = exp['range']
              filename = os.path.join(self.output_dir, exp['file'])
-
-             # Tracking for this specific experiment
              timed_out_methods = set()
              results = {m['name']: [] for m in methods}

-             print(f"\n--- Starting Experiment: Varying {vary_type} ---")
-             print(f"Time limit: {time_limit}s | Output: {filename}")
-
              for val in val_range:
-                 # 1. Generate synthetic data based on vary_param
                  if vary_type == 'features':
                      n_samples, n_features = exp['fixed_val'], val
                  else:
@@ -162,62 +149,27 @@ class FSEVAL:
                  try:
                      X = np.random.rand(n_samples, n_features)
                  except MemoryError:
-                     print(f" FATAL: MemoryError: Failed to allocate {n_samples}x{n_features} data.")
                      for m in methods:
                          results[m['name']].append(-1 if m['name'] in timed_out_methods else np.nan)
                      continue

-                 # 2. Run each method
                  for m_info in methods:
                      name = m_info['name']
                      func = m_info['func']
-
-                     # Check if method has already timed out in this experiment
                      if name in timed_out_methods:
                          results[name].append(-1)
                          continue

                      try:
                          start_time = time.time()
-
-                         # Execute the method (assuming benchmark format)
                          func(X)
-
                          duration = time.time() - start_time
-
                          if duration > time_limit:
-                             print(f" - {name:<18}: {duration:.4f}s (TIMEOUT - skipping future runs)")
                              timed_out_methods.add(name)
-                         else:
-                             print(f" - {name:<18}: {duration:.4f}s")
-
                          results[name].append(duration)
-
-                     except Exception as e:
-                         print(f" - {name:<18}: FAILED ({type(e).__name__})")
+                     except Exception:
                          results[name].append(np.nan)

-             # 3. Save results to CSV
-             try:
-                 df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
-                 df_results.index.name = 'Method'
-                 df_results.to_csv(filename)
-                 print(f"\n--- Results saved to {filename} ---")
-             except Exception as e:
-                 print(f"\n--- FAILED to save results: {e} ---")
-
-
-     def _save_results(self, method_name, ds_results):
-         """Aggregates repeats and saves to disk after each dataset."""
-         for scale, metrics in ds_results.items():
-             for met_name, rows in metrics.items():
-                 df_new = pd.DataFrame(rows).groupby('Dataset').mean().reset_index()
-                 fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
-
-                 if os.path.exists(fname):
-                     df_old = pd.read_csv(fname)
-                     df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
-                 else:
-                     df_final = df_new
-
-                 df_final.to_csv(fname, index=False)
+             df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
+             df_results.index.name = 'Method'
+             df_results.to_csv(filename)
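To make the behavioural change in `_save_results` concrete: with `save_all=False` repeated runs are averaged per dataset and any existing row for that dataset is overwritten, while with `save_all=True` every run is appended as its own row. The pandas sketch below is not part of the package; column names and values are purely illustrative.

```python
import pandas as pd

# Existing CSV contents for one method/metric/scale (illustrative values).
df_old = pd.DataFrame([{"Dataset": "toy", "0.1": 0.80, "1.0": 0.90}])
# Two fresh repeats of a stochastic method on the same dataset.
df_new = pd.DataFrame([{"Dataset": "toy", "0.1": 0.82, "1.0": 0.91},
                       {"Dataset": "toy", "0.1": 0.86, "1.0": 0.93}])

# save_all=False: average the repeats, then replace the old row for that dataset.
averaged = df_new.groupby("Dataset").mean().reset_index()
replaced = pd.concat([df_old, averaged]).drop_duplicates(subset=["Dataset"], keep="last")
print(replaced)   # one row: the new averaged result for 'toy'

# save_all=True: keep every run as its own row.
appended = pd.concat([df_old, df_new], ignore_index=True)
print(appended)   # three rows: the old row plus both new runs
```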
sdufseval-1.0.4.dist-info/METADATA → sdufseval-1.0.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sdufseval
- Version: 1.0.4
+ Version: 1.0.6
  Summary: Evaluation and Benchmark Tool for Feature Selection
  Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
  Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -124,6 +124,7 @@ Initializes the evalutation and benchmark object.
  | **`eval_type`** | both | "supervised", "unsupervised", or "both". |
  | **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | Evaluation metrics to calculate. |
  | **`experiments`** | ["10Percent", "100Percent"] | Which feature ratio grids to evaluate. |
+ | **`save_all`** | False | Save the results of all runs of the stochastic methods separately. |

  ### ⚙️ `run(datasets, methods, classifier=None)`

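A hedged usage sketch for the new `save_all` option documented above. The class and argument names (`FSEVAL`, `output_dir`, `eval_type`, `save_all`, `run(datasets, methods, classifier)`) come from the diff; the import path, the `"toy"` dataset name, and the random scorer are assumptions for illustration only.

```python
import numpy as np
# Assumed import path; the diff only shows that the class lives in sdufseval/fseval.py.
from sdufseval.fseval import FSEVAL

# Placeholder feature scorer (same shape contract as random_baseline in the diff).
def random_scores(X, **kwargs):
    return np.random.rand(X.shape[1])

bench = FSEVAL(output_dir="results",   # one CSV per method/metric/scale is written here
               eval_type="both",
               save_all=True)          # new in 1.0.6: keep each stochastic run as its own row

# "toy" is a placeholder dataset name; the methods dict format
# ({'name', 'func', 'stochastic'}) comes from the removed run() docstring.
bench.run(datasets=["toy"],
          methods=[{"name": "Random", "func": random_scores, "stochastic": True}],
          classifier=None)             # forwarded unchanged to supervised_eval
```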
sdufseval-1.0.6.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
+ sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
+ sdufseval/fseval.py,sha256=VNFFYB72GCesInKeFJftGDg9pd2eI75gkArIPQrf5sY,7594
+ sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
+ sdufseval-1.0.6.dist-info/METADATA,sha256=Ja5jQbLYP9Fkb5BnZxQ6S1YiM4BkiD4SEYVIIGtgDwI,5892
+ sdufseval-1.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ sdufseval-1.0.6.dist-info/RECORD,,
sdufseval-1.0.4.dist-info/RECORD REMOVED
@@ -1,7 +0,0 @@
- sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
- sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
- sdufseval/fseval.py,sha256=LPBKfXEj6lHSKPSEVv1b9U7V_7jXNBWpd5snD355_Rc,9513
- sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
- sdufseval-1.0.4.dist-info/METADATA,sha256=1qm0ZQSe4UhcAazHwOdLdoIISx3QpxhKI4qPbtPswOU,5796
- sdufseval-1.0.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- sdufseval-1.0.4.dist-info/RECORD,,