sdufseval-1.0.3-py3-none-any.whl → sdufseval-1.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py CHANGED
@@ -17,16 +17,16 @@ class FSEVAL:
  unsupervised_iter=10,
  eval_type="both",
  metrics=None,
- experiments=None):
- """
- Feature Selection Evaluation Suite.
- """
+ experiments=None,
+ save_all=False):
+
  self.output_dir = output_dir
  self.cv = cv
  self.avg_steps = avg_steps
  self.supervised_iter = supervised_iter
  self.unsupervised_iter = unsupervised_iter
  self.eval_type = eval_type
+ self.save_all = save_all

  # Metric configuration
  all_metrics = ["CLSACC", "NMI", "ACC", "AUC"]
@@ -44,21 +44,11 @@ class FSEVAL:
  os.makedirs(self.output_dir)

  def random_baseline(self, X, **kwargs):
- """
- Randomly assigns importance scores to features.
- Internal method for lower-bound baseline.
- """
+ """Randomly assigns importance scores to features."""
  return np.random.rand(X.shape[1])

  def run(self, datasets, methods, classifier=None):
- """
- Executes the benchmark for given datasets and FS methods.
-
- Args:
- datasets: List of dataset names.
- methods: List of dicts {'name': str, 'func': callable, 'stochastic': bool}.
- classifier: Optional sklearn classifier instance to pass to supervised_eval.
- """
+ """Executes the benchmark for given datasets and FS methods."""
  warnings.filterwarnings("ignore")
  for ds_name in datasets:
  print(f"\n>>> Benchmarking Dataset: {ds_name}")
@@ -71,7 +61,6 @@ class FSEVAL:
  for m_info in methods:
  name = m_info['name']
  fs_func = m_info['func']
- # Stochastic methods run 10 times and average
  repeats = self.avg_steps if m_info.get('stochastic', False) else 1

  # Internal storage for current dataset results
@@ -80,7 +69,6 @@ class FSEVAL:
  for r in range(repeats):
  print(f" [{name}] Progress: {r+1}/{repeats}")

- # Get feature ranking
  scores = fs_func(X)
  indices = np.argsort(scores)[::-1]

@@ -91,17 +79,14 @@ class FSEVAL:
  k = max(1, min(math.ceil(p * n_features), n_features))
  X_subset = X[:, indices[:k]]

- # Run evaluators
  c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan

  if self.eval_type in ["unsupervised", "both"]:
  c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)

  if self.eval_type in ["supervised", "both"]:
- # Passes classifier (None or instance) to eval.py
  acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)

- # Map metrics to columns
  mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
  for met in self.selected_metrics:
  row[met][p] = mapping[met]
@@ -109,21 +94,34 @@ class FSEVAL:
  for met in self.selected_metrics:
  ds_results[scale_name][met].append(row[met])

- # Save/Update results for this method/dataset
  self._save_results(name, ds_results)

-
+ def _save_results(self, method_name, ds_results):
+ for scale, metrics in ds_results.items():
+ for met_name, rows in metrics.items():
+ df_new = pd.DataFrame(rows)
+
+ if not self.save_all:
+ df_new = df_new.groupby('Dataset').mean().reset_index()
+
+ df_new.columns = df_new.columns.astype(str)
+
+ fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
+
+ if os.path.exists(fname):
+ df_old = pd.read_csv(fname)
+ df_old.columns = df_old.columns.astype(str)
+
+ if self.save_all:
+ df_final = pd.concat([df_old, df_new], ignore_index=True)
+ else:
+ df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
+ else:
+ df_final = df_new
+
+ df_final.to_csv(fname, index=False)
+
  def timer(self, methods, vary_param='both', time_limit=3600):
- """
- Runs a standalone runtime analysis experiment with a time cap.
-
- Args:
- methods: List of dicts {'name': str, 'func': callable}.
- vary_param: 'features', 'instances', or 'both'.
- time_limit: Max seconds per method before it is skipped.
- """
-
- # Determine which experiments to run
  experiments = []
  if vary_param in ['features', 'both']:
  experiments.append({
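
The new `_save_results` in the hunk above writes one CSV per method/metric/scale and branches on `save_all`: with `save_all=False`, repeats are collapsed into a per-dataset mean and an existing row for the same dataset is replaced via `drop_duplicates(..., keep='last')`; with `save_all=True`, every run is appended as its own row. A minimal pandas sketch of that difference, using made-up rows rather than the suite's real column layout:

```python
import pandas as pd

# Illustrative rows only; the real columns come from FSEVAL's feature-ratio grid.
df_old = pd.DataFrame([{"Dataset": "toy", "0.1": 0.80, "0.2": 0.84}])
df_new = pd.DataFrame([{"Dataset": "toy", "0.1": 0.78, "0.2": 0.86}])

# save_all=True: keep every run as a separate row (two rows for "toy").
all_runs = pd.concat([df_old, df_new], ignore_index=True)

# save_all=False: keep only the latest row per dataset (one row for "toy").
latest = pd.concat([df_old, df_new]).drop_duplicates(subset=["Dataset"], keep="last")

print(all_runs)
print(latest)
```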
@@ -145,15 +143,11 @@ class FSEVAL:
  val_range = exp['range']
  filename = os.path.join(self.output_dir, exp['file'])

- # Tracking for this specific experiment
  timed_out_methods = set()
  results = {m['name']: [] for m in methods}

  print(f"\n--- Starting Experiment: Varying {vary_type} ---")
- print(f"Time limit: {time_limit}s | Output: {filename}")
-
  for val in val_range:
- # 1. Generate synthetic data based on vary_param
  if vary_type == 'features':
  n_samples, n_features = exp['fixed_val'], val
  else:
@@ -162,62 +156,32 @@ class FSEVAL:
  try:
  X = np.random.rand(n_samples, n_features)
  except MemoryError:
- print(f" FATAL: MemoryError: Failed to allocate {n_samples}x{n_features} data.")
  for m in methods:
  results[m['name']].append(-1 if m['name'] in timed_out_methods else np.nan)
  continue

- # 2. Run each method
  for m_info in methods:
  name = m_info['name']
  func = m_info['func']

- # Check if method has already timed out in this experiment
  if name in timed_out_methods:
  results[name].append(-1)
  continue

  try:
  start_time = time.time()
-
- # Execute the method (assuming benchmark format)
  func(X)
-
  duration = time.time() - start_time

  if duration > time_limit:
- print(f" - {name:<18}: {duration:.4f}s (TIMEOUT - skipping future runs)")
+ print(f" - {name:<18}: {duration:.4f}s (TIMEOUT)")
  timed_out_methods.add(name)
  else:
  print(f" - {name:<18}: {duration:.4f}s")
-
  results[name].append(duration)
-
  except Exception as e:
- print(f" - {name:<18}: FAILED ({type(e).__name__})")
  results[name].append(np.nan)

- # 3. Save results to CSV
- try:
- df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
- df_results.index.name = 'Method'
- df_results.to_csv(filename)
- print(f"\n--- Results saved to {filename} ---")
- except Exception as e:
- print(f"\n--- FAILED to save results: {e} ---")
-
-
- def _save_results(self, method_name, ds_results):
- """Aggregates repeats and saves to disk after each dataset."""
- for scale, metrics in ds_results.items():
- for met_name, rows in metrics.items():
- df_new = pd.DataFrame(rows).groupby('Dataset').mean().reset_index()
- fname = os.path.join(self.output_dir, f"{method_name}_{met_name}_{scale}.csv")
-
- if os.path.exists(fname):
- df_old = pd.read_csv(fname)
- df_final = pd.concat([df_old, df_new]).drop_duplicates(subset=['Dataset'], keep='last')
- else:
- df_final = df_new
-
- df_final.to_csv(fname, index=False)
+ df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
+ df_results.index.name = 'Method'
+ df_results.to_csv(filename)
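
For orientation, here is a hedged usage sketch of the 1.0.5 API based on the hunks above. The parameter and method names (`output_dir`, `eval_type`, `save_all`, `run`, `timer`, and the `{'name', 'func', 'stochastic'}` method dict) come from this diff and the README excerpt below; the dataset name, the scoring function, and the import path are illustrative assumptions, not something the diff confirms.

```python
import numpy as np
from sdufseval.fseval import FSEVAL  # import path assumed from the wheel's module layout

def variance_scores(X):
    # Toy scorer for the sketch: higher variance -> higher importance.
    return np.var(X, axis=0)

suite = FSEVAL(
    output_dir="results",
    eval_type="both",
    save_all=True,  # new in 1.0.5: keep each repeat of a stochastic method as its own row
)

# Benchmark run: dataset names plus method dicts with 'name', 'func',
# and an optional 'stochastic' flag.
suite.run(
    datasets=["my_dataset"],  # hypothetical dataset name resolved by the package's loader
    methods=[{"name": "Variance", "func": variance_scores, "stochastic": False}],
)

# Standalone runtime experiment with a per-call time cap in seconds.
suite.timer(
    methods=[{"name": "Variance", "func": variance_scores}],
    vary_param="features",
    time_limit=600,
)
```

As the final hunk shows, `timer` now writes the timing table unconditionally: one row per method (index named `Method`), one column per value in the varied range, with `-1` marking methods that previously exceeded `time_limit` and `NaN` marking failures.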
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sdufseval
- Version: 1.0.3
+ Version: 1.0.5
  Summary: Evaluation and Benchmark Tool for Feature Selection
  Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
  Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -124,6 +124,7 @@ Initializes the evalutation and benchmark object.
  | **`eval_type`** | both | "supervised", "unsupervised", or "both". |
  | **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | Evaluation metrics to calculate. |
  | **`experiments`** | ["10Percent", "100Percent"] | Which feature ratio grids to evaluate. |
+ | **`save_all`** | False | Save the results of all runs of the stochastic methods separately. |

  ### ⚙️ `run(datasets, methods, classifier=None)`
@@ -0,0 +1,7 @@
+ sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
+ sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
+ sdufseval/fseval.py,sha256=MsirV6niLvyQzz95qtPBJ3zaI9XRFdGj1pYIuTw4F68,7864
+ sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
+ sdufseval-1.0.5.dist-info/METADATA,sha256=YhZY5IpbbWaUkKZWMCJ_bbbR5VtXG8iKXfjOFWAGMAs,5892
+ sdufseval-1.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ sdufseval-1.0.5.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
- sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
- sdufseval/fseval.py,sha256=kEsOlZ9B6yPZ0-SK6l_9LY08eTOyz4sd26TfXAANGAg,9511
- sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
- sdufseval-1.0.3.dist-info/METADATA,sha256=-NnHEAUQmru6sdBDuDQJhX9znIkF_jvtLxel7NYNIfw,5796
- sdufseval-1.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- sdufseval-1.0.3.dist-info/RECORD,,