sdufseval 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py CHANGED
@@ -1,5 +1,6 @@
  import os
  import math
+ import time
  import warnings
  import numpy as np
  import pandas as pd
@@ -11,7 +12,9 @@ class FSEVAL:
      def __init__(self,
                   output_dir="results",
                   cv=5,
-                  avg_steps=10,
+                  avg_steps=10,
+                  supervised_iter=5,
+                  unsupervised_iter=10,
                   eval_type="both",
                   metrics=None,
                   experiments=None):
@@ -21,6 +24,8 @@ class FSEVAL:
          self.output_dir = output_dir
          self.cv = cv
          self.avg_steps = avg_steps
+         self.supervised_iter = supervised_iter
+         self.unsupervised_iter = unsupervised_iter
          self.eval_type = eval_type

          # Metric configuration
@@ -67,7 +72,7 @@ class FSEVAL:
              name = m_info['name']
              fs_func = m_info['func']
-             # Stochastic methods run 10 times and average
-             repeats = 10 if m_info.get('stochastic', False) else 1
+             # Stochastic methods run avg_steps times and average
+             repeats = self.avg_steps if m_info.get('stochastic', False) else 1

              # Internal storage for current dataset results
              ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}
@@ -90,11 +95,11 @@ class FSEVAL:
              c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan

              if self.eval_type in ["unsupervised", "both"]:
-                 c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.avg_steps)
+                 c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)

              if self.eval_type in ["supervised", "both"]:
                  # Passes classifier (None or instance) to eval.py
-                 acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.avg_steps)
+                 acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)

              # Map metrics to columns
              mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
@@ -107,6 +112,101 @@ class FSEVAL:
              # Save/Update results for this method/dataset
              self._save_results(name, ds_results)

+
+     def timer(self, methods, vary_param='both', time_limit=3600):
+         """
+         Runs a standalone runtime analysis experiment with a time cap.
+
+         Args:
+             methods: List of dicts {'name': str, 'func': callable}.
+             vary_param: 'features', 'instances', or 'both'.
+             time_limit: Max seconds per method before it is skipped.
+         """
+
+         # Determine which experiments to run
+         experiments = []
+         if vary_param in ['features', 'both']:
+             experiments.append({
+                 'name': 'features',
+                 'fixed_val': 100,
+                 'range': range(1000, 20001, 500),
+                 'file': 'time_analysis_features.csv'
+             })
+         if vary_param in ['instances', 'both']:
+             experiments.append({
+                 'name': 'instances',
+                 'fixed_val': 100,
+                 'range': range(1000, 20001, 500),
+                 'file': 'time_analysis_instances.csv'
+             })
+
+         for exp in experiments:
+             vary_type = exp['name']
+             val_range = exp['range']
+             filename = os.path.join(self.output_dir, exp['file'])
+
+             # Tracking for this specific experiment
+             timed_out_methods = set()
+             results = {m['name']: [] for m in methods}
+
+             print(f"\n--- Starting Experiment: Varying {vary_type} ---")
+             print(f"Time limit: {time_limit}s | Output: {filename}")
+
+             for val in val_range:
+                 # 1. Generate synthetic data based on vary_param
+                 if vary_type == 'features':
+                     n_samples, n_features = exp['fixed_val'], val
+                 else:
+                     n_samples, n_features = val, exp['fixed_val']
+
+                 try:
+                     X = np.random.rand(n_samples, n_features)
+                 except MemoryError:
+                     print(f"  FATAL: MemoryError: Failed to allocate {n_samples}x{n_features} data.")
+                     for m in methods:
+                         results[m['name']].append(-1 if m['name'] in timed_out_methods else np.nan)
+                     continue
+
+                 # 2. Run each method
+                 for m_info in methods:
+                     name = m_info['name']
+                     func = m_info['func']
+
+                     # Check if method has already timed out in this experiment
+                     if name in timed_out_methods:
+                         results[name].append(-1)
+                         continue
+
+                     try:
+                         start_time = time.time()
+
+                         # Execute the method (assuming benchmark format)
+                         func(X)
+
+                         duration = time.time() - start_time
+
+                         if duration > time_limit:
+                             print(f"  - {name:<18}: {duration:.4f}s (TIMEOUT - skipping future runs)")
+                             timed_out_methods.add(name)
+                         else:
+                             print(f"  - {name:<18}: {duration:.4f}s")
+
+                         results[name].append(duration)
+
+                     except Exception as e:
+                         print(f"  - {name:<18}: FAILED ({type(e).__name__})")
+                         results[name].append(np.nan)
+
+             # 3. Save results to CSV
+             try:
+                 df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
+                 df_results.index.name = 'Method'
+                 df_results.to_csv(filename)
+                 print(f"\n--- Results saved to {filename} ---")
+             except Exception as e:
+                 print(f"\n--- FAILED to save results: {e} ---")
+
+
      def _save_results(self, method_name, ds_results):
          """Aggregates repeats and saves to disk after each dataset."""
          for scale, metrics in ds_results.items():
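For orientation, the `timer` contract added above is minimal: each entry's `func` must accept a data matrix `X`, and its return value is discarded, since only wall-clock time is recorded. A short sketch under stated assumptions (the variance scorer is an illustrative stand-in, not part of the package, and the `from sdufseval.fseval import FSEVAL` import path is inferred from the module layout in RECORD):

```python
import numpy as np
from sdufseval.fseval import FSEVAL  # assumed import path

def variance_score(X):
    # Illustrative stand-in for a feature-selection method: rank features
    # by variance. timer() only measures how long func(X) takes; the
    # return value is ignored.
    return np.argsort(X.var(axis=0))[::-1]

evaluator = FSEVAL(
    output_dir="results",
    supervised_iter=5,     # new in 1.0.3: seeds averaged in supervised_eval
    unsupervised_iter=10,  # new in 1.0.3: seeds averaged in unsupervised_eval
)
evaluator.timer(
    methods=[{'name': 'Variance', 'func': variance_score}],
    vary_param='features',
    time_limit=60,  # small cap for a quick smoke test
)
```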
{sdufseval-1.0.1.dist-info → sdufseval-1.0.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sdufseval
- Version: 1.0.1
+ Version: 1.0.3
  Summary: Evaluation and Benchmark Tool for Feature Selection
  Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
  Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -62,7 +62,7 @@ import numpy as np

  if __name__ == "__main__":

-     # The 23 real datasets
+     # The 23 benchmark datasets
      DATASETS_TO_RUN = [
          'ALLAML', 'CLL_SUB_111', 'COIL20', 'Carcinom', 'GLIOMA', 'GLI_85',
          'Isolet', 'ORL', 'Prostate_GE', 'SMK_CAN_187', 'TOX_171', 'Yale',
@@ -73,7 +73,7 @@ if __name__ == "__main__":
      # Initialize FSEVAL
      evaluator = FSEVAL(output_dir="benchmark_results", avg_steps=10)

-     # Configuration for methods using the class internal random_baseline
+     # Configuration for methods
      methods_list = [
          {
              'name': 'Random',
@@ -87,8 +87,19 @@ if __name__ == "__main__":
          }
      ]

-     # Run Benchmark (Defaults to RF)
+     # --- 1. Run Standard Benchmark ---
+     # Evaluates methods on real-world datasets across different feature scales
      evaluator.run(DATASETS_TO_RUN, methods_list)
+
+     # --- 2. Run Runtime Analysis ---
+     # Performs scalability testing on synthetic data with a time cap.
+     # vary_param='both' triggers both 'features' and 'instances' experiments.
+     print("\n>>> Starting Scalability Analysis...")
+     evaluator.timer(
+         methods=methods_list,
+         vary_param='both',
+         time_limit=3600  # 1 hour limit
+     )
  ```

  ## Data Loading
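Before the parameter reference below, a note on what the scalability run in the example writes: each `timer` experiment produces one CSV under `output_dir`, with methods as rows and the varied size grid as columns, where values are seconds, -1 marks runs skipped after a timeout, and NaN marks failures. A hedged post-processing sketch (the path assumes the example's `output_dir="benchmark_results"`):

```python
import pandas as pd

# Load one runtime CSV written by timer(); column semantics as above.
times = pd.read_csv("benchmark_results/time_analysis_features.csv",
                    index_col="Method")
# Mask skipped runs (-1) before averaging; NaNs are ignored by mean().
print(times.where(times >= 0).mean(axis=1))
```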
@@ -107,9 +118,11 @@ Initializes the evaluation and benchmark object.
  | :--- | :--- | :--- |
  | **`output_dir`** | results | Folder where CSV result files are saved. |
  | **`cv`** | 5 | Cross-validation folds (supervised only). |
- | **`avg_steps`** | 10 | Number of random restarts / seeds to average over. |
- | **`eval_type`** | both | Number of random restarts / seeds to average over. |
- | **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | "supervised", "unsupervised", or "both". |
+ | **`avg_steps`** | 10 | Number of repetitions for stochastic methods. |
+ | **`supervised_iter`** | 5 | Number of classifier runs with different random seeds. |
+ | **`unsupervised_iter`** | 10 | Number of clustering runs with different random seeds. |
+ | **`eval_type`** | both | "supervised", "unsupervised", or "both". |
+ | **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | Evaluation metrics to calculate. |
  | **`experiments`** | ["10Percent", "100Percent"] | Which feature ratio grids to evaluate. |

  ### ⚙️ `run(datasets, methods, classifier=None)`
@@ -122,6 +135,16 @@ Initializes the evaluation and benchmark object.
  | **`methods`** | List[dict] | [{"name": str, "func": callable, "stochastic": bool}, ...] |
  | **`classifier`** | sklearn classifier | Classifier for supervised eval (default: RandomForestClassifier) |

+ ### ⚙️ `timer(methods, vary_param='both', time_limit=3600)`
+
+ Runs a standalone runtime analysis on synthetic data with a time cap.
+
+ | Argument | Type | Description |
+ | :--- | :--- | :--- |
+ | **`methods`** | List[dict] | [{"name": str, "func": callable}, ...] |
+ | **`vary_param`** | str | "features", "instances", or "both" (default: "both"). |
+ | **`time_limit`** | int | Seconds (default: 3600); once a method first exceeds this limit, its time is recorded and it is skipped in later runs. |
+
  # Dashboard

  There is a Feature Selection Evaluation Dashboard based on the benchmarks provided by FSEVAL, available on:
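To illustrate the `classifier` argument documented in the `run` table above, a minimal hedged sketch (LogisticRegression is an arbitrary stand-in, and the names reuse the README example; per the table, leaving `classifier=None` falls back to RandomForestClassifier):

```python
from sklearn.linear_model import LogisticRegression

# Reuses evaluator, DATASETS_TO_RUN, and methods_list from the README
# example above. Any sklearn-style classifier instance may be passed.
clf = LogisticRegression(max_iter=1000)
evaluator.run(DATASETS_TO_RUN, methods_list, classifier=clf)
```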
sdufseval-1.0.3.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
+ sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
+ sdufseval/fseval.py,sha256=kEsOlZ9B6yPZ0-SK6l_9LY08eTOyz4sd26TfXAANGAg,9511
+ sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
+ sdufseval-1.0.3.dist-info/METADATA,sha256=-NnHEAUQmru6sdBDuDQJhX9znIkF_jvtLxel7NYNIfw,5796
+ sdufseval-1.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ sdufseval-1.0.3.dist-info/RECORD,,
sdufseval-1.0.1.dist-info/RECORD DELETED
@@ -1,7 +0,0 @@
- sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
- sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
- sdufseval/fseval.py,sha256=n2t9cyDY8CbaphGaPJLJtuLT_uPvv-F0CQ1DK8A05Ig,5293
- sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
- sdufseval-1.0.1.dist-info/METADATA,sha256=pfRrB7uzkN4_b5TtllYl_vHBNNAV0Pncu1YS6lAmaGg,4768
- sdufseval-1.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- sdufseval-1.0.1.dist-info/RECORD,,