sdufseval 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdufseval/fseval.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import math
+import time
 import warnings
 import numpy as np
 import pandas as pd
@@ -11,7 +12,9 @@ class FSEVAL:
     def __init__(self,
                  output_dir="results",
                  cv=5,
-                 avg_steps=10,
+                 avg_steps=10,
+                 supervised_iter=5,
+                 unsupervised_iter=10,
                  eval_type="both",
                  metrics=None,
                  experiments=None):
@@ -21,6 +24,8 @@ class FSEVAL:
         self.output_dir = output_dir
         self.cv = cv
         self.avg_steps = avg_steps
+        self.supervised_iter = supervised_iter
+        self.unsupervised_iter = unsupervised_iter
         self.eval_type = eval_type
 
         # Metric configuration
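Taken together, the two hunks above add `supervised_iter` and `unsupervised_iter` alongside the existing `avg_steps`. Below is a minimal sketch of how the 1.0.3 constructor might be called; the import path is assumed from the module name `sdufseval/fseval.py` shown in this diff, and the values are illustrative only.

```python
# Illustrative only: constructing FSEVAL with the iteration controls added in 1.0.3.
from sdufseval.fseval import FSEVAL  # import path assumed from sdufseval/fseval.py

evaluator = FSEVAL(
    output_dir="results",
    cv=5,
    avg_steps=10,          # repeats for stochastic feature-selection methods
    supervised_iter=5,     # classifier runs per evaluation (new in 1.0.3)
    unsupervised_iter=10,  # clustering runs per evaluation (new in 1.0.3)
    eval_type="both",
)
```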
@@ -67,7 +72,7 @@ class FSEVAL:
             name = m_info['name']
             fs_func = m_info['func']
             # Stochastic methods run 10 times and average
-            repeats =
+            repeats = self.avg_steps if m_info.get('stochastic', False) else 1
 
             # Internal storage for current dataset results
             ds_results = {s: {met: [] for met in self.selected_metrics} for s in self.scales}
@@ -90,11 +95,11 @@ class FSEVAL:
                 c_acc, nmi, acc, auc = np.nan, np.nan, np.nan, np.nan
 
                 if self.eval_type in ["unsupervised", "both"]:
-                    c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.avg_steps)
+                    c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=self.unsupervised_iter)
 
                 if self.eval_type in ["supervised", "both"]:
                     # Passes classifier (None or instance) to eval.py
-                    acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.avg_steps)
+                    acc, auc = supervised_eval(X_subset, y, classifier=classifier, cv=self.cv, avg_steps=self.supervised_iter)
 
                 # Map metrics to columns
                 mapping = {"CLSACC": c_acc, "NMI": nmi, "ACC": acc, "AUC": auc}
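The hunk above reroutes the two evaluation calls from the shared `avg_steps` to the new per-mode counters. A rough sketch of invoking those helpers directly is shown below, assuming they live in `sdufseval/eval.py` (the file the in-line comment points to) and keep the signatures visible in the diff; the data matrices are placeholders.

```python
# Sketch only: exercising the two evaluation helpers with the signatures shown above.
import numpy as np
from sdufseval.eval import unsupervised_eval, supervised_eval  # location assumed from the comment

X_subset = np.random.rand(100, 20)     # a selected feature subset (placeholder)
y = np.random.randint(0, 3, size=100)  # matching labels (placeholder)

c_acc, nmi = unsupervised_eval(X_subset, y, avg_steps=10)     # clustering accuracy, NMI
acc, auc = supervised_eval(X_subset, y, classifier=None,      # None -> library default classifier
                           cv=5, avg_steps=5)                 # classification accuracy, AUC
```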
@@ -107,6 +112,101 @@ class FSEVAL:
             # Save/Update results for this method/dataset
             self._save_results(name, ds_results)
 
+
+    def timer(self, methods, vary_param='both', time_limit=3600):
+        """
+        Runs a standalone runtime analysis experiment with a time cap.
+
+        Args:
+            methods: List of dicts {'name': str, 'func': callable}.
+            vary_param: 'features', 'instances', or 'both'.
+            time_limit: Max seconds per method before it is skipped.
+        """
+
+        # Determine which experiments to run
+        experiments = []
+        if vary_param in ['features', 'both']:
+            experiments.append({
+                'name': 'features',
+                'fixed_val': 100,
+                'range': range(1000, 20001, 500),
+                'file': 'time_analysis_features.csv'
+            })
+        if vary_param in ['instances', 'both']:
+            experiments.append({
+                'name': 'instances',
+                'fixed_val': 100,
+                'range': range(1000, 20001, 500),
+                'file': 'time_analysis_instances.csv'
+            })
+
+        for exp in experiments:
+            vary_type = exp['name']
+            val_range = exp['range']
+            filename = os.path.join(self.output_dir, exp['file'])
+
+            # Tracking for this specific experiment
+            timed_out_methods = set()
+            results = {m['name']: [] for m in methods}
+
+            print(f"\n--- Starting Experiment: Varying {vary_type} ---")
+            print(f"Time limit: {time_limit}s | Output: {filename}")
+
+            for val in val_range:
+                # 1. Generate synthetic data based on vary_param
+                if vary_type == 'features':
+                    n_samples, n_features = exp['fixed_val'], val
+                else:
+                    n_samples, n_features = val, exp['fixed_val']
+
+                try:
+                    X = np.random.rand(n_samples, n_features)
+                except MemoryError:
+                    print(f" FATAL: MemoryError: Failed to allocate {n_samples}x{n_features} data.")
+                    for m in methods:
+                        results[m['name']].append(-1 if m['name'] in timed_out_methods else np.nan)
+                    continue
+
+                # 2. Run each method
+                for m_info in methods:
+                    name = m_info['name']
+                    func = m_info['func']
+
+                    # Check if method has already timed out in this experiment
+                    if name in timed_out_methods:
+                        results[name].append(-1)
+                        continue
+
+                    try:
+                        start_time = time.time()
+
+                        # Execute the method (assuming benchmark format)
+                        func(X)
+
+                        duration = time.time() - start_time
+
+                        if duration > time_limit:
+                            print(f" - {name:<18}: {duration:.4f}s (TIMEOUT - skipping future runs)")
+                            timed_out_methods.add(name)
+                        else:
+                            print(f" - {name:<18}: {duration:.4f}s")
+
+                        results[name].append(duration)
+
+                    except Exception as e:
+                        print(f" - {name:<18}: FAILED ({type(e).__name__})")
+                        results[name].append(np.nan)
+
+            # 3. Save results to CSV
+            try:
+                df_results = pd.DataFrame.from_dict(results, orient='index', columns=list(val_range))
+                df_results.index.name = 'Method'
+                df_results.to_csv(filename)
+                print(f"\n--- Results saved to {filename} ---")
+            except Exception as e:
+                print(f"\n--- FAILED to save results: {e} ---")
+
+
     def _save_results(self, method_name, ds_results):
         """Aggregates repeats and saves to disk after each dataset."""
         for scale, metrics in ds_results.items():
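The new `timer` method calls each entry's `func` directly on a raw data matrix, so the only contract is a dict with `'name'` and `'func'` where `func(X)` is callable. Below is a hypothetical methods list in that format; the variance-ranking selector is a stand-in for illustration, not part of the package.

```python
# Hypothetical methods in the {'name', 'func'} format timer() expects; func is called as func(X).
import numpy as np

def variance_ranking(X):
    # Rank feature indices by variance, highest first; timer() only measures elapsed time.
    return np.argsort(X.var(axis=0))[::-1]

timing_methods = [
    {"name": "VarianceRank", "func": variance_ranking},
    {"name": "Random", "func": lambda X: np.random.permutation(X.shape[1])},
]

# With an FSEVAL instance (see the constructor sketch above), a short run might look like:
# evaluator.timer(timing_methods, vary_param="features", time_limit=60)
```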
{sdufseval-1.0.1.dist-info → sdufseval-1.0.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdufseval
-Version: 1.0.1
+Version: 1.0.3
 Summary: Evaluation and Benchmark Tool for Feature Selection
 Project-URL: Homepage, https://github.com/mrajabinasab/FSEVAL
 Project-URL: Bug Tracker, https://github.com/mrajabinasab/FSEVAL/issues
@@ -62,7 +62,7 @@ import numpy as np
 
 if __name__ == "__main__":
 
-    # The 23
+    # The 23 benchmark datasets
     DATASETS_TO_RUN = [
        'ALLAML', 'CLL_SUB_111', 'COIL20', 'Carcinom', 'GLIOMA', 'GLI_85',
        'Isolet', 'ORL', 'Prostate_GE', 'SMK_CAN_187', 'TOX_171', 'Yale',
@@ -73,7 +73,7 @@ if __name__ == "__main__":
    # Initialize FSEVAL
    evaluator = FSEVAL(output_dir="benchmark_results", avg_steps=10)
 
-   # Configuration for methods
+   # Configuration for methods
    methods_list = [
        {
            'name': 'Random',
@@ -87,8 +87,19 @@ if __name__ == "__main__":
        }
    ]
 
-   # Run Benchmark
+   # --- 1. Run Standard Benchmark ---
+   # Evaluates methods on real-world datasets across different feature scales
    evaluator.run(DATASETS_TO_RUN, methods_list)
+
+   # --- 2. Run Runtime Analysis ---
+   # Performs scalability testing on synthetic data with a time cap.
+   # vary_param='both' triggers both 'features' and 'instances' experiments.
+   print("\n>>> Starting Scalability Analysis...")
+   evaluator.timer(
+       methods=methods_list,
+       vary_param='both',
+       time_limit=3600  # 1 hour limit
+   )
 ```
 
 ## Data Loading
@@ -107,9 +118,11 @@ Initializes the evaluation and benchmark object.
 | :--- | :--- | :--- |
 | **`output_dir`** | results | Folder where CSV result files are saved. |
 | **`cv`** | 5 | Cross-validation folds (supervised only). |
-| **`avg_steps`** | 10 | Number of
-| **`
-| **`
+| **`avg_steps`** | 10 | Number of repetitions for stochastic methods. |
+| **`supervised_iter`** | 5 | Number of classifier runs with different random seeds. |
+| **`unsupervised_iter`** | 10 | Number of clustering runs with different random seeds. |
+| **`eval_type`** | both | "supervised", "unsupervised", or "both". |
+| **`metrics`** | ["CLSACC", "NMI", "ACC", "AUC"] | Evaluation metrics to calculate. |
 | **`experiments`** | ["10Percent", "100Percent"] | Which feature ratio grids to evaluate. |
 
 ### ⚙️ `run(datasets, methods, classifier=None)`
@@ -122,6 +135,16 @@ Initializes the evaluation and benchmark object.
 | **`methods`** | List[dict] | "[{""name"": str, ""func"": callable, ""stochastic"": bool}, ...]" |
 | **`classifier`** | sklearn classifier | Classifier for supervised eval (default: RandomForestClassifier) |
 
+### ⚙️ `timer(methods, vary_param='both', time_limit=3600)`
+
+Runs a runtime analysis on the methods.
+
+| Argument | Type | Description |
+| :--- | :--- | :--- |
+| **`methods`** | List[dict] | "[{""name"": str, ""func"": callable, ""stochastic"": bool}, ...]" |
+| **`vary_param`** | str | "features", "instances", or "both". |
+| **`time_limit`** | int | A method is skipped for the rest of the experiment after its first recorded run exceeds this limit (seconds). |
+
 # Dashboard
 
 There is a Feature Selection Evaluation Dashboard based on the benchmarks provided by FSEVAL, available on:
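The `timer` documentation above maps onto the CSV layout written by the code diff: one row per method, one column per varied size, `-1` for runs skipped after a timeout, and `NaN` for failures or allocation errors. A small sketch of reading one of those files back, assuming a finished run with `output_dir="benchmark_results"`:

```python
# Sketch: inspect a runtime CSV written by timer(); the path assumes output_dir="benchmark_results".
import pandas as pd

runtimes = pd.read_csv("benchmark_results/time_analysis_features.csv", index_col="Method")
print(runtimes.head())               # rows: methods, columns: number of features
valid = runtimes.mask(runtimes < 0)  # treat the -1 timeout sentinel as missing
print(valid.max(axis=1))             # slowest recorded run per method
```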
sdufseval-1.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
+sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
+sdufseval/fseval.py,sha256=kEsOlZ9B6yPZ0-SK6l_9LY08eTOyz4sd26TfXAANGAg,9511
+sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
+sdufseval-1.0.3.dist-info/METADATA,sha256=-NnHEAUQmru6sdBDuDQJhX9znIkF_jvtLxel7NYNIfw,5796
+sdufseval-1.0.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sdufseval-1.0.3.dist-info/RECORD,,
sdufseval-1.0.1.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-sdufseval/__init__.py,sha256=KIZIPxldHV2TLZVTW1jP6L8-tDw7-iMgmW-e-DeTdvo,271
-sdufseval/eval.py,sha256=445pNvgOo_fcdOKNElDsflDZ6iptxPRKGz5wuxLYPUE,2573
-sdufseval/fseval.py,sha256=n2t9cyDY8CbaphGaPJLJtuLT_uPvv-F0CQ1DK8A05Ig,5293
-sdufseval/loader.py,sha256=YUMSAdi2zcg2MizcGlnCxhsV5Y5cikL1hfk5ofwaI6s,2286
-sdufseval-1.0.1.dist-info/METADATA,sha256=pfRrB7uzkN4_b5TtllYl_vHBNNAV0Pncu1YS6lAmaGg,4768
-sdufseval-1.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sdufseval-1.0.1.dist-info/RECORD,,