PyPI - BenchmarkDPFair - Versions diffs - 0.1.0__py3-none-any.whl - Mend

BenchmarkDPFair 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

BenchmarkDPFair/Benchmark/__init__.py +4 -0
BenchmarkDPFair/Benchmark/benchmark.py +282 -0
BenchmarkDPFair/Benchmark/dataconf.py +58 -0
BenchmarkDPFair/Benchmark/utils/__init__.py +0 -0
BenchmarkDPFair/Benchmark/utils/auxiliar.py +94 -0
BenchmarkDPFair/Benchmark/utils/benchmark.py +176 -0
BenchmarkDPFair/Benchmark/utils/inp.py +141 -0
BenchmarkDPFair/Benchmark/utils/pos.py +183 -0
BenchmarkDPFair/Benchmark/utils/pre.py +233 -0
BenchmarkDPFair/Benchmark/utils/types.py +15 -0
BenchmarkDPFair/Benchmark/utils/verifiers.py +102 -0
BenchmarkDPFair/DataGenerator/__init__.py +5 -0
BenchmarkDPFair/DataGenerator/dataconf.py +94 -0
BenchmarkDPFair/DataGenerator/datagen.py +246 -0
BenchmarkDPFair/DataGenerator/utils/verifiers.py +28 -0
BenchmarkDPFair/__init__.py +4 -0
benchmarkdpfair-0.1.0.dist-info/METADATA +165 -0
benchmarkdpfair-0.1.0.dist-info/RECORD +21 -0
benchmarkdpfair-0.1.0.dist-info/WHEEL +5 -0
benchmarkdpfair-0.1.0.dist-info/licenses/LICENSE +21 -0
benchmarkdpfair-0.1.0.dist-info/top_level.txt +1 -0

BenchmarkDPFair/Benchmark/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .benchmark import BenchmarkInfo,benchmark
+from .dataconf import BenchmarkDatasetConfig
+__all__ = ["BenchmarkInfo", "BenchmarkDatasetConfig", "benchmark"]

BenchmarkDPFair/Benchmark/benchmark.py ADDED Viewed

@@ -0,0 +1,282 @@
+import os
+import warnings
+import pandas as pd
+import numpy as np
+import inspect
+from typing import Callable, List, Any, Optional, Tuple, Union
+from sklearn.model_selection import train_test_split
+from tabulate import tabulate
+from .dataconf import BenchmarkDatasetConfig
+from .utils.types import FloatOrTuple, DFTuple
+from .utils.verifiers import check_data_loader, check_splitdata, check_target, read_verification, check_dict
+from .utils.benchmark import Benchmark
+from .utils.auxiliar import save_experiment
+# Seeds used when BenchmarkInfo is created without an explicit `seeds` argument; `benchmark` runs one experiment per seed.
+DEFAULT_SEEDS : List[float]= [5,42,253,4112,32645,602627,153073,53453,178753,243421,767707,113647,796969,553067,96797,133843,6977,460403,126613,583879]
+# Privacy budgets (epsilon values) used when BenchmarkInfo is created without an explicit `eps` argument.
+DEFAULT_EPS  : List[float] = [0.05, 0.1, 0.25, 0.5, 0.75, 1, 2, 3, 5, 10, 15, 20]
+# Module-level name of the DP generator: overwritten by BenchmarkInfo.__init__ and read by _load_data to build dataset paths.
+DP_ALGORITHM  : str = ""
+class BenchmarkInfo:
+    def __init__(self, dp_method:str, output_dir: str, data_loader: Optional[Callable[..., DFTuple]] = None, dlkwargs: Optional[Union[dict, set]] = None,
+                 split_data: Optional[FloatOrTuple] = None, normalize: bool = True, seeds: List[float] = DEFAULT_SEEDS,
+                 eps: List[Union[float,int]] = DEFAULT_EPS, classifier: Any = None, classifier_kwargs: Optional[Union[dict,set]] = None):
+        """
+        Set of possible configurations for the Benchmark experiments.
+        **In case you do not use our own generator, read the documentation first to understand how the benchmark expects the data to be organized.**
+        Parameters
+        ----------
+        dp_method : str
+            Which DP synthetic data generator was used. It is also stored in the module-level `DP_ALGORITHM`, so the most recently created BenchmarkInfo decides the paths used by the default loader.
+        output_dir : str
+            Directory to save the experiment logs and metrics.
+        data_loader : Callable, optional
+            In case a new data loader needs to be used, refer to the documentation to understand the default data loader's behaviour. data_loader must accept seed as an argument and also kwargs.
+        dlkwargs : dict | set, optional
+            Custom parameters for the data loader. Defaults to a fresh empty dict per instance.
+        split_data : FloatOrTuple, optional
+            Split distributions used while loading data. If not provided, the final distributions are **0.6, 0.2 and 0.2**, which is `split_data = (0.4, 0.5)`.
+        normalize : bool, optional
+            Allow MinMax normalization of the data. Default is **True**.
+        seeds : List[int], optional
+            List of seeds for the benchmark. Used to increase reproducibility.
+        eps : List[float|int], optional
+            List of DP epsilons (privacy budget) analysed during the benchmark.
+        classifier : Any, optional
+            Custom classifier. **Must implement fit, predict and predict_proba**. Default is [XGBoost](https://xgboost.readthedocs.io/en/stable/).
+        classifier_kwargs : dict | set, optional
+            Custom parameters for the classifier.
+        """
+        self.dp_method    = dp_method
+        self.output_dir   = output_dir
+        self.normalize    = normalize
+        # Copy so that editing an instance never alters the shared module-level defaults.
+        self.seeds = list(seeds)
+        self.eps   = list(eps)
+        global DP_ALGORITHM
+        DP_ALGORITHM = self.dp_method
+        check_splitdata(split_data)
+        self.split = split_data
+        # Wrap user-supplied function with enforcement
+        self.data_loader = check_data_loader(data_loader) if data_loader is not None else self.__data_loader
+        self.custom_loader = data_loader is not None
+        # A `{}` default in the signature would be one dict shared by every instance.
+        self.dlkwargs = {} if dlkwargs is None else dlkwargs
+        self.classifier = classifier
+        self.classifier_kwargs = classifier_kwargs
+    def dataloader(self, **kwargs) -> DFTuple:
+        """
+        Call the configured data loader (custom or default) with the given keyword arguments.
+        By default it assumes that within the `baseline_dir` there exists a CSV file with the name set in the `filename` parameter.
+        If no separate test file is found, that file is split into three sets. With a tuple `split_data`, the first value is the share held out from train
+        and the second value is how the held-out part is divided between validation and test. With a single number, the same value is used for both steps.
+        **Please refer to the documentation to understand how the default dataloader expects the directory structure to be like.**
+        Parameters
+        ----------
+        data_conf : DatasetConf
+            Configuration of the desired dataset.
+        filename : str
+            The name of the CSV file to load.
+        seed : int
+            The current seed used to load the file and split the data.
+        verbose : bool, optional
+            If `true` prints information on the loaded dataset.
+        extra_processing : Callable, optional
+            Custom (user's) processing function applied to loaded data. Will be called with the loaded data and kwargs as arguments.
+        kwargs : Any, optional
+            If an extra processing function is provided, will be forwarded while calling, with the loaded dataset.
+        Returns
+        ----------
+        Three tuple[pd.DataFrame, pd.DataFrame]
+            - With the default loader: `(X_train, y_train), (X_val, y_val), (X_test, y_test)`.
+        """
+        return self.data_loader(**kwargs)
+    @check_data_loader
+    def __data_loader(self, data_conf: BenchmarkDatasetConfig, filename: str, seed: int,  **kwargs) -> DFTuple:
+        # Default loader: delegates to the module-level _load_data using this instance's split configuration.
+        return _load_data(data_conf, filename, seed, split=self.split, **kwargs)
+def _load_data(data_conf: BenchmarkDatasetConfig, filename: str, seed: int, epsilon: Optional[float] = None,
+               verbose: bool=True, split: Optional[FloatOrTuple] = None, extra_processing: Optional[Callable] = None, **kwargs) -> DFTuple:
+    """
+    Load a dataset CSV and return train, validation and test sets.
+    If a `<prefix>_test<ext>` file exists in the sibling `DP-dataset-test` directory, the loaded file is used entirely for training
+    and that test file is split into validation and test. Otherwise the loaded file itself is split into the three sets.
+    Parameters
+    ----------
+    data_conf : BenchmarkDatasetConfig
+        Dataset configuration (name, root dir, target, columns to read, index column, categorical columns).
+    filename : str
+        CSV file to load. A bare file name is resolved under `{dir}/{name}/{DP_ALGORITHM}/DP-dataset-<train|epsilon-X>/`; a name containing a directory is used as given.
+    seed : int
+        Random state forwarded to `train_test_split`.
+    epsilon : float, optional
+        Privacy budget selecting the `DP-dataset-epsilon-<epsilon>` directory; `None` selects `DP-dataset-train`.
+    verbose : bool, optional
+        Print loading information and the resulting shapes.
+    split : FloatOrTuple, optional
+        Held-out fraction(s). A tuple gives (share removed from train, share of that part used as test). A float is used for both steps. Default is `(0.4, 0.5)`.
+    extra_processing : Callable, optional
+        Called as `extra_processing(df, **kwargs)` on each loaded frame. Its return value is ignored, so it must modify the frame in place.
+    Returns
+    ----------
+    `(X_train, y_train), (X_val, y_val), (X_test, y_test)`
+    """
+    def _read_frame(path):
+        # Shared reading steps for the train and the test file.
+        frame = pd.read_csv(path, usecols=lambda col: col in cols)
+        if data_conf.index_col:
+            frame.set_index(data_conf.index_col, inplace=True)
+        # Verify if data was read successfully
+        read_verification(frame, data_conf.usecols)
+        # Apply extra processing to dataset if the user wants it
+        if extra_processing is not None:
+            extra_processing(frame, **kwargs)
+        return frame
+    if verbose:
+        print(f"** Loading dataset {data_conf.name.upper()} **")
+    if split is None:
+        split = (0.4, 0.5)
+    # A single float is applied to both splitting steps.
+    is_pair = isinstance(split, tuple)
+    first_frac  = split[0] if is_pair else split
+    second_frac = split[1] if is_pair else split
+    # Derive the test file name from the file name only, never from its directory part.
+    directory, leaf = os.path.split(filename)
+    stem, ext = os.path.splitext(leaf)
+    test_filename = f"{stem.rsplit('_', 1)[0]}_test{ext}"
+    if directory:
+        # os.path.join supplies the separator that plain concatenation dropped.
+        test_path = os.path.join(os.path.dirname(directory), "DP-dataset-test/")
+    else:
+        test_path = f"{data_conf.dir}/{data_conf.name}/{DP_ALGORITHM}/DP-dataset-test/"
+        filename = f"{data_conf.dir}/{data_conf.name}/{DP_ALGORITHM}/DP-dataset-{f'epsilon-{epsilon}' if epsilon is not None else 'train'}/{filename}"
+    test_file = test_path + "/" + test_filename
+    cols = list(dict.fromkeys(data_conf.usecols + [data_conf.index_col] if data_conf.index_col else data_conf.usecols))
+    ds = _read_frame(filename)
+    # Ensure all dataset is numerical
+    # NOTE(review): this integer encoding is applied to the main file only, not to a separately loaded test file - confirm that is intended.
+    for col in data_conf.categorical_cols:
+        if not pd.api.types.is_numeric_dtype(ds[col]):
+            ds[col] = ds[col].astype('category').cat.codes # Int encode
+    X = ds.drop(columns=[data_conf.target])
+    y = ds[data_conf.target]
+    # Split data
+    if not os.path.exists(test_file):
+        if verbose:
+            # Report the effective shares: train keeps what the first split does not hold out.
+            train_split_distrib = 1 - first_frac
+            val_split_distrib = first_frac * (1 - second_frac)
+            test_split_distrib = first_frac * second_frac
+            print(f"[WARN] Test directory and/or file with test set not found, the provided {filename} will be split into three sets with distributions {(train_split_distrib, val_split_distrib, test_split_distrib)}.")
+            print(f"       This is the path we are looking for: {test_file}.\n")
+        # No test path found, so split the data from filename
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=first_frac, random_state=seed)
+        X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=second_frac, random_state=seed)
+    else:
+        X_train = X
+        y_train = y
+        test_ds = _read_frame(test_file)
+        X_test = test_ds.drop(columns=[data_conf.target])
+        y_test = test_ds[data_conf.target]
+        if is_pair:
+            print(f"[WARN] You provided a tuple {split} of splitting distribution and a test directory and file has been found in {test_path}, the second value of the tuple will be used.\n")
+        X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=second_frac, random_state=seed)
+    if verbose:
+        data = [
+            ["X_train", X_train.shape],
+            ["X_val",   X_val.shape],
+            ["X_test",  X_test.shape],
+            ["y_train", y_train.shape],
+            ["y_val",   y_val.shape],
+            ["y_test",  y_test.shape],
+        ]
+        print("\n#### Data  Information ####")
+        print(tabulate(data, headers=["Dataset", "Shape"], tablefmt="github"))
+        print("###########################\n")
+    # Check that the target column is binary
+    check_target(y_train, data_conf.target)
+    check_target(y_val, data_conf.target)
+    check_target(y_test, data_conf.target)
+    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
+############# Experiments #############
+def _experiment(seed, dataset_conf: BenchmarkDatasetConfig, benchmark_info: BenchmarkInfo, savefile):
+    """
+    Run all experiments for one seed and append their metrics to `savefile`.
+    First the baseline ("Fair-only") experiment is run on the `_train.csv` file, then one "dp" experiment per epsilon in `benchmark_info.eps`.
+    Every experiment is saved through `save_experiment` into `{output_dir}/{dataset name}/{dp_method}/results/`.
+    """
+    def _kwargs_for(csv_name, epsilon):
+        # Extra keyword arguments handed to Benchmark; only the file name and epsilon differ between runs.
+        return {
+            "data_conf": dataset_conf,
+            "filename": csv_name,
+            "custom_loader": benchmark_info.custom_loader,
+            "epsilon": epsilon,
+            "seed": seed,
+            "classifier": benchmark_info.classifier,
+            "classifier_kwargs": benchmark_info.classifier_kwargs
+        }
+    np.random.seed(seed)
+    output_dir = f"{benchmark_info.output_dir}/{dataset_conf.name}/{benchmark_info.dp_method}/results/"
+    print(f"\n*********************** Fair-only - seed = {seed} ***********************\n")
+    baseline_kwargs = _kwargs_for(dataset_conf.name + f"_split_dataset_seed_{seed}_train.csv", None)
+    original_experiment = Benchmark(
+        name="baseline",
+        data_loader=benchmark_info.data_loader,
+        normalize=benchmark_info.normalize,
+        seed=seed,
+        dlkwargs=benchmark_info.dlkwargs,
+        ekwargs=baseline_kwargs,
+    )
+    original_experiment.run()
+    save_experiment(original_experiment, seed, filename=savefile, path=output_dir,synth=benchmark_info.dp_method)
+    del original_experiment
+    for epsilon in benchmark_info.eps:
+        print(f"\n*********************** DP & DP+Fair | ε={epsilon} ***********************\n")
+        dp_kwargs = _kwargs_for(dataset_conf.name + f"_split_dataset_seed_{seed}_epsilon-{epsilon}.csv", epsilon)
+        dp_experiment = Benchmark(
+            name="dp",
+            data_loader=benchmark_info.data_loader,
+            normalize=benchmark_info.normalize,
+            seed=seed,
+            dlkwargs=benchmark_info.dlkwargs,
+            ekwargs=dp_kwargs,
+        )
+        dp_experiment.run()
+        save_experiment(dp_experiment, seed, epsilon, filename=savefile, path=output_dir,synth=benchmark_info.dp_method)
+        # Drop the loader reference first, then the experiment itself, before the next epsilon.
+        del dp_experiment.data_loader, dp_experiment
+def benchmark(data_conf: BenchmarkDatasetConfig, benchmark_info: BenchmarkInfo):
+    """
+    Run the fairness-intervention benchmark for every configured seed.
+    Models are evaluated on the original data and on the differentially private synthetic data.
+    **The results obtained are output into a csv file in the defined output directory.**
+    Parameters
+    -----------
+    data_conf: BenchmarkDatasetConfig
+        Configuration of the dataset used
+    benchmark_info: BenchmarkInfo
+        Configuration of the experiments
+    """
+    print(f"Running DP Benchmark on dataset: '{data_conf.name}' with target: '{data_conf.target}' and sensitive attribute: '{data_conf.sensitive_attr}'")
+    # The result file name encodes every seed, every epsilon and the DP method.
+    seeds_tag = "_".join(map(str, benchmark_info.seeds))
+    eps_tag = "_".join(map(str, benchmark_info.eps))
+    savefile = f"benchmark_results_seeds_{seeds_tag}_eps_{eps_tag}_synth_{benchmark_info.dp_method}.csv"
+    for current_seed in benchmark_info.seeds:
+        _experiment(current_seed, data_conf, benchmark_info, savefile)

BenchmarkDPFair/Benchmark/dataconf.py ADDED Viewed

@@ -0,0 +1,58 @@
+from typing import List, Optional
+class BenchmarkDatasetConfig:
+    def __init__(self, name : str, target : str, sensitive_attr : str, sensitive_cols : Optional[List[str]] = None, categorical_cols : Optional[List[str]] = None,
+                 ordinal_cols : Optional[List[str]] = None, continuous_cols : Optional[List[str]] = None, root_dir : str = "../../data/", usecols : Optional[List[str]] = None, index_col : Optional[str] = None):
+        """
+        Configuration of a given dataset.
+        Parameters
+        ----------
+        name : str
+            Name of the dataset, this will be used for outputting logs
+        target : str
+            Column to be predicted and/or used as ground truth.
+        sensitive_attr : str
+            Sensitive attribute in the dataset. So far, only one is possible. Ex: **race**.
+        sensitive_cols : List[str], optional
+            Sensitive columns. If empty or none, `[sensitive_attr]` is used.
+        categorical_cols : List[str], optional
+            Columns with categorical data.
+        ordinal_cols : List[str], optional
+            Columns with ordinal data.
+        continuous_cols : List[str], optional
+            Columns with continuous data.
+        root_dir : str
+            Path to the root directory of the dataset, stored as `self.dir`. For example "../../data/" for the Adult dataset already provided. An empty value falls back to "./".
+        usecols : List[str], optional
+            Columns to be read from the dataset file. If empty or none, the target plus the categorical, continuous, ordinal and sensitive columns are used.
+        index_col : str, optional
+            Name of the column to use as the index, if any.
+        Raises
+        ----------
+        ValueError
+            If `name` or `sensitive_attr` is empty, or if no categorical, ordinal or continuous column is given.
+        """
+        # Validate before building any state.
+        if not name:
+            raise ValueError("Argument 'name' must not be empty as it is necessary for the benchmark.")
+        if not sensitive_attr:
+            raise ValueError("A sensitive attribute is required for the benchmark.")
+        # Fresh lists per instance: list defaults in the signature would be shared between all instances.
+        categorical_cols = list(categorical_cols) if categorical_cols else []
+        ordinal_cols     = list(ordinal_cols) if ordinal_cols else []
+        continuous_cols  = list(continuous_cols) if continuous_cols else []
+        if not (categorical_cols or ordinal_cols or continuous_cols):
+            raise ValueError("The columns must be of one of the three categories: Categorical, Ordinal or Continuous.")
+        self.name    = name
+        self.dir     = root_dir if root_dir else "./"
+        self.target  = target
+        self.sensitive_attr   = sensitive_attr
+        self.sensitive_cols   = list(sensitive_cols) if sensitive_cols else [sensitive_attr]
+        self.categorical_cols = categorical_cols
+        self.ordinal_cols     = ordinal_cols
+        self.continuous_cols  = continuous_cols
+        self.index_col = index_col
+        if usecols:
+            self.usecols = list(usecols)
+        else:
+            self.usecols = [self.target] + self.categorical_cols + self.continuous_cols + self.ordinal_cols + self.sensitive_cols
+    def __str__(self):
+        return f"BenchmarkDatasetConfig(name={self.name},dir={self.dir},target={self.target},sensitive_attr={self.sensitive_attr},categorical_cols={self.categorical_cols},ordinal_cols={self.ordinal_cols},continuous_cols={self.continuous_cols})"

BenchmarkDPFair/Benchmark/utils/__init__.py ADDED Viewed

File without changes

BenchmarkDPFair/Benchmark/utils/auxiliar.py ADDED Viewed

@@ -0,0 +1,94 @@
+import pandas as pd
+import numpy as np
+import os
+def getMetrics(metric):
+    """
+    Collect the benchmark metrics from `metric` into a dict keyed by short metric name.
+    Each value comes from the correspondingly named method of `metric`; MAD is the unprivileged accuracy minus the privileged accuracy.
+    Any value that is None or NaN is replaced by the string 'inf'. If `metric` is None, every entry is 'inf'.
+    """
+    # Order matters: it fixes both the call order on `metric` and the key order of the result.
+    extractors = (
+        ("DI",         lambda m: m.disparate_impact()),
+        ("ACC",        lambda m: m.accuracy()),
+        ("ACC_PRIV",   lambda m: m.accuracy(privileged=True)),
+        ("ACC_UNPRIV", lambda m: m.accuracy(privileged=False)),
+        ("PREC",       lambda m: m.precision()),
+        ("REC",        lambda m: m.recall()),
+        ("MAD",        lambda m: m.accuracy(privileged=False) - m.accuracy(privileged=True)),
+        ("EOD",        lambda m: m.equal_opportunity_difference()),
+        ("TPR",        lambda m: m.true_positive_rate()),
+        ("FPR",        lambda m: m.false_positive_rate()),
+        ("TNR",        lambda m: m.true_negative_rate()),
+        ("FNR",        lambda m: m.false_negative_rate()),
+        ("SPD",        lambda m: m.statistical_parity_difference()),
+        ("EODD",       lambda m: m.equalized_odds_difference()),
+    )
+    # First gather every raw value, then sanitise.
+    raw_values = {}
+    for label, extract in extractors:
+        raw_values[label] = None if metric is None else extract(metric)
+    report = {}
+    for label, value in raw_values.items():
+        if value is None or np.isnan(value):
+            report[label] = 'inf'
+        else:
+            report[label] = value
+    return report
+def save_experiment(experiment, seed, eps=None, filename="exp_metrics.csv", path="../data/metrics/", synth=""):
+    """
+    Append the metrics and the error log of one experiment to CSV files.
+    Metric rows go to `<path>/<filename>`. Error rows go to `<path>/log/<filename with ".csv" replaced by "-log.csv">`.
+    A file is only written when there are rows for it, so the first real write always carries the header.
+    Parameters
+    ----------
+    experiment : object with a `results` list of dicts
+        Each dict may hold "original_classification_metrics" and/or "mitigated_classification_metrics" (dicts or None),
+        plus optional "mitigator", "dp_method", "error" and "info" keys. `experiment.results` is deleted afterwards.
+    seed : int
+        Seed written to the "Seed" column.
+    eps : float, optional
+        Epsilon written to the "Epsilon" column; an empty string when None.
+    filename : str
+        Name of the metrics CSV file.
+    path : str
+        Output directory, with or without a trailing separator.
+    synth : str
+        DP method name, written to "DP-Method" only for results that contain a "dp_method" key.
+    """
+    def _append_rows(rows, target):
+        # Skip empty batches: an empty frame would create a header-less stub and suppress the header of later appends.
+        if not rows:
+            return
+        folder = os.path.dirname(target)
+        if folder:
+            os.makedirs(folder, exist_ok=True)
+        pd.DataFrame(rows).to_csv(target, index=False, mode='a', header=not os.path.isfile(target))
+    metric_sets = ("original_classification_metrics", "mitigated_classification_metrics")
+    results = []
+    logs = []
+    for r in experiment.results:
+        # Identification columns shared by metric rows and log rows.
+        common = {
+            "Seed": seed,
+            "Epsilon": eps if eps is not None else "",
+            "Fair-Method": r.get("mitigator", ""),
+            "DP-Method": synth if "dp_method" in r else "",
+        }
+        if "error" in r:
+            logs.append({**common, "Error": r["error"], "Info": r.get("info", "")})
+            r.pop("error", None)
+            r.pop("info", None)
+        for exp_set in metric_sets:
+            if r.get(exp_set) is None:
+                continue
+            results.append({**common, **(r[exp_set])})
+    del experiment.results
+    # os.path.join also handles a `path` given without a trailing separator.
+    _append_rows(results, os.path.join(path, filename))
+    _append_rows(logs, os.path.join(path, "log", filename.replace(".csv", "-log.csv")))

BenchmarkDPFair/Benchmark/utils/benchmark.py ADDED Viewed

@@ -0,0 +1,176 @@
+EXP_CLASSES = ["original", "pre", "pos", "in"]
+import sys
+import traceback
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from aif360.metrics import ClassificationMetric
+from aif360.datasets import BinaryLabelDataset
+from xgboost import XGBClassifier
+from .pre import pre_mitigator_experiment
+from .inp import in_mitigator_experiment
+from .pos import pos_mitigator_experiment
+from .auxiliar import getMetrics
+from .verifiers import check_signatures
+import gc
+def original_experiment(x_train, y_train, x_test, y_test, sensitive_attr, target_column, seed=42, normalize=True, threshold=.5, classifier=None, classifier_kwargs=None):
+    """
+    Train a classifier without any fairness mitigation and compute its metrics on the test set.
+    :param x_train: training features (DataFrame)
+    :param y_train: training labels
+    :param x_test: test features (DataFrame)
+    :param y_test: test labels
+    :param sensitive_attr: name of the protected attribute column; value 1 is treated as privileged and 0 as unprivileged
+    :param target_column: name of the label column
+    :param seed: random state passed to the classifier
+    :param normalize: if true, features are scaled with a MinMaxScaler fitted on the training set
+    :param threshold: probability at or above which the positive class is predicted
+    :param classifier: optional classifier class (or factory) called as classifier(random_state=seed, **classifier_kwargs); XGBClassifier is used when None
+    :param classifier_kwargs: optional extra keyword arguments for the custom classifier; None means no extra arguments
+    :return: dict with the key "original_classification_metrics" holding the output of getMetrics
+    """
+    privileged_groups = [{sensitive_attr: 1}] # Ex: White
+    unprivileged_groups = [{sensitive_attr: 0}] # Ex: Not white
+    if normalize:
+        scaler = MinMaxScaler()
+        cols = x_train.columns
+        x_train = pd.DataFrame(scaler.fit_transform(x_train), columns=cols)
+        x_test = pd.DataFrame(scaler.transform(x_test), columns=cols)
+    if classifier is None:
+        model = XGBClassifier(objective='binary:logistic', random_state=seed)
+    else:
+        # classifier_kwargs defaults to None, and unpacking None with ** raises TypeError.
+        model = classifier(random_state=seed, **(classifier_kwargs or {}))
+    model.fit(x_train, y_train)
+    y_pred_prob = model.predict_proba(x_test)[:, 1]
+    y_pred = (y_pred_prob >= threshold).astype(int)
+    y_preds = pd.DataFrame(y_pred, columns=[target_column])
+    # Reset the indexes so that features, predictions and labels line up by position in concat.
+    y_preds = y_preds.reset_index(drop=True)
+    x_test = x_test.reset_index(drop=True)
+    og_dataset_test = pd.concat([x_test, y_preds], axis=1)
+    og_dataset_test_pred = BinaryLabelDataset(df=og_dataset_test, label_names=[target_column], protected_attribute_names=[sensitive_attr],
+                                unprivileged_protected_attributes=unprivileged_groups)
+    y_test = y_test.reset_index(drop=True)
+    df_test = pd.concat([x_test, y_test], axis=1)
+    df_test = BinaryLabelDataset(df=df_test, label_names=[target_column], protected_attribute_names=[sensitive_attr],
+                                unprivileged_protected_attributes=unprivileged_groups)
+    og_classification_metrics = ClassificationMetric(df_test, og_dataset_test_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
+    og_metrics = getMetrics(og_classification_metrics)
+    # Presumably releases the datasets held by the metric object early; the other locals are freed on return.
+    og_classification_metrics.dataset            = None
+    og_classification_metrics.classified_dataset = None
+    return {
+        "original_classification_metrics": og_metrics,
+    }
+class Benchmark:
+    # Runs the unmitigated experiment plus every configured fairness mitigator on one dataset and collects the results.
+    def __init__(self, name, data_loader, normalize=None, seed=42, verbose=False, threshold=.5, dlkwargs=None, ekwargs = None):
+        """
+        :param name: name of the experiment
+        :param data_loader: callable returning (train, calibration, test) pairs of (X, y); called in run() with keyword arguments
+        :param normalize: should use a normalizer
+        :param seed: seed for reproducibility
+        :param verbose: verbosity of the experiment
+        :param threshold: decision threshold forwarded to every experiment
+        :param dlkwargs: user keyword arguments for the data loader; run() merges it with ekwargs using `|`
+        :param ekwargs: extra keyword arguments; run() reads the keys "data_conf", "classifier" and "classifier_kwargs"
+        """
+        self.name        = name
+        self.data_loader = data_loader
+        self.normalize   = normalize
+        self.seed        = seed
+        self.verbose     = verbose
+        # Mitigator name -> experiment class; run() uses the class to pick the pre / in / pos experiment function.
+        self.mitigators  = {
+            "reweigh":"pre",
+            "dir": "pre",
+            "lfr": "pre",
+            "egr": "in",
+            "gsr": "in",
+            "roc":"pos",
+            "eqodds":"pos",
+            "ceop": "pos",
+        }
+        self.threshold   = threshold
+        # One dict per experiment, appended by run().
+        self.results = []
+        self.dlkwargs = dlkwargs
+        self.ekwargs = ekwargs
+    def run(self):
+        # Load the data once, then run the unmitigated experiment followed by one experiment per mitigator.
+        # A failing experiment is recorded in self.results with "error" and "info" keys instead of raising.
+        data_conf = self.ekwargs["data_conf"]
+        classifier = self.ekwargs["classifier"]
+        ckwargs    = self.ekwargs["classifier_kwargs"]
+        # Dict union: ekwargs wins on duplicate keys. Both must be dicts here; the None defaults of __init__ would raise TypeError.
+        # check_signatures presumably keeps only the arguments the loader accepts - TODO confirm against verifiers.py.
+        args = check_signatures(self.data_loader, self.dlkwargs|self.ekwargs)
+        train_data, cal_data, test_data = self.data_loader(**args)
+        X_train, y_train = train_data[0].copy(), train_data[1].copy()
+        X_cal, y_cal     = cal_data[0].copy(), cal_data[1].copy()
+        X_test, y_test   = test_data[0].copy(), test_data[1].copy()
+        # Run the original experiment
+        print("# Original - ", end="")
+        try:
+            self.results.append(original_experiment(X_train, y_train, X_test, y_test,
+                                                    data_conf.sensitive_attr, data_conf.target, self.seed, self.normalize, self.threshold,
+                                                    classifier=classifier, classifier_kwargs=ckwargs))
+        except Exception as e:
+            self.results.append({"original_classification_metrics": getMetrics(None), "error": e, 'info': traceback.format_tb(e.__traceback__)})
+        # NOTE(review): "OK" is printed whether or not the experiment raised.
+        print("OK", flush=True)
+        # Run the experiment with mitigators
+        for mitigator, exp_class in self.mitigators.items():
+            print(f"# {exp_class.upper()} - {mitigator.upper()} - ", end="")
+            # Fresh copies for every mitigator, taken from the data loaded above.
+            X_train, y_train = train_data[0].copy(), train_data[1].copy()
+            X_cal, y_cal     = cal_data[0].copy(), cal_data[1].copy()
+            X_test, y_test   = test_data[0].copy(), test_data[1].copy()
+            try:
+                if exp_class == "pre":
+                    self.results.append(pre_mitigator_experiment(X_train, y_train, X_cal, y_cal, X_test, y_test,
+                                                                data_conf.sensitive_attr, data_conf.target, mitigator, self.seed, self.normalize, self.threshold,
+                                                                classifier=classifier, classifier_kwargs=ckwargs))
+                elif exp_class == "pos":
+                    self.results.append(pos_mitigator_experiment(X_train, y_train, X_cal, y_cal, X_test, y_test,
+                                                                data_conf.sensitive_attr, data_conf.target, mitigator, self.seed, self.normalize, self.threshold,
+                                                                classifier=classifier, classifier_kwargs=ckwargs))
+                else:
+                    # Every class other than "pre" and "pos" (here: "in") goes to the in-processing experiment.
+                    self.results.append(in_mitigator_experiment(X_train, y_train, X_cal, y_cal, X_test, y_test,
+                                                                data_conf.sensitive_attr, data_conf.target, mitigator, self.seed, self.normalize, self.threshold,
+                                                                classifier=classifier, classifier_kwargs=ckwargs))
+            except Exception as e:
+                # Placeholder metrics (all 'inf') plus the error and traceback; only these failure records carry the "dp_method" key.
+                self.results.append({
+                    "mitigator": mitigator, "original_classification_metrics": getMetrics(None),
+                    "mitigated_classification_metrics": getMetrics(None), "error": e, "dp_method": True,
+                    'info': traceback.format_tb(e.__traceback__)
+                })
+            # Drop the per-mitigator copies before the next iteration.
+            X_train = None
+            y_train = None
+            X_cal   = None
+            y_cal   = None
+            X_test  = None
+            y_test  = None
+            del X_train, y_train, X_cal, y_cal, X_test, y_test
+            # NOTE(review): printed after failures as well.
+            print("OK", flush=True)
+        del train_data, cal_data, test_data
+        gc.collect()