PyPI - nkululeko - Versions diffs - 0.88.7__py3-none-any.whl → 0.88.9__py3-none-any.whl - Mend

nkululeko 0.88.7__py3-none-any.whl → 0.88.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.88.7"
+VERSION="0.88.9"
 SAMPLING_RATE = 16000

nkululeko/ensemble.py CHANGED Viewed

@@ -1,3 +1,20 @@
+"""
+Ensemble predictions from multiple experiments.
+Args:
+    config_files (list): List of configuration file paths.
+    method (str): Ensemble method to use. Options are 'majority_voting', 'mean', 'max', 'sum', 'uncertainty', 'uncertainty_weighted', 'confidence_weighted', or 'performance_weighted'.
+    threshold (float): Threshold for the 'uncertainty' ensemble method (default: 1.0, i.e. no threshold).
+    weights (list): Weights for the 'performance_weighted' ensemble method.
+    no_labels (bool): Flag indicating whether the predictions have labels or not.
+Returns:
+    pandas.DataFrame: The ensemble predictions.
+Raises:
+    ValueError: If an unknown ensemble method is provided.
+    AssertionError: If the number of config files is less than 2 for majority voting.
+"""
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
@@ -45,45 +62,7 @@ def sum_ensemble(ensemble_preds, labels):
     return ensemble_preds[labels].idxmax(axis=1)
-def uncertainty_ensemble(ensemble_preds):
-    """Same as uncertainty_threshold with a threshold of 0.1"""
-    final_predictions = []
-    best_uncertainty = []
-    for _, row in ensemble_preds.iterrows():
-        uncertainties = row[["uncertainty"]].values
-        min_uncertainty_idx = np.argmin(uncertainties)
-        final_predictions.append(row["predicted"].iloc[min_uncertainty_idx])
-        best_uncertainty.append(uncertainties[min_uncertainty_idx])
-    return final_predictions, best_uncertainty
-def max_class_ensemble(ensemble_preds_ls, labels):
-    """Compare the highest probabilites of all models across classes (instead of same class as in max_ensemble) and return the highest probability and the class"""
-    final_preds = []
-    final_probs = []
-    for _, row in pd.concat(ensemble_preds_ls, axis=1).iterrows():
-        max_probs = []
-        max_classes = []
-        for model_df in ensemble_preds_ls:
-            model_probs = row[labels].astype(float)
-            max_prob = model_probs.max()
-            max_class = model_probs.idxmax()
-            max_probs.append(max_prob)
-            max_classes.append(max_class)
-        best_model_index = np.argmax(max_probs)
-        final_preds.append(max_classes[best_model_index])
-        final_probs.append(max_probs[best_model_index])
-    return pd.Series(final_preds), pd.Series(final_probs)
-def uncertainty_threshold_ensemble(ensemble_preds_ls, labels, threshold):
+def uncertainty_ensemble(ensemble_preds_ls, labels, threshold):
     final_predictions = []
     final_uncertainties = []
@@ -173,8 +152,40 @@ def confidence_weighted_ensemble(ensemble_preds_ls, labels):
     return final_predictions, final_confidences
+def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):
+    """Weighted ensemble based on performances"""
+    final_predictions = []
+    final_confidences = []
+    # asserts weiths in decimal 0-1
+    assert all(0 <= w <= 1 for w in weights), "Weights must be between 0 and 1"
+    # assert lenght of weights matches number of models
+    assert len(weights) == len(ensemble_preds_ls), "Number of weights must match number of models"
+    # Normalize weights
+    total_weight = sum(weights)
+    weights = [weight / total_weight for weight in weights]
+    for idx in ensemble_preds_ls[0].index:
+        class_probabilities = {label: 0 for label in labels}
+        for df, weight in zip(ensemble_preds_ls, weights):
+            row = df.loc[idx]
+            for label in labels:
+                class_probabilities[label] += row[label] * weight
+        predicted_class = max(class_probabilities, key=class_probabilities.get)
+        final_predictions.append(predicted_class)
+        final_confidences.append(max(class_probabilities.values()))
+    return final_predictions, final_confidences
 def ensemble_predictions(
-    config_files: List[str], method: str, threshold: float, no_labels: bool
+    config_files: List[str], method: str, threshold: float, weights: List[float], no_labels: bool
 ) -> pd.DataFrame:
     """
     Ensemble predictions from multiple experiments.
@@ -235,12 +246,8 @@ def ensemble_predictions(
         ensemble_preds["predicted"] = max_ensemble(ensemble_preds, labels)
     elif method == "sum":
         ensemble_preds["predicted"] = sum_ensemble(ensemble_preds, labels)
-    elif method == "max_class":
-        ensemble_preds["predicted"], ensemble_preds["max_probability"] = (
-            max_class_ensemble(ensemble_preds_ls, labels)
-        )
-    elif method == "uncertainty_threshold":
-        ensemble_preds["predicted"] = uncertainty_threshold_ensemble(
+    elif method == "uncertainty":
+        ensemble_preds["predicted"] = uncertainty_ensemble(
             ensemble_preds_ls, labels, threshold
         )
     elif method == "uncertainty_weighted":
@@ -251,6 +258,10 @@ def ensemble_predictions(
         ensemble_preds["predicted"], ensemble_preds["confidence"] = (
             confidence_weighted_ensemble(ensemble_preds_ls, labels)
         )
+    elif method == "performance_weighted":
+        ensemble_preds["predicted"], ensemble_preds["confidence"] = (
+            performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
+        )
     else:
         raise ValueError(f"Unknown ensemble method: {method}")
@@ -269,7 +280,6 @@ def ensemble_predictions(
     ensemble_preds = ensemble_preds.iloc[:, : len(labels) + 3]
     # calculate UAR from predicted and truth columns
     truth = ensemble_preds["truth"]
     predicted = ensemble_preds["predicted"]
     uar = balanced_accuracy_score(truth, predicted)
@@ -285,7 +295,7 @@ def main(src_dir: Path) -> None:
         "configs",
         nargs="+",
         help="Paths to the configuration files of the experiments to ensemble. \
-             Can be INI files for Nkululeko.nkululeo or CSV files from Nkululeko.demo.",
+             Can be INI files for Nkululeko.nkululeko or CSV files from Nkululeko.demo.",
     )
     parser.add_argument(
         "--method",
@@ -295,12 +305,13 @@ def main(src_dir: Path) -> None:
             "mean",
             "max",
             "sum",
-            "max_class",
+            # "max_class",
             # "uncertainty_lowest",
             # "entropy",
-            "uncertainty_threshold",
+            "uncertainty",
             "uncertainty_weighted",
             "confidence_weighted",
+            "performance_weighted",
         ],
         help=f"Ensemble method to use (default: {DEFAULT_METHOD})",
     )
@@ -316,6 +327,13 @@ def main(src_dir: Path) -> None:
         default=DEFAULT_OUTFILE,
         help=f"Output file path for the ensemble predictions (default: {DEFAULT_OUTFILE})",
     )
+    parser.add_argument(
+        "--weights",
+        default=None,
+        nargs="+",
+        type=float,
+        help="Weights for the ensemble method (default: None, e.g. 0.5 0.5)",
+    )
     parser.add_argument(
         "--no_labels",
         action="store_true",
@@ -327,7 +345,7 @@ def main(src_dir: Path) -> None:
     start = time.time()
     ensemble_preds = ensemble_predictions(
-        args.configs, args.method, args.threshold, args.no_labels
+        args.configs, args.method, args.threshold, args.weights, args.no_labels
     )
     # save to csv

{nkululeko-0.88.7.dist-info → nkululeko-0.88.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.88.7
+Version: 0.88.9
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -360,6 +360,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.88.9
+--------------
+* added performance_weighted ensemble
+Version 0.88.8
+--------------
+* some cosmetics
 Version 0.88.7
 --------------
 * added use_splits for multidb

{nkululeko-0.88.7.dist-info → nkululeko-0.88.9.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=p-kvGUZX0J2JPXoROES9PcftVSZ1B1GfzkBt6d8MJhY,39
+nkululeko/constants.py,sha256=tK1QIQ72lahwT47cOoEvhMfH2sH4BRnP3p6P7kdC_QQ,39
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
-nkululeko/ensemble.py,sha256=rUHg8YmD6L8Ktt2T5M6iwsWVWbpCnfiynhHdN22bLRQ,11873
+nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
 nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
 nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
 nkululeko/utils/util.py,sha256=KMxPzb0HN3XuNzAd7Kn3M3Nq91-0sDrAAEBgDKryCdo,16688
-nkululeko-0.88.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.88.7.dist-info/METADATA,sha256=VKwlkHohr4PJezcmZ45fVykmKmh1T6d2LCDvjR8Ierw,40017
-nkululeko-0.88.7.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
-nkululeko-0.88.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.88.7.dist-info/RECORD,,
+nkululeko-0.88.9.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.88.9.dist-info/METADATA,sha256=2NTuv6JzIYo9FbjMFT2zP_SuxZcBuagowGZ9YneOcOA,40134
+nkululeko-0.88.9.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+nkululeko-0.88.9.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.88.9.dist-info/RECORD,,

{nkululeko-0.88.7.dist-info → nkululeko-0.88.9.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.88.7.dist-info → nkululeko-0.88.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.88.7.dist-info → nkululeko-0.88.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.88.7__py3-none-any.whl → 0.88.9__py3-none-any.whl

nkululeko 0.88.7__py3-none-any.whl → 0.88.9__py3-none-any.whl