PyPI - nkululeko - Versions diffs - 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl - Mend

nkululeko 0.86.8py3-none-any.whl → 0.88.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

nkululeko/constants.py +1 -1
nkululeko/data/dataset_csv.py +12 -14
nkululeko/demo.py +7 -10
nkululeko/ensemble.py +158 -0
nkululeko/feat_extract/feats_ast.py +118 -0
nkululeko/feat_extract/feats_wav2vec2.py +2 -4
nkululeko/feat_extract/feats_wavlm.py +7 -4
nkululeko/feature_extractor.py +5 -9
nkululeko/modelrunner.py +5 -5
nkululeko/models/model.py +23 -3
nkululeko/models/model_cnn.py +41 -22
nkululeko/models/model_mlp.py +37 -17
nkululeko/models/model_mlp_regression.py +3 -1
nkululeko/plots.py +25 -37
nkululeko/reporting/reporter.py +69 -6
nkululeko/runmanager.py +8 -11
nkululeko/test_predictor.py +2 -9
nkululeko/utils/stats.py +11 -7
nkululeko/utils/util.py +24 -19
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/METADATA +22 -1
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/RECORD +24 -22
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/WHEEL +1 -1
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/LICENSE +0 -0
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/top_level.txt +0 -0

nkululeko/models/model_mlp.py CHANGED Viewed

@@ -1,25 +1,33 @@
 # model_mlp.py
+import ast
+from collections import OrderedDict
+import numpy as np
 import pandas as pd
+from sklearn.metrics import recall_score
+import torch
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-import torch
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util
-class MLP_model(Model):
+class MLPModel(Model):
     """MLP = multi layer perceptron."""
     is_classifier = True
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes."""
+        """Constructor, taking all dataframes.
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
-    def evaluate_model(self, model, loader, device):
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
     def predict(self):
-        _, truths, predictions = self.evaluate_model(
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths, predictions, self.run, self.epoch)
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
         return report
     def get_predictions(self):
-        _, truths, predictions = self.evaluate_model(
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
     def get_loader(self, df_x, df_y, shuffle):

nkululeko/models/model_mlp_regression.py CHANGED Viewed

@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
             self.model, self.testloader, self.device
         )
         result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
+        report = Reporter(
+            truths.numpy(), predictions.numpy(), None, self.run, self.epoch
+        )
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown

nkululeko/plots.py CHANGED Viewed

@@ -48,7 +48,7 @@ class Plots:
             )
             ax.set_ylabel(f"number of speakers")
             ax.set_xlabel("number of samples")
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Samples per speaker",
                 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@ class Plots:
                     rot=0,
                 )
             )
-            ax.set_ylabel(f"number of speakers")
+            ax.set_ylabel("number of speakers")
             ax.set_xlabel("number of samples")
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@ class Plots:
             binned_data = self.util.continuous_to_categorical(df[class_label])
             ax = binned_data.value_counts().plot(kind="bar")
             filename_binned = f"{class_label}_discreet"
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 filename_binned,
@@ -106,7 +106,7 @@ class Plots:
             dist_type = self.util.config_val("EXPL", "dist_type", "hist")
             ax = df[class_label].plot(kind=dist_type)
-        self._save_plot(
+        self.save_plot(
             ax,
             "Sample value counts",
             filename,
@@ -131,17 +131,17 @@ class Plots:
                             df, class_label, att1, self.target, type_s
                         )
                     else:
-                        ax, caption = self._plotcatcont(
+                        ax, caption = self.plotcatcont(
                             df, class_label, att1, att1, type_s
                         )
                 else:
                     if self.util.is_categorical(df[att1]):
-                        ax, caption = self._plotcatcont(
+                        ax, caption = self.plotcatcont(
                             df, att1, class_label, att1, type_s
                         )
                     else:
                         ax, caption = self._plot2cont(df, class_label, att1, type_s)
-                self._save_plot(
+                self.save_plot(
                     ax,
                     caption,
                     f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@ class Plots:
                             ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
                         else:
                             # class_label = cat, att1 = cat, att2 = cont
-                            ax, caption = self._plotcatcont(
-                                df, att1, att2, att1, type_s
-                            )
+                            ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
                     else:
                         if self.util.is_categorical(df[att2]):
                             # class_label = cat, att1 = cont, att2 = cat
-                            ax, caption = self._plotcatcont(
-                                df, att2, att1, att2, type_s
-                            )
+                            ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
                         else:
                             # class_label = cat, att1 = cont, att2 = cont
                             ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@ class Plots:
                             # class_label = cont, att1 = cont, att2 = cont
                             ax, caption = self._plot2cont(df, att1, att2, type_s)
-                self._save_plot(
+                self.save_plot(
                     ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
                 )
@@ -215,16 +211,16 @@ class Plots:
                     f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
                 )
-    def _save_plot(self, ax, caption, header, filename, type_s):
+    def save_plot(self, ax, caption, header, filename, type_s):
         # one up because of the runs
         fig_dir = self.util.get_path("fig_dir") + "../"
-        fig = ax.figure
+        fig_plots = ax.figure
         # avoid warning
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
         plt.savefig(img_path)
-        plt.close(fig)
-        # fig.clear()   # avoid error
+        plt.close(fig_plots)
+        self.util.debug(f"Saved plot to {img_path}")
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@ class Plots:
         return att, df
     def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
-        """
-        plot relation of two continuous distributions with one categorical
-        """
+        """Plot relation of two continuous distributions with one categorical."""
         pearson = stats.pearsonr(df[cont1], df[cont2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.fig.suptitle(caption)
+        ax.figure.suptitle(caption)
         return ax, caption
     def _plot2cont(self, df, col1, col2, ylab):
-        """
-        plot relation of two continuous distributions
-        """
+        """Plot relation of two continuous distributions."""
         pearson = stats.pearsonr(df[col1], df[col2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=col1, y=col2)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.fig.suptitle(caption)
+        ax.figure.suptitle(caption)
         return ax, caption
-    def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
-        """
-        plot relation of categorical distribution with continuous
-        """
+    def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
+        """Plot relation of categorical distribution with continuous."""
         dist_type = self.util.config_val("EXPL", "dist_type", "hist")
         cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
         if dist_type == "hist":
@@ -287,13 +277,11 @@ class Plots:
             )
             ax.set(xlabel=f"{cont_col}")
             caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
-            ax.fig.suptitle(caption)
+            ax.figure.suptitle(caption)
         return ax, caption
     def _plot2cat(self, df, col1, col2, xlab, ylab):
-        """
-        plot relation of 2 categorical distributions
-        """
+        """Plot relation of 2 categorical distributions."""
         crosstab = pd.crosstab(index=df[col1], columns=df[col2])
         res_pval = stats.chi2_contingency(crosstab)
         res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@ class Plots:
         max = self.util.to_3_digits(df.duration.max())
         title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
         ax.set_title(title)
-        ax.set_xlabel(f"duration")
-        ax.set_ylabel(f"number of samples")
+        ax.set_xlabel("duration")
+        ax.set_ylabel("number of samples")
         fig = ax.figure
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"

nkululeko/reporting/reporter.py CHANGED Viewed

@@ -2,16 +2,21 @@ import ast
 import glob
 import json
 import math
+import os
 from confidence_intervals import evaluate_with_conf_int
 import matplotlib.pyplot as plt
 import numpy as np
+from scipy.special import softmax
+from scipy.stats import entropy
 from scipy.stats import pearsonr
-from sklearn.metrics import ConfusionMatrixDisplay, roc_curve
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.metrics import auc
 from sklearn.metrics import classification_report
 from sklearn.metrics import confusion_matrix
 from sklearn.metrics import r2_score
-from sklearn.metrics import roc_curve, auc, roc_auc_score
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_curve
 from torch import is_tensor
 from audmetric import accuracy
@@ -21,6 +26,7 @@ from audmetric import mean_squared_error
 from audmetric import unweighted_average_recall
 import nkululeko.glob_conf as glob_conf
+from nkululeko.plots import Plots
 from nkululeko.reporting.defines import Header
 from nkululeko.reporting.report_item import ReportItem
 from nkululeko.reporting.result import Result
@@ -46,9 +52,18 @@ class Reporter:
                 self.MEASURE = "CCC"
                 self.result.measure = self.MEASURE
-    def __init__(self, truths, preds, run, epoch):
-        """Initialization with ground truth und predictions vector."""
+    def __init__(self, truths, preds, run, epoch, probas=None):
+        """Initialization with ground truth und predictions vector.
+        Args:
+            truths (list): the ground truth
+            preds (list): the predictions
+            run (int): number of run
+            epoch (int): number of epoch
+            probas (pd.Dataframe, optional): probabilities per class. Defaults to None.
+        """
         self.util = Util("reporter")
+        self.probas = probas
         self.format = self.util.config_val("PLOT", "format", "png")
         self.truths = np.asarray(truths)
         self.preds = np.asarray(preds)
@@ -108,6 +123,47 @@ class Reporter:
                 self.result.test = test_result
                 self.result.set_upper_lower(upper, lower)
                 # train and loss are being set by the model
+        # print out the class probabilities
+    def print_probabilities(self):
+        """Print the probabilities per class to a file in the store."""
+        if (
+            self.util.exp_is_classification()
+            and self.probas is not None
+            and "uncertainty" not in self.probas
+        ):
+            probas = self.probas
+            probas["predicted"] = self.preds
+            probas["truth"] = self.truths
+            # softmax the probabilities or logits
+            uncertainty = probas.apply(softmax, axis=1)
+            try:
+                le = glob_conf.label_encoder
+                mapping = dict(zip(le.classes_, range(len(le.classes_))))
+                mapping_reverse = {value: key for key, value in mapping.items()}
+                probas = probas.rename(columns=mapping_reverse)
+                probas["predicted"] = probas["predicted"].map(mapping_reverse)
+                probas["truth"] = probas["truth"].map(mapping_reverse)
+            except AttributeError as ae:
+                self.util.debug(f"Can't label categories: {ae}")
+            # compute entropy per sample
+            uncertainty = uncertainty.apply(entropy)
+            # scale it to 0-1
+            max_ent = math.log(len(glob_conf.labels))
+            uncertainty = (uncertainty - uncertainty.min()) / (
+                max_ent - uncertainty.min()
+            )
+            probas["uncertainty"] = uncertainty
+            probas["correct"] = probas.predicted == probas.truth
+            sp = os.path.join(self.util.get_path("store"), "pred_df.csv")
+            self.probas = probas
+            probas.to_csv(sp)
+            self.util.debug(f"Saved probabilities to {sp}")
+            plots = Plots()
+            ax, caption = plots.plotcatcont(
+                probas, "correct", "uncertainty", "uncertainty", "correct"
+            )
+            plots.save_plot(ax, caption, "Uncertainty", "uncertainty", "samples")
     def set_id(self, run, epoch):
         """Make the report identifiable with run and epoch index."""
@@ -123,6 +179,12 @@ class Reporter:
         self.preds = np.digitize(self.preds, bins) - 1
     def plot_confmatrix(self, plot_name, epoch=None):
+        """Plot a confusionmatrix to the store.
+        Args:
+            plot_name (str): name for the image file.
+            epoch (int, optional): Number of epoch. Defaults to None.
+        """
         if not self.util.exp_is_classification():
             self.continuous_to_categorical()
         self._plot_confmat(self.truths, self.preds, plot_name, epoch)
@@ -212,10 +274,11 @@ class Reporter:
             )
         img_path = f"{fig_dir}{plot_name}{self.filenameadd}.{self.format}"
         plt.savefig(img_path)
+        self.util.debug(f"Saved confusion plot to {img_path}")
         fig.clear()
         plt.close(fig)
-        plt.savefig(img_path)
-        plt.close(fig)
+        plt.close()
+        plt.clf()
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_RESULTS,

nkululeko/runmanager.py CHANGED Viewed

@@ -11,7 +11,7 @@ from nkululeko.utils.util import Util
 class Runmanager:
-    """Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)"""
+    """Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)."""
     model = None  # The underlying model
     df_train, df_test, feats_train, feats_test = (
@@ -23,15 +23,14 @@ class Runmanager:
     reports = []
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor setting up the dataframes
+        """Constructor setting up the dataframes.
         Args:
             df_train: train dataframe
             df_test: test dataframe
             feats_train: train features
             feats_train: test features
-        Returns:
         """
         self.df_train, self.df_test, self.feats_train, self.feats_test = (
             df_train,
@@ -46,7 +45,7 @@ class Runmanager:
         # self._select_model(model_type)
     def do_runs(self):
-        """Start the runs"""
+        """Start the runs."""
         self.best_results = []  # keep the best result per run
         self.last_epochs = []  # keep the epoch of best result per run
         # for all runs
@@ -105,15 +104,13 @@ class Runmanager:
                 )
                 self.print_model(best_report, plot_name)
             # finally, print out the numbers for this run
-            # self.reports[-1].print_results(
-            #     int(self.util.config_val("EXP", "epochs", 1))
-            # )
             best_report.print_results(best_report.epoch)
+            best_report.print_probabilities()
             self.best_results.append(best_report)
             self.last_epochs.append(last_epoch)
     def print_best_result_runs(self):
-        """Print the best result for all runs"""
+        """Print the best result for all runs."""
         best_report = self.get_best_result(self.best_results)
         self.util.debug(
             f"best result all runs with run {best_report.run}             and"
@@ -177,7 +174,7 @@ class Runmanager:
         return self.load_model(best_report)
     def get_best_result(self, reports):
-        best_r = Reporter([], [], 0, 0)
+        best_r = Reporter([], [], None, 0, 0)
         if self.util.high_is_good():
             best_r = self.search_best_result(reports, "ascending")
         else:
@@ -185,7 +182,7 @@ class Runmanager:
         return best_r
     def search_best_result(self, reports, order):
-        best_r = Reporter([], [], 0, 0)
+        best_r = Reporter([], [], None, 0, 0)
         if order == "ascending":
             best_result = 0
             for r in reports:

nkululeko/test_predictor.py CHANGED Viewed

@@ -6,13 +6,12 @@
 import ast
-import numpy as np
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder
+import nkululeko.glob_conf as glob_conf
 from nkululeko.data.dataset import Dataset
 from nkululeko.feature_extractor import FeatureExtractor
-import nkululeko.glob_conf as glob_conf
 from nkululeko.scaler import Scaler
 from nkululeko.utils.util import Util
@@ -42,7 +41,6 @@ class TestPredictor:
             scale = self.util.config_val("FEATS", "scale", False)
             labelenc = LabelEncoder()
             data_df[self.target] = labelenc.fit_transform(data_df[self.target])
-            #            data_df[self.target] = self.label_encoder.fit_transform(data_df[self.target])
             if scale:
                 self.scaler = Scaler(data_df, None, feats_df, None, scale)
                 feats_df, _ = self.scaler.scale()
@@ -56,18 +54,13 @@ class TestPredictor:
         else:
             test_dbs = ast.literal_eval(glob_conf.config["DATA"]["tests"])
             test_dbs_string = "_".join(test_dbs)
-            predictions = self.model.get_predictions()
+            predictions, _ = self.model.get_predictions()
             report = self.model.predict()
             result = report.result.get_result()
             report.set_filename_add(f"test-{test_dbs_string}")
             self.util.print_best_results([report])
             report.plot_confmatrix(self.util.get_plot_name(), 0)
             report.print_results(0)
-            # print(predictions)
-            # df = pd.DataFrame(index=self.orig_df.index)
-            # df["speaker"] = self.orig_df["speaker"]
-            # df["gender"] = self.orig_df["gender"]
-            # df[self.target] = self.orig_df[self.target]
             df = self.orig_df.copy()
             df["predictions"] = self.label_encoder.inverse_transform(predictions)
             target = self.util.config_val("DATA", "target", "emotion")

nkululeko/utils/stats.py CHANGED Viewed

@@ -70,12 +70,16 @@ def get_effect_size(df, target, variable):
         cats[c] = df[df[target] == c][variable].values
     combos = all_combinations(categories)
     results = {}
-    for combo in combos:
-        one = combo[0]
-        other = combo[1]
-        results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
-    max_cat = max(results, key=results.get)
-    cat_s = cohens_D_to_string(float(results[max_cat]))
+    if len(categories) == 1:
+        cat_s = cohens_D_to_string(0)
+        return categories[0], cat_s, 0
+    else:
+        for combo in combos:
+            one = combo[0]
+            other = combo[1]
+            results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
+        max_cat = max(results, key=results.get)
+        cat_s = cohens_D_to_string(float(results[max_cat]))
     return max_cat, cat_s, results[max_cat]
@@ -92,7 +96,7 @@ def cohens_D_to_string(val):
 def normalize(values):
-    """Do a z-transformation of a distribution.
+    """Do a z-transformation of a distribution.
     So that mean = 0 and variance = 1
     """

nkululeko/utils/util.py CHANGED Viewed

@@ -37,8 +37,7 @@ class Util:
                 import nkululeko.glob_conf as glob_conf
                 self.config = glob_conf.config
-                self.got_data_roots = self.config_val(
-                    "DATA", "root_folders", False)
+                self.got_data_roots = self.config_val("DATA", "root_folders", False)
                 if self.got_data_roots:
                     # if there is a global data rootfolder file, read from
                     # there
@@ -108,19 +107,17 @@ class Util:
             if self.got_data_roots:
                 try:
                     if len(key) > 0:
-                        return self.data_roots["DATA"][dataset +
-                                                       "." + key].strip("'\"")
+                        return self.data_roots["DATA"][dataset + "." + key].strip("'\"")
                     else:
                         return self.data_roots["DATA"][dataset].strip("'\"")
                 except KeyError:
                     if default not in self.stopvals:
                         self.debug(
-                            f"value for {key} not found, using default:"
-                            f" {default}")
+                            f"value for {key} not found, using default:" f" {default}"
+                        )
                     return default
             if default not in self.stopvals:
-                self.debug(
-                    f"value for {key} not found, using default: {default}")
+                self.debug(f"value for {key} not found, using default: {default}")
             return default
     def set_config(self, config):
@@ -131,6 +128,10 @@ class Util:
         store = self.get_path("store")
         return f"{store}/{self.get_exp_name()}.pkl"
+    def get_pred_name(self):
+        store = self.get_path("store")
+        return f"{store}/pred_df.csv"
     def is_categorical(self, pd_series):
         """Check if a dataframe column is categorical"""
         return pd_series.dtype.name == "object" or isinstance(
@@ -163,10 +164,8 @@ class Util:
         if len(df) == 0:
             return df
         if not isinstance(df.index, pd.MultiIndex):
-            self.debug(
-                "converting to segmented index, this might take a while...")
-            df.index = audformat.utils.to_segmented_index(
-                df.index, allow_nat=False)
+            self.debug("converting to segmented index, this might take a while...")
+            df.index = audformat.utils.to_segmented_index(df.index, allow_nat=False)
         return df
     def _get_value_descript(self, section, name):
@@ -209,7 +208,11 @@ class Util:
         mt = f'{self.config["MODEL"]["type"]}'
         # ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
         ft_value = self.config["FEATS"]["type"]
-        if isinstance(ft_value, str) and ft_value.startswith("[") and ft_value.endswith("]"):
+        if (
+            isinstance(ft_value, str)
+            and ft_value.startswith("[")
+            and ft_value.endswith("]")
+        ):
             ft = "_".join(ast.literal_eval(ft_value))
         else:
             ft = ft_value
@@ -237,8 +240,9 @@ class Util:
             ["FEATS", "wav2vec2.layer"],
         ]
         for option in options:
-            return_string += self._get_value_descript(
-                option[0], option[1]).replace(".", "-")
+            return_string += self._get_value_descript(option[0], option[1]).replace(
+                ".", "-"
+            )
         return return_string
     def get_plot_name(self):
@@ -284,8 +288,7 @@ class Util:
             return self.config[section][key]
         except KeyError:
             if default not in self.stopvals:
-                self.debug(
-                    f"value for {key} not found, using default: {default}")
+                self.debug(f"value for {key} not found, using default: {default}")
             return default
     def config_val_list(self, section, key, default):
@@ -293,10 +296,12 @@ class Util:
             return ast.literal_eval(self.config[section][key])
         except KeyError:
             if default not in self.stopvals:
-                self.debug(
-                    f"value for {key} not found, using default: {default}")
+                self.debug(f"value for {key} not found, using default: {default}")
             return default
+    def get_labels(self):
+        return ast.literal_eval(self.config["DATA"]["labels"])
     def continuous_to_categorical(self, series):
         """
         discretize a categorical variable.

nkululeko 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl

nkululeko 0.86.8py3-none-any.whl → 0.88.0py3-none-any.whl