PyPI - nkululeko - Versions diffs - 0.86.8__tar.gz → 0.87.0__tar.gz - Mend

nkululeko 0.86.8 (tar.gz) → 0.87.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167) hide show

{nkululeko-0.86.8 → nkululeko-0.87.0}/CHANGELOG.md RENAMED Viewed

@@ -1,6 +1,10 @@
 Changelog
 =========
+Version 0.87.0
+--------------
+* added class probability output and uncertainty analysis
 Version 0.86.8
 --------------
 * handle single feature sets as strings in the config

{nkululeko-0.86.8/nkululeko.egg-info → nkululeko-0.87.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.86.8
+Version: 0.87.0
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -51,6 +51,7 @@ Requires-Dist: pylatex
   - [t-SNE plots](#t-sne-plots)
   - [Data distribution](#data-distribution)
   - [Bias checking](#bias-checking)
+  - [Uncertainty](#uncertainty)
 - [Documentation](#documentation)
 - [Installation](#installation)
 - [Usage](#usage)
@@ -113,6 +114,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
 <img src="meta/images/emotion-pesq.png" width="500px"/>
+### Uncertainty
+Nkululeko estimates uncertainty of model decision (only for classifiers) with entropy over the class-probabilities or logits per sample.
+<img src="meta/images/uncertainty.png" width="500px"/>
 ## Documentation
 The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
@@ -343,6 +351,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.87.0
+--------------
+* added class probability output and uncertainty analysis
 Version 0.86.8
 --------------
 * handle single feature sets as strings in the config

{nkululeko-0.86.8 → nkululeko-0.87.0}/README.md RENAMED Viewed

@@ -7,6 +7,7 @@
   - [t-SNE plots](#t-sne-plots)
   - [Data distribution](#data-distribution)
   - [Bias checking](#bias-checking)
+  - [Uncertainty](#uncertainty)
 - [Documentation](#documentation)
 - [Installation](#installation)
 - [Usage](#usage)
@@ -69,6 +70,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
 <img src="meta/images/emotion-pesq.png" width="500px"/>
+### Uncertainty
+Nkululeko estimates uncertainty of model decision (only for classifiers) with entropy over the class-probabilities or logits per sample.
+<img src="meta/images/uncertainty.png" width="500px"/>
 ## Documentation
 The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/constants.py RENAMED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.86.8"
+VERSION="0.87.0"
 SAMPLING_RATE = 16000

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/dataset_csv.py RENAMED Viewed

@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
         root = os.path.dirname(data_file)
         audio_path = self.util.config_val_data(self.name, "audio_path", "./")
         df = pd.read_csv(data_file)
+        # trim all string values
+        df_obj = df.select_dtypes("object")
+        df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
         # special treatment for segmented dataframes with only one column:
         if "start" in df.columns and len(df.columns) == 4:
             index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
                     .map(lambda x: root + "/" + audio_path + "/" + x)
                     .values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
                         lambda x: root + "/" + audio_path + "/" + x
                     )
                 )
-        else: # absolute path is True
+        else:  # absolute path is True
             if audformat.index_type(df.index) == "segmented":
                 file_index = (
-                    df.index.levels[0]
-                    .map(lambda x: audio_path + "/" + x)
-                    .values
+                    df.index.levels[0].map(lambda x: audio_path + "/" + x).values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
-                df = df.set_index(df.index.to_series().apply(
-                    lambda x: audio_path + "/" + x ))
+                df = df.set_index(
+                    df.index.to_series().apply(lambda x: audio_path + "/" + x)
+                )
         self.df = df
         self.db = None
         self.got_target = True
         self.is_labeled = self.got_target
-        self.start_fresh = eval(
-            self.util.config_val("DATA", "no_reuse", "False"))
+        self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
         is_index = False
         try:
             if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
                 f" {self.got_gender}, got age: {self.got_age}"
             )
         self.util.debug(r_string)
-        glob_conf.report.add_item(ReportItem(
-            "Data", "Loaded report", r_string))
+        glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
     def prepare(self):
         super().prepare()

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo.py RENAMED Viewed

@@ -30,10 +30,8 @@ from transformers import pipeline
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-        description="Call the nkululeko DEMO framework.")
-    parser.add_argument("--config", default="exp.ini",
-                        help="The base configuration")
+    parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
     parser.add_argument(
         "--file", help="A file that should be processed (16kHz mono wav)"
     )
@@ -84,8 +82,7 @@ def main(src_dir):
     )
     def print_pipe(files, outfile):
-        """
-        Prints the pipeline output for a list of files, and optionally writes the results to an output file.
+        """Prints the pipeline output for a list of files, and optionally writes the results to an output file.
         Args:
             files (list): A list of file paths to process through the pipeline.
@@ -108,8 +105,7 @@ def main(src_dir):
                 f.write("\n".join(results))
     if util.get_model_type() == "finetune":
-        model_path = os.path.join(
-            util.get_exp_dir(), "models", "run_0", "torch")
+        model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
         pipe = pipeline("audio-classification", model=model_path)
         if args.file is not None:
             print_pipe([args.file], args.outfile)

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/modelrunner.py RENAMED Viewed

@@ -85,7 +85,7 @@ class Modelrunner:
                     f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
                 )
                 # print(f"performance: {performance.split(' ')[1]}")
-                performance = float(test_score_metric.split(' ')[1])
+                performance = float(test_score_metric.split(" ")[1])
                 if performance > self.best_performance:
                     self.best_performance = performance
                     self.best_epoch = epoch
@@ -204,15 +204,15 @@ class Modelrunner:
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "cnn":
-            from nkululeko.models.model_cnn import CNN_model
+            from nkululeko.models.model_cnn import CNNModel
-            self.model = CNN_model(
+            self.model = CNNModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp":
-            from nkululeko.models.model_mlp import MLP_model
+            from nkululeko.models.model_mlp import MLPModel
-            self.model = MLP_model(
+            self.model = MLPModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp_reg":

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model.py RENAMED Viewed

@@ -247,8 +247,25 @@ class Model:
                 self.clf.fit(feats, labels)
     def get_predictions(self):
-        predictions = self.clf.predict(self.feats_test.to_numpy())
-        return predictions
+        #        predictions = self.clf.predict(self.feats_test.to_numpy())
+        if self.util.exp_is_classification():
+            # make a dataframe for the class probabilities
+            proba_d = {}
+            for c in self.clf.classes_:
+                proba_d[c] = []
+            # get the class probabilities
+            predictions = self.clf.predict_proba(self.feats_test.to_numpy())
+            # pred = self.clf.predict(features)
+            for i, c in enumerate(self.clf.classes_):
+                proba_d[c] = list(predictions.T[i])
+            probas = pd.DataFrame(proba_d)
+            probas = probas.set_index(self.feats_test.index)
+            predictions = probas.idxmax(axis=1).values
+        else:
+            predictions = self.clf.predict(self.feats_test.to_numpy())
+            probas = None
+        return predictions, probas
     def predict(self):
         if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@ class Model:
             )
             return report
         """Predict the whole eval feature set"""
-        predictions = self.get_predictions()
+        predictions, probas = self.get_predictions()
         report = Reporter(
             self.df_test[self.target].to_numpy().astype(float),
             predictions,
             self.run,
             self.epoch,
+            probas=probas,
         )
+        report.print_probabilities()
         return report
     def get_type(self):

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_cnn.py RENAMED Viewed

@@ -5,33 +5,40 @@ Inspired by code from Su Lei
 """
+import ast
+from collections import OrderedDict
+import numpy as np
+import pandas as pd
+from PIL import Image
+from sklearn.metrics import recall_score
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
-import torchvision.transforms as transforms
 from torch.utils.data import Dataset
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from PIL import Image
-from traitlets import default
+import torchvision.transforms as transforms
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util
-class CNN_model(Model):
-    """CNN = convolutional neural net"""
+class CNNModel(Model):
+    """CNN = convolutional neural net."""
     is_classifier = True
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes"""
+        """Constructor, taking all dataframes.
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
-    def evaluate_model(self, model, loader, device):
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
     def predict(self):
-        _, truths, predictions = self.evaluate_model(
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths, predictions, self.run, self.epoch)
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
         return report
     def get_predictions(self):
-        _, truths, predictions = self.evaluate_model(
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
     def predict_sample(self, features):
-        """Predict one sample"""
+        """Predict one sample."""
         with torch.no_grad():
             logits = self.model(torch.from_numpy(features).to(self.device))
         a = logits.numpy()

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp.py RENAMED Viewed

@@ -1,25 +1,33 @@
 # model_mlp.py
+import ast
+from collections import OrderedDict
+import numpy as np
 import pandas as pd
+from sklearn.metrics import recall_score
+import torch
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-import torch
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util
-class MLP_model(Model):
+class MLPModel(Model):
     """MLP = multi layer perceptron."""
     is_classifier = True
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes."""
+        """Constructor, taking all dataframes.
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
-    def evaluate_model(self, model, loader, device):
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
     def predict(self):
-        _, truths, predictions = self.evaluate_model(
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths, predictions, self.run, self.epoch)
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
         return report
     def get_predictions(self):
-        _, truths, predictions = self.evaluate_model(
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
     def get_loader(self, df_x, df_y, shuffle):

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp_regression.py RENAMED Viewed

@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
             self.model, self.testloader, self.device
         )
         result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
+        report = Reporter(
+            truths.numpy(), predictions.numpy(), None, self.run, self.epoch
+        )
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown

{nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/plots.py RENAMED Viewed

@@ -48,7 +48,7 @@ class Plots:
             )
             ax.set_ylabel(f"number of speakers")
             ax.set_xlabel("number of samples")
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Samples per speaker",
                 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@ class Plots:
                     rot=0,
                 )
             )
-            ax.set_ylabel(f"number of speakers")
+            ax.set_ylabel("number of speakers")
             ax.set_xlabel("number of samples")
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@ class Plots:
             binned_data = self.util.continuous_to_categorical(df[class_label])
             ax = binned_data.value_counts().plot(kind="bar")
             filename_binned = f"{class_label}_discreet"
-            self._save_plot(
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 filename_binned,
@@ -106,7 +106,7 @@ class Plots:
             dist_type = self.util.config_val("EXPL", "dist_type", "hist")
             ax = df[class_label].plot(kind=dist_type)
-        self._save_plot(
+        self.save_plot(
             ax,
             "Sample value counts",
             filename,
@@ -131,17 +131,17 @@ class Plots:
                             df, class_label, att1, self.target, type_s
                         )
                     else:
-                        ax, caption = self._plotcatcont(
+                        ax, caption = self.plotcatcont(
                             df, class_label, att1, att1, type_s
                         )
                 else:
                     if self.util.is_categorical(df[att1]):
-                        ax, caption = self._plotcatcont(
+                        ax, caption = self.plotcatcont(
                             df, att1, class_label, att1, type_s
                         )
                     else:
                         ax, caption = self._plot2cont(df, class_label, att1, type_s)
-                self._save_plot(
+                self.save_plot(
                     ax,
                     caption,
                     f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@ class Plots:
                             ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
                         else:
                             # class_label = cat, att1 = cat, att2 = cont
-                            ax, caption = self._plotcatcont(
-                                df, att1, att2, att1, type_s
-                            )
+                            ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
                     else:
                         if self.util.is_categorical(df[att2]):
                             # class_label = cat, att1 = cont, att2 = cat
-                            ax, caption = self._plotcatcont(
-                                df, att2, att1, att2, type_s
-                            )
+                            ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
                         else:
                             # class_label = cat, att1 = cont, att2 = cont
                             ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@ class Plots:
                             # class_label = cont, att1 = cont, att2 = cont
                             ax, caption = self._plot2cont(df, att1, att2, type_s)
-                self._save_plot(
+                self.save_plot(
                     ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
                 )
@@ -215,16 +211,16 @@ class Plots:
                     f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
                 )
-    def _save_plot(self, ax, caption, header, filename, type_s):
+    def save_plot(self, ax, caption, header, filename, type_s):
         # one up because of the runs
         fig_dir = self.util.get_path("fig_dir") + "../"
-        fig = ax.figure
+        fig_plots = ax.figure
         # avoid warning
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
         plt.savefig(img_path)
-        plt.close(fig)
-        # fig.clear()   # avoid error
+        plt.close(fig_plots)
+        self.util.debug(f"Saved plot to {img_path}")
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@ class Plots:
         return att, df
     def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
-        """
-        plot relation of two continuous distributions with one categorical
-        """
+        """Plot relation of two continuous distributions with one categorical."""
         pearson = stats.pearsonr(df[cont1], df[cont2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.fig.suptitle(caption)
+        ax.figure.suptitle(caption)
         return ax, caption
     def _plot2cont(self, df, col1, col2, ylab):
-        """
-        plot relation of two continuous distributions
-        """
+        """Plot relation of two continuous distributions."""
         pearson = stats.pearsonr(df[col1], df[col2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=col1, y=col2)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.fig.suptitle(caption)
+        ax.figure.suptitle(caption)
         return ax, caption
-    def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
-        """
-        plot relation of categorical distribution with continuous
-        """
+    def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
+        """Plot relation of categorical distribution with continuous."""
         dist_type = self.util.config_val("EXPL", "dist_type", "hist")
         cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
         if dist_type == "hist":
@@ -287,13 +277,11 @@ class Plots:
             )
             ax.set(xlabel=f"{cont_col}")
             caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
-            ax.fig.suptitle(caption)
+            ax.figure.suptitle(caption)
         return ax, caption
     def _plot2cat(self, df, col1, col2, xlab, ylab):
-        """
-        plot relation of 2 categorical distributions
-        """
+        """Plot relation of 2 categorical distributions."""
         crosstab = pd.crosstab(index=df[col1], columns=df[col2])
         res_pval = stats.chi2_contingency(crosstab)
         res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@ class Plots:
         max = self.util.to_3_digits(df.duration.max())
         title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
         ax.set_title(title)
-        ax.set_xlabel(f"duration")
-        ax.set_ylabel(f"number of samples")
+        ax.set_xlabel("duration")
+        ax.set_ylabel("number of samples")
         fig = ax.figure
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"

nkululeko 0.86.8__tar.gz → 0.87.0__tar.gz

nkululeko 0.86.8 (tar.gz) → 0.87.0 (tar.gz)