PyPI - nkululeko - Versions diffs - 0.83.0__py3-none-any.whl → 0.83.2__py3-none-any.whl - Mend

nkululeko 0.83.0 (py3-none-any.whl) → 0.83.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

nkululeko/constants.py +1 -1
nkululeko/experiment.py +8 -4
nkululeko/feat_extract/feats_agender_agender.py +4 -2
nkululeko/feat_extract/feats_squim.py +8 -3
nkululeko/feat_extract/feats_wav2vec2.py +8 -7
nkululeko/feat_extract/feats_whisper.py +6 -3
nkululeko/models/model_cnn.py +14 -6
nkululeko/models/model_mlp.py +16 -7
nkululeko/models/model_mlp_regression.py +15 -7
nkululeko/nkuluflag.py +19 -6
nkululeko/plots.py +30 -15
nkululeko/test.py +20 -15
nkululeko/test_predictor.py +3 -0
{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/METADATA +9 -1
{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/RECORD +18 -19
nkululeko/reporter.py +0 -324
{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/LICENSE +0 -0
{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/WHEEL +0 -0
{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.83.0"
+VERSION="0.83.2"
 SAMPLING_RATE = 16000

nkululeko/experiment.py CHANGED Viewed

@@ -675,12 +675,16 @@ class Experiment:
         test_predictor = TestPredictor(
             model, self.df_test, self.label_encoder, result_name
         )
-        test_predictor.predict_and_store()
+        result = test_predictor.predict_and_store()
+        return result
     def load(self, filename):
-        f = open(filename, "rb")
-        tmp_dict = pickle.load(f)
-        f.close()
+        try:
+            f = open(filename, "rb")
+            tmp_dict = pickle.load(f)
+            f.close()
+        except EOFError as eof:
+            self.util.error(f"can't open file {filename}: {eof}")
         self.__dict__.update(tmp_dict)
         glob_conf.set_labels(self.labels)

nkululeko/feat_extract/feats_agender_agender.py CHANGED Viewed

@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
-        device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        device = self.util.config_val("MODEL", "device", cuda)
         self.model = audonnx.load(model_root, device=device)
         #        pytorch_total_params = sum(p.numel() for p in self.model.parameters())
         # self.util.debug(

nkululeko/feat_extract/feats_squim.py CHANGED Viewed

@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util
 class SquimSet(Featureset):
-    """Class to predict SQUIM features"""
+    """Class to predict SQUIM features."""
     def __init__(self, name, data_df, feats_type):
-        """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
+        """Constructor.
+        Is_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training.
+        """
         super().__init__(name, data_df, feats_type)
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         self.model_initialized = False
     def init_model(self):

nkululeko/feat_extract/feats_wav2vec2.py CHANGED Viewed

@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
     """Class to extract wav2vec2 embeddings"""
     def __init__(self, name, data_df, feat_type):
-        """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
+        """Constructor.
+        If_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training
+        """
         super().__init__(name, data_df, feat_type)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
         )
         config = transformers.AutoConfig.from_pretrained(model_path)
         layer_num = config.num_hidden_layers
-        hidden_layer = int(self.util.config_val(
-            "FEATS", "wav2vec2.layer", "0"))
+        hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
         config.num_hidden_layers = layer_num - hidden_layer
         self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val(
-            "FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series[idx] = emb
             # print(f"emb_series shape: {emb_series.shape}")
-            self.df = pd.DataFrame(
-                emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
             # print(f"df shape: {self.df.shape}")
             self.df.to_pickle(storage)
             try:

nkululeko/feat_extract/feats_whisper.py CHANGED Viewed

@@ -32,19 +32,22 @@ class Whisper(Featureset):
         model_name = f"openai/{self.feat_type}"
         self.model = WhisperModel.from_pretrained(model_name).to(self.device)
         print(f"intialized Whisper model on {self.device}")
-        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+        self.feature_extractor = AutoFeatureExtractor.from_pretrained(
+            model_name)
         self.model_initialized = True
     def extract(self):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug("extracting whisper embeddings, this might take a while...")
+            self.util.debug(
+                "extracting whisper embeddings, this might take a while...")
             emb_series = []
             for (file, start, end), _ in audeer.progress_bar(
                 self.data_df.iterrows(),

nkululeko/models/model_cnn.py CHANGED Viewed

@@ -16,6 +16,7 @@ import numpy as np
 from sklearn.metrics import recall_score
 from collections import OrderedDict
 from PIL import Image
+from traitlets import default
 from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
@@ -48,6 +49,7 @@ class CNN_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
         # set up the model
+        # cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", "cpu")
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
@@ -84,7 +86,8 @@ class CNN_model(Model):
         train_set = self.Dataset_image(
             feats_train, df_train, self.target, transformations
         )
-        test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
+        test_set = self.Dataset_image(
+            feats_test, df_test, self.target, transformations)
         # Define data loaders
         self.trainloader = torch.utils.data.DataLoader(
             train_set,
@@ -137,7 +140,8 @@ class CNN_model(Model):
         losses = []
         for images, labels in self.trainloader:
             logits = self.model(images.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -165,14 +169,16 @@ class CNN_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions
     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -209,7 +215,8 @@ class CNN_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -222,7 +229,8 @@ class CNN_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-            self.device = self.util.config_val("MODEL", "device", "cpu")
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)

nkululeko/models/model_mlp.py CHANGED Viewed

@@ -34,8 +34,9 @@ class MLP_model(Model):
         else:
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
-        # set up the model
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        # set up the model, use GPU if availabe
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
         except KeyError as ke:
@@ -86,7 +87,8 @@ class MLP_model(Model):
         losses = []
         for features, labels in self.trainloader:
             logits = self.model(features.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -114,14 +116,16 @@ class MLP_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions
     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -179,6 +183,9 @@ class MLP_model(Model):
             features = np.reshape(features, (-1, 1)).T
             logits = self.model(features.to(self.device))
             # logits = self.model(features)
+        # if tensor conver to cpu
+        if isinstance(logits, torch.Tensor):
+            logits = logits.cpu()
         a = logits.numpy()
         res = {}
         for i in range(len(a[0])):
@@ -196,7 +203,8 @@ class MLP_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -211,7 +219,8 @@ class MLP_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-            self.device = self.util.config_val("MODEL", "device", "cpu")
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)

nkululeko/models/model_mlp_regression.py CHANGED Viewed

@@ -9,6 +9,7 @@ import torch
 from audmetric import concordance_cc
 from audmetric import mean_absolute_error
 from audmetric import mean_squared_error
+from traitlets import default
 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"training model with {criterion} loss function")
         # set up the model
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers_string = glob_conf.config["MODEL"]["layers"]
         self.util.debug(f"using layers {layers_string}")
         try:
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
         drop = self.util.config_val("MODEL", "drop", False)
         if drop:
             self.util.debug(f"training with dropout: {drop}")
-        self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
+        self.model = self.MLP(
+            feats_train.shape[1], layers, 1, drop).to(self.device)
         self.learning_rate = float(
             self.util.config_val("MODEL", "learning_rate", 0.0001)
         )
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
+        result, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
+        report = Reporter(truths.numpy(), predictions.numpy(),
+                          self.run, self.epoch)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):
         def __getitem__(self, item):
             index = self.df.index[item]
-            features = self.df_features.loc[index, :].values.astype("float32").squeeze()
+            features = self.df_features.loc[index, :].values.astype(
+                "float32").squeeze()
             labels = (
-                np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
+                np.array([self.df.loc[index, self.label]]
+                         ).astype("float32").squeeze()
             )
             return features, labels
@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
                 end_index = (index + 1) * loader.batch_size
                 if end_index > len(loader.dataset):
                     end_index = len(loader.dataset)
-                logits[start_index:end_index] = model(features.to(device)).reshape(-1)
+                logits[start_index:end_index] = model(
+                    features.to(device)).reshape(-1)
                 targets[start_index:end_index] = labels
                 loss = self.criterion(
                     logits[start_index:end_index].to(

nkululeko/nkuluflag.py CHANGED Viewed

@@ -2,13 +2,16 @@ import argparse
 import configparser
 import os
 import os.path
+import sys
 from nkululeko.nkululeko import doit as nkulu
+from nkululeko.test import do_it as test_mod
-def do_it(src_dir):
+def doit(cla):
     parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
     parser.add_argument("--config", help="The base configuration")
+    parser.add_argument("--mod", default="nkulu", help="Which nkululeko module to call")
     parser.add_argument("--data", help="The databases", nargs="*", action="append")
     parser.add_argument(
         "--label", nargs="*", help="The labels for the target", action="append"
@@ -25,20 +28,23 @@ def do_it(src_dir):
     parser.add_argument("--model", default="xgb", help="The model type")
     parser.add_argument("--feat", default="['os']", help="The feature type")
     parser.add_argument("--set", help="The opensmile set")
-    parser.add_argument("--with_os", help="To add os features")
     parser.add_argument("--target", help="The target designation")
     parser.add_argument("--epochs", help="The number of epochs")
     parser.add_argument("--runs", help="The number of runs")
     parser.add_argument("--learning_rate", help="The learning rate")
     parser.add_argument("--drop", help="The dropout rate [0:1]")
-    args = parser.parse_args()
+    args = parser.parse_args(cla)
     if args.config is not None:
         config_file = args.config
     else:
         print("ERROR: need config file")
         quit(-1)
+    if args.mod is not None:
+        nkulu_mod = args.mod
     # test if config is there
     if not os.path.isfile(config_file):
         print(f"ERROR: no such file {config_file}")
@@ -86,10 +92,17 @@ def do_it(src_dir):
     with open(tmp_config, "w") as tmp_file:
         config.write(tmp_file)
-    result, last_epoch = nkulu(tmp_config)
+    result, last_epoch = 0, 0
+    if nkulu_mod == "nkulu":
+        result, last_epoch = nkulu(tmp_config)
+    elif nkulu_mod == "test":
+        result, last_epoch = test_mod(tmp_config, "test_results.csv")
+    else:
+        print(f"ERROR: unknown module: {nkulu_mod}, should be [nkulu | test]")
     return result, last_epoch
 if __name__ == "__main__":
-    cwd = os.path.dirname(os.path.abspath(__file__))
-    do_it(cwd)  # sys.argv[1])
+    cla = sys.argv
+    cla.pop(0)
+    doit(cla)  # sys.argv[1])

nkululeko/plots.py CHANGED Viewed

@@ -28,7 +28,8 @@ class Plots:
             df_speaker["samplenum"] = df_speaker.shape[0]
             df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
         # plot the distribution of samples per speaker
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         self.util.debug(f"plotting samples per speaker")
         if "gender" in df_speakers:
             filename = f"samples_value_counts"
@@ -137,7 +138,8 @@ class Plots:
                             df, att1, class_label, att1, type_s
                         )
                     else:
-                        ax, caption = self._plot2cont(df, class_label, att1, type_s)
+                        ax, caption = self._plot2cont(
+                            df, class_label, att1, type_s)
                 self._save_plot(
                     ax,
                     caption,
@@ -150,7 +152,8 @@ class Plots:
                 att1 = att[0]
                 att2 = att[1]
                 if att1 == self.target or att2 == self.target:
-                    self.util.debug(f"no need to correlate {self.target} with itself")
+                    self.util.debug(
+                        f"no need to correlate {self.target} with itself")
                     return
                 if att1 not in df:
                     self.util.error(f"unknown feature: {att1}")
@@ -165,7 +168,8 @@ class Plots:
                     if self.util.is_categorical(df[att1]):
                         if self.util.is_categorical(df[att2]):
                             # class_label = cat, att1 = cat, att2 = cat
-                            ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+                            ax, caption = self._plot2cat(
+                                df, att1, att2, att1, type_s)
                         else:
                             # class_label = cat, att1 = cat, att2 = cont
                             ax, caption = self._plotcatcont(
@@ -186,7 +190,8 @@ class Plots:
                     if self.util.is_categorical(df[att1]):
                         if self.util.is_categorical(df[att2]):
                             # class_label = cont, att1 = cat, att2 = cat
-                            ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+                            ax, caption = self._plot2cat(
+                                df, att1, att2, att1, type_s)
                         else:
                             # class_label = cont, att1 = cat, att2 = cont
                             ax, caption = self._plot2cont_cat(
@@ -200,7 +205,8 @@ class Plots:
                             )
                         else:
                             # class_label = cont, att1 = cont, att2 = cont
-                            ax, caption = self._plot2cont(df, att1, att2, type_s)
+                            ax, caption = self._plot2cont(
+                                df, att1, att2, type_s)
                 self._save_plot(
                     ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
@@ -213,7 +219,8 @@ class Plots:
                 )
     def _save_plot(self, ax, caption, header, filename, type_s):
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         fig = ax.figure
         # avoid warning
         # plt.tight_layout()
@@ -231,7 +238,8 @@ class Plots:
         )
     def _check_binning(self, att, df):
-        bin_reals_att = eval(self.util.config_val("EXPL", f"{att}.bin_reals", "False"))
+        bin_reals_att = eval(self.util.config_val(
+            "EXPL", f"{att}.bin_reals", "False"))
         if bin_reals_att:
             self.util.debug(f"binning continuous variable {att} to categories")
             att_new = f"{att}_binned"
@@ -305,7 +313,8 @@ class Plots:
         return ax, caption
     def plot_durations(self, df, filename, sample_selection, caption=""):
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         try:
             ax = sns.histplot(df, x="duration", hue="class_label", kde=True)
         except AttributeError as ae:
@@ -333,7 +342,8 @@ class Plots:
     def describe_df(self, name, df, target, filename):
         """Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        fig_dir = self.util.get_path(
+            "fig_dir") + "../"  # one up because of the runs
         sampl_num = df.shape[0]
         sex_col = "gender"
         if target == "gender":
@@ -380,8 +390,10 @@ class Plots:
     def scatter_plot(self, feats, label_df, label, dimred_type):
         dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
         filename = f"{fig_dir}{filename}.{self.format}"
         self.util.debug(f"computing {dimred_type}, this might take a while...")
@@ -423,7 +435,8 @@ class Plots:
         if dim_num == 2:
             plot_data = np.vstack((data.T, labels)).T
-            plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
+            plot_df = pd.DataFrame(
+                data=plot_data, columns=("Dim_1", "Dim_2", "label"))
             # plt.tight_layout()
             ax = (
                 sns.FacetGrid(plot_df, hue="label", height=6)
@@ -515,7 +528,8 @@ class Plots:
     def plot_feature(self, title, feature, label, df_labels, df_features):
         # remove fullstops in the name
         feature_name = feature.replace(".", "-")
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         filename = f"{fig_dir}feat_dist_{title}_{feature_name}.{self.format}"
         if self.util.is_categorical(df_labels[label]):
             df_plot = pd.DataFrame(
@@ -554,7 +568,8 @@ class Plots:
         tree.plot_tree(model, feature_names=list(features.columns), ax=ax)
         # plt.tight_layout()
         # print(ax)
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         exp_name = self.util.get_exp_name(only_data=True)
         format = self.util.config_val("PLOT", "format", "png")
         filename = f"{fig_dir}{exp_name}EXPL_tree-plot.{format}"

nkululeko/test.py CHANGED Viewed

@@ -10,20 +10,7 @@ from nkululeko.experiment import Experiment
 from nkululeko.utils.util import Util
-def main(src_dir):
-    parser = argparse.ArgumentParser(
-        description="Call the nkululeko TEST framework.")
-    parser.add_argument("--config", default="exp.ini",
-                        help="The base configuration")
-    parser.add_argument(
-        "--outfile",
-        default="my_results.csv",
-        help="File name to store the predictions",
-    )
-    args = parser.parse_args()
-    config_file = args.config
+def do_it(config_file, outfile):
     # test if the configuration file exists
     if not os.path.isfile(config_file):
@@ -48,10 +35,28 @@ def main(src_dir):
     expr.load(f"{util.get_save_name()}")
     expr.fill_tests()
     expr.extract_test_feats()
-    expr.predict_test_and_save(args.outfile)
+    result = expr.predict_test_and_save(outfile)
     print("DONE")
+    return result, 0
+def main(src_dir):
+    parser = argparse.ArgumentParser(description="Call the nkululeko TEST framework.")
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser.add_argument(
+        "--outfile",
+        default="my_results.csv",
+        help="File name to store the predictions",
+    )
+    args = parser.parse_args()
+    if args.config is not None:
+        config_file = args.config
+    else:
+        config_file = f"{src_dir}/exp.ini"
+    do_it(config_file, args.outfile)
 if __name__ == "__main__":
     cwd = os.path.dirname(os.path.abspath(__file__))

nkululeko/test_predictor.py CHANGED Viewed

@@ -29,6 +29,7 @@ class TestPredictor:
     def predict_and_store(self):
         label_data = self.util.config_val("DATA", "label_data", False)
+        result = 0
         if label_data:
             data = Dataset(label_data)
             data.load()
@@ -57,6 +58,7 @@ class TestPredictor:
             test_dbs_string = "_".join(test_dbs)
             predictions = self.model.get_predictions()
             report = self.model.predict()
+            result = report.result.get_result()
             report.set_filename_add(f"test-{test_dbs_string}")
             self.util.print_best_results([report])
             report.plot_confmatrix(self.util.get_plot_name(), 0)
@@ -74,3 +76,4 @@ class TestPredictor:
                 df = df.rename(columns={"class_label": target})
             df.to_csv(self.name)
         self.util.debug(f"results stored in {self.name}")
+        return result

{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.83.0
+Version: 0.83.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.83.2
+--------------
+* added default cuda if present and not stated
+Version 0.83.1
+--------------
+* add test module to nkuluflag
 Version 0.83.0
 --------------
 * test module now prints out reports

{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=NNx53OyRpXv780Ycj6Cdw4bDJfdvEn180CaN2PcmQkY,39
+nkululeko/constants.py,sha256=VE94aCLZ8N-hTKIgb4OLo1s9l_Fxncl9iTNis0eotFw,39
 nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
-nkululeko/experiment.py,sha256=SRcB0ni0XLK910NSWTyRAe-Eoa6fVSKDCJlDJKyCzMc,29574
+nkululeko/experiment.py,sha256=WyLiOJ_VxlaXoS1cwXruzYV9OESMjjedcFNreKE1Z8I,29728
 nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -15,18 +15,17 @@ nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
 nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
 nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
-nkululeko/nkuluflag.py,sha256=FCetTfgH69u4AwENgeCKVi3vBIR10Di67SfbupGQqfc,3354
+nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
-nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
+nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
 nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
-nkululeko/reporter.py,sha256=8mlIaKep4hM-tdRv8t98tK80rx3zOmVGXSORhiPc3as,12483
 nkululeko/resample.py,sha256=3WbxkwgyTe_fW38046Rjxk3knOkFdhqn2C4nfhbUurQ,2287
 nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
 nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
 nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
 nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
-nkululeko/test.py,sha256=JRoLgqQJEhAIGetw-qlOUihSTTQ7O8DYafB0FlQESIQ,1525
-nkululeko/test_predictor.py,sha256=L8XKrIweTf-oKeaGuDw_ZhtvzRUxFuWmOhva6jgf7-s,3148
+nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
+nkululeko/test_predictor.py,sha256=_w5J8CxH6hmW3mLTKbdfmywl5QpdNAnW1Y8TE5GtlfE,3237
 nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
 nkululeko/augmenting/randomsplicer.py,sha256=Z5rxdKKUpuncLWuTS6xVfVKUeVbeiYU_dLRHQ5fcg4Y,2669
@@ -49,7 +48,7 @@ nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,276
 nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
-nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
+nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
 nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
 nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
 nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
@@ -64,11 +63,11 @@ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq
 nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
 nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
 nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
-nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
+nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
 nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
 nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
-nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
+nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
 nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
 nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
 nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -77,13 +76,13 @@ nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv5
 nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/models/model.py,sha256=oAdKq2wY5lYKfpZkQwO46ojYRsj_Z-FR56oR1uHAWI0,11569
 nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
-nkululeko/models/model_cnn.py,sha256=j4NTp7quWqInzOPfpiMrTcfMbXkOsdlFF9ns0tW_ld4,9726
+nkululeko/models/model_cnn.py,sha256=revCxyeX69DU6OA63YTnF28UaAFV7AmUfqODMCE_pbQ,10002
 nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
 nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA,573
 nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
 nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
-nkululeko/models/model_mlp.py,sha256=lYhGrkqEj6fa6a_tcPrqEoorOpM7t7bjSfFLKEV6pu4,9107
-nkululeko/models/model_mlp_regression.py,sha256=NP1yEsqvpDcDBWWzDq7W4SHnXC1kE4fAo4A9aBCq3cY,10083
+nkululeko/models/model_mlp.py,sha256=IuNGrLPx54-ZmpydH2yJdm2ddCm4rgu59Csv5ikbEpI,9471
+nkululeko/models/model_mlp_regression.py,sha256=-ailThquUXwLkOj5jlJ4qn1vlb3nSHW5s0KS7GLp4qI,10290
 nkululeko/models/model_svm.py,sha256=QqwRjfG9I5y-57CcJAMUSbvYzV0DOlDcpDK5f4yQ_qw,914
 nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
 nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
@@ -104,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
-nkululeko-0.83.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.83.0.dist-info/METADATA,sha256=20S7IpMbLE7irV0ikdaFNfdqdBEEywH7jjlJwur8smA,36018
-nkululeko-0.83.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.83.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.83.0.dist-info/RECORD,,
+nkululeko-0.83.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.83.2.dist-info/METADATA,sha256=DMkXO8jSm6iR4eETrG2aEK__7MfPhpAvOe6Tf99n_HE,36158
+nkululeko-0.83.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.83.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.83.2.dist-info/RECORD,,

nkululeko/reporter.py DELETED Viewed

@@ -1,324 +0,0 @@
-"""Reporter module.
-This module contains the Reporter class which is responsible for generating reports.
-"""
-import ast
-import glob
-import json
-import math
-import matplotlib.pyplot as plt
-import numpy as np
-from scipy.stats import pearsonr
-from sklearn.metrics import ConfusionMatrixDisplay
-from sklearn.metrics import accuracy_score
-from sklearn.metrics import classification_report
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import mean_absolute_error
-from sklearn.metrics import mean_squared_error
-from sklearn.metrics import r2_score
-from sklearn.metrics import recall_score
-from sklearn.utils import resample
-import nkululeko.glob_conf as glob_conf
-from nkululeko.reporting.defines import Header
-from nkululeko.reporting.report_item import ReportItem
-from nkululeko.result import Result
-from nkululeko.utils.util import Util
-class Reporter:
-    def __set_measure(self):
-        if self.util.exp_is_classification():
-            self.MEASURE = "UAR"
-            self.result.measure = self.MEASURE
-            self.is_classification = True
-        else:
-            self.is_classification = False
-            self.measure = self.util.config_val("MODEL", "measure", "mse")
-            if self.measure == "mse":
-                self.MEASURE = "MSE"
-                self.result.measure = self.MEASURE
-            elif self.measure == "mae":
-                self.MEASURE = "MAE"
-                self.result.measure = self.MEASURE
-            elif self.measure == "ccc":
-                self.MEASURE = "CCC"
-                self.result.measure = self.MEASURE
-    def __init__(self, truths, preds, run, epoch):
-        """Initialization with ground truth und predictions vector"""
-        self.util = Util("reporter")
-        self.format = self.util.config_val("PLOT", "format", "png")
-        self.truths = truths
-        self.preds = preds
-        self.result = Result(0, 0, 0, 0, "unknown")
-        self.run = run
-        self.epoch = epoch
-        self.__set_measure()
-        self.cont_to_cat = False
-        if len(self.truths) > 0 and len(self.preds) > 0:
-            if self.util.exp_is_classification():
-                self.result.test = recall_score(
-                    self.truths, self.preds, average="macro"
-                )
-                self.result.loss = 1 - accuracy_score(self.truths, self.preds)
-            else:
-                # regression experiment
-                if self.measure == "mse":
-                    self.result.test = mean_squared_error(self.truths, self.preds)
-                elif self.measure == "mae":
-                    self.result.test = mean_absolute_error(self.truths, self.preds)
-                elif self.measure == "ccc":
-                    self.result.test = self.ccc(self.truths, self.preds)
-                    if math.isnan(self.result.test):
-                        self.util.debug(f"Truth: {self.truths}")
-                        self.util.debug(f"Predict.: {self.preds}")
-                        self.util.debug(f"Result is NAN: setting to -1")
-                        self.result.test = -1
-                else:
-                    self.util.error(f"unknown measure: {self.measure}")
-                # train and loss are being set by the model
-    def set_id(self, run, epoch):
-        """Make the report identifiable with run and epoch index"""
-        self.run = run
-        self.epoch = epoch
-    def continuous_to_categorical(self):
-        if self.cont_to_cat:
-            return
-        self.cont_to_cat = True
-        bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
-        self.truths = np.digitize(self.truths, bins) - 1
-        self.preds = np.digitize(self.preds, bins) - 1
-    def plot_confmatrix(self, plot_name, epoch):
-        if not self.util.exp_is_classification():
-            self.continuous_to_categorical()
-        self._plot_confmat(self.truths, self.preds, plot_name, epoch)
-def plot_per_speaker(self, result_df, plot_name, function):
-    """Plot a confusion matrix with the mode category per speakers.
-    This function creates a confusion matrix for each speaker in the result_df.
-    The result_df should contain the columns: preds, truths and speaker.
-    Args:
-        * result_df: a pandas dataframe with columns: preds, truths and speaker
-        * plot_name: a string with the name of the plot
-        * function: a string with the function to use for each speaker,
-        can be 'mode' or 'mean'
-    Returns:
-        * None
-    """
-    # Initialize empty arrays for predictions and truths
-    pred = np.zeros(0)
-    truth = np.zeros(0)
-    # Iterate over each speaker
-    for s in result_df.speaker.unique():
-        # Filter the dataframe for the current speaker
-        s_df = result_df[result_df.speaker == s]
-        # Get the mode or mean prediction for the current speaker
-        mode = s_df.pred.mode().iloc[-1]
-        mean = s_df.pred.mean()
-        if function == "mode":
-            s_df.pred = mode
-        elif function == "mean":
-            s_df.pred = mean
-        else:
-            self.util.error(f"unknown function {function}")
-        # Append the current speaker's predictions and truths to the arrays
-        pred = np.append(pred, s_df.pred.values)
-        truth = np.append(truth, s_df["truth"].values)
-    # If the experiment is not a classification or continuous to categorical conversion was performed,
-    # convert the truths and predictions to categorical
-    if not (self.is_classification or self.cont_to_cat):
-        bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
-        truth = np.digitize(truth, bins) - 1
-        pred = np.digitize(pred, bins) - 1
-    # Plot the confusion matrix for the speakers
-    self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
-    def _plot_confmat(self, truths, preds, plot_name, epoch):
-        # print(truths)
-        # print(preds)
-        fig_dir = self.util.get_path("fig_dir")
-        labels = glob_conf.labels
-        fig = plt.figure()  # figsize=[5, 5]
-        uar = recall_score(truths, preds, average="macro")
-        acc = accuracy_score(truths, preds)
-        cm = confusion_matrix(
-            truths, preds, normalize=None
-        )  # normalize must be one of {'true', 'pred', 'all', None}
-        if cm.shape[0] != len(labels):
-            self.util.error(
-                f"mismatch between confmatrix dim ({cm.shape[0]}) and labels"
-                f" length ({len(labels)}: {labels})"
-            )
-        try:
-            disp = ConfusionMatrixDisplay(
-                confusion_matrix=cm, display_labels=labels
-            ).plot(cmap="Blues")
-        except ValueError:
-            disp = ConfusionMatrixDisplay(
-                confusion_matrix=cm,
-                display_labels=list(labels).remove("neutral"),
-            ).plot(cmap="Blues")
-        reg_res = ""
-        if not self.is_classification:
-            reg_res = f", {self.MEASURE}: {self.result.test:.3f}"
-        if epoch != 0:
-            plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}, Epoch: {epoch}")
-        else:
-            plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}")
-        img_path = f"{fig_dir}{plot_name}.{self.format}"
-        plt.savefig(img_path)
-        fig.clear()
-        plt.close(fig)
-        plt.savefig(img_path)
-        plt.close(fig)
-        glob_conf.report.add_item(
-            ReportItem(
-                Header.HEADER_RESULTS,
-                self.util.get_model_description(),
-                "Confusion matrix",
-                img_path,
-            )
-        )
-        res_dir = self.util.get_path("res_dir")
-        uar = int(uar * 1000) / 1000.0
-        acc = int(acc * 1000) / 1000.0
-        rpt = f"epoch: {epoch}, UAR: {uar}, ACC: {acc}"
-        # print(rpt)
-        self.util.debug(rpt)
-        file_name = f"{res_dir}{self.util.get_exp_name()}_conf.txt"
-        with open(file_name, "w") as text_file:
-            text_file.write(rpt)
-    def print_results(self, epoch):
-        """Print all evaluation values to text file"""
-        res_dir = self.util.get_path("res_dir")
-        file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}.txt"
-        if self.util.exp_is_classification():
-            labels = glob_conf.labels
-            try:
-                rpt = classification_report(
-                    self.truths,
-                    self.preds,
-                    target_names=labels,
-                    output_dict=True,
-                )
-            except ValueError as e:
-                self.util.debug(
-                    "Reporter: caught a ValueError when trying to get"
-                    " classification_report: " + e
-                )
-                rpt = self.result.to_string()
-            with open(file_name, "w") as text_file:
-                c_ress = list(range(len(labels)))
-                for i, l in enumerate(labels):
-                    c_res = rpt[l]["f1-score"]
-                    c_ress[i] = float(f"{c_res:.3f}")
-                self.util.debug(f"labels: {labels}")
-                f1_per_class = f"result per class (F1 score): {c_ress}"
-                self.util.debug(f1_per_class)
-                rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
-                text_file.write(rpt_str)
-                glob_conf.report.add_item(
-                    ReportItem(
-                        Header.HEADER_RESULTS,
-                        f"Classification result {self.util.get_model_description()}",
-                        rpt_str,
-                    )
-                )
-        else:  # regression
-            result = self.result.test
-            r2 = r2_score(self.truths, self.preds)
-            pcc = pearsonr(self.truths, self.preds)[0]
-            measure = self.util.config_val("MODEL", "measure", "mse")
-            with open(file_name, "w") as text_file:
-                text_file.write(
-                    f"{measure}: {result:.3f}, r_2: {r2:.3f}, pcc {pcc:.3f}"
-                )
-    def make_conf_animation(self, out_name):
-        import imageio
-        fig_dir = self.util.get_path("fig_dir")
-        filenames = glob.glob(fig_dir + f"{self.util.get_plot_name()}*_?_???_cnf.png")
-        images = []
-        for filename in filenames:
-            images.append(imageio.imread(filename))
-        fps = self.util.config_val("PLOT", "fps", "1")
-        try:
-            imageio.mimsave(fig_dir + out_name, images, fps=int(fps))
-        except RuntimeError as e:
-            self.util.error("error writing anim gif: " + e)
-    def get_result(self):
-        return self.result
-    def plot_epoch_progression(self, reports, out_name):
-        fig_dir = self.util.get_path("fig_dir")
-        results, losses, train_results, losses_eval = [], [], [], []
-        for r in reports:
-            results.append(r.get_result().test)
-            losses.append(r.get_result().loss)
-            train_results.append(r.get_result().train)
-            losses_eval.append(r.get_result().loss_eval)
-        # do a plot per run
-        # scale the losses so they fit on the picture
-        losses, results, train_results, losses_eval = (
-            np.asarray(losses),
-            np.asarray(results),
-            np.asarray(train_results),
-            np.asarray(losses_eval),
-        )
-        if np.all((results > 1)):
-            # scale down values
-            results = results / 100.0
-            train_results = train_results / 100.0
-        # if np.all((losses < 1)):
-        # scale up values
-        plt.figure(dpi=200)
-        plt.plot(train_results, "green", label="train set")
-        plt.plot(results, "red", label="dev set")
-        plt.plot(losses, "black", label="losses")
-        plt.plot(losses_eval, "grey", label="losses_eval")
-        plt.xlabel("epochs")
-        plt.ylabel(f"{self.MEASURE}")
-        plt.legend()
-        plt.savefig(f"{fig_dir}{out_name}.{self.format}")
-        plt.close()
-    @staticmethod
-    def ccc(ground_truth, prediction):
-        mean_gt = np.mean(ground_truth, 0)
-        mean_pred = np.mean(prediction, 0)
-        var_gt = np.var(ground_truth, 0)
-        var_pred = np.var(prediction, 0)
-        v_pred = prediction - mean_pred
-        v_gt = ground_truth - mean_gt
-        cor = sum(v_pred * v_gt) / (np.sqrt(sum(v_pred**2)) * np.sqrt(sum(v_gt**2)))
-        sd_gt = np.std(ground_truth)
-        sd_pred = np.std(prediction)
-        numerator = 2 * cor * sd_gt * sd_pred
-        denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
-        ccc = numerator / denominator
-        return ccc

{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.83.0__py3-none-any.whl → 0.83.2__py3-none-any.whl

nkululeko 0.83.0__py3-none-any.whl → 0.83.2__py3-none-any.whl