nkululeko 0.83.1__tar.gz → 0.83.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.83.1 → nkululeko-0.83.3}/CHANGELOG.md +8 -0
- {nkululeko-0.83.1/nkululeko.egg-info → nkululeko-0.83.3}/PKG-INFO +9 -1
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/constants.py +1 -1
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/experiment.py +15 -8
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_agender_agender.py +4 -2
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_squim.py +8 -3
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_trill.py +31 -31
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_wav2vec2.py +8 -7
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_whisper.py +6 -3
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_cnn.py +14 -6
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_mlp.py +16 -7
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_mlp_regression.py +15 -7
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/plots.py +30 -15
- {nkululeko-0.83.1 → nkululeko-0.83.3/nkululeko.egg-info}/PKG-INFO +9 -1
- {nkululeko-0.83.1 → nkululeko-0.83.3}/LICENSE +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/README.md +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/androids/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/androids_orig/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/androids_test/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/ased/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/baved/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/cafe/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/clac/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/demos/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emns/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emovo/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/enterface/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/esd/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/jl/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/jtes/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/meld/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/mesd/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/mess/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/savee/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/shemo/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/subesco/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/tess/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/urdu/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/data/vivae/process_database.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/docs/source/conf.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augment.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/demo.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/explore.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/export.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/feature_extractor.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/multidb.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/predict.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/resample.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/scaler.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/segment.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/test.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/test_predictor.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko.egg-info/SOURCES.txt +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/pyproject.toml +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/setup.cfg +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/setup.py +0 -0
- {nkululeko-0.83.1 → nkululeko-0.83.3}/venv/bin/activate_this.py +0 -0
CHANGELOG.md
@@ -1,6 +1,14 @@
 Changelog
 =========

+Version 0.83.3
+--------------
+* fixed a naming error in trill features that prevented storage of experiment
+
+Version 0.83.2
+--------------
+* added default cuda if present and not stated
+
 Version 0.83.1
 --------------
 * add test module to nkuluflag

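The 0.83.2 entry ("added default cuda if present and not stated") corresponds to a pattern repeated across the diffs below: CUDA becomes the default device whenever a GPU is visible, while an explicit device entry in the [MODEL] config section still wins. A minimal sketch of that selection logic, using a plain dict and an illustrative `resolve_device` helper in place of nkululeko's own `config_val`:

import torch

def resolve_device(model_section: dict) -> str:
    # Default to CUDA when a GPU is available, otherwise CPU ...
    cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # ... but an explicit "device" entry in the [MODEL] section overrides it.
    return model_section.get("device", cuda)

print(resolve_device({}))                  # "cuda" on a GPU machine, else "cpu"
print(resolve_device({"device": "cpu"}))   # an explicit setting always wins
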
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.83.1
+Version: 0.83.3
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========

+Version 0.83.3
+--------------
+* fixed a naming error in trill features that prevented storage of experiment
+
+Version 0.83.2
+--------------
+* added default cuda if present and not stated
+
 Version 0.83.1
 --------------
 * add test module to nkuluflag

nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.83.1"
+VERSION="0.83.3"
 SAMPLING_RATE = 16000

nkululeko/experiment.py
@@ -679,9 +679,12 @@ class Experiment:
         return result

     def load(self, filename):
-
-
-
+        try:
+            f = open(filename, "rb")
+            tmp_dict = pickle.load(f)
+            f.close()
+        except EOFError as eof:
+            self.util.error(f"can't open file {filename}: {eof}")
         self.__dict__.update(tmp_dict)
         glob_conf.set_labels(self.labels)

@@ -689,22 +692,26 @@ class Experiment:
         if self.runmgr.modelrunner.model.is_ann():
             self.runmgr.modelrunner.model = None
             self.util.warn(
-
+                "Save experiment: Can't pickle the learning model so saving without it."
             )
         try:
             f = open(filename, "wb")
             pickle.dump(self.__dict__, f)
             f.close()
-        except TypeError:
+        except (TypeError, AttributeError) as error:
             self.feature_extractor.feat_extractor.model = None
             f = open(filename, "wb")
             pickle.dump(self.__dict__, f)
             f.close()
             self.util.warn(
-
+                "Save experiment: Can't pickle the feature extraction model so saving without it."
+                + f"{type(error).__name__} {error}"
+            )
+        except RuntimeError as error:
+            self.util.warn(
+                "Save experiment: Can't pickle local object, NOT saving: "
+                + f"{type(error).__name__} {error}"
             )
-        except (AttributeError, RuntimeError) as error:
-            self.util.warn(f"Save experiment: Can't pickle local object: {error}")

     def save_onnx(self, filename):
         # export the model to onnx

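The widened exception handling in save() follows a simple idea: try to pickle the experiment's `__dict__`, and if an unpicklable member (such as a loaded model) gets in the way, drop it and retry, warning instead of crashing. A rough stand-alone sketch of that pattern; the `Holder` class, its `model` attribute, and the file name are invented for this example and are not nkululeko's API:

import pickle

class Holder:
    """Toy object with one member that pickle cannot handle."""
    def __init__(self):
        self.results = [0.7, 0.8]
        self.model = lambda x: x      # a local lambda is not picklable

def save(obj, filename):
    try:
        with open(filename, "wb") as f:
            pickle.dump(obj.__dict__, f)
    except (TypeError, AttributeError, pickle.PicklingError) as error:
        # Drop the offending member and retry, analogous to how
        # experiment.save() discards its feature-extraction model.
        print(f"can't pickle the model, saving without it: {type(error).__name__} {error}")
        obj.model = None
        with open(filename, "wb") as f:
            pickle.dump(obj.__dict__, f)

save(Holder(), "experiment_state.pkl")
with open("experiment_state.pkl", "rb") as f:
    print(pickle.load(f))             # {'results': [0.7, 0.8], 'model': None}
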
nkululeko/feat_extract/feats_agender_agender.py
@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
-
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        device = self.util.config_val("MODEL", "device", cuda)
         self.model = audonnx.load(model_root, device=device)
         # pytorch_total_params = sum(p.numel() for p in self.model.parameters())
         # self.util.debug(

nkululeko/feat_extract/feats_squim.py
@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util


 class SquimSet(Featureset):
-    """Class to predict SQUIM features"""
+    """Class to predict SQUIM features."""

     def __init__(self, name, data_df, feats_type):
-        """Constructor.
+        """Constructor.
+
+        Is_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training.
+        """
         super().__init__(name, data_df, feats_type)
-
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         self.model_initialized = False

     def init_model(self):

nkululeko/feat_extract/feats_trill.py
@@ -1,35 +1,39 @@
 # feats_trill.py
-import tensorflow_hub as hub
 import os
+
+import pandas as pd
 import tensorflow as tf
-
+import tensorflow_hub as hub
 from tqdm import tqdm
-
+
 import audiofile as af
-
-import nkululeko.glob_conf as glob_conf
+
 from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.util import Util
+

 # Import TF 2.X and make sure we're running eager.
 assert tf.executing_eagerly()


 class TRILLset(Featureset):
-    """A feature extractor for the Google TRILL embeddings
+    """A feature extractor for the Google TRILL embeddings.

-
+    See https://ai.googleblog.com/2020/06/improving-speech-representations-and.html.
+    """

     # Initialization of the class
     def __init__(self, name, data_df, feats_type):
-        """
-        Initialize the class with name, data and Util instance
-        Also loads the model from hub
+        """Initialize the class with name, data and Util instance.

-
-        :
-
-
-
+        Also loads the model from hub
+        Args:
+            :param name: Name of the class
+            :type name: str
+            :param data_df: Data of the class
+            :type data_df: DataFrame
+            :return: None
         """
         super().__init__(name, data_df, feats_type)
         # Load the model from the configured path
@@ -38,25 +42,21 @@ class TRILLset(Featureset):
             "trill.model",
             "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
         )
-        self.
+        self.model = hub.load(model_path)
         self.feats_type = feats_type

     def extract(self):
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val(
-            "FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug(
-                "extracting TRILL embeddings, this might take a while...")
+            self.util.debug("extracting TRILL embeddings, this might take a while...")
             emb_series = pd.Series(index=self.data_df.index, dtype=object)
-            length = len(self.data_df.index)
             for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
-                emb = self.
-                emb_series[idx] = emb
-            self.df = pd.DataFrame(
-                emb_series.values.tolist(), index=self.data_df.index)
+                emb = self.get_embeddings(file)
+                emb_series.iloc[idx] = emb
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
             self.df.to_pickle(storage)
             try:
                 glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -70,15 +70,15 @@ class TRILLset(Featureset):
         if len(wav.shape) > 1:
             wav = tf.reduce_mean(wav, axis=0)

-        emb_dict = self.
+        emb_dict = self.model(samples=wav, sample_rate=tf.constant(16000))
         return emb_dict["embedding"]

-    def
+    def get_embeddings(self, file):
         wav = af.read(file)[0]
-        emb_short = self.
+        emb_short = self.get_embeddings_signal(wav, 16000)
         return emb_short

-    def
+    def get_embeddings_signal(self, signal, sr):
         wav = tf.convert_to_tensor(signal)
         emb_short = self.embed_wav(wav)
         # you get one embedding per frame, we use the mean for all the frames
@@ -86,7 +86,7 @@ class TRILLset(Featureset):
         return emb_short

     def extract_sample(self, signal, sr):
-        if self.
+        if self.model == None:
             self.__init__("na", None)
-        feats = self.
+        feats = self.get_embeddings_signal(signal, sr)
         return feats

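The trill extract() shown above is a cache-or-recompute loop: one embedding per file is collected into a pandas Series, turned into a DataFrame indexed like the input table, and pickled so the next run can simply reload it unless `no_reuse` or `needs_feature_extraction` forces recomputation. A minimal sketch of that pattern, not nkululeko's own classes; `dummy_embedding`, the file list, and the cache path are stand-ins:

import os
import numpy as np
import pandas as pd

def dummy_embedding(file: str, dim: int = 8) -> np.ndarray:
    return np.zeros(dim)                           # placeholder for a real model call

data_df = pd.DataFrame(index=["a.wav", "b.wav"])   # index plays the role of data_df.index
storage = "trill_cache.pkl"
no_reuse = False

if no_reuse or not os.path.isfile(storage):
    emb_series = pd.Series(index=data_df.index, dtype=object)
    for idx, file in enumerate(data_df.index):
        emb_series.iloc[idx] = dummy_embedding(file)
    df = pd.DataFrame(emb_series.values.tolist(), index=data_df.index)
    df.to_pickle(storage)                          # cache for later runs
else:
    df = pd.read_pickle(storage)                   # reuse the cached features
print(df.shape)
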
nkululeko/feat_extract/feats_wav2vec2.py
@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
     """Class to extract wav2vec2 embeddings"""

     def __init__(self, name, data_df, feat_type):
-        """Constructor.
+        """Constructor.
+
+        If_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training
+        """
         super().__init__(name, data_df, feat_type)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
         )
         config = transformers.AutoConfig.from_pretrained(model_path)
         layer_num = config.num_hidden_layers
-        hidden_layer = int(self.util.config_val(
-            "FEATS", "wav2vec2.layer", "0"))
+        hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
         config.num_hidden_layers = layer_num - hidden_layer
         self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val(
-            "FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series[idx] = emb
             # print(f"emb_series shape: {emb_series.shape}")
-            self.df = pd.DataFrame(
-                emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
             # print(f"df shape: {self.df.shape}")
             self.df.to_pickle(storage)
             try:

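The unchanged context in the second hunk shows how the `wav2vec2.layer` option works: the requested layer is counted from the top, and the model config's `num_hidden_layers` is reduced by that amount so the forward pass stops at the desired layer. A rough, self-contained illustration using a default `Wav2Vec2Config` (no model download; the variable values here are illustrative):

from transformers import Wav2Vec2Config

config = Wav2Vec2Config()            # the default config has 12 hidden layers
layer_num = config.num_hidden_layers
hidden_layer = 1                     # e.g. [FEATS] wav2vec2.layer = 1 -> second-to-last layer
config.num_hidden_layers = layer_num - hidden_layer
print(config.num_hidden_layers)      # 11: the encoder now stops one layer earlier
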
nkululeko/feat_extract/feats_whisper.py
@@ -32,19 +32,22 @@ class Whisper(Featureset):
         model_name = f"openai/{self.feat_type}"
         self.model = WhisperModel.from_pretrained(model_name).to(self.device)
         print(f"intialized Whisper model on {self.device}")
-        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+        self.feature_extractor = AutoFeatureExtractor.from_pretrained(
+            model_name)
         self.model_initialized = True

     def extract(self):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug("extracting whisper embeddings, this might take a while...")
+            self.util.debug(
+                "extracting whisper embeddings, this might take a while...")
             emb_series = []
             for (file, start, end), _ in audeer.progress_bar(
                 self.data_df.iterrows(),

nkululeko/models/model_cnn.py
@@ -16,6 +16,7 @@ import numpy as np
 from sklearn.metrics import recall_score
 from collections import OrderedDict
 from PIL import Image
+from traitlets import default

 from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
@@ -48,6 +49,7 @@ class CNN_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
         # set up the model
+        # cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", "cpu")
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
@@ -84,7 +86,8 @@ class CNN_model(Model):
         train_set = self.Dataset_image(
             feats_train, df_train, self.target, transformations
         )
-        test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
+        test_set = self.Dataset_image(
+            feats_test, df_test, self.target, transformations)
         # Define data loaders
         self.trainloader = torch.utils.data.DataLoader(
             train_set,
@@ -137,7 +140,8 @@ class CNN_model(Model):
         losses = []
         for images, labels in self.trainloader:
             logits = self.model(images.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -165,14 +169,16 @@ class CNN_model(Model):

         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions

     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -209,7 +215,8 @@ class CNN_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -222,7 +229,8 @@ class CNN_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)

nkululeko/models/model_mlp.py
@@ -34,8 +34,9 @@ class MLP_model(Model):
         else:
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
-        # set up the model
-
+        # set up the model, use GPU if availabe
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
         except KeyError as ke:
@@ -86,7 +87,8 @@ class MLP_model(Model):
         losses = []
         for features, labels in self.trainloader:
             logits = self.model(features.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -114,14 +116,16 @@ class MLP_model(Model):

         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions

     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -179,6 +183,9 @@ class MLP_model(Model):
         features = np.reshape(features, (-1, 1)).T
         logits = self.model(features.to(self.device))
         # logits = self.model(features)
+        # if tensor conver to cpu
+        if isinstance(logits, torch.Tensor):
+            logits = logits.cpu()
         a = logits.numpy()
         res = {}
         for i in range(len(a[0])):
@@ -196,7 +203,8 @@ class MLP_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -211,7 +219,8 @@ class MLP_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)

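The lines added to the prediction code above address a common PyTorch pitfall: `Tensor.numpy()` only works on CPU tensors, so output that may live on a GPU has to be moved back first. A small stand-alone illustration of the same guard (the logits tensor here is made up for the example):

import torch

# Build some fake logits on whatever device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
logits = torch.tensor([[0.2, 1.5, -0.3]], device=device)

# numpy() requires a CPU tensor, so move it back before converting,
# mirroring the guard added in model_mlp.py above.
if isinstance(logits, torch.Tensor):
    logits = logits.cpu()
probabilities = torch.softmax(logits, dim=1).numpy()
print(probabilities)
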
nkululeko/models/model_mlp_regression.py
@@ -9,6 +9,7 @@ import torch
 from audmetric import concordance_cc
 from audmetric import mean_absolute_error
 from audmetric import mean_squared_error
+from traitlets import default

 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"training model with {criterion} loss function")
         # set up the model
-
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers_string = glob_conf.config["MODEL"]["layers"]
         self.util.debug(f"using layers {layers_string}")
         try:
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
         drop = self.util.config_val("MODEL", "drop", False)
         if drop:
             self.util.debug(f"training with dropout: {drop}")
-        self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
+        self.model = self.MLP(
+            feats_train.shape[1], layers, 1, drop).to(self.device)
         self.learning_rate = float(
             self.util.config_val("MODEL", "learning_rate", 0.0001)
         )
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-
+        result, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
+        report = Reporter(truths.numpy(), predictions.numpy(),
+                          self.run, self.epoch)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):

         def __getitem__(self, item):
             index = self.df.index[item]
-            features = self.df_features.loc[index, :].values.astype("float32").squeeze()
+            features = self.df_features.loc[index, :].values.astype(
+                "float32").squeeze()
             labels = (
-                np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
+                np.array([self.df.loc[index, self.label]]
+                         ).astype("float32").squeeze()
             )
             return features, labels

@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
                 end_index = (index + 1) * loader.batch_size
                 if end_index > len(loader.dataset):
                     end_index = len(loader.dataset)
-                logits[start_index:end_index] = model(features.to(device)).reshape(-1)
+                logits[start_index:end_index] = model(
+                    features.to(device)).reshape(-1)
                 targets[start_index:end_index] = labels
                 loss = self.criterion(
                     logits[start_index:end_index].to(