PyPI - nkululeko - Versions diffs - 0.81.6__py3-none-any.whl → 0.81.7__py3-none-any.whl - Mend

nkululeko 0.81.6 (py3-none-any.whl) → 0.81.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

nkululeko/constants.py +1 -1
nkululeko/feat_extract/feats_agender.py +6 -4
nkululeko/feat_extract/feats_auddim.py +5 -3
nkululeko/feat_extract/feats_audmodel.py +5 -3
nkululeko/feat_extract/feats_clap.py +10 -6
nkululeko/feat_extract/feats_hubert.py +3 -2
nkululeko/feat_extract/feats_import.py +2 -2
nkululeko/feat_extract/feats_mos.py +2 -2
nkululeko/feat_extract/feats_opensmile.py +10 -24
nkululeko/feat_extract/feats_oxbow.py +16 -11
nkululeko/feat_extract/feats_praat.py +8 -5
nkululeko/feat_extract/feats_spectra.py +3 -2
nkululeko/feat_extract/feats_squim.py +2 -2
nkululeko/feat_extract/feats_trill.py +10 -6
nkululeko/feat_extract/feats_wav2vec2.py +16 -7
nkululeko/feat_extract/feats_wavlm.py +1 -4
nkululeko/feat_extract/feats_whisper.py +110 -0
nkululeko/feat_extract/featureset.py +6 -3
nkululeko/feature_extractor.py +15 -4
{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/METADATA +6 -1
{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/RECORD +24 -23
{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/LICENSE +0 -0
{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/WHEEL +0 -0
{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.81.6"
+VERSION="0.81.7"
 SAMPLING_RATE = 16000

nkululeko/feat_extract/feats_agender.py CHANGED Viewed

@@ -9,16 +9,17 @@ import numpy as np
 import audinterface
-class AudModelAgenderSet(Featureset):
+class AgenderSet(Featureset):
     """
     Embeddings from the wav2vec2. based model finetuned on agender data, described in the paper
     "Speech-based Age and Gender Prediction with Transformers"
     https://arxiv.org/abs/2306.16962
     """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_type = feats_type
     def _load_model(self):
         model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
@@ -28,7 +29,8 @@ class AudModelAgenderSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         device = self.util.config_val("MODEL", "device", "cpu")
         self.model = audonnx.load(model_root, device=device)

nkululeko/feat_extract/feats_auddim.py CHANGED Viewed

@@ -21,9 +21,10 @@ class AuddimSet(Featureset):
     https://arxiv.org/abs/2203.07378.
     """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_types = feats_type
     def _load_model(self):
         model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -31,7 +32,8 @@ class AuddimSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         device = self.util.config_val("MODEL", "device", cuda)

nkululeko/feat_extract/feats_audmodel.py CHANGED Viewed

@@ -19,9 +19,10 @@ class AudmodelSet(Featureset):
     https://arxiv.org/abs/2203.07378.
     """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_type = feats_type
     def _load_model(self):
         model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -29,7 +30,8 @@ class AudmodelSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         device = self.util.config_val("MODEL", "device", cuda)

nkululeko/feat_extract/feats_clap.py CHANGED Viewed

@@ -11,14 +11,15 @@ import laion_clap
 import audiofile
-class Clap(Featureset):
+class ClapSet(Featureset):
     """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False
+        self.feat_type = feats_type
     def init_model(self):
         # load model
@@ -32,12 +33,14 @@ class Clap(Featureset):
         store = self.util.get_path("store")
         store_format = self.util.config_val("FEATS", "store_format", "pkl")
         storage = f"{store}{self.name}.{store_format}"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug("extracting clap embeddings, this might take a while...")
+            self.util.debug(
+                "extracting clap embeddings, this might take a while...")
             emb_series = pd.Series(index=self.data_df.index, dtype=object)
             length = len(self.data_df.index)
             for idx, (file, start, end) in enumerate(
@@ -51,7 +54,8 @@ class Clap(Featureset):
                 )
                 emb = self.get_embeddings(signal, sampling_rate)
                 emb_series[idx] = emb
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(
+                emb_series.values.tolist(), index=self.data_df.index)
             self.util.write_store(self.df, storage, store_format)
             try:
                 glob_conf.config["DATA"]["needs_feature_extraction"] = "false"

nkululeko/feat_extract/feats_hubert.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # feats_hubert.py
 # HuBERT feature extractor for Nkululeko
-# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
+# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
+# "hubert-base-ls960", hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"
 import os
@@ -22,7 +23,7 @@ class Hubert(Featureset):
     def __init__(self, name, data_df, feat_type):
         """Constructor. is_train is needed to distinguish from test/dev sets,
         because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feat_type)
         # check if device is not set, use cuda if available
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)

nkululeko/feat_extract/feats_import.py CHANGED Viewed

@@ -11,8 +11,8 @@ from nkululeko.feat_extract.featureset import Featureset
 class ImportSet(Featureset):
     """Class to import features that have been compiled elsewhere"""
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
     def extract(self):
         """Import the features."""

nkululeko/feat_extract/feats_mos.py CHANGED Viewed

@@ -27,9 +27,9 @@ from nkululeko.feat_extract.featureset import Featureset
 class MosSet(Featureset):
     """Class to predict MOS (mean opinion score)"""
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False

nkululeko/feat_extract/feats_opensmile.py CHANGED Viewed

@@ -8,31 +8,21 @@ import opensmile
 class Opensmileset(Featureset):
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type=None, config_file=None):
+        super().__init__(name, data_df, feats_type)
         self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
         try:
             self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
-            #'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
+            # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature set: {self.featset}"
-            )
+            self.util.error(f"something is wrong with feature set: {self.featset}")
         self.featlevel = self.util.config_val("FEATS", "level", "functionals")
         try:
-            self.featlevel = self.featlevel.replace(
-                "lld", "LowLevelDescriptors"
-            )
-            self.featlevel = self.featlevel.replace(
-                "functionals", "Functionals"
-            )
-            self.feature_level = eval(
-                f"opensmile.FeatureLevel.{self.featlevel}"
-            )
+            self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
+            self.featlevel = self.featlevel.replace("functionals", "Functionals")
+            self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature level: {self.featlevel}"
-            )
+            self.util.error(f"something is wrong with feature level: {self.featlevel}")
     def extract(self):
         """Extract the features based on the initialized dataset or re-open them when found on disk."""
@@ -44,9 +34,7 @@ class Opensmileset(Featureset):
         )
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or not os.path.isfile(storage) or no_reuse:
-            self.util.debug(
-                "extracting openSmile features, this might take a while..."
-            )
+            self.util.debug("extracting openSmile features, this might take a while...")
             smile = opensmile.Smile(
                 feature_set=self.feature_set,
                 feature_level=self.feature_level,
@@ -85,9 +73,7 @@ class Opensmileset(Featureset):
             selected_features = ast.literal_eval(
                 glob_conf.config["FEATS"]["os.features"]
             )
-            self.util.debug(
-                f"selecting features from opensmile: {selected_features}"
-            )
+            self.util.debug(f"selecting features from opensmile: {selected_features}")
             sel_feats_df = pd.DataFrame()
             hit = False
             for feat in selected_features:

nkululeko/feat_extract/feats_oxbow.py CHANGED Viewed

@@ -10,9 +10,10 @@ import opensmile
 class Openxbow(Featureset):
     """Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
-    def __init__(self, name, data_df, is_train=False):
+    def __init__(self, name, data_df, feats_type, is_train=False):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
+        self.feats_types = feats_type
         self.is_train = is_train
     def extract(self):
@@ -21,11 +22,13 @@ class Openxbow(Featureset):
         self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
         store = self.util.get_path("store")
         storage = f"{store}{self.name}_{self.featset}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             # extract smile features first
-            self.util.debug("extracting openSmile features, this might take a while...")
+            self.util.debug(
+                "extracting openSmile features, this might take a while...")
             smile = opensmile.Smile(
                 feature_set=self.feature_set,
                 feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
@@ -48,7 +51,13 @@ class Openxbow(Featureset):
             # save the smile features
             smile_df.to_csv(lld_name, sep=";", header=False)
             # get the path of the xbow java jar file
-            xbow_path = self.util.config_val("FEATS", "xbow.model", "../openXBOW/")
+            xbow_path = self.util.config_val(
+                "FEATS", "xbow.model", "openXBOW")
+            # check if JAR file exist
+            if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
+                # download using wget if not exist and locate in xbow_path
+                os.system(
+                    f"git clone https://github.com/openXBOW/openXBOW")
             # get the size of the codebook
             size = self.util.config_val("FEATS", "size", 500)
             # get the number of assignements
@@ -57,16 +66,12 @@ class Openxbow(Featureset):
             if self.is_train:
                 # store the codebook
                 os.system(
-                    f"java -jar {xbow_path}openXBOW.jar -i"
-                    f" {lld_name} -standardizeInput -log                     -o"
-                    f" {xbow_name} -size {size} -a {assignments} -B"
-                    f" {codebook_name}"
+                    f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
                 )
             else:
                 # use the codebook
                 os.system(
-                    f"java -jar {xbow_path}openXBOW.jar -i {lld_name}          "
-                    f"           -o {xbow_name} -b {codebook_name}"
+                    f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
                 )
             # read in the result from disk
             xbow_df = pd.read_csv(xbow_name, sep=";", header=None)

nkululeko/feat_extract/feats_praat.py CHANGED Viewed

@@ -18,18 +18,20 @@ class PraatSet(Featureset):
     """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
     def extract(self):
         """Extract the features based on the initialized dataset or re-open them when found on disk."""
         store = self.util.get_path("store")
         store_format = self.util.config_val("FEATS", "store_format", "pkl")
         storage = f"{store}{self.name}.{store_format}"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug("extracting Praat features, this might take a while...")
+            self.util.debug(
+                "extracting Praat features, this might take a while...")
             self.df = feinberg_praat.compute_features(self.data_df.index)
             self.df = self.df.set_index(self.data_df.index)
             for i, col in enumerate(self.df.columns):
@@ -52,7 +54,8 @@ class PraatSet(Featureset):
         self.df = self.df.astype(float)
     def extract_sample(self, signal, sr):
-        import audiofile, audformat
+        import audiofile
+        import audformat
         tmp_audio_names = ["praat_audio_tmp.wav"]
         audiofile.write(tmp_audio_names[0], signal, sr)

nkululeko/feat_extract/feats_spectra.py CHANGED Viewed

@@ -4,6 +4,7 @@ feats_spectra.py
 Inspired by code from Su Lei
 """
 import os
 import torchaudio
 import torchaudio.transforms as T
@@ -23,9 +24,9 @@ import nkululeko.glob_conf as glob_conf
 class Spectraloader(Featureset):
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feat_type):
         """Constructor setting the name"""
-        Featureset.__init__(self, name, data_df)
+        super().__init__(name, data_df, feat_type)
         self.sampling_rate = SAMPLING_RATE
         self.num_bands = int(self.util.config_val("FEATS", "fft_nbands", "64"))
         self.win_dur = int(self.util.config_val("FEATS", "fft_win_dur", "25"))

nkululeko/feat_extract/feats_squim.py CHANGED Viewed

@@ -30,9 +30,9 @@ from nkululeko.utils.util import Util
 class SquimSet(Featureset):
     """Class to predict SQUIM features"""
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False

nkululeko/feat_extract/feats_trill.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # feats_trill.py
+import tensorflow_hub as hub
 import os
 import tensorflow as tf
 from numpy.core.numeric import tensordot
@@ -11,7 +12,6 @@ from nkululeko.feat_extract.featureset import Featureset
 # Import TF 2.X and make sure we're running eager.
 assert tf.executing_eagerly()
-import tensorflow_hub as hub
 class TRILLset(Featureset):
@@ -20,7 +20,7 @@ class TRILLset(Featureset):
     """https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
     # Initialization of the class
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """
         Initialize the class with name, data and Util instance
         Also loads the model from hub
@@ -31,7 +31,7 @@ class TRILLset(Featureset):
         :type data_df: DataFrame
         :return: None
         """
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         # Load the model from the configured path
         model_path = self.util.config_val(
             "FEATS",
@@ -39,20 +39,24 @@ class TRILLset(Featureset):
             "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
         )
         self.module = hub.load(model_path)
+        self.feats_type = feats_type
     def extract(self):
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug("extracting TRILL embeddings, this might take a while...")
+            self.util.debug(
+                "extracting TRILL embeddings, this might take a while...")
             emb_series = pd.Series(index=self.data_df.index, dtype=object)
             length = len(self.data_df.index)
             for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
                 emb = self.getEmbeddings(file)
                 emb_series[idx] = emb
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(
+                emb_series.values.tolist(), index=self.data_df.index)
             self.df.to_pickle(storage)
             try:
                 glob_conf.config["DATA"]["needs_feature_extraction"] = "false"

nkululeko/feat_extract/feats_wav2vec2.py CHANGED Viewed

@@ -1,5 +1,11 @@
-# feats_wav2vec2.py
-# feat_types example = wav2vec2-large-robust-ft-swbd-300h
+""" feats_wav2vec2.py
+feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
+wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
+Complete list: https://huggingface.co/facebook?search_models=wav2vec2
+Currently only supports wav2vec2
+"""
 import os
 from tqdm import tqdm
 import pandas as pd
@@ -16,11 +22,11 @@ class Wav2vec2(Featureset):
     def __init__(self, name, data_df, feat_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feat_type)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
         self.model_initialized = False
-        if feat_type == "wav2vec" or feat_type == "wav2vec2":
+        if feat_type == "wav2vec2":
             self.feat_type = "wav2vec2-large-robust-ft-swbd-300h"
         else:
             self.feat_type = feat_type
@@ -33,7 +39,8 @@ class Wav2vec2(Featureset):
         )
         config = transformers.AutoConfig.from_pretrained(model_path)
         layer_num = config.num_hidden_layers
-        hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
+        hidden_layer = int(self.util.config_val(
+            "FEATS", "wav2vec2.layer", "0"))
         config.num_hidden_layers = layer_num - hidden_layer
         self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -48,7 +55,8 @@ class Wav2vec2(Featureset):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
@@ -69,7 +77,8 @@ class Wav2vec2(Featureset):
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series[idx] = emb
             # print(f"emb_series shape: {emb_series.shape}")
-            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(
+                emb_series.values.tolist(), index=self.data_df.index)
             # print(f"df shape: {self.df.shape}")
             self.df.to_pickle(storage)
             try:

nkululeko/feat_extract/feats_wavlm.py CHANGED Viewed

@@ -59,10 +59,7 @@ class Wavlm(Featureset):
                     frame_offset=int(start.total_seconds() * 16000),
                     num_frames=int((end - start).total_seconds() * 16000),
                 )
-                if sampling_rate != 16000:
-                    self.util.error(
-                        f"sampling rate should be 16000 but is {sampling_rate}"
-                    )
+                assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series.iloc[idx] = emb
             self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)

nkululeko/feat_extract/feats_whisper.py ADDED Viewed

@@ -0,0 +1,110 @@
+# feats_whisper.py
+import os
+import pandas as pd
+import torch
+from transformers import AutoFeatureExtractor
+from transformers import WhisperModel
+import audeer
+import audiofile
+from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
+class Whisper(Featureset):
+    """Class to extract whisper embeddings."""
+    def __init__(self, name, data_df, feat_type):
+        super().__init__(name, data_df, feat_type)
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
+        self.model_initialized = False
+        if feat_type == "whisper":
+            self.feat_type = "whisper-base"
+        else:
+            self.feat_type = feat_type
+    def init_model(self):
+        # load model
+        self.util.debug("loading whisper model...")
+        model_name = f"openai/{self.feat_type}"
+        self.model = WhisperModel.from_pretrained(model_name).to(self.device)
+        print(f"intialized Whisper model on {self.device}")
+        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+        self.model_initialized = True
+    def extract(self):
+        """Extract the features or load them from disk if present."""
+        store = self.util.get_path("store")
+        storage = f"{store}{self.name}.pkl"
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
+        if extract or no_reuse or not os.path.isfile(storage):
+            if not self.model_initialized:
+                self.init_model()
+            self.util.debug("extracting whisper embeddings, this might take a while...")
+            emb_series = []
+            for (file, start, end), _ in audeer.progress_bar(
+                self.data_df.iterrows(),
+                total=len(self.data_df),
+                desc=f"Running whisper on {len(self.data_df)} audiofiles",
+            ):
+                if end == pd.NaT:
+                    signal, sr = audiofile.read(file, offset=start)
+                else:
+                    signal, sr = audiofile.read(
+                        file, duration=end - start, offset=start
+                    )
+                emb = self.get_embeddings(signal, sr, file)
+                emb_series.append(emb)
+            # print(f"emb_series shape: {emb_series.shape}")
+            self.df = pd.DataFrame(emb_series, index=self.data_df.index)
+            # print(f"df shape: {self.df.shape}")
+            self.df.to_pickle(storage)
+            try:
+                glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
+            except KeyError:
+                pass
+        else:
+            self.util.debug("reusing extracted wav2vec2 embeddings")
+            self.df = pd.read_pickle(storage)
+            if self.df.isnull().values.any():
+                nanrows = self.df.columns[self.df.isna().any()].tolist()
+                # print(nanrows)
+                self.util.error(
+                    f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
+                )
+    def get_embeddings(self, signal, sampling_rate, file):
+        r"""Extract embeddings from raw audio signal."""
+        try:
+            with torch.no_grad():
+                embed_size = self.model.config.hidden_size
+                embed_columns = [f"whisper_{i}" for i in range(embed_size)]
+                inputs = self.feature_extractor(signal, sampling_rate=16000)[
+                    "input_features"
+                ][0]
+                inputs = torch.from_numpy(inputs).to(self.device).unsqueeze(0)
+                decoder_input_ids = (
+                    torch.tensor([[1, 1]]).to(self.device)
+                    * self.model.config.decoder_start_token_id
+                )
+                full_outputs = self.model(
+                    inputs,
+                    decoder_input_ids=decoder_input_ids,
+                    output_hidden_states=True,
+                )
+                outputs = full_outputs.encoder_last_hidden_state[0]
+                average_embeds = outputs.squeeze().mean(axis=0).cpu().detach().numpy()
+        except RuntimeError as re:
+            print(str(re))
+            self.util.error(f"couldn't extract file: {file}")
+        # print(f"y flattened shape: {y.ravel().shape}")
+        return average_embeds
+    def extract_sample(self, signal, sr):
+        self.init_model()
+        feats = self.get_embeddings(signal, sr, "no file")
+        return feats

nkululeko/feat_extract/featureset.py CHANGED Viewed

@@ -7,13 +7,15 @@ import ast
 class Featureset:
     name = ""  # designation
-    df = None  # pandas dataframe to store the features (and indexed with the data from the sets)
+    df = None  # pandas dataframe to store the features
+    # (and indexed with the data from the sets)
     data_df = None  # dataframe to get audio paths
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         self.name = name
         self.data_df = data_df
         self.util = Util("featureset")
+        self.feats_types = feats_type
     def extract(self):
         pass
@@ -23,7 +25,8 @@ class Featureset:
         self.df = self.df[self.df.index.isin(self.data_df.index)]
         try:
             # use only some features
-            selected_features = ast.literal_eval(glob_conf.config["FEATS"]["features"])
+            selected_features = ast.literal_eval(
+                glob_conf.config["FEATS"]["features"])
             self.util.debug(f"selecting features: {selected_features}")
             sel_feats_df = pd.DataFrame()
             hit = False

nkululeko/feature_extractor.py CHANGED Viewed

@@ -53,7 +53,7 @@ class FeatureExtractor:
         if feat_extractor_class is None:
             self.util.error(f"unknown feats_type: {feats_type}")
         return feat_extractor_class(
-            f"{store_name}_{self.feats_designation}", self.data_df
+            f"{store_name}_{self.feats_designation}", self.data_df, feats_type
         )
     def _get_feat_extractor_class(self, feats_type):
@@ -61,16 +61,27 @@ class FeatureExtractor:
             from nkululeko.feat_extract.feats_opensmile import Opensmileset
             return Opensmileset
         elif feats_type == "spectra":
             from nkululeko.feat_extract.feats_spectra import Spectraloader
             return Spectraloader
         elif feats_type == "trill":
             from nkululeko.feat_extract.feats_trill import TRILLset
             return TRILLset
-        elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")):
+        elif feats_type.startswith(
+            ("wav2vec2", "hubert", "wavlm", "spkrec", "whisper")
+        ):
             return self._get_feat_extractor_by_prefix(feats_type)
+        elif feats_type == "xbow":
+            from nkululeko.feat_extract.feats_oxbow import Openxbow
+            return Openxbow
         elif feats_type in (
             "audmodel",
             "auddim",
@@ -89,11 +100,11 @@ class FeatureExtractor:
             return None
     def _get_feat_extractor_by_prefix(self, feats_type):
-        prefix, _, ext = feats_type.partition("_")
+        prefix, _, ext = feats_type.partition("-")
         from importlib import import_module
         module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
-        class_name = f"{prefix.capitalize()}{ext.capitalize()}set"
+        class_name = f"{prefix.capitalize()}"
         return getattr(module, class_name)
     def _get_feat_extractor_by_name(self, feats_type):

{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.81.6
+Version: 0.81.7
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -323,6 +323,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.81.7
+--------------
+* bugfixes
+* added whisper feature extractor
 Version 0.81.6
 --------------
 * updated documentation

{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/RECORD RENAMED Viewed

@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=QwuK_rtVCEaN-oZZnh3s104ROC_O7hpEGY_ZMDKtRcw,39
+nkululeko/constants.py,sha256=7yZ6tYUvMMX3FdTsBGzuH-Hgw5ALAhmDCAiKRrOESM0,39
 nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
 nkululeko/experiment.py,sha256=CSEvQxK2_tzJyND5sUHQSc6MkRp1g6EVam8JX8txqps,29576
 nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
-nkululeko/feature_extractor.py,sha256=4UIvfh0m54286Y8q28aYBy-ojTFi3bWiActwBeAg_yE,3814
+nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
@@ -47,27 +47,28 @@ nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
 nkululeko/data/dataset_csv.py,sha256=v3lSjF23EVjoP460QOfhdcqbWAlBQWlBOuaYujZoS4s,3407
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/feat_extract/feats_agender.py,sha256=w13UsYsUTzMe5B2Rkg0sfvBXrVBBo0-Ljo532zkDXgM,3043
+nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
 nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
 nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
-nkululeko/feat_extract/feats_auddim.py,sha256=lMhKbEfnA0qKjRgMFx1xYpEhKCB8TZeFn8AJs_oqkvE,3083
-nkululeko/feat_extract/feats_audmodel.py,sha256=AO5BcrZ0QRD7--64WZq73KWvInM-8dVfgrLDQfq4sZ0,3109
-nkululeko/feat_extract/feats_clap.py,sha256=v82mbjdjGDSKUUBggttw7jW0oka22fWAmfUf-4VmaDU,3379
-nkululeko/feat_extract/feats_hubert.py,sha256=uL-9mgQHuGPQi1nuUaw6aNU9DscsO89uJAmBdmnCegM,5205
-nkululeko/feat_extract/feats_import.py,sha256=LMxFXSO2Ui7Jj2t9oBotqWe-je8rnDgkX4ay6StNrGk,1598
+nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
+nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
+nkululeko/feat_extract/feats_clap.py,sha256=nR6eEIRdsMHcfmD1bNtt5WfDvkxKjvEbukSSrXHm-HU,3489
+nkululeko/feat_extract/feats_hubert.py,sha256=ebj5PJtj-DcMudtnBWeY3_d_9pPFeEDEtP6NMDXIZNI,5289
+nkululeko/feat_extract/feats_import.py,sha256=rj1p8lz19tCAC8hLzzZAwZ0M6gzwH3BzfabFUgal0yw,1622
 nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
-nkululeko/feat_extract/feats_mos.py,sha256=SgsEw6_niVTUNO1tj92eUHBxKOeIcSHpYJBuHFvbJY8,4150
-nkululeko/feat_extract/feats_opensmile.py,sha256=yDRGSiUQV3K3oLxVqq8Cxj5bkc-RiLzDYbAGKC9I5vc,4140
-nkululeko/feat_extract/feats_oxbow.py,sha256=7W26NbEJnSckZzedolsIW1PJPSdCHhuh8YM19kOxaMA,4734
-nkululeko/feat_extract/feats_praat.py,sha256=6VCEU264bV-1lTuwfn0oCbQJ96J2WbyOU724Opg2_Ms,3037
+nkululeko/feat_extract/feats_mos.py,sha256=KXNt7QYEfxkvr6UyVhig2aWQBaIvovlrR4gPuP03gmo,4174
+nkululeko/feat_extract/feats_opensmile.py,sha256=vLY8HCpeOj9NdJXzt_GVI3Vxwsjf9cEfcqJ3IHqlTQY,3978
+nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5ZcK8Malyo,4980
+nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
 nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
-nkululeko/feat_extract/feats_spectra.py,sha256=PLKoc_S3v3wibodUCiOnFFdF87U2rk2sfndRo2mmG64,3656
+nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
 nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
-nkululeko/feat_extract/feats_squim.py,sha256=dDsWlTfXJeUnUD5XSVw4cfuf3XJ-MHfBHxx3xyFR5mE,4504
-nkululeko/feat_extract/feats_trill.py,sha256=PpygJK_W6QoBNeSah9npQPiQlJxLWFn6TSOaZUYehNU,3211
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=sFf-WkLUgKUQsFxGO9m2hS3uYoGkv95mZavCEZyWFGA,5072
-nkululeko/feat_extract/feats_wavlm.py,sha256=RhI0oWIsknnxTVmdnNS_xJO1NnUUR0CUNDWH1yTpNLk,4683
-nkululeko/feat_extract/featureset.py,sha256=-ynkdor8iX7BFx10aIbB3LfwxrrzPoBGz9kXwyAJO9M,1375
+nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
+nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
+nkululeko/feat_extract/feats_wavlm.py,sha256=8afzqZgHwDRrlHh4y5jnop4objURpXU_IrfiK6orsew,4604
+nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
+nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
 nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
 nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
@@ -102,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
-nkululeko-0.81.6.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.81.6.dist-info/METADATA,sha256=zYBbwBbVfPQFWfN8yjyjNGYmI6GMYsyeRoeb8FfS3gs,34905
-nkululeko-0.81.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.81.6.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.81.6.dist-info/RECORD,,
+nkululeko-0.81.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.81.7.dist-info/METADATA,sha256=7P8gRtSvPadRGBsWRhT34-Xj8jwkbL7OcLJ__AGtoQs,34981
+nkululeko-0.81.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.81.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.81.7.dist-info/RECORD,,

{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.81.6.dist-info → nkululeko-0.81.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.81.6__py3-none-any.whl → 0.81.7__py3-none-any.whl

nkululeko 0.81.6__py3-none-any.whl → 0.81.7__py3-none-any.whl