nkululeko-0.81.4-py3-none-any.whl → nkululeko-0.81.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/estimate_snr.py +17 -6
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +9 -2
- nkululeko/demo.py +20 -5
- nkululeko/demo_predictor.py +6 -3
- nkululeko/experiment.py +1 -1
- nkululeko/explore.py +13 -8
- nkululeko/feat_extract/feats_agender.py +7 -8
- nkululeko/feat_extract/{feats_audmodel_dim.py → feats_auddim.py} +10 -7
- nkululeko/feat_extract/feats_audmodel.py +10 -7
- nkululeko/feat_extract/feats_clap.py +10 -6
- nkululeko/feat_extract/feats_hubert.py +3 -2
- nkululeko/feat_extract/feats_import.py +3 -3
- nkululeko/feat_extract/feats_mos.py +4 -3
- nkululeko/feat_extract/feats_opensmile.py +10 -24
- nkululeko/feat_extract/feats_oxbow.py +16 -11
- nkululeko/feat_extract/feats_praat.py +18 -13
- nkululeko/feat_extract/feats_snr.py +17 -9
- nkululeko/feat_extract/feats_spectra.py +3 -2
- nkululeko/feat_extract/feats_squim.py +15 -18
- nkululeko/feat_extract/feats_trill.py +10 -6
- nkululeko/feat_extract/feats_wav2vec2.py +16 -7
- nkululeko/feat_extract/feats_wavlm.py +1 -4
- nkululeko/feat_extract/feats_whisper.py +110 -0
- nkululeko/feat_extract/featureset.py +6 -3
- nkululeko/feature_extractor.py +83 -148
- nkululeko/multidb.py +18 -12
- nkululeko/predict.py +26 -8
- nkululeko/reporter.py +332 -0
- nkululeko/resample.py +12 -7
- nkululeko/runmanager.py +17 -8
- nkululeko/test.py +9 -6
- nkululeko/test_predictor.py +1 -0
- nkululeko/utils/stats.py +12 -5
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.7.dist-info}/METADATA +16 -1
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.7.dist-info}/RECORD +39 -37
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.7.dist-info}/LICENSE +0 -0
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.7.dist-info}/WHEEL +0 -0
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.7.dist-info}/top_level.txt +0 -0
nkululeko/autopredict/estimate_snr.py
CHANGED
@@ -1,20 +1,30 @@
 # estimate.snr
-
+"""
+Module for estimating SNR (signal to noise ratio) from an audio signal.
+
+This module provides a class `SNREstimator` which calculates the SNR based on
+the log energy and energy thresholds of the audio signal.
+
+"""
+
+import argparse
+
 import audiofile
 import matplotlib.pyplot as plt
+import numpy as np
 from scipy.signal.windows import hamming
-import argparse
 
 
 class SNREstimator:
-    """Estimate SNR from audio signal using log energy and energy thresholds
+    """Estimate SNR from audio signal using log energy and energy thresholds.
+
     Args:
         input_data (ndarray): Input audio signal
         sample_rate (int): Sampling rate of input audio signal
         window_size (int): Window size in samples
         hop_size (int): Hop size in samples
 
-
+    Returns:
         object: SNREstimator object
         estimated_snr (float): Estimated SNR in dB, extracted from SNREstimator.estimate_snr()
 
@@ -34,7 +44,7 @@ class SNREstimator:
         num_frames = 1 + (len(signal) - self.frame_length) // self.hop_length
         frames = [
             signal[
-                i * self.hop_length
+                i * self.hop_length: (i * self.hop_length) + self.frame_length
             ]
             for i in range(num_frames)
         ]
@@ -54,7 +64,8 @@ class SNREstimator:
             for frame in frames
         ]
 
-        energy_threshold_low = np.percentile(
+        energy_threshold_low = np.percentile(
+            log_energies, 25)  # First quartile
         energy_threshold_high = np.percentile(
             log_energies, 75
         )  # Third quartile
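
Aside: the hunks above frame the signal, take per-frame log energies, and read the SNR off the spread between the first quartile (noise floor) and third quartile (signal level). A minimal standalone sketch of that idea — the function name, window defaults, and the 10·log10 scaling are illustrative assumptions, not taken verbatim from SNREstimator:

    import numpy as np

    def estimate_snr_db(signal, frame_length=2048, hop_length=512):
        # slice the signal into overlapping frames, as in the hunk above
        num_frames = 1 + (len(signal) - frame_length) // hop_length
        frames = [
            signal[i * hop_length: (i * hop_length) + frame_length]
            for i in range(num_frames)
        ]
        # log energy per frame; the epsilon guards against log(0) on silence
        log_energies = [
            10 * np.log10(np.sum(np.square(frame)) + 1e-10) for frame in frames
        ]
        low = np.percentile(log_energies, 25)   # first quartile ~ noise floor
        high = np.percentile(log_energies, 75)  # third quartile ~ signal level
        return high - low  # estimated SNR in dB

    print(estimate_snr_db(np.random.randn(16000)))
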
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.81.4"
+VERSION="0.81.7"
 SAMPLING_RATE = 16000
nkululeko/data/dataset.py
CHANGED
@@ -76,6 +76,7 @@ class Dataset:
         if rename_cols:
             col_dict = ast.literal_eval(rename_cols)
             df = df.rename(columns=col_dict)
+            self.util.debug(f"renamed data columns: {col_dict}")
         return df
 
     def _report_load(self):
@@ -281,13 +282,19 @@ class Dataset:
            # try to get the age values
            df_local["age"] = source_df["age"].astype(int)
            got_age = True
-        except (KeyError, ValueError, audformat.errors.BadKeyError)
+        except (KeyError, ValueError, audformat.errors.BadKeyError):
            pass
        try:
            # also it might be possible that the sex is part of the speaker description
            df_local["gender"] = db[table]["speaker"].get(map="gender")
            got_gender = True
-        except (ValueError, audformat.errors.BadKeyError)
+        except (ValueError, audformat.errors.BadKeyError):
+            pass
+        try:
+            # also it might be possible that the sex is part of the speaker description
+            df_local["gender"] = db[table]["speaker"].get(map="sex")
+            got_gender = True
+        except (ValueError, audformat.errors.BadKeyError):
            pass
        try:
            # also it might be possible that the age is part of the speaker description
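
Aside: the second hunk adds a fallback because audformat databases may expose speaker gender under the map name "gender" or "sex". A minimal sketch of the same lookup chain, assuming `db` is an audformat.Database and `table` one of its table names (the helper name is hypothetical):

    import audformat

    def get_speaker_gender(db, table):
        # probe both map names the hunk above tries, in order
        for map_name in ("gender", "sex"):
            try:
                return db[table]["speaker"].get(map=map_name)
            except (ValueError, audformat.errors.BadKeyError):
                continue
        return None
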
nkululeko/demo.py
CHANGED
@@ -2,20 +2,35 @@
 # Demonstration code to use the ML-experiment framework
 # Test the loading of a previously trained model and demo mode
 # needs the project config file to run before
+"""
+This script is used to test the loading of a previously trained model and run it in demo mode.
+It requires the project config file to be run before.
 
-
+Usage:
+python -m nkululeko.demo [--config CONFIG] [--file FILE] [--list LIST] [--folder FOLDER] [--outfile OUTFILE]
+
+Options: \n
+--config CONFIG The base configuration file (default: exp.ini) \n
+--file FILE A file that should be processed (16kHz mono wav) \n
+--list LIST A file with a list of files, one per line, that should be processed (16kHz mono wav) \n
+--folder FOLDER A name of a folder where the files within the list are in (default: ./) \n
+--outfile OUTFILE A filename to store the results in CSV (default: None)
+"""
 import argparse
 import configparser
+import os
 
+import nkululeko.glob_conf as glob_conf
+from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
 from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
-import nkululeko.glob_conf as glob_conf
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko DEMO framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     parser.add_argument(
         "--file", help="A file that should be processed (16kHz mono wav)"
     )
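
Aside: the new module docstring doubles as CLI help. A minimal sketch of how the rebuilt parser behaves — flags and defaults are copied from the hunk, the sample argument list is illustrative:

    import argparse

    parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
    parser.add_argument("--config", default="exp.ini", help="The base configuration")
    parser.add_argument("--file", help="A file that should be processed (16kHz mono wav)")

    # equivalent to: python -m nkululeko.demo --config myexp.ini
    args = parser.parse_args(["--config", "myexp.ini"])
    print(args.config, args.file)  # -> myexp.ini None
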
nkululeko/demo_predictor.py
CHANGED
@@ -1,8 +1,11 @@
+# demo_predictor.py
 import os
-
-import numpy as np
-import audiofile
+
 import audformat
+import audiofile
+import numpy as np
+import pandas as pd
+
 import nkululeko.glob_conf as glob_conf
 from nkululeko.utils.util import Util
 
nkululeko/experiment.py
CHANGED
@@ -695,7 +695,7 @@ class Experiment:
             pickle.dump(self.__dict__, f)
             f.close()
         except TypeError:
-            self.feature_extractor.
+            self.feature_extractor.feat_extractor.model = None
             f = open(filename, "wb")
             pickle.dump(self.__dict__, f)
             f.close()
nkululeko/explore.py
CHANGED
@@ -1,17 +1,20 @@
 # explore.py
 # explore the feature sets
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
 
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko EXPLORE framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -46,9 +49,11 @@ def main(src_dir):
 
     # split into train and test
     expr.fill_train_and_tests()
-    util.debug(
+    util.debug(
+        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
 
-    plot_feats = eval(util.config_val(
+    plot_feats = eval(util.config_val(
+        "EXPL", "feature_distributions", "False"))
    tsne = eval(util.config_val("EXPL", "tsne", "False"))
    scatter = eval(util.config_val("EXPL", "scatter", "False"))
    spotlight = eval(util.config_val("EXPL", "spotlight", "False"))
nkululeko/feat_extract/feats_agender.py
CHANGED
@@ -9,16 +9,17 @@ import numpy as np
 import audinterface
 
 
-class AudModelAgenderSet(Featureset):
+class AgenderSet(Featureset):
     """
     Embeddings from the wav2vec2. based model finetuned on agender data, described in the paper
     "Speech-based Age and Gender Prediction with Transformers"
     https://arxiv.org/abs/2306.16962
     """
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_type = feats_type
 
     def _load_model(self):
         model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
@@ -28,14 +29,12 @@ class AudModelAgenderSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         device = self.util.config_val("MODEL", "device", "cpu")
         self.model = audonnx.load(model_root, device=device)
-
-        self.util.debug(
-            f"initialized agender model with {pytorch_total_params} parameters in total"
-        )
+        self.util.debug(f"initialized agender model")
         self.model_loaded = True
 
     def extract(self):
nkululeko/feat_extract/feats_audmodel_dim.py → nkululeko/feat_extract/feats_auddim.py
RENAMED
@@ -13,16 +13,18 @@ from nkululeko.feat_extract.featureset import Featureset
 import nkululeko.glob_conf as glob_conf
 
 
-class AudModelDimSet(Featureset):
-    """
-
+class AuddimSet(Featureset):
+    """Emotional dimensions from the wav2vec2 model finetuned on MSPPodcast emotions.
+
+    Described in the paper
     "Dawn of the transformer era in speech emotion recognition: closing the valence gap"
-    https://arxiv.org/abs/2203.07378
+    https://arxiv.org/abs/2203.07378.
     """
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_types = feats_type
 
     def _load_model(self):
         model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -30,7 +32,8 @@ class AudModelDimSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         device = self.util.config_val("MODEL", "device", cuda)
nkululeko/feat_extract/feats_audmodel.py
CHANGED
@@ -11,16 +11,18 @@ import torch
 from nkululeko.feat_extract.featureset import Featureset
 
 
-class AudModelSet(Featureset):
-    """
-
+class AudmodelSet(Featureset):
+    """Embeddings from the wav2vec2 based model finetuned on MSPPodcast emotions.
+
+    Described in the paper:
     "Dawn of the transformer era in speech emotion recognition: closing the valence gap"
-    https://arxiv.org/abs/2203.07378
+    https://arxiv.org/abs/2203.07378.
     """
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_type = feats_type
 
     def _load_model(self):
         model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -28,7 +30,8 @@ class AudModelSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         device = self.util.config_val("MODEL", "device", cuda)
nkululeko/feat_extract/feats_clap.py
CHANGED
@@ -11,14 +11,15 @@ import laion_clap
 import audiofile
 
 
-class Clap(Featureset):
+class ClapSet(Featureset):
     """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""
 
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False
+        self.feat_type = feats_type
 
     def init_model(self):
         # load model
@@ -32,12 +33,14 @@ class Clap(Featureset):
         store = self.util.get_path("store")
         store_format = self.util.config_val("FEATS", "store_format", "pkl")
         storage = f"{store}{self.name}.{store_format}"
-        extract = self.util.config_val(
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug(
+            self.util.debug(
+                "extracting clap embeddings, this might take a while...")
             emb_series = pd.Series(index=self.data_df.index, dtype=object)
             length = len(self.data_df.index)
             for idx, (file, start, end) in enumerate(
@@ -51,7 +54,8 @@ class Clap(Featureset):
                 )
                 emb = self.get_embeddings(signal, sampling_rate)
                 emb_series[idx] = emb
-            self.df = pd.DataFrame(
+            self.df = pd.DataFrame(
+                emb_series.values.tolist(), index=self.data_df.index)
             self.util.write_store(self.df, storage, store_format)
             try:
                 glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
nkululeko/feat_extract/feats_hubert.py
CHANGED
@@ -1,6 +1,7 @@
 # feats_hubert.py
 # HuBERT feature extractor for Nkululeko
-# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
+# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
+# "hubert-base-ls960", hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"
 
 
 import os
@@ -22,7 +23,7 @@ class Hubert(Featureset):
     def __init__(self, name, data_df, feat_type):
         """Constructor. is_train is needed to distinguish from test/dev sets,
         because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feat_type)
         # check if device is not set, use cuda if available
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
nkululeko/feat_extract/feats_import.py
CHANGED
@@ -8,11 +8,11 @@ from nkululeko.utils.util import Util
 from nkululeko.feat_extract.featureset import Featureset
 
 
-class
+class ImportSet(Featureset):
     """Class to import features that have been compiled elsewhere"""
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
 
     def extract(self):
         """Import the features."""
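
Aside: the signature change here recurs across almost every extractor in this release — each Featureset subclass now receives and forwards a feats_type argument. A minimal sketch of the new call pattern, with a stub base class standing in for nkululeko's real Featureset:

    import pandas as pd

    class Featureset:
        # stub: the real base class lives in nkululeko/feat_extract/featureset.py
        def __init__(self, name, data_df, feats_type):
            self.name = name
            self.data_df = data_df
            self.feats_type = feats_type

    class ImportSet(Featureset):
        """Class to import features that have been compiled elsewhere"""

        def __init__(self, name, data_df, feats_type):
            super().__init__(name, data_df, feats_type)

    feats = ImportSet("import", pd.DataFrame(), feats_type="import")
    print(feats.feats_type)  # -> import
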
nkululeko/feat_extract/feats_mos.py
CHANGED
@@ -10,6 +10,7 @@ pip uninstall -y torch torchvision torchaudio
 pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 
 """
+
 import os
 import pandas as pd
 from tqdm import tqdm
@@ -23,12 +24,12 @@ from nkululeko.utils.util import Util
 from nkululeko.feat_extract.featureset import Featureset
 
 
-class
+class MosSet(Featureset):
     """Class to predict MOS (mean opinion score)"""
 
-    def __init__(self, name, data_df):
+    def __init__(self, name, data_df, feats_type):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
         self.device = self.util.config_val("MODEL", "device", "cpu")
         self.model_initialized = False
 
nkululeko/feat_extract/feats_opensmile.py
CHANGED
@@ -8,31 +8,21 @@ import opensmile
 
 
 class Opensmileset(Featureset):
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type=None, config_file=None):
+        super().__init__(name, data_df, feats_type)
         self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
         try:
             self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
-            #'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
+            # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature set: {self.featset}"
-            )
+            self.util.error(f"something is wrong with feature set: {self.featset}")
         self.featlevel = self.util.config_val("FEATS", "level", "functionals")
         try:
-            self.featlevel = self.featlevel.replace(
-                "lld", "LowLevelDescriptors"
-            )
-            self.featlevel = self.featlevel.replace(
-                "functionals", "Functionals"
-            )
-            self.feature_level = eval(
-                f"opensmile.FeatureLevel.{self.featlevel}"
-            )
+            self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
+            self.featlevel = self.featlevel.replace("functionals", "Functionals")
+            self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
         except AttributeError:
-            self.util.error(
-                f"something is wrong with feature level: {self.featlevel}"
-            )
+            self.util.error(f"something is wrong with feature level: {self.featlevel}")
 
     def extract(self):
         """Extract the features based on the initialized dataset or re-open them when found on disk."""
@@ -44,9 +34,7 @@ class Opensmileset(Featureset):
         )
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or not os.path.isfile(storage) or no_reuse:
-            self.util.debug(
-                "extracting openSmile features, this might take a while..."
-            )
+            self.util.debug("extracting openSmile features, this might take a while...")
             smile = opensmile.Smile(
                 feature_set=self.feature_set,
                 feature_level=self.feature_level,
@@ -85,9 +73,7 @@ class Opensmileset(Featureset):
             selected_features = ast.literal_eval(
                 glob_conf.config["FEATS"]["os.features"]
             )
-            self.util.debug(
-                f"selecting features from opensmile: {selected_features}"
-            )
+            self.util.debug(f"selecting features from opensmile: {selected_features}")
             sel_feats_df = pd.DataFrame()
             hit = False
             for feat in selected_features:
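
Aside: the collapsed constructor above maps the config strings "lld"/"functionals" onto opensmile-python's FeatureLevel enum before building the extractor. A minimal sketch of that mapping, using getattr rather than the eval in the source; the input file name is hypothetical:

    import opensmile

    featlevel = "lld"  # as it would arrive from the [FEATS] config section
    featlevel = featlevel.replace("lld", "LowLevelDescriptors")
    featlevel = featlevel.replace("functionals", "Functionals")

    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=getattr(opensmile.FeatureLevel, featlevel),
    )
    df = smile.process_file("audio.wav")  # returns a multi-indexed DataFrame
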
nkululeko/feat_extract/feats_oxbow.py
CHANGED
@@ -10,9 +10,10 @@ import opensmile
 class Openxbow(Featureset):
     """Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
 
-    def __init__(self, name, data_df, is_train=False):
+    def __init__(self, name, data_df, feats_type, is_train=False):
         """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feats_type)
+        self.feats_types = feats_type
         self.is_train = is_train
 
     def extract(self):
@@ -21,11 +22,13 @@ class Openxbow(Featureset):
         self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
         store = self.util.get_path("store")
         storage = f"{store}{self.name}_{self.featset}.pkl"
-        extract = self.util.config_val(
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             # extract smile features first
-            self.util.debug(
+            self.util.debug(
+                "extracting openSmile features, this might take a while...")
             smile = opensmile.Smile(
                 feature_set=self.feature_set,
                 feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
@@ -48,7 +51,13 @@ class Openxbow(Featureset):
             # save the smile features
             smile_df.to_csv(lld_name, sep=";", header=False)
             # get the path of the xbow java jar file
-            xbow_path = self.util.config_val(
+            xbow_path = self.util.config_val(
+                "FEATS", "xbow.model", "openXBOW")
+            # check if JAR file exist
+            if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
+                # download using wget if not exist and locate in xbow_path
+                os.system(
+                    f"git clone https://github.com/openXBOW/openXBOW")
             # get the size of the codebook
             size = self.util.config_val("FEATS", "size", 500)
             # get the number of assignements
@@ -57,16 +66,12 @@ class Openxbow(Featureset):
             if self.is_train:
                 # store the codebook
                 os.system(
-                    f"java -jar {xbow_path}openXBOW.jar -i"
-                    f" {lld_name} -standardizeInput -log -o"
-                    f" {xbow_name} -size {size} -a {assignments} -B"
-                    f" {codebook_name}"
+                    f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
                 )
             else:
                 # use the codebook
                 os.system(
-                    f"java -jar {xbow_path}openXBOW.jar -i {lld_name}"
-                    f" -o {xbow_name} -b {codebook_name}"
+                    f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
                 )
             # read in the result from disk
             xbow_df = pd.read_csv(xbow_name, sep=";", header=None)
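
Aside: the last two hunks fix the jar invocation by joining the path with an explicit "/" and cloning openXBOW when the jar is missing. A minimal sketch of the resulting command construction (the helper name and file names are hypothetical):

    import os

    def xbow_command(xbow_path, lld_name, xbow_name, codebook_name):
        # fetch the jar if it is not present, as the hunk above does
        if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
            os.system("git clone https://github.com/openXBOW/openXBOW")
        return (
            f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} "
            f"-o {xbow_name} -b {codebook_name}"
        )

    print(xbow_command("openXBOW", "llds.csv", "xbow.csv", "codebook"))
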
nkululeko/feat_extract/feats_praat.py
CHANGED
@@ -1,33 +1,37 @@
 # feats_praat.py
-
+import ast
 import os
-
+
 import numpy as np
-import
+import pandas as pd
+
 from nkululeko.feat_extract import feinberg_praat
-import
+from nkululeko.feat_extract.featureset import Featureset
+import nkululeko.glob_conf as glob_conf
 
 
-class Praatset(Featureset):
-    """
-
-    David R. Feinberg's Praat scripts for the parselmouth python interface.
+class PraatSet(Featureset):
+    """A feature extractor for the Praat software.
+
+    Based on David R. Feinberg's Praat scripts for the parselmouth python interface.
     https://osf.io/6dwr3/
 
     """
 
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
 
     def extract(self):
         """Extract the features based on the initialized dataset or re-open them when found on disk."""
         store = self.util.get_path("store")
         store_format = self.util.config_val("FEATS", "store_format", "pkl")
         storage = f"{store}{self.name}.{store_format}"
-        extract = self.util.config_val(
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug(
+            self.util.debug(
+                "extracting Praat features, this might take a while...")
             self.df = feinberg_praat.compute_features(self.data_df.index)
             self.df = self.df.set_index(self.data_df.index)
             for i, col in enumerate(self.df.columns):
@@ -50,7 +54,8 @@ class Praatset(Featureset):
         self.df = self.df.astype(float)
 
     def extract_sample(self, signal, sr):
-        import audiofile
+        import audiofile
+        import audformat
 
         tmp_audio_names = ["praat_audio_tmp.wav"]
         audiofile.write(tmp_audio_names[0], signal, sr)