PyPI - nkululeko - Versions diffs - 0.79.5__py3-none-any.whl → 0.80.1__py3-none-any.whl - Mend

nkululeko 0.79.5__py3-none-any.whl → 0.80.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

nkululeko/constants.py +1 -1
nkululeko/demo_feats.py +71 -0
nkululeko/experiment.py +1 -1
nkululeko/feat_extract/feats_import.py +29 -31
nkululeko/multidb.py +14 -9
{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/METADATA +27 -11
{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/RECORD +10 -9
{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/LICENSE +0 -0
{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/WHEEL +0 -0
{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.79.5"
+VERSION="0.80.1"
 SAMPLING_RATE = 16000

nkululeko/demo_feats.py ADDED Viewed

@@ -0,0 +1,71 @@
+# demo_feats.py
+# Test some features extracted
+from nkululeko.experiment import Experiment
+import configparser
+from nkululeko.utils.util import Util
+from nkululeko.constants import VERSION
+import argparse
+import os
+def main(src_dir):
+    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser.add_argument(
+        "--file", help="A file that should be processed (16kHz mono wav)"
+    )
+    parser.add_argument(
+        "--list",
+        help=(
+            "A file with a list of files, one per line, that should be"
+            " processed (16kHz mono wav)"
+        ),
+        nargs="?",
+        default=None,
+    )
+    parser.add_argument(
+        "--outfile",
+        help=("A filename to store the results in CSV"),
+        nargs="?",
+        default=None,
+    )
+    args = parser.parse_args()
+    if args.config is not None:
+        config_file = args.config
+    else:
+        config_file = f"{src_dir}/exp.ini"
+    # test if the configuration file exists
+    if not os.path.isfile(config_file):
+        print(f"ERROR: no such file: {config_file}")
+        exit()
+    # load one configuration per experiment
+    config = configparser.ConfigParser()
+    config.read(config_file)
+    # create a new experiment
+    expr = Experiment(config)
+    module = "demo_feats"
+    expr.set_module(module)
+    util = Util(module)
+    util.debug(
+        f"running {expr.name} from config {config_file}, nkululeko version"
+        f" {VERSION}"
+    )
+    if args.file is None and args.list is None:
+        expr.demo_feats(None, False, args.outfile)
+    else:
+        if args.list is None:
+            expr.demo_feats(args.file, False, args.outfile)
+        else:
+            expr.demo_feats(args.list, True, args.outfile)
+    print("DONE")
+if __name__ == "__main__":
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    main(cwd)  # use this if you want to state the config file path on command line

nkululeko/experiment.py CHANGED Viewed

@@ -357,7 +357,7 @@ class Experiment:
             self.df_train = self.df_train[
                 self.df_train.index.isin(self.feats_train.index)
             ]
-            self.util.warn(f"mew train labels shape: {self.df_train.shape[0]}")
+            self.util.warn(f"new train labels shape: {self.df_train.shape[0]}")
         if self.feats_test.shape[0] < self.df_test.shape[0]:
             self.util.warn(
                 f"test feats ({self.feats_test.shape[0]}) != test labels"

nkululeko/feat_extract/feats_import.py CHANGED Viewed

@@ -1,10 +1,11 @@
 # feats_import.py
-from nkululeko.utils.util import Util
-from nkululeko.feat_extract.featureset import Featureset
 import os
-import pandas as pd
+import ast
 import audformat
+import pandas as pd
+from nkululeko.utils.util import Util
+from nkululeko.feat_extract.featureset import Featureset
 class Importset(Featureset):
@@ -14,32 +15,29 @@ class Importset(Featureset):
         super().__init__(name, data_df)
     def extract(self):
-        """Import the features or load them from disk if present."""
-        store = self.util.get_path("store")
-        storage = f"{store}{self.name}.pkl"
-        extract = eval(self.util.config_val("FEATS", "needs_feature_extraction", False))
-        no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
-        feat_import_file = self.util.config_val("FEATS", "import_file", False)
-        if not os.path.isfile(feat_import_file):
-            self.util.warn(f"no import file: {feat_import_file}")
-        if extract or no_reuse or not os.path.isfile(storage):
-            self.util.debug(f"importing features for {self.name}")
-            # df = pd.read_csv(feat_import_file, sep=',', header=0,
-            #     index_col=['file', 'start', 'end'])
+        """Import the features."""
+        self.util.debug(f"importing features for {self.name}")
+        try:
+            feat_import_files = self.util.config_val("FEATS", "import_file", False)
+            feat_import_files = ast.literal_eval(feat_import_files)
+        except ValueError as e:
+            self.util.error(
+                "feature type == import needs import_file = ['file1', 'filex']"
+            )
+        except SyntaxError as se:
+            if type(feat_import_files) == str:
+                feat_import_files = [feat_import_files]
+            else:
+                self.util.error(f"import_file is wrong: {feat_import_files}")
+        feat_df = pd.DataFrame()
+        for feat_import_file in feat_import_files:
+            if not os.path.isfile(feat_import_file):
+                self.util.error(f"no import file: {feat_import_file}")
             df = audformat.utils.read_csv(feat_import_file)
-            # scale features before use?
-            # from sklearn.preprocessing import StandardScaler
-            # scaler = StandardScaler()
-            # scaled_features = scaler.fit_transform(df.values)
-            # df = pd.DataFrame(scaled_features, index=df.index, columns=df.columns)
-            # use only the rows from the data index
-            # df = self.data_df.join(df).drop(columns=self.data_df.columns)
-            df = df.loc[self.data_df.index]
-            # df = pd.concat([self.data_df, df], axis=1, join="inner").drop(columns=self.data_df.columns)
-            # in any case, store to disk for later use
-            df.to_pickle(storage)
-            # and assign to be the "official" feature set
-            self.df = df
-        else:
-            self.util.debug("reusing imported features.")
-            self.df = pd.read_pickle(storage)
+            df = df[df.index.isin(self.data_df.index)]
+            feat_df = pd.concat([feat_df, df])
+        if feat_df.shape[0] == 0:
+            self.util.error(f"Imported features for data set {self.name} not found!")
+        # and assign to be the "official" feature set
+        self.df = feat_df

nkululeko/multidb.py CHANGED Viewed

@@ -9,7 +9,6 @@ import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import numpy as np
 import os
-import audeer
 from nkululeko.experiment import Experiment
 import configparser
 from nkululeko.utils.util import Util
@@ -41,9 +40,9 @@ def main(src_dir):
     last_epochs = np.zeros(dim * dim).reshape([dim, dim])
     # check if some data should be added to training
     try:
-        extra_train = config["CROSSDB"]["train_extra"]
+        extra_trains = config["CROSSDB"]["train_extra"]
     except KeyError:
-        extra_train = False
+        extra_trains = False
     for i in range(dim):
         for j in range(dim):
@@ -54,9 +53,12 @@ def main(src_dir):
             if i == j:
                 dataset = datasets[i]
                 print(f"running {dataset}")
-                if extra_train:
-                    config["DATA"]["databases"] = f"['{dataset}', '{extra_train}']"
-                    config["DATA"][f"{extra_train}.split_strategy"] = "train"
+                if extra_trains:
+                    extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
+                    config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
+                    extra_trains_2 = ast.literal_eval(extra_trains)
+                    for extra_train in extra_trains_2:
+                        config["DATA"][f"{extra_train}.split_strategy"] = "train"
                 else:
                     config["DATA"]["databases"] = f"['{dataset}']"
                 config["EXP"]["name"] = dataset
@@ -64,13 +66,16 @@ def main(src_dir):
                 train = datasets[i]
                 test = datasets[j]
                 print(f"running train: {train}, test: {test}")
-                if extra_train:
+                if extra_trains:
+                    extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
                     config["DATA"][
                         "databases"
-                    ] = f"['{train}', '{test}', '{extra_train}']"
+                    ] = f"['{train}', '{test}', {extra_trains_1}]"
                     config["DATA"][f"{test}.split_strategy"] = "test"
                     config["DATA"][f"{train}.split_strategy"] = "train"
-                    config["DATA"][f"{extra_train}.split_strategy"] = "train"
+                    extra_trains_2 = ast.literal_eval(extra_trains)
+                    for extra_train in extra_trains_2:
+                        config["DATA"][f"{extra_train}.split_strategy"] = "train"
                 else:
                     config["DATA"]["databases"] = f"['{train}', '{test}']"
                     config["DATA"][f"{test}.split_strategy"] = "test"

{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.79.5
+Version: 0.80.1
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -187,16 +187,22 @@ combine_per_speaker = mode
 Read the [Hello World example](#hello-world-example) for initial usage with Emo-DB dataset.
 Here is an overview of the interfaces/modules:
+All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
-* **nkululeko.multidb**: do multiple experiments, comparing several databases cross and in itself
-* **nkululeko.demo**: demo the current best model on the command line
-* **nkululeko.test**: predict a series of files with the current best model
-* **nkululeko.explore**: perform data exploration
-* **nkululeko.augment**: augment the current training data
-* **nkululeko.aug_train**: augment the current training data and do a training including this data
-* **nkululeko.predict**: predict features like SNR, MOS, arousal/valence, age/gender, with DNN models
-* **nkululeko.segment**: segment a database based on VAD (voice activity detection)
-* **nkululeko.resample**: check on all sampling rates and change to 16kHz
+* **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
+* **nkululeko.demo**: [demo the current best model](http://blog.syntheticspeech.de/2022/09/01/nkululeko-how-to-evaluate-a-test-set-with-a-given-best-model/) on the command line
+  * *--list* (optional) list of input files
+  * *--file* (optional) name of input file
+  * *--outfile* (optional) name of CSV file for output
+* **nkululeko.test**: predict a [given data set](http://blog.syntheticspeech.de/2022/09/01/nkululeko-how-to-evaluate-a-test-set-with-a-given-best-model/) with the current best model
+* **nkululeko.explore**: perform [data exploration](http://blog.syntheticspeech.de/2023/05/11/nkululeko-how-to-visualize-your-data-distribution/)
+* **nkululeko.augment**: [augment](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/) the current training data
+* **nkululeko.aug_train**: augment the current training data [and do a training](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/) including this data
+* **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
+* **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
+* **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
 There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -316,6 +322,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.80.1
+--------------
+* added support for string value in import_features
++ added support for multiple extra training databases when doing multi-db experiments
+Version 0.80.0
+--------------
+* fixed bug no feature import
+* add support for multiple import feature files
 Version 0.79.5
 --------------
 * fixed bug on demo without in- or output
@@ -323,7 +339,7 @@ Version 0.79.5
 Version 0.79.4
 --------------
-* added funcionality in demo for regression
+* added functionality in demo for regression
 Version 0.79.3
 --------------

{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/RECORD RENAMED Viewed

@@ -2,10 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=C00eVCYmrYEMiZCYO88sFFchP1FC_YVNsA0z_GS9OSA,39
+nkululeko/constants.py,sha256=PjrmxRyljCQ53PzG5WIALMzgVNR8dJ4k70ZhEHPpQ98,39
 nkululeko/demo.py,sha256=l_rxmDCaVO3hchWSQYzh3dJ5U-6CHE0umbfxIsES5s0,2196
+nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=o8bXdH2r_7K-9N033iL9i7vYJXS2baPxi8rYqFlhodk,3751
-nkululeko/experiment.py,sha256=pP8rLLWW_shVmZ3tPLHoSX1Vh5WVBQkw_aZua77iRNc,29542
+nkululeko/experiment.py,sha256=972eUHFCF08J2HbtFtOzu7vRl2sU7x3fqjxTvY5htjQ,29542
 nkululeko/explore.py,sha256=5c89hGpjt5mRMN7w2Ajjnr2VjoFF0hOFs0O1BQruw80,2250
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=tKv1b1-o7xNMgBavTR8VY8_H5HKoJEnnosS-KcjmOEU,7281
@@ -13,7 +14,7 @@ nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,347
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
 nkululeko/modelrunner.py,sha256=cU6FHbpI2mrG0BY7pn5UgFFpYh3u-v_GH7q73Kknhug,9337
-nkululeko/multidb.py,sha256=qJECRCCtIk97i8SY7E_vRY3nSmbBTDp_kylewUub044,5314
+nkululeko/multidb.py,sha256=4ceCu9LFrMGlrcgtz4pWuOQb2KA3jR5uo3FjZgAEBD4,5732
 nkululeko/nkululeko.py,sha256=Ty8cdusXUec9BHml8Gsp1r7DXuvIBMFXUckMpzILBnQ,1966
 nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
 nkululeko/predict.py,sha256=dRXX-sQVESa7cNi_56S6UkUOa_pV1g_K4xYtYVM1SJs,1876
@@ -54,7 +55,7 @@ nkululeko/feat_extract/feats_audmodel.py,sha256=ifXo4ItOGiSD8QUA64Ha-lf_4-_0MQuJ
 nkululeko/feat_extract/feats_audmodel_dim.py,sha256=HZtQc7_4lIbReUz41Ks-EewcKOmkMc0V98pHTCCIMto,2849
 nkululeko/feat_extract/feats_clap.py,sha256=v82mbjdjGDSKUUBggttw7jW0oka22fWAmfUf-4VmaDU,3379
 nkululeko/feat_extract/feats_hubert.py,sha256=uL-9mgQHuGPQi1nuUaw6aNU9DscsO89uJAmBdmnCegM,5205
-nkululeko/feat_extract/feats_import.py,sha256=SqTuNdbInOO_oXucSlwCTfNz6OUCNyJfUrGX_cS9Mn0,2054
+nkululeko/feat_extract/feats_import.py,sha256=m7Yh1sj7C1yrDDbZAqS75oMMF5rAtO7XC_sdWdQN5Iw,1598
 nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
 nkululeko/feat_extract/feats_mos.py,sha256=XZI7U99QcSuzd1v5pVAo0JwdcrXrRICUNt_K5G6eRPU,4149
 nkululeko/feat_extract/feats_opensmile.py,sha256=yDRGSiUQV3K3oLxVqq8Cxj5bkc-RiLzDYbAGKC9I5vc,4140
@@ -100,8 +101,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=29otJpUp1VqbtDKmlLkPPzBmVfTFiHZ70rUdR4860rM,2788
 nkululeko/utils/util.py,sha256=Hn27x0f2rjSR-iae2h9_70J4SdXKJTduLFIH13w3db0,12363
-nkululeko-0.79.5.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.79.5.dist-info/METADATA,sha256=sr4nNPwmT3-EJMnW1x-28aP3lq17n0rybKrqcpu7vA8,32490
-nkululeko-0.79.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-nkululeko-0.79.5.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.79.5.dist-info/RECORD,,
+nkululeko-0.80.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.80.1.dist-info/METADATA,sha256=_kjligQaChrCbn5le4rfDAJrRKOZcAGiPNwNjrbSqA0,33856
+nkululeko-0.80.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+nkululeko-0.80.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.80.1.dist-info/RECORD,,

{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.79.5.dist-info → nkululeko-0.80.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.79.5__py3-none-any.whl → 0.80.1__py3-none-any.whl

nkululeko 0.79.5__py3-none-any.whl → 0.80.1__py3-none-any.whl