PyPI - nkululeko - Versions diffs - 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl - Mend

nkululeko 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.86.7"
+VERSION="0.86.8"
 SAMPLING_RATE = 16000

nkululeko/experiment.py CHANGED Viewed

@@ -5,13 +5,13 @@ import pickle
 import random
 import time
+import audeer
+import audformat
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder
-import audeer
-import audformat
+import nkululeko.glob_conf as glob_conf
 from nkululeko.data.dataset import Dataset
 from nkululeko.data.dataset_csv import Dataset_CSV
 from nkululeko.demo_predictor import Demo_predictor
@@ -19,8 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
 from nkululeko.feature_extractor import FeatureExtractor
 from nkululeko.file_checker import FileChecker
 from nkululeko.filter_data import DataFilter
-from nkululeko.filter_data import filter_min_dur
-import nkululeko.glob_conf as glob_conf
 from nkululeko.plots import Plots
 from nkululeko.reporting.report import Report
 from nkululeko.runmanager import Runmanager
@@ -109,7 +107,8 @@ class Experiment:
         # print keys/column
         dbs = ",".join(list(self.datasets.keys()))
         labels = self.util.config_val("DATA", "labels", False)
-        auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
+        auto_labels = list(
+            next(iter(self.datasets.values())).df[self.target].unique())
         if labels:
             self.labels = ast.literal_eval(labels)
             self.util.debug(f"Using target labels (from config): {labels}")
@@ -159,7 +158,8 @@ class Experiment:
                 data.split()
                 data.prepare_labels()
                 self.df_test = pd.concat(
-                    [self.df_test, self.util.make_segmented_index(data.df_test)]
+                    [self.df_test, self.util.make_segmented_index(
+                        data.df_test)]
                 )
                 self.df_test.is_labeled = data.is_labeled
             self.df_test.got_gender = self.got_gender
@@ -260,7 +260,8 @@ class Experiment:
                     test_cats = self.df_test[self.target].unique()
                 else:
                     # if there is no target, copy a dummy label
-                    self.df_test = self._add_random_target(self.df_test).astype("str")
+                    self.df_test = self._add_random_target(
+                        self.df_test).astype("str")
                 train_cats = self.df_train[self.target].unique()
                 # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
                 # print(f"train_cats with target {self.target}: {train_cats}")
@@ -268,7 +269,8 @@ class Experiment:
                 if type(test_cats) == np.ndarray:
                     self.util.debug(f"Categories test (nd.array): {test_cats}")
                 else:
-                    self.util.debug(f"Categories test (list): {list(test_cats)}")
+                    self.util.debug(
+                        f"Categories test (list): {list(test_cats)}")
             if type(train_cats) == np.ndarray:
                 self.util.debug(f"Categories train (nd.array): {train_cats}")
             else:
@@ -291,7 +293,8 @@ class Experiment:
         target_factor = self.util.config_val("DATA", "target_divide_by", False)
         if target_factor:
-            self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
+            self.df_test[self.target] = self.df_test[self.target] / \
+                float(target_factor)
             self.df_train[self.target] = self.df_train[self.target] / float(
                 target_factor
             )
@@ -314,14 +317,16 @@ class Experiment:
     def plot_distribution(self, df_labels):
         """Plot the distribution of samples and speaker per target class and biological sex"""
         plot = Plots()
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         plot.plot_distributions(df_labels)
         if self.got_speaker:
             plot.plot_distributions_speaker(df_labels)
     def extract_test_feats(self):
         self.feats_test = pd.DataFrame()
-        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
+        feats_name = "_".join(ast.literal_eval(
+            glob_conf.config["DATA"]["tests"]))
         feats_types = self.util.config_val_list("FEATS", "type", ["os"])
         self.feature_extractor = FeatureExtractor(
             self.df_test, feats_types, feats_name, "test"
@@ -338,9 +343,17 @@ class Experiment:
         """
         df_train, df_test = self.df_train, self.df_test
-        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
+        feats_name = "_".join(ast.literal_eval(
+            glob_conf.config["DATA"]["databases"]))
         self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
-        feats_types = self.util.config_val_list("FEATS", "type", [])
+        feats_types = self.util.config_val("FEATS", "type", "os")
+        # Ensure feats_types is always a list of strings
+        if isinstance(feats_types, str):
+            if feats_types.startswith("[") and feats_types.endswith("]"):
+                feats_types = ast.literal_eval(feats_types)
+            else:
+                feats_types = [feats_types]
+        # print(f"feats_types: {feats_types}")
         # for some models no features are needed
         if len(feats_types) == 0:
             self.util.debug("no feature extractor specified.")
@@ -372,7 +385,8 @@ class Experiment:
                 f"test feats ({self.feats_test.shape[0]}) != test labels"
                 f" ({self.df_test.shape[0]})"
             )
-            self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
+            self.df_test = self.df_test[self.df_test.index.isin(
+                self.feats_test.index)]
             self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
         self._check_scale()
@@ -387,7 +401,8 @@ class Experiment:
         """Augment the selected samples."""
         from nkululeko.augmenting.augmenter import Augmenter
-        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -482,7 +497,8 @@ class Experiment:
         """
         from nkululeko.augmenting.randomsplicer import Randomsplicer
-        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -503,7 +519,8 @@ class Experiment:
         plot_feats = eval(
             self.util.config_val("EXPL", "feature_distributions", "False")
         )
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         # get the data labels
         if sample_selection == "all":
             df_labels = pd.concat([self.df_train, self.df_test])
@@ -566,7 +583,8 @@ class Experiment:
             for scat_target in scat_targets:
                 if self.util.is_categorical(df_labels[scat_target]):
                     for scatter in scatters:
-                        plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
+                        plots.scatter_plot(
+                            df_feats, df_labels, scat_target, scatter)
                 else:
                     self.util.debug(
                         f"{self.name}: binning continuous variable to categories"
@@ -657,7 +675,8 @@ class Experiment:
         preds = best.preds
         speakers = self.df_test.speaker.values
         print(f"{len(truths)} {len(preds)} {len(speakers) }")
-        df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
+        df = pd.DataFrame(
+            data={"truth": truths, "pred": preds, "speaker": speakers})
         plot_name = "result_combined_per_speaker"
         self.util.debug(
             f"plotting speaker combination ({function}) confusion matrix to"
@@ -733,7 +752,6 @@ class Experiment:
         if model.is_ann():
             print("converting to onnx from torch")
         else:
-            from skl2onnx import to_onnx
             print("converting to onnx from sklearn")
         # save the rest

nkululeko/feature_extractor.py CHANGED Viewed

@@ -39,16 +39,20 @@ class FeatureExtractor:
         self.feats = pd.DataFrame()
         for feats_type in self.feats_types:
             store_name = f"{self.data_name}_{feats_type}"
-            self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
+            self.feat_extractor = self._get_feat_extractor(
+                store_name, feats_type)
             self.feat_extractor.extract()
             self.feat_extractor.filter()
-            self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
+            self.feats = pd.concat(
+                [self.feats, self.feat_extractor.df], axis=1)
         return self.feats
     def extract_sample(self, signal, sr):
         return self.feat_extractor.extract_sample(signal, sr)
     def _get_feat_extractor(self, store_name, feats_type):
+        if isinstance(feats_type, list) and len(feats_type) == 1:
+            feats_type = feats_type[0]
         feat_extractor_class = self._get_feat_extractor_class(feats_type)
         if feat_extractor_class is None:
             self.util.error(f"unknown feats_type: {feats_type}")
@@ -103,13 +107,15 @@ class FeatureExtractor:
         prefix, _, ext = feats_type.partition("-")
         from importlib import import_module
-        module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
+        module = import_module(
+            f"nkululeko.feat_extract.feats_{prefix.lower()}")
         class_name = f"{prefix.capitalize()}"
         return getattr(module, class_name)
     def _get_feat_extractor_by_name(self, feats_type):
         from importlib import import_module
-        module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
+        module = import_module(
+            f"nkululeko.feat_extract.feats_{feats_type.lower()}")
         class_name = f"{feats_type.capitalize()}Set"
         return getattr(module, class_name)

nkululeko/utils/util.py CHANGED Viewed

@@ -5,15 +5,15 @@ import os.path
 import pickle
 import sys
-import numpy as np
-import pandas as pd
 import audeer
 import audformat
+import numpy as np
+import pandas as pd
 class Util:
-    # a list of words that need not to be warned upon if default values are used
+    # a list of words that need not to be warned upon if default values are
+    # used
     stopvals = [
         "all",
         False,
@@ -40,7 +40,8 @@ class Util:
                 self.got_data_roots = self.config_val(
                     "DATA", "root_folders", False)
                 if self.got_data_roots:
-                    # if there is a global data rootfolder file, read from there
+                    # if there is a global data rootfolder file, read from
+                    # there
                     if not os.path.isfile(self.got_data_roots):
                         self.error(f"no such file: {self.got_data_roots}")
                     self.data_roots = configparser.ConfigParser()
@@ -107,16 +108,17 @@ class Util:
             if self.got_data_roots:
                 try:
                     if len(key) > 0:
-                        return self.data_roots["DATA"][dataset + "." + key].strip("'\"")
+                        return self.data_roots["DATA"][dataset +
+                                                       "." + key].strip("'\"")
                     else:
                         return self.data_roots["DATA"][dataset].strip("'\"")
                 except KeyError:
-                    if not default in self.stopvals:
+                    if default not in self.stopvals:
                         self.debug(
-                            f"value for {key} not found, using default:" f" {default}"
-                        )
+                            f"value for {key} not found, using default:"
+                            f" {default}")
                     return default
-            if not default in self.stopvals:
+            if default not in self.stopvals:
                 self.debug(
                     f"value for {key} not found, using default: {default}")
             return default
@@ -182,7 +184,7 @@ class Util:
     def get_feattype_name(self):
         """
-        Get a string as name from all feature sets that are useed
+        Get a string as name from all feature sets that are used
         """
         return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
@@ -205,7 +207,12 @@ class Util:
     def get_model_description(self):
         mt = ""
         mt = f'{self.config["MODEL"]["type"]}'
-        ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
+        # ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
+        ft_value = self.config["FEATS"]["type"]
+        if isinstance(ft_value, str) and ft_value.startswith("[") and ft_value.endswith("]"):
+            ft = "_".join(ast.literal_eval(ft_value))
+        else:
+            ft = ft_value
         ft += "_"
         layer_string = ""
         layer_s = self.config_val("MODEL", "layers", False)
@@ -230,9 +237,8 @@ class Util:
             ["FEATS", "wav2vec2.layer"],
         ]
         for option in options:
-            return_string += self._get_value_descript(option[0], option[1]).replace(
-                ".", "-"
-            )
+            return_string += self._get_value_descript(
+                option[0], option[1]).replace(".", "-")
         return return_string
     def get_plot_name(self):
@@ -286,7 +292,7 @@ class Util:
         try:
             return ast.literal_eval(self.config[section][key])
         except KeyError:
-            if not default in self.stopvals:
+            if default not in self.stopvals:
                 self.debug(
                     f"value for {key} not found, using default: {default}")
             return default

{nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.86.7
+Version: 0.86.8
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -343,6 +343,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.86.8
+--------------
+* handle single feature sets as strings in the config
 Version 0.86.7
 --------------
 * handles now audformat tables where the target is in a file index

{nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/RECORD RENAMED Viewed

@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=CscqJhC7nceHk2wmZd2bBFSeFExtr0HkXt99qpAZU4E,39
+nkululeko/constants.py,sha256=FOK-XF_DHGNFHsO_OMLof3jwgrn2buWnPVfrHy5QBm8,39
 nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
-nkululeko/experiment.py,sha256=5nF-eDf8OCp6KRIU7KnryWL5SLJQUtr2BueHhEdcKw0,31040
+nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
 nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
-nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
+nkululeko/feature_extractor.py,sha256=rL-TybLmjZz5uxT9LNTORaDat9FKp_1qloxbyMrinyE,4141
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
-nkululeko/utils/util.py,sha256=ILpfNuaeq-hy1bUkRhVrzO2wG9z9Upaozs9EBoIaMG0,14123
-nkululeko-0.86.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.86.7.dist-info/METADATA,sha256=t5cI43YRp3qmyJj03ACfgCbKoAuLYImDCLS1QkYbMQM,38024
-nkululeko-0.86.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.86.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.86.7.dist-info/RECORD,,
+nkululeko/utils/util.py,sha256=ZCS02mE2c3_h9_q4hpsSm4XAooCranqRF_5pY-6055E,14432
+nkululeko-0.86.8.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.86.8.dist-info/METADATA,sha256=5TQSWqzrN9E7XJGcVn5oPKGl6qy-RliYGEG2Ycl46qk,38109
+nkululeko-0.86.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.86.8.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.86.8.dist-info/RECORD,,

{nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl

nkululeko 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl