PyPI - nkululeko - Versions diffs - 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl - Mend

nkululeko 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.85.1"
+VERSION="0.85.2"
 SAMPLING_RATE = 16000

nkululeko/data/dataset_csv.py CHANGED Viewed

@@ -21,7 +21,7 @@ class Dataset_CSV(Dataset):
         #     exp_root = self.util.config_val("EXP", "root", "")
         #     data_file = os.path.join(exp_root, data_file)
         root = os.path.dirname(data_file)
-        audio_path = self.util.config_val_data(self.name, "audio_path", "")
+        audio_path = self.util.config_val_data(self.name, "audio_path", "./")
         df = pd.read_csv(data_file)
         # special treatment for segmented dataframes with only one column:
         if "start" in df.columns and len(df.columns) == 4:
@@ -49,7 +49,8 @@ class Dataset_CSV(Dataset):
                     .map(lambda x: root + "/" + audio_path + "/" + x)
                     .values
                 )
-                df = df.set_index(df.index.set_levels(file_index, level="file"))
+                df = df.set_index(df.index.set_levels(
+                    file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
@@ -63,7 +64,8 @@ class Dataset_CSV(Dataset):
         self.db = None
         self.got_target = True
         self.is_labeled = self.got_target
-        self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
+        self.start_fresh = eval(
+            self.util.config_val("DATA", "no_reuse", "False"))
         is_index = False
         try:
             if self.is_labeled and not "class_label" in self.df.columns:
@@ -90,7 +92,8 @@ class Dataset_CSV(Dataset):
                 f" {self.got_gender}, got age: {self.got_age}"
             )
         self.util.debug(r_string)
-        glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
+        glob_conf.report.add_item(ReportItem(
+            "Data", "Loaded report", r_string))
     def prepare(self):
         super().prepare()

nkululeko/experiment.py CHANGED Viewed

@@ -109,14 +109,15 @@ class Experiment:
         # print keys/column
         dbs = ",".join(list(self.datasets.keys()))
         labels = self.util.config_val("DATA", "labels", False)
+        auto_labels = list(
+            next(iter(self.datasets.values())).df[self.target].unique()
+        )
         if labels:
             self.labels = ast.literal_eval(labels)
             self.util.debug(f"Target labels (from config): {labels}")
         else:
-            self.labels = list(
-                next(iter(self.datasets.values())).df[self.target].unique()
-            )
-            self.util.debug(f"Target labels (from database): {labels}")
+            self.labels = auto_labels
+        self.util.debug(f"Target labels (from database): {auto_labels}")
         glob_conf.set_labels(self.labels)
         self.util.debug(f"loaded databases {dbs}")
@@ -159,7 +160,8 @@ class Experiment:
                 data.split()
                 data.prepare_labels()
                 self.df_test = pd.concat(
-                    [self.df_test, self.util.make_segmented_index(data.df_test)]
+                    [self.df_test, self.util.make_segmented_index(
+                        data.df_test)]
                 )
                 self.df_test.is_labeled = data.is_labeled
             self.df_test.got_gender = self.got_gender
@@ -260,7 +262,8 @@ class Experiment:
                     test_cats = self.df_test[self.target].unique()
                 else:
                     # if there is no target, copy a dummy label
-                    self.df_test = self._add_random_target(self.df_test).astype("str")
+                    self.df_test = self._add_random_target(
+                        self.df_test).astype("str")
                 train_cats = self.df_train[self.target].unique()
                 # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
                 # print(f"train_cats with target {self.target}: {train_cats}")
@@ -268,7 +271,8 @@ class Experiment:
                 if type(test_cats) == np.ndarray:
                     self.util.debug(f"Categories test (nd.array): {test_cats}")
                 else:
-                    self.util.debug(f"Categories test (list): {list(test_cats)}")
+                    self.util.debug(
+                        f"Categories test (list): {list(test_cats)}")
             if type(train_cats) == np.ndarray:
                 self.util.debug(f"Categories train (nd.array): {train_cats}")
             else:
@@ -291,7 +295,8 @@ class Experiment:
         target_factor = self.util.config_val("DATA", "target_divide_by", False)
         if target_factor:
-            self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
+            self.df_test[self.target] = self.df_test[self.target] / \
+                float(target_factor)
             self.df_train[self.target] = self.df_train[self.target] / float(
                 target_factor
             )
@@ -314,14 +319,16 @@ class Experiment:
     def plot_distribution(self, df_labels):
         """Plot the distribution of samples and speaker per target class and biological sex"""
         plot = Plots()
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         plot.plot_distributions(df_labels)
         if self.got_speaker:
             plot.plot_distributions_speaker(df_labels)
     def extract_test_feats(self):
         self.feats_test = pd.DataFrame()
-        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
+        feats_name = "_".join(ast.literal_eval(
+            glob_conf.config["DATA"]["tests"]))
         feats_types = self.util.config_val_list("FEATS", "type", ["os"])
         self.feature_extractor = FeatureExtractor(
             self.df_test, feats_types, feats_name, "test"
@@ -338,7 +345,8 @@ class Experiment:
         """
         df_train, df_test = self.df_train, self.df_test
-        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
+        feats_name = "_".join(ast.literal_eval(
+            glob_conf.config["DATA"]["databases"]))
         self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
         feats_types = self.util.config_val_list("FEATS", "type", [])
         # for some models no features are needed
@@ -372,7 +380,8 @@ class Experiment:
                 f"test feats ({self.feats_test.shape[0]}) != test labels"
                 f" ({self.df_test.shape[0]})"
             )
-            self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
+            self.df_test = self.df_test[self.df_test.index.isin(
+                self.feats_test.index)]
             self.util.warn(f"mew test labels shape: {self.df_test.shape[0]}")
         self._check_scale()
@@ -383,7 +392,8 @@ class Experiment:
         """
         from nkululeko.augmenting.augmenter import Augmenter
-        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -478,7 +488,8 @@ class Experiment:
         """
         from nkululeko.augmenting.randomsplicer import Randomsplicer
-        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -499,7 +510,8 @@ class Experiment:
         plot_feats = eval(
             self.util.config_val("EXPL", "feature_distributions", "False")
         )
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         # get the data labels
         if sample_selection == "all":
             df_labels = pd.concat([self.df_train, self.df_test])
@@ -562,7 +574,8 @@ class Experiment:
             for scat_target in scat_targets:
                 if self.util.is_categorical(df_labels[scat_target]):
                     for scatter in scatters:
-                        plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
+                        plots.scatter_plot(
+                            df_feats, df_labels, scat_target, scatter)
                 else:
                     self.util.debug(
                         f"{self.name}: binning continuous variable to categories"
@@ -651,7 +664,8 @@ class Experiment:
         preds = best.preds
         speakers = self.df_test.speaker.values
         print(f"{len(truths)} {len(preds)} {len(speakers) }")
-        df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
+        df = pd.DataFrame(
+            data={"truth": truths, "pred": preds, "speaker": speakers})
         plot_name = "result_combined_per_speaker"
         self.util.debug(
             f"plotting speaker combination ({function}) confusion matrix to"

{nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.85.1
+Version: 0.85.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -333,6 +333,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.85.2
+--------------
+* added data, and automatic task label detection
 Version 0.85.1
 --------------
 * fixed bug in model_finetuned that label_num was constant 2

{nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=WnTSXQjJmWE-IrXcNSEa5FFV_83-z0EOGXa9trq00uE,39
+nkululeko/constants.py,sha256=l15EMSj8vmejkCKCzQ6jMrgj5PuNrcHIREXt9kbSw7U,39
 nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
-nkululeko/experiment.py,sha256=9Nw23b7sVOciH8IaOuAAKbY7otXYSsPrj_rQCA_U9cc,30465
+nkululeko/experiment.py,sha256=ZsSWdasWUyIBF_4vxb4FxvHs42pytG7ErUOABA-WWTo,30722
 nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
 nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
-nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
+nkululeko/data/dataset_csv.py,sha256=vTnjIc2UdSJT7foL-ltE9MWrZTCg0nplwKdEtMPxt2o,3933
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
 nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
@@ -106,8 +106,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=b1IHFucRNuF9Iyv5IJeK4AEg0Rga0xKG80UM5GWWdHA,13816
-nkululeko-0.85.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.85.1.dist-info/METADATA,sha256=RonY9PdKyHjwYsZ3T9TgEs1JNnY1qbMdDr-Sp6kcCW8,36591
-nkululeko-0.85.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.85.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.85.1.dist-info/RECORD,,
+nkululeko-0.85.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.85.2.dist-info/METADATA,sha256=RVGREhA1jakUtQ707C0ecklnUZwx4skVHV0UbPwEsn0,36671
+nkululeko-0.85.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.85.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.85.2.dist-info/RECORD,,

{nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl

nkululeko 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl