PyPI - nkululeko - Versions diffs - 0.88.4__py3-none-any.whl → 0.88.6__py3-none-any.whl - Mend

nkululeko 0.88.4__py3-none-any.whl → 0.88.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

nkululeko/constants.py +1 -1
nkululeko/data/dataset.py +16 -0
nkululeko/feat_extract/feats_agender.py +5 -3
nkululeko/feat_extract/feats_spkrec.py +1 -1
nkululeko/reporting/reporter.py +30 -20
nkululeko/utils/util.py +8 -2
{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/METADATA +14 -3
{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/RECORD +11 -11
{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/WHEEL +1 -1
{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/LICENSE +0 -0
{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.88.4"
+VERSION="0.88.6"
 SAMPLING_RATE = 16000

nkululeko/data/dataset.py CHANGED Viewed

@@ -423,6 +423,9 @@ class Dataset:
             self.util.debug(f"{self.name}: trying to reuse data splits")
             self.df_test = pd.read_pickle(storage_test)
             self.df_train = pd.read_pickle(storage_train)
+        elif isinstance(ast.literal_eval(split_strategy), list):
+            # treat this as a list of test speakers
+            self.assign_speakers(ast.literal_eval(split_strategy))
         else:
             self.util.error(f"unknown split strategy: {split_strategy}")
@@ -515,6 +518,19 @@ class Dataset:
         # because this generates new train/test sample quantities, the feature extraction has to be done again
         glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
+    def assign_speakers(self, speakers):
+        """One way to split train and eval sets: Specify test speaker names."""
+        self.df_test = self.df[self.df.speaker.isin(speakers)]
+        if len(self.df_test) == 0:
+            self.util.error(f"no speakers found in {speakers}")
+        self.df_train = self.df[~self.df.index.isin(self.df_test.index)]
+        self.util.debug(
+            f"{self.name} (speakers assigned): [{self.df_train.shape[0]}/{self.df_test.shape[0]}]"
+            " samples in train/test"
+        )
+        # because this generates new train/test sample quantities, the feature extraction has to be done again
+        glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
     def split_speakers(self):
         """One way to split train and eval sets: Specify percentage of evaluation speakers"""
         test_percent = int(self.util.config_val_data(self.name, "test_size", 20))

nkululeko/feat_extract/feats_agender.py CHANGED Viewed

@@ -1,13 +1,14 @@
 # feats_agender.py
 from nkululeko.feat_extract.featureset import Featureset
 import os
-import pandas as pd
+# import pandas as pd
 import audeer
 import nkululeko.glob_conf as glob_conf
 import audonnx
 import numpy as np
 import audinterface
+import torch
 class AgenderSet(Featureset):
     """
@@ -32,7 +33,8 @@ class AgenderSet(Featureset):
             archive_path = audeer.download_url(
                 model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
-        device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        device = self.util.config_val("MODEL", "device", cuda)
         self.model = audonnx.load(model_root, device=device)
         self.util.debug(f"initialized agender model")
         self.model_loaded = True

nkululeko/feat_extract/feats_spkrec.py CHANGED Viewed

@@ -24,7 +24,7 @@ class Spkrec(Featureset):
     def __init__(self, name, data_df, feat_type):
         """Constructor. is_train is needed to distinguish from test/dev sets,
         because they use the codebook from the training"""
-        super().__init__(name, data_df)
+        super().__init__(name, data_df, feat_type)
         # check if device is not set, use cuda if available
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)

nkululeko/reporting/reporter.py CHANGED Viewed

@@ -2,28 +2,32 @@ import ast
 import glob
 import json
 import math
-import os
-from confidence_intervals import evaluate_with_conf_int
 import matplotlib.pyplot as plt
 import numpy as np
+# from torch import is_tensor
+from audmetric import (
+    accuracy,
+    concordance_cc,
+    mean_absolute_error,
+    mean_squared_error,
+    unweighted_average_recall,
+)
+# import os
+from confidence_intervals import evaluate_with_conf_int
 from scipy.special import softmax
-from scipy.stats import entropy
-from scipy.stats import pearsonr
-from sklearn.metrics import ConfusionMatrixDisplay
-from sklearn.metrics import auc
-from sklearn.metrics import classification_report
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import r2_score
-from sklearn.metrics import roc_auc_score
-from sklearn.metrics import roc_curve
-from torch import is_tensor
-from audmetric import accuracy
-from audmetric import concordance_cc
-from audmetric import mean_absolute_error
-from audmetric import mean_squared_error
-from audmetric import unweighted_average_recall
+from scipy.stats import entropy, pearsonr
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    auc,
+    classification_report,
+    confusion_matrix,
+    r2_score,
+    roc_auc_score,
+    roc_curve,
+)
 import nkululeko.glob_conf as glob_conf
 from nkululeko.plots import Plots
@@ -167,7 +171,7 @@ class Reporter:
             probas["uncertainty"] = uncertainty
             probas["correct"] = probas.predicted == probas.truth
             sp = self.util.get_pred_name()
             self.probas = probas
             probas.to_csv(sp)
             self.util.debug(f"Saved probabilities to {sp}")
@@ -175,7 +179,13 @@ class Reporter:
             ax, caption = plots.plotcatcont(
                 probas, "correct", "uncertainty", "uncertainty", "correct"
             )
-            plots.save_plot(ax, caption, "Uncertainty", "uncertainty", "samples")
+            plots.save_plot(
+                ax,
+                caption,
+                "Uncertainty",
+                "uncertainty_samples",
+                self.util.get_exp_name(),
+            )
     def set_id(self, run, epoch):
         """Make the report identifiable with run and epoch index."""

nkululeko/utils/util.py CHANGED Viewed

@@ -157,8 +157,9 @@ class Util:
     def get_pred_name(self):
         store = self.get_path("store")
+        target = self.get_target_name()
         pred_name = self.get_model_description()
-        return f"{store}/pred_{pred_name}.csv"
+        return f"{store}/pred_{target}_{pred_name}.csv"
     def is_categorical(self, pd_series):
         """Check if a dataframe column is categorical"""
@@ -217,9 +218,14 @@ class Util:
         return_string = f"{ds}"
         if not only_data:
             mt = self.get_model_description()
-            return_string = return_string + "_" + mt
+            target = self.get_target_name()
+            return_string = return_string + "_" + target + "_" + mt
         return return_string.replace("__", "_")
+    def get_target_name(self):
+        """Get a string as name from all target sets that are used."""
+        return self.config["DATA"]["target"]
     def get_model_type(self):
         return self.config["MODEL"]["type"]

{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.88.4
+Version: 0.88.6
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -68,7 +68,8 @@ A project to detect speaker characteristics by machine learning experiments with
 The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
-* NEW with nkululeko: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
+* NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
+* NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
 * The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
 * Below is a [Hello World example](#helloworld) that should set you up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
 * [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
@@ -203,7 +204,7 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
 * **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
   * *configurations*: which experiments to combine
-  * *--method* (optional): mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
+  * *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
   * *--threshold*: uncertainty threshold (1.0 means no threshold)
   * *--outfile* (optional): name of CSV file for output
   * *--no_labels* (optional): indicate that no ground truth is given
@@ -359,6 +360,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.88.6
+--------------
+* added test speaker assignment
+Version 0.88.5
+--------------
+* add a unique name to the uncertainty plot
+* fix error in speaker embedding (still need speechbrain < 1.0)
+* add get_target_name function in util
 Version 0.88.4
 --------------
 * added more ensemble methods, e.g. based on uncertainty

{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=iiVolfJ9RJn2fD9QaaoFnxuLzxJos6Q4H3tzHQGLfp4,39
+nkululeko/constants.py,sha256=HFKr4pZomwthK3M6yBJLjNzKCEuB1PvMeUwKrHm2cL8,39
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
@@ -46,10 +46,10 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
 nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
 nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/data/dataset.py,sha256=hUD0NqWCfRaSHG8JNs1MsPb0zjUZAf8FJkg_c0ebq0Q,28046
+nkululeko/data/dataset.py,sha256=o2xuluErZg0I8qkR0YtMu2UdewdcgSdRUvUhRXDMwuI,28940
 nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
+nkululeko/feat_extract/feats_agender.py,sha256=sHyvxxlWXv1QGYXHGHIYEQK7X39eifSVie0tu-zBG3M,3189
 nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
 nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
 nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
@@ -65,7 +65,7 @@ nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5Z
 nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
 nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
 nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
-nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
+nkululeko/feat_extract/feats_spkrec.py,sha256=FugR-X2lDFKLLRRhKnhUYJhz-VIktIj8iVEDLbwNwtw,4814
 nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
 nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
 nkululeko/feat_extract/feats_wav2vec2.py,sha256=XyxD4NcrF4VFWSeHkXCKWdEOdr8VMzgVUz8N4mwhdyo,5248
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=vV6SAHWSIvybFvXBGapHjPmWWhKxIsIWuVO-uY9RHzQ,19219
+nkululeko/reporting/reporter.py,sha256=Gt8tEiDQ9rbsYAKb-EbKGVaogPWKKoAEkwtVeiFoTSA,19119
 nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
-nkululeko/utils/util.py,sha256=nK108-v6UubFj2kjJo38flS2yTTeUZyu3gNBGyhaR1c,16512
-nkululeko-0.88.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.88.4.dist-info/METADATA,sha256=WHQrQU39sA1MuTnFTF6Fs47wWfVAtcQTQ4Tga_i5gB0,39583
-nkululeko-0.88.4.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
-nkululeko-0.88.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.88.4.dist-info/RECORD,,
+nkululeko/utils/util.py,sha256=y-pdrjovT8yGtBTJ3ifIpTcF0fPnoz8UKbuLIZ0efpc,16768
+nkululeko-0.88.6.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.88.6.dist-info/METADATA,sha256=7UE8yEbdfJo_SU4xeE3gLlaLkfwC2NEg7w1nou8eGLQ,39955
+nkululeko-0.88.6.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+nkululeko-0.88.6.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.88.6.dist-info/RECORD,,

{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (70.2.0)
+Generator: setuptools (71.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.88.4.dist-info → nkululeko-0.88.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.88.4__py3-none-any.whl → 0.88.6__py3-none-any.whl

nkululeko 0.88.4__py3-none-any.whl → 0.88.6__py3-none-any.whl