PyPI - nkululeko - Versions diffs - 0.88.9__py3-none-any.whl → 0.88.10__py3-none-any.whl - Mend

nkululeko 0.88.9__py3-none-any.whl → 0.88.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

nkululeko/constants.py +1 -1
nkululeko/explore.py +21 -0
nkululeko/feat_extract/feats_agender_agender.py +5 -4
nkululeko/feat_extract/feats_spkrec.py +1 -1
nkululeko/models/model_gmm.py +20 -3
nkululeko/multidb.py +7 -0
nkululeko/plots.py +11 -10
{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/METADATA +9 -3
{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/RECORD +12 -12
{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/WHEEL +1 -1
{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/LICENSE +0 -0
{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.88.9"
+VERSION="0.88.10"
 SAMPLING_RATE = 16000

nkululeko/explore.py CHANGED Viewed

@@ -1,3 +1,24 @@
+"""
+Explore the feature sets of a machine learning experiment.
+This script is the entry point for the 'explore' module of the nkululeko framework.
+It handles loading the experiment configuration, setting up the experiment, and
+running various feature exploration techniques based on the configuration.
+The script supports the following configuration options:
+- `no_warnings`: If set to `True`, it will ignore all warnings during the exploration.
+- `feature_distributions`: If set to `True`, it will generate plots of the feature distributions.
+- `tsne`: If set to `True`, it will generate a t-SNE plot of the feature space.
+- `scatter`: If set to `True`, it will generate a scatter plot of the feature space.
+- `spotlight`: If set to `True`, it will generate a 'spotlight' plot of the feature space.
+- `shap`: If set to `True`, it will generate SHAP feature importance plots.
+- `model`: The type of model to use for the feature exploration (e.g. 'SVM').
+- `plot_tree`: If set to `True`, it will generate a decision tree plot.
+The script can be run from the command line with the `--config` argument to specify
+the configuration file to use. If no configuration file is provided, it will look
+for an `exp.ini` file in the same directory as the script.
+"""
 # explore.py
 # explore the feature sets

nkululeko/feat_extract/feats_agender_agender.py CHANGED Viewed

@@ -7,18 +7,19 @@ import nkululeko.glob_conf as glob_conf
 import audonnx
 import numpy as np
 import audinterface
+import torch
-class AgenderAgenderSet(Featureset):
+class Agender_agenderSet(Featureset):
     """
     Age and gender predictions from the wav2vec2. based model finetuned on agender, described in the paper
     "Speech-based Age and Gender Prediction with Transformers"
     https://arxiv.org/abs/2306.16962
     """
-    def __init__(self, name, data_df):
-        super().__init__(name, data_df)
+    def __init__(self, name, data_df, feats_type):
+        super().__init__(name, data_df, feats_type)
         self.model_loaded = False
+        self.feats_type = feats_type
     def _load_model(self):
         model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"

nkululeko/feat_extract/feats_spkrec.py CHANGED Viewed

@@ -12,7 +12,7 @@ import pandas as pd
 import torch
 import torchaudio
 from nkululeko.feat_extract.featureset import Featureset
-from speechbrain.pretrained import EncoderClassifier
+from speechbrain.inference import EncoderClassifier
 from tqdm import tqdm
 # from transformers import HubertModel, Wav2Vec2FeatureExtractor

nkululeko/models/model_gmm.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from sklearn import mixture
 from nkululeko.models.model import Model
+import pandas as pd
 class GMM_model(Model):
     """An GMM model"""
@@ -12,9 +12,26 @@ class GMM_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         super().__init__(df_train, df_test, feats_train, feats_test)
         self.name = "gmm"
-        n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
+        self.n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
         covariance_type = self.util.config_val("MODEL", "GMM_covariance_type", "full")
         self.clf = mixture.GaussianMixture(
-            n_components=n_components, covariance_type=covariance_type
+            n_components=self.n_components,
+            covariance_type=covariance_type,
+            random_state = 42,
         )
         # set up the classifier
+    def get_predictions(self):
+        """Use the predict_proba method of the GaussianMixture model to get
+        probabilities. Create a DataFrame with these probabilities and return
+        it along with the predictions."""
+        probs = self.clf.predict_proba(self.feats_test)
+        preds = self.clf.predict(self.feats_test)
+        # Convert predictions to a list
+        preds = preds.tolist()
+        # Create a DataFrame for probabilities
+        proba_df = pd.DataFrame(probs, index=self.feats_test.index, columns=range(self.n_components))
+        return preds, proba_df

nkululeko/multidb.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+Demonstrates the usage of the ML-experiment framework for the nkululeko MULTIDB project.
+The `main` function is the entry point of the script, which parses command-line arguments, reads a configuration file, and runs the nkululeko or aug_train functions based on the configuration.
+The `plot_heatmap` function generates a heatmap plot of the results and saves it to a file, along with some summary statistics.
+"""
 # main.py
 # Demonstration code to use the ML-experiment framework

nkululeko/plots.py CHANGED Viewed

@@ -4,14 +4,14 @@ import ast
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from scipy import stats
 import seaborn as sns
+from scipy import stats
 from sklearn.manifold import TSNE
 import nkululeko.glob_conf as glob_conf
+import nkululeko.utils.stats as su
 from nkululeko.reporting.defines import Header
 from nkululeko.reporting.report_item import ReportItem
-import nkululeko.utils.stats as su
 from nkululeko.utils.util import Util
@@ -32,9 +32,9 @@ class Plots:
         # plot the distribution of samples per speaker
         # one up because of the runs
         fig_dir = self.util.get_path("fig_dir") + "../"
-        self.util.debug(f"plotting samples per speaker")
+        self.util.debug("plotting samples per speaker")
         if "gender" in df_speakers:
-            filename = f"samples_value_counts"
+            filename = "samples_value_counts"
             ax = (
                 df_speakers.groupby("samplenum")["gender"]
                 .value_counts()
@@ -46,7 +46,7 @@ class Plots:
                     rot=0,
                 )
             )
-            ax.set_ylabel(f"number of speakers")
+            ax.set_ylabel("number of speakers")
             ax.set_xlabel("number of samples")
             self.save_plot(
                 ax,
@@ -58,7 +58,7 @@ class Plots:
             # fig.clear()
         else:
-            filename = f"samples_value_counts"
+            filename = "samples_value_counts"
             ax = (
                 df_speakers["samplenum"]
                 .value_counts()
@@ -265,7 +265,8 @@ class Plots:
         """Plot relation of categorical distribution with continuous."""
         dist_type = self.util.config_val("EXPL", "dist_type", "hist")
         cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
-        if dist_type == "hist":
+        model_type = self.util.get_model_type()
+        if dist_type == "hist" and model_type != "tree":
             ax = sns.histplot(df, x=cont_col, hue=cat_col, kde=True)
             caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
             ax.set_title(caption)
@@ -489,7 +490,7 @@ class Plots:
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,
-                f"Scatter plot",
+                "Scatter plot",
                 f"using {dimred_type}",
                 filename,
             )
@@ -561,8 +562,8 @@ class Plots:
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,
-                f"Tree plot",
-                f"for feature importance",
+                "Tree plot",
+                "for feature importance",
                 filename,
             )
         )

{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.88.9
+Version: 0.88.10
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -204,9 +204,10 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
 * **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
   * *configurations*: which experiments to combine
-  * *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
+  * *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
   * *--threshold*: uncertainty threshold (1.0 means no threshold)
-  * *--outfile* (optional): name of CSV file for output
+  * *--weightes*: weights for performance_weighted method (could be from previous UAR, ACC)
+  * *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
   * *--no_labels* (optional): indicate that no ground truth is given
 * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
 * **nkululeko.demo**: [demo the current best model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/) on the command line
@@ -360,6 +361,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.88.10
+--------------
+* SVM C val defaults to 1
+* fixed agender_agender naming bug
 Version 0.88.9
 --------------
 * added performance_weighted ensemble

{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/RECORD RENAMED Viewed

@@ -2,23 +2,23 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=tK1QIQ72lahwT47cOoEvhMfH2sH4BRnP3p6P7kdC_QQ,39
+nkululeko/constants.py,sha256=8iRgPx-MBB6fcD0RICfYCOaSZFjH2hPcLRqFhgbTcTU,40
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
 nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
 nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
-nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
+nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
 nkululeko/modelrunner.py,sha256=cKYD9a7MRoBxfqUy3X8kf6rGTYho-33In8I9YkzMOo8,11196
-nkululeko/multidb.py,sha256=1X2vZwDHf6HuYKCoIGDP34FECMZ2mcGNZ6-cFYZFnIQ,6332
+nkululeko/multidb.py,sha256=CCjmVsZyvydgOztFlaeBvOJH8nsvU-sPQdFAw8-q0U4,6752
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
-nkululeko/plots.py,sha256=WsI_dtPKfrYPsKymHRmIhqj33aZzTcE8fF_EwLkm_5A,22899
+nkululeko/plots.py,sha256=gfNy9Eu2PhSaykMazBPThcYS5o5KwuQwY2jshAUK5Rk,22965
 nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
 nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
 nkululeko/runmanager.py,sha256=xvxL5a9d3jtGFqx0Z3nyyxowA368uNyP0ZitO8kxIIE,7581
@@ -50,7 +50,7 @@ nkululeko/data/dataset.py,sha256=xaawk5QthuVStWjHWTFBtorcIe71lbPQgC6mHzSXGeI,292
 nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=sHyvxxlWXv1QGYXHGHIYEQK7X39eifSVie0tu-zBG3M,3189
-nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
+nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
 nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
 nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
 nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
@@ -65,7 +65,7 @@ nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5Z
 nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
 nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
 nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
-nkululeko/feat_extract/feats_spkrec.py,sha256=FugR-X2lDFKLLRRhKnhUYJhz-VIktIj8iVEDLbwNwtw,4814
+nkululeko/feat_extract/feats_spkrec.py,sha256=j_-h2NfLa3qes6vOFrNiIfPc5HmAxDpMpMlw5QqSBAM,4813
 nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
 nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
 nkululeko/feat_extract/feats_wav2vec2.py,sha256=XyxD4NcrF4VFWSeHkXCKWdEOdr8VMzgVUz8N4mwhdyo,5248
@@ -80,7 +80,7 @@ nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 nkululeko/models/model.py,sha256=JXrd0fbU0JhTxUDrs0kOEHF9rtPJBxBeO6zcrHAzk8k,12475
 nkululeko/models/model_bayes.py,sha256=WJFZ8wFKwWATz6MhmjeZIi1Pal1viU549WL_PjXDSy8,406
 nkululeko/models/model_cnn.py,sha256=NreR2LrKMyBYHyIJEL6wm3UQ4mA5HleZfpUyA5wNYpA,10629
-nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs,649
+nkululeko/models/model_gmm.py,sha256=m1ONBql-T0La8Cv0awB7lPUG-kgbygoWmbuqzDzmj-Y,1337
 nkululeko/models/model_knn.py,sha256=KlnrJfwiVnmXZrAaYGFrKA2f5sznvTzSJQ8-5etOP0k,599
 nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvkr1fk,610
 nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
 nkululeko/utils/util.py,sha256=KMxPzb0HN3XuNzAd7Kn3M3Nq91-0sDrAAEBgDKryCdo,16688
-nkululeko-0.88.9.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.88.9.dist-info/METADATA,sha256=2NTuv6JzIYo9FbjMFT2zP_SuxZcBuagowGZ9YneOcOA,40134
-nkululeko-0.88.9.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
-nkululeko-0.88.9.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.88.9.dist-info/RECORD,,
+nkululeko-0.88.10.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.88.10.dist-info/METADATA,sha256=EABiFmDYNwCs_0_5L2XlGqcdxA4bfZhWKmL1ZkiNQC8,40364
+nkululeko-0.88.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+nkululeko-0.88.10.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.88.10.dist-info/RECORD,,

{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (71.1.0)
+Generator: setuptools (72.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.88.9.dist-info → nkululeko-0.88.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.88.9__py3-none-any.whl → 0.88.10__py3-none-any.whl

nkululeko 0.88.9__py3-none-any.whl → 0.88.10__py3-none-any.whl