nkululeko 0.88.9__py3-none-any.whl → 0.88.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.88.9"
1
+ VERSION="0.88.10"
2
2
  SAMPLING_RATE = 16000
nkululeko/explore.py CHANGED
@@ -1,3 +1,24 @@
1
+ """
2
+ Explore the feature sets of a machine learning experiment.
3
+
4
+ This script is the entry point for the 'explore' module of the nkululeko framework.
5
+ It handles loading the experiment configuration, setting up the experiment, and
6
+ running various feature exploration techniques based on the configuration.
7
+
8
+ The script supports the following configuration options:
9
+ - `no_warnings`: If set to `True`, it will ignore all warnings during the exploration.
10
+ - `feature_distributions`: If set to `True`, it will generate plots of the feature distributions.
11
+ - `tsne`: If set to `True`, it will generate a t-SNE plot of the feature space.
12
+ - `scatter`: If set to `True`, it will generate a scatter plot of the feature space.
13
+ - `spotlight`: If set to `True`, it will generate a 'spotlight' plot of the feature space.
14
+ - `shap`: If set to `True`, it will generate SHAP feature importance plots.
15
+ - `model`: The type of model to use for the feature exploration (e.g. 'SVM').
16
+ - `plot_tree`: If set to `True`, it will generate a decision tree plot.
17
+
18
+ The script can be run from the command line with the `--config` argument to specify
19
+ the configuration file to use. If no configuration file is provided, it will look
20
+ for an `exp.ini` file in the same directory as the script.
21
+ """
1
22
  # explore.py
2
23
  # explore the feature sets
3
24
 
@@ -7,18 +7,19 @@ import nkululeko.glob_conf as glob_conf
7
7
  import audonnx
8
8
  import numpy as np
9
9
  import audinterface
10
+ import torch
10
11
 
11
-
12
- class AgenderAgenderSet(Featureset):
12
+ class Agender_agenderSet(Featureset):
13
13
  """
14
14
  Age and gender predictions from the wav2vec2-based model fine-tuned on agender, described in the paper
15
15
  "Speech-based Age and Gender Prediction with Transformers"
16
16
  https://arxiv.org/abs/2306.16962
17
17
  """
18
18
 
19
- def __init__(self, name, data_df):
20
- super().__init__(name, data_df)
19
+ def __init__(self, name, data_df, feats_type):
20
+ super().__init__(name, data_df, feats_type)
21
21
  self.model_loaded = False
22
+ self.feats_type = feats_type
22
23
 
23
24
  def _load_model(self):
24
25
  model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
@@ -12,7 +12,7 @@ import pandas as pd
12
12
  import torch
13
13
  import torchaudio
14
14
  from nkululeko.feat_extract.featureset import Featureset
15
- from speechbrain.pretrained import EncoderClassifier
15
+ from speechbrain.inference import EncoderClassifier
16
16
  from tqdm import tqdm
17
17
 
18
18
  # from transformers import HubertModel, Wav2Vec2FeatureExtractor
@@ -2,7 +2,7 @@
2
2
 
3
3
  from sklearn import mixture
4
4
  from nkululeko.models.model import Model
5
-
5
+ import pandas as pd
6
6
 
7
7
  class GMM_model(Model):
8
8
  """An GMM model"""
@@ -12,9 +12,26 @@ class GMM_model(Model):
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
14
  self.name = "gmm"
15
- n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
15
+ self.n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
16
16
  covariance_type = self.util.config_val("MODEL", "GMM_covariance_type", "full")
17
17
  self.clf = mixture.GaussianMixture(
18
- n_components=n_components, covariance_type=covariance_type
18
+ n_components=self.n_components,
19
+ covariance_type=covariance_type,
20
+ random_state = 42,
19
21
  )
20
22
  # set up the classifier
23
+
24
+ def get_predictions(self):
25
+ """Use the predict_proba method of the GaussianMixture model to get
26
+ probabilities. Create a DataFrame with these probabilities and return
27
+ it along with the predictions."""
28
+ probs = self.clf.predict_proba(self.feats_test)
29
+ preds = self.clf.predict(self.feats_test)
30
+
31
+ # Convert predictions to a list
32
+ preds = preds.tolist()
33
+
34
+ # Create a DataFrame for probabilities
35
+ proba_df = pd.DataFrame(probs, index=self.feats_test.index, columns=range(self.n_components))
36
+
37
+ return preds, proba_df
nkululeko/multidb.py CHANGED
@@ -1,3 +1,10 @@
1
+ """
2
+ Demonstrates the usage of the ML-experiment framework for the nkululeko MULTIDB project.
3
+
4
+ The `main` function is the entry point of the script, which parses command-line arguments, reads a configuration file, and runs the nkululeko or aug_train functions based on the configuration.
5
+
6
+ The `plot_heatmap` function generates a heatmap plot of the results and saves it to a file, along with some summary statistics.
7
+ """
1
8
  # main.py
2
9
  # Demonstration code to use the ML-experiment framework
3
10
 
nkululeko/plots.py CHANGED
@@ -4,14 +4,14 @@ import ast
4
4
  import matplotlib.pyplot as plt
5
5
  import numpy as np
6
6
  import pandas as pd
7
- from scipy import stats
8
7
  import seaborn as sns
8
+ from scipy import stats
9
9
  from sklearn.manifold import TSNE
10
10
 
11
11
  import nkululeko.glob_conf as glob_conf
12
+ import nkululeko.utils.stats as su
12
13
  from nkululeko.reporting.defines import Header
13
14
  from nkululeko.reporting.report_item import ReportItem
14
- import nkululeko.utils.stats as su
15
15
  from nkululeko.utils.util import Util
16
16
 
17
17
 
@@ -32,9 +32,9 @@ class Plots:
32
32
  # plot the distribution of samples per speaker
33
33
  # one up because of the runs
34
34
  fig_dir = self.util.get_path("fig_dir") + "../"
35
- self.util.debug(f"plotting samples per speaker")
35
+ self.util.debug("plotting samples per speaker")
36
36
  if "gender" in df_speakers:
37
- filename = f"samples_value_counts"
37
+ filename = "samples_value_counts"
38
38
  ax = (
39
39
  df_speakers.groupby("samplenum")["gender"]
40
40
  .value_counts()
@@ -46,7 +46,7 @@ class Plots:
46
46
  rot=0,
47
47
  )
48
48
  )
49
- ax.set_ylabel(f"number of speakers")
49
+ ax.set_ylabel("number of speakers")
50
50
  ax.set_xlabel("number of samples")
51
51
  self.save_plot(
52
52
  ax,
@@ -58,7 +58,7 @@ class Plots:
58
58
 
59
59
  # fig.clear()
60
60
  else:
61
- filename = f"samples_value_counts"
61
+ filename = "samples_value_counts"
62
62
  ax = (
63
63
  df_speakers["samplenum"]
64
64
  .value_counts()
@@ -265,7 +265,8 @@ class Plots:
265
265
  """Plot relation of categorical distribution with continuous."""
266
266
  dist_type = self.util.config_val("EXPL", "dist_type", "hist")
267
267
  cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
268
- if dist_type == "hist":
268
+ model_type = self.util.get_model_type()
269
+ if dist_type == "hist" and model_type != "tree":
269
270
  ax = sns.histplot(df, x=cont_col, hue=cat_col, kde=True)
270
271
  caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
271
272
  ax.set_title(caption)
@@ -489,7 +490,7 @@ class Plots:
489
490
  glob_conf.report.add_item(
490
491
  ReportItem(
491
492
  Header.HEADER_EXPLORE,
492
- f"Scatter plot",
493
+ "Scatter plot",
493
494
  f"using {dimred_type}",
494
495
  filename,
495
496
  )
@@ -561,8 +562,8 @@ class Plots:
561
562
  glob_conf.report.add_item(
562
563
  ReportItem(
563
564
  Header.HEADER_EXPLORE,
564
- f"Tree plot",
565
- f"for feature importance",
565
+ "Tree plot",
566
+ "for feature importance",
566
567
  filename,
567
568
  )
568
569
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.88.9
3
+ Version: 0.88.10
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -204,9 +204,10 @@ All of them take *--config <my_config.ini>* as an argument.
204
204
  * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
205
205
  * **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
206
206
  * *configurations*: which experiments to combine
207
- * *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
207
+ * *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
208
208
  * *--threshold*: uncertainty threshold (1.0 means no threshold)
209
- * *--outfile* (optional): name of CSV file for output
209
+ * *--weights*: weights for the performance_weighted method (e.g. UAR or ACC values from previous runs)
210
+ * *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
210
211
  * *--no_labels* (optional): indicate that no ground truth is given
211
212
  * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases both across databases and within each database
212
213
  * **nkululeko.demo**: [demo the current best model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/) on the command line
@@ -360,6 +361,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
360
361
  Changelog
361
362
  =========
362
363
 
364
+ Version 0.88.10
365
+ ---------------
366
+ * SVM C val defaults to 1
367
+ * fixed agender_agender naming bug
368
+
363
369
  Version 0.88.9
364
370
  --------------
365
371
  * added performance_weighted ensemble
@@ -2,23 +2,23 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=tK1QIQ72lahwT47cOoEvhMfH2sH4BRnP3p6P7kdC_QQ,39
5
+ nkululeko/constants.py,sha256=8iRgPx-MBB6fcD0RICfYCOaSZFjH2hPcLRqFhgbTcTU,40
6
6
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
9
9
  nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
10
10
  nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
11
- nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
11
+ nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
12
12
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
13
13
  nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
14
14
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
15
15
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
16
16
  nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
17
17
  nkululeko/modelrunner.py,sha256=cKYD9a7MRoBxfqUy3X8kf6rGTYho-33In8I9YkzMOo8,11196
18
- nkululeko/multidb.py,sha256=1X2vZwDHf6HuYKCoIGDP34FECMZ2mcGNZ6-cFYZFnIQ,6332
18
+ nkululeko/multidb.py,sha256=CCjmVsZyvydgOztFlaeBvOJH8nsvU-sPQdFAw8-q0U4,6752
19
19
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
20
20
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
21
- nkululeko/plots.py,sha256=WsI_dtPKfrYPsKymHRmIhqj33aZzTcE8fF_EwLkm_5A,22899
21
+ nkululeko/plots.py,sha256=gfNy9Eu2PhSaykMazBPThcYS5o5KwuQwY2jshAUK5Rk,22965
22
22
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
23
23
  nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
24
24
  nkululeko/runmanager.py,sha256=xvxL5a9d3jtGFqx0Z3nyyxowA368uNyP0ZitO8kxIIE,7581
@@ -50,7 +50,7 @@ nkululeko/data/dataset.py,sha256=xaawk5QthuVStWjHWTFBtorcIe71lbPQgC6mHzSXGeI,292
50
50
  nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
51
51
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  nkululeko/feat_extract/feats_agender.py,sha256=sHyvxxlWXv1QGYXHGHIYEQK7X39eifSVie0tu-zBG3M,3189
53
- nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
53
+ nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
54
54
  nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
55
55
  nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
56
56
  nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
@@ -65,7 +65,7 @@ nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5Z
65
65
  nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
66
66
  nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
67
67
  nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
68
- nkululeko/feat_extract/feats_spkrec.py,sha256=FugR-X2lDFKLLRRhKnhUYJhz-VIktIj8iVEDLbwNwtw,4814
68
+ nkululeko/feat_extract/feats_spkrec.py,sha256=j_-h2NfLa3qes6vOFrNiIfPc5HmAxDpMpMlw5QqSBAM,4813
69
69
  nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
70
70
  nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
71
71
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=XyxD4NcrF4VFWSeHkXCKWdEOdr8VMzgVUz8N4mwhdyo,5248
@@ -80,7 +80,7 @@ nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
80
80
  nkululeko/models/model.py,sha256=JXrd0fbU0JhTxUDrs0kOEHF9rtPJBxBeO6zcrHAzk8k,12475
81
81
  nkululeko/models/model_bayes.py,sha256=WJFZ8wFKwWATz6MhmjeZIi1Pal1viU549WL_PjXDSy8,406
82
82
  nkululeko/models/model_cnn.py,sha256=NreR2LrKMyBYHyIJEL6wm3UQ4mA5HleZfpUyA5wNYpA,10629
83
- nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs,649
83
+ nkululeko/models/model_gmm.py,sha256=m1ONBql-T0La8Cv0awB7lPUG-kgbygoWmbuqzDzmj-Y,1337
84
84
  nkululeko/models/model_knn.py,sha256=KlnrJfwiVnmXZrAaYGFrKA2f5sznvTzSJQ8-5etOP0k,599
85
85
  nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvkr1fk,610
86
86
  nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
108
108
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
109
109
  nkululeko/utils/util.py,sha256=KMxPzb0HN3XuNzAd7Kn3M3Nq91-0sDrAAEBgDKryCdo,16688
110
- nkululeko-0.88.9.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
- nkululeko-0.88.9.dist-info/METADATA,sha256=2NTuv6JzIYo9FbjMFT2zP_SuxZcBuagowGZ9YneOcOA,40134
112
- nkululeko-0.88.9.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
113
- nkululeko-0.88.9.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
- nkululeko-0.88.9.dist-info/RECORD,,
110
+ nkululeko-0.88.10.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
+ nkululeko-0.88.10.dist-info/METADATA,sha256=EABiFmDYNwCs_0_5L2XlGqcdxA4bfZhWKmL1ZkiNQC8,40364
112
+ nkululeko-0.88.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
113
+ nkululeko-0.88.10.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
+ nkululeko-0.88.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5