nkululeko 0.88.4__py3-none-any.whl → 0.88.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.88.4"
1
+ VERSION="0.88.6"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset.py CHANGED
@@ -423,6 +423,9 @@ class Dataset:
423
423
  self.util.debug(f"{self.name}: trying to reuse data splits")
424
424
  self.df_test = pd.read_pickle(storage_test)
425
425
  self.df_train = pd.read_pickle(storage_train)
426
+ elif isinstance(ast.literal_eval(split_strategy), list):
427
+ # treat this as a list of test speakers
428
+ self.assign_speakers(ast.literal_eval(split_strategy))
426
429
  else:
427
430
  self.util.error(f"unknown split strategy: {split_strategy}")
428
431
 
@@ -515,6 +518,19 @@ class Dataset:
515
518
  # because this generates new train/test sample quantities, the feature extraction has to be done again
516
519
  glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
517
520
 
521
+ def assign_speakers(self, speakers):
522
+ """One way to split train and eval sets: Specify test speaker names."""
523
+ self.df_test = self.df[self.df.speaker.isin(speakers)]
524
+ if len(self.df_test) == 0:
525
+ self.util.error(f"no speakers found in {speakers}")
526
+ self.df_train = self.df[~self.df.index.isin(self.df_test.index)]
527
+ self.util.debug(
528
+ f"{self.name} (speakers assigned): [{self.df_train.shape[0]}/{self.df_test.shape[0]}]"
529
+ " samples in train/test"
530
+ )
531
+ # because this generates new train/test sample quantities, the feature extraction has to be done again
532
+ glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
533
+
518
534
  def split_speakers(self):
519
535
  """One way to split train and eval sets: Specify percentage of evaluation speakers"""
520
536
  test_percent = int(self.util.config_val_data(self.name, "test_size", 20))
@@ -1,13 +1,14 @@
1
1
  # feats_agender.py
2
+
2
3
  from nkululeko.feat_extract.featureset import Featureset
3
4
  import os
4
- import pandas as pd
5
+ # import pandas as pd
5
6
  import audeer
6
7
  import nkululeko.glob_conf as glob_conf
7
8
  import audonnx
8
9
  import numpy as np
9
10
  import audinterface
10
-
11
+ import torch
11
12
 
12
13
  class AgenderSet(Featureset):
13
14
  """
@@ -32,7 +33,8 @@ class AgenderSet(Featureset):
32
33
  archive_path = audeer.download_url(
33
34
  model_url, cache_root, verbose=True)
34
35
  audeer.extract_archive(archive_path, model_root)
35
- device = self.util.config_val("MODEL", "device", "cpu")
36
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
37
+ device = self.util.config_val("MODEL", "device", cuda)
36
38
  self.model = audonnx.load(model_root, device=device)
37
39
  self.util.debug(f"initialized agender model")
38
40
  self.model_loaded = True
@@ -24,7 +24,7 @@ class Spkrec(Featureset):
24
24
  def __init__(self, name, data_df, feat_type):
25
25
  """Constructor. is_train is needed to distinguish from test/dev sets,
26
26
  because they use the codebook from the training"""
27
- super().__init__(name, data_df)
27
+ super().__init__(name, data_df, feat_type)
28
28
  # check if device is not set, use cuda if available
29
29
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
30
30
  self.device = self.util.config_val("MODEL", "device", cuda)
@@ -2,28 +2,32 @@ import ast
2
2
  import glob
3
3
  import json
4
4
  import math
5
- import os
6
5
 
7
- from confidence_intervals import evaluate_with_conf_int
8
6
  import matplotlib.pyplot as plt
9
7
  import numpy as np
8
+
9
+ # from torch import is_tensor
10
+ from audmetric import (
11
+ accuracy,
12
+ concordance_cc,
13
+ mean_absolute_error,
14
+ mean_squared_error,
15
+ unweighted_average_recall,
16
+ )
17
+
18
+ # import os
19
+ from confidence_intervals import evaluate_with_conf_int
10
20
  from scipy.special import softmax
11
- from scipy.stats import entropy
12
- from scipy.stats import pearsonr
13
- from sklearn.metrics import ConfusionMatrixDisplay
14
- from sklearn.metrics import auc
15
- from sklearn.metrics import classification_report
16
- from sklearn.metrics import confusion_matrix
17
- from sklearn.metrics import r2_score
18
- from sklearn.metrics import roc_auc_score
19
- from sklearn.metrics import roc_curve
20
- from torch import is_tensor
21
-
22
- from audmetric import accuracy
23
- from audmetric import concordance_cc
24
- from audmetric import mean_absolute_error
25
- from audmetric import mean_squared_error
26
- from audmetric import unweighted_average_recall
21
+ from scipy.stats import entropy, pearsonr
22
+ from sklearn.metrics import (
23
+ ConfusionMatrixDisplay,
24
+ auc,
25
+ classification_report,
26
+ confusion_matrix,
27
+ r2_score,
28
+ roc_auc_score,
29
+ roc_curve,
30
+ )
27
31
 
28
32
  import nkululeko.glob_conf as glob_conf
29
33
  from nkululeko.plots import Plots
@@ -167,7 +171,7 @@ class Reporter:
167
171
  probas["uncertainty"] = uncertainty
168
172
  probas["correct"] = probas.predicted == probas.truth
169
173
  sp = self.util.get_pred_name()
170
-
174
+
171
175
  self.probas = probas
172
176
  probas.to_csv(sp)
173
177
  self.util.debug(f"Saved probabilities to {sp}")
@@ -175,7 +179,13 @@ class Reporter:
175
179
  ax, caption = plots.plotcatcont(
176
180
  probas, "correct", "uncertainty", "uncertainty", "correct"
177
181
  )
178
- plots.save_plot(ax, caption, "Uncertainty", "uncertainty", "samples")
182
+ plots.save_plot(
183
+ ax,
184
+ caption,
185
+ "Uncertainty",
186
+ "uncertainty_samples",
187
+ self.util.get_exp_name(),
188
+ )
179
189
 
180
190
  def set_id(self, run, epoch):
181
191
  """Make the report identifiable with run and epoch index."""
nkululeko/utils/util.py CHANGED
@@ -157,8 +157,9 @@ class Util:
157
157
 
158
158
  def get_pred_name(self):
159
159
  store = self.get_path("store")
160
+ target = self.get_target_name()
160
161
  pred_name = self.get_model_description()
161
- return f"{store}/pred_{pred_name}.csv"
162
+ return f"{store}/pred_{target}_{pred_name}.csv"
162
163
 
163
164
  def is_categorical(self, pd_series):
164
165
  """Check if a dataframe column is categorical"""
@@ -217,9 +218,14 @@ class Util:
217
218
  return_string = f"{ds}"
218
219
  if not only_data:
219
220
  mt = self.get_model_description()
220
- return_string = return_string + "_" + mt
221
+ target = self.get_target_name()
222
+ return_string = return_string + "_" + target + "_" + mt
221
223
  return return_string.replace("__", "_")
222
224
 
225
+ def get_target_name(self):
226
+ """Get a string as name from all target sets that are used."""
227
+ return self.config["DATA"]["target"]
228
+
223
229
  def get_model_type(self):
224
230
  return self.config["MODEL"]["type"]
225
231
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.88.4
3
+ Version: 0.88.6
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -68,7 +68,8 @@ A project to detect speaker characteristics by machine learning experiments with
68
68
 
69
69
  The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
70
70
 
71
- * NEW with nkululeko: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
71
+ * NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
72
+ * NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
72
73
  * The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
73
74
  * Below is a [Hello World example](#helloworld) that should set you up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
74
75
  * [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
@@ -203,7 +204,7 @@ All of them take *--config <my_config.ini>* as an argument.
203
204
  * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
204
205
  * **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
205
206
  * *configurations*: which experiments to combine
206
- * *--method* (optional): mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
207
+ * *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
207
208
  * *--threshold*: uncertainty threshold (1.0 means no threshold)
208
209
  * *--outfile* (optional): name of CSV file for output
209
210
  * *--no_labels* (optional): indicate that no ground truth is given
@@ -359,6 +360,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
359
360
  Changelog
360
361
  =========
361
362
 
363
+ Version 0.88.6
364
+ --------------
365
+ * added test speaker assign
366
+
367
+ Version 0.88.5
368
+ --------------
369
+ * add a unique name to the uncertainty plot
370
+ * fix error in speaker embedding (still need speechbrain < 1.0)
371
+ * add get_target_name function in util
372
+
362
373
  Version 0.88.4
363
374
  --------------
364
375
  * added more ensemble methods, e.g. based on uncertainty
@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=iiVolfJ9RJn2fD9QaaoFnxuLzxJos6Q4H3tzHQGLfp4,39
5
+ nkululeko/constants.py,sha256=HFKr4pZomwthK3M6yBJLjNzKCEuB1PvMeUwKrHm2cL8,39
6
6
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
@@ -46,10 +46,10 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
46
46
  nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
47
47
  nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
48
48
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- nkululeko/data/dataset.py,sha256=hUD0NqWCfRaSHG8JNs1MsPb0zjUZAf8FJkg_c0ebq0Q,28046
49
+ nkululeko/data/dataset.py,sha256=o2xuluErZg0I8qkR0YtMu2UdewdcgSdRUvUhRXDMwuI,28940
50
50
  nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
51
51
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
52
+ nkululeko/feat_extract/feats_agender.py,sha256=sHyvxxlWXv1QGYXHGHIYEQK7X39eifSVie0tu-zBG3M,3189
53
53
  nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
54
54
  nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
55
55
  nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
@@ -65,7 +65,7 @@ nkululeko/feat_extract/feats_oxbow.py,sha256=CmIG9cbHTJTJVnzgCPdQpYpnlewWExpsr5Z
65
65
  nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq5zzPpHzg,3105
66
66
  nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
67
67
  nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
68
- nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
68
+ nkululeko/feat_extract/feats_spkrec.py,sha256=FugR-X2lDFKLLRRhKnhUYJhz-VIktIj8iVEDLbwNwtw,4814
69
69
  nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
70
70
  nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
71
71
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=XyxD4NcrF4VFWSeHkXCKWdEOdr8VMzgVUz8N4mwhdyo,5248
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
98
98
  nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
99
99
  nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
100
100
  nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
101
- nkululeko/reporting/reporter.py,sha256=vV6SAHWSIvybFvXBGapHjPmWWhKxIsIWuVO-uY9RHzQ,19219
101
+ nkululeko/reporting/reporter.py,sha256=Gt8tEiDQ9rbsYAKb-EbKGVaogPWKKoAEkwtVeiFoTSA,19119
102
102
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
103
103
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
106
106
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
108
108
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
109
- nkululeko/utils/util.py,sha256=nK108-v6UubFj2kjJo38flS2yTTeUZyu3gNBGyhaR1c,16512
110
- nkululeko-0.88.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
- nkululeko-0.88.4.dist-info/METADATA,sha256=WHQrQU39sA1MuTnFTF6Fs47wWfVAtcQTQ4Tga_i5gB0,39583
112
- nkululeko-0.88.4.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
113
- nkululeko-0.88.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
- nkululeko-0.88.4.dist-info/RECORD,,
109
+ nkululeko/utils/util.py,sha256=y-pdrjovT8yGtBTJ3ifIpTcF0fPnoz8UKbuLIZ0efpc,16768
110
+ nkululeko-0.88.6.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
+ nkululeko-0.88.6.dist-info/METADATA,sha256=7UE8yEbdfJo_SU4xeE3gLlaLkfwC2NEg7w1nou8eGLQ,39955
112
+ nkululeko-0.88.6.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
113
+ nkululeko-0.88.6.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
+ nkululeko-0.88.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (71.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5