nkululeko 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.88.12"
1
+ VERSION="0.89.1"
2
2
  SAMPLING_RATE = 16000
nkululeko/ensemble.py CHANGED
@@ -26,7 +26,11 @@ from pathlib import Path
26
26
 
27
27
  import numpy as np
28
28
  import pandas as pd
29
- from sklearn.metrics import balanced_accuracy_score
29
+ from sklearn.metrics import(
30
+ balanced_accuracy_score,
31
+ classification_report,
32
+ f1_score
33
+ )
30
34
 
31
35
  from nkululeko.constants import VERSION
32
36
  from nkululeko.experiment import Experiment
@@ -284,6 +288,10 @@ def ensemble_predictions(
284
288
  predicted = ensemble_preds["predicted"]
285
289
  uar = balanced_accuracy_score(truth, predicted)
286
290
  acc = (truth == predicted).mean()
291
+ # print classification report
292
+ Util("ensemble").debug(f"\n {classification_report(truth, predicted)}")
293
+ # f1 = f1_score(truth, predicted, pos_label='p')
294
+ # Util("ensemble").debug(f"F1: {f1:.3f}")
287
295
  Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
288
296
 
289
297
  return ensemble_preds
@@ -139,7 +139,7 @@ class FeatureAnalyser:
139
139
  elif model_s == "svm":
140
140
  from sklearn.svm import SVC
141
141
 
142
- c = float(self.util.config_val("MODEL", "C_val", "0.001"))
142
+ c = float(self.util.config_val("MODEL", "C_val", "1.0"))
143
143
  model = SVC(kernel="linear", C=c, gamma="scale")
144
144
  result_importances[model_s] = self._get_importance(
145
145
  model, permutation
@@ -205,7 +205,7 @@ class FeatureAnalyser:
205
205
  model, permutation
206
206
  )
207
207
  elif model_s == "xgr":
208
- from xgboost import XGBClassifier
208
+ from xgboost import XGBRegressor
209
209
 
210
210
  model = XGBRegressor()
211
211
  result_importances[model_s] = self._get_importance(
@@ -270,12 +270,14 @@ class FeatureAnalyser:
270
270
  )
271
271
  )
272
272
 
273
+ # print feature importance values to file and debug and save to result
274
+ self.util.debug(f"Importance features from {model_name}: features = \n{df_imp['feats'].values.tolist()}")
273
275
  # result file
274
276
  res_dir = self.util.get_path("res_dir")
275
277
  filename = f"_EXPL_{model_name}"
276
278
  if permutation:
277
279
  filename += "_perm"
278
- filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{model_name}.txt"
280
+ filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{max_feat_num}_fi.txt"
279
281
  with open(filename, "w") as text_file:
280
282
  text_file.write(
281
283
  "features in order of decreasing importance according to model"
@@ -283,7 +285,8 @@ class FeatureAnalyser:
283
285
  )
284
286
 
285
287
  df_imp.to_csv(filename, mode="a")
286
-
288
+ self.util.debug(f"Saved feature importance values to {filename}")
289
+
287
290
  # check if feature distributions should be plotted
288
291
  plot_feats = self.util.config_val("EXPL", "feature_distributions", False)
289
292
  if plot_feats:
@@ -12,4 +12,6 @@ class Tree_model(Model):
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
14
  self.name = "tree"
15
- self.clf = DecisionTreeClassifier() # set up the classifier
15
+ self.clf = DecisionTreeClassifier(
16
+ random_state=42
17
+ ) # set up the classifier
@@ -27,6 +27,7 @@ from sklearn.metrics import (
27
27
  r2_score,
28
28
  roc_auc_score,
29
29
  roc_curve,
30
+ RocCurveDisplay,
30
31
  )
31
32
 
32
33
  import nkululeko.glob_conf as glob_conf
@@ -75,6 +76,7 @@ class Reporter:
75
76
  self.result = Result(0, 0, 0, 0, "unknown")
76
77
  self.run = run
77
78
  self.epoch = epoch
79
+ self.model_type = self.util.get_model_type()
78
80
  self._set_metric()
79
81
  self.filenameadd = ""
80
82
  self.cont_to_cat = False
@@ -387,6 +389,7 @@ class Reporter:
387
389
  epoch = self.epoch
388
390
  """Print all evaluation values to text file."""
389
391
  res_dir = self.util.get_path("res_dir")
392
+ fig_dir = self.util.get_path("fig_dir")
390
393
  file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}.txt"
391
394
  if self.util.exp_is_classification():
392
395
  labels = glob_conf.labels
@@ -397,6 +400,10 @@ class Reporter:
397
400
  target_names=labels,
398
401
  output_dict=True,
399
402
  )
403
+ # print classification report in console
404
+ self.util.debug(
405
+ f"\n {classification_report(self.truths, self.preds, target_names=labels)}"
406
+ )
400
407
  except ValueError as e:
401
408
  self.util.debug(
402
409
  "Reporter: caught a ValueError when trying to get"
@@ -415,6 +422,17 @@ class Reporter:
415
422
  if len(np.unique(self.truths)) == 2:
416
423
  fpr, tpr, _ = roc_curve(self.truths, self.preds)
417
424
  auc_score = auc(fpr, tpr)
425
+ display = RocCurveDisplay(
426
+ fpr=fpr,
427
+ tpr=tpr,
428
+ roc_auc=auc_score,
429
+ estimator_name=f"{self.model_type} estimator",
430
+ )
431
+ # save plot
432
+ plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
433
+ display.plot(ax=None)
434
+ plt.savefig(plot_path)
435
+ self.util.debug(f"Saved ROC curve to {plot_path}")
418
436
  pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
419
437
  auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"
420
438
  self.util.debug(auc_pauc)
nkululeko/utils/util.py CHANGED
@@ -175,7 +175,7 @@ class Util:
175
175
  """Get the experiment directory."""
176
176
  root = os.path.join(self.config["EXP"]["root"], "")
177
177
  name = self.config["EXP"]["name"]
178
- dir_name = f"{root}{name}"
178
+ dir_name = f"{root}/{name}"
179
179
  audeer.mkdir(dir_name)
180
180
  return dir_name
181
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.88.12
3
+ Version: 0.89.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -204,7 +204,7 @@ All of them take *--config <my_config.ini>* as an argument.
204
204
  * *configurations*: which experiments to combine
205
205
  * *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
206
206
  * *--threshold*: uncertainty threshold (1.0 means no threshold)
207
- * *--weightes*: weights for performance_weighted method (could be from previous UAR, ACC)
207
+ * *--weights*: weights for performance_weighted method (could be from previous UAR, ACC)
208
208
  * *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
209
209
  * *--no_labels* (optional): indicate that no ground truth is given
210
210
  * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
@@ -220,14 +220,11 @@ All of them take *--config <my_config.ini>* as an argument.
220
220
  * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
221
221
  * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
222
222
  * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
223
- * **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
224
- * usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
225
- [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
226
-
227
-
228
-
229
-
223
+ * **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line. Usage:
230
224
 
225
+ ```bash
226
+ $ python -m nkululeko.nkuluflag [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
227
+ ```
231
228
 
232
229
  There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
233
230
  * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -359,8 +356,17 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
359
356
  Changelog
360
357
  =========
361
358
 
362
- Version 0.88.12
359
+ Version 0.89.1
363
360
  --------------
361
+ * print and save result of feature importance
362
+
363
+ Version 0.89.0
364
+ --------------
365
+ * added ROC plots and classification report to the debug output
366
+
367
+
368
+ Version 0.88.12
369
+ ---------------
364
370
  * added n_jobs for sklearn processing
365
371
  * re_named num_workers n_jobs
366
372
 
@@ -833,9 +839,9 @@ Version 0.66.3
833
839
 
834
840
  Version 0.66.2
835
841
  --------------
836
- * enabled data-pathes with quotes
842
+ * enabled data-paths with quotes
837
843
  * enabled missing category labels
838
- * used tgdm for progress display
844
+ * used tqdm for progress display
839
845
 
840
846
  Version 0.66.1
841
847
  --------------
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=7-twiCGYx2fTsGKDZex9QsUXRhaudHMRFwoV8HehRcg,40
5
+ nkululeko/constants.py,sha256=nRA0bWrvi-5tXm8QWv4dzDE-3sujMiz26U4QgSVuck0,39
6
6
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
9
- nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
9
+ nkululeko/ensemble.py,sha256=egtOFxEp7gjuM5cKBfETnhTn1-7_4zWBPEah65K1C3U,12927
10
10
  nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
11
11
  nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
12
12
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
@@ -51,7 +51,7 @@ nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo
51
51
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
53
53
  nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
54
- nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
54
+ nkululeko/feat_extract/feats_analyser.py,sha256=eW0v7Boybfj2gXi77MPjaLyHUQ1C42mx9hgoQeDwNac,12999
55
55
  nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
56
56
  nkululeko/feat_extract/feats_auddim.py,sha256=ulP_o4SGeQDFTs8YYCGKgccARAo6-wcjPK6-hhGjmn8,3155
57
57
  nkululeko/feat_extract/feats_audmodel.py,sha256=aRGTBDKdYaTT_9xDaFZqpuyPhzxSNN_3b1PJDUHtYW4,3180
@@ -88,7 +88,7 @@ nkululeko/models/model_mlp.py,sha256=CaR0PCRBcdCo_hhC5r9Q6IbVIApvtoRVrUdZsgzbx1M
88
88
  nkululeko/models/model_mlp_regression.py,sha256=YMHMWRlWL6iL8HdYe6rTAoAW6GwHBx3PDvysCZYj5tQ,10186
89
89
  nkululeko/models/model_svm.py,sha256=AzWksBRbIdpUuMbDnAh_YAXebewR5POj9AkB9VC40pI,1010
90
90
  nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
91
- nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
91
+ nkululeko/models/model_tree.py,sha256=KScDTGgkOePTZEcereB7bxQ47wIKhYI-xhTKCU4cKDk,454
92
92
  nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
93
93
  nkululeko/models/model_tuned.py,sha256=vmNBkqvEH-4nnhY1REXDA9kA4vpZJzeRmGJFq7E3bLM,21340
94
94
  nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
98
98
  nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
99
99
  nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
100
100
  nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
101
- nkululeko/reporting/reporter.py,sha256=Gt8tEiDQ9rbsYAKb-EbKGVaogPWKKoAEkwtVeiFoTSA,19119
101
+ nkululeko/reporting/reporter.py,sha256=xFyGj6gQ8T1WB3w3tJ0awlgQcq1e3IKXEIfl_DvOngg,19996
102
102
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
103
103
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
106
106
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
108
108
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
109
- nkululeko/utils/util.py,sha256=CY45KIhpYsfYa9qXyqOYErjhS2fjYfaVzRTfOZZKzaQ,16709
110
- nkululeko-0.88.12.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
- nkululeko-0.88.12.dist-info/METADATA,sha256=46Vj4H4faGcNqc-dMMpyDo_BB9q6yczjYaPpKtsXgNc,40494
112
- nkululeko-0.88.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
113
- nkululeko-0.88.12.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
- nkululeko-0.88.12.dist-info/RECORD,,
109
+ nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
110
+ nkululeko-0.89.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
+ nkululeko-0.89.1.dist-info/METADATA,sha256=AuVssWNRMXlseH5xSzcls--AAYLFSeEbFtHbAFT2o_o,40667
112
+ nkululeko-0.89.1.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
113
+ nkululeko-0.89.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
+ nkululeko-0.89.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.1.0)
2
+ Generator: setuptools (74.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5