nkululeko 0.89.1__tar.gz → 0.90.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. {nkululeko-0.89.1 → nkululeko-0.90.0}/CHANGELOG.md +10 -0
  2. {nkululeko-0.89.1/nkululeko.egg-info → nkululeko-0.90.0}/PKG-INFO +11 -1
  3. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augment.py +2 -2
  4. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augmenting/randomsplicer.py +6 -3
  5. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/constants.py +1 -1
  6. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/data/dataset.py +8 -2
  7. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/ensemble.py +8 -4
  8. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/experiment.py +8 -2
  9. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/explore.py +3 -1
  10. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_analyser.py +17 -3
  11. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/modelrunner.py +2 -2
  12. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/plots.py +1 -1
  13. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/reporter.py +4 -3
  14. {nkululeko-0.89.1 → nkululeko-0.90.0/nkululeko.egg-info}/PKG-INFO +11 -1
  15. {nkululeko-0.89.1 → nkululeko-0.90.0}/LICENSE +0 -0
  16. {nkululeko-0.89.1 → nkululeko-0.90.0}/README.md +0 -0
  17. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/aesdd/process_database.py +0 -0
  18. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/androids/process_database.py +0 -0
  19. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/ased/process_database.py +0 -0
  20. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/asvp-esd/process_database.py +0 -0
  21. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/baved/process_database.py +0 -0
  22. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/cafe/process_database.py +0 -0
  23. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/clac/process_database.py +0 -0
  24. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/cmu-mosei/process_database.py +0 -0
  25. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/demos/process_database.py +0 -0
  26. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/ekorpus/process_database.py +0 -0
  27. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emns/process_database.py +0 -0
  28. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emofilm/convert_to_16k.py +0 -0
  29. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emofilm/process_database.py +0 -0
  30. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emorynlp/process_database.py +0 -0
  31. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emov-db/process_database.py +0 -0
  32. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emovo/process_database.py +0 -0
  33. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/emozionalmente/create.py +0 -0
  34. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/enterface/process_database.py +0 -0
  35. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/esd/process_database.py +0 -0
  36. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/gerparas/process_database.py +0 -0
  37. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/iemocap/process_database.py +0 -0
  38. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/jl/process_database.py +0 -0
  39. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/jtes/process_database.py +0 -0
  40. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/meld/process_database.py +0 -0
  41. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/mesd/process_database.py +0 -0
  42. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/mess/process_database.py +0 -0
  43. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/mlendsnd/process_database.py +0 -0
  44. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/msp-improv/process_database2.py +0 -0
  45. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/msp-podcast/process_database.py +0 -0
  46. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/oreau2/process_database.py +0 -0
  47. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/portuguese/process_database.py +0 -0
  48. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/ravdess/process_database.py +0 -0
  49. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/ravdess/process_database_speaker.py +0 -0
  50. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/savee/process_database.py +0 -0
  51. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/shemo/process_database.py +0 -0
  52. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/subesco/process_database.py +0 -0
  53. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/tess/process_database.py +0 -0
  54. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/thorsten-emotional/process_database.py +0 -0
  55. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/urdu/process_database.py +0 -0
  56. {nkululeko-0.89.1 → nkululeko-0.90.0}/data/vivae/process_database.py +0 -0
  57. {nkululeko-0.89.1 → nkululeko-0.90.0}/docs/source/conf.py +0 -0
  58. {nkululeko-0.89.1 → nkululeko-0.90.0}/meta/demos/demo_best_model.py +0 -0
  59. {nkululeko-0.89.1 → nkululeko-0.90.0}/meta/demos/my_experiment.py +0 -0
  60. {nkululeko-0.89.1 → nkululeko-0.90.0}/meta/demos/my_experiment_local.py +0 -0
  61. {nkululeko-0.89.1 → nkululeko-0.90.0}/meta/demos/plot_faster_anim.py +0 -0
  62. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/__init__.py +0 -0
  63. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/aug_train.py +0 -0
  64. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augmenting/__init__.py +0 -0
  65. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augmenting/augmenter.py +0 -0
  66. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augmenting/randomsplicing.py +0 -0
  67. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/augmenting/resampler.py +0 -0
  68. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/__init__.py +0 -0
  69. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_age.py +0 -0
  70. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_arousal.py +0 -0
  71. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_dominance.py +0 -0
  72. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_gender.py +0 -0
  73. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_mos.py +0 -0
  74. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_pesq.py +0 -0
  75. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_sdr.py +0 -0
  76. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_snr.py +0 -0
  77. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_stoi.py +0 -0
  78. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/ap_valence.py +0 -0
  79. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/autopredict/estimate_snr.py +0 -0
  80. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/cacheddataset.py +0 -0
  81. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/data/__init__.py +0 -0
  82. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/data/dataset_csv.py +0 -0
  83. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/demo.py +0 -0
  84. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/demo_feats.py +0 -0
  85. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/demo_predictor.py +0 -0
  86. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/export.py +0 -0
  87. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/__init__.py +0 -0
  88. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_agender.py +0 -0
  89. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  90. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_ast.py +0 -0
  91. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
  92. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
  93. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_clap.py +0 -0
  94. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
  95. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_import.py +0 -0
  96. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_mld.py +0 -0
  97. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_mos.py +0 -0
  98. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
  99. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
  100. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_praat.py +0 -0
  101. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_snr.py +0 -0
  102. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
  103. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  104. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_squim.py +0 -0
  105. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_trill.py +0 -0
  106. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
  107. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
  108. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
  109. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/featureset.py +0 -0
  110. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  111. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/feature_extractor.py +0 -0
  112. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/file_checker.py +0 -0
  113. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/filter_data.py +0 -0
  114. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/glob_conf.py +0 -0
  115. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/losses/__init__.py +0 -0
  116. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/losses/loss_ccc.py +0 -0
  117. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/losses/loss_softf1loss.py +0 -0
  118. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/__init__.py +0 -0
  119. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model.py +0 -0
  120. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_bayes.py +0 -0
  121. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_cnn.py +0 -0
  122. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_gmm.py +0 -0
  123. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_knn.py +0 -0
  124. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_knn_reg.py +0 -0
  125. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_lin_reg.py +0 -0
  126. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_mlp.py +0 -0
  127. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_mlp_regression.py +0 -0
  128. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_svm.py +0 -0
  129. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_svr.py +0 -0
  130. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_tree.py +0 -0
  131. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_tree_reg.py +0 -0
  132. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_tuned.py +0 -0
  133. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_xgb.py +0 -0
  134. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/models/model_xgr.py +0 -0
  135. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/multidb.py +0 -0
  136. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/nkuluflag.py +0 -0
  137. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/nkululeko.py +0 -0
  138. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/predict.py +0 -0
  139. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/__init__.py +0 -0
  140. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/defines.py +0 -0
  141. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/latex_writer.py +0 -0
  142. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/report.py +0 -0
  143. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/report_item.py +0 -0
  144. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/reporting/result.py +0 -0
  145. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/resample.py +0 -0
  146. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/runmanager.py +0 -0
  147. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/scaler.py +0 -0
  148. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/segment.py +0 -0
  149. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/segmenting/__init__.py +0 -0
  150. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  151. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/segmenting/seg_silero.py +0 -0
  152. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/syllable_nuclei.py +0 -0
  153. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/test.py +0 -0
  154. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/test_predictor.py +0 -0
  155. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/test_pretrain.py +0 -0
  156. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/utils/__init__.py +0 -0
  157. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/utils/files.py +0 -0
  158. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/utils/stats.py +0 -0
  159. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko/utils/util.py +0 -0
  160. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko.egg-info/SOURCES.txt +0 -0
  161. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  162. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko.egg-info/requires.txt +0 -0
  163. {nkululeko-0.89.1 → nkululeko-0.90.0}/nkululeko.egg-info/top_level.txt +0 -0
  164. {nkululeko-0.89.1 → nkululeko-0.90.0}/pyproject.toml +0 -0
  165. {nkululeko-0.89.1 → nkululeko-0.90.0}/setup.cfg +0 -0
  166. {nkululeko-0.89.1 → nkululeko-0.90.0}/setup.py +0 -0
  167. {nkululeko-0.89.1 → nkululeko-0.90.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,16 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.90.0
5
+ --------------
6
+ * augmentation can now be done without target
7
+ * random splicing params configurable
8
+ * made kde the default for continuous/categorical plots
9
+
10
+ Version 0.89.2
11
+ --------------
12
+ * fix shap value calculation
13
+
4
14
  Version 0.89.1
5
15
  --------------
6
16
  * print and save result of feature importance
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.89.1
3
+ Version: 0.90.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -356,6 +356,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
356
356
  Changelog
357
357
  =========
358
358
 
359
+ Version 0.90.0
360
+ --------------
361
+ * augmentation can now be done without target
362
+ * random splicing params configurable
363
+ * made kde the default for continuous/categorical plots
364
+
365
+ Version 0.89.2
366
+ --------------
367
+ * fix shap value calculation
368
+
359
369
  Version 0.89.1
360
370
  --------------
361
371
  * print and save result of feature importance
@@ -37,8 +37,8 @@ def doit(config_file):
37
37
 
38
38
  filename = util.config_val("AUGMENT", "result", "augmented.csv")
39
39
  filename = f"{expr.data_dir}/{filename}"
40
-
41
- if os.path.exists(filename):
40
+ no_reuse = eval(util.config_val("DATA", "no_reuse", "False"))
41
+ if os.path.exists(filename) and not no_reuse:
42
42
  util.debug("files already augmented")
43
43
  else:
44
44
  # load the data
@@ -41,14 +41,17 @@ class Randomsplicer:
41
41
  * top_db: top db level for silence to be recognized (default: 12)
42
42
  """
43
43
 
44
- p_reverse = 0.3
45
- top_db = 12
44
+ p_reverse = float(self.util.config_val("AUGMENT", "p_reverse", "0.3"))
45
+ top_db = float(self.util.config_val("AUGMENT", "top_db", "12"))
46
46
 
47
47
  files = self.df.index.get_level_values(0).values
48
48
  store = self.util.get_path("store")
49
49
  filepath = f"{store}randomspliced/"
50
50
  audeer.mkdir(filepath)
51
- self.util.debug(f"random splicing {sample_selection} samples to {filepath}")
51
+ self.util.debug(
52
+ f"random splicing {sample_selection} samples to {filepath}, "
53
+ + f"p_reverse = {p_reverse}, top_db = {top_db}",
54
+ )
52
55
  newpath = ""
53
56
  index_map = {}
54
57
  for i, f in enumerate(tqdm(files)):
@@ -1,2 +1,2 @@
1
- VERSION="0.89.1"
1
+ VERSION="0.90.0"
2
2
  SAMPLING_RATE = 16000
@@ -30,8 +30,8 @@ class Dataset:
30
30
  def __init__(self, name):
31
31
  """Constructor setting up name and configuration"""
32
32
  self.name = name
33
- self.target = glob_conf.config["DATA"]["target"]
34
33
  self.util = Util("dataset")
34
+ self.target = self.util.config_val("DATA", "target", "none")
35
35
  self.plot = Plots()
36
36
  self.limit = int(self.util.config_val_data(self.name, "limit", 0))
37
37
  self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
@@ -127,6 +127,9 @@ class Dataset:
127
127
  self.got_gender,
128
128
  self.got_age,
129
129
  ) = self._get_df_for_lists(self.db, df_files_tables)
130
+ if df.shape[0] > 0 and self.target == "none":
131
+ self.df = df
132
+ return
130
133
  if False in {
131
134
  self.is_labeled,
132
135
  self.got_speaker,
@@ -553,7 +556,10 @@ class Dataset:
553
556
  " samples in train/test"
554
557
  )
555
558
  # because this generates new train/test sample quantities, the feature extraction has to be done again
556
- glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
559
+ try:
560
+ glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
561
+ except KeyError:
562
+ pass
557
563
 
558
564
  def random_split(self):
559
565
  """One way to split train and eval sets: Specify percentage of random samples"""
@@ -18,6 +18,7 @@ Raises:
18
18
  #!/usr/bin/env python
19
19
  # -*- coding: utf-8 -*-
20
20
 
21
+
21
22
  from typing import List
22
23
  import configparser
23
24
  import time
@@ -26,10 +27,15 @@ from pathlib import Path
26
27
 
27
28
  import numpy as np
28
29
  import pandas as pd
30
+ import matplotlib.pyplot as plt
31
+
29
32
  from sklearn.metrics import(
33
+ RocCurveDisplay,
30
34
  balanced_accuracy_score,
31
35
  classification_report,
32
- f1_score
36
+ auc,
37
+ roc_auc_score,
38
+ roc_curve
33
39
  )
34
40
 
35
41
  from nkululeko.constants import VERSION
@@ -289,9 +295,7 @@ def ensemble_predictions(
289
295
  uar = balanced_accuracy_score(truth, predicted)
290
296
  acc = (truth == predicted).mean()
291
297
  # print classification report
292
- Util("ensemble").debug(f"\n {classification_report(truth, predicted)}")
293
- # f1 = f1_score(truth, predicted, pos_label='p')
294
- # Util("ensemble").debug(f"F1: {f1:.3f}")
298
+ Util("ensemble").debug(f"\n {classification_report(truth, predicted, digits=4)}")
295
299
  Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
296
300
 
297
301
  return ensemble_preds
@@ -101,12 +101,15 @@ class Experiment:
101
101
  if data.got_speaker:
102
102
  self.got_speaker = True
103
103
  self.datasets.update({d: data})
104
- self.target = self.util.config_val("DATA", "target", "emotion")
104
+ self.target = self.util.config_val("DATA", "target", "none")
105
105
  glob_conf.set_target(self.target)
106
106
  # print target via debug
107
107
  self.util.debug(f"target: {self.target}")
108
108
  # print keys/column
109
109
  dbs = ",".join(list(self.datasets.keys()))
110
+ if self.target == "none":
111
+ self.util.debug(f"loaded databases {dbs}")
112
+ return
110
113
  labels = self.util.config_val("DATA", "labels", False)
111
114
  auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
112
115
  if labels:
@@ -191,7 +194,8 @@ class Experiment:
191
194
  self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
192
195
  for d in self.datasets.values():
193
196
  d.split()
194
- d.prepare_labels()
197
+ if self.target != "none":
198
+ d.prepare_labels()
195
199
  if d.df_train.shape[0] == 0:
196
200
  self.util.debug(f"warn: {d.name} train empty")
197
201
  self.df_train = pd.concat([self.df_train, d.df_train])
@@ -207,6 +211,8 @@ class Experiment:
207
211
  self.df_test.to_csv(storage_test)
208
212
  self.df_train.to_csv(storage_train)
209
213
 
214
+ if self.target == "none":
215
+ return
210
216
  self.util.copy_flags(self, self.df_test)
211
217
  self.util.copy_flags(self, self.df_train)
212
218
  # Try data checks
@@ -91,7 +91,9 @@ def main(src_dir):
91
91
  # these investigations need features to explore
92
92
  expr.extract_feats()
93
93
  needs_feats = True
94
- # explore
94
+ # explore
95
+ expr.init_runmanager()
96
+ expr.runmgr.do_runs()
95
97
  expr.analyse_features(needs_feats)
96
98
  expr.store_report()
97
99
  print("DONE")
@@ -50,19 +50,32 @@ class FeatureAnalyser:
50
50
 
51
51
  name = "my_shap_values"
52
52
  if not self.util.exist_pickle(name):
53
-
53
+ # get model name
54
+ model_name = self.util.get_model_type()
55
+ if hasattr(model, "predict_shap"):
56
+ model_func = model.predict_shap
57
+ elif hasattr(model, "clf"):
58
+ model_func = model.clf.predict
59
+ else:
60
+ raise Exception("Model not supported for SHAP analysis")
61
+
62
+ self.util.debug(f"using SHAP explainer for {model_name} model")
63
+
54
64
  explainer = shap.Explainer(
55
- model.predict_shap,
65
+ model_func,
56
66
  self.features,
57
67
  output_names=glob_conf.labels,
58
68
  algorithm="permutation",
59
69
  npermutations=5,
60
70
  )
71
+
61
72
  self.util.debug("computing SHAP values...")
62
73
  shap_values = explainer(self.features)
63
74
  self.util.to_pickle(shap_values, name)
64
75
  else:
65
76
  shap_values = self.util.from_pickle(name)
77
+ # plt.figure()
78
+ plt.close('all')
66
79
  plt.tight_layout()
67
80
  shap.plots.bar(shap_values)
68
81
  fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
@@ -71,7 +84,8 @@ class FeatureAnalyser:
71
84
  filename = f"_SHAP_{model.name}"
72
85
  filename = f"{fig_dir}{exp_name}{filename}.{format}"
73
86
  plt.savefig(filename)
74
- self.util.debug(f"plotted SHAP feature importance tp {filename}")
87
+ plt.close()
88
+ self.util.debug(f"plotted SHAP feature importance to {filename}")
75
89
 
76
90
  def analyse(self):
77
91
  models = ast.literal_eval(self.util.config_val("EXPL", "model", "['log_reg']"))
@@ -53,8 +53,8 @@ class Modelrunner:
53
53
  # epochs are handled by Huggingface API
54
54
  self.model.train()
55
55
  report = self.model.predict()
56
- # todo: findout the best epoch, no need
57
- # since oad_best_model_at_end is given in training args
56
+ # todo: find out the best epoch -> no need
57
+ # since load_best_model_at_end is given in training args
58
58
  epoch = epoch_num
59
59
  report.set_id(self.run, epoch)
60
60
  plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"
@@ -263,7 +263,7 @@ class Plots:
263
263
 
264
264
  def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
265
265
  """Plot relation of categorical distribution with continuous."""
266
- dist_type = self.util.config_val("EXPL", "dist_type", "hist")
266
+ dist_type = self.util.config_val("EXPL", "dist_type", "kde")
267
267
  cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
268
268
  model_type = self.util.get_model_type()
269
269
  if dist_type == "hist" and model_type != "tree":
@@ -402,7 +402,7 @@ class Reporter:
402
402
  )
403
403
  # print classification report in console
404
404
  self.util.debug(
405
- f"\n {classification_report(self.truths, self.preds, target_names=labels)}"
405
+ f"\n {classification_report(self.truths, self.preds, target_names=labels, digits=4)}"
406
406
  )
407
407
  except ValueError as e:
408
408
  self.util.debug(
@@ -422,16 +422,17 @@ class Reporter:
422
422
  if len(np.unique(self.truths)) == 2:
423
423
  fpr, tpr, _ = roc_curve(self.truths, self.preds)
424
424
  auc_score = auc(fpr, tpr)
425
+ plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
426
+ plt.figure()
425
427
  display = RocCurveDisplay(
426
428
  fpr=fpr,
427
429
  tpr=tpr,
428
430
  roc_auc=auc_score,
429
431
  estimator_name=f"{self.model_type} estimator",
430
432
  )
431
- # save plot
432
- plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
433
433
  display.plot(ax=None)
434
434
  plt.savefig(plot_path)
435
+ plt.close()
435
436
  self.util.debug(f"Saved ROC curve to {plot_path}")
436
437
  pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
437
438
  auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.89.1
3
+ Version: 0.90.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -356,6 +356,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
356
356
  Changelog
357
357
  =========
358
358
 
359
+ Version 0.90.0
360
+ --------------
361
+ * augmentation can now be done without target
362
+ * random splicing params configurable
363
+ * made kde the default for continuous/categorical plots
364
+
365
+ Version 0.89.2
366
+ --------------
367
+ * fix shap value calculation
368
+
359
369
  Version 0.89.1
360
370
  --------------
361
371
  * print and save result of feature importance
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes