nkululeko 0.86.8__tar.gz → 0.87.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. {nkululeko-0.86.8 → nkululeko-0.87.0}/CHANGELOG.md +4 -0
  2. {nkululeko-0.86.8/nkululeko.egg-info → nkululeko-0.87.0}/PKG-INFO +13 -1
  3. {nkululeko-0.86.8 → nkululeko-0.87.0}/README.md +8 -0
  4. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/constants.py +1 -1
  5. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/dataset_csv.py +12 -14
  6. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo.py +4 -8
  7. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/modelrunner.py +5 -5
  8. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model.py +23 -3
  9. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_cnn.py +41 -22
  10. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp.py +37 -17
  11. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp_regression.py +3 -1
  12. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/plots.py +25 -37
  13. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/reporter.py +69 -6
  14. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/runmanager.py +8 -11
  15. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test_predictor.py +1 -6
  16. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/stats.py +11 -7
  17. {nkululeko-0.86.8 → nkululeko-0.87.0/nkululeko.egg-info}/PKG-INFO +13 -1
  18. {nkululeko-0.86.8 → nkululeko-0.87.0}/LICENSE +0 -0
  19. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/aesdd/process_database.py +0 -0
  20. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids/process_database.py +0 -0
  21. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids_orig/process_database.py +0 -0
  22. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids_test/process_database.py +0 -0
  23. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ased/process_database.py +0 -0
  24. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/asvp-esd/process_database.py +0 -0
  25. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/baved/process_database.py +0 -0
  26. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/cafe/process_database.py +0 -0
  27. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/clac/process_database.py +0 -0
  28. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/cmu-mosei/process_database.py +0 -0
  29. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/demos/process_database.py +0 -0
  30. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ekorpus/process_database.py +0 -0
  31. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emns/process_database.py +0 -0
  32. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emofilm/convert_to_16k.py +0 -0
  33. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emofilm/process_database.py +0 -0
  34. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emorynlp/process_database.py +0 -0
  35. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emov-db/process_database.py +0 -0
  36. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emovo/process_database.py +0 -0
  37. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emozionalmente/create.py +0 -0
  38. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/enterface/process_database.py +0 -0
  39. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/esd/process_database.py +0 -0
  40. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/gerparas/process_database.py +0 -0
  41. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/iemocap/process_database.py +0 -0
  42. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/jl/process_database.py +0 -0
  43. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/jtes/process_database.py +0 -0
  44. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/meld/process_database.py +0 -0
  45. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mesd/process_database.py +0 -0
  46. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mess/process_database.py +0 -0
  47. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mlendsnd/process_database.py +0 -0
  48. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/msp-improv/process_database2.py +0 -0
  49. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/msp-podcast/process_database.py +0 -0
  50. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/oreau2/process_database.py +0 -0
  51. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/portuguese/process_database.py +0 -0
  52. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ravdess/process_database.py +0 -0
  53. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ravdess/process_database_speaker.py +0 -0
  54. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/savee/process_database.py +0 -0
  55. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/shemo/process_database.py +0 -0
  56. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/subesco/process_database.py +0 -0
  57. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/tess/process_database.py +0 -0
  58. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/thorsten-emotional/process_database.py +0 -0
  59. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/urdu/process_database.py +0 -0
  60. {nkululeko-0.86.8 → nkululeko-0.87.0}/data/vivae/process_database.py +0 -0
  61. {nkululeko-0.86.8 → nkululeko-0.87.0}/docs/source/conf.py +0 -0
  62. {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/demo_best_model.py +0 -0
  63. {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/my_experiment.py +0 -0
  64. {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/my_experiment_local.py +0 -0
  65. {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/plot_faster_anim.py +0 -0
  66. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/__init__.py +0 -0
  67. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/aug_train.py +0 -0
  68. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augment.py +0 -0
  69. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/__init__.py +0 -0
  70. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/augmenter.py +0 -0
  71. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/randomsplicer.py +0 -0
  72. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/randomsplicing.py +0 -0
  73. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/resampler.py +0 -0
  74. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/__init__.py +0 -0
  75. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_age.py +0 -0
  76. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_arousal.py +0 -0
  77. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_dominance.py +0 -0
  78. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_gender.py +0 -0
  79. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_mos.py +0 -0
  80. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_pesq.py +0 -0
  81. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_sdr.py +0 -0
  82. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_snr.py +0 -0
  83. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_stoi.py +0 -0
  84. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_valence.py +0 -0
  85. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/estimate_snr.py +0 -0
  86. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/cacheddataset.py +0 -0
  87. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/__init__.py +0 -0
  88. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/dataset.py +0 -0
  89. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo_feats.py +0 -0
  90. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo_predictor.py +0 -0
  91. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/experiment.py +0 -0
  92. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/explore.py +0 -0
  93. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/export.py +0 -0
  94. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/__init__.py +0 -0
  95. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_agender.py +0 -0
  96. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  97. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
  98. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
  99. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
  100. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_clap.py +0 -0
  101. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
  102. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_import.py +0 -0
  103. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_mld.py +0 -0
  104. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_mos.py +0 -0
  105. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
  106. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
  107. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_praat.py +0 -0
  108. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_snr.py +0 -0
  109. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
  110. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  111. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_squim.py +0 -0
  112. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_trill.py +0 -0
  113. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
  114. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
  115. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
  116. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/featureset.py +0 -0
  117. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  118. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feature_extractor.py +0 -0
  119. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/file_checker.py +0 -0
  120. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/filter_data.py +0 -0
  121. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/glob_conf.py +0 -0
  122. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/__init__.py +0 -0
  123. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/loss_ccc.py +0 -0
  124. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/loss_softf1loss.py +0 -0
  125. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/__init__.py +0 -0
  126. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_bayes.py +0 -0
  127. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_gmm.py +0 -0
  128. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_knn.py +0 -0
  129. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_knn_reg.py +0 -0
  130. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_lin_reg.py +0 -0
  131. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_svm.py +0 -0
  132. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_svr.py +0 -0
  133. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tree.py +0 -0
  134. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tree_reg.py +0 -0
  135. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tuned.py +0 -0
  136. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_xgb.py +0 -0
  137. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_xgr.py +0 -0
  138. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/multidb.py +0 -0
  139. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/nkuluflag.py +0 -0
  140. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/nkululeko.py +0 -0
  141. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/predict.py +0 -0
  142. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/__init__.py +0 -0
  143. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/defines.py +0 -0
  144. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/latex_writer.py +0 -0
  145. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/report.py +0 -0
  146. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/report_item.py +0 -0
  147. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/result.py +0 -0
  148. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/resample.py +0 -0
  149. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/scaler.py +0 -0
  150. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segment.py +0 -0
  151. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/__init__.py +0 -0
  152. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  153. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/seg_silero.py +0 -0
  154. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/syllable_nuclei.py +0 -0
  155. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test.py +0 -0
  156. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test_pretrain.py +0 -0
  157. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/__init__.py +0 -0
  158. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/files.py +0 -0
  159. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/util.py +0 -0
  160. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/SOURCES.txt +0 -0
  161. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  162. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/requires.txt +0 -0
  163. {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/top_level.txt +0 -0
  164. {nkululeko-0.86.8 → nkululeko-0.87.0}/pyproject.toml +0 -0
  165. {nkululeko-0.86.8 → nkululeko-0.87.0}/setup.cfg +0 -0
  166. {nkululeko-0.86.8 → nkululeko-0.87.0}/setup.py +0 -0
  167. {nkululeko-0.86.8 → nkululeko-0.87.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,10 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.87.0
5
+ --------------
6
+ * added class probability output and uncertainty analysis
7
+
4
8
  Version 0.86.8
5
9
  --------------
6
10
  * handle single feature sets as strings in the config
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.86.8
3
+ Version: 0.87.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -51,6 +51,7 @@ Requires-Dist: pylatex
51
51
  - [t-SNE plots](#t-sne-plots)
52
52
  - [Data distribution](#data-distribution)
53
53
  - [Bias checking](#bias-checking)
54
+ - [Uncertainty](#uncertainty)
54
55
  - [Documentation](#documentation)
55
56
  - [Installation](#installation)
56
57
  - [Usage](#usage)
@@ -113,6 +114,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
113
114
 
114
115
  <img src="meta/images/emotion-pesq.png" width="500px"/>
115
116
 
117
+ ### Uncertainty
118
+ Nkululeko estimates the uncertainty of model decisions (only for classifiers) using the entropy over the class probabilities or logits per sample.
119
+
120
+ <img src="meta/images/uncertainty.png" width="500px"/>
121
+
122
+
123
+
116
124
  ## Documentation
117
125
  The documentation, along with extensions of installation, usage, INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
118
126
 
@@ -343,6 +351,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
343
351
  Changelog
344
352
  =========
345
353
 
354
+ Version 0.87.0
355
+ --------------
356
+ * added class probability output and uncertainty analysis
357
+
346
358
  Version 0.86.8
347
359
  --------------
348
360
  * handle single feature sets as strings in the config
@@ -7,6 +7,7 @@
7
7
  - [t-SNE plots](#t-sne-plots)
8
8
  - [Data distribution](#data-distribution)
9
9
  - [Bias checking](#bias-checking)
10
+ - [Uncertainty](#uncertainty)
10
11
  - [Documentation](#documentation)
11
12
  - [Installation](#installation)
12
13
  - [Usage](#usage)
@@ -69,6 +70,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
69
70
 
70
71
  <img src="meta/images/emotion-pesq.png" width="500px"/>
71
72
 
73
+ ### Uncertainty
74
+ Nkululeko estimates the uncertainty of model decisions (only for classifiers) using the entropy over the class probabilities or logits per sample.
75
+
76
+ <img src="meta/images/uncertainty.png" width="500px"/>
77
+
78
+
79
+
72
80
  ## Documentation
73
81
  The documentation, along with extensions of installation, usage, INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
74
82
 
@@ -1,2 +1,2 @@
1
- VERSION="0.86.8"
1
+ VERSION="0.87.0"
2
2
  SAMPLING_RATE = 16000
@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
23
23
  root = os.path.dirname(data_file)
24
24
  audio_path = self.util.config_val_data(self.name, "audio_path", "./")
25
25
  df = pd.read_csv(data_file)
26
+ # trim all string values
27
+ df_obj = df.select_dtypes("object")
28
+ df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
26
29
  # special treatment for segmented dataframes with only one column:
27
30
  if "start" in df.columns and len(df.columns) == 4:
28
31
  index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
49
52
  .map(lambda x: root + "/" + audio_path + "/" + x)
50
53
  .values
51
54
  )
52
- df = df.set_index(df.index.set_levels(
53
- file_index, level="file"))
55
+ df = df.set_index(df.index.set_levels(file_index, level="file"))
54
56
  else:
55
57
  if not isinstance(df, pd.DataFrame):
56
58
  df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
59
61
  lambda x: root + "/" + audio_path + "/" + x
60
62
  )
61
63
  )
62
- else: # absolute path is True
64
+ else: # absolute path is True
63
65
  if audformat.index_type(df.index) == "segmented":
64
66
  file_index = (
65
- df.index.levels[0]
66
- .map(lambda x: audio_path + "/" + x)
67
- .values
67
+ df.index.levels[0].map(lambda x: audio_path + "/" + x).values
68
68
  )
69
- df = df.set_index(df.index.set_levels(
70
- file_index, level="file"))
69
+ df = df.set_index(df.index.set_levels(file_index, level="file"))
71
70
  else:
72
71
  if not isinstance(df, pd.DataFrame):
73
72
  df = pd.DataFrame(df)
74
- df = df.set_index(df.index.to_series().apply(
75
- lambda x: audio_path + "/" + x ))
73
+ df = df.set_index(
74
+ df.index.to_series().apply(lambda x: audio_path + "/" + x)
75
+ )
76
76
 
77
77
  self.df = df
78
78
  self.db = None
79
79
  self.got_target = True
80
80
  self.is_labeled = self.got_target
81
- self.start_fresh = eval(
82
- self.util.config_val("DATA", "no_reuse", "False"))
81
+ self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
83
82
  is_index = False
84
83
  try:
85
84
  if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
106
105
  f" {self.got_gender}, got age: {self.got_age}"
107
106
  )
108
107
  self.util.debug(r_string)
109
- glob_conf.report.add_item(ReportItem(
110
- "Data", "Loaded report", r_string))
108
+ glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
111
109
 
112
110
  def prepare(self):
113
111
  super().prepare()
@@ -30,10 +30,8 @@ from transformers import pipeline
30
30
 
31
31
 
32
32
  def main(src_dir):
33
- parser = argparse.ArgumentParser(
34
- description="Call the nkululeko DEMO framework.")
35
- parser.add_argument("--config", default="exp.ini",
36
- help="The base configuration")
33
+ parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
34
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
37
35
  parser.add_argument(
38
36
  "--file", help="A file that should be processed (16kHz mono wav)"
39
37
  )
@@ -84,8 +82,7 @@ def main(src_dir):
84
82
  )
85
83
 
86
84
  def print_pipe(files, outfile):
87
- """
88
- Prints the pipeline output for a list of files, and optionally writes the results to an output file.
85
+ """Prints the pipeline output for a list of files, and optionally writes the results to an output file.
89
86
 
90
87
  Args:
91
88
  files (list): A list of file paths to process through the pipeline.
@@ -108,8 +105,7 @@ def main(src_dir):
108
105
  f.write("\n".join(results))
109
106
 
110
107
  if util.get_model_type() == "finetune":
111
- model_path = os.path.join(
112
- util.get_exp_dir(), "models", "run_0", "torch")
108
+ model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
113
109
  pipe = pipeline("audio-classification", model=model_path)
114
110
  if args.file is not None:
115
111
  print_pipe([args.file], args.outfile)
@@ -85,7 +85,7 @@ class Modelrunner:
85
85
  f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
86
86
  )
87
87
  # print(f"performance: {performance.split(' ')[1]}")
88
- performance = float(test_score_metric.split(' ')[1])
88
+ performance = float(test_score_metric.split(" ")[1])
89
89
  if performance > self.best_performance:
90
90
  self.best_performance = performance
91
91
  self.best_epoch = epoch
@@ -204,15 +204,15 @@ class Modelrunner:
204
204
  self.df_train, self.df_test, self.feats_train, self.feats_test
205
205
  )
206
206
  elif model_type == "cnn":
207
- from nkululeko.models.model_cnn import CNN_model
207
+ from nkululeko.models.model_cnn import CNNModel
208
208
 
209
- self.model = CNN_model(
209
+ self.model = CNNModel(
210
210
  self.df_train, self.df_test, self.feats_train, self.feats_test
211
211
  )
212
212
  elif model_type == "mlp":
213
- from nkululeko.models.model_mlp import MLP_model
213
+ from nkululeko.models.model_mlp import MLPModel
214
214
 
215
- self.model = MLP_model(
215
+ self.model = MLPModel(
216
216
  self.df_train, self.df_test, self.feats_train, self.feats_test
217
217
  )
218
218
  elif model_type == "mlp_reg":
@@ -247,8 +247,25 @@ class Model:
247
247
  self.clf.fit(feats, labels)
248
248
 
249
249
  def get_predictions(self):
250
- predictions = self.clf.predict(self.feats_test.to_numpy())
251
- return predictions
250
+ # predictions = self.clf.predict(self.feats_test.to_numpy())
251
+ if self.util.exp_is_classification():
252
+ # make a dataframe for the class probabilities
253
+ proba_d = {}
254
+ for c in self.clf.classes_:
255
+ proba_d[c] = []
256
+ # get the class probabilities
257
+ predictions = self.clf.predict_proba(self.feats_test.to_numpy())
258
+ # pred = self.clf.predict(features)
259
+ for i, c in enumerate(self.clf.classes_):
260
+ proba_d[c] = list(predictions.T[i])
261
+ probas = pd.DataFrame(proba_d)
262
+ probas = probas.set_index(self.feats_test.index)
263
+ predictions = probas.idxmax(axis=1).values
264
+ else:
265
+ predictions = self.clf.predict(self.feats_test.to_numpy())
266
+ probas = None
267
+
268
+ return predictions, probas
252
269
 
253
270
  def predict(self):
254
271
  if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@ class Model:
263
280
  )
264
281
  return report
265
282
  """Predict the whole eval feature set"""
266
- predictions = self.get_predictions()
283
+ predictions, probas = self.get_predictions()
284
+
267
285
  report = Reporter(
268
286
  self.df_test[self.target].to_numpy().astype(float),
269
287
  predictions,
270
288
  self.run,
271
289
  self.epoch,
290
+ probas=probas,
272
291
  )
292
+ report.print_probabilities()
273
293
  return report
274
294
 
275
295
  def get_type(self):
@@ -5,33 +5,40 @@ Inspired by code from Su Lei
5
5
 
6
6
  """
7
7
 
8
+ import ast
9
+ from collections import OrderedDict
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from PIL import Image
14
+ from sklearn.metrics import recall_score
8
15
  import torch
9
16
  import torch.nn as nn
10
17
  import torch.nn.functional as F
11
- import torchvision
12
- import torchvision.transforms as transforms
13
18
  from torch.utils.data import Dataset
14
- import ast
15
- import numpy as np
16
- from sklearn.metrics import recall_score
17
- from collections import OrderedDict
18
- from PIL import Image
19
- from traitlets import default
19
+ import torchvision.transforms as transforms
20
20
 
21
- from nkululeko.utils.util import Util
22
21
  import nkululeko.glob_conf as glob_conf
22
+ from nkululeko.losses.loss_softf1loss import SoftF1Loss
23
23
  from nkululeko.models.model import Model
24
24
  from nkululeko.reporting.reporter import Reporter
25
- from nkululeko.losses.loss_softf1loss import SoftF1Loss
25
+ from nkululeko.utils.util import Util
26
26
 
27
27
 
28
- class CNN_model(Model):
29
- """CNN = convolutional neural net"""
28
+ class CNNModel(Model):
29
+ """CNN = convolutional neural net."""
30
30
 
31
31
  is_classifier = True
32
32
 
33
33
  def __init__(self, df_train, df_test, feats_train, feats_test):
34
- """Constructor taking the configuration and all dataframes"""
34
+ """Constructor, taking all dataframes.
35
+
36
+ Args:
37
+ df_train (pd.DataFrame): The train labels.
38
+ df_test (pd.DataFrame): The test labels.
39
+ feats_train (pd.DataFrame): The train features.
40
+ feats_test (pd.DataFrame): The test features.
41
+ """
35
42
  super().__init__(df_train, df_test, feats_train, feats_test)
36
43
  super().set_model_type("ann")
37
44
  self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
147
154
  self.optimizer.step()
148
155
  self.loss = (np.asarray(losses)).mean()
149
156
 
150
- def evaluate_model(self, model, loader, device):
157
+ def get_probas(self, logits):
158
+ # make a dataframe for probabilities (logits)
159
+ proba_d = {}
160
+ classes = self.df_test[self.target].unique()
161
+ classes.sort()
162
+ for c in classes:
163
+ proba_d[c] = []
164
+ for i, c in enumerate(classes):
165
+ proba_d[c] = list(logits.numpy().T[i])
166
+ probas = pd.DataFrame(proba_d)
167
+ probas = probas.set_index(self.df_test.index)
168
+ return probas
169
+
170
+ def evaluate(self, model, loader, device):
151
171
  logits = torch.zeros(len(loader.dataset), self.class_num)
152
172
  targets = torch.zeros(len(loader.dataset))
153
173
  model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
169
189
  self.loss_eval = (np.asarray(losses)).mean()
170
190
  predictions = logits.argmax(dim=1)
171
191
  uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
172
- return uar, targets, predictions
192
+ return uar, targets, predictions, logits
173
193
 
174
194
  def predict(self):
175
- _, truths, predictions = self.evaluate_model(
195
+ _, truths, predictions, logits = self.evaluate(
176
196
  self.model, self.testloader, self.device
177
197
  )
178
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
179
- report = Reporter(truths, predictions, self.run, self.epoch)
198
+ uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
199
+ probas = self.get_probas(logits)
200
+ report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
180
201
  try:
181
202
  report.result.loss = self.loss
182
203
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
189
210
  return report
190
211
 
191
212
  def get_predictions(self):
192
- _, truths, predictions = self.evaluate_model(
193
- self.model, self.testloader, self.device
194
- )
213
+ _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
195
214
  return predictions.numpy()
196
215
 
197
216
  def predict_sample(self, features):
198
- """Predict one sample"""
217
+ """Predict one sample."""
199
218
  with torch.no_grad():
200
219
  logits = self.model(torch.from_numpy(features).to(self.device))
201
220
  a = logits.numpy()
@@ -1,25 +1,33 @@
1
1
  # model_mlp.py
2
+ import ast
3
+ from collections import OrderedDict
4
+
5
+ import numpy as np
2
6
  import pandas as pd
7
+ from sklearn.metrics import recall_score
8
+ import torch
3
9
 
4
- from nkululeko.utils.util import Util
5
10
  import nkululeko.glob_conf as glob_conf
11
+ from nkululeko.losses.loss_softf1loss import SoftF1Loss
6
12
  from nkululeko.models.model import Model
7
13
  from nkululeko.reporting.reporter import Reporter
8
- import torch
9
- import ast
10
- import numpy as np
11
- from sklearn.metrics import recall_score
12
- from collections import OrderedDict
13
- from nkululeko.losses.loss_softf1loss import SoftF1Loss
14
+ from nkululeko.utils.util import Util
14
15
 
15
16
 
16
- class MLP_model(Model):
17
+ class MLPModel(Model):
17
18
  """MLP = multi layer perceptron."""
18
19
 
19
20
  is_classifier = True
20
21
 
21
22
  def __init__(self, df_train, df_test, feats_train, feats_test):
22
- """Constructor taking the configuration and all dataframes."""
23
+ """Constructor, taking all dataframes.
24
+
25
+ Args:
26
+ df_train (pd.DataFrame): The train labels.
27
+ df_test (pd.DataFrame): The test labels.
28
+ feats_train (pd.DataFrame): The train features.
29
+ feats_test (pd.DataFrame): The test features.
30
+ """
23
31
  super().__init__(df_train, df_test, feats_train, feats_test)
24
32
  super().set_model_type("ann")
25
33
  self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
97
105
  self.optimizer.step()
98
106
  self.loss = (np.asarray(losses)).mean()
99
107
 
100
- def evaluate_model(self, model, loader, device):
108
+ def evaluate(self, model, loader, device):
101
109
  logits = torch.zeros(len(loader.dataset), self.class_num)
102
110
  targets = torch.zeros(len(loader.dataset))
103
111
  model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
119
127
  self.loss_eval = (np.asarray(losses)).mean()
120
128
  predictions = logits.argmax(dim=1)
121
129
  uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
122
- return uar, targets, predictions
130
+ return uar, targets, predictions, logits
131
+
132
+ def get_probas(self, logits):
133
+ # make a dataframe for probabilities (logits)
134
+ proba_d = {}
135
+ classes = self.df_test[self.target].unique()
136
+ classes.sort()
137
+ for c in classes:
138
+ proba_d[c] = []
139
+ for i, c in enumerate(classes):
140
+ proba_d[c] = list(logits.numpy().T[i])
141
+ probas = pd.DataFrame(proba_d)
142
+ probas = probas.set_index(self.df_test.index)
143
+ return probas
123
144
 
124
145
  def predict(self):
125
- _, truths, predictions = self.evaluate_model(
146
+ _, truths, predictions, logits = self.evaluate(
126
147
  self.model, self.testloader, self.device
127
148
  )
128
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
129
- report = Reporter(truths, predictions, self.run, self.epoch)
149
+ uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
150
+ probas = self.get_probas(logits)
151
+ report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
130
152
  try:
131
153
  report.result.loss = self.loss
132
154
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
139
161
  return report
140
162
 
141
163
  def get_predictions(self):
142
- _, truths, predictions = self.evaluate_model(
143
- self.model, self.testloader, self.device
144
- )
164
+ _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
145
165
  return predictions.numpy()
146
166
 
147
167
  def get_loader(self, df_x, df_y, shuffle):
@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
97
97
  self.model, self.testloader, self.device
98
98
  )
99
99
  result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
100
- report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
100
+ report = Reporter(
101
+ truths.numpy(), predictions.numpy(), None, self.run, self.epoch
102
+ )
101
103
  try:
102
104
  report.result.loss = self.loss
103
105
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -48,7 +48,7 @@ class Plots:
48
48
  )
49
49
  ax.set_ylabel(f"number of speakers")
50
50
  ax.set_xlabel("number of samples")
51
- self._save_plot(
51
+ self.save_plot(
52
52
  ax,
53
53
  "Samples per speaker",
54
54
  f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@ class Plots:
70
70
  rot=0,
71
71
  )
72
72
  )
73
- ax.set_ylabel(f"number of speakers")
73
+ ax.set_ylabel("number of speakers")
74
74
  ax.set_xlabel("number of samples")
75
- self._save_plot(
75
+ self.save_plot(
76
76
  ax,
77
77
  "Sample value counts",
78
78
  f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@ class Plots:
96
96
  binned_data = self.util.continuous_to_categorical(df[class_label])
97
97
  ax = binned_data.value_counts().plot(kind="bar")
98
98
  filename_binned = f"{class_label}_discreet"
99
- self._save_plot(
99
+ self.save_plot(
100
100
  ax,
101
101
  "Sample value counts",
102
102
  filename_binned,
@@ -106,7 +106,7 @@ class Plots:
106
106
  dist_type = self.util.config_val("EXPL", "dist_type", "hist")
107
107
  ax = df[class_label].plot(kind=dist_type)
108
108
 
109
- self._save_plot(
109
+ self.save_plot(
110
110
  ax,
111
111
  "Sample value counts",
112
112
  filename,
@@ -131,17 +131,17 @@ class Plots:
131
131
  df, class_label, att1, self.target, type_s
132
132
  )
133
133
  else:
134
- ax, caption = self._plotcatcont(
134
+ ax, caption = self.plotcatcont(
135
135
  df, class_label, att1, att1, type_s
136
136
  )
137
137
  else:
138
138
  if self.util.is_categorical(df[att1]):
139
- ax, caption = self._plotcatcont(
139
+ ax, caption = self.plotcatcont(
140
140
  df, att1, class_label, att1, type_s
141
141
  )
142
142
  else:
143
143
  ax, caption = self._plot2cont(df, class_label, att1, type_s)
144
- self._save_plot(
144
+ self.save_plot(
145
145
  ax,
146
146
  caption,
147
147
  f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@ class Plots:
171
171
  ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
172
172
  else:
173
173
  # class_label = cat, att1 = cat, att2 = cont
174
- ax, caption = self._plotcatcont(
175
- df, att1, att2, att1, type_s
176
- )
174
+ ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
177
175
  else:
178
176
  if self.util.is_categorical(df[att2]):
179
177
  # class_label = cat, att1 = cont, att2 = cat
180
- ax, caption = self._plotcatcont(
181
- df, att2, att1, att2, type_s
182
- )
178
+ ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
183
179
  else:
184
180
  # class_label = cat, att1 = cont, att2 = cont
185
181
  ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@ class Plots:
205
201
  # class_label = cont, att1 = cont, att2 = cont
206
202
  ax, caption = self._plot2cont(df, att1, att2, type_s)
207
203
 
208
- self._save_plot(
204
+ self.save_plot(
209
205
  ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
210
206
  )
211
207
 
@@ -215,16 +211,16 @@ class Plots:
215
211
  f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
216
212
  )
217
213
 
218
- def _save_plot(self, ax, caption, header, filename, type_s):
214
+ def save_plot(self, ax, caption, header, filename, type_s):
219
215
  # one up because of the runs
220
216
  fig_dir = self.util.get_path("fig_dir") + "../"
221
- fig = ax.figure
217
+ fig_plots = ax.figure
222
218
  # avoid warning
223
219
  # plt.tight_layout()
224
220
  img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
225
221
  plt.savefig(img_path)
226
- plt.close(fig)
227
- # fig.clear() # avoid error
222
+ plt.close(fig_plots)
223
+ self.util.debug(f"Saved plot to {img_path}")
228
224
  glob_conf.report.add_item(
229
225
  ReportItem(
230
226
  Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@ class Plots:
244
240
  return att, df
245
241
 
246
242
  def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
247
- """
248
- plot relation of two continuous distributions with one categorical
249
- """
243
+ """Plot relation of two continuous distributions with one categorical."""
250
244
  pearson = stats.pearsonr(df[cont1], df[cont2])
251
245
  # trunc to three digits
252
246
  pearson = int(pearson[0] * 1000) / 1000
253
247
  pearson_string = f"PCC: {pearson}"
254
248
  ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
255
249
  caption = f"{ylab} {df.shape[0]}. {pearson_string}"
256
- ax.fig.suptitle(caption)
250
+ ax.figure.suptitle(caption)
257
251
  return ax, caption
258
252
 
259
253
  def _plot2cont(self, df, col1, col2, ylab):
260
- """
261
- plot relation of two continuous distributions
262
- """
254
+ """Plot relation of two continuous distributions."""
263
255
  pearson = stats.pearsonr(df[col1], df[col2])
264
256
  # trunc to three digits
265
257
  pearson = int(pearson[0] * 1000) / 1000
266
258
  pearson_string = f"PCC: {pearson}"
267
259
  ax = sns.lmplot(data=df, x=col1, y=col2)
268
260
  caption = f"{ylab} {df.shape[0]}. {pearson_string}"
269
- ax.fig.suptitle(caption)
261
+ ax.figure.suptitle(caption)
270
262
  return ax, caption
271
263
 
272
- def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
273
- """
274
- plot relation of categorical distribution with continuous
275
- """
264
+ def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
265
+ """Plot relation of categorical distribution with continuous."""
276
266
  dist_type = self.util.config_val("EXPL", "dist_type", "hist")
277
267
  cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
278
268
  if dist_type == "hist":
@@ -287,13 +277,11 @@ class Plots:
287
277
  )
288
278
  ax.set(xlabel=f"{cont_col}")
289
279
  caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
290
- ax.fig.suptitle(caption)
280
+ ax.figure.suptitle(caption)
291
281
  return ax, caption
292
282
 
293
283
  def _plot2cat(self, df, col1, col2, xlab, ylab):
294
- """
295
- plot relation of 2 categorical distributions
296
- """
284
+ """Plot relation of 2 categorical distributions."""
297
285
  crosstab = pd.crosstab(index=df[col1], columns=df[col2])
298
286
  res_pval = stats.chi2_contingency(crosstab)
299
287
  res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@ class Plots:
320
308
  max = self.util.to_3_digits(df.duration.max())
321
309
  title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
322
310
  ax.set_title(title)
323
- ax.set_xlabel(f"duration")
324
- ax.set_ylabel(f"number of samples")
311
+ ax.set_xlabel("duration")
312
+ ax.set_ylabel("number of samples")
325
313
  fig = ax.figure
326
314
  # plt.tight_layout()
327
315
  img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"