nkululeko 0.83.3__tar.gz → 0.84.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. {nkululeko-0.83.3 → nkululeko-0.84.0}/CHANGELOG.md +5 -0
  2. {nkululeko-0.83.3/nkululeko.egg-info → nkululeko-0.84.0}/PKG-INFO +6 -1
  3. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/constants.py +1 -1
  4. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/demo.py +6 -7
  5. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/demo_predictor.py +4 -3
  6. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/experiment.py +15 -12
  7. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/explore.py +29 -23
  8. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_analyser.py +33 -0
  9. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/glob_conf.py +5 -0
  10. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model.py +1 -0
  11. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_bayes.py +1 -0
  12. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_cnn.py +6 -9
  13. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_gmm.py +2 -3
  14. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_knn.py +1 -0
  15. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_knn_reg.py +1 -0
  16. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_lin_reg.py +1 -0
  17. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_mlp.py +17 -7
  18. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_mlp_regression.py +7 -12
  19. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_svm.py +1 -0
  20. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_svr.py +1 -0
  21. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_tree.py +1 -0
  22. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/model_tree_reg.py +1 -0
  23. nkululeko-0.84.0/nkululeko/models/model_xgb.py +17 -0
  24. nkululeko-0.84.0/nkululeko/models/model_xgr.py +14 -0
  25. nkululeko-0.84.0/nkululeko/test_pretrain.py +117 -0
  26. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/utils/util.py +28 -3
  27. {nkululeko-0.83.3 → nkululeko-0.84.0/nkululeko.egg-info}/PKG-INFO +6 -1
  28. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko.egg-info/SOURCES.txt +1 -0
  29. nkululeko-0.83.3/nkululeko/models/model_xgb.py +0 -15
  30. nkululeko-0.83.3/nkululeko/models/model_xgr.py +0 -12
  31. {nkululeko-0.83.3 → nkululeko-0.84.0}/LICENSE +0 -0
  32. {nkululeko-0.83.3 → nkululeko-0.84.0}/README.md +0 -0
  33. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/aesdd/process_database.py +0 -0
  34. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/androids/process_database.py +0 -0
  35. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/androids_orig/process_database.py +0 -0
  36. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/androids_test/process_database.py +0 -0
  37. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/ased/process_database.py +0 -0
  38. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/asvp-esd/process_database.py +0 -0
  39. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/baved/process_database.py +0 -0
  40. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/cafe/process_database.py +0 -0
  41. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/clac/process_database.py +0 -0
  42. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/cmu-mosei/process_database.py +0 -0
  43. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/demos/process_database.py +0 -0
  44. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/ekorpus/process_database.py +0 -0
  45. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emns/process_database.py +0 -0
  46. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emofilm/convert_to_16k.py +0 -0
  47. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emofilm/process_database.py +0 -0
  48. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emorynlp/process_database.py +0 -0
  49. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emov-db/process_database.py +0 -0
  50. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emovo/process_database.py +0 -0
  51. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/emozionalmente/create.py +0 -0
  52. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/enterface/process_database.py +0 -0
  53. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/esd/process_database.py +0 -0
  54. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/gerparas/process_database.py +0 -0
  55. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/iemocap/process_database.py +0 -0
  56. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/jl/process_database.py +0 -0
  57. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/jtes/process_database.py +0 -0
  58. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/meld/process_database.py +0 -0
  59. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/mesd/process_database.py +0 -0
  60. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/mess/process_database.py +0 -0
  61. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/mlendsnd/process_database.py +0 -0
  62. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/msp-improv/process_database2.py +0 -0
  63. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/msp-podcast/process_database.py +0 -0
  64. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/oreau2/process_database.py +0 -0
  65. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/portuguese/process_database.py +0 -0
  66. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/ravdess/process_database.py +0 -0
  67. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/ravdess/process_database_speaker.py +0 -0
  68. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/savee/process_database.py +0 -0
  69. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/shemo/process_database.py +0 -0
  70. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/subesco/process_database.py +0 -0
  71. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/tess/process_database.py +0 -0
  72. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/thorsten-emotional/process_database.py +0 -0
  73. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/urdu/process_database.py +0 -0
  74. {nkululeko-0.83.3 → nkululeko-0.84.0}/data/vivae/process_database.py +0 -0
  75. {nkululeko-0.83.3 → nkululeko-0.84.0}/docs/source/conf.py +0 -0
  76. {nkululeko-0.83.3 → nkululeko-0.84.0}/meta/demos/demo_best_model.py +0 -0
  77. {nkululeko-0.83.3 → nkululeko-0.84.0}/meta/demos/my_experiment.py +0 -0
  78. {nkululeko-0.83.3 → nkululeko-0.84.0}/meta/demos/my_experiment_local.py +0 -0
  79. {nkululeko-0.83.3 → nkululeko-0.84.0}/meta/demos/plot_faster_anim.py +0 -0
  80. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/__init__.py +0 -0
  81. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/aug_train.py +0 -0
  82. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augment.py +0 -0
  83. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augmenting/__init__.py +0 -0
  84. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augmenting/augmenter.py +0 -0
  85. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augmenting/randomsplicer.py +0 -0
  86. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augmenting/randomsplicing.py +0 -0
  87. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/augmenting/resampler.py +0 -0
  88. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/__init__.py +0 -0
  89. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_age.py +0 -0
  90. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_arousal.py +0 -0
  91. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_dominance.py +0 -0
  92. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_gender.py +0 -0
  93. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_mos.py +0 -0
  94. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_pesq.py +0 -0
  95. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_sdr.py +0 -0
  96. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_snr.py +0 -0
  97. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_stoi.py +0 -0
  98. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/ap_valence.py +0 -0
  99. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/autopredict/estimate_snr.py +0 -0
  100. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/cacheddataset.py +0 -0
  101. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/data/__init__.py +0 -0
  102. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/data/dataset.py +0 -0
  103. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/data/dataset_csv.py +0 -0
  104. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/demo_feats.py +0 -0
  105. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/export.py +0 -0
  106. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/__init__.py +0 -0
  107. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_agender.py +0 -0
  108. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  109. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
  110. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
  111. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_clap.py +0 -0
  112. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
  113. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_import.py +0 -0
  114. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_mld.py +0 -0
  115. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_mos.py +0 -0
  116. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
  117. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
  118. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_praat.py +0 -0
  119. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_snr.py +0 -0
  120. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
  121. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  122. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_squim.py +0 -0
  123. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_trill.py +0 -0
  124. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
  125. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
  126. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
  127. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/featureset.py +0 -0
  128. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  129. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/feature_extractor.py +0 -0
  130. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/file_checker.py +0 -0
  131. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/filter_data.py +0 -0
  132. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/losses/__init__.py +0 -0
  133. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/losses/loss_ccc.py +0 -0
  134. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/losses/loss_softf1loss.py +0 -0
  135. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/modelrunner.py +0 -0
  136. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/models/__init__.py +0 -0
  137. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/multidb.py +0 -0
  138. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/nkuluflag.py +0 -0
  139. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/nkululeko.py +0 -0
  140. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/plots.py +0 -0
  141. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/predict.py +0 -0
  142. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/__init__.py +0 -0
  143. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/defines.py +0 -0
  144. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/latex_writer.py +0 -0
  145. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/report.py +0 -0
  146. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/report_item.py +0 -0
  147. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/reporter.py +0 -0
  148. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/reporting/result.py +0 -0
  149. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/resample.py +0 -0
  150. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/runmanager.py +0 -0
  151. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/scaler.py +0 -0
  152. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/segment.py +0 -0
  153. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/segmenting/__init__.py +0 -0
  154. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  155. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/segmenting/seg_silero.py +0 -0
  156. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/syllable_nuclei.py +0 -0
  157. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/test.py +0 -0
  158. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/test_predictor.py +0 -0
  159. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/utils/__init__.py +0 -0
  160. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/utils/files.py +0 -0
  161. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko/utils/stats.py +0 -0
  162. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  163. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko.egg-info/requires.txt +0 -0
  164. {nkululeko-0.83.3 → nkululeko-0.84.0}/nkululeko.egg-info/top_level.txt +0 -0
  165. {nkululeko-0.83.3 → nkululeko-0.84.0}/pyproject.toml +0 -0
  166. {nkululeko-0.83.3 → nkululeko-0.84.0}/setup.cfg +0 -0
  167. {nkululeko-0.83.3 → nkululeko-0.84.0}/setup.py +0 -0
  168. {nkululeko-0.83.3 → nkululeko-0.84.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,11 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.84.0
5
+ --------------
6
+ * added SHAP analysis
7
+ * started with finetuning
8
+
4
9
  Version 0.83.3
5
10
  --------------
6
11
  * fixed a naming error in trill features that prevented storage of experiment
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.83.3
3
+ Version: 0.84.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,11 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.84.0
337
+ --------------
338
+ * added SHAP analysis
339
+ * started with finetuning
340
+
336
341
  Version 0.83.3
337
342
  --------------
338
343
  * fixed a naming error in trill features that prevented storage of experiment
@@ -1,2 +1,2 @@
1
- VERSION="0.83.3"
1
+ VERSION="0.84.0"
2
2
  SAMPLING_RATE = 16000
@@ -2,8 +2,9 @@
2
2
  # Demonstration code to use the ML-experiment framework
3
3
  # Test the loading of a previously trained model and demo mode
4
4
  # needs the project config file to run before
5
- """
6
- This script is used to test the loading of a previously trained model and run it in demo mode.
5
+ """This script is used to test the loading of a previously trained model.
6
+
7
+ And run it in demo mode.
7
8
  It requires the project config file to be run before.
8
9
 
9
10
  Usage:
@@ -20,17 +21,15 @@ import argparse
20
21
  import configparser
21
22
  import os
22
23
 
23
- import nkululeko.glob_conf as glob_conf
24
24
  from nkululeko.constants import VERSION
25
25
  from nkululeko.experiment import Experiment
26
+ import nkululeko.glob_conf as glob_conf
26
27
  from nkululeko.utils.util import Util
27
28
 
28
29
 
29
30
  def main(src_dir):
30
- parser = argparse.ArgumentParser(
31
- description="Call the nkululeko DEMO framework.")
32
- parser.add_argument("--config", default="exp.ini",
33
- help="The base configuration")
31
+ parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
32
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
34
33
  parser.add_argument(
35
34
  "--file", help="A file that should be processed (16kHz mono wav)"
36
35
  )
@@ -1,18 +1,19 @@
1
1
  # demo_predictor.py
2
2
  import os
3
3
 
4
- import audformat
5
- import audiofile
6
4
  import numpy as np
7
5
  import pandas as pd
8
6
 
7
+ import audformat
8
+ import audiofile
9
+
9
10
  import nkululeko.glob_conf as glob_conf
10
11
  from nkululeko.utils.util import Util
11
12
 
12
13
 
13
14
  class Demo_predictor:
14
15
  def __init__(self, model, file, is_list, feature_extractor, label_encoder, outfile):
15
- """Constructor setting up name and configuration"""
16
+ """Constructor setting up name and configuration."""
16
17
  self.model = model
17
18
  self.feature_extractor = feature_extractor
18
19
  self.label_encoder = label_encoder
@@ -5,20 +5,22 @@ import pickle
5
5
  import random
6
6
  import time
7
7
 
8
- import audeer
9
- import audformat
10
8
  import numpy as np
11
9
  import pandas as pd
12
10
  from sklearn.preprocessing import LabelEncoder
13
11
 
14
- import nkululeko.glob_conf as glob_conf
12
+ import audeer
13
+ import audformat
14
+
15
15
  from nkululeko.data.dataset import Dataset
16
16
  from nkululeko.data.dataset_csv import Dataset_CSV
17
17
  from nkululeko.demo_predictor import Demo_predictor
18
18
  from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
19
19
  from nkululeko.feature_extractor import FeatureExtractor
20
20
  from nkululeko.file_checker import FileChecker
21
- from nkululeko.filter_data import DataFilter, filter_min_dur
21
+ from nkululeko.filter_data import DataFilter
22
+ from nkululeko.filter_data import filter_min_dur
23
+ import nkululeko.glob_conf as glob_conf
22
24
  from nkululeko.plots import Plots
23
25
  from nkululeko.reporting.report import Report
24
26
  from nkululeko.runmanager import Runmanager
@@ -101,6 +103,7 @@ class Experiment:
101
103
  self.got_speaker = True
102
104
  self.datasets.update({d: data})
103
105
  self.target = self.util.config_val("DATA", "target", "emotion")
106
+ glob_conf.set_target(self.target)
104
107
  # print target via debug
105
108
  self.util.debug(f"target: {self.target}")
106
109
  # print keys/column
@@ -487,11 +490,7 @@ class Experiment:
487
490
  return df_ret
488
491
 
489
492
  def analyse_features(self, needs_feats):
490
- """
491
- Do a feature exploration
492
-
493
- """
494
-
493
+ """Do a feature exploration."""
495
494
  plot_feats = eval(
496
495
  self.util.config_val("EXPL", "feature_distributions", "False")
497
496
  )
@@ -511,7 +510,7 @@ class Experiment:
511
510
  f"unknown sample selection specifier {sample_selection}, should"
512
511
  " be [all | train | test]"
513
512
  )
514
-
513
+ self.util.debug(f"sampling selection: {sample_selection}")
515
514
  if self.util.config_val("EXPL", "value_counts", False):
516
515
  self.plot_distribution(df_labels)
517
516
 
@@ -537,9 +536,13 @@ class Experiment:
537
536
  f"unknown sample selection specifier {sample_selection}, should"
538
537
  " be [all | train | test]"
539
538
  )
539
+ feat_analyser = FeatureAnalyser(sample_selection, df_labels, df_feats)
540
+ # check if SHAP features should be analysed
541
+ shap = eval(self.util.config_val("EXPL", "shap", "False"))
542
+ if shap:
543
+ feat_analyser.analyse_shap(self.runmgr.get_best_model())
540
544
 
541
545
  if plot_feats:
542
- feat_analyser = FeatureAnalyser(sample_selection, df_labels, df_feats)
543
546
  feat_analyser.analyse()
544
547
 
545
548
  # check if a scatterplot should be done
@@ -692,7 +695,7 @@ class Experiment:
692
695
  if self.runmgr.modelrunner.model.is_ann():
693
696
  self.runmgr.modelrunner.model = None
694
697
  self.util.warn(
695
- "Save experiment: Can't pickle the learning model so saving without it."
698
+ "Save experiment: Can't pickle the trained model so saving without it. (it should be stored anyway)"
696
699
  )
697
700
  try:
698
701
  f = open(filename, "wb")
@@ -12,9 +12,9 @@ from nkululeko.utils.util import Util
12
12
 
13
13
  def main(src_dir):
14
14
  parser = argparse.ArgumentParser(
15
- description="Call the nkululeko EXPLORE framework.")
16
- parser.add_argument("--config", default="exp.ini",
17
- help="The base configuration")
15
+ description="Call the nkululeko EXPLORE framework."
16
+ )
17
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
18
18
  args = parser.parse_args()
19
19
  if args.config is not None:
20
20
  config_file = args.config
@@ -43,28 +43,34 @@ def main(src_dir):
43
43
  import warnings
44
44
 
45
45
  warnings.filterwarnings("ignore")
46
-
47
- # load the data
48
- expr.load_datasets()
49
-
50
- # split into train and test
51
- expr.fill_train_and_tests()
52
- util.debug(
53
- f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
54
-
55
- plot_feats = eval(util.config_val(
56
- "EXPL", "feature_distributions", "False"))
57
- tsne = eval(util.config_val("EXPL", "tsne", "False"))
58
- scatter = eval(util.config_val("EXPL", "scatter", "False"))
59
- spotlight = eval(util.config_val("EXPL", "spotlight", "False"))
60
- model_type = util.config_val("EXPL", "model", False)
61
- plot_tree = eval(util.config_val("EXPL", "plot_tree", "False"))
62
46
  needs_feats = False
63
- if plot_feats or tsne or scatter or model_type or plot_tree:
64
- # these investigations need features to explore
65
- expr.extract_feats()
47
+ try:
48
+ # load the experiment
49
+ expr.load(f"{util.get_save_name()}")
66
50
  needs_feats = True
67
- # explore
51
+ except FileNotFoundError:
52
+ # first time: load the data
53
+ expr.load_datasets()
54
+
55
+ # split into train and test
56
+ expr.fill_train_and_tests()
57
+ util.debug(
58
+ f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
59
+ )
60
+
61
+ plot_feats = eval(util.config_val("EXPL", "feature_distributions", "False"))
62
+ tsne = eval(util.config_val("EXPL", "tsne", "False"))
63
+ scatter = eval(util.config_val("EXPL", "scatter", "False"))
64
+ spotlight = eval(util.config_val("EXPL", "spotlight", "False"))
65
+ shap = eval(util.config_val("EXPL", "shap", "False"))
66
+ model_type = util.config_val("EXPL", "model", False)
67
+ plot_tree = eval(util.config_val("EXPL", "plot_tree", "False"))
68
+ needs_feats = False
69
+ if plot_feats or tsne or scatter or model_type or plot_tree or shap:
70
+ # these investigations need features to explore
71
+ expr.extract_feats()
72
+ needs_feats = True
73
+ # explore
68
74
  expr.analyse_features(needs_feats)
69
75
  expr.store_report()
70
76
  print("DONE")
@@ -40,6 +40,39 @@ class FeatureAnalyser:
40
40
  importance = model.feature_importances_
41
41
  return importance
42
42
 
43
+ def analyse_shap(self, model):
44
+ """Shap analysis.
45
+
46
+ Use the best model from a previous run and analyse feature importance with SHAP.
47
+ https://m.mage.ai/how-to-interpret-and-explain-your-machine-learning-models-using-shap-values-471c2635b78e.
48
+ """
49
+ import shap
50
+
51
+ name = "my_shap_values"
52
+ if not self.util.exist_pickle(name):
53
+
54
+ explainer = shap.Explainer(
55
+ model.predict_shap,
56
+ self.features,
57
+ output_names=glob_conf.labels,
58
+ algorithm="permutation",
59
+ npermutations=5,
60
+ )
61
+ self.util.debug("computing SHAP values...")
62
+ shap_values = explainer(self.features)
63
+ self.util.to_pickle(shap_values, name)
64
+ else:
65
+ shap_values = self.util.from_pickle(name)
66
+ plt.tight_layout()
67
+ shap.plots.bar(shap_values)
68
+ fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
69
+ exp_name = self.util.get_exp_name(only_data=True)
70
+ format = self.util.config_val("PLOT", "format", "png")
71
+ filename = f"_SHAP_{model.name}"
72
+ filename = f"{fig_dir}{exp_name}{filename}.{format}"
73
+ plt.savefig(filename)
74
+ self.util.debug(f"plotted SHAP feature importance tp {filename}")
75
+
43
76
  def analyse(self):
44
77
  models = ast.literal_eval(self.util.config_val("EXPL", "model", "['log_reg']"))
45
78
  model_name = "_".join(models)
@@ -29,3 +29,8 @@ def set_report(report_obj):
29
29
  def set_labels(labels_obj):
30
30
  global labels
31
31
  labels = labels_obj
32
+
33
+
34
+ def set_target(target_obj):
35
+ global target
36
+ target = target_obj
@@ -20,6 +20,7 @@ class Model:
20
20
 
21
21
  def __init__(self, df_train, df_test, feats_train, feats_test):
22
22
  """Constructor taking the configuration and all dataframes."""
23
+ self.name = "undefined"
23
24
  self.df_train, self.df_test, self.feats_train, self.feats_test = (
24
25
  df_train,
25
26
  df_test,
@@ -12,3 +12,4 @@ class Bayes_model(Model):
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
14
  self.clf = GaussianNB() # set up the classifier
15
+ self.name = "bayes"
@@ -34,7 +34,8 @@ class CNN_model(Model):
34
34
  """Constructor taking the configuration and all dataframes"""
35
35
  super().__init__(df_train, df_test, feats_train, feats_test)
36
36
  super().set_model_type("ann")
37
- self.target = glob_conf.config["DATA"]["target"]
37
+ self.name = "cnn"
38
+ self.target = glob_conf.target
38
39
  labels = glob_conf.labels
39
40
  self.class_num = len(labels)
40
41
  # set up loss criterion
@@ -86,8 +87,7 @@ class CNN_model(Model):
86
87
  train_set = self.Dataset_image(
87
88
  feats_train, df_train, self.target, transformations
88
89
  )
89
- test_set = self.Dataset_image(
90
- feats_test, df_test, self.target, transformations)
90
+ test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
91
91
  # Define data loaders
92
92
  self.trainloader = torch.utils.data.DataLoader(
93
93
  train_set,
@@ -140,8 +140,7 @@ class CNN_model(Model):
140
140
  losses = []
141
141
  for images, labels in self.trainloader:
142
142
  logits = self.model(images.to(self.device))
143
- loss = self.criterion(logits, labels.to(
144
- self.device, dtype=torch.int64))
143
+ loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
145
144
  losses.append(loss.item())
146
145
  self.optimizer.zero_grad()
147
146
  loss.backward()
@@ -169,16 +168,14 @@ class CNN_model(Model):
169
168
 
170
169
  self.loss_eval = (np.asarray(losses)).mean()
171
170
  predictions = logits.argmax(dim=1)
172
- uar = recall_score(
173
- targets.numpy(), predictions.numpy(), average="macro")
171
+ uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
174
172
  return uar, targets, predictions
175
173
 
176
174
  def predict(self):
177
175
  _, truths, predictions = self.evaluate_model(
178
176
  self.model, self.testloader, self.device
179
177
  )
180
- uar, _, _ = self.evaluate_model(
181
- self.model, self.trainloader, self.device)
178
+ uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
182
179
  report = Reporter(truths, predictions, self.run, self.epoch)
183
180
  try:
184
181
  report.result.loss = self.loss
@@ -11,10 +11,9 @@ class GMM_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "gmm"
14
15
  n_components = int(self.util.config_val("MODEL", "GMM_components", "4"))
15
- covariance_type = self.util.config_val(
16
- "MODEL", "GMM_covariance_type", "full"
17
- )
16
+ covariance_type = self.util.config_val("MODEL", "GMM_covariance_type", "full")
18
17
  self.clf = mixture.GaussianMixture(
19
18
  n_components=n_components, covariance_type=covariance_type
20
19
  )
@@ -11,6 +11,7 @@ class KNN_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "knn"
14
15
  method = self.util.config_val("MODEL", "KNN_weights", "uniform")
15
16
  k = int(self.util.config_val("MODEL", "K_val", "5"))
16
17
  self.clf = KNeighborsClassifier(
@@ -11,6 +11,7 @@ class KNN_reg_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "knn_reg"
14
15
  method = self.util.config_val("MODEL", "KNN_weights", "uniform")
15
16
  k = int(self.util.config_val("MODEL", "K_val", "5"))
16
17
  self.clf = KNeighborsRegressor(
@@ -11,4 +11,5 @@ class Lin_reg_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "lin_reg"
14
15
  self.clf = LinearRegression() # set up the classifier
@@ -1,4 +1,6 @@
1
1
  # model_mlp.py
2
+ import pandas as pd
3
+
2
4
  from nkululeko.utils.util import Util
3
5
  import nkululeko.glob_conf as glob_conf
4
6
  from nkululeko.models.model import Model
@@ -20,6 +22,7 @@ class MLP_model(Model):
20
22
  """Constructor taking the configuration and all dataframes"""
21
23
  super().__init__(df_train, df_test, feats_train, feats_test)
22
24
  super().set_model_type("ann")
25
+ self.name = "mlp"
23
26
  self.target = glob_conf.config["DATA"]["target"]
24
27
  labels = glob_conf.labels
25
28
  self.class_num = len(labels)
@@ -87,8 +90,7 @@ class MLP_model(Model):
87
90
  losses = []
88
91
  for features, labels in self.trainloader:
89
92
  logits = self.model(features.to(self.device))
90
- loss = self.criterion(logits, labels.to(
91
- self.device, dtype=torch.int64))
93
+ loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
92
94
  losses.append(loss.item())
93
95
  self.optimizer.zero_grad()
94
96
  loss.backward()
@@ -116,16 +118,14 @@ class MLP_model(Model):
116
118
 
117
119
  self.loss_eval = (np.asarray(losses)).mean()
118
120
  predictions = logits.argmax(dim=1)
119
- uar = recall_score(
120
- targets.numpy(), predictions.numpy(), average="macro")
121
+ uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
121
122
  return uar, targets, predictions
122
123
 
123
124
  def predict(self):
124
125
  _, truths, predictions = self.evaluate_model(
125
126
  self.model, self.testloader, self.device
126
127
  )
127
- uar, _, _ = self.evaluate_model(
128
- self.model, self.trainloader, self.device)
128
+ uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
129
129
  report = Reporter(truths, predictions, self.run, self.epoch)
130
130
  try:
131
131
  report.result.loss = self.loss
@@ -176,8 +176,18 @@ class MLP_model(Model):
176
176
  x = x.squeeze(dim=1).float()
177
177
  return self.linear(x)
178
178
 
179
+ def predict_shap(self, features):
180
+ # predict outputs for all samples in SHAP format (pd. dataframe)
181
+ results = []
182
+ for index, row in features.iterrows():
183
+ feats = row.values
184
+ res_dict = self.predict_sample(feats)
185
+ class_key = max(res_dict, key=res_dict.get)
186
+ results.append(class_key)
187
+ return results
188
+
179
189
  def predict_sample(self, features):
180
- """Predict one sample"""
190
+ """Predict one sample."""
181
191
  with torch.no_grad():
182
192
  features = torch.from_numpy(features)
183
193
  features = np.reshape(features, (-1, 1)).T
@@ -25,6 +25,7 @@ class MLP_Reg_model(Model):
25
25
  def __init__(self, df_train, df_test, feats_train, feats_test):
26
26
  """Constructor taking the configuration and all dataframes"""
27
27
  super().__init__(df_train, df_test, feats_train, feats_test)
28
+ self.name = "mlp_reg"
28
29
  super().set_model_type("ann")
29
30
  self.target = glob_conf.config["DATA"]["target"]
30
31
  labels = glob_conf.labels
@@ -52,8 +53,7 @@ class MLP_Reg_model(Model):
52
53
  drop = self.util.config_val("MODEL", "drop", False)
53
54
  if drop:
54
55
  self.util.debug(f"training with dropout: {drop}")
55
- self.model = self.MLP(
56
- feats_train.shape[1], layers, 1, drop).to(self.device)
56
+ self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
57
57
  self.learning_rate = float(
58
58
  self.util.config_val("MODEL", "learning_rate", 0.0001)
59
59
  )
@@ -96,10 +96,8 @@ class MLP_Reg_model(Model):
96
96
  _, truths, predictions = self.evaluate_model(
97
97
  self.model, self.testloader, self.device
98
98
  )
99
- result, _, _ = self.evaluate_model(
100
- self.model, self.trainloader, self.device)
101
- report = Reporter(truths.numpy(), predictions.numpy(),
102
- self.run, self.epoch)
99
+ result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
100
+ report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
103
101
  try:
104
102
  report.result.loss = self.loss
105
103
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -133,11 +131,9 @@ class MLP_Reg_model(Model):
133
131
 
134
132
  def __getitem__(self, item):
135
133
  index = self.df.index[item]
136
- features = self.df_features.loc[index, :].values.astype(
137
- "float32").squeeze()
134
+ features = self.df_features.loc[index, :].values.astype("float32").squeeze()
138
135
  labels = (
139
- np.array([self.df.loc[index, self.label]]
140
- ).astype("float32").squeeze()
136
+ np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
141
137
  )
142
138
  return features, labels
143
139
 
@@ -194,8 +190,7 @@ class MLP_Reg_model(Model):
194
190
  end_index = (index + 1) * loader.batch_size
195
191
  if end_index > len(loader.dataset):
196
192
  end_index = len(loader.dataset)
197
- logits[start_index:end_index] = model(
198
- features.to(device)).reshape(-1)
193
+ logits[start_index:end_index] = model(features.to(device)).reshape(-1)
199
194
  targets[start_index:end_index] = labels
200
195
  loss = self.criterion(
201
196
  logits[start_index:end_index].to(
@@ -11,6 +11,7 @@ class SVM_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "svm"
14
15
  c = float(self.util.config_val("MODEL", "C_val", "0.001"))
15
16
  if eval(self.util.config_val("MODEL", "class_weight", "False")):
16
17
  class_weight = "balanced"
@@ -11,6 +11,7 @@ class SVR_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "svr"
14
15
  c = float(self.util.config_val("MODEL", "C_val", "0.001"))
15
16
  # kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’
16
17
  kernel = self.util.config_val("MODEL", "kernel", "rbf")
@@ -11,4 +11,5 @@ class Tree_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "tree"
14
15
  self.clf = DecisionTreeClassifier() # set up the classifier
@@ -11,4 +11,5 @@ class Tree_reg_model(Model):
11
11
 
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
+ self.name = "tree_reg"
14
15
  self.clf = DecisionTreeRegressor() # set up the classifier
@@ -0,0 +1,17 @@
1
+ # xgbmodel.py
2
+
3
+ from xgboost import XGBClassifier
4
+ from nkululeko.models.model import Model
5
+
6
+
7
+ class XGB_model(Model):
8
+ """An XGBoost model"""
9
+
10
+ def __init__(self, df_train, df_test, feats_train, feats_test):
11
+ super().__init__(df_train, df_test, feats_train, feats_test)
12
+ self.name = "xgb"
13
+ self.is_classifier = True
14
+ self.clf = XGBClassifier() # set up the classifier
15
+
16
+ def get_type(self):
17
+ return "xgb"
@@ -0,0 +1,14 @@
1
+ # xgrmodel.py
2
+
3
+ from xgboost.sklearn import XGBRegressor
4
+ from nkululeko.models.model import Model
5
+
6
+
7
+ class XGR_model(Model):
8
+ """An XGBoost regression model"""
9
+
10
+ def __init__(self, df_train, df_test, feats_train, feats_test):
11
+ super().__init__(df_train, df_test, feats_train, feats_test)
12
+ self.name = "xgr"
13
+ self.is_classifier = False
14
+ self.clf = XGBRegressor() # set up the regressor