nkululeko 0.85.2__tar.gz → 0.86.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. {nkululeko-0.85.2 → nkululeko-0.86.0}/CHANGELOG.md +6 -0
  2. {nkululeko-0.85.2/nkululeko.egg-info → nkululeko-0.86.0}/PKG-INFO +7 -1
  3. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/constants.py +1 -1
  4. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/experiment.py +30 -40
  5. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_opensmile.py +25 -25
  6. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/featureset.py +4 -4
  7. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_tuned.py +149 -88
  8. {nkululeko-0.85.2 → nkululeko-0.86.0/nkululeko.egg-info}/PKG-INFO +7 -1
  9. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko.egg-info/SOURCES.txt +0 -1
  10. nkululeko-0.85.2/nkululeko/models/finetune_model.py +0 -190
  11. {nkululeko-0.85.2 → nkululeko-0.86.0}/LICENSE +0 -0
  12. {nkululeko-0.85.2 → nkululeko-0.86.0}/README.md +0 -0
  13. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/aesdd/process_database.py +0 -0
  14. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/androids/process_database.py +0 -0
  15. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/androids_orig/process_database.py +0 -0
  16. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/androids_test/process_database.py +0 -0
  17. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/ased/process_database.py +0 -0
  18. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/asvp-esd/process_database.py +0 -0
  19. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/baved/process_database.py +0 -0
  20. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/cafe/process_database.py +0 -0
  21. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/clac/process_database.py +0 -0
  22. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/cmu-mosei/process_database.py +0 -0
  23. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/demos/process_database.py +0 -0
  24. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/ekorpus/process_database.py +0 -0
  25. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emns/process_database.py +0 -0
  26. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emofilm/convert_to_16k.py +0 -0
  27. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emofilm/process_database.py +0 -0
  28. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emorynlp/process_database.py +0 -0
  29. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emov-db/process_database.py +0 -0
  30. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emovo/process_database.py +0 -0
  31. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/emozionalmente/create.py +0 -0
  32. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/enterface/process_database.py +0 -0
  33. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/esd/process_database.py +0 -0
  34. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/gerparas/process_database.py +0 -0
  35. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/iemocap/process_database.py +0 -0
  36. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/jl/process_database.py +0 -0
  37. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/jtes/process_database.py +0 -0
  38. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/meld/process_database.py +0 -0
  39. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/mesd/process_database.py +0 -0
  40. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/mess/process_database.py +0 -0
  41. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/mlendsnd/process_database.py +0 -0
  42. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/msp-improv/process_database2.py +0 -0
  43. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/msp-podcast/process_database.py +0 -0
  44. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/oreau2/process_database.py +0 -0
  45. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/portuguese/process_database.py +0 -0
  46. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/ravdess/process_database.py +0 -0
  47. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/ravdess/process_database_speaker.py +0 -0
  48. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/savee/process_database.py +0 -0
  49. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/shemo/process_database.py +0 -0
  50. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/subesco/process_database.py +0 -0
  51. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/tess/process_database.py +0 -0
  52. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/thorsten-emotional/process_database.py +0 -0
  53. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/urdu/process_database.py +0 -0
  54. {nkululeko-0.85.2 → nkululeko-0.86.0}/data/vivae/process_database.py +0 -0
  55. {nkululeko-0.85.2 → nkululeko-0.86.0}/docs/source/conf.py +0 -0
  56. {nkululeko-0.85.2 → nkululeko-0.86.0}/meta/demos/demo_best_model.py +0 -0
  57. {nkululeko-0.85.2 → nkululeko-0.86.0}/meta/demos/my_experiment.py +0 -0
  58. {nkululeko-0.85.2 → nkululeko-0.86.0}/meta/demos/my_experiment_local.py +0 -0
  59. {nkululeko-0.85.2 → nkululeko-0.86.0}/meta/demos/plot_faster_anim.py +0 -0
  60. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/__init__.py +0 -0
  61. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/aug_train.py +0 -0
  62. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augment.py +0 -0
  63. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augmenting/__init__.py +0 -0
  64. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augmenting/augmenter.py +0 -0
  65. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augmenting/randomsplicer.py +0 -0
  66. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augmenting/randomsplicing.py +0 -0
  67. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/augmenting/resampler.py +0 -0
  68. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/__init__.py +0 -0
  69. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_age.py +0 -0
  70. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_arousal.py +0 -0
  71. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_dominance.py +0 -0
  72. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_gender.py +0 -0
  73. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_mos.py +0 -0
  74. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_pesq.py +0 -0
  75. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_sdr.py +0 -0
  76. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_snr.py +0 -0
  77. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_stoi.py +0 -0
  78. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/ap_valence.py +0 -0
  79. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/autopredict/estimate_snr.py +0 -0
  80. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/cacheddataset.py +0 -0
  81. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/data/__init__.py +0 -0
  82. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/data/dataset.py +0 -0
  83. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/data/dataset_csv.py +0 -0
  84. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/demo.py +0 -0
  85. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/demo_feats.py +0 -0
  86. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/demo_predictor.py +0 -0
  87. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/explore.py +0 -0
  88. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/export.py +0 -0
  89. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/__init__.py +0 -0
  90. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_agender.py +0 -0
  91. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  92. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
  93. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
  94. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
  95. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_clap.py +0 -0
  96. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
  97. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_import.py +0 -0
  98. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_mld.py +0 -0
  99. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_mos.py +0 -0
  100. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
  101. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_praat.py +0 -0
  102. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_snr.py +0 -0
  103. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
  104. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  105. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_squim.py +0 -0
  106. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_trill.py +0 -0
  107. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
  108. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
  109. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
  110. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  111. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/feature_extractor.py +0 -0
  112. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/file_checker.py +0 -0
  113. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/filter_data.py +0 -0
  114. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/glob_conf.py +0 -0
  115. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/losses/__init__.py +0 -0
  116. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/losses/loss_ccc.py +0 -0
  117. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/losses/loss_softf1loss.py +0 -0
  118. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/modelrunner.py +0 -0
  119. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/__init__.py +0 -0
  120. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model.py +0 -0
  121. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_bayes.py +0 -0
  122. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_cnn.py +0 -0
  123. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_gmm.py +0 -0
  124. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_knn.py +0 -0
  125. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_knn_reg.py +0 -0
  126. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_lin_reg.py +0 -0
  127. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_mlp.py +0 -0
  128. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_mlp_regression.py +0 -0
  129. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_svm.py +0 -0
  130. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_svr.py +0 -0
  131. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_tree.py +0 -0
  132. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_tree_reg.py +0 -0
  133. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_xgb.py +0 -0
  134. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/models/model_xgr.py +0 -0
  135. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/multidb.py +0 -0
  136. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/nkuluflag.py +0 -0
  137. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/nkululeko.py +0 -0
  138. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/plots.py +0 -0
  139. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/predict.py +0 -0
  140. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/__init__.py +0 -0
  141. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/defines.py +0 -0
  142. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/latex_writer.py +0 -0
  143. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/report.py +0 -0
  144. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/report_item.py +0 -0
  145. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/reporter.py +0 -0
  146. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/reporting/result.py +0 -0
  147. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/resample.py +0 -0
  148. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/runmanager.py +0 -0
  149. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/scaler.py +0 -0
  150. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/segment.py +0 -0
  151. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/segmenting/__init__.py +0 -0
  152. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  153. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/segmenting/seg_silero.py +0 -0
  154. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/syllable_nuclei.py +0 -0
  155. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/test.py +0 -0
  156. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/test_predictor.py +0 -0
  157. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/test_pretrain.py +0 -0
  158. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/utils/__init__.py +0 -0
  159. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/utils/files.py +0 -0
  160. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/utils/stats.py +0 -0
  161. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko/utils/util.py +0 -0
  162. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  163. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko.egg-info/requires.txt +0 -0
  164. {nkululeko-0.85.2 → nkululeko-0.86.0}/nkululeko.egg-info/top_level.txt +0 -0
  165. {nkululeko-0.85.2 → nkululeko-0.86.0}/pyproject.toml +0 -0
  166. {nkululeko-0.85.2 → nkululeko-0.86.0}/setup.cfg +0 -0
  167. {nkululeko-0.85.2 → nkululeko-0.86.0}/setup.py +0 -0
  168. {nkululeko-0.85.2 → nkululeko-0.86.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,12 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.86.0
5
+ --------------
6
+ * added regression to finetuning
7
+ * added other transformer models to finetuning
8
+ * added output of the train/dev feature sets actually used by the model
9
+
4
10
  Version 0.85.2
5
11
  --------------
6
12
  * added data, and automatic task label detection
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.85.2
3
+ Version: 0.86.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,12 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.86.0
337
+ --------------
338
+ * added regression to finetuning
339
+ * added other transformer models to finetuning
340
+ * added output of the train/dev feature sets actually used by the model
341
+
336
342
  Version 0.85.2
337
343
  --------------
338
344
  * added data, and automatic task label detection
@@ -1,2 +1,2 @@
1
- VERSION="0.85.2"
1
+ VERSION="0.86.0"
2
2
  SAMPLING_RATE = 16000
@@ -30,15 +30,14 @@ from nkululeko.utils.util import Util
30
30
 
31
31
 
32
32
  class Experiment:
33
- """Main class specifying an experiment"""
33
+ """Main class specifying an experiment."""
34
34
 
35
35
  def __init__(self, config_obj):
36
- """
37
- Parameters
38
- ----------
39
- config_obj : a config parser object that sets the experiment parameters and being set as a global object.
40
- """
36
+ """Constructor.
41
37
 
38
+ Args:
39
+ - config_obj : a config parser object that sets the experiment parameters and being set as a global object.
40
+ """
42
41
  self.set_globals(config_obj)
43
42
  self.name = glob_conf.config["EXP"]["name"]
44
43
  self.root = os.path.join(glob_conf.config["EXP"]["root"], "")
@@ -109,15 +108,13 @@ class Experiment:
109
108
  # print keys/column
110
109
  dbs = ",".join(list(self.datasets.keys()))
111
110
  labels = self.util.config_val("DATA", "labels", False)
112
- auto_labels = list(
113
- next(iter(self.datasets.values())).df[self.target].unique()
114
- )
111
+ auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
115
112
  if labels:
116
113
  self.labels = ast.literal_eval(labels)
117
114
  self.util.debug(f"Target labels (from config): {labels}")
118
115
  else:
119
116
  self.labels = auto_labels
120
- self.util.debug(f"Target labels (from database): {auto_labels}")
117
+ self.util.debug(f"Target labels (from database): {auto_labels}")
121
118
  glob_conf.set_labels(self.labels)
122
119
  self.util.debug(f"loaded databases {dbs}")
123
120
 
@@ -160,8 +157,7 @@ class Experiment:
160
157
  data.split()
161
158
  data.prepare_labels()
162
159
  self.df_test = pd.concat(
163
- [self.df_test, self.util.make_segmented_index(
164
- data.df_test)]
160
+ [self.df_test, self.util.make_segmented_index(data.df_test)]
165
161
  )
166
162
  self.df_test.is_labeled = data.is_labeled
167
163
  self.df_test.got_gender = self.got_gender
@@ -262,8 +258,7 @@ class Experiment:
262
258
  test_cats = self.df_test[self.target].unique()
263
259
  else:
264
260
  # if there is no target, copy a dummy label
265
- self.df_test = self._add_random_target(
266
- self.df_test).astype("str")
261
+ self.df_test = self._add_random_target(self.df_test).astype("str")
267
262
  train_cats = self.df_train[self.target].unique()
268
263
  # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
269
264
  # print(f"train_cats with target {self.target}: {train_cats}")
@@ -271,8 +266,7 @@ class Experiment:
271
266
  if type(test_cats) == np.ndarray:
272
267
  self.util.debug(f"Categories test (nd.array): {test_cats}")
273
268
  else:
274
- self.util.debug(
275
- f"Categories test (list): {list(test_cats)}")
269
+ self.util.debug(f"Categories test (list): {list(test_cats)}")
276
270
  if type(train_cats) == np.ndarray:
277
271
  self.util.debug(f"Categories train (nd.array): {train_cats}")
278
272
  else:
@@ -295,8 +289,7 @@ class Experiment:
295
289
 
296
290
  target_factor = self.util.config_val("DATA", "target_divide_by", False)
297
291
  if target_factor:
298
- self.df_test[self.target] = self.df_test[self.target] / \
299
- float(target_factor)
292
+ self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
300
293
  self.df_train[self.target] = self.df_train[self.target] / float(
301
294
  target_factor
302
295
  )
@@ -319,16 +312,14 @@ class Experiment:
319
312
  def plot_distribution(self, df_labels):
320
313
  """Plot the distribution of samples and speaker per target class and biological sex"""
321
314
  plot = Plots()
322
- sample_selection = self.util.config_val(
323
- "EXPL", "sample_selection", "all")
315
+ sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
324
316
  plot.plot_distributions(df_labels)
325
317
  if self.got_speaker:
326
318
  plot.plot_distributions_speaker(df_labels)
327
319
 
328
320
  def extract_test_feats(self):
329
321
  self.feats_test = pd.DataFrame()
330
- feats_name = "_".join(ast.literal_eval(
331
- glob_conf.config["DATA"]["tests"]))
322
+ feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
332
323
  feats_types = self.util.config_val_list("FEATS", "type", ["os"])
333
324
  self.feature_extractor = FeatureExtractor(
334
325
  self.df_test, feats_types, feats_name, "test"
@@ -345,8 +336,7 @@ class Experiment:
345
336
 
346
337
  """
347
338
  df_train, df_test = self.df_train, self.df_test
348
- feats_name = "_".join(ast.literal_eval(
349
- glob_conf.config["DATA"]["databases"]))
339
+ feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
350
340
  self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
351
341
  feats_types = self.util.config_val_list("FEATS", "type", [])
352
342
  # for some models no features are needed
@@ -380,20 +370,22 @@ class Experiment:
380
370
  f"test feats ({self.feats_test.shape[0]}) != test labels"
381
371
  f" ({self.df_test.shape[0]})"
382
372
  )
383
- self.df_test = self.df_test[self.df_test.index.isin(
384
- self.feats_test.index)]
385
- self.util.warn(f"mew test labels shape: {self.df_test.shape[0]}")
373
+ self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
374
+ self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
386
375
 
387
376
  self._check_scale()
377
+ # store = self.util.get_path("store")
378
+ # store_format = self.util.config_val("FEATS", "store_format", "pkl")
379
+ # storage = f"{store}test_feats.{store_format}"
380
+ # self.util.write_store(self.feats_test, storage, store_format)
381
+ # storage = f"{store}train_feats.{store_format}"
382
+ # self.util.write_store(self.feats_train, storage, store_format)
388
383
 
389
384
  def augment(self):
390
- """
391
- Augment the selected samples
392
- """
385
+ """Augment the selected samples."""
393
386
  from nkululeko.augmenting.augmenter import Augmenter
394
387
 
395
- sample_selection = self.util.config_val(
396
- "AUGMENT", "sample_selection", "all")
388
+ sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
397
389
  if sample_selection == "all":
398
390
  df = pd.concat([self.df_train, self.df_test])
399
391
  elif sample_selection == "train":
@@ -488,8 +480,7 @@ class Experiment:
488
480
  """
489
481
  from nkululeko.augmenting.randomsplicer import Randomsplicer
490
482
 
491
- sample_selection = self.util.config_val(
492
- "AUGMENT", "sample_selection", "all")
483
+ sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
493
484
  if sample_selection == "all":
494
485
  df = pd.concat([self.df_train, self.df_test])
495
486
  elif sample_selection == "train":
@@ -510,8 +501,7 @@ class Experiment:
510
501
  plot_feats = eval(
511
502
  self.util.config_val("EXPL", "feature_distributions", "False")
512
503
  )
513
- sample_selection = self.util.config_val(
514
- "EXPL", "sample_selection", "all")
504
+ sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
515
505
  # get the data labels
516
506
  if sample_selection == "all":
517
507
  df_labels = pd.concat([self.df_train, self.df_test])
@@ -574,8 +564,7 @@ class Experiment:
574
564
  for scat_target in scat_targets:
575
565
  if self.util.is_categorical(df_labels[scat_target]):
576
566
  for scatter in scatters:
577
- plots.scatter_plot(
578
- df_feats, df_labels, scat_target, scatter)
567
+ plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
579
568
  else:
580
569
  self.util.debug(
581
570
  f"{self.name}: binning continuous variable to categories"
@@ -590,6 +579,8 @@ class Experiment:
590
579
  )
591
580
 
592
581
  def _check_scale(self):
582
+ self.util.save_to_store(self.feats_train, "feats_train")
583
+ self.util.save_to_store(self.feats_test, "feats_test")
593
584
  scale_feats = self.util.config_val("FEATS", "scale", False)
594
585
  # print the scale
595
586
  self.util.debug(f"scaler: {scale_feats}")
@@ -664,8 +655,7 @@ class Experiment:
664
655
  preds = best.preds
665
656
  speakers = self.df_test.speaker.values
666
657
  print(f"{len(truths)} {len(preds)} {len(speakers) }")
667
- df = pd.DataFrame(
668
- data={"truth": truths, "pred": preds, "speaker": speakers})
658
+ df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
669
659
  plot_name = "result_combined_per_speaker"
670
660
  self.util.debug(
671
661
  f"plotting speaker combination ({function}) confusion matrix to"
@@ -65,28 +65,28 @@ class Opensmileset(Featureset):
65
65
  feats = smile.process_signal(signal, sr)
66
66
  return feats.to_numpy()
67
67
 
68
- def filter(self):
69
- # use only the features that are indexed in the target dataframes
70
- self.df = self.df[self.df.index.isin(self.data_df.index)]
71
- try:
72
- # use only some features
73
- selected_features = ast.literal_eval(
74
- glob_conf.config["FEATS"]["os.features"]
75
- )
76
- self.util.debug(f"selecting features from opensmile: {selected_features}")
77
- sel_feats_df = pd.DataFrame()
78
- hit = False
79
- for feat in selected_features:
80
- try:
81
- sel_feats_df[feat] = self.df[feat]
82
- hit = True
83
- except KeyError:
84
- pass
85
- if hit:
86
- self.df = sel_feats_df
87
- self.util.debug(
88
- "new feats shape after selecting opensmile features:"
89
- f" {self.df.shape}"
90
- )
91
- except KeyError:
92
- pass
68
+ # def filter(self):
69
+ # # use only the features that are indexed in the target dataframes
70
+ # self.df = self.df[self.df.index.isin(self.data_df.index)]
71
+ # try:
72
+ # # use only some features
73
+ # selected_features = ast.literal_eval(
74
+ # glob_conf.config["FEATS"]["os.features"]
75
+ # )
76
+ # self.util.debug(f"selecting features from opensmile: {selected_features}")
77
+ # sel_feats_df = pd.DataFrame()
78
+ # hit = False
79
+ # for feat in selected_features:
80
+ # try:
81
+ # sel_feats_df[feat] = self.df[feat]
82
+ # hit = True
83
+ # except KeyError:
84
+ # pass
85
+ # if hit:
86
+ # self.df = sel_feats_df
87
+ # self.util.debug(
88
+ # "new feats shape after selecting opensmile features:"
89
+ # f" {self.df.shape}"
90
+ # )
91
+ # except KeyError:
92
+ # pass
@@ -15,7 +15,7 @@ class Featureset:
15
15
  self.name = name
16
16
  self.data_df = data_df
17
17
  self.util = Util("featureset")
18
- self.feats_types = feats_type
18
+ self.feats_type = feats_type
19
19
 
20
20
  def extract(self):
21
21
  pass
@@ -25,8 +25,7 @@ class Featureset:
25
25
  self.df = self.df[self.df.index.isin(self.data_df.index)]
26
26
  try:
27
27
  # use only some features
28
- selected_features = ast.literal_eval(
29
- glob_conf.config["FEATS"]["features"])
28
+ selected_features = ast.literal_eval(glob_conf.config["FEATS"]["features"])
30
29
  self.util.debug(f"selecting features: {selected_features}")
31
30
  sel_feats_df = pd.DataFrame()
32
31
  hit = False
@@ -35,11 +34,12 @@ class Featureset:
35
34
  sel_feats_df[feat] = self.df[feat]
36
35
  hit = True
37
36
  except KeyError:
37
+ self.util.warn(f"non existent feature in {self.feats_type}: {feat}")
38
38
  pass
39
39
  if hit:
40
40
  self.df = sel_feats_df
41
41
  self.util.debug(
42
- f"new feats shape after selecting features: {self.df.shape}"
42
+ f"new feats shape after selecting features for {self.feats_type}: {self.df.shape}"
43
43
  )
44
44
  except KeyError:
45
45
  pass