nkululeko 0.81.6__tar.gz → 0.82.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. {nkululeko-0.81.6 → nkululeko-0.82.0}/CHANGELOG.md +9 -0
  2. {nkululeko-0.81.6/nkululeko.egg-info → nkululeko-0.82.0}/PKG-INFO +20 -1
  3. {nkululeko-0.81.6 → nkululeko-0.82.0}/README.md +10 -0
  4. nkululeko-0.82.0/meta/demos/multiple_exeriments/do_experiments.py +35 -0
  5. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/constants.py +1 -1
  6. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_agender.py +6 -4
  7. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_auddim.py +5 -3
  8. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_audmodel.py +5 -3
  9. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_clap.py +10 -6
  10. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_hubert.py +3 -2
  11. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_import.py +2 -2
  12. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_mos.py +2 -2
  13. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_opensmile.py +10 -24
  14. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_oxbow.py +16 -11
  15. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_praat.py +8 -5
  16. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_spectra.py +3 -2
  17. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_squim.py +2 -2
  18. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_trill.py +10 -6
  19. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_wav2vec2.py +16 -7
  20. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_wavlm.py +1 -4
  21. nkululeko-0.82.0/nkululeko/feat_extract/feats_whisper.py +110 -0
  22. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/featureset.py +6 -3
  23. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feature_extractor.py +15 -4
  24. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/multidb.py +5 -10
  25. nkululeko-0.82.0/nkululeko/nkuluflag.py +95 -0
  26. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/nkululeko.py +6 -4
  27. {nkululeko-0.81.6 → nkululeko-0.82.0/nkululeko.egg-info}/PKG-INFO +20 -1
  28. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/SOURCES.txt +2 -1
  29. nkululeko-0.81.6/meta/demos/multiple_exeriments/do_experiments.py +0 -48
  30. nkululeko-0.81.6/meta/demos/multiple_exeriments/parse_nkulu.py +0 -112
  31. {nkululeko-0.81.6 → nkululeko-0.82.0}/LICENSE +0 -0
  32. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/aesdd/process_database.py +0 -0
  33. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids/process_database.py +0 -0
  34. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids_orig/process_database.py +0 -0
  35. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids_test/process_database.py +0 -0
  36. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ased/process_database.py +0 -0
  37. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/asvp-esd/process_database.py +0 -0
  38. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/baved/process_database.py +0 -0
  39. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/cafe/process_database.py +0 -0
  40. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/clac/process_database.py +0 -0
  41. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/cmu-mosei/process_database.py +0 -0
  42. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/demos/process_database.py +0 -0
  43. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ekorpus/process_database.py +0 -0
  44. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emns/process_database.py +0 -0
  45. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emofilm/convert_to_16k.py +0 -0
  46. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emofilm/process_database.py +0 -0
  47. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emorynlp/process_database.py +0 -0
  48. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emov-db/process_database.py +0 -0
  49. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emovo/process_database.py +0 -0
  50. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emozionalmente/create.py +0 -0
  51. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/enterface/process_database.py +0 -0
  52. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/esd/process_database.py +0 -0
  53. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/gerparas/process_database.py +0 -0
  54. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/iemocap/process_database.py +0 -0
  55. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/jl/process_database.py +0 -0
  56. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/jtes/process_database.py +0 -0
  57. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/meld/process_database.py +0 -0
  58. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mesd/process_database.py +0 -0
  59. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mess/process_database.py +0 -0
  60. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mlendsnd/process_database.py +0 -0
  61. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/msp-improv/process_database2.py +0 -0
  62. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/msp-podcast/process_database.py +0 -0
  63. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/oreau2/process_database.py +0 -0
  64. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/portuguese/process_database.py +0 -0
  65. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ravdess/process_database.py +0 -0
  66. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ravdess/process_database_speaker.py +0 -0
  67. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/savee/process_database.py +0 -0
  68. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/shemo/process_database.py +0 -0
  69. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/subesco/process_database.py +0 -0
  70. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/tess/process_database.py +0 -0
  71. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/thorsten-emotional/process_database.py +0 -0
  72. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/urdu/process_database.py +0 -0
  73. {nkululeko-0.81.6 → nkululeko-0.82.0}/data/vivae/process_database.py +0 -0
  74. {nkululeko-0.81.6 → nkululeko-0.82.0}/docs/source/conf.py +0 -0
  75. {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/demo_best_model.py +0 -0
  76. {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/my_experiment.py +0 -0
  77. {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/my_experiment_local.py +0 -0
  78. {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/plot_faster_anim.py +0 -0
  79. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/__init__.py +0 -0
  80. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/aug_train.py +0 -0
  81. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augment.py +0 -0
  82. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/__init__.py +0 -0
  83. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/augmenter.py +0 -0
  84. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/randomsplicer.py +0 -0
  85. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/randomsplicing.py +0 -0
  86. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/resampler.py +0 -0
  87. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/__init__.py +0 -0
  88. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_age.py +0 -0
  89. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_arousal.py +0 -0
  90. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_dominance.py +0 -0
  91. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_gender.py +0 -0
  92. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_mos.py +0 -0
  93. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_pesq.py +0 -0
  94. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_sdr.py +0 -0
  95. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_snr.py +0 -0
  96. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_stoi.py +0 -0
  97. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_valence.py +0 -0
  98. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/estimate_snr.py +0 -0
  99. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/cacheddataset.py +0 -0
  100. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/__init__.py +0 -0
  101. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/dataset.py +0 -0
  102. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/dataset_csv.py +0 -0
  103. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo.py +0 -0
  104. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo_feats.py +0 -0
  105. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo_predictor.py +0 -0
  106. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/experiment.py +0 -0
  107. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/explore.py +0 -0
  108. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/export.py +0 -0
  109. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/__init__.py +0 -0
  110. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  111. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
  112. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_mld.py +0 -0
  113. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_snr.py +0 -0
  114. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  115. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  116. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/file_checker.py +0 -0
  117. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/filter_data.py +0 -0
  118. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/glob_conf.py +0 -0
  119. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/__init__.py +0 -0
  120. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/loss_ccc.py +0 -0
  121. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/loss_softf1loss.py +0 -0
  122. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/modelrunner.py +0 -0
  123. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/__init__.py +0 -0
  124. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model.py +0 -0
  125. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_bayes.py +0 -0
  126. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_cnn.py +0 -0
  127. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_gmm.py +0 -0
  128. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_knn.py +0 -0
  129. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_knn_reg.py +0 -0
  130. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_lin_reg.py +0 -0
  131. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_mlp.py +0 -0
  132. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_mlp_regression.py +0 -0
  133. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_svm.py +0 -0
  134. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_svr.py +0 -0
  135. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_tree.py +0 -0
  136. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_tree_reg.py +0 -0
  137. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_xgb.py +0 -0
  138. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_xgr.py +0 -0
  139. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/plots.py +0 -0
  140. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/predict.py +0 -0
  141. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporter.py +0 -0
  142. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/__init__.py +0 -0
  143. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/defines.py +0 -0
  144. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/latex_writer.py +0 -0
  145. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/report.py +0 -0
  146. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/report_item.py +0 -0
  147. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/reporter.py +0 -0
  148. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/result.py +0 -0
  149. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/resample.py +0 -0
  150. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/runmanager.py +0 -0
  151. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/scaler.py +0 -0
  152. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segment.py +0 -0
  153. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/__init__.py +0 -0
  154. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  155. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/seg_silero.py +0 -0
  156. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/syllable_nuclei.py +0 -0
  157. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/test.py +0 -0
  158. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/test_predictor.py +0 -0
  159. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/__init__.py +0 -0
  160. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/files.py +0 -0
  161. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/stats.py +0 -0
  162. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/util.py +0 -0
  163. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/dependency_links.txt +0 -0
  164. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/requires.txt +0 -0
  165. {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/top_level.txt +0 -0
  166. {nkululeko-0.81.6 → nkululeko-0.82.0}/pyproject.toml +0 -0
  167. {nkululeko-0.81.6 → nkululeko-0.82.0}/setup.cfg +0 -0
  168. {nkululeko-0.81.6 → nkululeko-0.82.0}/setup.py +0 -0
  169. {nkululeko-0.81.6 → nkululeko-0.82.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,15 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.82.0
5
+ --------------
6
+ * added nkuluflag module
7
+
8
+ Version 0.81.7
9
+ --------------
10
+ * bugfixes
11
+ * added whisper feature extractor
12
+
4
13
  Version 0.81.6
5
14
  --------------
6
15
  * updated documentation
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.81.6
3
+ Version: 0.82.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -42,6 +42,8 @@ Requires-Dist: umap-learn
42
42
  Requires-Dist: xgboost
43
43
  Requires-Dist: pylatex
44
44
 
45
+ usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
46
+ [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
45
47
  - [Overview](#overview)
46
48
  - [Confusion matrix](#confusion-matrix)
47
49
  - [Epoch progression](#epoch-progression)
@@ -203,6 +205,14 @@ All of them take *--config <my_config.ini>* as an argument.
203
205
  * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
204
206
  * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
205
207
  * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
208
+ * **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
209
+ * usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
210
+ [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
211
+
212
+
213
+
214
+
215
+
206
216
 
207
217
  There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
208
218
  * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -323,6 +333,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
323
333
  Changelog
324
334
  =========
325
335
 
336
+ Version 0.82.0
337
+ --------------
338
+ * added nkuluflag module
339
+
340
+ Version 0.81.7
341
+ --------------
342
+ * bugfixes
343
+ * added whisper feature extractor
344
+
326
345
  Version 0.81.6
327
346
  --------------
328
347
  * updated documentation
@@ -1,3 +1,5 @@
1
+ usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
2
+ [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
1
3
  - [Overview](#overview)
2
4
  - [Confusion matrix](#confusion-matrix)
3
5
  - [Epoch progression](#epoch-progression)
@@ -159,6 +161,14 @@ All of them take *--config <my_config.ini>* as an argument.
159
161
  * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
160
162
  * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
161
163
  * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
164
+ * **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
165
+ * usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
166
+ [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
167
+
168
+
169
+
170
+
171
+
162
172
 
163
173
  There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
164
174
  * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -0,0 +1,35 @@
1
+ import os
2
+
3
+
4
+ classifiers = [
5
+ {"--model": "mlp", "--layers": "\"{'l1':64,'l2':16}\"", "--epochs": 100},
6
+ {
7
+ "--model": "mlp",
8
+ "--layers": "\"{'l1':128,'l2':64,'l3':16}\"",
9
+ "--learning_rate": ".01",
10
+ "--drop": ".3",
11
+ "--epochs": 100,
12
+ },
13
+ {"--model": "xgb", "--epochs": 1},
14
+ {"--model": "svm", "--epochs": 1},
15
+ ]
16
+
17
+ features = [
18
+ {"--feat": "os"},
19
+ # {'--feat': 'os',
20
+ # '--set': 'ComParE_2016',
21
+ # },
22
+ {"--feat": "audmodel"},
23
+ ]
24
+
25
+
26
+ for c in classifiers:
27
+ for f in features:
28
+ cmd = "python -m nkululeko.nkuluflag --config exp.ini "
29
+ for item in c:
30
+ cmd += f"{item} {c[item]} "
31
+ for item in f:
32
+ cmd += f"{item} {f[item]} "
33
+ print(cmd)
34
+ os.system(cmd)
35
+ # print(f"results: {result}, {last_epoch}")
@@ -1,2 +1,2 @@
1
- VERSION="0.81.6"
1
+ VERSION="0.82.0"
2
2
  SAMPLING_RATE = 16000
@@ -9,16 +9,17 @@ import numpy as np
9
9
  import audinterface
10
10
 
11
11
 
12
- class AudModelAgenderSet(Featureset):
12
+ class AgenderSet(Featureset):
13
13
  """
14
14
  Embeddings from the wav2vec2-based model fine-tuned on agender data, described in the paper
15
15
  "Speech-based Age and Gender Prediction with Transformers"
16
16
  https://arxiv.org/abs/2306.16962
17
17
  """
18
18
 
19
- def __init__(self, name, data_df):
20
- super().__init__(name, data_df)
19
+ def __init__(self, name, data_df, feats_type):
20
+ super().__init__(name, data_df, feats_type)
21
21
  self.model_loaded = False
22
+ self.feats_type = feats_type
22
23
 
23
24
  def _load_model(self):
24
25
  model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
@@ -28,7 +29,8 @@ class AudModelAgenderSet(Featureset):
28
29
  if not os.path.isdir(model_root):
29
30
  cache_root = audeer.mkdir("cache")
30
31
  model_root = audeer.mkdir(model_root)
31
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
32
+ archive_path = audeer.download_url(
33
+ model_url, cache_root, verbose=True)
32
34
  audeer.extract_archive(archive_path, model_root)
33
35
  device = self.util.config_val("MODEL", "device", "cpu")
34
36
  self.model = audonnx.load(model_root, device=device)
@@ -21,9 +21,10 @@ class AuddimSet(Featureset):
21
21
  https://arxiv.org/abs/2203.07378.
22
22
  """
23
23
 
24
- def __init__(self, name, data_df):
25
- super().__init__(name, data_df)
24
+ def __init__(self, name, data_df, feats_type):
25
+ super().__init__(name, data_df, feats_type)
26
26
  self.model_loaded = False
27
+ self.feats_types = feats_type
27
28
 
28
29
  def _load_model(self):
29
30
  model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -31,7 +32,8 @@ class AuddimSet(Featureset):
31
32
  if not os.path.isdir(model_root):
32
33
  cache_root = audeer.mkdir("cache")
33
34
  model_root = audeer.mkdir(model_root)
34
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
35
+ archive_path = audeer.download_url(
36
+ model_url, cache_root, verbose=True)
35
37
  audeer.extract_archive(archive_path, model_root)
36
38
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
37
39
  device = self.util.config_val("MODEL", "device", cuda)
@@ -19,9 +19,10 @@ class AudmodelSet(Featureset):
19
19
  https://arxiv.org/abs/2203.07378.
20
20
  """
21
21
 
22
- def __init__(self, name, data_df):
23
- super().__init__(name, data_df)
22
+ def __init__(self, name, data_df, feats_type):
23
+ super().__init__(name, data_df, feats_type)
24
24
  self.model_loaded = False
25
+ self.feats_type = feats_type
25
26
 
26
27
  def _load_model(self):
27
28
  model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
@@ -29,7 +30,8 @@ class AudmodelSet(Featureset):
29
30
  if not os.path.isdir(model_root):
30
31
  cache_root = audeer.mkdir("cache")
31
32
  model_root = audeer.mkdir(model_root)
32
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
33
+ archive_path = audeer.download_url(
34
+ model_url, cache_root, verbose=True)
33
35
  audeer.extract_archive(archive_path, model_root)
34
36
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
35
37
  device = self.util.config_val("MODEL", "device", cuda)
@@ -11,14 +11,15 @@ import laion_clap
11
11
  import audiofile
12
12
 
13
13
 
14
- class Clap(Featureset):
14
+ class ClapSet(Featureset):
15
15
  """Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""
16
16
 
17
- def __init__(self, name, data_df):
17
+ def __init__(self, name, data_df, feats_type):
18
18
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
19
- super().__init__(name, data_df)
19
+ super().__init__(name, data_df, feats_type)
20
20
  self.device = self.util.config_val("MODEL", "device", "cpu")
21
21
  self.model_initialized = False
22
+ self.feat_type = feats_type
22
23
 
23
24
  def init_model(self):
24
25
  # load model
@@ -32,12 +33,14 @@ class Clap(Featureset):
32
33
  store = self.util.get_path("store")
33
34
  store_format = self.util.config_val("FEATS", "store_format", "pkl")
34
35
  storage = f"{store}{self.name}.{store_format}"
35
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
36
+ extract = self.util.config_val(
37
+ "FEATS", "needs_feature_extraction", False)
36
38
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
37
39
  if extract or no_reuse or not os.path.isfile(storage):
38
40
  if not self.model_initialized:
39
41
  self.init_model()
40
- self.util.debug("extracting clap embeddings, this might take a while...")
42
+ self.util.debug(
43
+ "extracting clap embeddings, this might take a while...")
41
44
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
42
45
  length = len(self.data_df.index)
43
46
  for idx, (file, start, end) in enumerate(
@@ -51,7 +54,8 @@ class Clap(Featureset):
51
54
  )
52
55
  emb = self.get_embeddings(signal, sampling_rate)
53
56
  emb_series[idx] = emb
54
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
57
+ self.df = pd.DataFrame(
58
+ emb_series.values.tolist(), index=self.data_df.index)
55
59
  self.util.write_store(self.df, storage, store_format)
56
60
  try:
57
61
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -1,6 +1,7 @@
1
1
  # feats_hubert.py
2
2
  # HuBERT feature extractor for Nkululeko
3
- # example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
3
+ # example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
4
+ # "hubert-base-ls960", "hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"
4
5
 
5
6
 
6
7
  import os
@@ -22,7 +23,7 @@ class Hubert(Featureset):
22
23
  def __init__(self, name, data_df, feat_type):
23
24
  """Constructor. is_train is needed to distinguish from test/dev sets,
24
25
  because they use the codebook from the training"""
25
- super().__init__(name, data_df)
26
+ super().__init__(name, data_df, feat_type)
26
27
  # check if device is not set, use cuda if available
27
28
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
28
29
  self.device = self.util.config_val("MODEL", "device", cuda)
@@ -11,8 +11,8 @@ from nkululeko.feat_extract.featureset import Featureset
11
11
  class ImportSet(Featureset):
12
12
  """Class to import features that have been compiled elsewhere"""
13
13
 
14
- def __init__(self, name, data_df):
15
- super().__init__(name, data_df)
14
+ def __init__(self, name, data_df, feats_type):
15
+ super().__init__(name, data_df, feats_type)
16
16
 
17
17
  def extract(self):
18
18
  """Import the features."""
@@ -27,9 +27,9 @@ from nkululeko.feat_extract.featureset import Featureset
27
27
  class MosSet(Featureset):
28
28
  """Class to predict MOS (mean opinion score)"""
29
29
 
30
- def __init__(self, name, data_df):
30
+ def __init__(self, name, data_df, feats_type):
31
31
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
32
- super().__init__(name, data_df)
32
+ super().__init__(name, data_df, feats_type)
33
33
  self.device = self.util.config_val("MODEL", "device", "cpu")
34
34
  self.model_initialized = False
35
35
 
@@ -8,31 +8,21 @@ import opensmile
8
8
 
9
9
 
10
10
  class Opensmileset(Featureset):
11
- def __init__(self, name, data_df):
12
- super().__init__(name, data_df)
11
+ def __init__(self, name, data_df, feats_type=None, config_file=None):
12
+ super().__init__(name, data_df, feats_type)
13
13
  self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
14
14
  try:
15
15
  self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
16
- #'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
16
+ # 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
17
17
  except AttributeError:
18
- self.util.error(
19
- f"something is wrong with feature set: {self.featset}"
20
- )
18
+ self.util.error(f"something is wrong with feature set: {self.featset}")
21
19
  self.featlevel = self.util.config_val("FEATS", "level", "functionals")
22
20
  try:
23
- self.featlevel = self.featlevel.replace(
24
- "lld", "LowLevelDescriptors"
25
- )
26
- self.featlevel = self.featlevel.replace(
27
- "functionals", "Functionals"
28
- )
29
- self.feature_level = eval(
30
- f"opensmile.FeatureLevel.{self.featlevel}"
31
- )
21
+ self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
22
+ self.featlevel = self.featlevel.replace("functionals", "Functionals")
23
+ self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
32
24
  except AttributeError:
33
- self.util.error(
34
- f"something is wrong with feature level: {self.featlevel}"
35
- )
25
+ self.util.error(f"something is wrong with feature level: {self.featlevel}")
36
26
 
37
27
  def extract(self):
38
28
  """Extract the features based on the initialized dataset or re-open them when found on disk."""
@@ -44,9 +34,7 @@ class Opensmileset(Featureset):
44
34
  )
45
35
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
46
36
  if extract or not os.path.isfile(storage) or no_reuse:
47
- self.util.debug(
48
- "extracting openSmile features, this might take a while..."
49
- )
37
+ self.util.debug("extracting openSmile features, this might take a while...")
50
38
  smile = opensmile.Smile(
51
39
  feature_set=self.feature_set,
52
40
  feature_level=self.feature_level,
@@ -85,9 +73,7 @@ class Opensmileset(Featureset):
85
73
  selected_features = ast.literal_eval(
86
74
  glob_conf.config["FEATS"]["os.features"]
87
75
  )
88
- self.util.debug(
89
- f"selecting features from opensmile: {selected_features}"
90
- )
76
+ self.util.debug(f"selecting features from opensmile: {selected_features}")
91
77
  sel_feats_df = pd.DataFrame()
92
78
  hit = False
93
79
  for feat in selected_features:
@@ -10,9 +10,10 @@ import opensmile
10
10
  class Openxbow(Featureset):
11
11
  """Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
12
12
 
13
- def __init__(self, name, data_df, is_train=False):
13
+ def __init__(self, name, data_df, feats_type, is_train=False):
14
14
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
15
- super().__init__(name, data_df)
15
+ super().__init__(name, data_df, feats_type)
16
+ self.feats_types = feats_type
16
17
  self.is_train = is_train
17
18
 
18
19
  def extract(self):
@@ -21,11 +22,13 @@ class Openxbow(Featureset):
21
22
  self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
22
23
  store = self.util.get_path("store")
23
24
  storage = f"{store}{self.name}_{self.featset}.pkl"
24
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
25
+ extract = self.util.config_val(
26
+ "FEATS", "needs_feature_extraction", False)
25
27
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
26
28
  if extract or no_reuse or not os.path.isfile(storage):
27
29
  # extract smile features first
28
- self.util.debug("extracting openSmile features, this might take a while...")
30
+ self.util.debug(
31
+ "extracting openSmile features, this might take a while...")
29
32
  smile = opensmile.Smile(
30
33
  feature_set=self.feature_set,
31
34
  feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
@@ -48,7 +51,13 @@ class Openxbow(Featureset):
48
51
  # save the smile features
49
52
  smile_df.to_csv(lld_name, sep=";", header=False)
50
53
  # get the path of the xbow java jar file
51
- xbow_path = self.util.config_val("FEATS", "xbow.model", "../openXBOW/")
54
+ xbow_path = self.util.config_val(
55
+ "FEATS", "xbow.model", "openXBOW")
56
+ # check if the JAR file exists
57
+ if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
58
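+ # if it is missing, fetch it by cloning the openXBOW repository with git (the clone lands in ./openXBOW under the current working directory, not necessarily in xbow_path)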
59
+ os.system(
60
+ f"git clone https://github.com/openXBOW/openXBOW")
52
61
  # get the size of the codebook
53
62
  size = self.util.config_val("FEATS", "size", 500)
54
63
  # get the number of assignments
@@ -57,16 +66,12 @@ class Openxbow(Featureset):
57
66
  if self.is_train:
58
67
  # store the codebook
59
68
  os.system(
60
- f"java -jar {xbow_path}openXBOW.jar -i"
61
- f" {lld_name} -standardizeInput -log -o"
62
- f" {xbow_name} -size {size} -a {assignments} -B"
63
- f" {codebook_name}"
69
+ f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
64
70
  )
65
71
  else:
66
72
  # use the codebook
67
73
  os.system(
68
- f"java -jar {xbow_path}openXBOW.jar -i {lld_name} "
69
- f" -o {xbow_name} -b {codebook_name}"
74
+ f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
70
75
  )
71
76
  # read in the result from disk
72
77
  xbow_df = pd.read_csv(xbow_name, sep=";", header=None)
@@ -18,18 +18,20 @@ class PraatSet(Featureset):
18
18
 
19
19
  """
20
20
 
21
- def __init__(self, name, data_df):
22
- super().__init__(name, data_df)
21
+ def __init__(self, name, data_df, feats_type):
22
+ super().__init__(name, data_df, feats_type)
23
23
 
24
24
  def extract(self):
25
25
  """Extract the features based on the initialized dataset or re-open them when found on disk."""
26
26
  store = self.util.get_path("store")
27
27
  store_format = self.util.config_val("FEATS", "store_format", "pkl")
28
28
  storage = f"{store}{self.name}.{store_format}"
29
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
29
+ extract = self.util.config_val(
30
+ "FEATS", "needs_feature_extraction", False)
30
31
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
31
32
  if extract or no_reuse or not os.path.isfile(storage):
32
- self.util.debug("extracting Praat features, this might take a while...")
33
+ self.util.debug(
34
+ "extracting Praat features, this might take a while...")
33
35
  self.df = feinberg_praat.compute_features(self.data_df.index)
34
36
  self.df = self.df.set_index(self.data_df.index)
35
37
  for i, col in enumerate(self.df.columns):
@@ -52,7 +54,8 @@ class PraatSet(Featureset):
52
54
  self.df = self.df.astype(float)
53
55
 
54
56
  def extract_sample(self, signal, sr):
55
- import audiofile, audformat
57
+ import audiofile
58
+ import audformat
56
59
 
57
60
  tmp_audio_names = ["praat_audio_tmp.wav"]
58
61
  audiofile.write(tmp_audio_names[0], signal, sr)
@@ -4,6 +4,7 @@ feats_spectra.py
4
4
  Inspired by code from Su Lei
5
5
 
6
6
  """
7
+
7
8
  import os
8
9
  import torchaudio
9
10
  import torchaudio.transforms as T
@@ -23,9 +24,9 @@ import nkululeko.glob_conf as glob_conf
23
24
 
24
25
 
25
26
  class Spectraloader(Featureset):
26
- def __init__(self, name, data_df):
27
+ def __init__(self, name, data_df, feat_type):
27
28
  """Constructor setting the name"""
28
- Featureset.__init__(self, name, data_df)
29
+ super().__init__(name, data_df, feat_type)
29
30
  self.sampling_rate = SAMPLING_RATE
30
31
  self.num_bands = int(self.util.config_val("FEATS", "fft_nbands", "64"))
31
32
  self.win_dur = int(self.util.config_val("FEATS", "fft_win_dur", "25"))
@@ -30,9 +30,9 @@ from nkululeko.utils.util import Util
30
30
  class SquimSet(Featureset):
31
31
  """Class to predict SQUIM features"""
32
32
 
33
- def __init__(self, name, data_df):
33
+ def __init__(self, name, data_df, feats_type):
34
34
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
35
- super().__init__(name, data_df)
35
+ super().__init__(name, data_df, feats_type)
36
36
  self.device = self.util.config_val("MODEL", "device", "cpu")
37
37
  self.model_initialized = False
38
38
 
@@ -1,4 +1,5 @@
1
1
  # feats_trill.py
2
+ import tensorflow_hub as hub
2
3
  import os
3
4
  import tensorflow as tf
4
5
  from numpy.core.numeric import tensordot
@@ -11,7 +12,6 @@ from nkululeko.feat_extract.featureset import Featureset
11
12
 
12
13
  # Import TF 2.X and make sure we're running eager.
13
14
  assert tf.executing_eagerly()
14
- import tensorflow_hub as hub
15
15
 
16
16
 
17
17
  class TRILLset(Featureset):
@@ -20,7 +20,7 @@ class TRILLset(Featureset):
20
20
  """https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
21
21
 
22
22
  # Initialization of the class
23
- def __init__(self, name, data_df):
23
+ def __init__(self, name, data_df, feats_type):
24
24
  """
25
25
  Initialize the class with name, data and Util instance
26
26
  Also loads the model from hub
@@ -31,7 +31,7 @@ class TRILLset(Featureset):
31
31
  :type data_df: DataFrame
32
32
  :return: None
33
33
  """
34
- super().__init__(name, data_df)
34
+ super().__init__(name, data_df, feats_type)
35
35
  # Load the model from the configured path
36
36
  model_path = self.util.config_val(
37
37
  "FEATS",
@@ -39,20 +39,24 @@ class TRILLset(Featureset):
39
39
  "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
40
40
  )
41
41
  self.module = hub.load(model_path)
42
+ self.feats_type = feats_type
42
43
 
43
44
  def extract(self):
44
45
  store = self.util.get_path("store")
45
46
  storage = f"{store}{self.name}.pkl"
46
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
47
+ extract = self.util.config_val(
48
+ "FEATS", "needs_feature_extraction", False)
47
49
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
48
50
  if extract or no_reuse or not os.path.isfile(storage):
49
- self.util.debug("extracting TRILL embeddings, this might take a while...")
51
+ self.util.debug(
52
+ "extracting TRILL embeddings, this might take a while...")
50
53
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
51
54
  length = len(self.data_df.index)
52
55
  for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
53
56
  emb = self.getEmbeddings(file)
54
57
  emb_series[idx] = emb
55
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
58
+ self.df = pd.DataFrame(
59
+ emb_series.values.tolist(), index=self.data_df.index)
56
60
  self.df.to_pickle(storage)
57
61
  try:
58
62
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -1,5 +1,11 @@
1
- # feats_wav2vec2.py
2
- # feat_types example = wav2vec2-large-robust-ft-swbd-300h
1
+ """ feats_wav2vec2.py
2
+ feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
3
+ wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
4
+
5
+ Complete list: https://huggingface.co/facebook?search_models=wav2vec2
6
+ Currently only supports wav2vec2
7
+ """
8
+
3
9
  import os
4
10
  from tqdm import tqdm
5
11
  import pandas as pd
@@ -16,11 +22,11 @@ class Wav2vec2(Featureset):
16
22
 
17
23
  def __init__(self, name, data_df, feat_type):
18
24
  """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
19
- super().__init__(name, data_df)
25
+ super().__init__(name, data_df, feat_type)
20
26
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
21
27
  self.device = self.util.config_val("MODEL", "device", cuda)
22
28
  self.model_initialized = False
23
- if feat_type == "wav2vec" or feat_type == "wav2vec2":
29
+ if feat_type == "wav2vec2":
24
30
  self.feat_type = "wav2vec2-large-robust-ft-swbd-300h"
25
31
  else:
26
32
  self.feat_type = feat_type
@@ -33,7 +39,8 @@ class Wav2vec2(Featureset):
33
39
  )
34
40
  config = transformers.AutoConfig.from_pretrained(model_path)
35
41
  layer_num = config.num_hidden_layers
36
- hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
42
+ hidden_layer = int(self.util.config_val(
43
+ "FEATS", "wav2vec2.layer", "0"))
37
44
  config.num_hidden_layers = layer_num - hidden_layer
38
45
  self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
39
46
  self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -48,7 +55,8 @@ class Wav2vec2(Featureset):
48
55
  """Extract the features or load them from disk if present."""
49
56
  store = self.util.get_path("store")
50
57
  storage = f"{store}{self.name}.pkl"
51
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
58
+ extract = self.util.config_val(
59
+ "FEATS", "needs_feature_extraction", False)
52
60
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
53
61
  if extract or no_reuse or not os.path.isfile(storage):
54
62
  if not self.model_initialized:
@@ -69,7 +77,8 @@ class Wav2vec2(Featureset):
69
77
  emb = self.get_embeddings(signal, sampling_rate, file)
70
78
  emb_series[idx] = emb
71
79
  # print(f"emb_series shape: {emb_series.shape}")
72
- self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
80
+ self.df = pd.DataFrame(
81
+ emb_series.values.tolist(), index=self.data_df.index)
73
82
  # print(f"df shape: {self.df.shape}")
74
83
  self.df.to_pickle(storage)
75
84
  try:
@@ -59,10 +59,7 @@ class Wavlm(Featureset):
59
59
  frame_offset=int(start.total_seconds() * 16000),
60
60
  num_frames=int((end - start).total_seconds() * 16000),
61
61
  )
62
- if sampling_rate != 16000:
63
- self.util.error(
64
- f"sampling rate should be 16000 but is {sampling_rate}"
65
- )
62
+ assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
66
63
  emb = self.get_embeddings(signal, sampling_rate, file)
67
64
  emb_series.iloc[idx] = emb
68
65
  self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)