nkululeko 0.91.3__tar.gz → 0.92.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. {nkululeko-0.91.3 → nkululeko-0.92.1}/CHANGELOG.md +8 -0
  2. {nkululeko-0.91.3/nkululeko.egg-info → nkululeko-0.92.1}/PKG-INFO +9 -1
  3. nkululeko-0.92.1/nkululeko/autopredict/ap_sid.py +48 -0
  4. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/constants.py +1 -1
  5. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/experiment.py +6 -1
  6. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/plots.py +27 -4
  7. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/predict.py +1 -1
  8. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/segment.py +13 -10
  9. nkululeko-0.92.1/nkululeko/segmenting/seg_pyannote.py +129 -0
  10. {nkululeko-0.91.3 → nkululeko-0.92.1/nkululeko.egg-info}/PKG-INFO +9 -1
  11. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko.egg-info/SOURCES.txt +1 -0
  12. nkululeko-0.91.3/nkululeko/autopredict/ap_sid.py +0 -43
  13. {nkululeko-0.91.3 → nkululeko-0.92.1}/LICENSE +0 -0
  14. {nkululeko-0.91.3 → nkululeko-0.92.1}/README.md +0 -0
  15. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/aesdd/process_database.py +0 -0
  16. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/androids/process_database.py +0 -0
  17. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/ased/process_database.py +0 -0
  18. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/asvp-esd/process_database.py +0 -0
  19. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/baved/process_database.py +0 -0
  20. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/cafe/process_database.py +0 -0
  21. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/clac/process_database.py +0 -0
  22. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/cmu-mosei/process_database.py +0 -0
  23. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/demos/process_database.py +0 -0
  24. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/ekorpus/process_database.py +0 -0
  25. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emns/process_database.py +0 -0
  26. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emofilm/convert_to_16k.py +0 -0
  27. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emofilm/process_database.py +0 -0
  28. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emorynlp/process_database.py +0 -0
  29. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emov-db/process_database.py +0 -0
  30. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emovo/process_database.py +0 -0
  31. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/emozionalmente/create.py +0 -0
  32. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/enterface/process_database.py +0 -0
  33. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/esd/process_database.py +0 -0
  34. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/gerparas/process_database.py +0 -0
  35. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/iemocap/process_database.py +0 -0
  36. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/jl/process_database.py +0 -0
  37. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/jtes/process_database.py +0 -0
  38. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/meld/process_database.py +0 -0
  39. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/mesd/process_database.py +0 -0
  40. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/mess/process_database.py +0 -0
  41. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/mlendsnd/process_database.py +0 -0
  42. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/msp-improv/process_database2.py +0 -0
  43. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/msp-podcast/process_database.py +0 -0
  44. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/oreau2/process_database.py +0 -0
  45. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/portuguese/process_database.py +0 -0
  46. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/ravdess/process_database.py +0 -0
  47. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/ravdess/process_database_speaker.py +0 -0
  48. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/savee/process_database.py +0 -0
  49. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/shemo/process_database.py +0 -0
  50. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/subesco/process_database.py +0 -0
  51. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/tess/process_database.py +0 -0
  52. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/thorsten-emotional/process_database.py +0 -0
  53. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/urdu/process_database.py +0 -0
  54. {nkululeko-0.91.3 → nkululeko-0.92.1}/data/vivae/process_database.py +0 -0
  55. {nkululeko-0.91.3 → nkululeko-0.92.1}/docs/source/conf.py +0 -0
  56. {nkululeko-0.91.3 → nkululeko-0.92.1}/meta/demos/demo_best_model.py +0 -0
  57. {nkululeko-0.91.3 → nkululeko-0.92.1}/meta/demos/my_experiment.py +0 -0
  58. {nkululeko-0.91.3 → nkululeko-0.92.1}/meta/demos/my_experiment_local.py +0 -0
  59. {nkululeko-0.91.3 → nkululeko-0.92.1}/meta/demos/plot_faster_anim.py +0 -0
  60. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/__init__.py +0 -0
  61. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/aug_train.py +0 -0
  62. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augment.py +0 -0
  63. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augmenting/__init__.py +0 -0
  64. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augmenting/augmenter.py +0 -0
  65. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augmenting/randomsplicer.py +0 -0
  66. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augmenting/randomsplicing.py +0 -0
  67. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/augmenting/resampler.py +0 -0
  68. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/__init__.py +0 -0
  69. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_age.py +0 -0
  70. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_arousal.py +0 -0
  71. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_dominance.py +0 -0
  72. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_gender.py +0 -0
  73. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_mos.py +0 -0
  74. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_pesq.py +0 -0
  75. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_sdr.py +0 -0
  76. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_snr.py +0 -0
  77. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_stoi.py +0 -0
  78. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/ap_valence.py +0 -0
  79. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/autopredict/estimate_snr.py +0 -0
  80. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/cacheddataset.py +0 -0
  81. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/data/__init__.py +0 -0
  82. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/data/dataset.py +0 -0
  83. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/data/dataset_csv.py +0 -0
  84. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/demo-ft.py +0 -0
  85. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/demo.py +0 -0
  86. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/demo_feats.py +0 -0
  87. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/demo_predictor.py +0 -0
  88. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/ensemble.py +0 -0
  89. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/explore.py +0 -0
  90. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/export.py +0 -0
  91. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/__init__.py +0 -0
  92. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_agender.py +0 -0
  93. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
  94. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_analyser.py +0 -0
  95. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_ast.py +0 -0
  96. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_auddim.py +0 -0
  97. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_audmodel.py +0 -0
  98. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_clap.py +0 -0
  99. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_hubert.py +0 -0
  100. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_import.py +0 -0
  101. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_mld.py +0 -0
  102. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_mos.py +0 -0
  103. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_opensmile.py +0 -0
  104. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_oxbow.py +0 -0
  105. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_praat.py +0 -0
  106. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_snr.py +0 -0
  107. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_spectra.py +0 -0
  108. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_spkrec.py +0 -0
  109. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_squim.py +0 -0
  110. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_trill.py +0 -0
  111. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
  112. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_wavlm.py +0 -0
  113. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feats_whisper.py +0 -0
  114. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/featureset.py +0 -0
  115. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/feinberg_praat.py +0 -0
  116. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
  117. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/feature_extractor.py +0 -0
  118. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/file_checker.py +0 -0
  119. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/filter_data.py +0 -0
  120. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/fixedsegment.py +0 -0
  121. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/glob_conf.py +0 -0
  122. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/losses/__init__.py +0 -0
  123. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/losses/loss_ccc.py +0 -0
  124. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/losses/loss_softf1loss.py +0 -0
  125. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/modelrunner.py +0 -0
  126. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/__init__.py +0 -0
  127. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model.py +0 -0
  128. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_bayes.py +0 -0
  129. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_cnn.py +0 -0
  130. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_gmm.py +0 -0
  131. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_knn.py +0 -0
  132. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_knn_reg.py +0 -0
  133. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_lin_reg.py +0 -0
  134. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_mlp.py +0 -0
  135. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_mlp_regression.py +0 -0
  136. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_svm.py +0 -0
  137. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_svr.py +0 -0
  138. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_tree.py +0 -0
  139. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_tree_reg.py +0 -0
  140. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_tuned.py +0 -0
  141. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_xgb.py +0 -0
  142. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/models/model_xgr.py +0 -0
  143. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/multidb.py +0 -0
  144. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/nkuluflag.py +0 -0
  145. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/nkululeko.py +0 -0
  146. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/__init__.py +0 -0
  147. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/defines.py +0 -0
  148. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/latex_writer.py +0 -0
  149. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/report.py +0 -0
  150. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/report_item.py +0 -0
  151. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/reporter.py +0 -0
  152. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/reporting/result.py +0 -0
  153. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/resample.py +0 -0
  154. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/runmanager.py +0 -0
  155. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/scaler.py +0 -0
  156. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/segmenting/__init__.py +0 -0
  157. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
  158. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/segmenting/seg_silero.py +0 -0
  159. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/syllable_nuclei.py +0 -0
  160. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/test.py +0 -0
  161. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/test_predictor.py +0 -0
  162. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/test_pretrain.py +0 -0
  163. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/utils/__init__.py +0 -0
  164. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/utils/files.py +0 -0
  165. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/utils/stats.py +0 -0
  166. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko/utils/util.py +0 -0
  167. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko.egg-info/dependency_links.txt +0 -0
  168. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko.egg-info/entry_points.txt +0 -0
  169. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko.egg-info/requires.txt +0 -0
  170. {nkululeko-0.91.3 → nkululeko-0.92.1}/nkululeko.egg-info/top_level.txt +0 -0
  171. {nkululeko-0.91.3 → nkululeko-0.92.1}/pyproject.toml +0 -0
  172. {nkululeko-0.91.3 → nkululeko-0.92.1}/setup.cfg +0 -0
  173. {nkululeko-0.91.3 → nkululeko-0.92.1}/setup.py +0 -0
  174. {nkululeko-0.91.3 → nkululeko-0.92.1}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,14 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ Version 0.92.1
5
+ --------------
6
+ * added a speaker plot to pyannote results
7
+
8
+ Version 0.92.0
9
+ --------------
10
+ * added first version of automatic speaker prediction/segmentation
11
+
4
12
  Version 0.91.3
5
13
  --------------
6
14
  * some additions for robustness
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.91.3
3
+ Version: 0.92.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
355
355
  Changelog
356
356
  =========
357
357
 
358
+ Version 0.92.1
359
+ --------------
360
+ * added a speaker plot to pyannote results
361
+
362
+ Version 0.92.0
363
+ --------------
364
+ * added first version of automatic speaker prediction/segmentation
365
+
358
366
  Version 0.91.3
359
367
  --------------
360
368
  * some additions for robustness
@@ -0,0 +1,48 @@
1
+ """"
2
+ A predictor for sid - Speaker ID.
3
+ """
4
+
5
+ import numpy as np
6
+ from pyannote.audio import Pipeline
7
+ import torch
8
+
9
+ from nkululeko.feature_extractor import FeatureExtractor
10
+ import nkululeko.glob_conf as glob_conf
11
+ from nkululeko.utils.util import Util
12
+
13
+
14
class SIDPredictor:
    """Predictor for speaker ids, based on pyannote speaker diarization.

    The diarization pipeline is loaded on construction; the prediction
    itself is not implemented yet (see the todo notes in ``predict``).
    """

    def __init__(self, df):
        """Load the diarization pipeline and move it to the configured device.

        Args:
            df: dataframe with the samples that should get a speaker id.
        """
        self.df = df
        self.util = Util("sidPredictor")
        # Section names are case sensitive: the token lives in [MODEL],
        # as stated by the error message below.
        hf_token = self.util.config_val("MODEL", "hf_token", None)
        if hf_token is None:
            self.util.error(
                "speaker id prediction needs huggingface token: [MODEL][hf_token]"
            )
        self.pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=hf_token,
        )
        device = self.util.config_val("MODEL", "device", "cpu")
        self.pipeline.to(torch.device(device))

    def predict(self, split_selection):
        """Return a copy of the dataframe; speaker ids are not assigned yet.

        Args:
            split_selection: name of the split, only used for the debug message.

        Returns:
            A copy of the dataframe passed to the constructor.
        """
        self.util.debug(f"estimating speaker id for {split_selection} samples")
        return_df = self.df.copy()
        # @todo
        # 1) concat all audio files
        # 2) get segmentations with pyannote
        # 3) map pyannote segments with original ones and assign speaker id

        return return_df

    def concat_files(self, df):
        """Placeholder for concatenating the audio files of ``df`` (not implemented)."""
        pass
        # todo
        # please use https://audeering.github.io/audiofile/usage.html#read-a-file
@@ -1,2 +1,2 @@
1
- VERSION="0.91.3"
1
+ VERSION="0.92.1"
2
2
  SAMPLING_RATE = 16000
@@ -439,7 +439,12 @@ class Experiment:
439
439
  )
440
440
  targets = self.util.config_val_list("PREDICT", "targets", ["gender"])
441
441
  for target in targets:
442
- if target == "gender":
442
+ if target == "speaker":
443
+ from nkululeko.autopredict.ap_sid import SIDPredictor
444
+
445
+ predictor = SIDPredictor(df)
446
+ df = predictor.predict(sample_selection)
447
+ elif target == "gender":
443
448
  from nkululeko.autopredict.ap_gender import GenderPredictor
444
449
 
445
450
  predictor = GenderPredictor(df)
@@ -4,14 +4,14 @@ import ast
4
4
  import matplotlib.pyplot as plt
5
5
  import numpy as np
6
6
  import pandas as pd
7
- import seaborn as sns
8
7
  from scipy import stats
8
+ import seaborn as sns
9
9
  from sklearn.manifold import TSNE
10
10
 
11
11
  import nkululeko.glob_conf as glob_conf
12
- import nkululeko.utils.stats as su
13
12
  from nkululeko.reporting.defines import Header
14
13
  from nkululeko.reporting.report_item import ReportItem
14
+ import nkululeko.utils.stats as su
15
15
  from nkululeko.utils.util import Util
16
16
 
17
17
 
@@ -30,8 +30,6 @@ class Plots:
30
30
  df_speaker["samplenum"] = df_speaker.shape[0]
31
31
  df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
32
32
  # plot the distribution of samples per speaker
33
- # one up because of the runs
34
- fig_dir = self.util.get_path("fig_dir") + "../"
35
33
  self.util.debug("plotting samples per speaker")
36
34
  if "gender" in df_speakers:
37
35
  filename = "samples_value_counts"
@@ -319,6 +317,31 @@ class Plots:
319
317
  img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
320
318
  plt.savefig(img_path)
321
319
  plt.close(fig)
320
+ self.util.debug(f"plotted durations to {img_path}")
321
+ glob_conf.report.add_item(
322
+ ReportItem(
323
+ Header.HEADER_EXPLORE,
324
+ caption,
325
+ title,
326
+ img_path,
327
+ )
328
+ )
329
+
330
+ def plot_speakers(self, df, sample_selection):
331
+ filename = "speakers"
332
+ caption = "speakers"
333
+ # one up because of the runs
334
+ fig_dir = self.util.get_path("fig_dir") + "../"
335
+ sns.set_style("whitegrid") # Set style for chart
336
+ ax = df["speaker"].value_counts().plot(kind="pie")
337
+ title = f"Speaker distr. for {sample_selection} {df.shape[0]}."
338
+ ax.set_title(title)
339
+ fig = ax.figure
340
+ # plt.tight_layout()
341
+ img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
342
+ plt.savefig(img_path)
343
+ plt.close(fig)
344
+ self.util.debug(f"plotted speakers to {img_path}")
322
345
  glob_conf.report.add_item(
323
346
  ReportItem(
324
347
  Header.HEADER_EXPLORE,
@@ -2,7 +2,7 @@
2
2
  # use some model and add automatically predicted labels to train and test splits
3
3
  # then save as a new dataset
4
4
 
5
- """This script is used to call the nkululeko PREDICT framework.
5
+ r"""This script is used to call the nkululeko PREDICT framework.
6
6
 
7
7
  It loads a configuration file, creates a new experiment,
8
8
  and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
@@ -1,5 +1,4 @@
1
- """
2
- Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
1
+ """Segments the samples in the dataset into chunks based on voice activity detection using SILERO VAD [1].
3
2
 
4
3
  The segmentation results are saved to a file, and the distributions of the original and
5
4
  segmented durations are plotted.
@@ -15,7 +14,7 @@ Example:
15
14
 
16
15
  References:
17
16
  [1] https://github.com/snakers4/silero-vad
18
-
17
+ [2] https://github.com/pyannote/pyannote-audio
19
18
  """
20
19
 
21
20
  import argparse
@@ -64,7 +63,7 @@ def main():
64
63
  # segment
65
64
  segmented_file = util.config_val("SEGMENT", "result", "segmented.csv")
66
65
 
67
- segmenter = util.config_val("SEGMENT", "method", "silero")
66
+ method = util.config_val("SEGMENT", "method", "silero")
68
67
  sample_selection = util.config_val("SEGMENT", "sample_selection", "all")
69
68
  if sample_selection == "all":
70
69
  df = pd.concat([expr.df_train, expr.df_test])
@@ -77,18 +76,21 @@ def main():
77
76
  f"unknown segmentation selection specifier {sample_selection},"
78
77
  " should be [all | train | test]"
79
78
  )
80
- util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}")
81
- if segmenter == "silero":
79
+ util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {method}")
80
+ if method == "silero":
82
81
  from nkululeko.segmenting.seg_silero import Silero_segmenter
83
82
 
84
83
  segmenter = Silero_segmenter()
85
84
  df_seg = segmenter.segment_dataframe(df)
85
+ elif method == "pyannote":
86
+ from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter
86
87
 
88
+ segmenter = Pyannote_segmenter(config)
89
+ df_seg = segmenter.segment_dataframe(df)
87
90
  else:
88
- util.error(f"unkown segmenter: {segmenter}")
91
+ util.error(f"unknown segmenter: {method}")
89
92
 
90
93
  def calc_dur(x):
91
-
92
94
  starts = x[1]
93
95
  ends = x[2]
94
96
  return (ends - starts).total_seconds()
@@ -108,6 +110,9 @@ def main():
108
110
  plots.plot_durations(
109
111
  df_seg, "segmented_durations", sample_selection, caption="Segmented durations"
110
112
  )
113
+ if method == "pyannote":
114
+ plots.plot_speakers(df_seg, sample_selection)
115
+
111
116
  print("")
112
117
  # remove encoded labels
113
118
  target = util.config_val("DATA", "target", None)
@@ -115,8 +120,6 @@ def main():
115
120
  df_seg = df_seg.drop(columns=[target])
116
121
  df_seg = df_seg.rename(columns={"class_label": target})
117
122
  # save file
118
- # dataname = "_".join(expr.datasets.keys())
119
- # name = f"{dataname}{segment_target}"
120
123
  df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
121
124
  util.debug(
122
125
  f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
@@ -0,0 +1,129 @@
1
+ """seg_pyannote.py.
2
+
3
+ Segment a dataset with the Pyannote segmenter.
4
+ Also adds speaker ids to the segments.
5
+
6
+ """
7
+
8
+ import pandas as pd
9
+ from pyannote.audio import Pipeline
10
+ import torch
11
+ from tqdm import tqdm
12
+
13
+ import audformat
14
+ from audformat import segmented_index
15
+
16
+ from nkululeko.utils.util import Util
17
+
18
+
19
+ SAMPLING_RATE = 16000
20
+
21
+
22
class Pyannote_segmenter:
    """Segment audio files into speaker turns with pyannote speaker diarization.

    Every resulting segment is labelled with the speaker label that the
    diarization pipeline assigned to the turn.
    """

    def __init__(self, not_testing=True):
        """Load the diarization pipeline and move it to the configured device.

        Args:
            not_testing: if true, ``segment_dataframe`` reads ``min_length``
                from the ``[SEGMENT]`` configuration section; otherwise a
                fixed value of 2 is used.
        """
        # initialize the diarization model
        torch.set_num_threads(1)
        self.no_testing = not_testing
        self.util = Util("pyannote_segmenter")
        hf_token = self.util.config_val("MODEL", "hf_token", None)
        if hf_token is None:
            self.util.error(
                "speaker id prediction needs huggingface token: [MODEL][hf_token]"
            )
        self.pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=hf_token,
        )
        device = self.util.config_val("MODEL", "device", "cpu")
        if device == "cpu":
            self.util.warn(
                "running pyannote on CPU can be really slow, consider using a GPU"
            )
        self.pipeline.to(torch.device(device))

    def get_segmentation_simple(self, file):
        """Return one segment per speaker turn, without any length limits.

        Args:
            file: index entry whose first element is the audio file path.

        Returns:
            Tuple ``(seg_index, speakers)``: the segmented index of all turns
            and the list of speaker labels, one per segment.
        """
        annotation = self.pipeline(file[0])

        speakers, starts, ends, files = [], [], [], []
        for turn, _, speaker in annotation.itertracks(yield_label=True):
            speakers.append(speaker)
            starts.append(turn.start)
            files.append(file[0])
            ends.append(turn.end)
        seg_index = segmented_index(files, starts, ends)
        return seg_index, speakers

    @staticmethod
    def _split_turn(start, end, min_length, max_length):
        """Cut one turn into ``(start, end)`` pieces.

        A turn that is not longer than ``max_length`` is kept as a single
        piece if it is longer than ``min_length`` and dropped otherwise.
        A longer turn is cut into pieces of ``max_length``; a remainder
        shorter than ``min_length`` is merged into the last piece, a longer
        remainder is kept as a piece of its own.
        """
        pieces = []
        while end - start > max_length:
            new_end = start + max_length
            if end - new_end < min_length:
                # remainder too short to stand alone: merge it into this piece
                new_end = end
            pieces.append((start, new_end))
            # continue right after the emitted piece so nothing overlaps
            start = new_end
        if pieces:
            # whatever is left was not merged, so it is at least min_length long
            if end > start:
                pieces.append((start, end))
        elif end - start > min_length:
            pieces.append((start, end))
        return pieces

    def get_segmentation(self, file, min_length, max_length):
        """Return the speaker turns of a file, restricted in length.

        Args:
            file: index entry whose first element is the audio file path.
            min_length: turns (and remainders of split turns) shorter than
                this are not emitted as segments of their own.
            max_length: longer turns are split into several segments.

        Returns:
            Tuple ``(seg_index, speakers)``: the segmented index and the list
            of speaker labels, one per segment.
        """
        # the pipeline expects the audio path, not the whole index entry
        annotation = self.pipeline(file[0])
        files, starts, ends, speakers = [], [], [], []
        for turn, _, speaker in annotation.itertracks(yield_label=True):
            for piece_start, piece_end in self._split_turn(
                turn.start, turn.end, min_length, max_length
            ):
                files.append(file[0])
                starts.append(piece_start)
                ends.append(piece_end)
                speakers.append(speaker)
        seg_index = segmented_index(files, starts, ends)
        return seg_index, speakers

    def segment_dataframe(self, df):
        """Segment all files of a dataframe and add a ``speaker`` column.

        Args:
            df: dataframe whose index entries start with the audio file path;
                the column values of each row are copied to all of its segments.

        Returns:
            The concatenation of the per-file segment dataframes.
        """
        dfs = []
        # NOTE(review): eval() executes whatever the configuration contains;
        # consider ast.literal_eval if the config can come from untrusted users.
        max_length = eval(self.util.config_val("SEGMENT", "max_length", "False"))
        min_length = None
        if max_length:
            if self.no_testing:
                min_length = float(self.util.config_val("SEGMENT", "min_length", 2))
            else:
                min_length = 2
            self.util.debug(f"segmenting with max length: {max_length+min_length}")
        for file, values in tqdm(df.iterrows(), total=df.shape[0]):
            if max_length:
                index, speakers = self.get_segmentation(file, min_length, max_length)
            else:
                index, speakers = self.get_segmentation_simple(file)
            # separate name: do not shadow the dataframe that is being iterated
            seg_df = pd.DataFrame(
                values.to_dict(),
                index,
            )
            seg_df["speaker"] = speakers
            dfs.append(seg_df)
        return audformat.utils.concat(dfs)
107
+
108
+
109
def main():
    """Segment a single sample file and print the segments with their durations."""

    def seconds_of(entry):
        # entry is a (file, start, end) index tuple
        return (entry[2] - entry[1]).total_seconds()

    sample = pd.DataFrame(index=pd.Series(["test_wavs/very_long.wav"]))
    sample["target"] = "anger"
    sample.index = audformat.utils.to_segmented_index(sample.index, allow_nat=False)

    segments = Pyannote_segmenter(not_testing=False).segment_dataframe(sample)
    segments["duration"] = segments.index.to_series().map(seconds_of)
    print(segments.head(100))
126
+
127
+
128
+ if __name__ == "__main__":
129
+ main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.91.3
3
+ Version: 0.92.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -355,6 +355,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
355
355
  Changelog
356
356
  =========
357
357
 
358
+ Version 0.92.1
359
+ --------------
360
+ * added a speaker plot to pyannote results
361
+
362
+ Version 0.92.0
363
+ --------------
364
+ * added first version of automatic speaker prediction/segmentation
365
+
358
366
  Version 0.91.3
359
367
  --------------
360
368
  * some additions for robustness
@@ -163,6 +163,7 @@ nkululeko/reporting/reporter.py
163
163
  nkululeko/reporting/result.py
164
164
  nkululeko/segmenting/__init__.py
165
165
  nkululeko/segmenting/seg_inaspeechsegmenter.py
166
+ nkululeko/segmenting/seg_pyannote.py
166
167
  nkululeko/segmenting/seg_silero.py
167
168
  nkululeko/utils/__init__.py
168
169
  nkululeko/utils/files.py
@@ -1,43 +0,0 @@
1
- """"
2
- A predictor for sid - Speaker ID.
3
- """
4
-
5
- from pyannote.audio import Pipeline
6
-
7
-
8
- import numpy as np
9
-
10
- import nkululeko.glob_conf as glob_conf
11
- from nkululeko.feature_extractor import FeatureExtractor
12
- from nkululeko.utils.util import Util
13
-
14
-
15
- class SIDPredictor:
16
- """SIDPredictor.
17
-
18
- predicting speaker id.
19
- """
20
-
21
- def __init__(self, df):
22
- self.df = df
23
- self.util = Util("sidPredictor")
24
- self.pipeline = Pipeline.from_pretrained(
25
- "pyannote/speaker-diarization-3.1",
26
- use_auth_token="HUGGINGFACE_ACCESS_TOKEN_GOES_HERE",
27
- )
28
-
29
- def predict(self, split_selection):
30
- self.util.debug(f"estimating PESQ for {split_selection} samples")
31
- return_df = self.df.copy()
32
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
33
- self.feature_extractor = FeatureExtractor(
34
- self.df, ["squim"], feats_name, split_selection
35
- )
36
- result_df = self.feature_extractor.extract()
37
- # replace missing values by 0
38
- result_df = result_df.fillna(0)
39
- result_df = result_df.replace(np.nan, 0)
40
- result_df.replace([np.inf, -np.inf], 0, inplace=True)
41
- pred_vals = result_df.pesq * 100
42
- return_df["pesq_pred"] = pred_vals.astype("int") / 100
43
- return return_df
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes