nkululeko 0.94.1__tar.gz → 0.94.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.94.1 → nkululeko-0.94.3}/CHANGELOG.md +10 -0
- nkululeko-0.94.3/PKG-INFO +76 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/README.md +144 -169
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/augmenting/randomsplicer.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/augmenting/randomsplicing.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/augmenting/resampler.py +22 -14
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_age.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_arousal.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_gender.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_mos.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_pesq.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_sdr.py +2 -2
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_sid.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_snr.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_stoi.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_valence.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/constants.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/demo.py +7 -7
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/experiment.py +2 -1
- nkululeko-0.94.3/nkululeko/feat_extract/feats_emotion2vec.py +218 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_mos.py +2 -2
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_snr.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feature_extractor.py +2 -2
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model.py +42 -4
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_cnn.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_xgb.py +1 -1
- nkululeko-0.94.3/nkululeko/models/tests/test_model_svm.py +56 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/nkululeko.py +13 -2
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/report_item.py +1 -1
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/runmanager.py +19 -9
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/scaler.py +22 -14
- nkululeko-0.94.3/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/test_predictor.py +1 -1
- nkululeko-0.94.3/nkululeko/utils/__init__.py +0 -0
- nkululeko-0.94.3/nkululeko/utils/unzip.py +38 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/utils/util.py +19 -5
- nkululeko-0.94.3/nkululeko.egg-info/PKG-INFO +76 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko.egg-info/SOURCES.txt +8 -1
- nkululeko-0.94.3/nkululeko.egg-info/requires.txt +60 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko.egg-info/top_level.txt +1 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/pyproject.toml +4 -0
- nkululeko-0.94.3/setup.cfg +80 -0
- nkululeko-0.94.3/setup.py +96 -0
- nkululeko-0.94.3/tests/test_install.py +136 -0
- nkululeko-0.94.3/tests/test_modules.py +53 -0
- nkululeko-0.94.1/PKG-INFO +0 -40
- nkululeko-0.94.1/nkululeko.egg-info/PKG-INFO +0 -40
- nkululeko-0.94.1/nkululeko.egg-info/requires.txt +0 -25
- nkululeko-0.94.1/setup.cfg +0 -49
- nkululeko-0.94.1/setup.py +0 -22
- {nkululeko-0.94.1 → nkululeko-0.94.3}/LICENSE +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/ased/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/baved/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/cafe/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/clac/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/demos/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emns/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emovo/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/enterface/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/esd/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/jl/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/jtes/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/meld/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/mesd/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/mess/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/savee/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/shemo/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/subesco/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/tess/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/urdu/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/data/vivae/process_database.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/docs/source/conf.py +0 -0
- {nkululeko-0.94.1/nkululeko/augmenting → nkululeko-0.94.3/examples}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/augment.py +0 -0
- {nkululeko-0.94.1/nkululeko/autopredict → nkululeko-0.94.3/nkululeko/augmenting}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.94.1/nkululeko/data → nkululeko-0.94.3/nkululeko/autopredict}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.94.1/nkululeko/feat_extract → nkululeko-0.94.3/nkululeko/data}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/demo-ft.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/ensemble.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/explore.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/export.py +0 -0
- {nkululeko-0.94.1/nkululeko/losses → nkululeko-0.94.3/nkululeko/feat_extract}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_ast.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_opensmile copy.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/fixedsegment.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.94.1/nkululeko/models → nkululeko-0.94.3/nkululeko/losses}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.94.1/nkululeko/reporting → nkululeko-0.94.3/nkululeko/models}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.94.1/nkululeko/segmenting → nkululeko-0.94.3/nkululeko/models/tests}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/multidb.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/plots.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/predict.py +0 -0
- {nkululeko-0.94.1/nkululeko/utils → nkululeko-0.94.3/nkululeko/reporting}/__init__.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/resample.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/segment.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/segmenting/seg_pyannote.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/test.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/test_pretrain.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.94.1 → nkululeko-0.94.3}/nkululeko.egg-info/entry_points.txt +0 -0
@@ -1,6 +1,16 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
Version 0.94.3 (25-07-22)
|
5
|
+
--------------------------
|
6
|
+
* added the following features (related to dementia/Alzheimer's):
|
7
|
+
* pause_lognorm_mu, pause_lognorm_sigma, pause_lognorm_ks_pvalue
|
8
|
+
* pause_mean_duration, pause_std_duration, pause_cv, proportion_pause_duration
|
9
|
+
|
10
|
+
Version 0.94.2 (25-06-02)
|
11
|
+
--------------------------
|
12
|
+
* added better error message: util.py might not have a logger
|
13
|
+
|
4
14
|
Version 0.94.1 (25-04-03)
|
5
15
|
--------------------------
|
6
16
|
* fixed bug: plot uncertainties had wrong file path
|
@@ -0,0 +1,76 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: nkululeko
|
3
|
+
Version: 0.94.3
|
4
|
+
Summary: Machine learning audio prediction experiments based on templates
|
5
|
+
Home-page: https://github.com/felixbur/nkululeko
|
6
|
+
Author: Felix Burkhardt
|
7
|
+
Author-email: fxburk@gmail.com
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
13
|
+
Requires-Python: >=3.9
|
14
|
+
License-File: LICENSE
|
15
|
+
Requires-Dist: audeer>=1.0.0
|
16
|
+
Requires-Dist: audformat>=1.3.1
|
17
|
+
Requires-Dist: audinterface>=1.0.0
|
18
|
+
Requires-Dist: audiofile>=1.0.0
|
19
|
+
Requires-Dist: audiomentations==0.31.0
|
20
|
+
Requires-Dist: audmetric>=1.0.0
|
21
|
+
Requires-Dist: audonnx>=0.7.0
|
22
|
+
Requires-Dist: confidence-intervals>=0.0.2
|
23
|
+
Requires-Dist: datasets>=2.0.0
|
24
|
+
Requires-Dist: imageio>=2.0.0
|
25
|
+
Requires-Dist: matplotlib>=3.0.0
|
26
|
+
Requires-Dist: numpy>=1.20.0
|
27
|
+
Requires-Dist: opensmile>=2.0.0
|
28
|
+
Requires-Dist: pandas>=1.0.0
|
29
|
+
Requires-Dist: praat-parselmouth>=0.4.0
|
30
|
+
Requires-Dist: scikit_learn>=1.0.0
|
31
|
+
Requires-Dist: scipy>=1.0.0
|
32
|
+
Requires-Dist: seaborn>=0.11.0
|
33
|
+
Requires-Dist: sounddevice>=0.4.0
|
34
|
+
Requires-Dist: transformers>=4.0.0
|
35
|
+
Requires-Dist: umap-learn>=0.5.0
|
36
|
+
Requires-Dist: xgboost>=1.0.0
|
37
|
+
Requires-Dist: pylatex>=1.0.0
|
38
|
+
Provides-Extra: torch
|
39
|
+
Requires-Dist: torch>=1.0.0; extra == "torch"
|
40
|
+
Requires-Dist: torchvision>=0.10.0; extra == "torch"
|
41
|
+
Requires-Dist: torchaudio>=0.10.0; extra == "torch"
|
42
|
+
Provides-Extra: torch-cpu
|
43
|
+
Requires-Dist: torch>=1.0.0; extra == "torch-cpu"
|
44
|
+
Requires-Dist: torchvision>=0.10.0; extra == "torch-cpu"
|
45
|
+
Requires-Dist: torchaudio>=0.10.0; extra == "torch-cpu"
|
46
|
+
Provides-Extra: torch-nightly
|
47
|
+
Requires-Dist: torch; extra == "torch-nightly"
|
48
|
+
Requires-Dist: torchvision; extra == "torch-nightly"
|
49
|
+
Requires-Dist: torchaudio; extra == "torch-nightly"
|
50
|
+
Provides-Extra: spotlight
|
51
|
+
Requires-Dist: renumics-spotlight>=1.6.13; extra == "spotlight"
|
52
|
+
Requires-Dist: sliceguard>=0.0.35; extra == "spotlight"
|
53
|
+
Provides-Extra: tensorflow
|
54
|
+
Requires-Dist: tensorflow>=2.0.0; extra == "tensorflow"
|
55
|
+
Requires-Dist: tensorflow_hub>=0.12.0; extra == "tensorflow"
|
56
|
+
Provides-Extra: all
|
57
|
+
Requires-Dist: torch>=1.0.0; extra == "all"
|
58
|
+
Requires-Dist: torchvision>=0.10.0; extra == "all"
|
59
|
+
Requires-Dist: torchaudio>=0.10.0; extra == "all"
|
60
|
+
Requires-Dist: renumics-spotlight>=0.1.0; extra == "all"
|
61
|
+
Requires-Dist: sliceguard>=0.1.0; extra == "all"
|
62
|
+
Requires-Dist: tensorflow>=2.0.0; extra == "all"
|
63
|
+
Requires-Dist: tensorflow_hub>=0.12.0; extra == "all"
|
64
|
+
Requires-Dist: shap>=0.40.0; extra == "all"
|
65
|
+
Requires-Dist: imblearn>=0.0.0; extra == "all"
|
66
|
+
Requires-Dist: cylimiter>=0.0.1; extra == "all"
|
67
|
+
Requires-Dist: audtorch>=0.0.1; extra == "all"
|
68
|
+
Requires-Dist: splitutils>=0.0.1; extra == "all"
|
69
|
+
Dynamic: author
|
70
|
+
Dynamic: author-email
|
71
|
+
Dynamic: home-page
|
72
|
+
Dynamic: license-file
|
73
|
+
Dynamic: provides-extra
|
74
|
+
Dynamic: requires-dist
|
75
|
+
Dynamic: requires-python
|
76
|
+
Dynamic: summary
|
@@ -1,141 +1,96 @@
|
|
1
|
+
## Nkululeko
|
1
2
|
|
2
|
-
-
|
3
|
-
- [Confusion matrix](#confusion-matrix)
|
4
|
-
- [Epoch progression](#epoch-progression)
|
5
|
-
- [Feature importance](#feature-importance)
|
6
|
-
- [Feature distribution](#feature-distribution)
|
7
|
-
- [t-SNE plots](#t-sne-plots)
|
8
|
-
- [Data distribution](#data-distribution)
|
9
|
-
- [Bias checking](#bias-checking)
|
10
|
-
- [Uncertainty](#uncertainty)
|
11
|
-
- [Documentation](#documentation)
|
12
|
-
- [Installation](#installation)
|
13
|
-
- [Usage](#usage)
|
14
|
-
- [ini-file values](#ini-file-values)
|
15
|
-
- [Hello World example](#hello-world-example)
|
16
|
-
- [Features](#features)
|
17
|
-
- [License](#license)
|
18
|
-
- [Contributing](#contributing)
|
19
|
-
- [Citing](#citing)
|
20
|
-
|
21
|
-
|
22
|
-
## Overview
|
23
|
-
A project to detect speaker characteristics by machine learning experiments with a high-level interface.
|
24
|
-
|
25
|
-
The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
26
|
-
|
27
|
-
* NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
28
|
-
* NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
29
|
-
* The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
|
30
|
-
* Below is a [Hello World example](#hello-world-example) that should get you set up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
31
|
-
* [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
32
|
-
* [Here is a slack channel to discuss issues related to nkululeko](https://join.slack.com/t/nkululekoworkspace/shared_invite/zt-2v3q3yfzk-XfNGoqLfp3ts9KfCZpfTyg). Please click the link if interested in contributing.
|
33
|
-
* [Here's a slide presentation about nkululeko](docs/nkululeko.pdf)
|
34
|
-
* [Here's a video presentation about nkululeko](https://www.youtube.com/playlist?list=PLRceVavtxLg0y2jiLmpnUfiMtfvkK912D)
|
35
|
-
* [Here's the 2022 LREC article on nkululeko](http://felix.syntheticspeech.de/publications/Nkululeko_LREC.pdf)
|
36
|
-
|
37
|
-
Here are some examples of typical output:
|
3
|
+
Nkululeko is a project to detect speaker characteristics by machine learning experiments with a high-level interface. The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
38
4
|
|
39
|
-
|
40
|
-
Per default, Nkululeko displays results as a confusion matrix using binning with regression.
|
5
|
+
Some of the abilities that Nkululeko provides: combining acoustic features and machine learning models (including feature selection and feature concatenation); data exploration, selection, and visualization of the results; finetuning; ensemble learning; soft labeling (predicting labels with a pre-trained model); and running inference with the model on a test set.
|
41
6
|
|
42
|
-
|
7
|
+
Nkululeko orchestrates data loading, feature extraction, and model training, allowing you to specify your experiment in a configuration file. The framework handles the process from raw data to trained model and evaluation, making it easy to run machine learning experiments without directly coding in Python.
|
43
8
|
|
44
|
-
|
45
|
-
|
9
|
+
## Who is this for?
|
10
|
+
Nkululeko is for speech processing learners, researchers and ML practitioners focused on speaker characteristics, e.g., emotion, age, gender, or disorder detection.
|
46
11
|
|
47
|
-
|
12
|
+
## Installation
|
48
13
|
|
49
|
-
|
50
|
-
Using the *explore* interface, Nkululeko analyses the importance of acoustic features:
|
51
|
-
|
52
|
-
<img src="meta/images/feat_importance.png" width="500px"/>
|
14
|
+
Nkululeko requires Python 3.9 or higher. The current build status is:
|
53
15
|
|
54
|
-
|
55
|
-
|
16
|
+

|
17
|
+

|
18
|
+

|
19
|
+

|
56
20
|
|
57
|
-
|
21
|
+
Create and activate a virtual Python environment and simply install Nkululeko:
|
58
22
|
|
59
|
-
|
23
|
+
```bash
|
24
|
+
python -m venv .env
|
25
|
+
source .env/bin/activate # Linux/macOS; on Windows run: .env\Scripts\activate
|
26
|
+
pip install nkululeko
|
27
|
+
```
|
60
28
|
|
61
|
-
|
29
|
+
Current version: **0.94.3**
|
62
30
|
|
63
|
-
###
|
64
|
-
A t-SNE plot can give you an estimate of whether your acoustic features are useful at all:
|
31
|
+
### Optional Dependencies
|
65
32
|
|
66
|
-
|
33
|
+
Nkululeko supports optional dependencies through extras:
|
67
34
|
|
68
|
-
|
69
|
-
|
35
|
+
```bash
|
36
|
+
# Install with PyTorch support
|
37
|
+
pip install nkululeko[torch]
|
70
38
|
|
71
|
-
|
39
|
+
# Install with CPU-only PyTorch
|
40
|
+
pip install nkululeko[torch-cpu]
|
72
41
|
|
73
|
-
|
74
|
-
|
42
|
+
# Install with TensorFlow support
|
43
|
+
pip install nkululeko[tensorflow]
|
75
44
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
Nkululeko estimates the uncertainty of model decisions (only for classifiers) with entropy over the class probabilities or logits per sample.
|
80
|
-
|
81
|
-
<img src="meta/images/uncertainty.png" width="500px"/>
|
45
|
+
# Install all optional dependencies
|
46
|
+
pip install nkululeko[all]
|
47
|
+
```
|
82
48
|
|
49
|
+
#### Manual Installation Options
|
83
50
|
|
51
|
+
You can also install dependencies manually:
|
84
52
|
|
85
|
-
|
86
|
-
The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
|
87
|
-
|
88
|
-
## Installation
|
53
|
+
##### PyTorch Installation
|
89
54
|
|
90
|
-
|
91
|
-
```
|
92
|
-
pip install
|
93
|
-
```
|
94
|
-
We excluded some packages from the automatic installation because they might depend on your computer and some of them are only needed in special cases. So if the error
|
95
|
-
```
|
96
|
-
module x not found
|
97
|
-
```
|
98
|
-
appears, please try
|
99
|
-
```
|
100
|
-
pip install x
|
101
|
-
```
|
102
|
-
For many packages, you will need the missing torch package.
|
103
|
-
If you don't have a GPU (which is probably true if you don't know what that is), please use
|
104
|
-
```
|
105
|
-
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
106
|
-
```
|
107
|
-
else, you can use the default:
|
55
|
+
For CPU-only installation (recommended for most users):
|
56
|
+
```bash
|
57
|
+
pip install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 --index-url https://download.pytorch.org/whl/cpu
|
108
58
|
```
|
59
|
+
|
60
|
+
For GPU support (CUDA 12.6):
|
61
|
+
```bash
|
109
62
|
pip install torch torchvision torchaudio
|
110
63
|
```
|
111
64
|
|
112
65
|
Some functionalities require extra packages to be installed, which we didn't include automatically:
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
pip install
|
117
|
-
|
118
|
-
* the spotlight adapter needs spotlight:
|
119
|
-
```
|
120
|
-
pip install renumics-spotlight sliceguard
|
66
|
+
|
67
|
+
* For spotlight adapter:
|
68
|
+
```bash
|
69
|
+
pip install PyYAML # Install PyYAML first to avoid dependency issues
|
70
|
+
pip install nkululeko[spotlight]
|
121
71
|
```
|
122
72
|
|
73
|
+
Some examples for *ini*-files (which you use to control nkululeko) are in the [examples folder](https://github.com/felixbur/nkululeko/tree/main/examples).
|
123
74
|
|
124
|
-
Some examples for *ini*-files (which you use to control nkululeko) are in the [tests folder](https://github.com/felixbur/nkululeko/tree/main/tests).
|
125
75
|
|
126
76
|
|
127
|
-
##
|
77
|
+
## Documentation
|
78
|
+
The documentation, along with extended notes on installation, usage, the INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
|
128
79
|
|
129
|
-
### [ini-file values](./ini_file.md)
|
130
80
|
|
131
|
-
|
81
|
+
## Usage
|
132
82
|
|
83
|
+
### [ini-file values](./ini_file.md)
|
133
84
|
|
134
85
|
Basically, you specify your experiment in an ["ini" file](./ini_file.md) (e.g. *experiment.ini*) and then call one of the Nkululeko interfaces to run the experiment like this:
|
135
|
-
|
86
|
+
|
87
|
+
```bash
|
88
|
+
python -m nkululeko.nkululeko --config experiment.ini
|
89
|
+
```
|
136
90
|
|
137
91
|
A basic configuration looks like this:
|
138
|
-
|
92
|
+
|
93
|
+
```ini
|
139
94
|
[EXP]
|
140
95
|
root = ./
|
141
96
|
name = exp_emodb
|
@@ -159,20 +114,10 @@ Here is an overview of the interfaces/modules:
|
|
159
114
|
|
160
115
|
All of them take *--config <my_config.ini>* as an argument.
|
161
116
|
|
162
|
-
* **nkululeko.nkululeko**: do machine learning experiments combining features and learners
|
117
|
+
* **nkululeko.nkululeko**: do machine learning experiments combining features and learners (e.g. opensmile with SVM)
|
163
118
|
* **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
|
164
|
-
* *--config*: which experiments (INI files) to combine
|
165
|
-
* *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
|
166
|
-
* *--threshold*: uncertainty threshold (1.0 means no threshold)
|
167
|
-
* *--weights*: weights for performance_weighted method (could be from previous UAR, ACC)
|
168
|
-
* *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
|
169
|
-
* *--no_labels* (optional): indicate that no ground truth is given
|
170
119
|
* **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases both across databases and within each one
|
171
120
|
* **nkululeko.demo**: [demo the current best model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/) on the command line
|
172
|
-
* *--list* (optional) list of input files
|
173
|
-
* *--file* (optional) name of input file
|
174
|
-
* *--folder* (optional) parent folder for input files
|
175
|
-
* *--outfile* (optional) name of CSV file for output
|
176
121
|
* **nkululeko.test**: predict a [given data set](http://blog.syntheticspeech.de/2022/09/01/nkululeko-how-to-evaluate-a-test-set-with-a-given-best-model/) with the current best model
|
177
122
|
* **nkululeko.explore**: perform [data exploration](http://blog.syntheticspeech.de/2023/05/11/nkululeko-how-to-visualize-your-data-distribution/)
|
178
123
|
* **nkululeko.augment**: [augment](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/) the current training data
|
@@ -182,59 +127,7 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
182
127
|
* **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
|
183
128
|
* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command line. Usage:
|
184
129
|
|
185
|
-
|
186
|
-
$ python -m nkululeko.nkuluflag [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
187
|
-
```
|
188
|
-
|
189
|
-
There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
190
|
-
* [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
|
191
|
-
* [Nkululeko FAQ](http://blog.syntheticspeech.de/2022/07/07/nkululeko-faq/)
|
192
|
-
* [How to set up your first nkululeko project](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
193
|
-
* [Setting up a base nkululeko experiment](http://blog.syntheticspeech.de/2021/10/05/setting-up-a-base-nkululeko-experiment/)
|
194
|
-
* [How to import a database](http://blog.syntheticspeech.de/2022/01/27/nkululeko-how-to-import-a-database/)
|
195
|
-
* [Comparing classifiers and features](http://blog.syntheticspeech.de/2021/10/05/nkululeko-comparing-classifiers-and-features/)
|
196
|
-
* [Use Praat features](http://blog.syntheticspeech.de/2022/06/27/how-to-use-selected-features-from-praat-with-nkululeko/)
|
197
|
-
* [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
|
198
|
-
* [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
|
199
|
-
* [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
|
200
|
-
* [Perform cross-database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
|
201
|
-
* [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
|
202
|
-
* [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
|
203
|
-
* [How to soft-label a database](http://blog.syntheticspeech.de/2022/01/24/how-to-soft-label-a-database-with-nkululeko/)
|
204
|
-
* [Re-generate the progressing confusion matrix animation with a different framerate](demos/plot_faster_anim.py)
|
205
|
-
* [How to limit/filter a dataset](http://blog.syntheticspeech.de/2022/02/22/how-to-limit-a-dataset-with-nkululeko/)
|
206
|
-
* [Specifying database disk location](http://blog.syntheticspeech.de/2022/02/21/specifying-database-disk-location-with-nkululeko/)
|
207
|
-
* [Add dropout with MLP models](http://blog.syntheticspeech.de/2022/02/25/adding-dropout-to-mlp-models-with-nkululeko/)
|
208
|
-
* [Do cross-validation](http://blog.syntheticspeech.de/2022/03/23/how-to-do-cross-validation-with-nkululeko/)
|
209
|
-
* [Combine predictions per speaker](http://blog.syntheticspeech.de/2022/03/24/how-to-combine-predictions-per-speaker-with-nkululeko/)
|
210
|
-
* [Run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
211
|
-
* [Compare several MLP layer layouts with each other](http://blog.syntheticspeech.de/2022/04/11/how-to-compare-several-mlp-layer-layouts-with-each-other/)
|
212
|
-
* [Import features from outside the software](http://blog.syntheticspeech.de/2022/10/18/how-to-import-features-from-outside-the-nkululeko-software/)
|
213
|
-
* [Export acoustic features](http://blog.syntheticspeech.de/2024/05/30/nkululeko-export-acoustic-features/)
|
214
|
-
* [Explore feature importance](http://blog.syntheticspeech.de/2023/02/20/nkululeko-show-feature-importance/)
|
215
|
-
* [Plot distributions for feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
|
216
|
-
* [Show feature importance](http://blog.syntheticspeech.de/2023/02/20/nkululeko-show-feature-importance/)
|
217
|
-
* [Augment the training set](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/)
|
218
|
-
* [Visualize clusters of acoustic features](http://blog.syntheticspeech.de/2023/04/20/nkululeko-visualize-clusters-of-your-acoustic-features/)
|
219
|
-
* [Visualize your data distribution](http://blog.syntheticspeech.de/2023/05/11/nkululeko-how-to-visualize-your-data-distribution/)
|
220
|
-
* [Check your dataset](http://blog.syntheticspeech.de/2023/07/11/nkululeko-check-your-dataset/)
|
221
|
-
* [Segmenting a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/)
|
222
|
-
* [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
|
223
|
-
* [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
|
224
|
-
* [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
|
225
|
-
* [Automatic generation of a latex/pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
|
226
|
-
* [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
|
227
|
-
* [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
|
228
|
-
* [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
|
229
|
-
* [Oversample the training set](http://blog.syntheticspeech.de/2023/11/16/nkululeko-oversample-the-training-set/)
|
230
|
-
* [Compare several databases](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/)
|
231
|
-
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
232
|
-
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
233
|
-
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
234
|
-
* [Ensemble (combine) classifiers with late-fusion](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
235
|
-
* [Use train, dev and test splits](https://blog.syntheticspeech.de/2025/03/31/nkululeko-how-to-use-train-dev-test-splits/)
|
236
|
-
|
237
|
-
### <a name="helloworld">Hello World example</a>
|
130
|
+
## <a name="helloworld">Hello World example</a>
|
238
131
|
* NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1Up7t5Nn7VwDPCCEpTg2U7cpZ_PdoEgj-?usp=sharing), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
239
132
|
* [I made a video to show you how to do this on Windows](https://www.youtube.com/playlist?list=PLRceVavtxLg0y2jiLmpnUfiMtfvkK912D)
|
240
133
|
* Set up Python on your computer, version >= 3.8
|
@@ -266,7 +159,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
266
159
|
* Inspect and play around with the [demo configuration file](meta/demos/exp_emodb.ini) that defined your experiment, then re-run.
|
267
160
|
* There are many ways to experiment with different classifiers and acoustic feature sets, [all described here](https://github.com/felixbur/nkululeko/blob/main/ini_file.md)
|
268
161
|
|
269
|
-
|
162
|
+
## Features
|
270
163
|
The framework is targeted at the speech domain and supports experiments where different classifiers are combined with different feature extractors.
|
271
164
|
|
272
165
|
* Classifiers: Naive Bayes, KNN, Tree, XGBoost, SVM, MLP
|
@@ -275,6 +168,7 @@ The framework is targeted at the speech domain and supports experiments where di
|
|
275
168
|
* Label encoding
|
276
169
|
* Binning (continuous to categorical)
|
277
170
|
* Online demo interface for trained models
|
171
|
+
* Visualization: confusion matrix, feature importance, feature distribution, epoch progression, t-SNE plot, data distribution, bias checking, uncertainty estimation
|
278
172
|
|
279
173
|
Here's a rough UML-like sketch of the framework (and [here's the real one done with pyreverse](meta/images/classes.png)).
|
280
174
|

|
@@ -284,8 +178,89 @@ Currently, the following linear classifiers are implemented (integrated from skl
|
|
284
178
|
and the following ANNs (artificial neural networks)
|
285
179
|
* MLP (multi-layer perceptron), CNN (convolutional neural network)
|
286
180
|
|
287
|
-
|
181
|
+
For visualization, besides confusion matrix, feature importance, feature distribution, t-SNE plot, and data distribution (just to name a few), Nkululeko can also be used for bias checking, uncertainty estimation, and epoch progression.
|
182
|
+
|
183
|
+
### Bias checking
|
184
|
+
|
185
|
+
<details>
|
186
|
+
In some cases, you might wonder if there's bias in your data. You can try to detect this with automatically estimated speech properties by visualizing the correlation of target labels and predicted labels.
|
187
|
+
|
188
|
+
<img src="meta/images/emotion-pesq.png" width="500px"/>
|
189
|
+
|
190
|
+
</details>
|
191
|
+
|
192
|
+
### Uncertainty
|
193
|
+
|
194
|
+
<details>
|
195
|
+
Nkululeko estimates the uncertainty of model decisions (only for classifiers) with entropy over the class probabilities or logits per sample.
|
196
|
+
|
197
|
+
<img src="meta/images/uncertainty.png" width="500px"/>
|
198
|
+
|
199
|
+
</details>
|
200
|
+
|
201
|
+
Here's [an animation that shows the progress of classification done with nkululeko](https://youtu.be/6Y0M382GjvM).
|
202
|
+
|
203
|
+
## News
|
204
|
+
|
205
|
+
<details>
|
206
|
+
|
207
|
+
There's Felix's [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials below:
|
208
|
+
* [Ensemble learning with Nkululeko](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
209
|
+
* [Finetune transformer-models with Nkululeko](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
210
|
+
* Below is a [Hello World example for Nkululeko](#helloworld) that should set you up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
211
|
+
* [Thanks to deepwiki, here's an analysis of the source code](https://deepwiki.com/felixbur/nkululeko)
|
212
|
+
* [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
213
|
+
* [Here's a slide presentation about nkululeko](docs/nkululeko.pdf)
|
214
|
+
* [Here's a video presentation about nkululeko](https://www.youtube.com/playlist?list=PLRceVavtxLg0y2jiLmpnUfiMtfvkK912D)
|
215
|
+
* [Here's the 2022 LREC article on nkululeko](http://felix.syntheticspeech.de/publications/Nkululeko_LREC.pdf)
|
216
|
+
* [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
|
217
|
+
* [Nkululeko FAQ](http://blog.syntheticspeech.de/2022/07/07/nkululeko-faq/)
|
218
|
+
* [How to set up your first nkululeko project](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
219
|
+
* [Setting up a base nkululeko experiment](http://blog.syntheticspeech.de/2021/10/05/setting-up-a-base-nkululeko-experiment/)
|
220
|
+
* [How to import a database](http://blog.syntheticspeech.de/2022/01/27/nkululeko-how-to-import-a-database/)
|
221
|
+
* [Comparing classifiers and features](http://blog.syntheticspeech.de/2021/10/05/nkululeko-comparing-classifiers-and-features/)
|
222
|
+
* [Use Praat features](http://blog.syntheticspeech.de/2022/06/27/how-to-use-selected-features-from-praat-with-nkululeko/)
|
223
|
+
* [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
|
224
|
+
* [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
|
225
|
+
* [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
|
226
|
+
* [Perform cross-database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
|
227
|
+
* [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
|
228
|
+
* [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
|
229
|
+
* [How to soft-label a database](http://blog.syntheticspeech.de/2022/01/24/how-to-soft-label-a-database-with-nkululeko/)
|
230
|
+
* [Re-generate the progressing confusion matrix animation with a different framerate](demos/plot_faster_anim.py)
|
231
|
+
* [How to limit/filter a dataset](http://blog.syntheticspeech.de/2022/02/22/how-to-limit-a-dataset-with-nkululeko/)
|
232
|
+
* [Specifying database disk location](http://blog.syntheticspeech.de/2022/02/21/specifying-database-disk-location-with-nkululeko/)
|
233
|
+
* [Add dropout with MLP models](http://blog.syntheticspeech.de/2022/02/25/adding-dropout-to-mlp-models-with-nkululeko/)
|
234
|
+
* [Do cross-validation](http://blog.syntheticspeech.de/2022/03/23/how-to-do-cross-validation-with-nkululeko/)
|
235
|
+
* [Combine predictions per speaker](http://blog.syntheticspeech.de/2022/03/24/how-to-combine-predictions-per-speaker-with-nkululeko/)
|
236
|
+
* [Run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
237
|
+
* [Compare several MLP layer layouts with each other](http://blog.syntheticspeech.de/2022/04/11/how-to-compare-several-mlp-layer-layouts-with-each-other/)
|
238
|
+
* [Import features from outside the software](http://blog.syntheticspeech.de/2022/10/18/how-to-import-features-from-outside-the-nkululeko-software/)
|
239
|
+
* [Export acoustic features](http://blog.syntheticspeech.de/2024/05/30/nkululeko-export-acoustic-features/)
|
240
|
+
* [Explore feature importance](http://blog.syntheticspeech.de/2023/02/20/nkululeko-show-feature-importance/)
|
241
|
+
* [Plot distributions for feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
|
242
|
+
* [Show feature importance](http://blog.syntheticspeech.de/2023/02/20/nkululeko-show-feature-importance/)
|
243
|
+
* [Augment the training set](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/)
|
244
|
+
* [Visualize clusters of acoustic features](http://blog.syntheticspeech.de/2023/04/20/nkululeko-visualize-clusters-of-your-acoustic-features/)
|
245
|
+
* [Visualize your data distribution](http://blog.syntheticspeech.de/2023/05/11/nkululeko-how-to-visualize-your-data-distribution/)
|
246
|
+
* [Check your dataset](http://blog.syntheticspeech.de/2023/07/11/nkululeko-check-your-dataset/)
|
247
|
+
* [Segmenting a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/)
|
248
|
+
* [Predict new labels for your data from public models and check bias](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/)
|
249
|
+
* [Resample](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/)
|
250
|
+
* [Get some statistics on correlation and effect-size](http://blog.syntheticspeech.de/2023/09/05/nkululeko-get-some-statistics-on-correlation-and-effect-size/)
|
251
|
+
* [Automatic generation of a latex/pdf report](http://blog.syntheticspeech.de/2023/09/26/nkululeko-generate-a-latex-pdf-report/)
|
252
|
+
* [Inspect your data with Spotlight](http://blog.syntheticspeech.de/2023/10/31/nkululeko-inspect-your-data-with-spotlight/)
|
253
|
+
* [Automatically stratify your split sets](http://blog.syntheticspeech.de/2023/11/07/nkululeko-automatically-stratify-your-split-sets/)
|
254
|
+
* [re-name data column names](http://blog.syntheticspeech.de/2023/11/16/nkululeko-re-name-data-column-names/)
|
255
|
+
* [Oversample the training set](http://blog.syntheticspeech.de/2023/11/16/nkululeko-oversample-the-training-set/)
|
256
|
+
* [Compare several databases](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/)
|
257
|
+
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
258
|
+
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
259
|
+
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
260
|
+
* [Ensemble (combine) classifiers with late-fusion](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
261
|
+
* [Use train, dev and test splits](https://blog.syntheticspeech.de/2025/03/31/nkululeko-how-to-use-train-dev-test-splits/)
|
288
262
|
|
263
|
+
</details>
|
289
264
|
|
290
265
|
## License
|
291
266
|
Nkululeko can be used under the [MIT license](https://choosealicense.com/licenses/mit/).
|
@@ -294,8 +269,8 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
|
|
294
269
|
## Contributing
|
295
270
|
Contributions are welcome and encouraged. To learn more about how to contribute to nkululeko, please refer to the [Contributing guidelines](./CONTRIBUTING.md).
|
296
271
|
|
297
|
-
##
|
298
|
-
If you use
|
272
|
+
## Citation
|
273
|
+
If you use Nkululeko, please cite the paper:
|
299
274
|
|
300
275
|
> F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller: Nkululeko: A Tool For Rapid Speaker Characteristics Detection, Proc. LREC, 2022
|
301
276
|
|
@@ -5,7 +5,7 @@ Code originally by Oliver Pauly
|
|
5
5
|
|
6
6
|
Based on an idea by Klaus Scherer
|
7
7
|
|
8
|
-
K. R. Scherer, “Randomized splicing: A note on a simple technique for masking speech content”
|
8
|
+
K. R. Scherer, “Randomized splicing: A note on a simple technique for masking speech content”
|
9
9
|
Journal of Experimental Research in Personality, vol. 5, pp. 155–159, 1971.
|
10
10
|
|
11
11
|
Evaluated in:
|
@@ -3,7 +3,7 @@ Code originally by Oliver Pauly
|
|
3
3
|
|
4
4
|
Based on an idea by Klaus Scherer
|
5
5
|
|
6
|
-
K. R. Scherer, “Randomized splicing: A note on a simple technique for masking speech content”
|
6
|
+
K. R. Scherer, “Randomized splicing: A note on a simple technique for masking speech content”
|
7
7
|
Journal of Experimental Research in Personality, vol. 5, pp. 155–159, 1971.
|
8
8
|
|
9
9
|
Evaluated in:
|
@@ -17,7 +17,7 @@ class Resampler:
|
|
17
17
|
def __init__(self, df, replace, not_testing=True):
|
18
18
|
self.SAMPLING_RATE = 16000
|
19
19
|
self.df = df
|
20
|
-
self.util = Util("resampler", has_config=not_testing)
|
20
|
+
self.util = Util("resampler", has_config=not not_testing)
|
21
21
|
self.util.warn(f"all files might be resampled to {self.SAMPLING_RATE}")
|
22
22
|
self.not_testing = not_testing
|
23
23
|
self.replace = (
|
@@ -30,7 +30,7 @@ class Resampler:
|
|
30
30
|
files = self.df.index.get_level_values(0).values
|
31
31
|
# replace = eval(self.util.config_val("RESAMPLE", "replace", "False"))
|
32
32
|
replace = self.replace
|
33
|
-
if self.not_testing:
|
33
|
+
if not self.not_testing:
|
34
34
|
store = self.util.get_path("store")
|
35
35
|
else:
|
36
36
|
store = "./"
|
@@ -67,17 +67,25 @@ class Resampler:
|
|
67
67
|
self.df = self.df.set_index(
|
68
68
|
self.df.index.set_levels(new_files, level="file")
|
69
69
|
)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
70
|
+
if not self.not_testing:
|
71
|
+
target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
|
72
|
+
# remove encoded labels
|
73
|
+
target = self.util.config_val("DATA", "target", "emotion")
|
74
|
+
if "class_label" in self.df.columns:
|
75
|
+
self.df = self.df.drop(columns=[target])
|
76
|
+
self.df = self.df.rename(columns={"class_label": target})
|
77
|
+
# save file
|
78
|
+
self.df.to_csv(target_file)
|
79
|
+
self.util.debug(
|
80
|
+
"saved resampled list of files to" f" {os.path.abspath(target_file)}"
|
81
|
+
)
|
82
|
+
else:
|
83
|
+
# When running from command line, save to simple resampled.csv
|
84
|
+
target_file = "resampled.csv"
|
85
|
+
self.df.to_csv(target_file)
|
86
|
+
self.util.debug(
|
87
|
+
f"saved resampled list of files to {os.path.abspath(target_file)}"
|
88
|
+
)
|
81
89
|
self.util.debug(f"resampled {succes} files, {error} errors")
|
82
90
|
|
83
91
|
|
@@ -91,7 +99,7 @@ def main():
|
|
91
99
|
df_sample.index, allow_nat=False
|
92
100
|
)
|
93
101
|
df_sample.head(10)
|
94
|
-
resampler = Resampler(df_sample, not_testing=False)
|
102
|
+
resampler = Resampler(df_sample, False, not_testing=False)
|
95
103
|
resampler.resample()
|
96
104
|
shutil.copyfile(testfile, "tmp.resample_result.wav")
|
97
105
|
shutil.copyfile("tmp.wav", testfile)
|